InferenceLab committed on
Commit
abb2d34
·
verified ·
1 Parent(s): f9cae0f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from google import genai
3
+ from google.genai import types
4
+ import wave
5
+ import os
6
+ from dotenv import load_dotenv
7
+
8
# Read the Gemini API key from the environment (load_dotenv() is called first
# so a local .env file can supply it) and build the shared API client.
load_dotenv()
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
client = genai.Client(api_key=GOOGLE_API_KEY)
12
+
13
+ # Save audio from PCM to WAV
14
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM audio to ``filename`` as a WAV file.

    Args:
        filename: Destination path (or writable binary file object).
        pcm: Raw PCM frames as bytes.
        channels: Number of audio channels written to the WAV header.
        rate: Sample rate in Hz written to the WAV header.
        sample_width: Bytes per sample written to the WAV header.
    """
    with wave.open(filename, "wb") as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(pcm)
20
+
21
+ # Gemini TTS generation function
22
def generate_speech(text, voice):
    """Synthesize ``text`` with the Gemini TTS model and save it as a WAV file.

    Args:
        text: The text to convert to speech.
        voice: Name of the prebuilt voice passed to the model.

    Returns:
        A ``(audio_path, file_path, status)`` tuple for the three UI outputs.
        On success both paths point at the same freshly written WAV file;
        on any failure both are ``None`` and ``status`` starts with
        ``"Error: "``.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Reject blank input up front instead of spending an API call on it.
    if not text or not text.strip():
        return None, None, "Error: Please enter some text to convert to speech."

    try:
        response = client.models.generate_content(
            model="gemini-2.5-flash-preview-tts",
            contents=text,
            config=types.GenerateContentConfig(
                response_modalities=["AUDIO"],
                speech_config=types.SpeechConfig(
                    voice_config=types.VoiceConfig(
                        prebuilt_voice_config=types.PrebuiltVoiceConfig(
                            voice_name=voice
                        )
                    )
                ),
            ),
        )

        # Any missing link in the response chain means no audio came back;
        # surface that as one clear message rather than an opaque
        # IndexError/AttributeError.
        try:
            audio_data = response.candidates[0].content.parts[0].inline_data.data
        except (AttributeError, IndexError, TypeError) as err:
            raise ValueError("The model returned no audio data.") from err
        if not audio_data:
            raise ValueError("The model returned no audio data.")

        # Use a unique file per request: a fixed "output.wav" would let
        # concurrent requests overwrite each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        wave_file(output_path, audio_data)
        return output_path, output_path, "Speech generated successfully."

    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, None, f"Error: {e}"
46
+
47
+ # Gradio app using Blocks
48
with gr.Blocks(title="Gemini TTS Demo") as demo:
    gr.Markdown("## Google Gemini Text-to-Speech")
    gr.Markdown("Enter text below, choose a voice, and listen to the generated speech.")

    # Inputs: the text to speak and the prebuilt voice to speak it with.
    with gr.Row():
        text_input = gr.Textbox(
            lines=3,
            label="Enter Text",
            placeholder="Example: Welcome to the world of AI.",
        )
        # NOTE(review): "Wes" does not appear in the Gemini TTS prebuilt voice
        # list as far as I can tell, so it was replaced with "Puck" — confirm
        # against the current voice list.
        voice_input = gr.Dropdown(
            choices=["Kore", "Puck"],
            value="Kore",
            label="Select Voice",
        )

    with gr.Row():
        generate_btn = gr.Button("Generate Speech", variant="primary")

    # Outputs: inline player, downloadable file, and a status message.
    with gr.Row():
        audio_output = gr.Audio(label="Generated Audio")
        file_output = gr.File(label="Download Audio File")
        status_output = gr.Textbox(label="Status", interactive=False)

    # Clickable sample inputs that pre-fill the text and voice fields.
    examples = gr.Examples(
        examples=[
            ["Good morning! Hope you have a great day ahead.", "Kore"],
            ["Welcome to the future of AI voice generation.", "Puck"],
            ["Your appointment is scheduled for 3 PM on Monday.", "Kore"],
            ["This is a demo of Google's Gemini text-to-speech feature.", "Puck"],
        ],
        inputs=[text_input, voice_input],
    )

    # generate_speech returns (audio path, file path, status) in this order.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_input],
        outputs=[audio_output, file_output, status_output],
    )

demo.launch()