Update app.py
Browse files
app.py
CHANGED
@@ -300,6 +300,72 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
300 |
outputs=together_image_to_image_output,
|
301 |
)
|
302 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
# --- Launch the Gradio app ---
|
304 |
if __name__ == "__main__":
|
305 |
demo.launch(mcp_server=True)
|
|
|
300 |
outputs=together_image_to_image_output,
|
301 |
)
|
302 |
|
303 |
+
# Tab: text-to-speech via Together AI. Builds the input widgets and the audio
# player; the handler and click wiring follow this section.
# NOTE(review): the diff view this was recovered from does not preserve
# indentation. The Row is assumed to hold the text box, the voice dropdown and
# the button, with the audio player below it -- confirm against the real file.
with gr.Tab("Together AI - Text to Audio"):
    gr.Markdown("Generate audio from text using Together AI's text-to-speech models.")
    # gr.Warning() only shows a toast when called from inside an event
    # listener; at layout-build time nothing appears in the UI, so the notice
    # is rendered as regular Markdown instead.
    gr.Markdown("**Note:** This requires setting the TOGETHER_API_KEY environment variable.")

    with gr.Row():
        tts_input_text = gr.Textbox(label="Enter text to convert to speech", lines=3)
        tts_voice_selection = gr.Dropdown(
            label="Select Voice",
            choices=[
                'helpful woman',
                'customer support lady',
                'professional woman',
                'pleasant man',
                'calm lady',
                'friendly reading man',
                'wise man',
                'newsman',
                'reading lady',
                'british lady',
                'australian woman',
                'american woman',
                'american man',
                'narrator woman',
                'narrator man',
            ],
            value="helpful woman",
        )
        tts_generate_button = gr.Button("Generate Audio")

    # Read-only player that receives the generated audio file.
    tts_audio_output = gr.Audio(label="Generated Audio", interactive=False)
|
333 |
+
|
334 |
+
def text_to_speech(text: str = "", voice: str = ""):
    """
    Converts text to speech using Together AI's audio API. Failures (client not initialized, empty text, API errors) are reported by raising gr.Error instead of being returned.

    Args:
        text (str): The text to convert to speech
        voice (str): The voice to use for speech synthesis. All available voices are: helpful woman, customer support lady, professional woman, pleasant man, calm lady, friendly reading man, wise man, newsman, reading lady, british lady, australian woman, american woman, american man, narrator woman, narrator man. Default is helpful woman.

    Returns:
        str: Path to the generated audio file
    """
    # Local imports keep this block self-contained without touching the
    # file-level import section.
    import os
    import tempfile

    # The handler feeds a single gr.Audio output, so errors cannot be returned
    # as an extra "(None, message)" value; gr.Error surfaces the message in the
    # UI (and to API/MCP callers) instead.
    if not client:
        raise gr.Error(
            "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
        )
    if not text:
        raise gr.Error("Please enter text to convert to speech.")

    try:
        response = client.audio.speech.create(
            model="cartesia/sonic",
            input=text,
            # An empty voice falls back to the documented default.
            voice=voice or "helpful woman",
        )
        # Write to a unique temp file so concurrent requests do not overwrite
        # each other's audio (a fixed "speech.mp3" is shared by every caller).
        # The file is left on disk for the caller to serve.
        fd, speech_file_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)
        response.stream_to_file(speech_file_path)
        return speech_file_path
    except Exception as e:
        raise gr.Error(f"Error generating speech: {e}") from e
|
361 |
+
|
362 |
+
# Wire the button: send the entered text and selected voice to
# text_to_speech and load its result into the audio player.
tts_handler_inputs = [tts_input_text, tts_voice_selection]
tts_generate_button.click(text_to_speech, tts_handler_inputs, tts_audio_output)
|
367 |
+
|
368 |
+
|
369 |
# --- Launch the Gradio app ---
if __name__ == "__main__":
    # Only start the server when run as a script; mcp_server=True is passed
    # through to launch() unchanged.
    demo.launch(mcp_server=True)
|