Update app.py
Browse files
app.py
CHANGED
@@ -329,11 +329,11 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
329 |
def together_text_to_speech(text: str = "", voice: str = ""):
|
330 |
"""
|
331 |
Converts text to speech using Together AI's audio API.
|
332 |
-
|
333 |
Args:
|
334 |
text (str): The text to convert to speech
|
335 |
voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
|
336 |
-
|
337 |
Returns:
|
338 |
str: Path to the generated audio file or error message
|
339 |
"""
|
@@ -341,7 +341,7 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
341 |
return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
|
342 |
if not text:
|
343 |
return None, "Please enter text to convert to speech."
|
344 |
-
|
345 |
try:
|
346 |
speech_file_path = "speech.mp3"
|
347 |
response = client.audio.speech.create(
|
@@ -366,11 +366,15 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
366 |
gr.Warning("This requires setting the OPENAI_TTS_TEMPLATE environment variable.")
|
367 |
|
368 |
# --- Environment Variable for OpenAI TTS Template ---
|
369 |
-
|
|
|
370 |
|
371 |
-
if not OPENAI_TTS_TEMPLATE:
|
|
|
|
|
372 |
gr.Warning("Warning: OPENAI_TTS_TEMPLATE not set. OpenAI TTS tab will not function.")
|
373 |
|
|
|
374 |
# A list of available voices for the TTS model.
|
375 |
OPENAI_VOICES = [
|
376 |
"alloy", "echo", "fable", "onyx", "nova", "shimmer",
|
@@ -379,9 +383,9 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
379 |
|
380 |
def openai_generate_audio(prompt: str, voice: str, emotion_style: str) -> bytes:
|
381 |
"""
|
382 |
-
Generates audio by calling the specified OpenAI-compatible TTS API endpoint.
|
383 |
|
384 |
-
This function constructs the API request
|
385 |
the response, returning the audio content as bytes.
|
386 |
|
387 |
Args:
|
@@ -399,24 +403,28 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
399 |
raise gr.Error("OPENAI_TTS_TEMPLATE is not configured.")
|
400 |
|
401 |
try:
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
)
|
412 |
-
|
413 |
-
response = requests.get(url, timeout=60)
|
414 |
response.raise_for_status()
|
415 |
|
416 |
content_type = response.headers.get('content-type', '').lower()
|
417 |
if 'audio' not in content_type:
|
418 |
print(f"Warning: Unexpected content type '{content_type}'. Response: {response.text[:500]}")
|
419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
420 |
|
421 |
return response.content
|
422 |
|
@@ -448,13 +456,26 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
|
|
448 |
raise gr.Error("Prompt cannot be empty.")
|
449 |
if not voice:
|
450 |
raise gr.Error("Please select a voice.")
|
|
|
451 |
if not emotion_style:
|
452 |
-
|
|
|
453 |
|
454 |
try:
|
455 |
audio_bytes = openai_generate_audio(prompt, voice, emotion_style)
|
456 |
|
457 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
temp_audio_file.write(audio_bytes)
|
459 |
temp_file_path = temp_audio_file.name
|
460 |
|
|
|
329 |
def together_text_to_speech(text: str = "", voice: str = ""):
|
330 |
"""
|
331 |
Converts text to speech using Together AI's audio API.
|
332 |
+
|
333 |
Args:
|
334 |
text (str): The text to convert to speech
|
335 |
voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
|
336 |
+
|
337 |
Returns:
|
338 |
str: Path to the generated audio file or error message
|
339 |
"""
|
|
|
341 |
return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
|
342 |
if not text:
|
343 |
return None, "Please enter text to convert to speech."
|
344 |
+
|
345 |
try:
|
346 |
speech_file_path = "speech.mp3"
|
347 |
response = client.audio.speech.create(
|
|
|
366 |
gr.Warning("This requires setting the OPENAI_TTS_TEMPLATE environment variable.")
|
367 |
|
368 |
# --- Environment Variable for OpenAI TTS Template ---
|
369 |
+
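# Default POST endpoint for the TTS API. NOTE(review): assigned as a literal; the OPENAI_TTS_TEMPLATE environment variable is only checked below, not read into this value — confirm whether an override is intended.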
|
370 |
+
OPENAI_TTS_TEMPLATE = "https://www.openai.fm/api/generate"
|
371 |
|
372 |
+
if OPENAI_TTS_TEMPLATE == "https://www.openai.fm/api/generate" and not os.getenv("OPENAI_TTS_TEMPLATE"):
|
373 |
+
gr.Warning(f"Using default OPENAI_TTS_TEMPLATE: {OPENAI_TTS_TEMPLATE}. You can override this by setting the OPENAI_TTS_TEMPLATE environment variable.")
|
374 |
+
elif not OPENAI_TTS_TEMPLATE:
|
375 |
gr.Warning("Warning: OPENAI_TTS_TEMPLATE not set. OpenAI TTS tab will not function.")
|
376 |
|
377 |
+
|
378 |
# A list of available voices for the TTS model.
|
379 |
OPENAI_VOICES = [
|
380 |
"alloy", "echo", "fable", "onyx", "nova", "shimmer",
|
|
|
383 |
|
384 |
def openai_generate_audio(prompt: str, voice: str, emotion_style: str) -> bytes:
|
385 |
"""
|
386 |
+
Generates audio by calling the specified OpenAI-compatible TTS API endpoint using POST.
|
387 |
|
388 |
+
This function constructs the API request body, sends the POST request, and handles
|
389 |
the response, returning the audio content as bytes.
|
390 |
|
391 |
Args:
|
|
|
403 |
raise gr.Error("OPENAI_TTS_TEMPLATE is not configured.")
|
404 |
|
405 |
try:
|
406 |
+
# Construct the POST request body as form data
|
407 |
+
data = {
|
408 |
+
'input': prompt,
|
409 |
+
'voice': voice,
|
410 |
+
'prompt': f"Voice: {emotion_style}", # Format for the 'prompt' field in the POST body
|
411 |
+
'vibe': 'null' # As observed in the payload
|
412 |
+
}
|
413 |
+
|
414 |
+
response = requests.post(OPENAI_TTS_TEMPLATE, data=data, timeout=60)
|
|
|
|
|
|
|
415 |
response.raise_for_status()
|
416 |
|
417 |
content_type = response.headers.get('content-type', '').lower()
|
418 |
if 'audio' not in content_type:
|
419 |
print(f"Warning: Unexpected content type '{content_type}'. Response: {response.text[:500]}")
|
420 |
+
# Check if the response might contain an error message as JSON
|
421 |
+
try:
|
422 |
+
error_data = response.json()
|
423 |
+
error_message = error_data.get('error', 'Unknown error from API')
|
424 |
+
raise gr.Error(f"The API did not return an audio file: {error_message}")
|
425 |
+
except json.JSONDecodeError:
|
426 |
+
raise gr.Error("The API did not return an audio file. It may be temporarily down or returned a non-audio response.")
|
427 |
+
|
428 |
|
429 |
return response.content
|
430 |
|
|
|
456 |
raise gr.Error("Prompt cannot be empty.")
|
457 |
if not voice:
|
458 |
raise gr.Error("Please select a voice.")
|
459 |
+
# An empty emotion style is allowed: fall back to "neutral" here before calling openai_generate_audio.
|
460 |
if not emotion_style:
|
461 |
+
emotion_style = "neutral"
|
462 |
+
|
463 |
|
464 |
try:
|
465 |
audio_bytes = openai_generate_audio(prompt, voice, emotion_style)
|
466 |
|
467 |
+
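# Pick the temp-file suffix from the content type reported by a separate HEAD request to the endpoint (not from the POST response that produced audio_bytes); default to .mp3.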
|
468 |
+
content_type = requests.head(OPENAI_TTS_TEMPLATE).headers.get('content-type', '').lower()
|
469 |
+
suffix = ".mp3" # Default
|
470 |
+
if 'wav' in content_type:
|
471 |
+
suffix = ".wav"
|
472 |
+
elif 'ogg' in content_type:
|
473 |
+
suffix = ".ogg"
|
474 |
+
elif 'aac' in content_type:
|
475 |
+
suffix = ".aac"
|
476 |
+
|
477 |
+
|
478 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
|
479 |
temp_audio_file.write(audio_bytes)
|
480 |
temp_file_path = temp_audio_file.name
|
481 |
|