KingNish committed on
Commit
9a91aa1
·
verified ·
1 Parent(s): 9c6636c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -22
app.py CHANGED
@@ -329,11 +329,11 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
329
  def together_text_to_speech(text: str = "", voice: str = ""):
330
  """
331
  Converts text to speech using Together AI's audio API.
332
-
333
  Args:
334
  text (str): The text to convert to speech
335
  voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
336
-
337
  Returns:
338
  str: Path to the generated audio file or error message
339
  """
@@ -341,7 +341,7 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
341
  return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
342
  if not text:
343
  return None, "Please enter text to convert to speech."
344
-
345
  try:
346
  speech_file_path = "speech.mp3"
347
  response = client.audio.speech.create(
@@ -366,11 +366,15 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
366
  gr.Warning("This requires setting the OPENAI_TTS_TEMPLATE environment variable.")
367
 
368
  # --- Environment Variable for OpenAI TTS Template ---
369
- OPENAI_TTS_TEMPLATE = os.getenv("OPENAI_TTS_TEMPLATE")
 
370
 
371
- if not OPENAI_TTS_TEMPLATE:
 
 
372
  gr.Warning("Warning: OPENAI_TTS_TEMPLATE not set. OpenAI TTS tab will not function.")
373
 
 
374
  # A list of available voices for the TTS model.
375
  OPENAI_VOICES = [
376
  "alloy", "echo", "fable", "onyx", "nova", "shimmer",
@@ -379,9 +383,9 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
379
 
380
  def openai_generate_audio(prompt: str, voice: str, emotion_style: str) -> bytes:
381
  """
382
- Generates audio by calling the specified OpenAI-compatible TTS API endpoint.
383
 
384
- This function constructs the API request URL, sends the request, and handles
385
  the response, returning the audio content as bytes.
386
 
387
  Args:
@@ -399,24 +403,28 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
399
  raise gr.Error("OPENAI_TTS_TEMPLATE is not configured.")
400
 
401
  try:
402
- encoded_input = urllib.parse.quote(prompt)
403
-
404
- style_prompt = f"Tone: {emotion_style}"
405
- encoded_style_prompt = urllib.parse.quote(style_prompt)
406
-
407
- url = OPENAI_TTS_TEMPLATE.format(
408
- input=encoded_input,
409
- style_prompt=encoded_style_prompt,
410
- voice=voice
411
- )
412
-
413
- response = requests.get(url, timeout=60)
414
  response.raise_for_status()
415
 
416
  content_type = response.headers.get('content-type', '').lower()
417
  if 'audio' not in content_type:
418
  print(f"Warning: Unexpected content type '{content_type}'. Response: {response.text[:500]}")
419
- raise gr.Error("The API did not return an audio file. It may be temporarily down.")
 
 
 
 
 
 
 
420
 
421
  return response.content
422
 
@@ -448,13 +456,26 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
448
  raise gr.Error("Prompt cannot be empty.")
449
  if not voice:
450
  raise gr.Error("Please select a voice.")
 
451
  if not emotion_style:
452
- emotion_style = "neutral"
 
453
 
454
  try:
455
  audio_bytes = openai_generate_audio(prompt, voice, emotion_style)
456
 
457
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
 
 
 
 
 
 
 
 
 
 
 
458
  temp_audio_file.write(audio_bytes)
459
  temp_file_path = temp_audio_file.name
460
 
 
329
  def together_text_to_speech(text: str = "", voice: str = ""):
330
  """
331
  Converts text to speech using Together AI's audio API.
332
+
333
  Args:
334
  text (str): The text to convert to speech
335
  voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
336
+
337
  Returns:
338
  str: Path to the generated audio file or error message
339
  """
 
341
  return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
342
  if not text:
343
  return None, "Please enter text to convert to speech."
344
+
345
  try:
346
  speech_file_path = "speech.mp3"
347
  response = client.audio.speech.create(
 
366
  gr.Warning("This requires setting the OPENAI_TTS_TEMPLATE environment variable.")
367
 
368
  # --- Environment Variable for OpenAI TTS Template ---
369
+ # Changed to match the new POST endpoint structure
370
+ OPENAI_TTS_TEMPLATE = "https://www.openai.fm/api/generate"
371
 
372
+ if OPENAI_TTS_TEMPLATE == "https://www.openai.fm/api/generate" and not os.getenv("OPENAI_TTS_TEMPLATE"):
373
+ gr.Warning(f"Using default OPENAI_TTS_TEMPLATE: {OPENAI_TTS_TEMPLATE}. You can override this by setting the OPENAI_TTS_TEMPLATE environment variable.")
374
+ elif not OPENAI_TTS_TEMPLATE:
375
  gr.Warning("Warning: OPENAI_TTS_TEMPLATE not set. OpenAI TTS tab will not function.")
376
 
377
+
378
  # A list of available voices for the TTS model.
379
  OPENAI_VOICES = [
380
  "alloy", "echo", "fable", "onyx", "nova", "shimmer",
 
383
 
384
  def openai_generate_audio(prompt: str, voice: str, emotion_style: str) -> bytes:
385
  """
386
+ Generates audio by calling the specified OpenAI-compatible TTS API endpoint using POST.
387
 
388
+ This function constructs the API request body, sends the POST request, and handles
389
  the response, returning the audio content as bytes.
390
 
391
  Args:
 
403
  raise gr.Error("OPENAI_TTS_TEMPLATE is not configured.")
404
 
405
  try:
406
+ # Construct the POST request body as form data
407
+ data = {
408
+ 'input': prompt,
409
+ 'voice': voice,
410
+ 'prompt': f"Voice: {emotion_style}", # Format for the 'prompt' field in the POST body
411
+ 'vibe': 'null' # As observed in the payload
412
+ }
413
+
414
+ response = requests.post(OPENAI_TTS_TEMPLATE, data=data, timeout=60)
 
 
 
415
  response.raise_for_status()
416
 
417
  content_type = response.headers.get('content-type', '').lower()
418
  if 'audio' not in content_type:
419
  print(f"Warning: Unexpected content type '{content_type}'. Response: {response.text[:500]}")
420
+ # Check if the response might contain an error message as JSON
421
+ try:
422
+ error_data = response.json()
423
+ error_message = error_data.get('error', 'Unknown error from API')
424
+ raise gr.Error(f"The API did not return an audio file: {error_message}")
425
+ except json.JSONDecodeError:
426
+ raise gr.Error("The API did not return an audio file. It may be temporarily down or returned a non-audio response.")
427
+
428
 
429
  return response.content
430
 
 
456
  raise gr.Error("Prompt cannot be empty.")
457
  if not voice:
458
  raise gr.Error("Please select a voice.")
459
+ # An empty emotion style is allowed; it falls back to "neutral" right here
460
  if not emotion_style:
461
+ emotion_style = "neutral"
462
+
463
 
464
  try:
465
  audio_bytes = openai_generate_audio(prompt, voice, emotion_style)
466
 
467
+ # Determine audio format from content type if possible, default to mp3
468
+ content_type = requests.head(OPENAI_TTS_TEMPLATE).headers.get('content-type', '').lower()
469
+ suffix = ".mp3" # Default
470
+ if 'wav' in content_type:
471
+ suffix = ".wav"
472
+ elif 'ogg' in content_type:
473
+ suffix = ".ogg"
474
+ elif 'aac' in content_type:
475
+ suffix = ".aac"
476
+
477
+
478
+ with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
479
  temp_audio_file.write(audio_bytes)
480
  temp_file_path = temp_audio_file.name
481