KingNish committed on
Commit
a2f6e94
·
verified ·
1 Parent(s): 9a91aa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -198
app.py CHANGED
@@ -3,30 +3,19 @@ import requests
3
  import random
4
  import json
5
  import os
6
- import urllib.parse
7
- import tempfile
8
  from together import Together
9
 
10
  # --- Environment Variable for Together API Key ---
11
  TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY')
12
 
13
  # --- Together API Client Configuration ---
14
- # Initialize client only if key is available to avoid errors on startup
15
- client = None
16
- if TOGETHER_API_KEY:
17
- client = Together(api_key=TOGETHER_API_KEY)
18
- else:
19
- print("Warning: TOGETHER_API_KEY not set. Together AI tabs will not function.")
20
-
21
 
22
  # --- Pixabay API Configuration ---
23
  PIXABAY_API_KEY = os.environ.get('PIXABAY_API_KEY')
24
  IMAGE_API_URL = 'https://pixabay.com/api/'
25
  VIDEO_API_URL = 'https://pixabay.com/api/videos/'
26
  PER_PAGE = 5
27
- if not PIXABAY_API_KEY:
28
- print("Warning: PIXABAY_API_KEY not set. Pixabay Search tab will not function.")
29
-
30
 
31
  def image_to_url(image_path):
32
  """
@@ -224,7 +213,7 @@ def together_image_to_image(image_path: str = None, prompt: str = ""):
224
  # --- Gradio Blocks Interface Definition ---
225
  with gr.Blocks(title="Media Generation and Search Explorer") as demo:
226
  gr.Markdown("## Media Generation and Search Explorer")
227
- gr.Markdown("Explore royalty-free media from Pixabay and generate/transform images and audio using Together AI.")
228
 
229
  with gr.Tab("Pixabay Search"):
230
  gr.Markdown("Search for royalty-free images and videos on Pixabay.")
@@ -311,10 +300,10 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
311
  outputs=together_image_to_image_output,
312
  )
313
 
314
- with gr.Tab("Together AI - Text to Speech"):
315
  gr.Markdown("Generate audio from text using Together AI's text-to-speech models.")
316
  gr.Warning("This requires setting the TOGETHER_API_KEY environment variable.")
317
-
318
  with gr.Row():
319
  tts_input_text = gr.Textbox(label="Enter text to convert to speech", lines=3)
320
  tts_voice_selection = gr.Dropdown(
@@ -323,17 +312,17 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
323
  value="helpful woman"
324
  )
325
  tts_generate_button = gr.Button("Generate Audio")
326
-
327
  tts_audio_output = gr.Audio(label="Generated Audio", interactive=False)
328
-
329
- def together_text_to_speech(text: str = "", voice: str = ""):
330
  """
331
  Converts text to speech using Together AI's audio API.
332
-
333
  Args:
334
  text (str): The text to convert to speech
335
  voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
336
-
337
  Returns:
338
  str: Path to the generated audio file or error message
339
  """
@@ -341,7 +330,7 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
341
  return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
342
  if not text:
343
  return None, "Please enter text to convert to speech."
344
-
345
  try:
346
  speech_file_path = "speech.mp3"
347
  response = client.audio.speech.create(
@@ -352,188 +341,14 @@ with gr.Blocks(title="Media Generation and Search Explorer") as demo:
352
  response.stream_to_file(speech_file_path)
353
  return speech_file_path
354
  except Exception as e:
355
- return f"Error generating speech: {e}"
356
-
357
-
358
  tts_generate_button.click(
359
- fn=together_text_to_speech,
360
  inputs=[tts_input_text, tts_voice_selection],
361
  outputs=tts_audio_output
362
  )
363
 
364
- with gr.Tab("OpenAI - Text to Speech"):
365
- gr.Markdown("Generate audio from text using an OpenAI-compatible text-to-speech API.")
366
- gr.Warning("This requires setting the OPENAI_TTS_TEMPLATE environment variable.")
367
-
368
# --- Environment Variable for OpenAI TTS Template ---
# POST endpoint used for speech generation. The default can be overridden
# via the OPENAI_TTS_TEMPLATE environment variable; the original code
# hard-coded the URL, which made the advertised override impossible and
# left the "not set" branch unreachable.
DEFAULT_OPENAI_TTS_TEMPLATE = "https://www.openai.fm/api/generate"
OPENAI_TTS_TEMPLATE = os.getenv("OPENAI_TTS_TEMPLATE", DEFAULT_OPENAI_TTS_TEMPLATE)

if not OPENAI_TTS_TEMPLATE:
    # Env var was set to an empty string — the tab cannot work.
    gr.Warning("Warning: OPENAI_TTS_TEMPLATE not set. OpenAI TTS tab will not function.")
elif not os.getenv("OPENAI_TTS_TEMPLATE"):
    # No override supplied — fall back to the public default endpoint.
    gr.Warning(f"Using default OPENAI_TTS_TEMPLATE: {OPENAI_TTS_TEMPLATE}. You can override this by setting the OPENAI_TTS_TEMPLATE environment variable.")
376
-
377
-
378
# Voice presets accepted by the OpenAI-compatible TTS endpoint.
OPENAI_VOICES = [
    "alloy",
    "echo",
    "fable",
    "onyx",
    "nova",
    "shimmer",
    "coral",
    "verse",
    "ballad",
    "ash",
    "sage",
    "amuch",
    "dan",
]
383
-
384
def openai_generate_audio(prompt: str, voice: str, emotion_style: str) -> bytes:
    """
    Generates audio by calling the specified OpenAI-compatible TTS API endpoint using POST.

    This function constructs the API request body, sends the POST request, and handles
    the response, returning the audio content as bytes.

    Args:
        prompt: The text to be converted to speech.
        voice: The voice model to use for generation (e.g., 'alloy', 'coral').
        emotion_style: A description of the desired emotional tone (e.g., 'happy', 'sad').

    Returns:
        The raw audio data in bytes.

    Raises:
        gr.Error: If the API call fails, times out, or returns an unexpected content type.
    """
    if not OPENAI_TTS_TEMPLATE:
        raise gr.Error("OPENAI_TTS_TEMPLATE is not configured.")

    try:
        # Construct the POST request body as form data
        data = {
            'input': prompt,
            'voice': voice,
            'prompt': f"Voice: {emotion_style}",  # Format for the 'prompt' field in the POST body
            'vibe': 'null'  # As observed in the payload
        }

        response = requests.post(OPENAI_TTS_TEMPLATE, data=data, timeout=60)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        if 'audio' not in content_type:
            print(f"Warning: Unexpected content type '{content_type}'. Response: {response.text[:500]}")
            # Check if the response might contain an error message as JSON
            try:
                error_data = response.json()
                error_message = error_data.get('error', 'Unknown error from API')
                raise gr.Error(f"The API did not return an audio file: {error_message}")
            except json.JSONDecodeError:
                raise gr.Error("The API did not return an audio file. It may be temporarily down or returned a non-audio response.")

        return response.content

    except gr.Error:
        # Re-raise Gradio-facing errors unchanged. Without this clause the
        # generic `except Exception` below would swallow the specific
        # content-type error raised above and replace it with a vaguer
        # "unexpected error" message.
        raise
    except requests.exceptions.RequestException as e:
        print(f"Error during audio generation: {e}")
        raise gr.Error("Failed to generate audio. The external API may be busy or down. Please try again later.")
    except Exception as e:
        print(f"Unexpected error during audio generation: {e}")
        raise gr.Error("An unexpected error occurred during audio generation.")
437
-
438
-
439
- def openai_text_to_speech_app(prompt: str, voice: str, emotion_style: str):
440
- """
441
- Main Gradio function for OpenAI TTS to handle the text-to-speech conversion process.
442
-
443
- It validates user inputs, calls the audio generation function, saves the
444
- resulting audio to a temporary file, and returns the file path and a
445
- status message to the Gradio interface.
446
-
447
- Args:
448
- prompt: The text input from the user.
449
- voice: The selected voice.
450
- emotion_style: The desired emotional style.
451
-
452
- Returns:
453
- A tuple containing the filepath to the generated audio and a status string.
454
- """
455
- if not prompt:
456
- raise gr.Error("Prompt cannot be empty.")
457
- if not voice:
458
- raise gr.Error("Please select a voice.")
459
- # Allow empty emotion style, default handling is in openai_generate_audio if needed
460
- if not emotion_style:
461
- emotion_style = "neutral"
462
-
463
-
464
- try:
465
- audio_bytes = openai_generate_audio(prompt, voice, emotion_style)
466
-
467
- # Determine audio format from content type if possible, default to mp3
468
- content_type = requests.head(OPENAI_TTS_TEMPLATE).headers.get('content-type', '').lower()
469
- suffix = ".mp3" # Default
470
- if 'wav' in content_type:
471
- suffix = ".wav"
472
- elif 'ogg' in content_type:
473
- suffix = ".ogg"
474
- elif 'aac' in content_type:
475
- suffix = ".aac"
476
-
477
-
478
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio_file:
479
- temp_audio_file.write(audio_bytes)
480
- temp_file_path = temp_audio_file.name
481
-
482
- status_message = f"Audio generated successfully with voice '{voice}' and style '{emotion_style}'."
483
- return temp_file_path, status_message
484
-
485
- except gr.Error as e:
486
- return None, str(e)
487
- except Exception as e:
488
- print(f"Unexpected error in main function: {e}")
489
- return None, f"An unexpected error occurred: {e}"
490
-
491
-
492
# --- OpenAI TTS tab layout ---
# Input column: prompt text, a free-form emotion/style description, and a
# voice preset selector (defaults to "alloy").
with gr.Row():
    with gr.Column(scale=2):
        openai_prompt_input = gr.Textbox(
            label="Prompt",
            placeholder="Enter the text you want to convert to speech..."
        )
        openai_emotion_input = gr.Textbox(
            label="Emotion Style",
            placeholder="e.g., happy, sad, excited, sarcastic and mocking..."
        )
        openai_voice_dropdown = gr.Dropdown(
            label="Voice",
            choices=OPENAI_VOICES,
            value="alloy"
        )

openai_submit_button = gr.Button("Generate Audio", variant="primary")

# Output row: the generated audio file plus a human-readable status line.
with gr.Row():
    openai_audio_output = gr.Audio(label="Generated Audio", type="filepath")
    openai_status_output = gr.Textbox(label="Status", interactive=False)

# NOTE(review): the handler signature is (prompt, voice, emotion_style), so
# the dropdown is deliberately listed between the two textboxes here.
openai_submit_button.click(
    fn=openai_text_to_speech_app,
    inputs=[
        openai_prompt_input,
        openai_voice_dropdown,
        openai_emotion_input,
    ],
    outputs=[openai_audio_output, openai_status_output],
)

# Clickable example prompts; cache disabled so each run hits the live API.
gr.Examples(
    examples=[
        ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral"],
        ["This technology is absolutely amazing! I can't believe how real it sounds.", "nova", "excited and joyful"],
        ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking"],
        ["[sobbing] I am feeling... [sighs] a bit down today. [cry]", "ballad", "sad and depressed, with stammering"],
    ],
    inputs=[openai_prompt_input, openai_voice_dropdown, openai_emotion_input],
    outputs=[openai_audio_output, openai_status_output],
    fn=openai_text_to_speech_app,
    cache_examples=False,
)
536
-
537
 
538
  # --- Launch the Gradio app ---
539
  if __name__ == "__main__":
 
3
  import random
4
  import json
5
  import os
 
 
6
  from together import Together
7
 
8
# --- Environment Variable for Together API Key ---
TOGETHER_API_KEY = os.environ.get('TOGETHER_API_KEY')

# --- Together API Client Configuration ---
# Only construct the client when a key is present. Building
# Together(api_key=None) can fail at import time, and the TTS handler
# already treats a falsy `client` as "not initialized" and returns a
# friendly error message instead of crashing.
client = Together(api_key=TOGETHER_API_KEY) if TOGETHER_API_KEY else None
if not TOGETHER_API_KEY:
    print("Warning: TOGETHER_API_KEY not set. Together AI tabs will not function.")
 
 
 
 
 
 
13
 
14
  # --- Pixabay API Configuration ---
15
  PIXABAY_API_KEY = os.environ.get('PIXABAY_API_KEY')
16
  IMAGE_API_URL = 'https://pixabay.com/api/'
17
  VIDEO_API_URL = 'https://pixabay.com/api/videos/'
18
  PER_PAGE = 5
 
 
 
19
 
20
  def image_to_url(image_path):
21
  """
 
213
  # --- Gradio Blocks Interface Definition ---
214
  with gr.Blocks(title="Media Generation and Search Explorer") as demo:
215
  gr.Markdown("## Media Generation and Search Explorer")
216
+ gr.Markdown("Explore royalty-free media from Pixabay and generate/transform images using Together AI.")
217
 
218
  with gr.Tab("Pixabay Search"):
219
  gr.Markdown("Search for royalty-free images and videos on Pixabay.")
 
300
  outputs=together_image_to_image_output,
301
  )
302
 
303
+ with gr.Tab("Together AI - Text to Audio"):
304
  gr.Markdown("Generate audio from text using Together AI's text-to-speech models.")
305
  gr.Warning("This requires setting the TOGETHER_API_KEY environment variable.")
306
+
307
  with gr.Row():
308
  tts_input_text = gr.Textbox(label="Enter text to convert to speech", lines=3)
309
  tts_voice_selection = gr.Dropdown(
 
312
  value="helpful woman"
313
  )
314
  tts_generate_button = gr.Button("Generate Audio")
315
+
316
  tts_audio_output = gr.Audio(label="Generated Audio", interactive=False)
317
+
318
+ def text_to_speech(text: str = "", voice: str = ""):
319
  """
320
  Converts text to speech using Together AI's audio API.
321
+
322
  Args:
323
  text (str): The text to convert to speech
324
  voice (str): The voice to use for speech synthesis. All available voices are: calm lady, meditation lady, storyteller lady, wise lady, teacher lady, wise man, customer support man, tutorial man, helpful woman, customer support lady, asmr lady, pleasant man, professional woman, reading lady, reading man. Default is Helpful Woman.
325
+
326
  Returns:
327
  str: Path to the generated audio file or error message
328
  """
 
330
  return None, "Together AI client not initialized. Please set the TOGETHER_API_KEY environment variable."
331
  if not text:
332
  return None, "Please enter text to convert to speech."
333
+
334
  try:
335
  speech_file_path = "speech.mp3"
336
  response = client.audio.speech.create(
 
341
  response.stream_to_file(speech_file_path)
342
  return speech_file_path
343
  except Exception as e:
344
+ return None, f"Error generating speech: {e}"
345
+
 
346
  tts_generate_button.click(
347
+ fn=text_to_speech,
348
  inputs=[tts_input_text, tts_voice_selection],
349
  outputs=tts_audio_output
350
  )
351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
 
353
  # --- Launch the Gradio app ---
354
  if __name__ == "__main__":