Files changed (1) hide show
  1. app.py +62 -116
app.py CHANGED
@@ -9,167 +9,116 @@ import os
# Endpoint URL templates are read from the environment; both are mandatory,
# so the module refuses to load when either one is absent or empty.
NSFW_URL_TEMPLATE = os.environ.get("NSFW_API_URL_TEMPLATE")
TTS_URL_TEMPLATE = os.environ.get("TTS_API_URL_TEMPLATE")

if not NSFW_URL_TEMPLATE:
    raise ValueError(
        "Missing Secret: NSFW_API_URL_TEMPLATE is not set in Hugging Face Space secrets."
    )
if not TTS_URL_TEMPLATE:
    raise ValueError(
        "Missing Secret: TTS_API_URL_TEMPLATE is not set in Hugging Face Space secrets."
    )

# Voice identifiers offered in the UI dropdown.
VOICES = [
    # Standard OpenAI voices
    "alloy", "echo", "fable", "onyx", "nova", "shimmer",
    # Some additional pre-trained voices
    "coral", "verse", "ballad", "ash", "sage", "amuch", "dan",
]
22
 
23
 
24
-
def check_nsfw(prompt: str) -> bool:
    """Ask the remote safety endpoint whether *prompt* is inappropriate.

    The prompt is URL-quoted and substituted into ``NSFW_URL_TEMPLATE``; the
    reply body is stripped and upper-cased before being interpreted.

    Returns:
        True when the reply is "YES" or anything unexpected, False when it
        is "NO".

    Raises:
        gr.Error: when the request fails or any other exception occurs.
    """
    try:
        request_url = NSFW_URL_TEMPLATE.format(prompt=urllib.parse.quote(prompt))
        endpoint = request_url.split('?')[0]
        print(f"DEBUG: Checking NSFW URL: {endpoint}... (query params hidden)")

        reply = requests.get(request_url, timeout=20)
        reply.raise_for_status()

        verdict = reply.text.strip().upper()
        print(f"DEBUG: NSFW Check Response: '{verdict}'")

        if verdict in ("YES", "NO"):
            return verdict == "YES"

        # Anything other than a clean YES/NO is treated as potentially NSFW.
        print(f"Warning: Unexpected response from NSFW checker: {reply.text}")
        return True

    except requests.exceptions.RequestException as err:
        print(f"Error during NSFW check: {err}")
        raise gr.Error("Failed to check prompt safety.")
    except Exception as err:
        print(f"Unexpected error during NSFW check: {err}")
        raise gr.Error("An unexpected error occurred during safety check. Please wait for a second and try again.")
52
 
53
 
def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
    """Request synthesized speech from the TTS endpoint and return its bytes.

    The prompt and emotion are URL-quoted and substituted, together with the
    voice and seed, into ``TTS_URL_TEMPLATE``.

    Args:
        prompt: Text to be spoken.
        voice: Voice identifier inserted into the URL as-is.
        emotion: Free-form emotion/style description.
        seed: Seed value inserted into the URL.

    Returns:
        The raw response body when the response content type contains
        "audio".

    Raises:
        gr.Error: when the request fails, the response is not audio, or any
            other exception occurs.
    """
    try:
        url = TTS_URL_TEMPLATE.format(
            prompt=urllib.parse.quote(prompt),
            emotion=urllib.parse.quote(emotion),
            voice=voice,
            seed=seed,
        )
        print(f"DEBUG: Generating Audio URL: {url.split('?')[0]}... (query params hidden)")

        response = requests.get(url, timeout=60)
        response.raise_for_status()

        content_type = response.headers.get('content-type', '').lower()
        if 'audio' not in content_type:
            print(f"Warning: Unexpected content type received: {content_type}")
            print(f"Response Text: {response.text[:500]}")
            raise gr.Error("API did not return audio.")

        return response.content

    except gr.Error:
        # Let the specific message raised above reach the caller instead of
        # being replaced by the generic handler at the bottom.
        raise
    except requests.exceptions.RequestException as e:
        print(f"Error during audio generation: {e}")
        raise gr.Error("Failed to generate audio. Please wait for a second and try again.") from e
    except Exception as e:
        print(f"Unexpected error during audio generation: {e}")
        raise gr.Error("An unexpected error occurred during audio generation. Please wait for a second and try again.") from e
89
 
90
 
91
-
def text_to_speech_app(prompt: str, voice: str, emotion: str, use_random_seed: bool, specific_seed: int):
    """Validate the inputs, run the safety check, and synthesize speech.

    Args:
        prompt: Text to convert to speech; must be non-empty.
        voice: Voice identifier; must be non-empty.
        emotion: Emotion/style description; falls back to "neutral" if empty.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed used when ``use_random_seed`` is false.

    Returns:
        A ``(audio_file_path, status_message)`` pair. The path is ``None``
        whenever no audio was produced.

    Raises:
        gr.Error: when the prompt or voice is empty, or when a fixed seed is
            requested but none was supplied.
    """
    # Validate before logging: concatenating a None prompt into the log line
    # would raise TypeError instead of the intended user-facing error.
    if not prompt:
        raise gr.Error("Prompt cannot be empty.")
    print("\n\n\n"+prompt+"\n\n\n")

    if not emotion:
        emotion = "neutral"
        print("Warning: No emotion provided, defaulting to 'neutral'.")
    if not voice:
        raise gr.Error("Please select a voice.")

    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    elif specific_seed is None:
        # An emptied seed field arrives as None; int(None) would raise TypeError.
        raise gr.Error("Please enter a seed or enable 'Use Random Seed'.")
    else:
        seed = int(specific_seed)
    print(f"Using Seed: {seed}")

    print("Checking prompt safety...")
    try:
        is_nsfw = check_nsfw(prompt)
    except gr.Error:
        return None, "There was an error. Please wait for a second and try again."

    if is_nsfw:
        print("Prompt flagged as inappropriate.")
        return None, "Error: The prompt was flagged as inappropriate and cannot be processed."

    print("Prompt is safe. Generating audio...")
    try:
        audio_bytes = generate_audio(prompt, voice, emotion, seed)

        # delete=False keeps the file on disk after the handle closes so the
        # returned path can still be read.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            temp_audio_file.write(audio_bytes)
            temp_file_path = temp_audio_file.name
            print(f"Audio saved temporarily to: {temp_file_path}")

        return temp_file_path, f"Audio generated successfully with voice '{voice}', emotion '{emotion}', and seed {seed}."

    except gr.Error as e:
        return None, str(e)
    except Exception as e:
        print(f"Unexpected error in main function: {e}")
        return None, f"An unexpected error occurred: {e}"
135
 
 
 
136
 
137
 
 
 
138
 
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed box changes.

    The field is shown only when a random seed is NOT requested, and its
    value is reset to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)
142
 
143
- with gr.Blocks() as app:
144
- gr.Markdown("# Advanced OpenAI Text-To-Speech Unlimited")
145
- gr.Markdown(
146
- """Enter text, choose a voice and emotion, and generate audio.
147
- The text will be checked for appropriateness before generation.
148
- Use it as much as you want.
149
-
150
-
151
- **Like & follow** for more AI projects:
152
-
153
-
154
- • Instagram: [@nihal_gazi_io](https://www.instagram.com/nihal_gazi_io/)
155
- • Discord: nihal_gazi_io"""
156
- )
157
 
158
  with gr.Row():
159
  with gr.Column(scale=2):
160
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter the text you want to convert to speech...")
161
- emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, sad, excited, calm...")
162
  voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
163
  with gr.Column(scale=1):
164
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
165
  seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
166
 
167
- submit_button = gr.Button("Generate Audio", variant="primary")
 
168
 
169
  with gr.Row():
170
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
171
- status_output = gr.Textbox(label="Status")
172
-
173
 
174
  random_seed_checkbox.change(
175
  fn=toggle_seed_input,
@@ -178,36 +127,33 @@ with gr.Blocks() as app:
178
  )
179
 
180
  submit_button.click(
 
 
 
 
181
  fn=text_to_speech_app,
182
- inputs=[
183
- prompt_input,
184
- voice_dropdown,
185
- emotion_input,
186
- random_seed_checkbox,
187
- seed_input
188
- ],
189
  outputs=[audio_output, status_output]
 
 
 
 
190
  )
191
 
192
-
193
  gr.Examples(
194
  examples=[
195
- ["Hello there! This is a test of the text-to-speech system.", "alloy", "neutral", False, 12345],
196
- ["Surely *you* wouldn't want *that*. [laughs]", "shimmer", "sarcastic and mocking", True, 12345],
197
- ["[sobbing] I am feeling... [sighs] a bit down today [cry]", "fable", "sad and depressed, with stammering", True, 662437],
198
- ["This technology is absolutely amazing!", "nova", "excited and joyful", True, 12345],
199
  ],
200
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
201
  outputs=[audio_output, status_output],
202
  fn=text_to_speech_app,
203
- cache_examples=False,
204
  )
205
 
206
-
if __name__ == "__main__":
    # Launch only when both endpoint templates are configured.
    templates_missing = not (NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE)
    if templates_missing:
        print("ERROR: Cannot launch app. Required API URL secrets are missing.")
    else:
        app.launch()
213
-
 
# Endpoint URL templates are read from the environment; both are mandatory,
# so the module refuses to load when either one is absent or empty.
NSFW_URL_TEMPLATE = os.environ.get("NSFW_API_URL_TEMPLATE")
TTS_URL_TEMPLATE = os.environ.get("TTS_API_URL_TEMPLATE")

if not NSFW_URL_TEMPLATE:
    raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set.")
if not TTS_URL_TEMPLATE:
    raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set.")

# Voice identifiers offered in the UI dropdown.
VOICES = [
    "alloy", "echo", "fable", "onyx", "nova", "shimmer",
    "coral", "verse", "ballad", "ash", "sage", "amuch", "dan",
]
21
 
22
 
 
def check_nsfw(prompt: str) -> bool:
    """Ask the remote safety endpoint whether *prompt* is inappropriate.

    The prompt is URL-quoted and substituted into ``NSFW_URL_TEMPLATE``.

    Returns:
        False only when the stripped, upper-cased reply is exactly "NO";
        True for every other reply.

    Raises:
        gr.Error: when the request or any other step fails.
    """
    try:
        request_url = NSFW_URL_TEMPLATE.format(prompt=urllib.parse.quote(prompt))
        endpoint = request_url.split('?')[0]
        print(f"DEBUG: Checking NSFW URL: {endpoint}...")

        reply = requests.get(request_url, timeout=10)
        reply.raise_for_status()

        verdict = reply.text.strip().upper()
        is_flagged = verdict != "NO"
        return is_flagged
    except Exception as err:
        print(f"NSFW check error: {err}")
        raise gr.Error("Safety check failed. Please try again.")
37
 
38
 
def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
    """Request synthesized speech from the TTS endpoint and return its bytes.

    The prompt and emotion are URL-quoted and substituted, together with the
    voice and seed, into ``TTS_URL_TEMPLATE``.

    Args:
        prompt: Text to be spoken.
        voice: Voice identifier inserted into the URL as-is.
        emotion: Free-form emotion/style description.
        seed: Seed value inserted into the URL.

    Returns:
        The raw response body when the response content type contains
        "audio".

    Raises:
        gr.Error: "Invalid response: No audio returned." when the response is
            not audio; "Audio generation failed. Please try again." for any
            other failure.
    """
    try:
        url = TTS_URL_TEMPLATE.format(
            prompt=urllib.parse.quote(prompt),
            emotion=urllib.parse.quote(emotion),
            voice=voice,
            seed=seed,
        )
        print(f"DEBUG: Audio URL: {url.split('?')[0]}...")

        response = requests.get(url, timeout=60)
        response.raise_for_status()

        if 'audio' not in response.headers.get("content-type", "").lower():
            raise gr.Error("Invalid response: No audio returned.")

        return response.content
    except gr.Error as e:
        # Keep the log line, but let the specific message reach the caller
        # instead of being replaced by the generic handler below.
        print(f"TTS error: {e}")
        raise
    except Exception as e:
        print(f"TTS error: {e}")
        raise gr.Error("Audio generation failed. Please try again.") from e
58
 
59
 
def text_to_speech_app(prompt, voice, emotion, use_random_seed, specific_seed):
    """Validate the inputs, run the safety check, and synthesize speech.

    Args:
        prompt: Text to convert to speech; must be non-empty.
        voice: Voice identifier; must be non-empty.
        emotion: Emotion/style description; falls back to "neutral" if empty.
        use_random_seed: When true, a random 32-bit seed is drawn.
        specific_seed: Seed used when ``use_random_seed`` is false.

    Returns:
        A ``(audio_file_path, status_message)`` pair. The path is ``None``
        when the prompt is flagged or a ``gr.Error`` is raised by the safety
        check or the audio generation.

    Raises:
        gr.Error: when the prompt or voice is empty, or when a fixed seed is
            requested but none was supplied.
    """
    if not prompt:
        raise gr.Error("Prompt cannot be empty.")
    if not voice:
        raise gr.Error("Please select a voice.")
    if not emotion:
        emotion = "neutral"

    if use_random_seed:
        seed = random.randint(0, 2**32 - 1)
    elif specific_seed is None:
        # An emptied seed field arrives as None; int(None) would raise TypeError.
        raise gr.Error("Please enter a seed or enable 'Use Random Seed'.")
    else:
        seed = int(specific_seed)
    print(f"Seed: {seed}")

    try:
        if check_nsfw(prompt):
            return None, "⚠️ Prompt flagged as inappropriate."
    except gr.Error as e:
        return None, str(e)

    try:
        audio_bytes = generate_audio(prompt, voice, emotion, seed)
        # delete=False keeps the file on disk after the handle closes so the
        # returned path can still be read.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            f.write(audio_bytes)
            return f.name, f"✅ Audio generated with voice '{voice}', emotion '{emotion}', seed {seed}."
    except gr.Error as e:
        return None, str(e)
84
 
 
 
 
 
 
85
 
def toggle_seed_input(use_random_seed):
    """Build the update for the seed field when the random-seed box changes.

    The field is shown only when a random seed is NOT requested, and its
    value is reset to 12345 on every toggle.
    """
    show_seed_field = not use_random_seed
    return gr.update(value=12345, visible=show_seed_field)
88
 
 
 
 
 
 
89
 
def show_loading():
    """Return the pair of updates applied when generation starts.

    The first update sets the text "⏳ Generating..." and disables
    interaction; the second only disables interaction.
    """
    busy_status = gr.update(value="⏳ Generating...", interactive=False)
    locked_control = gr.update(interactive=False)
    return busy_status, locked_control
92
 
93
 
def hide_loading():
    """Return the pair of updates applied when generation finishes.

    The first update resets the value to an empty string and enables
    interaction; the second only enables interaction.
    """
    # NOTE(review): the first update clears the value and enables editing. If
    # it targets the status textbox right after the generation step wrote its
    # message there, that message is erased — confirm this is intended.
    cleared_status = gr.update(value="", interactive=True)
    unlocked_control = gr.update(interactive=True)
    return cleared_status, unlocked_control
96
 
 
 
 
97
 
98
+ with gr.Blocks(theme=gr.themes.Base()) as app:
99
+ gr.Markdown("""
100
+ # 🎤 Advanced TTS Generator
101
+ Convert your text into expressive speech using multiple voice styles.
102
+ _Safe, fast, and unlimited!_
103
+
104
+ ---
105
+ """)
 
 
 
 
 
 
106
 
107
  with gr.Row():
108
  with gr.Column(scale=2):
109
+ prompt_input = gr.Textbox(label="Prompt", placeholder="Type something...")
110
+ emotion_input = gr.Textbox(label="Emotion Style", placeholder="e.g., happy, calm, angry...")
111
  voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
112
  with gr.Column(scale=1):
113
  random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
114
  seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
115
 
116
+ submit_button = gr.Button("Generate Audio", variant="primary")
117
+ loading_status = gr.Textbox(visible=False)
118
 
119
  with gr.Row():
120
  audio_output = gr.Audio(label="Generated Audio", type="filepath")
121
+ status_output = gr.Textbox(label="Status", interactive=False)
 
122
 
123
  random_seed_checkbox.change(
124
  fn=toggle_seed_input,
 
127
  )
128
 
129
  submit_button.click(
130
+ fn=show_loading,
131
+ inputs=[],
132
+ outputs=[status_output, submit_button]
133
+ ).then(
134
  fn=text_to_speech_app,
135
+ inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
 
 
 
 
 
 
136
  outputs=[audio_output, status_output]
137
+ ).then(
138
+ fn=hide_loading,
139
+ inputs=[],
140
+ outputs=[status_output, submit_button]
141
  )
142
 
 
143
  gr.Examples(
144
  examples=[
145
+ ["Hello! Testing text-to-speech.", "alloy", "neutral", True, 12345],
146
+ ["I'm excited to show you what I can do!", "nova", "excited", True, 12345],
147
+ ["This is surprisingly realistic.", "shimmer", "calm and robotic", False, 56789],
 
148
  ],
149
  inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input],
150
  outputs=[audio_output, status_output],
151
  fn=text_to_speech_app,
152
+ cache_examples=False
153
  )
154
 
 
if __name__ == "__main__":
    # Launch only when both endpoint templates are configured.
    templates_missing = not (NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE)
    if templates_missing:
        print("Missing environment variables for API URLs.")
    else:
        app.launch()