Rausda6 committed on
Commit
2720196
·
verified ·
1 Parent(s): 4c6c365

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +290 -113
app.py CHANGED
@@ -1,59 +1,84 @@
1
  import gradio as gr
2
- import random
3
- import time
4
- import os
5
- import torch
6
- from pathlib import Path
7
- from transformers import AutoTokenizer, AutoModelForCausalLM
8
  import json
9
  import uuid
10
  import edge_tts
11
  import asyncio
12
  import aiofiles
 
 
13
  import mimetypes
14
- from typing import List
15
-
16
- from PyPDF2 import PdfReader
17
- from pydub import AudioSegment
18
 
19
- # Define model name clearly
20
- MODEL_NAME = "unsloth/gemma-3-1b-pt"
 
21
 
22
- # Device setup
23
- device = "cuda" if torch.cuda.is_available() else "cpu"
24
- print(f"Using device: {device}")
25
 
26
- # Load model and tokenizer
27
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
28
  model = AutoModelForCausalLM.from_pretrained(
29
- MODEL_NAME,
30
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
31
- ).eval().to(device)
 
32
 
33
- # Constants
34
- MAX_FILE_SIZE_MB = 20
35
- MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
36
 
37
  class PodcastGenerator:
38
  def __init__(self):
39
  pass
40
 
41
- async def generate_script(self, prompt: str, language: str, api_key: str, file_obj=None, progress=None):
42
  example = """
43
  {
44
  "topic": "AGI",
45
  "podcast": [
46
- {"speaker": 2, "line": "So, AGI, huh? Seems like everyone's talking about it these days."},
47
- {"speaker": 1, "line": "Yeah, it's definitely having a moment, isn't it?"},
48
- {"speaker": 2, "line": "It is and for good reason, right? I mean, you've been digging into this stuff, listening to the podcasts and everything. What really stood out to you? What got you hooked?"},
49
- {"speaker": 1, "line": "It's easy to get lost in the noise, for sure."},
50
- {"speaker": 2, "line": "Exactly. So how about we try to cut through some of that, shall we?"},
51
- {"speaker": 1, "line": "Sounds like a plan."},
52
- {"speaker": 2, "line": "It certainly is and on that note, we'll wrap up this deep dive. Thanks for listening, everyone."},
53
- {"speaker": 1, "line": "Peace."}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ]
55
  }
56
  """
 
57
  if language == "Auto Detect":
58
  language_instruction = "- The podcast MUST be in the same language as the user input."
59
  else:
@@ -71,7 +96,7 @@ You are a professional podcast generator. Your task is to generate a professiona
71
  Follow this example structure:
72
  {example}
73
  """
74
- # Build the user prompt
75
  if prompt and file_obj:
76
  user_prompt = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
77
  elif prompt:
@@ -79,114 +104,186 @@ Follow this example structure:
79
  else:
80
  user_prompt = "Please generate a podcast script based on the uploaded file."
81
 
82
- # Handle file content
83
  if file_obj:
84
- file_size = getattr(file_obj, 'size', os.path.getsize(file_obj.name))
85
- if file_size > MAX_FILE_SIZE_BYTES:
86
- raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")
87
- ext = os.path.splitext(file_obj.name)[1].lower()
88
- if ext == '.pdf':
89
- reader = PdfReader(file_obj)
90
- text = "\n\n".join(page.extract_text() or '' for page in reader.pages)
91
- else:
92
- raw = file_obj.read() if hasattr(file_obj, 'read') else await aiofiles.open(file_obj.name, 'rb').read()
93
- text = raw.decode(errors='ignore')
94
- user_prompt += f"\n\n―― FILE CONTENT ――\n{text}"
95
-
96
- prompt_text = system_prompt + "\n" + user_prompt
 
 
 
 
97
  try:
98
  if progress:
99
  progress(0.3, "Generating podcast script...")
100
- def hf_generate(p):
101
- inputs = tokenizer(p, return_tensors="pt").to(model.device)
102
- outs = model.generate(
103
- **inputs,
104
- max_new_tokens=1024,
105
- do_sample=True,
106
- temperature=1.0
107
- )
108
- return tokenizer.decode(outs[0], skip_special_tokens=True)
109
- generated_text = await asyncio.wait_for(
110
- asyncio.to_thread(hf_generate, prompt_text),
111
- timeout=60
112
- )
113
- except asyncio.TimeoutError:
114
- raise Exception("The script generation request timed out. Please try again later.")
115
  except Exception as e:
116
  raise Exception(f"Failed to generate podcast script: {e}")
117
 
 
 
118
  if progress:
119
  progress(0.4, "Script generated successfully!")
120
 
121
- return json.loads(generated_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
124
  voice = speaker1 if speaker == 1 else speaker2
125
  speech = edge_tts.Communicate(text, voice)
 
126
  temp_filename = f"temp_{uuid.uuid4()}.wav"
127
  try:
128
- await asyncio.wait_for(speech.save(temp_filename), timeout=30)
 
129
  return temp_filename
130
  except asyncio.TimeoutError:
131
- if os.path.exists(temp_filename): os.remove(temp_filename)
 
132
  raise Exception("Text-to-speech generation timed out. Please try with a shorter text.")
133
  except Exception as e:
134
- if os.path.exists(temp_filename): os.remove(temp_filename)
 
135
  raise e
136
 
137
  async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
138
- if progress: progress(0.9, "Combining audio files...")
 
 
139
  combined_audio = AudioSegment.empty()
140
  for audio_file in audio_files:
141
  combined_audio += AudioSegment.from_file(audio_file)
142
- os.remove(audio_file)
 
143
  output_filename = f"output_{uuid.uuid4()}.wav"
144
  combined_audio.export(output_filename, format="wav")
145
- if progress: progress(1.0, "Podcast generated successfully!")
 
 
 
146
  return output_filename
147
 
148
  async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
149
  try:
150
- if progress: progress(0.1, "Starting podcast generation...")
 
 
 
151
  return await asyncio.wait_for(
152
  self._generate_podcast_internal(input_text, language, speaker1, speaker2, api_key, file_obj, progress),
153
- timeout=600
154
  )
155
  except asyncio.TimeoutError:
156
  raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
157
  except Exception as e:
158
  raise Exception(f"Error generating podcast: {str(e)}")
159
-
160
  async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
161
- if progress: progress(0.2, "Generating podcast script...")
 
 
162
  podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
163
- if progress: progress(0.5, "Converting text to speech...")
 
 
 
 
164
  audio_files = []
165
  total_lines = len(podcast_json['podcast'])
166
- batch_size = 10
 
 
 
 
167
  for batch_start in range(0, total_lines, batch_size):
168
  batch_end = min(batch_start + batch_size, total_lines)
169
  batch = podcast_json['podcast'][batch_start:batch_end]
170
- tts_tasks = [self.tts_generate(item['line'], item['speaker'], speaker1, speaker2) for item in batch]
 
 
 
 
 
 
171
  try:
 
172
  batch_results = await asyncio.gather(*tts_tasks, return_exceptions=True)
173
- for result in batch_results:
 
 
174
  if isinstance(result, Exception):
 
175
  for file in audio_files:
176
- if os.path.exists(file): os.remove(file)
 
177
  raise Exception(f"Error generating speech: {str(result)}")
178
- audio_files.append(result)
 
 
 
179
  if progress:
180
- progress(0.5 + (0.4 * (batch_end / total_lines)), f"Processed {batch_end}/{total_lines} speech segments...")
 
 
181
  except Exception as e:
 
182
  for file in audio_files:
183
- if os.path.exists(file): os.remove(file)
 
184
  raise Exception(f"Error in batch TTS generation: {str(e)}")
185
- combined = await self.combine_audio_files(audio_files, progress)
186
- return combined
 
187
 
188
  async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=None) -> str:
189
  start_time = time.time()
 
190
  voice_names = {
191
  "Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
192
  "Ava - English (United States)": "en-US-AvaMultilingualNeural",
@@ -197,44 +294,124 @@ async def process_input(input_text: str, input_file, language: str, speaker1: st
197
  "Remy - French (France)": "fr-FR-RemyMultilingualNeural",
198
  "Vivienne - French (France)": "fr-FR-VivienneMultilingualNeural"
199
  }
 
200
  speaker1 = voice_names[speaker1]
201
  speaker2 = voice_names[speaker2]
 
202
  try:
203
- if progress: progress(0.05, "Processing input...")
 
 
204
  if not api_key:
205
- api_key = "saf"
206
  if not api_key:
207
  raise Exception("No API key provided. Please provide a Gemini API key.")
208
- generator = PodcastGenerator()
209
- output = await generator.generate_podcast(input_text, lan
210
- guage, speaker1, speaker2, api_key, input_file, progress)
211
- print(f"Total podcast generation time: {time.time() - start_time:.2f} seconds")
212
- return output
 
 
 
213
  except Exception as e:
214
- msg = str(e)
215
- if "rate limit" in msg.lower():
 
216
  raise Exception("Rate limit exceeded. Please try again later or use your own API key.")
217
- elif "timeout" in msg.lower():
218
- raise Exception("The request timed out... Please try with shorter text.")
219
  else:
220
- raise Exception(f"Error: {msg}")
 
221
  # Gradio UI
222
- with gr.Blocks(title="PodcastGen 🎙️") as demo:
223
- gr.Markdown("""
224
- # PodcastGen 🎙️
225
- Generate a 2-speaker podcast from text or PDF!
226
- """ )
227
- with gr.Row():
228
- with gr.Column():
229
- input_text = gr.Textbox(label="Input Text", lines=10, placeholder="Enter podcast topic or paste text here...", elem_id="input_text")
230
- input_file = gr.File(label="Or Upload a PDF or TXT file", file_types=[".pdf", ".txt"] )
231
- with gr.Column():
232
- language = gr.Dropdown(label="Podcast Language", choices=["Auto Detect","English","German","French","Spanish","Italian","Dutch","Portuguese","Russian","Chinese","Japanese","Korean","Other" ], value="Auto Detect")
233
- speaker1 = gr.Dropdown(label="Speaker 1 Voice", choices=["Andrew - English (United States)","Ava - English (United States)","Brian - English (United States)","Emma - English (United States)","Florian - German (Germany)","Seraphina - German (Germany)","Remy - French (France)","Vivienne - French (France)" ], value="Andrew - English (United States)")
234
- speaker2 = gr.Dropdown(label="Speaker 2 Voice", choices=["Andrew - English (United States)","Ava - English (United States)","Brian - English (United States)","Emma - English (United States)","Florian - German (Germany)","Seraphina - German (Germany)","Remy - French (France)","Vivienne - French (France)" ], value="Ava - English (United States)")
235
- api_key = gr.Textbox(label="Gemini API Key (Optional)", type="password", placeholder="Needed only if you're getting rate limited.")
236
- generate_btn = gr.Button("Generate Podcast 🎙️", variant="primary")
237
- output_audio = gr.Audio(label="Generated Podcast", type="filepath", format="wav", elem_id="output_audio")
238
- generate_btn.click(fn=process_input, inputs=[input_text, input_file, language, speaker1, speaker2, api_key], outputs=output_audio, show_progress=True)
239
- demo.queue()
240
- demo.launch(server_name="0.0.0.0", share=True, debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ from pydub import AudioSegment
 
 
 
 
 
3
  import json
4
  import uuid
5
  import edge_tts
6
  import asyncio
7
  import aiofiles
8
+ import os
9
+ import time
10
  import mimetypes
11
+ import torch
12
+ from typing import List, Dict
13
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
14
 
15
+ # Constants
16
+ MAX_FILE_SIZE_MB = 20
17
+ MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024 # Convert MB to bytes
18
 
19
+ MODEL_ID = "HuggingFaceH4/zephyr-7b-alpha"
 
 
20
 
21
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
22
  model = AutoModelForCausalLM.from_pretrained(
23
+ MODEL_ID,
24
  torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
25
+ device_map="auto"
26
+ ).eval()
27
 
 
 
 
28
 
29
  class PodcastGenerator:
30
  def __init__(self):
31
  pass
32
 
33
+ async def generate_script(self, prompt: str, language: str, api_key: str, file_obj=None, progress=None) -> Dict:
34
  example = """
35
  {
36
  "topic": "AGI",
37
  "podcast": [
38
+ {
39
+ "speaker": 2,
40
+ "line": "So, AGI, huh? Seems like everyone's talking about it these days."
41
+ },
42
+ {
43
+ "speaker": 1,
44
+ "line": "Yeah, it's definitely having a moment, isn't it?"
45
+ },
46
+ {
47
+ "speaker": 2,
48
+ "line": "It is and for good reason, right? I mean, you've been digging into this stuff, listening to the podcasts and everything. What really stood out to you? What got you hooked?"
49
+ },
50
+ {
51
+ "speaker": 1,
52
+ "line": "I like that. It really is."
53
+ },
54
+ {
55
+ "speaker": 2,
56
+ "line": "And honestly, that's a responsibility that extends beyond just the researchers and the policymakers."
57
+ },
58
+ {
59
+ "speaker": 1,
60
+ "line": "100%"
61
+ },
62
+ {
63
+ "speaker": 2,
64
+ "line": "So to everyone listening out there I'll leave you with this. As AGI continues to develop, what role do you want to play in shaping its future?"
65
+ },
66
+ {
67
+ "speaker": 1,
68
+ "line": "That's a question worth pondering."
69
+ },
70
+ {
71
+ "speaker": 2,
72
+ "line": "It certainly is and on that note, we'll wrap up this deep dive. Thanks for listening, everyone."
73
+ },
74
+ {
75
+ "speaker": 1,
76
+ "line": "Peace."
77
+ }
78
  ]
79
  }
80
  """
81
+
82
  if language == "Auto Detect":
83
  language_instruction = "- The podcast MUST be in the same language as the user input."
84
  else:
 
96
  Follow this example structure:
97
  {example}
98
  """
99
+ # Construct system and user prompt
100
  if prompt and file_obj:
101
  user_prompt = f"Please generate a podcast script based on the uploaded file following user input:\n{prompt}"
102
  elif prompt:
 
104
  else:
105
  user_prompt = "Please generate a podcast script based on the uploaded file."
106
 
107
+ # NOTE: file_obj cannot be passed to a text-only LLM
108
  if file_obj:
109
+ print("Warning: Uploaded file is ignored in this version because external LLM does not support file input.")
110
+
111
+ # Build prompt
112
+ full_prompt = f"""{system_prompt}
113
+
114
+ {user_prompt}
115
+
116
+ Return the result strictly as a JSON object in the format:
117
+ {{
118
+ "topic": "{prompt}",
119
+ "podcast": [
120
+ {{ "speaker": 1, "line": "..." }},
121
+ {{ "speaker": 2, "line": "..." }}
122
+ ]
123
+ }}
124
+ """
125
+
126
  try:
127
  if progress:
128
  progress(0.3, "Generating podcast script...")
129
+
130
+ inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
131
+ output = model.generate(**inputs, max_new_tokens=1024)
132
+ text = tokenizer.decode(output[0], skip_special_tokens=True)
133
+
 
 
 
 
 
 
 
 
 
 
134
  except Exception as e:
135
  raise Exception(f"Failed to generate podcast script: {e}")
136
 
137
+ print(f"Generated podcast script:\n{text}")
138
+
139
  if progress:
140
  progress(0.4, "Script generated successfully!")
141
 
142
+ try:
143
+ return json.loads(text)
144
+ except json.JSONDecodeError:
145
+ raise Exception("The model did not return valid JSON. Please refine the prompt.")
146
+
147
+
148
async def _read_file_bytes(self, file_obj) -> bytes:
    """Return the contents of an uploaded file after enforcing the size cap.

    The size is taken from ``file_obj.size`` when that attribute exists and
    from the file at ``file_obj.name`` otherwise. Content comes from
    ``file_obj.read()`` when the object is readable; if not, the path in
    ``file_obj.name`` is opened in binary mode through ``aiofiles``.

    Raises:
        Exception: if the size is greater than ``MAX_FILE_SIZE_BYTES``.
    """
    # Determine the size first so oversized uploads are rejected unread.
    if not hasattr(file_obj, 'size'):
        size_in_bytes = os.path.getsize(file_obj.name)
    else:
        size_in_bytes = file_obj.size

    if size_in_bytes > MAX_FILE_SIZE_BYTES:
        raise Exception(f"File size exceeds the {MAX_FILE_SIZE_MB}MB limit. Please upload a smaller file.")

    # No read() method: treat the object as a path holder and read from disk.
    if not hasattr(file_obj, 'read'):
        async with aiofiles.open(file_obj.name, 'rb') as handle:
            return await handle.read()

    return file_obj.read()
164
+
165
+ def _get_mime_type(self, filename: str) -> str:
166
+ """Determine MIME type based on file extension"""
167
+ ext = os.path.splitext(filename)[1].lower()
168
+ if ext == '.pdf':
169
+ return "application/pdf"
170
+ elif ext == '.txt':
171
+ return "text/plain"
172
+ else:
173
+ # Fallback to the default mime type detector
174
+ mime_type, _ = mimetypes.guess_type(filename)
175
+ return mime_type or "application/octet-stream"
176
 
177
  async def tts_generate(self, text: str, speaker: int, speaker1: str, speaker2: str) -> str:
178
  voice = speaker1 if speaker == 1 else speaker2
179
  speech = edge_tts.Communicate(text, voice)
180
+
181
  temp_filename = f"temp_{uuid.uuid4()}.wav"
182
  try:
183
+ # Add timeout to TTS generation
184
+ await asyncio.wait_for(speech.save(temp_filename), timeout=30) # 30 seconds timeout
185
  return temp_filename
186
  except asyncio.TimeoutError:
187
+ if os.path.exists(temp_filename):
188
+ os.remove(temp_filename)
189
  raise Exception("Text-to-speech generation timed out. Please try with a shorter text.")
190
  except Exception as e:
191
+ if os.path.exists(temp_filename):
192
+ os.remove(temp_filename)
193
  raise e
194
 
195
  async def combine_audio_files(self, audio_files: List[str], progress=None) -> str:
196
+ if progress:
197
+ progress(0.9, "Combining audio files...")
198
+
199
  combined_audio = AudioSegment.empty()
200
  for audio_file in audio_files:
201
  combined_audio += AudioSegment.from_file(audio_file)
202
+ os.remove(audio_file) # Clean up temporary files
203
+
204
  output_filename = f"output_{uuid.uuid4()}.wav"
205
  combined_audio.export(output_filename, format="wav")
206
+
207
+ if progress:
208
+ progress(1.0, "Podcast generated successfully!")
209
+
210
  return output_filename
211
 
212
  async def generate_podcast(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
213
  try:
214
+ if progress:
215
+ progress(0.1, "Starting podcast generation...")
216
+
217
+ # Set overall timeout for the entire process
218
  return await asyncio.wait_for(
219
  self._generate_podcast_internal(input_text, language, speaker1, speaker2, api_key, file_obj, progress),
220
+ timeout=600 # 10 minutes total timeout
221
  )
222
  except asyncio.TimeoutError:
223
  raise Exception("The podcast generation process timed out. Please try with shorter text or try again later.")
224
  except Exception as e:
225
  raise Exception(f"Error generating podcast: {str(e)}")
226
+
227
  async def _generate_podcast_internal(self, input_text: str, language: str, speaker1: str, speaker2: str, api_key: str, file_obj=None, progress=None) -> str:
228
+ if progress:
229
+ progress(0.2, "Generating podcast script...")
230
+
231
  podcast_json = await self.generate_script(input_text, language, api_key, file_obj, progress)
232
+
233
+ if progress:
234
+ progress(0.5, "Converting text to speech...")
235
+
236
+ # Process TTS in batches for concurrent processing
237
  audio_files = []
238
  total_lines = len(podcast_json['podcast'])
239
+
240
+ # Define batch size to control concurrency
241
+ batch_size = 10 # Adjust based on system resources
242
+
243
+ # Process in batches
244
  for batch_start in range(0, total_lines, batch_size):
245
  batch_end = min(batch_start + batch_size, total_lines)
246
  batch = podcast_json['podcast'][batch_start:batch_end]
247
+
248
+ # Create tasks for concurrent processing
249
+ tts_tasks = []
250
+ for item in batch:
251
+ tts_task = self.tts_generate(item['line'], item['speaker'], speaker1, speaker2)
252
+ tts_tasks.append(tts_task)
253
+
254
  try:
255
+ # Process batch concurrently
256
  batch_results = await asyncio.gather(*tts_tasks, return_exceptions=True)
257
+
258
+ # Check for exceptions and handle results
259
+ for i, result in enumerate(batch_results):
260
  if isinstance(result, Exception):
261
+ # Clean up any files already created
262
  for file in audio_files:
263
+ if os.path.exists(file):
264
+ os.remove(file)
265
  raise Exception(f"Error generating speech: {str(result)}")
266
+ else:
267
+ audio_files.append(result)
268
+
269
+ # Update progress
270
  if progress:
271
+ current_progress = 0.5 + (0.4 * (batch_end / total_lines))
272
+ progress(current_progress, f"Processed {batch_end}/{total_lines} speech segments...")
273
+
274
  except Exception as e:
275
+ # Clean up any files already created
276
  for file in audio_files:
277
+ if os.path.exists(file):
278
+ os.remove(file)
279
  raise Exception(f"Error in batch TTS generation: {str(e)}")
280
+
281
+ combined_audio = await self.combine_audio_files(audio_files, progress)
282
+ return combined_audio
283
 
284
  async def process_input(input_text: str, input_file, language: str, speaker1: str, speaker2: str, api_key: str = "", progress=None) -> str:
285
  start_time = time.time()
286
+
287
  voice_names = {
288
  "Andrew - English (United States)": "en-US-AndrewMultilingualNeural",
289
  "Ava - English (United States)": "en-US-AvaMultilingualNeural",
 
294
  "Remy - French (France)": "fr-FR-RemyMultilingualNeural",
295
  "Vivienne - French (France)": "fr-FR-VivienneMultilingualNeural"
296
  }
297
+
298
  speaker1 = voice_names[speaker1]
299
  speaker2 = voice_names[speaker2]
300
+
301
  try:
302
+ if progress:
303
+ progress(0.05, "Processing input...")
304
+
305
  if not api_key:
306
+ api_key = os.getenv("GENAI_API_KEY")
307
  if not api_key:
308
  raise Exception("No API key provided. Please provide a Gemini API key.")
309
+
310
+ podcast_generator = PodcastGenerator()
311
+ podcast = await podcast_generator.generate_podcast(input_text, language, speaker1, speaker2, api_key, input_file, progress)
312
+
313
+ end_time = time.time()
314
+ print(f"Total podcast generation time: {end_time - start_time:.2f} seconds")
315
+ return podcast
316
+
317
  except Exception as e:
318
+ # Ensure we show a user-friendly error
319
+ error_msg = str(e)
320
+ if "rate limit" in error_msg.lower():
321
  raise Exception("Rate limit exceeded. Please try again later or use your own API key.")
322
+ elif "timeout" in error_msg.lower():
323
+ raise Exception("The request timed out. This could be due to server load or the length of your input. Please try again with shorter text.")
324
  else:
325
+ raise Exception(f"Error: {error_msg}")
326
+
327
  # Gradio UI
328
def generate_podcast_gradio(input_text, input_file, language, speaker1, speaker2, api_key, progress=gr.Progress()):
    """Synchronous Gradio click handler that drives the async pipeline.

    Forwards the form values to ``process_input`` and blocks until it
    finishes, returning its result. ``input_file`` is passed through as-is
    and is ``None`` when nothing was uploaded.
    """
    def report(value, text):
        # Adapter from the pipeline's (fraction, message) callback to Gradio's tracker.
        progress(value, text)

    # The handler is a plain function, so the coroutine gets its own event loop.
    return asyncio.run(
        process_input(input_text, input_file, language, speaker1, speaker2, api_key, report)
    )
350
+
351
def main():
    """Build the PodcastGen Gradio interface and launch it.

    The page has a text box and a file upload for the source material, an
    API-key field, language and voice selectors, a generate button wired to
    ``generate_podcast_gradio``, and an audio player for the result.
    """
    # Choices offered in the "Language" dropdown.
    languages = [
        "Auto Detect",
        "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian",
        "Azerbaijani", "Bahasa Indonesian", "Bangla", "Basque", "Bengali",
        "Bosnian", "Bulgarian", "Burmese", "Catalan", "Chinese Cantonese",
        "Chinese Mandarin", "Chinese Taiwanese", "Croatian", "Czech",
        "Danish", "Dutch", "English", "Estonian", "Filipino", "Finnish",
        "French", "Galician", "Georgian", "German", "Greek", "Hebrew",
        "Hindi", "Hungarian", "Icelandic", "Irish", "Italian", "Japanese",
        "Javanese", "Kannada", "Kazakh", "Khmer", "Korean", "Lao", "Latvian",
        "Lithuanian", "Macedonian", "Malay", "Malayalam", "Maltese",
        "Mongolian", "Nepali", "Norwegian Bokmål", "Pashto", "Persian",
        "Polish", "Portuguese", "Romanian", "Russian", "Serbian", "Sinhala",
        "Slovak", "Slovene", "Somali", "Spanish", "Sundanese", "Swahili",
        "Swedish", "Tamil", "Telugu", "Thai", "Turkish", "Ukrainian", "Urdu",
        "Uzbek", "Vietnamese", "Welsh", "Zulu",
    ]

    # Display names shared by both speaker dropdowns.
    voices = [
        "Andrew - English (United States)",
        "Ava - English (United States)",
        "Brian - English (United States)",
        "Emma - English (United States)",
        "Florian - German (Germany)",
        "Seraphina - German (Germany)",
        "Remy - French (France)",
        "Vivienne - French (France)",
    ]

    with gr.Blocks(title="PodcastGen 🎙️") as demo:
        gr.Markdown("# PodcastGen 🎙️")
        gr.Markdown("Generate a 2-speaker podcast from text input or documents!")

        # Source material: free text on the left, optional upload on the right.
        with gr.Row():
            with gr.Column(scale=2):
                input_text = gr.Textbox(
                    label="Input Text",
                    lines=10,
                    placeholder="Enter text for podcast generation...",
                )

            with gr.Column(scale=1):
                input_file = gr.File(
                    label="Or Upload a PDF or TXT file",
                    file_types=[".pdf", ".txt"],
                )

        # Settings: credentials and language on the left, voices on the right.
        with gr.Row():
            with gr.Column():
                api_key = gr.Textbox(
                    label="Your Gemini API Key (Optional)",
                    placeholder="Enter API key here if you're getting rate limited",
                    type="password",
                )
                language = gr.Dropdown(label="Language", choices=languages, value="Auto Detect")

            with gr.Column():
                speaker1 = gr.Dropdown(
                    label="Speaker 1 Voice",
                    choices=voices,
                    value="Andrew - English (United States)",
                )
                speaker2 = gr.Dropdown(
                    label="Speaker 2 Voice",
                    choices=voices,
                    value="Ava - English (United States)",
                )

        generate_btn = gr.Button("Generate Podcast", variant="primary")

        with gr.Row():
            output_audio = gr.Audio(label="Generated Podcast", type="filepath", format="wav")

        # Input order must match generate_podcast_gradio's positional parameters.
        generate_btn.click(
            fn=generate_podcast_gradio,
            inputs=[input_text, input_file, language, speaker1, speaker2, api_key],
            outputs=[output_audio],
        )

    demo.launch()
415
+
416
+ if __name__ == "__main__":
417
+ main()