Athspi committed on
Commit
86417c2
·
verified ·
1 Parent(s): 9421446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -66
app.py CHANGED
@@ -1,9 +1,14 @@
1
  import os
2
  import google.generativeai as genai
3
  from moviepy.video.io.VideoFileClip import VideoFileClip
 
 
 
4
  import tempfile
5
  import logging
6
  import gradio as gr
 
 
7
 
8
  # Suppress moviepy logs
9
  logging.getLogger("moviepy").setLevel(logging.ERROR)
@@ -13,7 +18,7 @@ genai.configure(api_key=os.environ["GEMINI_API_KEY"])
13
 
14
  # Create the Gemini model
15
  generation_config = {
16
- "temperature": 0.7, # Lower temperature for more accurate results
17
  "top_p": 0.9,
18
  "top_k": 40,
19
  "max_output_tokens": 8192,
@@ -27,55 +32,80 @@ model = genai.GenerativeModel(
27
 
28
  # List of all supported languages
29
  SUPPORTED_LANGUAGES = [
30
- "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
31
- "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
32
- "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
33
- "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
34
- "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
35
- "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
36
- "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
37
- "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
38
- "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
39
- "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
40
- "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
41
- "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
42
- "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
43
- "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
44
- "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
45
  "Sundanese"
46
  ]
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def extract_audio_from_video(video_file):
49
  """Extract audio from a video file and save it as a WAV file."""
50
  video = VideoFileClip(video_file)
51
  audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
52
- video.audio.write_audiofile(audio_file, fps=16000, logger=None) # Suppress logs
53
  return audio_file
54
 
55
  def transcribe_audio_with_gemini(audio_file):
56
  """Transcribe audio using Gemini with a magic prompt for accurate timestamps."""
57
  with open(audio_file, "rb") as f:
58
  audio_data = f.read()
59
-
60
- # Create proper audio blob
61
  audio_blob = {
62
  'mime_type': 'audio/wav',
63
  'data': audio_data
64
  }
65
 
66
- # Magic prompt for transcription with timestamps
67
  prompt = """
68
  You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
69
  Include timestamps for each sentence in the following format:
70
  [HH:MM:SS] Sentence 1
71
  [HH:MM:SS] Sentence 2
72
- don't change any in format
73
  ...
74
  Ensure the timestamps are accurate and correspond to the start of each sentence.
75
  Respond only with the transcription and timestamps. Do not add explanations or extra text.
76
  """
77
-
78
- # Transcribe audio
79
  convo = model.start_chat()
80
  convo.send_message(prompt)
81
  response = convo.send_message(audio_blob)
@@ -84,13 +114,12 @@ def transcribe_audio_with_gemini(audio_file):
84
  def generate_subtitles(transcription):
85
  """Generate SRT subtitles from transcription with timestamps."""
86
  lines = transcription.split("\n")
87
- srt_subtitles = ""
88
 
89
  for i, line in enumerate(lines, start=1):
90
  if not line.strip():
91
  continue
92
 
93
- # Extract timestamp and text
94
  if line.startswith("["):
95
  timestamp = line.split("]")[0] + "]"
96
  text = line.split("]")[1].strip()
@@ -98,15 +127,19 @@ def generate_subtitles(transcription):
98
  timestamp = "[00:00:00]"
99
  text = line.strip()
100
 
101
- # Convert timestamp to SRT format
102
- start_time = timestamp[1:-1] # Remove brackets
103
  start_seconds = time_to_seconds(start_time)
104
- end_seconds = start_seconds + 5 # Placeholder: 5 seconds per line
105
- end_time = seconds_to_time(end_seconds)
106
 
107
- srt_subtitles += f"{i}\n{start_time},000 --> {end_time},000\n{text}\n\n"
 
 
 
 
 
 
108
 
109
- return srt_subtitles
110
 
111
  def time_to_seconds(time_str):
112
  """Convert HH:MM:SS to seconds."""
@@ -121,23 +154,56 @@ def seconds_to_time(seconds):
121
  return f"{hh:02}:{mm:02}:{ss:02}"
122
 
123
  def translate_srt(srt_text, target_language):
124
- """Translate an SRT file while preserving timestamps using a magic prompt."""
125
- # Magic prompt for translation
126
  prompt = f"""
127
- Translate the following SRT subtitles into {target_language}.
128
- Preserve the SRT format (timestamps and structure).
129
- Translate only the text after the timestamp.
130
  Do not add explanations or extra text.
131
  Ensure the translation is accurate and culturally appropriate.
132
  Here is the SRT file:
133
  {srt_text}
134
  """
135
-
136
  response = model.generate_content(prompt)
137
  return response.text
138
 
139
- def process_video(video_file, language="Auto Detect", translate_to=None):
140
- """Process a video file to generate and translate subtitles."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  # Extract audio from the video
142
  audio_file = extract_audio_from_video(video_file)
143
 
@@ -147,12 +213,12 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
147
  # Generate subtitles
148
  subtitles = generate_subtitles(transcription)
149
 
150
- # Save original subtitles to an SRT file
151
  original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
152
  with open(original_srt_file, "w", encoding="utf-8") as f:
153
  f.write(subtitles)
154
 
155
- # Translate subtitles if a target language is provided
156
  translated_srt_file = None
157
  if translate_to and translate_to != "None":
158
  translated_subtitles = translate_srt(subtitles, translate_to)
@@ -160,48 +226,88 @@ def process_video(video_file, language="Auto Detect", translate_to=None):
160
  with open(translated_srt_file, "w", encoding="utf-8") as f:
161
  f.write(translated_subtitles)
162
 
163
- # Clean up extracted audio file
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  os.remove(audio_file)
165
 
166
- return original_srt_file, translated_srt_file, "Detected Language: Auto"
167
 
168
  # Define the Gradio interface
169
- with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
170
  # Header
171
  with gr.Column():
172
- gr.Markdown("# 🎥 AutoSubGen")
173
- gr.Markdown("### AI-Powered Video Subtitle Generator")
174
- gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
175
 
176
  # Main content
177
  with gr.Tab("Generate Subtitles"):
178
- gr.Markdown("### Upload a video file to generate subtitles.")
179
  with gr.Row():
180
  video_input = gr.Video(label="Upload Video File", scale=2)
181
- language_dropdown = gr.Dropdown(
182
- choices=SUPPORTED_LANGUAGES,
183
- label="Select Language",
184
- value="Auto Detect",
185
- scale=1
186
- )
187
- translate_to_dropdown = gr.Dropdown(
188
- choices=["None"] + SUPPORTED_LANGUAGES[1:], # Exclude "Auto Detect"
189
- label="Translate To",
190
- value="None",
191
- scale=1
192
- )
193
- generate_button = gr.Button("Generate Subtitles", variant="primary")
 
 
 
194
  with gr.Row():
195
- original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
196
- translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
 
 
 
 
 
197
  detected_language_output = gr.Textbox(label="Detected Language")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  # Link button to function
200
  generate_button.click(
201
  process_video,
202
- inputs=[video_input, language_dropdown, translate_to_dropdown],
203
- outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
204
  )
205
 
206
- # Launch the Gradio interface with a public link
207
  demo.launch(share=True)
 
1
  import os
2
  import google.generativeai as genai
3
  from moviepy.video.io.VideoFileClip import VideoFileClip
4
+ from moviepy.audio.io.AudioFileClip import AudioFileClip
5
+ from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
6
+ from moviepy.video.tools.subtitles import SubtitlesClip
7
  import tempfile
8
  import logging
9
  import gradio as gr
10
+ from gtts import gTTS
11
+ import srt
12
 
13
  # Suppress moviepy logs
14
  logging.getLogger("moviepy").setLevel(logging.ERROR)
 
18
 
19
  # Create the Gemini model
20
  generation_config = {
21
+ "temperature": 0.7,
22
  "top_p": 0.9,
23
  "top_k": 40,
24
  "max_output_tokens": 8192,
 
32
 
33
# Display names of the languages offered in the UI dropdowns.
# The first entry, "Auto Detect", is a pseudo-language for the source side
# only: the "Translate To" dropdown skips it via SUPPORTED_LANGUAGES[1:].
SUPPORTED_LANGUAGES = [
    "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
    "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan", "Dutch",
    "Arabic", "Swedish", "Italian", "Indonesian", "Hindi", "Finnish", "Vietnamese",
    "Hebrew", "Ukrainian", "Greek", "Malay", "Czech", "Romanian", "Danish",
    "Hungarian", "Tamil", "Norwegian", "Thai", "Urdu", "Croatian", "Bulgarian",
    "Lithuanian", "Latin", "Maori", "Malayalam", "Welsh", "Slovak", "Telugu",
    "Persian", "Latvian", "Bengali", "Serbian", "Azerbaijani", "Slovenian",
    "Kannada", "Estonian", "Macedonian", "Breton", "Basque", "Icelandic",
    "Armenian", "Nepali", "Mongolian", "Bosnian", "Kazakh", "Albanian",
    "Swahili", "Galician", "Marathi", "Punjabi", "Sinhala", "Khmer", "Shona",
    "Yoruba", "Somali", "Afrikaans", "Occitan", "Georgian", "Belarusian",
    "Tajik", "Sindhi", "Gujarati", "Amharic", "Yiddish", "Lao", "Uzbek",
    "Faroese", "Haitian Creole", "Pashto", "Turkmen", "Nynorsk", "Maltese",
    "Sanskrit", "Luxembourgish", "Burmese", "Tibetan", "Tagalog", "Malagasy",
    "Assamese", "Tatar", "Hawaiian", "Lingala", "Hausa", "Bashkir", "Javanese",
    "Sundanese"
]
52
 
53
# Maps a language display name to the language code handed to gTTS.
# "Auto Detect" has no entry here; generate_tts_audio looks names up with
# .get(language, "en"), so any name missing from this table is spoken as English.
# NOTE(review): it is not verified here that gTTS accepts every code listed --
# confirm against the languages gTTS actually supports.
LANGUAGE_CODES = {
    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es",
    "Russian": "ru", "Korean": "ko", "French": "fr", "Japanese": "ja",
    "Portuguese": "pt", "Turkish": "tr", "Polish": "pl", "Catalan": "ca",
    "Dutch": "nl", "Arabic": "ar", "Swedish": "sv", "Italian": "it",
    "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
    "Hebrew": "he", "Ukrainian": "uk", "Greek": "el", "Malay": "ms",
    "Czech": "cs", "Romanian": "ro", "Danish": "da", "Hungarian": "hu",
    "Tamil": "ta", "Norwegian": "no", "Thai": "th", "Urdu": "ur",
    "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt", "Latin": "la",
    "Maori": "mi", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk",
    "Telugu": "te", "Persian": "fa", "Latvian": "lv", "Bengali": "bn",
    "Serbian": "sr", "Azerbaijani": "az", "Slovenian": "sl", "Kannada": "kn",
    "Estonian": "et", "Macedonian": "mk", "Breton": "br", "Basque": "eu",
    "Icelandic": "is", "Armenian": "hy", "Nepali": "ne", "Mongolian": "mn",
    "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
    "Galician": "gl", "Marathi": "mr", "Punjabi": "pa", "Sinhala": "si",
    "Khmer": "km", "Shona": "sn", "Yoruba": "yo", "Somali": "so",
    "Afrikaans": "af", "Occitan": "oc", "Georgian": "ka", "Belarusian": "be",
    "Tajik": "tg", "Sindhi": "sd", "Gujarati": "gu", "Amharic": "am",
    "Yiddish": "yi", "Lao": "lo", "Uzbek": "uz", "Faroese": "fo",
    "Haitian Creole": "ht", "Pashto": "ps", "Turkmen": "tk", "Nynorsk": "nn",
    "Maltese": "mt", "Sanskrit": "sa", "Luxembourgish": "lb", "Burmese": "my",
    "Tibetan": "bo", "Tagalog": "tl", "Malagasy": "mg", "Assamese": "as",
    "Tatar": "tt", "Hawaiian": "haw", "Lingala": "ln", "Hausa": "ha",
    "Bashkir": "ba", "Javanese": "jv", "Sundanese": "su"
}
81
+
82
def extract_audio_from_video(video_file):
    """Extract the audio track of a video and save it as a 16 kHz WAV file.

    Args:
        video_file: Path of the video file to read.

    Returns:
        Path of the written WAV file (``extracted_audio.wav`` inside the
        system temporary directory).

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_file = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
    video = VideoFileClip(video_file)
    try:
        if video.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError("The video has no audio track to extract.")
        # logger=None keeps moviepy from printing its progress output.
        video.audio.write_audiofile(audio_file, fps=16000, logger=None)
    finally:
        # Always release the resources held by the clip, even on failure.
        video.close()
    return audio_file
88
 
89
  def transcribe_audio_with_gemini(audio_file):
90
  """Transcribe audio using Gemini with a magic prompt for accurate timestamps."""
91
  with open(audio_file, "rb") as f:
92
  audio_data = f.read()
93
+
 
94
  audio_blob = {
95
  'mime_type': 'audio/wav',
96
  'data': audio_data
97
  }
98
 
 
99
  prompt = """
100
  You are a professional transcriber. Transcribe this audio accurately and verbatim in the original language.
101
  Include timestamps for each sentence in the following format:
102
  [HH:MM:SS] Sentence 1
103
  [HH:MM:SS] Sentence 2
 
104
  ...
105
  Ensure the timestamps are accurate and correspond to the start of each sentence.
106
  Respond only with the transcription and timestamps. Do not add explanations or extra text.
107
  """
108
+
 
109
  convo = model.start_chat()
110
  convo.send_message(prompt)
111
  response = convo.send_message(audio_blob)
 
114
def generate_subtitles(transcription):
    """Generate SRT subtitles from a transcription with ``[HH:MM:SS]`` stamps.

    Each non-blank line becomes one subtitle. A line of the form
    ``[HH:MM:SS] text`` starts at the given time; a line without a leading
    bracketed timestamp starts at ``00:00:00``.

    Args:
        transcription: Newline-separated transcription text.

    Returns:
        The subtitles composed into a single SRT-formatted string.
    """
    # Imported locally: the module-level imports do not bring in datetime.
    import datetime

    srt_subtitles = []

    for raw_line in transcription.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith("[") and "]" in line:
            # Split on the first "]" only, so text that itself contains "]"
            # is kept whole.
            stamp, _, text = line.partition("]")
            start_time = stamp[1:].strip()
            text = text.strip()
        else:
            start_time = "00:00:00"
            text = line

        start_seconds = time_to_seconds(start_time)
        end_seconds = start_seconds + 5  # Placeholder duration

        srt_subtitles.append(
            srt.Subtitle(
                # Number only the lines actually emitted; blank lines are skipped.
                index=len(srt_subtitles) + 1,
                start=datetime.timedelta(seconds=start_seconds),
                end=datetime.timedelta(seconds=end_seconds),
                content=text,
            )
        )

    return srt.compose(srt_subtitles)
143
 
144
  def time_to_seconds(time_str):
145
  """Convert HH:MM:SS to seconds."""
 
154
  return f"{hh:02}:{mm:02}:{ss:02}"
155
 
156
def translate_srt(srt_text, target_language):
    """Ask the Gemini model to translate SRT subtitles into another language.

    Args:
        srt_text: The subtitles, as SRT-formatted text.
        target_language: Name of the language to translate into.

    Returns:
        The text of the model's response.
    """
    request = f"""
    Translate the following SRT subtitles into {target_language}.
    Preserve the SRT format (timestamps and structure).
    Translate only the text after the timestamp.
    Do not add explanations or extra text.
    Ensure the translation is accurate and culturally appropriate.
    Here is the SRT file:
    {srt_text}
    """

    return model.generate_content(request).text
170
 
171
def generate_tts_audio(srt_text, language):
    """Generate a text-to-speech MP3 from the text of SRT subtitles.

    Args:
        srt_text: The subtitles, as SRT-formatted text.
        language: Language display name; looked up in ``LANGUAGE_CODES`` and
            spoken as English ("en") when the name has no entry.

    Returns:
        Path of the written MP3 file (``tts_audio.mp3`` inside the system
        temporary directory), or ``None`` when the subtitles contain no text.
    """
    # Collect the spoken text of every subtitle, ignoring empty entries.
    pieces = (sub.content.strip() for sub in srt.parse(srt_text))
    all_text = " ".join(piece for piece in pieces if piece)
    if not all_text:
        # gTTS refuses empty text; report "no audio" instead of crashing.
        return None

    lang_code = LANGUAGE_CODES.get(language, "en")

    tts = gTTS(text=all_text, lang=lang_code, slow=False)
    audio_file = os.path.join(tempfile.gettempdir(), "tts_audio.mp3")
    tts.save(audio_file)
    return audio_file
185
+
186
def add_subtitles_to_video(video_file, srt_file, output_file):
    """Render SRT subtitles onto a video and write the result to a new file.

    Args:
        video_file: Path of the source video.
        srt_file: Path of the SRT subtitle file to overlay.
        output_file: Path the subtitled video is written to.

    Returns:
        ``output_file``, once the video has been written.
    """
    # Imported locally: TextClip is not among the module-level imports.
    from moviepy.video.VideoClip import TextClip

    def make_text_clip(txt):
        """Build the on-screen clip for one subtitle's text."""
        return TextClip(txt, font='Arial', fontsize=24, color='white')

    subtitles = SubtitlesClip(srt_file, make_text_clip)
    video = VideoFileClip(video_file)
    try:
        result = CompositeVideoClip([
            video,
            subtitles.set_position(('center', 'bottom'))
        ])
        # Subtitles may end after the video does; keep the output no longer
        # than the source video.
        result = result.set_duration(video.duration)
        result.write_videofile(output_file, codec='libx264', audio_codec='aac', threads=4)
    finally:
        # Always release the resources held by the source clip.
        video.close()
    return output_file
204
+
205
+ def process_video(video_file, language="Auto Detect", translate_to=None, add_tts=False, add_subtitles=False):
206
+ """Process a video file with full options."""
207
  # Extract audio from the video
208
  audio_file = extract_audio_from_video(video_file)
209
 
 
213
  # Generate subtitles
214
  subtitles = generate_subtitles(transcription)
215
 
216
+ # Save original subtitles
217
  original_srt_file = os.path.join(tempfile.gettempdir(), "original_subtitles.srt")
218
  with open(original_srt_file, "w", encoding="utf-8") as f:
219
  f.write(subtitles)
220
 
221
+ # Translate subtitles if requested
222
  translated_srt_file = None
223
  if translate_to and translate_to != "None":
224
  translated_subtitles = translate_srt(subtitles, translate_to)
 
226
  with open(translated_srt_file, "w", encoding="utf-8") as f:
227
  f.write(translated_subtitles)
228
 
229
+ # Generate TTS audio if requested
230
+ tts_audio_file = None
231
+ if add_tts:
232
+ target_lang = translate_to if translate_to and translate_to != "None" else language
233
+ tts_audio_file = generate_tts_audio(subtitles if not translated_srt_file else translated_subtitles, target_lang)
234
+
235
+ # Create video with subtitles if requested
236
+ output_video_file = None
237
+ if add_subtitles:
238
+ srt_to_use = translated_srt_file if translated_srt_file else original_srt_file
239
+ output_video_file = os.path.join(tempfile.gettempdir(), "output_video.mp4")
240
+ add_subtitles_to_video(video_file, srt_to_use, output_video_file)
241
+
242
+ # Clean up
243
  os.remove(audio_file)
244
 
245
+ return original_srt_file, translated_srt_file, tts_audio_file, output_video_file, "Detected Language: Auto"
246
 
247
  # Define the Gradio interface
248
with gr.Blocks(title="AutoSubGen Pro - AI Video Subtitle Generator") as demo:
    # Header
    with gr.Column():
        gr.Markdown("# 🎥 AutoSubGen Pro")
        gr.Markdown("### Advanced AI-Powered Video Subtitle Generator")
        gr.Markdown("Generate, translate, and add subtitles with text-to-speech audio to your videos.")

    # Main content
    with gr.Tab("Generate Subtitles"):
        gr.Markdown("### Upload a video file to process")
        with gr.Row():
            video_input = gr.Video(label="Upload Video File", scale=2)
            with gr.Column():
                language_dropdown = gr.Dropdown(
                    choices=SUPPORTED_LANGUAGES,
                    label="Source Language",
                    value="Auto Detect",
                )
                translate_to_dropdown = gr.Dropdown(
                    # "Auto Detect" (first entry) is not a valid translation target.
                    choices=["None"] + SUPPORTED_LANGUAGES[1:],
                    label="Translate To",
                    value="None",
                )
                tts_checkbox = gr.Checkbox(label="Generate Text-to-Speech Audio")
                subtitles_checkbox = gr.Checkbox(label="Add Subtitles to Video")

        generate_button = gr.Button("Process Video", variant="primary")

        with gr.Row():
            with gr.Column():
                original_subtitle_output = gr.File(label="Original Subtitles (SRT)")
                translated_subtitle_output = gr.File(label="Translated Subtitles (SRT)")
            with gr.Column():
                # Hidden until the matching checkbox is ticked.
                tts_audio_output = gr.Audio(label="Text-to-Speech Audio", visible=False)
                video_output = gr.Video(label="Video with Subtitles", visible=False)

        detected_language_output = gr.Textbox(label="Detected Language")

    # Show/hide the optional outputs when their checkbox changes.
    tts_checkbox.change(
        fn=lambda x: gr.update(visible=x),
        inputs=tts_checkbox,
        outputs=tts_audio_output
    )

    subtitles_checkbox.change(
        fn=lambda x: gr.update(visible=x),
        inputs=subtitles_checkbox,
        outputs=video_output
    )

    # Link button to function; the output order matches process_video's return tuple.
    generate_button.click(
        process_video,
        inputs=[video_input, language_dropdown, translate_to_dropdown, tts_checkbox, subtitles_checkbox],
        outputs=[original_subtitle_output, translated_subtitle_output, tts_audio_output, video_output, detected_language_output]
    )

# Launch the interface
demo.launch(share=True)