Spaces:

Athspi-ai
/

AutoSubGen

Running

App Files Files Community

Athspi committed on Jun 25

Commit

e2954cc

verified ·

1 Parent(s): c9abf90

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -48

app.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import os
 import google.generativeai as genai
-from moviepy.video.io.VideoFileClip import VideoFileClip
-from moviepy.audio.io.AudioFileClip import AudioFileClip
-from moviepy.video.compositing.CompositeVideoClip import CompositeVideoClip
-from moviepy.video.tools.subtitles import SubtitlesClip
 import tempfile
 import logging
 import srt
@@ -19,12 +17,28 @@ import uuid
 logging.basicConfig(level=logging.INFO)
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 # Configure Gemini API
-# IMPORTANT: Set your GEMINI_API_KEY as an environment variable
 try:
     genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 except KeyError:
-    raise Exception("GEMINI_API_KEY environment variable not set. Please set it before running the app.")
 # --- Flask App Initialization ---
@@ -47,11 +61,6 @@ generation_config = {
     "response_mime_type": "text/plain",
 }
-model = genai.GenerativeModel(
-    model_name="gemini-2.0-flash",
-    generation_config=generation_config,
-)
 # List of all supported languages for the UI
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
@@ -74,24 +83,33 @@ SUPPORTED_LANGUAGES = [
 # Language code mapping for Google Text-to-Speech (gTTS)
 LANGUAGE_CODES = {
-    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
-    "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
-    "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
-    "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
-    "Hebrew": "iw", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
-    "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
-    "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
-    "Latin": "la", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te",
-    "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Slovenian": "sl",
-    "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Armenian": "hy", "Nepali": "ne",
-    "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
-    "Gujarati": "gu", "Lao": "lo", "Uzbek": "uz", "Maltese": "mt", "Burmese": "my",
     "Tagalog": "tl", "Javanese": "jw", "Sundanese": "su", "Afrikaans": "af"
 }
 # --- Core Processing Functions ---
 def extract_audio_from_video(video_path, audio_path):
     """Extract audio from a video file into a WAV file."""
     try:
@@ -104,12 +122,15 @@ def extract_audio_from_video(video_path, audio_path):
 def transcribe_audio_with_gemini(audio_path, source_language):
     """Transcribe audio using Gemini, requesting output in SRT format."""
     try:
         logging.info(f"Uploading audio file: {audio_path}")
         audio_file = genai.upload_file(path=audio_path)
         language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
         prompt = f"""
         You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
         Your output MUST be in the SRT (SubRip) format.
@@ -125,7 +146,7 @@ def transcribe_audio_with_gemini(audio_path, source_language):
         Ensure timestamps are precise. Respond ONLY with the transcription in the SRT format.
         Do not add explanations, notes, or any other text outside of the valid SRT content.
         """
         logging.info("Sending transcription request to Gemini...")
         response = model.generate_content([prompt, audio_file], request_options={"timeout": 600})
         genai.delete_file(audio_file.name) # Clean up the uploaded file
@@ -138,6 +159,9 @@ def transcribe_audio_with_gemini(audio_path, source_language):
 def translate_srt(srt_text, target_language):
     """Translate an SRT file using Gemini while preserving its structure."""
     try:
         prompt = f"""
         Translate the following SRT subtitles into {target_language}.
@@ -162,7 +186,7 @@ def generate_tts_audio(srt_text, language, tts_audio_path):
     try:
         subtitles = list(srt.parse(srt_text))
         full_text = " ".join([sub.content.replace('\n', ' ') for sub in subtitles])
         if not full_text:
             return None
@@ -170,7 +194,7 @@ def generate_tts_audio(srt_text, language, tts_audio_path):
         if not lang_code:
             flash(f"Language '{language}' not supported for TTS, defaulting to English.", "warning")
             lang_code = "en"
         logging.info(f"Generating TTS audio in '{language}' ({lang_code})...")
         tts = gTTS(text=full_text, lang=lang_code, slow=False)
         tts.save(tts_audio_path)
@@ -189,42 +213,45 @@ def create_final_video(original_video_path, srt_text, new_audio_path, output_pat
     try:
         logging.info("Creating final video...")
         original_clip = VideoFileClip(original_video_path)
         # If new audio is provided, replace the original audio track
         if new_audio_path:
             tts_audio_clip = AudioFileClip(new_audio_path)
             final_clip = original_clip.set_audio(tts_audio_clip)
-        else:
-            final_clip = original_clip
         # If subtitle text is provided, burn it into the video
-        if srt_text:
             with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
                 temp_srt.write(srt_text)
                 srt_filepath = temp_srt.name
             # Subtitle styling
             generator = lambda txt: TextClip(
                 txt, font='Arial-Bold', fontsize=24, color='white',
-                stroke_color='black', stroke_width=1.5, method='caption',
                 size=(final_clip.w * 0.8, None) # Subtitles take 80% of video width
             )
             subtitles_clip = SubtitlesClip(srt_filepath, generator)
             # Composite the video with the subtitles
             final_video = CompositeVideoClip([final_clip, subtitles_clip.set_position(('center', 'bottom'))])
             os.remove(srt_filepath) # Clean up temp srt file
         else:
             final_video = final_clip
         # Write the final video file
         final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
         # Close all clips to release memory
-        if 'tts_audio_clip' in locals():
             tts_audio_clip.close()
         final_video.close()
         logging.info(f"Final video saved to {output_path}")
         return output_path
     except Exception as e:
@@ -238,23 +265,35 @@ def create_final_video(original_video_path, srt_text, new_audio_path, output_pat
 def index():
     """Render the main page. Clear session for a fresh start."""
     session.clear()
-    return render_template('index.html', supported_languages=SUPPORTED_LANGUAGES)
 @app.route('/process', methods=['POST'])
 def process():
     """Handle the video processing request from the form."""
     if 'video' not in request.files or request.files['video'].filename == '':
         flash('No video file selected. Please upload a video.', 'error')
         return redirect(url_for('index'))
     video_file = request.files['video']
     # --- Get form options ---
     source_language = request.form.get('source_language', 'Auto Detect')
     translate_to = request.form.get('translate_to', 'None')
     add_tts = 'add_tts' in request.form
     add_subtitles = 'add_subtitles' in request.form
     # --- Setup a unique session directory for this request ---
     session_id = str(uuid.uuid4())
     session['session_id'] = session_id
@@ -279,12 +318,12 @@ def process():
     if not original_srt_text:
         flash('Failed to transcribe the audio. The API call might have failed or the audio is silent.', 'error')
         return redirect(url_for('index'))
     original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
     with open(original_srt_path, "w", encoding="utf-8") as f:
         f.write(original_srt_text)
     results['original_srt_file'] = "original_subtitles.srt"
     srt_for_final_video = None
     tts_audio_path = None
     final_srt_text = original_srt_text
@@ -300,14 +339,14 @@ def process():
             final_srt_text = translated_srt_text # Use translated text for subsequent steps
         else:
             flash(f'Failed to translate subtitles to {translate_to}. Using original subtitles.', 'warning')
     # 4. Generate TTS Audio (if requested)
     if add_tts:
         tts_lang = translate_to if translate_to != "None" else source_language
         if tts_lang == 'Auto Detect':
             flash('TTS language cannot be "Auto Detect". Defaulting to English.', 'warning')
             tts_lang = 'English'
         tts_audio_path_out = os.path.join(session_dir, "tts_audio.mp3")
         tts_audio_path = generate_tts_audio(final_srt_text, tts_lang, tts_audio_path_out)
         if tts_audio_path:
@@ -328,10 +367,13 @@ def process():
         else:
             flash('Failed to create the final processed video.', 'error')
     return render_template('index.html',
                            supported_languages=SUPPORTED_LANGUAGES,
                            results=results,
-                           session_id=session_id)
 @app.route('/download/<session_id>/<path:filename>')
 def download_file(session_id, filename):
@@ -342,4 +384,4 @@ def download_file(session_id, filename):
 # --- Run the App ---
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port=7860)

 import os
 import google.generativeai as genai
+from moviepy.editor import *
+import moviepy.config as mp_config
 import tempfile
 import logging
 import srt
 logging.basicConfig(level=logging.INFO)
 logging.getLogger("moviepy").setLevel(logging.ERROR)
+# Flag recording whether ImageMagick is missing; set to True by the TextClip probe below
+IMAGEMAGICK_NOT_FOUND = False
+try:
+    # This will raise an exception if ImageMagick is not found
+    TextClip("test", font='Arial', fontsize=24, color='white')
+except Exception as e:
+    if "ImageMagick is not installed" in str(e) or "magick: not found" in str(e):
+        IMAGEMAGICK_NOT_FOUND = True
+        logging.warning("ImageMagick is not installed or not found in the system's PATH. Subtitle generation will be disabled.")
+        logging.warning("Please install ImageMagick and ensure its 'magick' command is accessible from your shell.")
+    else:
+        # Handle other potential startup errors from TextClip
+        logging.error(f"An unexpected error occurred with MoviePy/ImageMagick: {e}")
 # Configure Gemini API
+# IMPORTANT: Set your GEMINI_API_KEY as an environment variable for this to work.
 try:
     genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 except KeyError:
+    # Deliberately ignored at import time: the Flask routes check for GEMINI_API_KEY themselves and report its absence to the user
+    pass
 # --- Flask App Initialization ---
     "response_mime_type": "text/plain",
 }
 # List of all supported languages for the UI
 SUPPORTED_LANGUAGES = [
     "Auto Detect", "English", "Chinese", "German", "Spanish", "Russian", "Korean",
 # Language code mapping for Google Text-to-Speech (gTTS)
 LANGUAGE_CODES = {
+    "English": "en", "Chinese": "zh", "German": "de", "Spanish": "es", "Russian": "ru",
+    "Korean": "ko", "French": "fr", "Japanese": "ja", "Portuguese": "pt", "Turkish": "tr",
+    "Polish": "pl", "Catalan": "ca", "Dutch": "nl", "Arabic": "ar", "Swedish": "sv",
+    "Italian": "it", "Indonesian": "id", "Hindi": "hi", "Finnish": "fi", "Vietnamese": "vi",
+    "Hebrew": "iw", "Ukrainian": "uk", "Greek": "el", "Malay": "ms", "Czech": "cs",
+    "Romanian": "ro", "Danish": "da", "Hungarian": "hu", "Tamil": "ta", "Norwegian": "no",
+    "Thai": "th", "Urdu": "ur", "Croatian": "hr", "Bulgarian": "bg", "Lithuanian": "lt",
+    "Latin": "la", "Malayalam": "ml", "Welsh": "cy", "Slovak": "sk", "Telugu": "te",
+    "Persian": "fa", "Latvian": "lv", "Bengali": "bn", "Serbian": "sr", "Slovenian": "sl",
+    "Kannada": "kn", "Estonian": "et", "Macedonian": "mk", "Armenian": "hy", "Nepali": "ne",
+    "Mongolian": "mn", "Bosnian": "bs", "Kazakh": "kk", "Albanian": "sq", "Swahili": "sw",
+    "Gujarati": "gu", "Lao": "lo", "Uzbek": "uz", "Maltese": "mt", "Burmese": "my",
     "Tagalog": "tl", "Javanese": "jw", "Sundanese": "su", "Afrikaans": "af"
 }
 # --- Core Processing Functions ---
+def get_gemini_model():
+    """Initializes and returns the Gemini model, checking for API key."""
+    if "GEMINI_API_KEY" not in os.environ:
+        return None
+    return genai.GenerativeModel(
+        model_name="gemini-1.5-flash",
+        generation_config=generation_config,
+    )
 def extract_audio_from_video(video_path, audio_path):
     """Extract audio from a video file into a WAV file."""
     try:
 def transcribe_audio_with_gemini(audio_path, source_language):
     """Transcribe audio using Gemini, requesting output in SRT format."""
+    model = get_gemini_model()
+    if not model:
+        return None
     try:
         logging.info(f"Uploading audio file: {audio_path}")
         audio_file = genai.upload_file(path=audio_path)
         language_prompt = f"in {source_language}" if source_language != "Auto Detect" else ""
         prompt = f"""
         You are a professional transcriber. Transcribe this audio accurately and verbatim {language_prompt}.
         Your output MUST be in the SRT (SubRip) format.
         Ensure timestamps are precise. Respond ONLY with the transcription in the SRT format.
         Do not add explanations, notes, or any other text outside of the valid SRT content.
         """
         logging.info("Sending transcription request to Gemini...")
         response = model.generate_content([prompt, audio_file], request_options={"timeout": 600})
         genai.delete_file(audio_file.name) # Clean up the uploaded file
 def translate_srt(srt_text, target_language):
     """Translate an SRT file using Gemini while preserving its structure."""
+    model = get_gemini_model()
+    if not model:
+        return None
     try:
         prompt = f"""
         Translate the following SRT subtitles into {target_language}.
     try:
         subtitles = list(srt.parse(srt_text))
         full_text = " ".join([sub.content.replace('\n', ' ') for sub in subtitles])
         if not full_text:
             return None
         if not lang_code:
             flash(f"Language '{language}' not supported for TTS, defaulting to English.", "warning")
             lang_code = "en"
         logging.info(f"Generating TTS audio in '{language}' ({lang_code})...")
         tts = gTTS(text=full_text, lang=lang_code, slow=False)
         tts.save(tts_audio_path)
     try:
         logging.info("Creating final video...")
         original_clip = VideoFileClip(original_video_path)
+        final_clip = original_clip
         # If new audio is provided, replace the original audio track
         if new_audio_path:
             tts_audio_clip = AudioFileClip(new_audio_path)
+            # If the new audio is shorter than the video, loop it.
+            if tts_audio_clip.duration < original_clip.duration:
+                 tts_audio_clip = tts_audio_clip.fx(vfx.loop, duration=original_clip.duration)
             final_clip = original_clip.set_audio(tts_audio_clip)
         # If subtitle text is provided, burn it into the video
+        if srt_text and not IMAGEMAGICK_NOT_FOUND:
             with tempfile.NamedTemporaryFile(mode='w', suffix='.srt', delete=False, encoding='utf-8') as temp_srt:
                 temp_srt.write(srt_text)
                 srt_filepath = temp_srt.name
             # Subtitle styling
             generator = lambda txt: TextClip(
                 txt, font='Arial-Bold', fontsize=24, color='white',
+                stroke_color='black', stroke_width=1, method='caption',
                 size=(final_clip.w * 0.8, None) # Subtitles take 80% of video width
             )
             subtitles_clip = SubtitlesClip(srt_filepath, generator)
             # Composite the video with the subtitles
             final_video = CompositeVideoClip([final_clip, subtitles_clip.set_position(('center', 'bottom'))])
             os.remove(srt_filepath) # Clean up temp srt file
         else:
             final_video = final_clip
         # Write the final video file
         final_video.write_videofile(output_path, codec='libx264', audio_codec='aac', threads=4, logger=None)
         # Close all clips to release memory
+        if 'tts_audio_clip' in locals() and tts_audio_clip:
             tts_audio_clip.close()
         final_video.close()
+        original_clip.close()
         logging.info(f"Final video saved to {output_path}")
         return output_path
     except Exception as e:
 def index():
     """Render the main page. Clear session for a fresh start."""
     session.clear()
+    api_key_status = "GEMINI_API_KEY" in os.environ
+    return render_template('index.html',
+                           supported_languages=SUPPORTED_LANGUAGES,
+                           imagemagick_missing=IMAGEMAGICK_NOT_FOUND,
+                           api_key_set=api_key_status)
 @app.route('/process', methods=['POST'])
 def process():
     """Handle the video processing request from the form."""
+    if "GEMINI_API_KEY" not in os.environ:
+        flash('Your GEMINI_API_KEY environment variable is not set. Please set it to use the application.', 'error')
+        return redirect(url_for('index'))
     if 'video' not in request.files or request.files['video'].filename == '':
         flash('No video file selected. Please upload a video.', 'error')
         return redirect(url_for('index'))
     video_file = request.files['video']
     # --- Get form options ---
     source_language = request.form.get('source_language', 'Auto Detect')
     translate_to = request.form.get('translate_to', 'None')
     add_tts = 'add_tts' in request.form
     add_subtitles = 'add_subtitles' in request.form
+    if IMAGEMAGICK_NOT_FOUND and add_subtitles:
+        flash("Cannot add subtitles because ImageMagick is not installed. The video will be processed without them.", "warning")
+        add_subtitles = False
     # --- Setup a unique session directory for this request ---
     session_id = str(uuid.uuid4())
     session['session_id'] = session_id
     if not original_srt_text:
         flash('Failed to transcribe the audio. The API call might have failed or the audio is silent.', 'error')
         return redirect(url_for('index'))
     original_srt_path = os.path.join(session_dir, "original_subtitles.srt")
     with open(original_srt_path, "w", encoding="utf-8") as f:
         f.write(original_srt_text)
     results['original_srt_file'] = "original_subtitles.srt"
     srt_for_final_video = None
     tts_audio_path = None
     final_srt_text = original_srt_text
             final_srt_text = translated_srt_text # Use translated text for subsequent steps
         else:
             flash(f'Failed to translate subtitles to {translate_to}. Using original subtitles.', 'warning')
     # 4. Generate TTS Audio (if requested)
     if add_tts:
         tts_lang = translate_to if translate_to != "None" else source_language
         if tts_lang == 'Auto Detect':
             flash('TTS language cannot be "Auto Detect". Defaulting to English.', 'warning')
             tts_lang = 'English'
         tts_audio_path_out = os.path.join(session_dir, "tts_audio.mp3")
         tts_audio_path = generate_tts_audio(final_srt_text, tts_lang, tts_audio_path_out)
         if tts_audio_path:
         else:
             flash('Failed to create the final processed video.', 'error')
+    api_key_status = "GEMINI_API_KEY" in os.environ
     return render_template('index.html',
                            supported_languages=SUPPORTED_LANGUAGES,
                            results=results,
+                           session_id=session_id,
+                           imagemagick_missing=IMAGEMAGICK_NOT_FOUND,
+                           api_key_set=api_key_status)
 @app.route('/download/<session_id>/<path:filename>')
 def download_file(session_id, filename):
 # --- Run the App ---
 if __name__ == '__main__':
+    app.run(host="0.0.0.0", port=7860)