mgbam committed · Commit 9840152 · verified · 1 Parent(s): cc8faa4

Update core/visual_engine.py

Files changed (1):
  core/visual_engine.py +165 -112
core/visual_engine.py CHANGED
@@ -1,14 +1,15 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont
 from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
-                            CompositeVideoClip)
+                            CompositeVideoClip, AudioFileClip)
 import moviepy.video.fx.all as vfx
 import numpy as np
 import os
 import openai
 import requests
 import io
-import time # For adding slight delay if API rate limits are hit
+import time
+from elevenlabs import generate as elevenlabs_generate_audio, set_api_key as elevenlabs_set_api_key_func
 
 class VisualEngine:
     def __init__(self, output_dir="temp_cinegen_media"):
@@ -17,141 +18,186 @@ class VisualEngine:
 
         self.font_filename = "arial.ttf"
         self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
-        self.font_size_pil = 20 # Slightly smaller for placeholder text to fit more
-        self.video_overlay_font_size = 32
+        self.font_size_pil = 20
+        self.video_overlay_font_size = 30 # Slightly smaller for more text with narration
         self.video_overlay_font_color = 'white'
-        self.video_overlay_font = 'Arial-Bold' # Try specific variant; ensure ImageMagick can find it or use full path
+        self.video_overlay_font = 'Arial-Bold'
 
         try:
             self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
-            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
+            print(f"Font for placeholders: {self.font_path_in_container}.")
         except IOError:
-            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
-            self.font = ImageFont.load_default()
-            self.font_size_pil = 10 # Default font size estimate
+            print(f"Warning: Placeholder font '{self.font_path_in_container}' not loaded. Using default.")
+            self.font = ImageFont.load_default(); self.font_size_pil = 10
 
-        self.openai_api_key = None
-        self.USE_AI_IMAGE_GENERATION = False
-        self.dalle_model = "dall-e-3"
-        # DALL-E 3 standard size for highest quality generally. Other options: "1792x1024", "1024x1792"
-        self.image_size_dalle3 = "1792x1024" # Landscape, good for cinematic
-        self.video_frame_size = (1280, 720) # 16:9 aspect ratio for video output
+        self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False
+        self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
+        self.video_frame_size = (1280, 720)
+
+        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False
+        self.elevenlabs_voice_id = "Rachel" # Default, can be made configurable
+
+        self.pexels_api_key = None; self.USE_PEXELS = False
 
     def set_openai_api_key(self, api_key):
+        if api_key: self.openai_api_key = api_key; self.USE_AI_IMAGE_GENERATION = True; print(f"DALL-E ({self.dalle_model}) Ready.")
+        else: self.USE_AI_IMAGE_GENERATION = False; print("DALL-E Disabled.")
+
+    def set_elevenlabs_api_key(self, api_key):
         if api_key:
-            self.openai_api_key = api_key
-            self.USE_AI_IMAGE_GENERATION = True
-            print(f"OpenAI API key set. AI Image Generation Enabled with {self.dalle_model}.")
-        else:
-            self.USE_AI_IMAGE_GENERATION = False
-            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
+            self.elevenlabs_api_key = api_key
+            try:
+                elevenlabs_set_api_key_func(api_key) # Set for the elevenlabs library
+                self.USE_ELEVENLABS = True
+                print("ElevenLabs Ready.")
+            except Exception as e:
+                print(f"Error setting ElevenLabs API key for library: {e}. ElevenLabs disabled.")
+                self.USE_ELEVENLABS = False
+        else: self.USE_ELEVENLABS = False; print("ElevenLabs Disabled.")
 
-    def _get_text_dimensions(self, text_content, font_obj): # Remains the same
+    def set_pexels_api_key(self, api_key):
+        if api_key: self.pexels_api_key = api_key; self.USE_PEXELS = True; print("Pexels Ready.")
+        else: self.USE_PEXELS = False; print("Pexels Disabled.")
+
+    def _get_text_dimensions(self, text_content, font_obj): # Remains same
         if not text_content: return 0, self.font_size_pil
         try:
-            if hasattr(font_obj, 'getbbox'):
-                bbox = font_obj.getbbox(text_content); width = bbox[2] - bbox[0]; height = bbox[3] - bbox[1]
-                return width, height if height > 0 else self.font_size_pil
-            elif hasattr(font_obj, 'getsize'):
-                width, height = font_obj.getsize(text_content)
-                return width, height if height > 0 else self.font_size_pil
-            else:
-                return int(len(text_content) * self.font_size_pil*0.6), int(self.font_size_pil*1.2 if self.font_size_pil*1.2 > 0 else self.font_size_pil)
-        except Exception: return int(len(text_content) * self.font_size_pil*0.6), int(self.font_size_pil*1.2)
+            if hasattr(font_obj, 'getbbox'): bbox = font_obj.getbbox(text_content); w = bbox[2]-bbox[0]; h = bbox[3]-bbox[1]; return w, h if h>0 else self.font_size_pil
+            elif hasattr(font_obj, 'getsize'): w,h = font_obj.getsize(text_content); return w, h if h>0 else self.font_size_pil
+            else: return int(len(text_content)*self.font_size_pil*0.6), int(self.font_size_pil*1.2 if self.font_size_pil*1.2 > 0 else self.font_size_pil)
+        except: return int(len(text_content)*self.font_size_pil*0.6), int(self.font_size_pil*1.2)
 
-    def _create_placeholder_image_content(self, text_description, filename, size=(1280, 720)): # Default to video_frame_size
-        img = Image.new('RGB', size, color=(20, 20, 40)) # Darker
-        draw = ImageDraw.Draw(img)
-        padding = 25
-        max_text_width = size[0] - (2 * padding)
-        lines = []
-        if not text_description: text_description = "(Placeholder: No prompt provided)"
-        # Simplified text wrapping for placeholder
-        words = text_description.split()
-        current_line = ""
+    def _create_placeholder_image_content(self, text_description, filename, size=(1280, 720)): # Remains same
+        img = Image.new('RGB', size, color=(20,20,40)); draw = ImageDraw.Draw(img); padding = 25; max_w = size[0]-(2*padding); lines = []
+        if not text_description: text_description = "(Placeholder)"
+        words = text_description.split(); current_line = ""
         for word in words:
-            test_line = current_line + word + " "
-            if self._get_text_dimensions(test_line, self.font)[0] <= max_text_width:
-                current_line = test_line
-            else:
+            test_line = current_line + word + " ";
+            if self._get_text_dimensions(test_line, self.font)[0] <= max_w: current_line = test_line
+            else:
                 if current_line: lines.append(current_line.strip())
                 current_line = word + " "
         if current_line: lines.append(current_line.strip())
-        if not lines: lines.append("(Text too long or unrenderable for placeholder)")
-
-        _, single_line_height = self._get_text_dimensions("Ay", self.font)
-        if single_line_height == 0: single_line_height = self.font_size_pil + 2
-
-        num_lines_to_display = min(len(lines), (size[1] - 2 * padding) // (single_line_height + 2)) # Max lines based on height
-
-        y_text = padding + (size[1] - 2*padding - num_lines_to_display * (single_line_height + 2)) / 2.0
-
-        for i in range(num_lines_to_display):
-            line = lines[i]
-            line_width, _ = self._get_text_dimensions(line, self.font)
-            x_text = (size[0] - line_width) / 2.0
-            draw.text((x_text, y_text), line, font=self.font, fill=(200, 200, 180))
-            y_text += single_line_height + 2 # Line spacing
-            if i == 6 and num_lines_to_display > 7: # Show ellipsis if more text
-                draw.text((x_text, y_text), "...", font=self.font, fill=(200, 200, 180))
-                break
-        filepath = os.path.join(self.output_dir, filename)
-        try: img.save(filepath); return filepath
-        except Exception as e: print(f"Error saving placeholder: {e}"); return None
+        if not lines: lines.append("(Text err)")
+        _, line_h = self._get_text_dimensions("Ay", self.font); line_h = line_h if line_h>0 else self.font_size_pil+2
+        max_lines = min(len(lines), (size[1]-2*padding)//(line_h+2))
+        y = padding + (size[1]-2*padding - max_lines*(line_h+2))/2.0
+        for i in range(max_lines):
+            line = lines[i]; line_w, _ = self._get_text_dimensions(line, self.font); x = (size[0]-line_w)/2.0
+            draw.text((x,y), line, font=self.font, fill=(200,200,180)); y += line_h+2
+            if i==6 and max_lines>7: draw.text((x,y), "...", font=self.font, fill=(200,200,180)); break
+        fp = os.path.join(self.output_dir, filename);
+        try: img.save(fp); return fp
+        except Exception as e: print(f"Err placeholder save: {e}"); return None
 
-    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
+    def _search_pexels_image(self, query, output_filename):
+        if not self.USE_PEXELS or not self.pexels_api_key: return None
+        headers = {"Authorization": self.pexels_api_key}
+        params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large"}
+        # Ensure JPG for pexels typical format, but DALL-E images are PNG. Filename will be distinct.
+        pexels_filename = output_filename.replace(".png", "_pexels.jpg")
+        filepath = os.path.join(self.output_dir, pexels_filename)
+        try:
+            print(f"Searching Pexels for: '{query}' (max 3 words for relevance)")
+            # Limit query length for Pexels to improve relevance
+            query_parts = query.split()
+            effective_query = " ".join(query_parts[:5]) # Use first 5 words
+            params["query"] = effective_query
+
+            response = requests.get("https://api.pexels.com/v1/search", headers=headers, params=params, timeout=15)
+            response.raise_for_status()
+            data = response.json()
+            if data.get("photos"):
+                photo_url = data["photos"][0]["src"]["large2x"]
+                image_response = requests.get(photo_url, timeout=45)
+                image_response.raise_for_status()
+                img_data = Image.open(io.BytesIO(image_response.content))
+                if img_data.mode != 'RGB': img_data = img_data.convert('RGB')
+                img_data.save(filepath)
+                print(f"Pexels image saved: {filepath}")
+                return filepath
+            else:
+                print(f"No photos found on Pexels for query: '{effective_query}'")
+        except Exception as e: print(f"Pexels search/download error for '{query}': {e}")
+        return None
+
+    def generate_image_visual(self, image_prompt_text, scene_data_for_fallbacks, scene_identifier_filename):
         filepath = os.path.join(self.output_dir, scene_identifier_filename)
         if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
             max_retries = 2
             for attempt in range(max_retries):
                 try:
                     print(f"Attempt {attempt+1}: DALL-E ({self.dalle_model}) for: {image_prompt_text[:120]}...")
-                    client = openai.OpenAI(api_key=self.openai_api_key, timeout=60.0) # Timeout for client
-
+                    client = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0) # Increased client timeout
                     response = client.images.generate(
-                        model=self.dalle_model,
-                        prompt=image_prompt_text,
-                        n=1,
-                        size=self.image_size_dalle3,
-                        quality="hd", # Use "hd" for DALL-E 3 for better detail, "standard" for faster/cheaper
-                        response_format="url",
-                        style="vivid" # "vivid" or "natural" for DALL-E 3
+                        model=self.dalle_model, prompt=image_prompt_text, n=1,
+                        size=self.image_size_dalle3, quality="hd", response_format="url", style="vivid"
                     )
                     image_url = response.data[0].url
                     revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                     if revised_prompt: print(f"DALL-E 3 revised_prompt: {revised_prompt[:100]}...")
-
-                    image_response = requests.get(image_url, timeout=90) # Increased download timeout
-                    image_response.raise_for_status()
 
+                    image_response = requests.get(image_url, timeout=120) # Increased download timeout
+                    image_response.raise_for_status()
                     img_data = Image.open(io.BytesIO(image_response.content))
                     if img_data.mode != 'RGB': img_data = img_data.convert('RGB')
-
-                    img_data.save(filepath)
-                    print(f"AI Image (DALL-E) saved: {filepath}")
-                    return filepath
-                except openai.RateLimitError as e:
-                    print(f"OpenAI Rate Limit Error: {e}. Retrying after delay...")
-                    if attempt < max_retries - 1: time.sleep(5 * (attempt + 1)); continue
-                    else: print("Max retries reached for RateLimitError."); break
+                    img_data.save(filepath); print(f"AI Image (DALL-E) saved: {filepath}"); return filepath
+                except openai.RateLimitError as e:
+                    print(f"OpenAI Rate Limit: {e}. Retrying after {5*(attempt+1)}s...")
+                    time.sleep(5*(attempt+1));
+                    if attempt == max_retries - 1: print("Max retries for RateLimitError."); break # Break from loop
+                    else: continue # Go to next attempt
                 except openai.APIError as e: print(f"OpenAI API Error: {e}"); break
-                except requests.exceptions.RequestException as e: print(f"Requests Error (DALL-E image download): {e}"); break
-                except Exception as e: print(f"Generic error (DALL-E image gen): {e}"); break
+                except requests.exceptions.RequestException as e: print(f"Requests Error (DALL-E download): {e}"); break
+                except Exception as e: print(f"Generic error (DALL-E gen): {e}"); break
 
-            print("DALL-E generation failed after retries. Falling back to placeholder.")
+            print("DALL-E generation failed. Trying Pexels fallback...")
+            pexels_query = f"{scene_data_for_fallbacks.get('emotional_beat','')} {scene_data_for_fallbacks.get('setting_description','')} {scene_data_for_fallbacks.get('genre','')} {scene_data_for_fallbacks.get('mood','')}"
+            pexels_path = self._search_pexels_image(pexels_query, scene_identifier_filename)
+            if pexels_path: return pexels_path
+
+            print("Pexels also failed/disabled. Using placeholder.")
             return self._create_placeholder_image_content(
-                f"[AI Gen Failed] Prompt: {image_prompt_text[:100]}...",
+                f"[AI/Pexels Failed] Original Prompt: {image_prompt_text[:100]}...",
                 scene_identifier_filename, size=self.video_frame_size
             )
-        else:
+        else: # AI image generation not enabled
             return self._create_placeholder_image_content(
                 image_prompt_text, scene_identifier_filename, size=self.video_frame_size
             )
 
-    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=4):
+    def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
+        if not self.USE_ELEVENLABS or not self.elevenlabs_api_key or not text_to_narrate:
+            print("ElevenLabs not enabled, API key missing, or no text provided. Skipping audio generation.")
+            return None
+
+        audio_filepath = os.path.join(self.output_dir, output_filename)
+        try:
+            print(f"Generating ElevenLabs audio (Voice: {self.elevenlabs_voice_id}) for: {text_to_narrate[:70]}...")
+            # Ensure API key is set for the elevenlabs library context if it's not global
+            # elevenlabs_set_api_key_func(self.elevenlabs_api_key) # Usually set once globally is enough
+
+            audio_data = elevenlabs_generate_audio(
+                text=text_to_narrate,
+                voice=self.elevenlabs_voice_id,
+                model="eleven_multilingual_v2" # Or other suitable model like "eleven_turbo_v2"
+            )
+            with open(audio_filepath, "wb") as f:
+                f.write(audio_data)
+            print(f"ElevenLabs audio saved: {audio_filepath}")
+            return audio_filepath
+        except ImportError:
+            print("ElevenLabs library not installed. Cannot generate audio.")
+        except Exception as e:
+            print(f"Error generating ElevenLabs audio: {e}")
+        return None
+
+    def create_video_from_images(self, image_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24, duration_per_image=4):
         if not image_data_list: return None
         print(f"Creating video from {len(image_data_list)} image sets.")
         processed_clips = []
+        narration_audio_clip = None # Initialize
+        final_video_clip_obj = None # Initialize
 
         for i, data in enumerate(image_data_list):
             img_path, scene_num, key_action = data.get('path'), data.get('scene_num', i+1), data.get('key_action', '')
@@ -160,51 +206,58 @@ class VisualEngine:
             try:
                 pil_img_orig = Image.open(img_path)
                 if pil_img_orig.mode != 'RGB': pil_img_orig = pil_img_orig.convert('RGB')
-
-                # Resize and letterbox/pillarbox to video_frame_size
                 img_for_frame = pil_img_orig.copy()
                 img_for_frame.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)
                 canvas = Image.new('RGB', self.video_frame_size, (0,0,0))
                 x_offset = (self.video_frame_size[0] - img_for_frame.width) // 2
                 y_offset = (self.video_frame_size[1] - img_for_frame.height) // 2
                 canvas.paste(img_for_frame, (x_offset, y_offset))
-
                 frame_np = np.array(canvas)
                 img_clip = ImageClip(frame_np).set_duration(duration_per_image)
-
-                # Ken Burns: zoom from 100% to 110%
-                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + 0.1 * (t / duration_per_image))
-                img_clip = img_clip.set_position('center')
-
-                # Text Overlay
+                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + 0.1 * (t / duration_per_image)).set_position('center')
                 if key_action:
                     overlay_text = f"Scene {scene_num}\n{key_action}"
                     txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
                                         color=self.video_overlay_font_color, font=self.video_overlay_font,
                                         bg_color='rgba(0,0,0,0.7)', method='caption', align='West',
                                         size=(self.video_frame_size[0]*0.85, None), kerning=-1, stroke_color='black', stroke_width=0.5
-                                        ).set_duration(duration_per_image - 1.0).set_start(0.5) # Show for duration-1s, slight delay
-                    txt_clip = txt_clip.set_position(('center', 0.88), relative=True)
+                                        ).set_duration(duration_per_image - 1.0).set_start(0.5).set_position(('center', 0.88), relative=True)
                     final_scene_clip = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
-                else:
-                    final_scene_clip = img_clip
+                else: final_scene_clip = img_clip
                 processed_clips.append(final_scene_clip)
             except Exception as e: print(f"Error processing clip for {img_path}: {e}. Skipping.")
 
        if not processed_clips: print("No clips processed for video."); return None
 
-        transition_duration = 0.75 # Crossfade duration
-        final_video = concatenate_videoclips(processed_clips, padding=-transition_duration, method="compose")
-        if final_video.duration > transition_duration*2: # Ensure enough duration for fades
-            final_video = final_video.fx(vfx.fadein, transition_duration).fx(vfx.fadeout, transition_duration)
+        video_wo_audio = concatenate_videoclips(processed_clips, padding=-0.75, method="compose")
+        if video_wo_audio.duration > 1.5:
+            video_wo_audio = video_wo_audio.fx(vfx.fadein, 0.75).fx(vfx.fadeout, 0.75)
+
+        final_video_clip_obj = video_wo_audio
+        if overall_narration_path and os.path.exists(overall_narration_path):
+            try:
+                narration_audio_clip = AudioFileClip(overall_narration_path)
+                final_video_clip_obj = final_video_clip_obj.set_audio(narration_audio_clip)
+                if narration_audio_clip.duration < final_video_clip_obj.duration:
+                    final_video_clip_obj = final_video_clip_obj.subclip(0, narration_audio_clip.duration)
+                elif narration_audio_clip.duration > final_video_clip_obj.duration:
+                    # If audio is longer, we might want to loop video or extend last frame - complex.
+                    # For now, video duration dictates. Audio will be cut.
+                    # Or, ensure narration script length matches expected video length.
+                    pass # Moviepy will cut audio to video duration by default with set_audio
+                print("Overall narration added to video.")
+            except Exception as e:
+                print(f"Error adding overall narration: {e}. Proceeding without main narration.")
 
         output_path = os.path.join(self.output_dir, output_filename)
         try:
-            final_video.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', audio_codec='aac',
+            final_video_clip_obj.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', audio_codec='aac',
                                         temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                                         remove_temp=True, threads=os.cpu_count() or 2, logger='bar')
             print(f"Video created: {output_path}"); return output_path
        except Exception as e: print(f"Error writing video file: {e}"); return None
-        finally:
-            for clip in processed_clips: clip.close()
-            if hasattr(final_video, 'close'): final_video.close()
+        finally: # Ensure clips are closed
+            for clip_item in processed_clips:
+                if hasattr(clip_item, 'close'): clip_item.close()
+            if narration_audio_clip and hasattr(narration_audio_clip, 'close'): narration_audio_clip.close()
+            if final_video_clip_obj and hasattr(final_video_clip_obj, 'close'): final_video_clip_obj.close()
+ if final_video_clip_obj and hasattr(final_video_clip_obj, 'close'): final_video_clip_obj.close()