mgbam committed · Commit 9840152 · verified · 1 Parent(s): cc8faa4

Update core/visual_engine.py

Files changed (1):
  core/visual_engine.py +165 -112
core/visual_engine.py CHANGED
@@ -1,14 +1,15 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont
 from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
-                            CompositeVideoClip)
+                            CompositeVideoClip, AudioFileClip)
 import moviepy.video.fx.all as vfx
 import numpy as np
 import os
 import openai
 import requests
 import io
-import time # For adding slight delay if API rate limits are hit
+import time
+from elevenlabs import generate as elevenlabs_generate_audio, set_api_key as elevenlabs_set_api_key_func
 
 class VisualEngine:
     def __init__(self, output_dir="temp_cinegen_media"):
@@ -17,141 +18,186 @@ class VisualEngine:
 
         self.font_filename = "arial.ttf"
         self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
-        self.font_size_pil = 20 # Slightly smaller for placeholder text to fit more
-        self.video_overlay_font_size = 32
+        self.font_size_pil = 20
+        self.video_overlay_font_size = 30 # Slightly smaller for more text with narration
         self.video_overlay_font_color = 'white'
-        self.video_overlay_font = 'Arial-Bold' # Try specific variant; ensure ImageMagick can find it or use full path
+        self.video_overlay_font = 'Arial-Bold'
 
         try:
             self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
-            print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
+            print(f"Font for placeholders: {self.font_path_in_container}.")
         except IOError:
-            print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
-            self.font = ImageFont.load_default()
-            self.font_size_pil = 10 # Default font size estimate
+            print(f"Warning: Placeholder font '{self.font_path_in_container}' not loaded. Using default.")
+            self.font = ImageFont.load_default(); self.font_size_pil = 10
 
-        self.openai_api_key = None
-        self.USE_AI_IMAGE_GENERATION = False
-        self.dalle_model = "dall-e-3"
-        # DALL-E 3 standard size for highest quality generally. Other options: "1792x1024", "1024x1792"
-        self.image_size_dalle3 = "1792x1024" # Landscape, good for cinematic
-        self.video_frame_size = (1280, 720) # 16:9 aspect ratio for video output
+        self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False
+        self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
+        self.video_frame_size = (1280, 720)
+
+        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False
+        self.elevenlabs_voice_id = "Rachel" # Default, can be made configurable
+
+        self.pexels_api_key = None; self.USE_PEXELS = False
 
     def set_openai_api_key(self, api_key):
+        if api_key: self.openai_api_key = api_key; self.USE_AI_IMAGE_GENERATION = True; print(f"DALL-E ({self.dalle_model}) Ready.")
+        else: self.USE_AI_IMAGE_GENERATION = False; print("DALL-E Disabled.")
+
+    def set_elevenlabs_api_key(self, api_key):
         if api_key:
-            self.openai_api_key = api_key
-            self.USE_AI_IMAGE_GENERATION = True
-            print(f"OpenAI API key set. AI Image Generation Enabled with {self.dalle_model}.")
-        else:
-            self.USE_AI_IMAGE_GENERATION = False
-            print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
+            self.elevenlabs_api_key = api_key
+            try:
+                elevenlabs_set_api_key_func(api_key) # Set for the elevenlabs library
+                self.USE_ELEVENLABS = True
+                print("ElevenLabs Ready.")
+            except Exception as e:
+                print(f"Error setting ElevenLabs API key for library: {e}. ElevenLabs disabled.")
+                self.USE_ELEVENLABS = False
+        else: self.USE_ELEVENLABS = False; print("ElevenLabs Disabled.")
 
-    def _get_text_dimensions(self, text_content, font_obj): # Remains the same
+    def set_pexels_api_key(self, api_key):
+        if api_key: self.pexels_api_key = api_key; self.USE_PEXELS = True; print("Pexels Ready.")
+        else: self.USE_PEXELS = False; print("Pexels Disabled.")
+
+    def _get_text_dimensions(self, text_content, font_obj): # Remains same
         if not text_content: return 0, self.font_size_pil
         try:
-            if hasattr(font_obj, 'getbbox'):
-                bbox = font_obj.getbbox(text_content); width = bbox[2] - bbox[0]; height = bbox[3] - bbox[1]
-                return width, height if height > 0 else self.font_size_pil
-            elif hasattr(font_obj, 'getsize'):
-                width, height = font_obj.getsize(text_content)
-                return width, height if height > 0 else self.font_size_pil
-            else:
-                return int(len(text_content) * self.font_size_pil*0.6), int(self.font_size_pil*1.2 if self.font_size_pil*1.2 > 0 else self.font_size_pil)
-        except Exception: return int(len(text_content) * self.font_size_pil*0.6), int(self.font_size_pil*1.2)
+            if hasattr(font_obj, 'getbbox'): bbox = font_obj.getbbox(text_content); w = bbox[2]-bbox[0]; h = bbox[3]-bbox[1]; return w, h if h>0 else self.font_size_pil
+            elif hasattr(font_obj, 'getsize'): w,h = font_obj.getsize(text_content); return w, h if h>0 else self.font_size_pil
+            else: return int(len(text_content)*self.font_size_pil*0.6), int(self.font_size_pil*1.2 if self.font_size_pil*1.2 > 0 else self.font_size_pil)
+        except: return int(len(text_content)*self.font_size_pil*0.6), int(self.font_size_pil*1.2)
 
-    def _create_placeholder_image_content(self, text_description, filename, size=(1280, 720)): # Default to video_frame_size
-        img = Image.new('RGB', size, color=(20, 20, 40)) # Darker
-        draw = ImageDraw.Draw(img)
-        padding = 25
-        max_text_width = size[0] - (2 * padding)
-        lines = []
-        if not text_description: text_description = "(Placeholder: No prompt provided)"
-        # Simplified text wrapping for placeholder
-        words = text_description.split()
-        current_line = ""
+    def _create_placeholder_image_content(self, text_description, filename, size=(1280, 720)): # Remains same
+        img = Image.new('RGB', size, color=(20,20,40)); draw = ImageDraw.Draw(img); padding = 25; max_w = size[0]-(2*padding); lines = []
+        if not text_description: text_description = "(Placeholder)"
+        words = text_description.split(); current_line = ""
         for word in words:
-            test_line = current_line + word + " "
-            if self._get_text_dimensions(test_line, self.font)[0] <= max_text_width:
-                current_line = test_line
-            else:
+            test_line = current_line + word + " ";
+            if self._get_text_dimensions(test_line, self.font)[0] <= max_w: current_line = test_line
+            else:
                 if current_line: lines.append(current_line.strip())
                 current_line = word + " "
         if current_line: lines.append(current_line.strip())
-        if not lines: lines.append("(Text too long or unrenderable for placeholder)")
-
-        _, single_line_height = self._get_text_dimensions("Ay", self.font)
-        if single_line_height == 0: single_line_height = self.font_size_pil + 2
-
-        num_lines_to_display = min(len(lines), (size[1] - 2 * padding) // (single_line_height + 2)) # Max lines based on height
-
-        y_text = padding + (size[1] - 2*padding - num_lines_to_display * (single_line_height + 2)) / 2.0
-
-        for i in range(num_lines_to_display):
-            line = lines[i]
-            line_width, _ = self._get_text_dimensions(line, self.font)
-            x_text = (size[0] - line_width) / 2.0
-            draw.text((x_text, y_text), line, font=self.font, fill=(200, 200, 180))
-            y_text += single_line_height + 2 # Line spacing
-            if i == 6 and num_lines_to_display > 7: # Show ellipsis if more text
-                draw.text((x_text, y_text), "...", font=self.font, fill=(200, 200, 180))
-                break
-        filepath = os.path.join(self.output_dir, filename)
-        try: img.save(filepath); return filepath
-        except Exception as e: print(f"Error saving placeholder: {e}"); return None
+        if not lines: lines.append("(Text err)")
+        _, line_h = self._get_text_dimensions("Ay", self.font); line_h = line_h if line_h>0 else self.font_size_pil+2
+        max_lines = min(len(lines), (size[1]-2*padding)//(line_h+2))
+        y = padding + (size[1]-2*padding - max_lines*(line_h+2))/2.0
+        for i in range(max_lines):
+            line = lines[i]; line_w, _ = self._get_text_dimensions(line, self.font); x = (size[0]-line_w)/2.0
+            draw.text((x,y), line, font=self.font, fill=(200,200,180)); y += line_h+2
+            if i==6 and max_lines>7: draw.text((x,y), "...", font=self.font, fill=(200,200,180)); break
+        fp = os.path.join(self.output_dir, filename);
+        try: img.save(fp); return fp
+        except Exception as e: print(f"Err placeholder save: {e}"); return None
 
-    def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
+    def _search_pexels_image(self, query, output_filename):
+        if not self.USE_PEXELS or not self.pexels_api_key: return None
+        headers = {"Authorization": self.pexels_api_key}
+        params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large"}
+        # Ensure JPG for pexels typical format, but DALL-E images are PNG. Filename will be distinct.
+        pexels_filename = output_filename.replace(".png", "_pexels.jpg")
+        filepath = os.path.join(self.output_dir, pexels_filename)
+        try:
+            print(f"Searching Pexels for: '{query}' (max 3 words for relevance)")
+            # Limit query length for Pexels to improve relevance
+            query_parts = query.split()
+            effective_query = " ".join(query_parts[:5]) # Use first 5 words
+            params["query"] = effective_query
+
+            response = requests.get("https://api.pexels.com/v1/search", headers=headers, params=params, timeout=15)
+            response.raise_for_status()
+            data = response.json()
+            if data.get("photos"):
+                photo_url = data["photos"][0]["src"]["large2x"]
+                image_response = requests.get(photo_url, timeout=45)
+                image_response.raise_for_status()
+                img_data = Image.open(io.BytesIO(image_response.content))
+                if img_data.mode != 'RGB': img_data = img_data.convert('RGB')
+                img_data.save(filepath)
+                print(f"Pexels image saved: {filepath}")
+                return filepath
+            else:
+                print(f"No photos found on Pexels for query: '{effective_query}'")
+        except Exception as e: print(f"Pexels search/download error for '{query}': {e}")
+        return None
+
+    def generate_image_visual(self, image_prompt_text, scene_data_for_fallbacks, scene_identifier_filename):
         filepath = os.path.join(self.output_dir, scene_identifier_filename)
         if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
             max_retries = 2
             for attempt in range(max_retries):
                 try:
                     print(f"Attempt {attempt+1}: DALL-E ({self.dalle_model}) for: {image_prompt_text[:120]}...")
-                    client = openai.OpenAI(api_key=self.openai_api_key, timeout=60.0) # Timeout for client
-
+                    client = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0) # Increased client timeout
                     response = client.images.generate(
-                        model=self.dalle_model,
-                        prompt=image_prompt_text,
-                        n=1,
-                        size=self.image_size_dalle3,
-                        quality="hd", # Use "hd" for DALL-E 3 for better detail, "standard" for faster/cheaper
-                        response_format="url",
-                        style="vivid" # "vivid" or "natural" for DALL-E 3
+                        model=self.dalle_model, prompt=image_prompt_text, n=1,
+                        size=self.image_size_dalle3, quality="hd", response_format="url", style="vivid"
                     )
                     image_url = response.data[0].url
                     revised_prompt = getattr(response.data[0], 'revised_prompt', None)
                     if revised_prompt: print(f"DALL-E 3 revised_prompt: {revised_prompt[:100]}...")
-
-                    image_response = requests.get(image_url, timeout=90) # Increased download timeout
-                    image_response.raise_for_status()
 
+                    image_response = requests.get(image_url, timeout=120) # Increased download timeout
+                    image_response.raise_for_status()
                     img_data = Image.open(io.BytesIO(image_response.content))
                     if img_data.mode != 'RGB': img_data = img_data.convert('RGB')
-
-                    img_data.save(filepath)
-                    print(f"AI Image (DALL-E) saved: {filepath}")
-                    return filepath
-                except openai.RateLimitError as e:
-                    print(f"OpenAI Rate Limit Error: {e}. Retrying after delay...")
-                    if attempt < max_retries - 1: time.sleep(5 * (attempt + 1)); continue
-                    else: print("Max retries reached for RateLimitError."); break
+                    img_data.save(filepath); print(f"AI Image (DALL-E) saved: {filepath}"); return filepath
+                except openai.RateLimitError as e:
+                    print(f"OpenAI Rate Limit: {e}. Retrying after {5*(attempt+1)}s...")
+                    time.sleep(5*(attempt+1));
+                    if attempt == max_retries - 1: print("Max retries for RateLimitError."); break # Break from loop
+                    else: continue # Go to next attempt
                 except openai.APIError as e: print(f"OpenAI API Error: {e}"); break
-                except requests.exceptions.RequestException as e: print(f"Requests Error (DALL-E image download): {e}"); break
-                except Exception as e: print(f"Generic error (DALL-E image gen): {e}"); break
+                except requests.exceptions.RequestException as e: print(f"Requests Error (DALL-E download): {e}"); break
+                except Exception as e: print(f"Generic error (DALL-E gen): {e}"); break
 
-            print("DALL-E generation failed after retries. Falling back to placeholder.")
+            print("DALL-E generation failed. Trying Pexels fallback...")
+            pexels_query = f"{scene_data_for_fallbacks.get('emotional_beat','')} {scene_data_for_fallbacks.get('setting_description','')} {scene_data_for_fallbacks.get('genre','')} {scene_data_for_fallbacks.get('mood','')}"
+            pexels_path = self._search_pexels_image(pexels_query, scene_identifier_filename)
+            if pexels_path: return pexels_path
+
+            print("Pexels also failed/disabled. Using placeholder.")
             return self._create_placeholder_image_content(
-                f"[AI Gen Failed] Prompt: {image_prompt_text[:100]}...",
+                f"[AI/Pexels Failed] Original Prompt: {image_prompt_text[:100]}...",
                 scene_identifier_filename, size=self.video_frame_size
             )
-        else:
+        else: # AI image generation not enabled
             return self._create_placeholder_image_content(
                 image_prompt_text, scene_identifier_filename, size=self.video_frame_size
             )
 
-    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=4):
+    def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
+        if not self.USE_ELEVENLABS or not self.elevenlabs_api_key or not text_to_narrate:
+            print("ElevenLabs not enabled, API key missing, or no text provided. Skipping audio generation.")
+            return None
+
+        audio_filepath = os.path.join(self.output_dir, output_filename)
+        try:
+            print(f"Generating ElevenLabs audio (Voice: {self.elevenlabs_voice_id}) for: {text_to_narrate[:70]}...")
+            # Ensure API key is set for the elevenlabs library context if it's not global
+            # elevenlabs_set_api_key_func(self.elevenlabs_api_key) # Usually set once globally is enough
+
+            audio_data = elevenlabs_generate_audio(
+                text=text_to_narrate,
+                voice=self.elevenlabs_voice_id,
+                model="eleven_multilingual_v2" # Or other suitable model like "eleven_turbo_v2"
+            )
+            with open(audio_filepath, "wb") as f:
+                f.write(audio_data)
+            print(f"ElevenLabs audio saved: {audio_filepath}")
+            return audio_filepath
+        except ImportError:
+            print("ElevenLabs library not installed. Cannot generate audio.")
+        except Exception as e:
+            print(f"Error generating ElevenLabs audio: {e}")
+        return None
+
+    def create_video_from_images(self, image_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24, duration_per_image=4):
         if not image_data_list: return None
         print(f"Creating video from {len(image_data_list)} image sets.")
         processed_clips = []
+        narration_audio_clip = None # Initialize
+        final_video_clip_obj = None # Initialize
 
         for i, data in enumerate(image_data_list):
             img_path, scene_num, key_action = data.get('path'), data.get('scene_num', i+1), data.get('key_action', '')
@@ -160,51 +206,58 @@ class VisualEngine:
             try:
                 pil_img_orig = Image.open(img_path)
                 if pil_img_orig.mode != 'RGB': pil_img_orig = pil_img_orig.convert('RGB')
-
-                # Resize and letterbox/pillarbox to video_frame_size
                 img_for_frame = pil_img_orig.copy()
                 img_for_frame.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)
                 canvas = Image.new('RGB', self.video_frame_size, (0,0,0))
                 x_offset = (self.video_frame_size[0] - img_for_frame.width) // 2
                 y_offset = (self.video_frame_size[1] - img_for_frame.height) // 2
                 canvas.paste(img_for_frame, (x_offset, y_offset))
-
                 frame_np = np.array(canvas)
                 img_clip = ImageClip(frame_np).set_duration(duration_per_image)
-
-                # Ken Burns: zoom from 100% to 110%
-                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + 0.1 * (t / duration_per_image))
-                img_clip = img_clip.set_position('center')
-
-                # Text Overlay
+                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + 0.1 * (t / duration_per_image)).set_position('center')
                 if key_action:
                     overlay_text = f"Scene {scene_num}\n{key_action}"
                     txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
                                         color=self.video_overlay_font_color, font=self.video_overlay_font,
                                         bg_color='rgba(0,0,0,0.7)', method='caption', align='West',
                                         size=(self.video_frame_size[0]*0.85, None), kerning=-1, stroke_color='black', stroke_width=0.5
-                                        ).set_duration(duration_per_image - 1.0).set_start(0.5) # Show for duration-1s, slight delay
-                    txt_clip = txt_clip.set_position(('center', 0.88), relative=True)
+                                        ).set_duration(duration_per_image - 1.0).set_start(0.5).set_position(('center', 0.88), relative=True)
                     final_scene_clip = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
-                else:
-                    final_scene_clip = img_clip
+                else: final_scene_clip = img_clip
                 processed_clips.append(final_scene_clip)
             except Exception as e: print(f"Error processing clip for {img_path}: {e}. Skipping.")
 
        if not processed_clips: print("No clips processed for video."); return None
 
-        transition_duration = 0.75 # Crossfade duration
-        final_video = concatenate_videoclips(processed_clips, padding=-transition_duration, method="compose")
-        if final_video.duration > transition_duration*2: # Ensure enough duration for fades
-            final_video = final_video.fx(vfx.fadein, transition_duration).fx(vfx.fadeout, transition_duration)
+        video_wo_audio = concatenate_videoclips(processed_clips, padding=-0.75, method="compose")
+        if video_wo_audio.duration > 1.5:
+            video_wo_audio = video_wo_audio.fx(vfx.fadein, 0.75).fx(vfx.fadeout, 0.75)
+
+        final_video_clip_obj = video_wo_audio
+        if overall_narration_path and os.path.exists(overall_narration_path):
+            try:
+                narration_audio_clip = AudioFileClip(overall_narration_path)
+                final_video_clip_obj = final_video_clip_obj.set_audio(narration_audio_clip)
+                if narration_audio_clip.duration < final_video_clip_obj.duration:
+                    final_video_clip_obj = final_video_clip_obj.subclip(0, narration_audio_clip.duration)
+                elif narration_audio_clip.duration > final_video_clip_obj.duration:
+                    # If audio is longer, we might want to loop video or extend last frame - complex.
+                    # For now, video duration dictates. Audio will be cut.
+                    # Or, ensure narration script length matches expected video length.
+                    pass # Moviepy will cut audio to video duration by default with set_audio
+                print("Overall narration added to video.")
+            except Exception as e:
+                print(f"Error adding overall narration: {e}. Proceeding without main narration.")
 
         output_path = os.path.join(self.output_dir, output_filename)
         try:
-            final_video.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', audio_codec='aac',
+            final_video_clip_obj.write_videofile(output_path, fps=fps, codec='libx264', preset='medium', audio_codec='aac',
                                         temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                                         remove_temp=True, threads=os.cpu_count() or 2, logger='bar')
             print(f"Video created: {output_path}"); return output_path
        except Exception as e: print(f"Error writing video file: {e}"); return None
-        finally:
-            for clip in processed_clips: clip.close()
-            if hasattr(final_video, 'close'): final_video.close()
+        finally: # Ensure clips are closed
+            for clip_item in processed_clips:
+                if hasattr(clip_item, 'close'): clip_item.close()
+            if narration_audio_clip and hasattr(narration_audio_clip, 'close'): narration_audio_clip.close()
+            if final_video_clip_obj and hasattr(final_video_clip_obj, 'close'): final_video_clip_obj.close()
+ if final_video_clip_obj and hasattr(final_video_clip_obj, 'close'): final_video_clip_obj.close()