mgbam committed
Commit
62838f2
verified
1 Parent(s): 8a6537e

Update core/visual_engine.py

Files changed (1)
  1. core/visual_engine.py +516 -679
core/visual_engine.py CHANGED
@@ -1,49 +1,42 @@
  # core/visual_engine.py
  from PIL import Image, ImageDraw, ImageFont, ImageOps
- import base64
- import mimetypes
- import numpy as np
- import os
- import openai
- import requests
- import io
- import time
- import random
- import logging
-
- # --- MoviePy Imports ---
  from moviepy.editor import (
      ImageClip,
      VideoFileClip,
      concatenate_videoclips,
      TextClip,
      CompositeVideoClip,
-     AudioFileClip,
  )
  import moviepy.video.fx.all as vfx
-
- # --- MONKEY PATCH for Pillow/MoviePy compatibility ---
- try:
-     if hasattr(Image, "Resampling") and hasattr(Image.Resampling, "LANCZOS"):  # Pillow 9+
-         if not hasattr(Image, "ANTIALIAS"):
-             Image.ANTIALIAS = Image.Resampling.LANCZOS
-     elif hasattr(Image, "LANCZOS"):  # Pillow 8
-         if not hasattr(Image, "ANTIALIAS"):
-             Image.ANTIALIAS = Image.LANCZOS
-     elif not hasattr(Image, "ANTIALIAS"):  # Fallback if no common resampling attributes found
-         print(
-             "WARNING: Pillow version lacks common Resampling attributes or ANTIALIAS. MoviePy effects might fail or look different."
-         )
- except Exception as e_monkey_patch:
-     print(
-         f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}"
-     )

  logger = logging.getLogger(__name__)
- # Consider setting level in main app if not already configured:
- # logger.setLevel(logging.DEBUG)  # For very verbose output during debugging

- # --- External Service Client Imports ---
  ELEVENLABS_CLIENT_IMPORTED = False
  ElevenLabsAPIClient = None
  Voice = None
@@ -51,92 +44,57 @@ VoiceSettings = None
  try:
      from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
      from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
-
      ElevenLabsAPIClient = ImportedElevenLabsClient
      Voice = ImportedVoice
      VoiceSettings = ImportedVoiceSettings
      ELEVENLABS_CLIENT_IMPORTED = True
-     logger.info("ElevenLabs client components imported successfully.")
- except ImportError:
-     logger.warning(
-         "ElevenLabs SDK not found (pip install elevenlabs). Audio generation will be disabled."
-     )
- except Exception as e_eleven_import:
-     logger.warning(
-         f"Error importing ElevenLabs client components: {e_eleven_import}. Audio generation disabled."
-     )

  RUNWAYML_SDK_IMPORTED = False
- RunwayMLAPIClient = None  # Using a more specific name for the client class
  try:
-     from runwayml import RunwayML as ImportedRunwayMLClient  # Actual SDK import
-
-     RunwayMLAPIClient = ImportedRunwayMLClient
-     RUNWAYML_SDK_IMPORTED = True
-     logger.info("RunwayML SDK imported successfully.")
  except ImportError:
-     logger.warning(
-         "RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled."
-     )
- except Exception as e_runway_sdk_import:
-     logger.warning(
-         f"Error importing RunwayML SDK: {e_runway_sdk_import}. RunwayML features disabled."
-     )


  class VisualEngine:
-     DEFAULT_FONT_SIZE_PIL = 10  # For default Pillow font
-     PREFERRED_FONT_SIZE_PIL = 20  # For custom font
-     VIDEO_OVERLAY_FONT_SIZE = 30
-     VIDEO_OVERLAY_FONT_COLOR = "white"
-     # Standard font names ImageMagick (used by TextClip) is likely to find in Linux containers
-     DEFAULT_MOVIEPY_FONT = "DejaVu-Sans-Bold"
-     PREFERRED_MOVIEPY_FONT = "Liberation-Sans-Bold"  # Often available
-
-     def __init__(
-         self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"
-     ):
          self.output_dir = output_dir
          os.makedirs(self.output_dir, exist_ok=True)

-         self.font_filename_pil = "DejaVuSans-Bold.ttf"  # A more standard Linux font
          font_paths_to_try = [
-             self.font_filename_pil,  # If in working dir or PATH
-             f"/usr/share/fonts/truetype/dejavu/{self.font_filename_pil}",
-             f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",  # Alternative
-             f"/System/Library/Fonts/Supplemental/Arial.ttf",  # macOS fallback
-             f"C:/Windows/Fonts/arial.ttf",  # Windows fallback
-             f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf",  # User's previous custom path
          ]
-         self.font_path_pil_resolved = next(
-             (p for p in font_paths_to_try if os.path.exists(p)), None
-         )
-
-         self.font_pil = ImageFont.load_default()  # Default
-         self.current_font_size_pil = self.DEFAULT_FONT_SIZE_PIL

-         if self.font_path_pil_resolved:
-             try:
-                 self.font_pil = ImageFont.truetype(
-                     self.font_path_pil_resolved, self.PREFERRED_FONT_SIZE_PIL
-                 )
-                 self.current_font_size_pil = self.PREFERRED_FONT_SIZE_PIL
-                 logger.info(
-                     f"Pillow font loaded: {self.font_path_pil_resolved} at size {self.current_font_size_pil}."
-                 )
-                 # Determine MoviePy font based on loaded PIL font
-                 if "dejavu" in self.font_path_pil_resolved.lower():
-                     self.video_overlay_font = "DejaVu-Sans-Bold"
-                 elif "liberation" in self.font_path_pil_resolved.lower():
-                     self.video_overlay_font = "Liberation-Sans-Bold"
-                 else:  # Fallback if custom font doesn't have an obvious ImageMagick name
-                     self.video_overlay_font = self.DEFAULT_MOVIEPY_FONT
-             except IOError as e_font_load:
-                 logger.error(
-                     f"Pillow font loading IOError for '{self.font_path_pil_resolved}': {e_font_load}. Using default."
-                 )
-         else:
-             logger.warning("Custom Pillow font not found. Using default.")

          self.openai_api_key = None
          self.USE_AI_IMAGE_GENERATION = False
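Editor's note: the constructor above resolves the first available font by running next() over a list of candidate paths; a minimal standalone sketch of that pattern (names hypothetical, not part of the diff):

    import os

    def first_existing(paths):
        # Return the first path that exists on disk, else None.
        return next((p for p in paths if os.path.exists(p)), None)

    font_path = first_existing([
        "DejaVuSans-Bold.ttf",
        "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
    ])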
@@ -153,44 +111,24 @@ class VisualEngine:
                  stability=0.60,
                  similarity_boost=0.80,
                  style=0.15,
-                 use_speaker_boost=True,
              )
          else:
              self.elevenlabs_voice_settings = None

          self.pexels_api_key = None
          self.USE_PEXELS = False

          self.runway_api_key = None
          self.USE_RUNWAYML = False
-         self.runway_ml_client_instance = None  # More specific name
-
-         # Attempt to initialize Runway client if SDK is present and env var might be set
-         if (
-             RUNWAYML_SDK_IMPORTED
-             and RunwayMLAPIClient
-             and os.getenv("RUNWAYML_API_SECRET")
-         ):
-             try:
-                 self.runway_ml_client_instance = RunwayMLAPIClient()  # SDK uses env var
-                 self.USE_RUNWAYML = True  # Assume enabled if client initializes
-                 logger.info(
-                     "RunwayML Client initialized from RUNWAYML_API_SECRET env var at startup."
-                 )
-             except Exception as e_runway_init_startup:
-                 logger.error(
-                     f"Initial RunwayML client init failed (env var RUNWAYML_API_SECRET might be invalid): {e_runway_init_startup}"
-                 )
-                 self.USE_RUNWAYML = False

          logger.info("VisualEngine initialized.")

-     # --- API Key Setters ---
-     def set_openai_api_key(self, api_key):
-         self.openai_api_key = api_key
-         self.USE_AI_IMAGE_GENERATION = bool(api_key)
-         logger.info(
-             f"DALL-E ({self.dalle_model}) status: {'Ready' if self.USE_AI_IMAGE_GENERATION else 'Disabled'}"
-         )

      def set_elevenlabs_api_key(self, api_key, voice_id_from_secret=None):
          self.elevenlabs_api_key = api_key
@@ -201,423 +139,285 @@ class VisualEngine:
                  self.elevenlabs_client = ElevenLabsAPIClient(api_key=api_key)
                  self.USE_ELEVENLABS = bool(self.elevenlabs_client)
                  logger.info(
-                     f"ElevenLabs Client status: {'Ready' if self.USE_ELEVENLABS else 'Failed Initialization'} (Using Voice ID: {self.elevenlabs_voice_id})"
                  )
              except Exception as e:
-                 logger.error(
-                     f"ElevenLabs client initialization error: {e}. Service Disabled.",
-                     exc_info=True,
-                 )
                  self.USE_ELEVENLABS = False
-                 self.elevenlabs_client = None
          else:
              self.USE_ELEVENLABS = False
-             logger.info(
-                 f"ElevenLabs Service Disabled (API key not provided or SDK import issue)."
-             )
-
-     def set_pexels_api_key(self, api_key):
-         self.pexels_api_key = api_key
-         self.USE_PEXELS = bool(api_key)
-         logger.info(
-             f"Pexels Search status: {'Ready' if self.USE_PEXELS else 'Disabled'}"
-         )
-
-     def set_runway_api_key(self, api_key):
-         self.runway_api_key = api_key  # Store key regardless for potential direct HTTP use
-         if api_key:
-             if RUNWAYML_SDK_IMPORTED and RunwayMLAPIClient:
-                 if not self.runway_ml_client_instance:  # If not already initialized by env var
-                     try:
-                         # The RunwayML Python SDK expects the API key via the RUNWAYML_API_SECRET env var.
-                         # If it's not set, we set it temporarily for client initialization.
-                         original_env_secret = os.getenv("RUNWAYML_API_SECRET")
-                         if not original_env_secret:
-                             logger.info(
-                                 "Temporarily setting RUNWAYML_API_SECRET from provided key for SDK client init."
-                             )
-                             os.environ["RUNWAYML_API_SECRET"] = api_key
-
-                         self.runway_ml_client_instance = RunwayMLAPIClient()
-                         self.USE_RUNWAYML = True  # SDK client successfully initialized
-                         logger.info(
-                             "RunwayML Client initialized successfully using provided API key."
-                         )

-                         if not original_env_secret:  # Clean up if we set it
-                             del os.environ["RUNWAYML_API_SECRET"]
-                             logger.info(
-                                 "Cleared temporary RUNWAYML_API_SECRET env var."
-                             )

-                     except Exception as e_client_init:
-                         logger.error(
-                             f"RunwayML Client initialization via set_runway_api_key failed: {e_client_init}",
-                             exc_info=True,
-                         )
-                         self.USE_RUNWAYML = False
-                         self.runway_ml_client_instance = None
-                 else:  # Client was already initialized (likely via env var during __init__)
-                     self.USE_RUNWAYML = True
-                     logger.info(
-                         "RunwayML Client was already initialized (likely from env var). API key stored."
-                     )
-             else:  # SDK not imported
-                 logger.warning(
-                     "RunwayML SDK not imported. API key stored, but integration requires SDK. Service effectively disabled."
                  )
                  self.USE_RUNWAYML = False
-         else:  # No API key provided
              self.USE_RUNWAYML = False
-             self.runway_ml_client_instance = None
-             logger.info("RunwayML Service Disabled (no API key provided).")
-
-     # --- Helper Methods ---
-     def _image_to_data_uri(self, image_path):
-         try:
-             mime_type, _ = mimetypes.guess_type(image_path)
-             if not mime_type:
-                 ext = os.path.splitext(image_path)[1].lower()
-                 mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg"}
-                 mime_type = mime_map.get(ext, "application/octet-stream")
-                 if mime_type == "application/octet-stream":
-                     logger.warning(
-                         f"Could not determine MIME type for {image_path}, using default."
-                     )
-
-             with open(image_path, "rb") as image_file:
-                 encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
-             data_uri = f"data:{mime_type};base64,{encoded_string}"
-             logger.debug(
-                 f"Generated data URI for {os.path.basename(image_path)} (first 100 chars): {data_uri[:100]}..."
-             )
-             return data_uri
-         except FileNotFoundError:
-             logger.error(f"Image file not found at {image_path} for data URI conversion.")
-             return None
-         except Exception as e:
-             logger.error(
-                 f"Error converting image {image_path} to data URI: {e}", exc_info=True
-             )
-             return None
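Editor's note: for readers unfamiliar with data URIs, a minimal standalone sketch of what the removed _image_to_data_uri helper produces (function name here is illustrative; only the stdlib base64/mimetypes APIs are used):

    import base64, mimetypes

    def image_to_data_uri(path):
        # Guess the MIME type from the extension and inline the bytes as base64.
        mime, _ = mimetypes.guess_type(path)
        with open(path, "rb") as fh:
            payload = base64.b64encode(fh.read()).decode("utf-8")
        return f"data:{mime or 'application/octet-stream'};base64,{payload}"

    # e.g. image_to_data_uri("frame.png") -> "data:image/png;base64,iVBORw0..."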
-     def _map_resolution_to_runway_ratio(self, width, height):
-         ratio_str = f"{width}:{height}"
-         # Gen-4 supports: "1280:720", "720:1280", "1104:832", "832:1104", "960:960", "1584:672"
-         supported_ratios_gen4 = [
-             "1280:720",
-             "720:1280",
-             "1104:832",
-             "832:1104",
-             "960:960",
-             "1584:672",
-         ]
-         if ratio_str in supported_ratios_gen4:
-             return ratio_str
-         # Fallback or find closest - for now, strict matching or default
-         logger.warning(
-             f"Resolution {ratio_str} not directly in Gen-4 supported list. Defaulting to 1280:720."
-         )
-         return "1280:720"
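Editor's note: the comment above leaves "find closest" unimplemented and simply defaults to 1280:720. A minimal sketch of how the nearest supported aspect ratio could be chosen instead (helper name hypothetical):

    def closest_runway_ratio(width, height, supported=("1280:720", "720:1280",
                                                       "1104:832", "832:1104",
                                                       "960:960", "1584:672")):
        # Pick the supported ratio whose aspect (w/h) is numerically closest.
        target = width / height
        def aspect(r):
            w, h = map(int, r.split(":"))
            return w / h
        return min(supported, key=lambda r: abs(aspect(r) - target))

    # closest_runway_ratio(1920, 1080) -> "1280:720"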
-
-     def _get_text_dimensions(self, text_content, font_object):
-         # (Robust version from before)
-         default_char_height = getattr(font_object, "size", self.current_font_size_pil)
          if not text_content:
-             return 0, default_char_height
          try:
-             if hasattr(font_object, "getbbox"):
-                 bbox = font_object.getbbox(text_content)
-                 w = bbox[2] - bbox[0]
-                 h = bbox[3] - bbox[1]
-                 return w, h if h > 0 else default_char_height
-             elif hasattr(font_object, "getsize"):
-                 w, h = font_object.getsize(text_content)
-                 return w, h if h > 0 else default_char_height
              else:
-                 return (
-                     int(len(text_content) * default_char_height * 0.6),
-                     int(default_char_height * 1.2),
-                 )
          except Exception as e:
-             logger.warning(f"Error in _get_text_dimensions: {e}")
-             return (
-                 int(len(text_content) * self.current_font_size_pil * 0.6),
-                 int(self.current_font_size_pil * 1.2),
-             )
      def _create_placeholder_image_content(self, text_description, filename, size=None):
-         # (Corrected version from previous response)
          if size is None:
              size = self.video_frame_size
-         img = Image.new("RGB", size, color=(20, 20, 40))
-         d = ImageDraw.Draw(img)
          padding = 25
-         max_w = size[0] - (2 * padding)
          lines = []
          if not text_description:
-             text_description = "(Placeholder Image)"
-         words = text_description.split()
-         current_line_text = ""
-         for word_idx, word in enumerate(words):
-             prospective_addition = word + (" " if word_idx < len(words) - 1 else "")
-             test_line_text = current_line_text + prospective_addition
-             current_w, _ = self._get_text_dimensions(test_line_text, self.font_pil)
-             if current_w == 0 and test_line_text.strip():
-                 current_w = len(test_line_text) * (self.current_font_size_pil * 0.6)  # Estimate

-             if current_w <= max_w:
-                 current_line_text = test_line_text
              else:
-                 if current_line_text.strip():
-                     lines.append(current_line_text.strip())
-                 current_line_text = prospective_addition  # Start new line
-         if current_line_text.strip():
-             lines.append(current_line_text.strip())

          if not lines and text_description:
-             avg_char_w, _ = self._get_text_dimensions("W", self.font_pil)
-             avg_char_w = avg_char_w or (self.current_font_size_pil * 0.6)
-             chars_per_line = int(max_w / avg_char_w) if avg_char_w > 0 else 20
-             lines.append(
-                 text_description[:chars_per_line]
-                 + ("..." if len(text_description) > chars_per_line else "")
              )
          elif not lines:
-             lines.append("(Placeholder Error)")
-
-         _, single_line_h = self._get_text_dimensions("Ay", self.font_pil)
-         single_line_h = single_line_h if single_line_h > 0 else self.current_font_size_pil + 2
-         max_lines = (
-             min(len(lines), (size[1] - (2 * padding)) // (single_line_h + 2))
-             if single_line_h > 0
-             else 1
          )
-         max_lines = max(1, max_lines)  # Ensure at least one line
-
-         y_pos = padding + (size[1] - (2 * padding) - max_lines * (single_line_h + 2)) / 2.0
-         for i in range(max_lines):
-             line_text = lines[i]
-             line_w, _ = self._get_text_dimensions(line_text, self.font_pil)
-             if line_w == 0 and line_text.strip():
-                 line_w = len(line_text) * (self.current_font_size_pil * 0.6)
-             x_pos = (size[0] - line_w) / 2.0
-             try:
-                 d.text((x_pos, y_pos), line_text, font=self.font_pil, fill=(200, 200, 180))
-             except Exception as e_draw:
-                 logger.error(f"Pillow d.text error: {e_draw} for '{line_text}'")
-             y_pos += single_line_h + 2
-             if i == 6 and max_lines > 7:
-                 try:
-                     d.text((x_pos, y_pos), "...", font=self.font_pil, fill=(200, 200, 180))
-                 except Exception as e_elip:
-                     logger.error(f"Pillow d.text ellipsis error: {e_elip}")
-                 break

          filepath = os.path.join(self.output_dir, filename)
          try:
              img.save(filepath)
              return filepath
-         except Exception as e_save:
-             logger.error(
-                 f"Saving placeholder image '{filepath}' error: {e_save}", exc_info=True
-             )
          return None
      def _search_pexels_image(self, query, output_filename_base):
-         # <<< THIS IS THE CORRECTED METHOD >>>
          if not self.USE_PEXELS or not self.pexels_api_key:
              return None
          headers = {"Authorization": self.pexels_api_key}
          params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large2x"}
-         base_name_for_pexels, _ = os.path.splitext(output_filename_base)
-         pexels_filename = base_name_for_pexels + f"_pexels_{random.randint(1000,9999)}.jpg"
          filepath = os.path.join(self.output_dir, pexels_filename)
          try:
-             logger.info(f"Pexels: Searching for '{query}'")
              effective_query = " ".join(query.split()[:5])
              params["query"] = effective_query
              response = requests.get(
-                 "https://api.pexels.com/v1/search", headers=headers, params=params, timeout=20
              )
              response.raise_for_status()
              data = response.json()
              if data.get("photos") and len(data["photos"]) > 0:
                  photo_details = data["photos"][0]
-                 photo_url = photo_details.get("src", {}).get("large2x")
-                 if not photo_url:
-                     logger.warning(
-                         f"Pexels: 'large2x' URL missing for '{effective_query}'. Details: {photo_details}"
-                     )
-                     return None
                  image_response = requests.get(photo_url, timeout=60)
                  image_response.raise_for_status()
-                 img_data_pil = Image.open(io.BytesIO(image_response.content))
-                 if img_data_pil.mode != "RGB":
-                     img_data_pil = img_data_pil.convert("RGB")
-                 img_data_pil.save(filepath)
-                 logger.info(f"Pexels: Image saved to {filepath}")
                  return filepath
              else:
-                 logger.info(f"Pexels: No photos for '{effective_query}'.")
                  return None
          except requests.exceptions.RequestException as e_req:
-             logger.error(f"Pexels: RequestException for '{query}': {e_req}", exc_info=False)
-             return None  # Less verbose for network errors
          except Exception as e:
-             logger.error(f"Pexels: General error for '{query}': {e}", exc_info=True)
-             return None
-     # --- RunwayML Video Generation (Gen-4 Aligned with SDK) ---
-     def _generate_video_clip_with_runwayml(
-         self,
-         text_prompt_for_motion,
-         input_image_path,
-         scene_identifier_filename_base,
-         target_duration_seconds=5,
-     ):
-         if not self.USE_RUNWAYML or not self.runway_ml_client_instance:
-             logger.warning("RunwayML not enabled or client not initialized. Cannot generate video clip.")
-             return None
-         if not input_image_path or not os.path.exists(input_image_path):
-             logger.error(
-                 f"Runway Gen-4 requires an input image. Path not provided or invalid: {input_image_path}"
-             )
              return None
-
-         image_data_uri = self._image_to_data_uri(input_image_path)
-         if not image_data_uri:
              return None

-         runway_duration = 10 if target_duration_seconds >= 8 else 5  # Map to 5s or 10s for Gen-4
-         runway_ratio_str = self._map_resolution_to_runway_ratio(
-             self.video_frame_size[0], self.video_frame_size[1]
-         )
-
-         # Use a more descriptive output filename for Runway videos
-         base_name_for_runway, _ = os.path.splitext(scene_identifier_filename_base)
-         output_video_filename = base_name_for_runway + f"_runway_gen4_d{runway_duration}s.mp4"
-         output_video_filepath = os.path.join(self.output_dir, output_video_filename)
-
          logger.info(
-             f"Initiating Runway Gen-4 task: motion='{text_prompt_for_motion[:100]}...', image='{os.path.basename(input_image_path)}', dur={runway_duration}s, ratio='{runway_ratio_str}'"
          )
-         try:
-             # Using the RunwayML Python SDK structure
-             task_submission = self.runway_ml_client_instance.image_to_video.create(
-                 model="gen4_turbo",
-                 prompt_image=image_data_uri,
-                 prompt_text=text_prompt_for_motion,  # This is the motion prompt
-                 duration=runway_duration,
-                 ratio=runway_ratio_str,
-                 # seed=random.randint(0, 4294967295),  # Optional: for reproducibility
-                 # Other Gen-4 params (motion_score, upscale, watermark etc.) can be added here if available in SDK
-             )
-             task_id = task_submission.id
-             logger.info(f"Runway Gen-4 task created with ID: {task_id}. Polling for completion...")
-
-             poll_interval_seconds = 10
-             max_polling_duration_seconds = 6 * 60  # 6 minutes
-             start_time = time.time()
-
-             while time.time() - start_time < max_polling_duration_seconds:
-                 time.sleep(poll_interval_seconds)
-                 task_details = self.runway_ml_client_instance.tasks.retrieve(id=task_id)
-                 logger.info(f"Runway task {task_id} status: {task_details.status}")
-
-                 if task_details.status == "SUCCEEDED":
-                     # Determine output URL (this structure might vary based on SDK version)
-                     output_url = None
-                     if hasattr(task_details, "output") and task_details.output and hasattr(
-                         task_details.output, "url"
-                     ):
-                         output_url = task_details.output.url
-                     elif (
-                         hasattr(task_details, "artifacts")
-                         and task_details.artifacts
-                         and isinstance(task_details.artifacts, list)
-                         and len(task_details.artifacts) > 0
-                     ):
-                         first_artifact = task_details.artifacts[0]
-                         if hasattr(first_artifact, "url"):
-                             output_url = first_artifact.url
-                         elif hasattr(first_artifact, "download_url"):
-                             output_url = first_artifact.download_url

-                     if not output_url:
-                         logger.error(
-                             f"Runway task {task_id} SUCCEEDED, but no output URL found. Details: {vars(task_details) if hasattr(task_details,'__dict__') else str(task_details)}"
-                         )
-                         return None
-
-                     logger.info(f"Runway task {task_id} SUCCEEDED. Downloading video from: {output_url}")
-                     video_response = requests.get(output_url, stream=True, timeout=300)
-                     video_response.raise_for_status()
-                     with open(output_video_filepath, "wb") as f:
-                         for chunk in video_response.iter_content(chunk_size=8192):
-                             f.write(chunk)
-                     logger.info(
-                         f"Runway Gen-4 video successfully downloaded to: {output_video_filepath}"
-                     )
-                     return output_video_filepath
-
-                 elif task_details.status in ["FAILED", "ABORTED", "ERROR"]:  # Added ERROR
-                     error_msg = (
-                         getattr(task_details, "error_message", None)
-                         or getattr(getattr(task_details, "output", None), "error", "Unknown error from Runway task.")
-                     )
-                     logger.error(
-                         f"Runway task {task_id} final status: {task_details.status}. Error: {error_msg}"
-                     )
-                     return None
-
-             logger.warning(
-                 f"Runway task {task_id} timed out polling after {max_polling_duration_seconds} seconds."
-             )
-             return None
-
-         except AttributeError as ae:  # If SDK methods are not as expected
-             logger.error(
-                 f"AttributeError with RunwayML SDK: {ae}. Ensure SDK is up to date and methods/attributes match documentation.",
-                 exc_info=True,
              )
-             return None
-         except Exception as e_runway_call:
-             logger.error(
-                 f"General error during Runway Gen-4 API call or processing: {e_runway_call}",
-                 exc_info=True,
              )
              return None
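Editor's note: the removed method above is a poll-until-terminal-state loop around the Runway task API; a minimal generic sketch of that pattern, detached from any SDK (names hypothetical):

    import time

    def poll_until_done(fetch_status, is_done, interval_s=10, timeout_s=360):
        # Call fetch_status() every interval_s seconds until is_done(status)
        # returns True or timeout_s elapses; returns the last status or None.
        deadline = time.time() + timeout_s
        while time.time() < deadline:
            time.sleep(interval_s)
            status = fetch_status()
            if is_done(status):
                return status
        return None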
-
-     def _create_placeholder_video_content(self, text_description, filename, duration=4, size=None):
-         # (Keeping as before)
-         if size is None:
-             size = self.video_frame_size
-         fp = os.path.join(self.output_dir, filename)
          tc = None
          try:
              tc = TextClip(
-                 text_description,
                  fontsize=50,
-                 color="white",
                  font=self.video_overlay_font,
-                 bg_color="black",
-                 size=size,
-                 method="caption",
-             ).set_duration(duration)
              tc.write_videofile(
-                 fp, fps=24, codec="libx264", preset="ultrafast", logger=None, threads=2
              )
              logger.info(f"Generic placeholder video: {fp}")
              return fp
          except Exception as e:
-             logger.error(f"Generic placeholder video error {fp}: {e}", exc_info=True)
              return None
          finally:
-             if tc and hasattr(tc, "close"):
                  tc.close()
-     # --- generate_scene_asset (Main asset generation logic using Runway Gen-4 workflow) ---
      def generate_scene_asset(
          self,
          image_generation_prompt_text,
@@ -625,202 +425,210 @@ class VisualEngine:
          scene_data,
          scene_identifier_filename_base,
          generate_as_video_clip=False,
-         runway_target_duration=5,
      ):
-         # (Logic updated for improved DALL·E and RunwayML fallback)
-         base_name, _ = os.path.splitext(scene_identifier_filename_base)
          asset_info = {
-             "path": None,
-             "type": "none",
-             "error": True,
-             "prompt_used": image_generation_prompt_text,
-             "error_message": "Asset generation init failed",
          }
          input_image_for_runway_path = None
-         # Use a distinct name for the base image if it's only an intermediate step for video
-         base_image_filename = base_name + ("_base_for_video.png" if generate_as_video_clip else ".png")
-         base_image_filepath = os.path.join(self.output_dir, base_image_filename)

-         # STEP 1: Generate/acquire the base image via DALL·E
          if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
-             try:
-                 logger.info(f"Calling DALL·E with prompt: {image_generation_prompt_text[:70]}...")
-                 response = openai.Image.create(
-                     prompt=image_generation_prompt_text,
-                     n=1,
-                     size=self.image_size_dalle3,
-                     model=self.dalle_model,
-                 )
-                 image_url = response["data"][0]["url"]
-                 ir = requests.get(image_url, timeout=120)
-                 ir.raise_for_status()
-                 id_img = Image.open(io.BytesIO(ir.content))
-                 if id_img.mode != "RGB":
-                     id_img = id_img.convert("RGB")
-                 id_img.save(base_image_filepath)
-                 logger.info(f"DALL·E base image saved: {base_image_filepath}")
-                 input_image_for_runway_path = base_image_filepath
-                 asset_info = {
-                     "path": base_image_filepath,
-                     "type": "image",
-                     "error": False,
-                     "prompt_used": image_generation_prompt_text,
-                 }
-             except openai.error.OpenAIError as e:
-                 logger.warning(f"DALL·E error: {e}. Falling back to Pexels or placeholder.")
-                 asset_info["error_message"] = str(e)
-             except Exception as e:
-                 logger.error(f"Unexpected DALL·E error: {e}", exc_info=True)
-                 asset_info["error_message"] = str(e)

-         # STEP 2: If DALL·E failed, try Pexels
-         if asset_info["error"] and self.USE_PEXELS:
-             logger.info("Attempting Pexels fallback for base image.")
              pqt = scene_data.get(
-                 "pexels_search_query_감독", f"{scene_data.get('emotional_beat','')} {scene_data.get('setting_description','')}"
              )
-             pp = self._search_pexels_image(pqt, base_image_filename)
              if pp:
                  input_image_for_runway_path = pp
-                 asset_info = {
-                     "path": pp,
-                     "type": "image",
-                     "error": False,
-                     "prompt_used": f"Pexels:{pqt}",
                  }
              else:
-                 current_em = asset_info.get("error_message", "")
-                 asset_info["error_message"] = (current_em + " Pexels fallback failed.").strip()

-         # STEP 3: If both DALL·E and Pexels failed, create placeholder
-         if asset_info["error"]:
-             logger.warning("Both DALL·E and Pexels failed. Creating placeholder image.")
-             ppt = asset_info.get("prompt_used", image_generation_prompt_text)
              php = self._create_placeholder_image_content(
-                 f"[Placeholder for] {ppt[:70]}...", base_image_filename
              )
              if php:
                  input_image_for_runway_path = php
-                 asset_info = {
-                     "path": php,
-                     "type": "image",
-                     "error": False,
-                     "prompt_used": ppt,
                  }
              else:
-                 current_em = asset_info.get("error_message", "")
-                 asset_info["error_message"] = (current_em + " Placeholder creation failed.").strip()

-         # STEP 4: If a video clip is requested, attempt RunwayML
          if generate_as_video_clip:
-             if not input_image_for_runway_path or not os.path.exists(input_image_for_runway_path):
-                 logger.error("No valid base image for RunwayML. Skipping video generation.")
-                 asset_info["error"] = True
-                 asset_info["error_message"] = (asset_info.get("error_message", "") + " No base image.").strip()
-                 asset_info["type"] = "none"
-                 return asset_info
-
-             if self.USE_RUNWAYML and self.runway_ml_client_instance:
                  video_path = self._generate_video_clip_with_runwayml(
                      motion_prompt_text_for_video,
                      input_image_for_runway_path,
                      base_name,
-                     runway_target_duration,
                  )
                  if video_path and os.path.exists(video_path):
-                     asset_info = {
-                         "path": video_path,
-                         "type": "video",
-                         "error": False,
-                         "prompt_used": motion_prompt_text_for_video,
-                         "base_image_path": input_image_for_runway_path,
                      }
                  else:
-                     logger.warning("RunwayML video generation failed. Returning base image instead.")
-                     asset_info = {
-                         "path": input_image_for_runway_path,
-                         "type": "image",
-                         "error": True,
-                         "prompt_used": image_generation_prompt_text,
-                         "error_message": (asset_info.get("error_message", "") + " RunwayML failed.").strip(),
-                     }
              else:
-                 logger.warning("RunwayML not enabled or client not initialized. Skipping video generation.")
-                 asset_info = {
-                     "path": input_image_for_runway_path,
-                     "type": "image",
-                     "error": True,
-                     "prompt_used": image_generation_prompt_text,
-                     "error_message": (asset_info.get("error_message", "") + " RunwayML disabled.").strip(),
-                 }
-
-         return asset_info
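Editor's note: steps 1-3 above form a DALL·E → Pexels → placeholder fallback chain; the shape of that pattern in isolation (helper name hypothetical):

    def first_successful(*generators):
        # Try each zero-argument callable in order; return the first
        # non-None result, or None if every source fails.
        for gen in generators:
            try:
                result = gen()
                if result is not None:
                    return result
            except Exception:
                continue
        return None

    # base_image = first_successful(try_dalle, try_pexels, make_placeholder)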
-     def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
-         # (Keep as before - robust enough)
-         if not self.USE_ELEVENLABS or not self.elevenlabs_client or not text_to_narrate:
-             logger.info("ElevenLabs audio skipped.")
              return None

-         afp = os.path.join(self.output_dir, output_filename)
          try:
-             logger.info(f"ElevenLabs audio (Voice:{self.elevenlabs_voice_id}): {text_to_narrate[:70]}...")
              asm = None
-
-             if hasattr(self.elevenlabs_client, "text_to_speech") and hasattr(
-                 self.elevenlabs_client.text_to_speech, "stream"
              ):
                  asm = self.elevenlabs_client.text_to_speech.stream
-                 logger.info("Using ElevenLabs .text_to_speech.stream()")
-             elif hasattr(self.elevenlabs_client, "generate_stream"):
                  asm = self.elevenlabs_client.generate_stream
-                 logger.info("Using ElevenLabs .generate_stream()")
-             elif hasattr(self.elevenlabs_client, "generate"):
-                 logger.info("Using ElevenLabs .generate()")
                  vp = (
                      Voice(voice_id=str(self.elevenlabs_voice_id), settings=self.elevenlabs_voice_settings)
-                     if Voice and self.elevenlabs_voice_settings
-                     else str(self.elevenlabs_voice_id)
-                 )
-                 ab = self.elevenlabs_client.generate(
-                     text=text_to_narrate, voice=vp, model="eleven_multilingual_v2"
                  )
                  with open(afp, "wb") as f:
                      f.write(ab)
-                 logger.info(f"ElevenLabs audio (non-stream) saved: {afp}")
                  return afp
              else:
-                 logger.error("No ElevenLabs audio method available.")
                  return None

-             # If we have a streaming method (asm), use it
-             if asm:
-                 vps = {"voice_id": str(self.elevenlabs_voice_id)}
-                 if self.elevenlabs_voice_settings:
-                     if hasattr(self.elevenlabs_voice_settings, "model_dump"):
-                         vps["voice_settings"] = self.elevenlabs_voice_settings.model_dump()
-                     elif hasattr(self.elevenlabs_voice_settings, "dict"):
-                         vps["voice_settings"] = self.elevenlabs_voice_settings.dict()
-                     else:
-                         vps["voice_settings"] = self.elevenlabs_voice_settings
-
-                 adi = asm(text=text_to_narrate, model_id="eleven_multilingual_v2", **vps)
-                 with open(afp, "wb") as f:
-                     for c in adi:
-                         if c:
-                             f.write(c)
-                 logger.info(f"ElevenLabs audio (stream) saved: {afp}")
-                 return afp
-
          except Exception as e:
-             logger.error(f"ElevenLabs audio error: {e}", exc_info=True)
              return None
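Editor's note: the model_dump/dict branching above is a Pydantic v2 vs v1 compatibility shim for the voice-settings object; a standalone sketch of that idea (helper name hypothetical):

    def settings_to_dict(settings):
        # Pydantic v2 exposes model_dump(), v1 exposes dict(); otherwise
        # pass the object through untouched.
        if hasattr(settings, "model_dump"):
            return settings.model_dump()
        if hasattr(settings, "dict"):
            return settings.dict()
        return settings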
-     # --- assemble_animatic_from_assets (Still contains crucial debug saves for blank video issue) ---
      def assemble_animatic_from_assets(
-         self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24
      ):
-         # (Keep the version with robust image processing, C-contiguous arrays, debug saves, and pix_fmt)
          if not asset_data_list:
              logger.warning("No assets for animatic.")
              return None
@@ -831,12 +639,14 @@ class VisualEngine:
          logger.info(f"Assembling from {len(asset_data_list)} assets. Frame: {self.video_frame_size}.")

          for i, asset_info in enumerate(asset_data_list):
-             asset_path = asset_info.get("path")
-             asset_type = asset_info.get("type")
-             scene_dur = asset_info.get("duration", 4.5)
-             scene_num = asset_info.get("scene_num", i + 1)
-             key_action = asset_info.get("key_action", "")
-             logger.info(f"S{scene_num}: Path='{asset_path}', Type='{asset_type}', Dur='{scene_dur}'s")

              if not (asset_path and os.path.exists(asset_path)):
                  logger.warning(f"S{scene_num}: Not found '{asset_path}'. Skip.")
@@ -847,59 +657,61 @@ class VisualEngine:

              current_scene_mvpy_clip = None
              try:
-                 if asset_type == "image":
                      pil_img = Image.open(asset_path)
                      logger.debug(f"S{scene_num}: Loaded img. Mode:{pil_img.mode}, Size:{pil_img.size}")
-                     img_rgba = pil_img.convert("RGBA") if pil_img.mode != "RGBA" else pil_img.copy()
                      thumb = img_rgba.copy()
-                     rf = Image.Resampling.LANCZOS if hasattr(Image.Resampling, "LANCZOS") else Image.BILINEAR
                      thumb.thumbnail(self.video_frame_size, rf)
-                     cv_rgba = Image.new("RGBA", self.video_frame_size, (0, 0, 0, 0))
-                     xo, yo = (
-                         (self.video_frame_size[0] - thumb.width) // 2,
-                         (self.video_frame_size[1] - thumb.height) // 2,
-                     )
                      cv_rgba.paste(thumb, (xo, yo), thumb)
                      final_rgb_pil = Image.new("RGB", self.video_frame_size, (0, 0, 0))
                      final_rgb_pil.paste(cv_rgba, mask=cv_rgba.split()[3])
-                     dbg_path = os.path.join(self.output_dir, f"debug_PRE_NUMPY_S{scene_num}.png")
                      final_rgb_pil.save(dbg_path)
                      logger.info(f"DEBUG: Saved PRE_NUMPY_S{scene_num} to {dbg_path}")
                      frame_np = np.array(final_rgb_pil, dtype=np.uint8)
-                     if not frame_np.flags["C_CONTIGUOUS"]:
                          frame_np = np.ascontiguousarray(frame_np, dtype=np.uint8)
                      logger.debug(
-                         f"S{scene_num}: NumPy for MoviePy. Shape:{frame_np.shape}, DType:{frame_np.dtype}, C-Contig:{frame_np.flags['C_CONTIGUOUS']}"
                      )
                      if frame_np.size == 0 or frame_np.ndim != 3 or frame_np.shape[2] != 3:
                          logger.error(f"S{scene_num}: Invalid NumPy. Skip.")
                          continue
                      clip_base = ImageClip(frame_np, transparent=False).set_duration(scene_dur)
-                     mvpy_dbg_path = os.path.join(self.output_dir, f"debug_MOVIEPY_FRAME_S{scene_num}.png")
                      clip_base.save_frame(mvpy_dbg_path, t=0.1)
                      logger.info(f"DEBUG: Saved MOVIEPY_FRAME_S{scene_num} to {mvpy_dbg_path}")
                      clip_fx = clip_base
                      try:
                          es = random.uniform(1.03, 1.08)
                          clip_fx = clip_base.fx(
-                             vfx.resize, lambda t: 1 + (es - 1) * (t / scene_dur) if scene_dur > 0 else 1
-                         ).set_position("center")
                      except Exception as e:
                          logger.error(f"S{scene_num} Ken Burns error: {e}", exc_info=False)
                      current_scene_mvpy_clip = clip_fx

-                 elif asset_type == "video":
                      src_clip = None
                      try:
                          src_clip = VideoFileClip(
                              asset_path,
                              target_resolution=(
-                                 self.video_frame_size[1],
-                                 self.video_frame_size[0],
-                             )
-                             if self.video_frame_size
-                             else None,
-                             audio=False,
                          )
                          tmp_clip = src_clip
                          if src_clip.duration != scene_dur:
@@ -911,16 +723,23 @@ class VisualEngine:
                          else:
                              tmp_clip = src_clip.set_duration(src_clip.duration)
                              logger.info(
-                                 f"S{scene_num} Video clip ({src_clip.duration:.2f}s) shorter than target ({scene_dur:.2f}s)."
                              )
                          current_scene_mvpy_clip = tmp_clip.set_duration(scene_dur)
                          if current_scene_mvpy_clip.size != list(self.video_frame_size):
                              current_scene_mvpy_clip = current_scene_mvpy_clip.resize(self.video_frame_size)
                      except Exception as e:
-                         logger.error(f"S{scene_num} Video load error '{asset_path}':{e}", exc_info=True)
                          continue
                      finally:
-                         if src_clip and src_clip is not current_scene_mvpy_clip and hasattr(src_clip, "close"):
                              src_clip.close()
                  else:
                      logger.warning(f"S{scene_num} Unknown asset type '{asset_type}'. Skip.")
@@ -929,32 +748,32 @@ class VisualEngine:
                  if current_scene_mvpy_clip and key_action:
                      try:
                          to_dur = (
-                             min(current_scene_mvpy_clip.duration - 0.5, current_scene_mvpy_clip.duration * 0.8)
-                             if current_scene_mvpy_clip.duration > 0.5
-                             else current_scene_mvpy_clip.duration
                          )
                          to_start = 0.25
-                         if to_dur > 0:
-                             txt_c = TextClip(
-                                 f"Scene {scene_num}\n{key_action}",
-                                 fontsize=self.VIDEO_OVERLAY_FONT_SIZE,
-                                 color=self.VIDEO_OVERLAY_FONT_COLOR,
-                                 font=self.video_overlay_font,
-                                 bg_color="rgba(10,10,20,0.7)",
-                                 method="caption",
-                                 align="West",
-                                 size=(self.video_frame_size[0] * 0.9, None),
-                                 kerning=-1,
-                                 stroke_color="black",
-                                 stroke_width=1.5,
-                             ).set_duration(to_dur).set_start(to_start).set_position(
-                                 ("center", 0.92), relative=True
-                             )
-                             current_scene_mvpy_clip = CompositeVideoClip(
-                                 [current_scene_mvpy_clip, txt_c], size=self.video_frame_size, use_bgclip=True
-                             )
-                         else:
-                             logger.warning(f"S{scene_num}: Text overlay duration is zero. Skip text.")
                      except Exception as e:
                          logger.error(f"S{scene_num} TextClip error:{e}. No text.", exc_info=True)
@@ -964,7 +783,7 @@ class VisualEngine:
              except Exception as e:
                  logger.error(f"MAJOR Error S{scene_num} ({asset_path}):{e}", exc_info=True)
              finally:
-                 if current_scene_mvpy_clip and hasattr(current_scene_mvpy_clip, "close"):
                      try:
                          current_scene_mvpy_clip.close()
                      except:
@@ -978,21 +797,32 @@ class VisualEngine:
          try:
              logger.info(f"Concatenating {len(processed_clips)} clips.")
              if len(processed_clips) > 1:
-                 final_clip = concatenate_videoclips(processed_clips, padding=-td if td > 0 else 0, method="compose")
              elif processed_clips:
                  final_clip = processed_clips[0]
              if not final_clip:
                  logger.error("Concatenation failed.")
                  return None
-
              logger.info(f"Concatenated dur:{final_clip.duration:.2f}s")
              if td > 0 and final_clip.duration > 0:
                  if final_clip.duration > td * 2:
                      final_clip = final_clip.fx(vfx.fadein, td).fx(vfx.fadeout, td)
                  else:
-                     final_clip = final_clip.fx(vfx.fadein, min(td, final_clip.duration / 2.0))

-             if overall_narration_path and os.path.exists(overall_narration_path) and final_clip.duration > 0:
                  try:
                      narration_clip = AudioFileClip(overall_narration_path)
                      final_clip = final_clip.set_audio(narration_clip)
@@ -1008,15 +838,18 @@ class VisualEngine:
              final_clip.write_videofile(
                  op,
                  fps=fps,
-                 codec="libx264",
-                 preset="medium",
-                 audio_codec="aac",
-                 temp_audiofile=os.path.join(self.output_dir, f"temp-audio-{os.urandom(4).hex()}.m4a"),
                  remove_temp=True,
                  threads=os.cpu_count() or 2,
-                 logger="bar",
                  bitrate="5000k",
-                 ffmpeg_params=["-pix_fmt", "yuv420p"],
              )
              logger.info(f"Video created:{op}")
              return op
@@ -1027,13 +860,17 @@ class VisualEngine:
              logger.error(f"Video write error:{e}", exc_info=True)
              return None
          finally:
-             logger.debug("Closing all MoviePy clips in `assemble_animatic_from_assets` finally block.")
-             all_clips_to_close = processed_clips + ([narration_clip] if narration_clip else []) + ([final_clip] if final_clip else [])
-             for clip_obj_to_close in all_clips_to_close:
-                 if clip_obj_to_close and hasattr(clip_obj_to_close, "close"):
                      try:
-                         clip_obj_to_close.close()
                      except Exception as e_close:
-                         logger.warning(
-                             f"Ignoring error while closing a clip: {type(clip_obj_to_close).__name__} - {e_close}"
-                         )
  # core/visual_engine.py
  from PIL import Image, ImageDraw, ImageFont, ImageOps

+ # --- MONKEY PATCH FOR Image.ANTIALIAS ---
+ try:
+     if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'):  # Pillow 9+
+         if not hasattr(Image, 'ANTIALIAS'):
+             Image.ANTIALIAS = Image.Resampling.LANCZOS
+     elif hasattr(Image, 'LANCZOS'):  # Pillow 8
+         if not hasattr(Image, 'ANTIALIAS'):
+             Image.ANTIALIAS = Image.LANCZOS
+     elif not hasattr(Image, 'ANTIALIAS'):
+         print("WARNING: Pillow version lacks common Resampling attributes or ANTIALIAS. Video effects might fail.")
+ except Exception as e_mp:
+     print(f"WARNING: ANTIALIAS monkey-patch error: {e_mp}")
+ # --- END MONKEY PATCH ---
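Editor's note: a quick way to sanity-check the patch above after import, assuming Pillow 9 or newer (sketch, not part of the commit):

    from PIL import Image

    # After the patch, the deprecated alias should resolve to LANCZOS,
    # which is what older MoviePy releases look up internally.
    assert hasattr(Image, "ANTIALIAS")
    assert Image.ANTIALIAS == Image.Resampling.LANCZOS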
+
  from moviepy.editor import (
      ImageClip,
      VideoFileClip,
      concatenate_videoclips,
      TextClip,
      CompositeVideoClip,
+     AudioFileClip
  )
  import moviepy.video.fx.all as vfx
+ import numpy as np
+ import os
+ import openai
+ import requests
+ import io
+ import time
+ import random
+ import logging
+ import json  # needed: _search_pexels_image below catches json.JSONDecodeError

  logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
+ # --- ElevenLabs Client Import ---
  ELEVENLABS_CLIENT_IMPORTED = False
  ElevenLabsAPIClient = None
  Voice = None
  try:
      from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
      from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
      ElevenLabsAPIClient = ImportedElevenLabsClient
      Voice = ImportedVoice
      VoiceSettings = ImportedVoiceSettings
      ELEVENLABS_CLIENT_IMPORTED = True
+     logger.info("ElevenLabs client components imported.")
+ except Exception as e_eleven:
+     logger.warning(f"ElevenLabs client import failed: {e_eleven}. Audio disabled.")

+ # --- RunwayML Client Import (Placeholder) ---
  RUNWAYML_SDK_IMPORTED = False
+ RunwayMLClient = None
  try:
+     logger.info("RunwayML SDK import is a placeholder.")
  except ImportError:
+     logger.warning("RunwayML SDK (placeholder) not found. RunwayML disabled.")
+ except Exception as e_runway_sdk:
+     logger.warning(f"Error importing RunwayML SDK (placeholder): {e_runway_sdk}. RunwayML disabled.")
  class VisualEngine:
+     def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
          self.output_dir = output_dir
          os.makedirs(self.output_dir, exist_ok=True)

+         self.font_filename = "DejaVuSans-Bold.ttf"
          font_paths_to_try = [
+             self.font_filename,
+             "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
+             "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+             "/System/Library/Fonts/Supplemental/Arial.ttf",
+             "C:/Windows/Fonts/arial.ttf",
+             "/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"
          ]
+         self.font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
+         self.font_size_pil = 20
+         self.video_overlay_font_size = 30
+         self.video_overlay_font_color = 'white'
+         self.video_overlay_font = 'DejaVu-Sans-Bold'

+         try:
+             if self.font_path_pil:
+                 self.font = ImageFont.truetype(self.font_path_pil, self.font_size_pil)
+                 logger.info(f"Pillow font loaded: {self.font_path_pil}.")
+             else:
+                 self.font = ImageFont.load_default()
+                 logger.warning("Using default Pillow font.")
+                 self.font_size_pil = 10
+         except IOError as e_font:
+             logger.error(f"Pillow font loading IOError: {e_font}. Using default.")
+             self.font = ImageFont.load_default()
+             self.font_size_pil = 10

          self.openai_api_key = None
          self.USE_AI_IMAGE_GENERATION = False
                  stability=0.60,
                  similarity_boost=0.80,
                  style=0.15,
+                 use_speaker_boost=True
              )
          else:
              self.elevenlabs_voice_settings = None

          self.pexels_api_key = None
          self.USE_PEXELS = False
+
          self.runway_api_key = None
          self.USE_RUNWAYML = False
+         self.runway_client = None

          logger.info("VisualEngine initialized.")
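Editor's note: a minimal usage sketch of the engine as wired in this commit (key values are placeholders):

    engine = VisualEngine(output_dir="temp_cinegen_media")
    engine.set_openai_api_key("sk-...")        # enables DALL-E image generation
    engine.set_elevenlabs_api_key("el-...", voice_id_from_secret="Rachel")
    engine.set_pexels_api_key("px-...")        # enables the Pexels fallback
    engine.set_runway_api_key("rw-...")        # placeholder integration in this commit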
+     def set_openai_api_key(self, k):
+         self.openai_api_key = k
+         self.USE_AI_IMAGE_GENERATION = bool(k)
+         logger.info(f"DALL-E ({self.dalle_model}) {'Ready.' if k else 'Disabled.'}")

      def set_elevenlabs_api_key(self, api_key, voice_id_from_secret=None):
          self.elevenlabs_api_key = api_key
                  self.elevenlabs_client = ElevenLabsAPIClient(api_key=api_key)
                  self.USE_ELEVENLABS = bool(self.elevenlabs_client)
                  logger.info(
+                     f"ElevenLabs Client {'Ready' if self.USE_ELEVENLABS else 'Failed Init'} "
+                     f"(Voice ID: {self.elevenlabs_voice_id})."
                  )
              except Exception as e:
+                 logger.error(f"ElevenLabs client init error: {e}. Disabled.", exc_info=True)
                  self.USE_ELEVENLABS = False
          else:
              self.USE_ELEVENLABS = False
+             logger.info("ElevenLabs Disabled (no key or SDK).")

+     def set_pexels_api_key(self, k):
+         self.pexels_api_key = k
+         self.USE_PEXELS = bool(k)
+         logger.info(f"Pexels Search {'Ready.' if k else 'Disabled.'}")

+     def set_runway_api_key(self, k):
+         self.runway_api_key = k
+         if k and RUNWAYML_SDK_IMPORTED and RunwayMLClient:
+             try:
+                 self.USE_RUNWAYML = True
+                 logger.info(
+                     f"RunwayML Client (Placeholder SDK) {'Ready.' if self.USE_RUNWAYML else 'Failed Init.'}"
+                 )
+             except Exception as e:
+                 logger.error(
+                     f"RunwayML client (Placeholder SDK) init error: {e}. Disabled.",
+                     exc_info=True
                  )
                  self.USE_RUNWAYML = False
+         elif k:
+             self.USE_RUNWAYML = True
+             logger.info("RunwayML API Key set (direct API or placeholder).")
+         else:
              self.USE_RUNWAYML = False
+             logger.info("RunwayML Disabled (no API key).")
+     def _get_text_dimensions(self, text_content, font_obj):
+         default_line_height = getattr(font_obj, 'size', self.font_size_pil)
          if not text_content:
+             return 0, default_line_height
          try:
+             if hasattr(font_obj, 'getbbox'):
+                 bbox = font_obj.getbbox(text_content)
+                 width = bbox[2] - bbox[0]
+                 height = bbox[3] - bbox[1]
+                 return width, height if height > 0 else default_line_height
+             elif hasattr(font_obj, 'getsize'):
+                 width, height = font_obj.getsize(text_content)
+                 return width, height if height > 0 else default_line_height
              else:
+                 return int(len(text_content) * default_line_height * 0.6), int(default_line_height * 1.2)
          except Exception as e:
+             logger.warning(f"Error in _get_text_dimensions for '{text_content[:20]}...': {e}")
+             return int(len(text_content) * self.font_size_pil * 0.6), int(self.font_size_pil * 1.2)
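Editor's note: getbbox() is the current Pillow measurement API and getsize() the legacy one, which is why the method probes both; a standalone measurement sketch, assuming a reasonably recent Pillow:

    from PIL import ImageFont

    font = ImageFont.load_default()
    bbox = font.getbbox("Hello")               # (left, top, right, bottom)
    width, height = bbox[2] - bbox[0], bbox[3] - bbox[1]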
 
 
 
      def _create_placeholder_image_content(self, text_description, filename, size=None):
          if size is None:
              size = self.video_frame_size
+
+         img = Image.new('RGB', size, color=(20, 20, 40))
+         draw = ImageDraw.Draw(img)
          padding = 25
+         max_text_width = size[0] - (2 * padding)
          lines = []
+
          if not text_description:
+             text_description = "(Placeholder: No text description provided)"

+         words = text_description.split()
+         current_line = ""
+         for word in words:
+             test_line = current_line + word + " "
+             line_width_test, _ = self._get_text_dimensions(test_line.strip(), self.font)
+             if line_width_test <= max_text_width:
+                 current_line = test_line
              else:
+                 if current_line.strip():
+                     lines.append(current_line.strip())
+                 word_width, _ = self._get_text_dimensions(word, self.font)
+                 if word_width > max_text_width:
+                     avg_char_w = self._get_text_dimensions("A", self.font)[0] or 10
+                     chars_that_fit = int(max_text_width / avg_char_w)
+                     truncated = (
+                         word[:chars_that_fit-3] + "..."
+                         if len(word) > chars_that_fit else word
+                     )
+                     lines.append(truncated)
+                     current_line = ""
+                 else:
+                     current_line = word + " "
+         if current_line.strip():
+             lines.append(current_line.strip())

          if not lines and text_description:
+             avg_char_w = self._get_text_dimensions("A", self.font)[0] or 10
+             chars_that_fit = int(max_text_width / avg_char_w)
+             truncated = (
+                 text_description[:chars_that_fit-3] + "..."
+                 if len(text_description) > chars_that_fit else text_description
              )
+             lines.append(truncated)
          elif not lines:
+             lines.append("(Placeholder Text Error)")
+
+         _, single_line_height = self._get_text_dimensions("Ay", self.font)
+         single_line_height = single_line_height if single_line_height > 0 else (self.font_size_pil + 2)
+         line_spacing = 2
+         max_lines_to_display = min(
+             len(lines),
+             (size[1] - (2 * padding)) // (single_line_height + line_spacing)
+         ) if single_line_height > 0 else 1
+         if max_lines_to_display <= 0:
+             max_lines_to_display = 1
+
+         total_text_block_height = (
+             max_lines_to_display * single_line_height +
+             (max_lines_to_display - 1) * line_spacing
          )
+         y_text_start = padding + (size[1] - (2 * padding) - total_text_block_height) / 2.0
+         current_y = y_text_start
+
+         for i in range(max_lines_to_display):
+             line_content = lines[i]
+             line_width_actual, _ = self._get_text_dimensions(line_content, self.font)
+             x_text = max(padding, (size[0] - line_width_actual) / 2.0)
+             draw.text((x_text, current_y), line_content, font=self.font, fill=(200, 200, 180))
+             current_y += single_line_height + line_spacing
+
+             if i == 6 and max_lines_to_display > 7 and len(lines) > max_lines_to_display:
+                 ellipsis_width, _ = self._get_text_dimensions("...", self.font)
+                 x_ellipsis = max(padding, (size[0] - ellipsis_width) / 2.0)
+                 draw.text((x_ellipsis, current_y), "...", font=self.font, fill=(200, 200, 180))
+                 break

          filepath = os.path.join(self.output_dir, filename)
          try:
              img.save(filepath)
              return filepath
+         except Exception as e:
+             logger.error(f"Error saving placeholder image {filepath}: {e}", exc_info=True)
              return None
      def _search_pexels_image(self, query, output_filename_base):
          if not self.USE_PEXELS or not self.pexels_api_key:
              return None
+
          headers = {"Authorization": self.pexels_api_key}
          params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large2x"}
+         base_name, _ = os.path.splitext(output_filename_base)
+         pexels_filename = f"{base_name}_pexels_{random.randint(1000, 9999)}.jpg"
          filepath = os.path.join(self.output_dir, pexels_filename)
+
          try:
+             logger.info(f"Pexels search: '{query}'")
              effective_query = " ".join(query.split()[:5])
              params["query"] = effective_query
              response = requests.get(
+                 "https://api.pexels.com/v1/search",
+                 headers=headers,
+                 params=params,
+                 timeout=20
              )
              response.raise_for_status()
              data = response.json()
              if data.get("photos") and len(data["photos"]) > 0:
                  photo_details = data["photos"][0]
+                 photo_url = photo_details["src"]["large2x"]
+                 logger.info(f"Downloading Pexels image from: {photo_url}")
                  image_response = requests.get(photo_url, timeout=60)
                  image_response.raise_for_status()
+                 img_data = Image.open(io.BytesIO(image_response.content))
+                 if img_data.mode != 'RGB':
+                     logger.debug(f"Pexels image mode is {img_data.mode}, converting to RGB.")
+                     img_data = img_data.convert('RGB')
+                 img_data.save(filepath)
+                 logger.info(f"Pexels image saved successfully: {filepath}")
                  return filepath
              else:
+                 logger.info(f"No photos found on Pexels for query: '{effective_query}'")
                  return None
          except requests.exceptions.RequestException as e_req:
+             logger.error(f"Pexels request error for query '{query}': {e_req}", exc_info=True)
+         except json.JSONDecodeError as e_json:
+             logger.error(f"Pexels JSON decode error for query '{query}': {e_json}", exc_info=True)
          except Exception as e:
+             logger.error(f"General Pexels error for query '{query}': {e}", exc_info=True)
+         return None
+     def _generate_video_clip_with_runwayml(self, pt, iip, sifnb, tds=5):
+         if not self.USE_RUNWAYML or not self.runway_api_key:
+             logger.warning("RunwayML disabled.")
              return None
+         if not iip or not os.path.exists(iip):
+             logger.error(f"Runway Gen-4 needs input image. Path invalid: {iip}")
              return None

+         runway_dur = 10 if tds > 7 else 5
+         ovfn = sifnb.replace(".png", f"_runway_gen4_d{runway_dur}s.mp4")
+         ovfp = os.path.join(self.output_dir, ovfn)
          logger.info(
+             f"Runway Gen-4 (Placeholder) img: {os.path.basename(iip)}, "
+             f"motion: '{pt[:100]}...', dur: {runway_dur}s"
          )
+         logger.warning("Using PLACEHOLDER video for Runway Gen-4.")

+         img_clip = None
+         txt_c = None
+         final_ph_clip = None
+         try:
+             img_clip = ImageClip(iip).set_duration(runway_dur)
+             txt = (
+                 f"Runway Gen-4 Placeholder\n"
+                 f"Input: {os.path.basename(iip)}\n"
+                 f"Motion: {pt[:50]}..."
              )
+             txt_c = TextClip(
+                 txt,
+                 fontsize=24,
+                 color='white',
+                 font=self.video_overlay_font,
+                 bg_color='rgba(0,0,0,0.5)',
+                 size=(int(self.video_frame_size[0] * 0.8), None),
+                 method='caption'
+             ).set_duration(runway_dur).set_position('center')
+
+             final_ph_clip = CompositeVideoClip([img_clip, txt_c], size=img_clip.size)
+             final_ph_clip.write_videofile(
+                 ovfp,
+                 fps=24,
+                 codec='libx264',
+                 preset='ultrafast',
+                 logger=None,
+                 threads=2
              )
+             logger.info(f"Runway Gen-4 placeholder video: {ovfp}")
+             return ovfp
+         except Exception as e:
+             logger.error(f"Runway Gen-4 placeholder error: {e}", exc_info=True)
              return None
+         finally:
+             if img_clip and hasattr(img_clip, 'close'):
+                 img_clip.close()
+             if txt_c and hasattr(txt_c, 'close'):
+                 txt_c.close()
+             if final_ph_clip and hasattr(final_ph_clip, 'close'):
+                 final_ph_clip.close()
+     def _create_placeholder_video_content(self, td, fn, dur=4, sz=None):
+         if sz is None:
+             sz = self.video_frame_size
+         fp = os.path.join(self.output_dir, fn)
          tc = None
          try:
              tc = TextClip(
+                 td,
                  fontsize=50,
+                 color='white',
                  font=self.video_overlay_font,
+                 bg_color='black',
+                 size=sz,
+                 method='caption'
+             ).set_duration(dur)
              tc.write_videofile(
+                 fp,
+                 fps=24,
+                 codec='libx264',
+                 preset='ultrafast',
+                 logger=None,
+                 threads=2
              )
              logger.info(f"Generic placeholder video: {fp}")
              return fp
          except Exception as e:
+             logger.error(f"Generic placeholder error {fp}: {e}", exc_info=True)
              return None
          finally:
+             if tc and hasattr(tc, 'close'):
                  tc.close()
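Editor's note: a minimal call of the helper above (arguments illustrative):

    engine._create_placeholder_video_content(
        "Scene 3: storm over the harbor",  # td: caption text baked into the clip
        "scene_03_placeholder.mp4",        # fn: written under engine.output_dir
        dur=4,
    )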
420
 
 
     def generate_scene_asset(
         self,
         image_generation_prompt_text,
         motion_prompt_text_for_video,
         scene_data,
         scene_identifier_filename_base,
         generate_as_video_clip=False,
+        runway_target_duration=5
     ):
+        base_name = scene_identifier_filename_base
         asset_info = {
+            'path': None,
+            'type': 'none',
+            'error': True,
+            'prompt_used': image_generation_prompt_text,
+            'error_message': 'Generation not attempted'
         }
         input_image_for_runway_path = None
+        image_filename_for_base = base_name + "_base_image.png"
+        temp_image_asset_info = {
+            'error': True,
+            'prompt_used': image_generation_prompt_text,
+            'error_message': 'Base image generation not attempted'
+        }
 
         if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
+            max_r = 2
+            for att_n in range(max_r):
+                try:
+                    img_fp_dalle = os.path.join(self.output_dir, image_filename_for_base)
+                    logger.info(
+                        f"Attempt {att_n + 1} DALL-E (base img): "
+                        f"{image_generation_prompt_text[:100]}..."
+                    )
+                    cl = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0)
+                    r = cl.images.generate(
+                        model=self.dalle_model,
+                        prompt=image_generation_prompt_text,
+                        n=1,
+                        size=self.image_size_dalle3,
+                        quality="hd",
+                        response_format="url",
+                        style="vivid"
+                    )
+                    iu = r.data[0].url
+                    rp = getattr(r.data[0], 'revised_prompt', None)
+                    if rp:
+                        logger.info(f"DALL-E revised: {rp[:100]}...")
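+                    # DALL-E image URLs are short-lived, so download the result immediately.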
+                    ir = requests.get(iu, timeout=120)
+                    ir.raise_for_status()
+                    id_img = Image.open(io.BytesIO(ir.content))
+                    if id_img.mode != 'RGB':
+                        id_img = id_img.convert('RGB')
+                    id_img.save(img_fp_dalle)
+                    logger.info(f"DALL-E base image: {img_fp_dalle}")
+                    input_image_for_runway_path = img_fp_dalle
+                    temp_image_asset_info = {
+                        'path': img_fp_dalle,
+                        'type': 'image',
+                        'error': False,
+                        'prompt_used': image_generation_prompt_text,
+                        'revised_prompt': rp
+                    }
+                    break
+                except openai.RateLimitError as e:
+                    logger.warning(f"OpenAI Rate Limit {att_n + 1}: {e}. Retry...")
+                    time.sleep(5 * (att_n + 1))
+                    temp_image_asset_info['error_message'] = str(e)
+                except Exception as e:
+                    logger.error(f"DALL-E error: {e}", exc_info=True)
+                    temp_image_asset_info['error_message'] = str(e)
+                    break
+
+            if temp_image_asset_info['error']:
+                logger.warning(f"DALL-E failed after {max_r} attempts for base image.")
 
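+        # Fallback chain for the base image: DALL-E -> Pexels search -> text placeholder.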
+        if temp_image_asset_info['error'] and self.USE_PEXELS:
             pqt = scene_data.get(
+                'pexels_search_query_감독',
+                f"{scene_data.get('emotional_beat', '')} {scene_data.get('setting_description', '')}"
             )
+            pp = self._search_pexels_image(pqt, image_filename_for_base)
             if pp:
                 input_image_for_runway_path = pp
+                temp_image_asset_info = {
+                    'path': pp,
+                    'type': 'image',
+                    'error': False,
+                    'prompt_used': f"Pexels: {pqt}"
                 }
             else:
+                current_em = temp_image_asset_info.get('error_message', "")
+                temp_image_asset_info['error_message'] = (current_em + " Pexels failed.").strip()
 
+        if temp_image_asset_info['error']:
+            logger.warning("Base image generation (DALL-E/Pexels) failed. Using placeholder base image.")
+            ppt = temp_image_asset_info.get('prompt_used', image_generation_prompt_text)
             php = self._create_placeholder_image_content(
+                f"[Base Img Placeholder] {ppt[:100]}...",
+                image_filename_for_base
             )
             if php:
                 input_image_for_runway_path = php
+                temp_image_asset_info = {
+                    'path': php,
+                    'type': 'image',
+                    'error': False,
+                    'prompt_used': ppt
                 }
             else:
+                current_em = temp_image_asset_info.get('error_message', "")
+                temp_image_asset_info['error_message'] = (current_em + " Base placeholder failed.").strip()
 
         if generate_as_video_clip:
+            if self.USE_RUNWAYML and input_image_for_runway_path:
                 video_path = self._generate_video_clip_with_runwayml(
                     motion_prompt_text_for_video,
                     input_image_for_runway_path,
                     base_name,
+                    runway_target_duration
                 )
                 if video_path and os.path.exists(video_path):
+                    return {
+                        'path': video_path,
+                        'type': 'video',
+                        'error': False,
+                        'prompt_used': motion_prompt_text_for_video,
+                        'base_image_path': input_image_for_runway_path
                     }
                 else:
+                    asset_info = temp_image_asset_info
+                    asset_info['error'] = True
+                    asset_info['error_message'] = "RunwayML video gen failed; using base image."
+                    asset_info['type'] = 'image'
+                    return asset_info
+            elif not self.USE_RUNWAYML:
+                asset_info = temp_image_asset_info
+                asset_info['error_message'] = "RunwayML disabled; using base image."
+                asset_info['type'] = 'image'
+                return asset_info
             else:
+                asset_info = temp_image_asset_info
+                asset_info['error_message'] = (
+                    asset_info.get('error_message', "") +
+                    " Base image failed, Runway video not attempted."
+                ).strip()
+                asset_info['type'] = 'image'
+                return asset_info
+        else:
+            return temp_image_asset_info
 
+    def generate_narration_audio(self, ttn, ofn="narration_overall.mp3"):
+        if not self.USE_ELEVENLABS or not self.elevenlabs_client or not ttn:
+            logger.info("ElevenLabs narration skipped (disabled, no client, or empty text).")
             return None
 
+        afp = os.path.join(self.output_dir, ofn)
         try:
+            logger.info(f"11L audio (Voice:{self.elevenlabs_voice_id}): {ttn[:70]}...")
             asm = None
+            if (
+                hasattr(self.elevenlabs_client, 'text_to_speech') and
+                hasattr(self.elevenlabs_client.text_to_speech, 'stream')
             ):
                 asm = self.elevenlabs_client.text_to_speech.stream
+                logger.info("Using 11L .text_to_speech.stream()")
+            elif hasattr(self.elevenlabs_client, 'generate_stream'):
                 asm = self.elevenlabs_client.generate_stream
+                logger.info("Using 11L .generate_stream()")
+            elif hasattr(self.elevenlabs_client, 'generate'):
+                logger.info("Using 11L .generate()")
                 vp = (
                     Voice(voice_id=str(self.elevenlabs_voice_id), settings=self.elevenlabs_voice_settings)
+                    if Voice and self.elevenlabs_voice_settings else str(self.elevenlabs_voice_id)
                 )
+                ab = self.elevenlabs_client.generate(text=ttn, voice=vp, model="eleven_multilingual_v2")
                 with open(afp, "wb") as f:
                     f.write(ab)
+                logger.info(f"11L audio (non-stream): {afp}")
                 return afp
             else:
+                logger.error("No compatible ElevenLabs generation method found on client.")
                 return None
 
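+            # Streaming path: `asm` is whichever stream method was resolved above.
+            # Voice settings are converted to a plain dict when the SDK exposes
+            # pydantic-style models (model_dump/dict), otherwise passed through as-is.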
+            vps = {"voice_id": str(self.elevenlabs_voice_id)}
+            if self.elevenlabs_voice_settings:
+                if hasattr(self.elevenlabs_voice_settings, 'model_dump'):
+                    vps["voice_settings"] = self.elevenlabs_voice_settings.model_dump()
+                elif hasattr(self.elevenlabs_voice_settings, 'dict'):
+                    vps["voice_settings"] = self.elevenlabs_voice_settings.dict()
+                else:
+                    vps["voice_settings"] = self.elevenlabs_voice_settings
+
+            adi = asm(text=ttn, model_id="eleven_multilingual_v2", **vps)
+            with open(afp, "wb") as f:
+                for c in adi:
+                    if c:
+                        f.write(c)
+            logger.info(f"11L audio (stream): {afp}")
+            return afp
         except Exception as e:
+            logger.error(f"11L audio error: {e}", exc_info=True)
             return None
 
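+    # Assembles per-scene assets into one MP4: stills get a subtle Ken Burns zoom,
+    # videos are conformed to the scene duration and frame size, captions are
+    # overlaid, clips are joined with crossfades, then narration audio is attached.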
     def assemble_animatic_from_assets(
+        self,
+        asset_data_list,
+        overall_narration_path=None,
+        output_filename="final_video.mp4",
+        fps=24
     ):
         if not asset_data_list:
             logger.warning("No assets for animatic.")
             return None
 
         logger.info(f"Assembling from {len(asset_data_list)} assets. Frame: {self.video_frame_size}.")
 
         for i, asset_info in enumerate(asset_data_list):
+            asset_path = asset_info.get('path')
+            asset_type = asset_info.get('type')
+            scene_dur = asset_info.get('duration', 4.5)
+            scene_num = asset_info.get('scene_num', i + 1)
+            key_action = asset_info.get('key_action', '')
+            logger.info(
+                f"S{scene_num}: Path='{asset_path}', Type='{asset_type}', Dur='{scene_dur}'s"
+            )
 
             if not (asset_path and os.path.exists(asset_path)):
                 logger.warning(f"S{scene_num}: Not found '{asset_path}'. Skip.")
 
             current_scene_mvpy_clip = None
             try:
+                if asset_type == 'image':
                     pil_img = Image.open(asset_path)
                     logger.debug(f"S{scene_num}: Loaded img. Mode:{pil_img.mode}, Size:{pil_img.size}")
+                    img_rgba = pil_img.convert('RGBA') if pil_img.mode != 'RGBA' else pil_img.copy()
                     thumb = img_rgba.copy()
+                    rf = Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.BILINEAR
                     thumb.thumbnail(self.video_frame_size, rf)
+                    cv_rgba = Image.new('RGBA', self.video_frame_size, (0, 0, 0, 0))
+                    xo = (self.video_frame_size[0] - thumb.width) // 2
+                    yo = (self.video_frame_size[1] - thumb.height) // 2
                     cv_rgba.paste(thumb, (xo, yo), thumb)
                     final_rgb_pil = Image.new("RGB", self.video_frame_size, (0, 0, 0))
                     final_rgb_pil.paste(cv_rgba, mask=cv_rgba.split()[3])
+                    dbg_path = os.path.join(
+                        self.output_dir, f"debug_PRE_NUMPY_S{scene_num}.png"
+                    )
                     final_rgb_pil.save(dbg_path)
                     logger.info(f"DEBUG: Saved PRE_NUMPY_S{scene_num} to {dbg_path}")
                     frame_np = np.array(final_rgb_pil, dtype=np.uint8)
+                    if not frame_np.flags['C_CONTIGUOUS']:
                         frame_np = np.ascontiguousarray(frame_np, dtype=np.uint8)
                     logger.debug(
+                        f"S{scene_num}: NumPy for MoviePy. "
+                        f"Shape:{frame_np.shape}, DType:{frame_np.dtype}, "
+                        f"C-Contig:{frame_np.flags['C_CONTIGUOUS']}"
                     )
                     if frame_np.size == 0 or frame_np.ndim != 3 or frame_np.shape[2] != 3:
                         logger.error(f"S{scene_num}: Invalid NumPy. Skip.")
                         continue
                     clip_base = ImageClip(frame_np, transparent=False).set_duration(scene_dur)
+                    mvpy_dbg_path = os.path.join(
+                        self.output_dir, f"debug_MOVIEPY_FRAME_S{scene_num}.png"
+                    )
                     clip_base.save_frame(mvpy_dbg_path, t=0.1)
                     logger.info(f"DEBUG: Saved MOVIEPY_FRAME_S{scene_num} to {mvpy_dbg_path}")
                     clip_fx = clip_base
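+                    # Ken Burns effect: linearly scale the still from 1.0x up to a
+                    # random 1.03-1.08x across the scene to add subtle motion.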
                     try:
                         es = random.uniform(1.03, 1.08)
                         clip_fx = clip_base.fx(
+                            vfx.resize,
+                            lambda t: 1 + (es - 1) * (t / scene_dur) if scene_dur > 0 else 1
+                        ).set_position('center')
                     except Exception as e:
                         logger.error(f"S{scene_num} Ken Burns error: {e}", exc_info=False)
                     current_scene_mvpy_clip = clip_fx
 
+                elif asset_type == 'video':
                     src_clip = None
                     try:
                         src_clip = VideoFileClip(
                             asset_path,
                             target_resolution=(
+                                self.video_frame_size[1], self.video_frame_size[0]
+                            ) if self.video_frame_size else None,
+                            audio=False
                         )
                         tmp_clip = src_clip
                         if src_clip.duration != scene_dur:
 
                         else:
                             tmp_clip = src_clip.set_duration(src_clip.duration)
                             logger.info(
+                                f"S{scene_num} Video clip ({src_clip.duration:.2f}s) "
+                                f"shorter than target ({scene_dur:.2f}s)."
                             )
                         current_scene_mvpy_clip = tmp_clip.set_duration(scene_dur)
                         if current_scene_mvpy_clip.size != list(self.video_frame_size):
                             current_scene_mvpy_clip = current_scene_mvpy_clip.resize(self.video_frame_size)
                     except Exception as e:
+                        logger.error(
+                            f"S{scene_num} Video load error '{asset_path}':{e}",
+                            exc_info=True
+                        )
                         continue
                     finally:
+                        if (
+                            src_clip and src_clip is not current_scene_mvpy_clip and
+                            hasattr(src_clip, 'close')
+                        ):
                             src_clip.close()
                 else:
                     logger.warning(f"S{scene_num} Unknown asset type '{asset_type}'. Skip.")
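 
+            # Caption overlay: draw "Scene N" plus the key action near the bottom of
+            # the frame, starting 0.25 s in and ending shortly before the clip does.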
             if current_scene_mvpy_clip and key_action:
                 try:
                     to_dur = (
+                        min(
+                            current_scene_mvpy_clip.duration - 0.5,
+                            current_scene_mvpy_clip.duration * 0.8
+                        ) if current_scene_mvpy_clip.duration > 0.5 else current_scene_mvpy_clip.duration
                     )
                     to_start = 0.25
+                    txt_c = TextClip(
+                        f"Scene {scene_num}\n{key_action}",
+                        fontsize=self.video_overlay_font_size,
+                        color=self.video_overlay_font_color,
+                        font=self.video_overlay_font,
+                        bg_color='rgba(10,10,20,0.7)',
+                        method='caption',
+                        align='West',
+                        size=(int(self.video_frame_size[0] * 0.9), None),
+                        kerning=-1,
+                        stroke_color='black',
+                        stroke_width=1.5
+                    ).set_duration(to_dur).set_start(to_start).set_position(
+                        ('center', 0.92), relative=True
+                    )
+                    current_scene_mvpy_clip = CompositeVideoClip(
+                        [current_scene_mvpy_clip, txt_c],
+                        size=self.video_frame_size,
+                        use_bgclip=True
+                    )
                 except Exception as e:
                     logger.error(f"S{scene_num} TextClip error:{e}. No text.", exc_info=True)
 
             except Exception as e:
                 logger.error(f"MAJOR Error S{scene_num} ({asset_path}):{e}", exc_info=True)
             finally:
+                if current_scene_mvpy_clip and hasattr(current_scene_mvpy_clip, 'close'):
                     try:
                         current_scene_mvpy_clip.close()
                     except:
 
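+        # Negative padding with method="compose" overlaps consecutive clips by
+        # `td` seconds, so the per-clip fades blend into crossfade-style transitions.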
         try:
             logger.info(f"Concatenating {len(processed_clips)} clips.")
             if len(processed_clips) > 1:
+                final_clip = concatenate_videoclips(
+                    processed_clips,
+                    padding=-td if td > 0 else 0,
+                    method="compose"
+                )
             elif processed_clips:
                 final_clip = processed_clips[0]
+
             if not final_clip:
                 logger.error("Concatenation failed.")
                 return None
 
             logger.info(f"Concatenated dur:{final_clip.duration:.2f}s")
+
             if td > 0 and final_clip.duration > 0:
                 if final_clip.duration > td * 2:
                     final_clip = final_clip.fx(vfx.fadein, td).fx(vfx.fadeout, td)
                 else:
+                    final_clip = final_clip.fx(
+                        vfx.fadein, min(td, final_clip.duration / 2.0)
+                    )
 
+            if (
+                overall_narration_path and
+                os.path.exists(overall_narration_path) and
+                final_clip.duration > 0
+            ):
                 try:
                     narration_clip = AudioFileClip(overall_narration_path)
                     final_clip = final_clip.set_audio(narration_clip)
 
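+            # A unique temp audio filename avoids clashes between concurrent runs,
+            # and "-pix_fmt yuv420p" keeps the H.264 output widely playable.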
             final_clip.write_videofile(
                 op,
                 fps=fps,
+                codec='libx264',
+                preset='medium',
+                audio_codec='aac',
+                temp_audiofile=os.path.join(
+                    self.output_dir,
+                    f'temp-audio-{os.urandom(4).hex()}.m4a'
+                ),
                 remove_temp=True,
                 threads=os.cpu_count() or 2,
+                logger='bar',
                 bitrate="5000k",
+                ffmpeg_params=["-pix_fmt", "yuv420p"]
             )
             logger.info(f"Video created:{op}")
             return op
 
             logger.error(f"Video write error:{e}", exc_info=True)
             return None
         finally:
+            logger.debug(
+                "Closing all MoviePy clips in `assemble_animatic_from_assets` finally block."
+            )
+            clips_to_close = (
+                processed_clips +
+                ([narration_clip] if narration_clip else []) +
+                ([final_clip] if final_clip else [])
+            )
+            for clip_obj in clips_to_close:
+                if clip_obj and hasattr(clip_obj, 'close'):
                     try:
+                        clip_obj.close()
                     except Exception as e_close:
+                        logger.warning(f"Ignoring error while closing a clip: {e_close}")