mgbam committed
Commit b6860f8 · verified · 1 Parent(s): 62838f2

Update core/visual_engine.py

Files changed (1)
  1. core/visual_engine.py +95 -237
core/visual_engine.py CHANGED
@@ -1,6 +1,5 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont, ImageOps
-
 # --- MONKEY PATCH FOR Image.ANTIALIAS ---
 try:
     if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'):  # Pillow 9+
@@ -15,14 +14,8 @@ except Exception as e_mp:
     print(f"WARNING: ANTIALIAS monkey-patch error: {e_mp}")
 # --- END MONKEY PATCH ---
 
-from moviepy.editor import (
-    ImageClip,
-    VideoFileClip,
-    concatenate_videoclips,
-    TextClip,
-    CompositeVideoClip,
-    AudioFileClip
-)
+from moviepy.editor import (ImageClip, VideoFileClip, concatenate_videoclips, TextClip,
+                            CompositeVideoClip, AudioFileClip)
 import moviepy.video.fx.all as vfx
 import numpy as np
 import os
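
# --- Example: the ANTIALIAS shim in isolation. A minimal sketch of what the
# monkey patch above achieves: Pillow 10 removed Image.ANTIALIAS, while
# Pillow 9+ exposes Image.Resampling.LANCZOS, so older MoviePy code paths
# that still reference the old constant need it re-aliased. ---
from PIL import Image

if not hasattr(Image, 'ANTIALIAS'):
    if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'):  # Pillow 9+
        Image.ANTIALIAS = Image.Resampling.LANCZOS  # keep legacy callers working
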
@@ -67,15 +60,14 @@ class VisualEngine:
     def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
         self.output_dir = output_dir
         os.makedirs(self.output_dir, exist_ok=True)
-
         self.font_filename = "DejaVuSans-Bold.ttf"
         font_paths_to_try = [
             self.font_filename,
-            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
-            "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
-            "/System/Library/Fonts/Supplemental/Arial.ttf",
-            "C:/Windows/Fonts/arial.ttf",
-            "/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"
+            f"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf",
+            f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf",
+            f"/System/Library/Fonts/Supplemental/Arial.ttf",
+            f"C:/Windows/Fonts/arial.ttf",
+            f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"
         ]
         self.font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
         self.font_size_pil = 20
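
# --- Example: consuming the fallback chain above. A sketch only; load_font
# and its arguments are illustrative helpers, not part of this file. ---
from PIL import ImageFont

def load_font(font_paths, size=20):
    for path in font_paths:
        try:
            return ImageFont.truetype(path, size)  # first path that loads wins
        except OSError:
            continue
    return ImageFont.load_default()  # built-in bitmap font as a last resort
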
@@ -101,7 +93,6 @@ class VisualEngine:
         self.dalle_model = "dall-e-3"
         self.image_size_dalle3 = "1792x1024"
         self.video_frame_size = (1280, 720)
-
         self.elevenlabs_api_key = None
         self.USE_ELEVENLABS = False
         self.elevenlabs_client = None
@@ -115,14 +106,11 @@ class VisualEngine:
             )
         else:
             self.elevenlabs_voice_settings = None
-
         self.pexels_api_key = None
         self.USE_PEXELS = False
-
         self.runway_api_key = None
         self.USE_RUNWAYML = False
         self.runway_client = None
-
         logger.info("VisualEngine initialized.")
 
     def set_openai_api_key(self, k):
@@ -138,10 +126,7 @@ class VisualEngine:
         try:
             self.elevenlabs_client = ElevenLabsAPIClient(api_key=api_key)
             self.USE_ELEVENLABS = bool(self.elevenlabs_client)
-            logger.info(
-                f"ElevenLabs Client {'Ready' if self.USE_ELEVENLABS else 'Failed Init'} "
-                f"(Voice ID: {self.elevenlabs_voice_id})."
-            )
+            logger.info(f"ElevenLabs Client {'Ready' if self.USE_ELEVENLABS else 'Failed Init'} (Voice ID: {self.elevenlabs_voice_id}).")
         except Exception as e:
             logger.error(f"ElevenLabs client init error: {e}. Disabled.", exc_info=True)
             self.USE_ELEVENLABS = False
@@ -159,14 +144,9 @@ class VisualEngine:
         if k and RUNWAYML_SDK_IMPORTED and RunwayMLClient:
             try:
                 self.USE_RUNWAYML = True
-                logger.info(
-                    f"RunwayML Client (Placeholder SDK) {'Ready.' if self.USE_RUNWAYML else 'Failed Init.'}"
-                )
+                logger.info(f"RunwayML Client (Placeholder SDK) {'Ready.' if self.USE_RUNWAYML else 'Failed Init.'}")
             except Exception as e:
-                logger.error(
-                    f"RunwayML client (Placeholder SDK) init error: {e}. Disabled.",
-                    exc_info=True
-                )
+                logger.error(f"RunwayML client (Placeholder SDK) init error: {e}. Disabled.", exc_info=True)
                 self.USE_RUNWAYML = False
         elif k:
             self.USE_RUNWAYML = True
@@ -197,16 +177,13 @@ class VisualEngine:
     def _create_placeholder_image_content(self, text_description, filename, size=None):
         if size is None:
             size = self.video_frame_size
-
         img = Image.new('RGB', size, color=(20, 20, 40))
         draw = ImageDraw.Draw(img)
         padding = 25
         max_text_width = size[0] - (2 * padding)
         lines = []
-
         if not text_description:
             text_description = "(Placeholder: No text description provided)"
-
         words = text_description.split()
         current_line = ""
         for word in words:
@@ -220,59 +197,45 @@ class VisualEngine:
             word_width, _ = self._get_text_dimensions(word, self.font)
             if word_width > max_text_width:
                 avg_char_w = self._get_text_dimensions("A", self.font)[0] or 10
-                chars_that_fit = int(max_text_width / avg_char_w)
-                truncated = (
-                    word[:chars_that_fit-3] + "..."
-                    if len(word) > chars_that_fit else word
-                )
-                lines.append(truncated)
+                chars_that_fit = int(max_text_width / avg_char_w) if avg_char_w > 0 else 10
+                if len(word) > chars_that_fit:
+                    lines.append(word[:chars_that_fit-3] + "...")
+                else:
+                    lines.append(word)
                 current_line = ""
             else:
                 current_line = word + " "
         if current_line.strip():
             lines.append(current_line.strip())
-
         if not lines and text_description:
             avg_char_w = self._get_text_dimensions("A", self.font)[0] or 10
-            chars_that_fit = int(max_text_width / avg_char_w)
-            truncated = (
-                text_description[:chars_that_fit-3] + "..."
-                if len(text_description) > chars_that_fit else text_description
-            )
-            lines.append(truncated)
+            chars_that_fit = int(max_text_width / avg_char_w) if avg_char_w > 0 else 10
+            if len(text_description) > chars_that_fit:
+                lines.append(text_description[:chars_that_fit-3] + "...")
+            else:
+                lines.append(text_description)
         elif not lines:
             lines.append("(Placeholder Text Error)")
-
         _, single_line_height = self._get_text_dimensions("Ay", self.font)
         single_line_height = single_line_height if single_line_height > 0 else (self.font_size_pil + 2)
         line_spacing = 2
-        max_lines_to_display = min(
-            len(lines),
-            (size[1] - (2 * padding)) // (single_line_height + line_spacing)
-        ) if single_line_height > 0 else 1
+        max_lines_to_display = min(len(lines), (size[1] - (2 * padding)) // (single_line_height + line_spacing)) if single_line_height > 0 else 1
         if max_lines_to_display <= 0:
             max_lines_to_display = 1
-
-        total_text_block_height = (
-            max_lines_to_display * single_line_height +
-            (max_lines_to_display - 1) * line_spacing
-        )
+        total_text_block_height = max_lines_to_display * single_line_height + (max_lines_to_display - 1) * line_spacing
         y_text_start = padding + (size[1] - (2 * padding) - total_text_block_height) / 2.0
         current_y = y_text_start
-
         for i in range(max_lines_to_display):
            line_content = lines[i]
            line_width_actual, _ = self._get_text_dimensions(line_content, self.font)
            x_text = max(padding, (size[0] - line_width_actual) / 2.0)
            draw.text((x_text, current_y), line_content, font=self.font, fill=(200, 200, 180))
            current_y += single_line_height + line_spacing
-
            if i == 6 and max_lines_to_display > 7 and len(lines) > max_lines_to_display:
                ellipsis_width, _ = self._get_text_dimensions("...", self.font)
                x_ellipsis = max(padding, (size[0] - ellipsis_width) / 2.0)
                draw.text((x_ellipsis, current_y), "...", font=self.font, fill=(200, 200, 180))
                break
-
         filepath = os.path.join(self.output_dir, filename)
         try:
             img.save(filepath)
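
# --- Example: the wrapping logic above is a pixel-measured greedy word wrap.
# A compact equivalent as a sketch; measure() stands in for the class's
# _get_text_dimensions() and returns a rendered width in pixels. ---
def wrap_text(text, measure, max_width):
    lines, current = [], ""
    for word in text.split():
        candidate = (current + " " + word).strip()
        if measure(candidate) <= max_width:
            current = candidate        # word still fits on the current line
        else:
            if current:
                lines.append(current)  # flush the completed line
            current = word             # start a new line with this word
    if current:
        lines.append(current)
    return lines
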
@@ -284,23 +247,16 @@ class VisualEngine:
     def _search_pexels_image(self, query, output_filename_base):
         if not self.USE_PEXELS or not self.pexels_api_key:
             return None
-
         headers = {"Authorization": self.pexels_api_key}
         params = {"query": query, "per_page": 1, "orientation": "landscape", "size": "large2x"}
         base_name, _ = os.path.splitext(output_filename_base)
-        pexels_filename = f"{base_name}_pexels_{random.randint(1000, 9999)}.jpg"
+        pexels_filename = base_name + f"_pexels_{random.randint(1000,9999)}.jpg"
         filepath = os.path.join(self.output_dir, pexels_filename)
-
         try:
             logger.info(f"Pexels search: '{query}'")
             effective_query = " ".join(query.split()[:5])
             params["query"] = effective_query
-            response = requests.get(
-                "https://api.pexels.com/v1/search",
-                headers=headers,
-                params=params,
-                timeout=20
-            )
+            response = requests.get("https://api.pexels.com/v1/search", headers=headers, params=params, timeout=20)
             response.raise_for_status()
             data = response.json()
             if data.get("photos") and len(data["photos"]) > 0:
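
# --- Example: the download step that typically follows the search call above.
# A sketch assuming the standard Pexels v1 response shape, where each photo
# carries a "src" dict of pre-sized URLs (e.g. "large2x"). ---
import requests

def download_first_photo(data, filepath, timeout=45):
    photos = data.get("photos") or []
    if not photos:
        return None
    resp = requests.get(photos[0]["src"]["large2x"], timeout=timeout)
    resp.raise_for_status()
    with open(filepath, "wb") as f:
        f.write(resp.content)  # save the landscape large2x rendition to disk
    return filepath
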
@@ -334,45 +290,28 @@ class VisualEngine:
         if not iip or not os.path.exists(iip):
             logger.error(f"Runway Gen-4 needs input image. Path invalid: {iip}")
             return None
-
         runway_dur = 10 if tds > 7 else 5
         ovfn = sifnb.replace(".png", f"_runway_gen4_d{runway_dur}s.mp4")
         ovfp = os.path.join(self.output_dir, ovfn)
-        logger.info(
-            f"Runway Gen-4 (Placeholder) img: {os.path.basename(iip)}, "
-            f"motion: '{pt[:100]}...', dur: {runway_dur}s"
-        )
+        logger.info(f"Runway Gen-4 (Placeholder) img: {os.path.basename(iip)}, motion: '{pt[:100]}...', dur: {runway_dur}s")
         logger.warning("Using PLACEHOLDER video for Runway Gen-4.")
-
         img_clip = None
         txt_c = None
         final_ph_clip = None
         try:
             img_clip = ImageClip(iip).set_duration(runway_dur)
-            txt = (
-                f"Runway Gen-4 Placeholder\n"
-                f"Input: {os.path.basename(iip)}\n"
-                f"Motion: {pt[:50]}..."
-            )
+            txt = f"Runway Gen-4 Placeholder\nInput: {os.path.basename(iip)}\nMotion: {pt[:50]}..."
             txt_c = TextClip(
                 txt,
                 fontsize=24,
                 color='white',
                 font=self.video_overlay_font,
                 bg_color='rgba(0,0,0,0.5)',
-                size=(int(self.video_frame_size[0] * 0.8), None),
+                size=(self.video_frame_size[0] * 0.8, None),
                 method='caption'
             ).set_duration(runway_dur).set_position('center')
-
             final_ph_clip = CompositeVideoClip([img_clip, txt_c], size=img_clip.size)
-            final_ph_clip.write_videofile(
-                ovfp,
-                fps=24,
-                codec='libx264',
-                preset='ultrafast',
-                logger=None,
-                threads=2
-            )
+            final_ph_clip.write_videofile(ovfp, fps=24, codec='libx264', preset='ultrafast', logger=None, threads=2)
             logger.info(f"Runway Gen-4 placeholder video: {ovfp}")
             return ovfp
         except Exception as e:
@@ -386,47 +325,44 @@ class VisualEngine:
         if final_ph_clip and hasattr(final_ph_clip, 'close'):
             final_ph_clip.close()
 
-    def _create_placeholder_video_content(self, td, fn, dur=4, sz=None):
-        if sz is None:
-            sz = self.video_frame_size
-        fp = os.path.join(self.output_dir, fn)
-        tc = None
+    def _create_placeholder_video_content(self, text_description, filename, duration=4, size=None):
+        if size is None:
+            size = self.video_frame_size
+        filepath = os.path.join(self.output_dir, filename)
+        txt_clip = None
         try:
-            tc = TextClip(
-                td,
+            txt_clip = TextClip(
+                text_description,
                 fontsize=50,
                 color='white',
                 font=self.video_overlay_font,
                 bg_color='black',
-                size=sz,
+                size=size,
                 method='caption'
-            ).set_duration(dur)
-            tc.write_videofile(
-                fp,
+            ).set_duration(duration)
+            txt_clip.write_videofile(
+                filepath,
                 fps=24,
                 codec='libx264',
                 preset='ultrafast',
                 logger=None,
                 threads=2
             )
-            logger.info(f"Generic placeholder video: {fp}")
-            return fp
+            logger.info(f"Generic placeholder video created successfully: {filepath}")
+            return filepath
         except Exception as e:
-            logger.error(f"Generic placeholder error {fp}: {e}", exc_info=True)
+            logger.error(f"Failed to create generic placeholder video {filepath}: {e}", exc_info=True)
             return None
         finally:
-            if tc and hasattr(tc, 'close'):
-                tc.close()
+            if txt_clip and hasattr(txt_clip, 'close'):
+                try:
+                    txt_clip.close()
+                except Exception as e_close:
+                    logger.warning(f"Error closing TextClip in _create_placeholder_video_content: {e_close}")
 
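
# --- Example: calling the placeholder generator above (a sketch; the scene
# text and output filename are invented for illustration). ---
engine = VisualEngine(output_dir="temp_cinegen_media")
placeholder_path = engine._create_placeholder_video_content(
    "Scene 3: storm over the harbor (asset generation failed)",
    "scene3_placeholder.mp4",
    duration=4  # seconds; size defaults to the engine's 1280x720 frame
)
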
-    def generate_scene_asset(
-        self,
-        image_generation_prompt_text,
-        motion_prompt_text_for_video,
-        scene_data,
-        scene_identifier_filename_base,
-        generate_as_video_clip=False,
-        runway_target_duration=5
-    ):
+    def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
+                             scene_data, scene_identifier_filename_base,
+                             generate_as_video_clip=False, runway_target_duration=5):
         base_name = scene_identifier_filename_base
         asset_info = {
             'path': None,
@@ -444,14 +380,11 @@ class VisualEngine:
         }
 
         if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
-            max_r = 2
+            max_r, att_n = 2, 0
             for att_n in range(max_r):
                 try:
                     img_fp_dalle = os.path.join(self.output_dir, image_filename_for_base)
-                    logger.info(
-                        f"Attempt {att_n + 1} DALL-E (base img): "
-                        f"{image_generation_prompt_text[:100]}..."
-                    )
+                    logger.info(f"Attempt {att_n+1} DALL-E (base img): {image_generation_prompt_text[:100]}...")
                     cl = openai.OpenAI(api_key=self.openai_api_key, timeout=90.0)
                     r = cl.images.generate(
                         model=self.dalle_model,
@@ -483,22 +416,19 @@ class VisualEngine:
                     }
                     break
                 except openai.RateLimitError as e:
-                    logger.warning(f"OpenAI Rate Limit {att_n + 1}: {e}. Retry...")
+                    logger.warning(f"OpenAI Rate Limit {att_n+1}: {e}. Retry...")
                     time.sleep(5 * (att_n + 1))
                     temp_image_asset_info['error_message'] = str(e)
                 except Exception as e:
                     logger.error(f"DALL-E error: {e}", exc_info=True)
                     temp_image_asset_info['error_message'] = str(e)
                     break
-
             if temp_image_asset_info['error']:
-                logger.warning(f"DALL-E failed after {att_n + 1} attempts for base image.")
+                logger.warning(f"DALL-E failed after {att_n+1} attempts for base image.")
 
         if temp_image_asset_info['error'] and self.USE_PEXELS:
-            pqt = scene_data.get(
-                'pexels_search_query_감독',
-                f"{scene_data.get('emotional_beat', '')} {scene_data.get('setting_description', '')}"
-            )
+            pqt = scene_data.get('pexels_search_query_감독',
+                                 f"{scene_data.get('emotional_beat','')} {scene_data.get('setting_description','')}")
            pp = self._search_pexels_image(pqt, image_filename_for_base)
            if pp:
                input_image_for_runway_path = pp
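
# --- Example: the retry pattern used above, isolated as a sketch. Linear
# backoff on rate limits (5s, then 10s), immediate bail-out on other errors;
# generate_with_retry is an illustrative helper, not part of this file. ---
import time
import openai

def generate_with_retry(client, max_attempts=2, **dalle_kwargs):
    for attempt in range(max_attempts):
        try:
            return client.images.generate(**dalle_kwargs)
        except openai.RateLimitError:
            time.sleep(5 * (attempt + 1))  # wait longer after each attempt
    return None  # caller falls back to Pexels, then to a placeholder image
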
@@ -515,10 +445,7 @@ class VisualEngine:
         if temp_image_asset_info['error']:
             logger.warning("Base image (DALL-E/Pexels) failed. Placeholder base image.")
             ppt = temp_image_asset_info.get('prompt_used', image_generation_prompt_text)
-            php = self._create_placeholder_image_content(
-                f"[Base Img Placeholder] {ppt[:100]}...",
-                image_filename_for_base
-            )
+            php = self._create_placeholder_image_content(f"[Base Img Placeholder] {ppt[:100]}...", image_filename_for_base)
             if php:
                 input_image_for_runway_path = php
                 temp_image_asset_info = {
@@ -560,10 +487,7 @@ class VisualEngine:
                 return asset_info
             else:
                 asset_info = temp_image_asset_info
-                asset_info['error_message'] = (
-                    asset_info.get('error_message', "") +
-                    " Base image failed, Runway video not attempted."
-                ).strip()
+                asset_info['error_message'] = (asset_info.get('error_message', "") + " Base image failed, Runway video not attempted.").strip()
                 asset_info['type'] = 'image'
                 return asset_info
         else:
@@ -573,15 +497,11 @@ class VisualEngine:
         if not self.USE_ELEVENLABS or not self.elevenlabs_client or not ttn:
             logger.info("11L skip.")
             return None
-
         afp = os.path.join(self.output_dir, ofn)
         try:
             logger.info(f"11L audio (Voice:{self.elevenlabs_voice_id}): {ttn[:70]}...")
             asm = None
-            if (
-                hasattr(self.elevenlabs_client, 'text_to_speech') and
-                hasattr(self.elevenlabs_client.text_to_speech, 'stream')
-            ):
+            if hasattr(self.elevenlabs_client, 'text_to_speech') and hasattr(self.elevenlabs_client.text_to_speech, 'stream'):
                 asm = self.elevenlabs_client.text_to_speech.stream
                 logger.info("Using 11L .text_to_speech.stream()")
             elif hasattr(self.elevenlabs_client, 'generate_stream'):
@@ -589,10 +509,8 @@ class VisualEngine:
                 logger.info("Using 11L .generate_stream()")
             elif hasattr(self.elevenlabs_client, 'generate'):
                 logger.info("Using 11L .generate()")
-                vp = (
-                    Voice(voice_id=str(self.elevenlabs_voice_id), settings=self.elevenlabs_voice_settings)
-                    if Voice and self.elevenlabs_voice_settings else str(self.elevenlabs_voice_id)
-                )
+                vp = Voice(voice_id=str(self.elevenlabs_voice_id),
+                           settings=self.elevenlabs_voice_settings) if Voice and self.elevenlabs_voice_settings else str(self.elevenlabs_voice_id)
                 ab = self.elevenlabs_client.generate(text=ttn, voice=vp, model="eleven_multilingual_v2")
                 with open(afp, "wb") as f:
                     f.write(ab)
@@ -602,37 +520,30 @@ class VisualEngine:
                 logger.error("No 11L audio method.")
                 return None
 
-            vps = {"voice_id": str(self.elevenlabs_voice_id)}
-            if self.elevenlabs_voice_settings:
-                if hasattr(self.elevenlabs_voice_settings, 'model_dump'):
-                    vps["voice_settings"] = self.elevenlabs_voice_settings.model_dump()
-                elif hasattr(self.elevenlabs_voice_settings, 'dict'):
-                    vps["voice_settings"] = self.elevenlabs_voice_settings.dict()
-                else:
-                    vps["voice_settings"] = self.elevenlabs_voice_settings
-
-            adi = asm(text=ttn, model_id="eleven_multilingual_v2", **vps)
-            with open(afp, "wb") as f:
-                for c in adi:
-                    if c:
-                        f.write(c)
-            logger.info(f"11L audio (stream): {afp}")
-            return afp
+            if asm:
+                vps = {"voice_id": str(self.elevenlabs_voice_id)}
+                if self.elevenlabs_voice_settings:
+                    if hasattr(self.elevenlabs_voice_settings, 'model_dump'):
+                        vps["voice_settings"] = self.elevenlabs_voice_settings.model_dump()
+                    elif hasattr(self.elevenlabs_voice_settings, 'dict'):
+                        vps["voice_settings"] = self.elevenlabs_voice_settings.dict()
+                    else:
+                        vps["voice_settings"] = self.elevenlabs_voice_settings
+                adi = asm(text=ttn, model_id="eleven_multilingual_v2", **vps)
+                with open(afp, "wb") as f:
+                    for c in adi:
+                        if c:
+                            f.write(c)
+                logger.info(f"11L audio (stream): {afp}")
+                return afp
         except Exception as e:
             logger.error(f"11L audio error: {e}", exc_info=True)
             return None
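
# --- Example: the hasattr dispatch above in one place. A sketch of picking
# whichever streaming entry point the installed elevenlabs SDK exposes,
# newest first; pick_tts_stream is an illustrative helper. ---
def pick_tts_stream(client):
    if hasattr(client, 'text_to_speech') and hasattr(client.text_to_speech, 'stream'):
        return client.text_to_speech.stream  # newer SDK layout
    if hasattr(client, 'generate_stream'):
        return client.generate_stream        # older SDK layout
    return None                              # fall back to client.generate()
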
 
-    def assemble_animatic_from_assets(
-        self,
-        asset_data_list,
-        overall_narration_path=None,
-        output_filename="final_video.mp4",
-        fps=24
-    ):
+    def assemble_animatic_from_assets(self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24):
         if not asset_data_list:
             logger.warning("No assets for animatic.")
             return None
-
         processed_clips = []
         narration_clip = None
         final_clip = None
@@ -644,9 +555,7 @@ class VisualEngine:
             scene_dur = asset_info.get('duration', 4.5)
             scene_num = asset_info.get('scene_num', i + 1)
             key_action = asset_info.get('key_action', '')
-            logger.info(
-                f"S{scene_num}: Path='{asset_path}', Type='{asset_type}', Dur='{scene_dur}'s"
-            )
+            logger.info(f"S{scene_num}: Path='{asset_path}', Type='{asset_type}', Dur='{scene_dur}'s")
 
             if not (asset_path and os.path.exists(asset_path)):
                 logger.warning(f"S{scene_num}: Not found '{asset_path}'. Skip.")
@@ -670,26 +579,18 @@ class VisualEngine:
                     cv_rgba.paste(thumb, (xo, yo), thumb)
                     final_rgb_pil = Image.new("RGB", self.video_frame_size, (0, 0, 0))
                     final_rgb_pil.paste(cv_rgba, mask=cv_rgba.split()[3])
-                    dbg_path = os.path.join(
-                        self.output_dir, f"debug_PRE_NUMPY_S{scene_num}.png"
-                    )
+                    dbg_path = os.path.join(self.output_dir, f"debug_PRE_NUMPY_S{scene_num}.png")
                     final_rgb_pil.save(dbg_path)
                     logger.info(f"DEBUG: Saved PRE_NUMPY_S{scene_num} to {dbg_path}")
                     frame_np = np.array(final_rgb_pil, dtype=np.uint8)
                     if not frame_np.flags['C_CONTIGUOUS']:
                         frame_np = np.ascontiguousarray(frame_np, dtype=np.uint8)
-                    logger.debug(
-                        f"S{scene_num}: NumPy for MoviePy. "
-                        f"Shape:{frame_np.shape}, DType:{frame_np.dtype}, "
-                        f"C-Contig:{frame_np.flags['C_CONTIGUOUS']}"
-                    )
+                    logger.debug(f"S{scene_num}: NumPy for MoviePy. Shape:{frame_np.shape}, DType:{frame_np.dtype}, C-Contig:{frame_np.flags['C_CONTIGUOUS']}")
                     if frame_np.size == 0 or frame_np.ndim != 3 or frame_np.shape[2] != 3:
                         logger.error(f"S{scene_num}: Invalid NumPy. Skip.")
                         continue
                     clip_base = ImageClip(frame_np, transparent=False).set_duration(scene_dur)
-                    mvpy_dbg_path = os.path.join(
-                        self.output_dir, f"debug_MOVIEPY_FRAME_S{scene_num}.png"
-                    )
+                    mvpy_dbg_path = os.path.join(self.output_dir, f"debug_MOVIEPY_FRAME_S{scene_num}.png")
                     clip_base.save_frame(mvpy_dbg_path, t=0.1)
                     logger.info(f"DEBUG: Saved MOVIEPY_FRAME_S{scene_num} to {mvpy_dbg_path}")
                     clip_fx = clip_base
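
# --- Example: the PIL -> NumPy handoff above, reduced to its core. MoviePy's
# ImageClip expects an HxWx3 uint8, C-contiguous array; a sketch, with
# pil_to_moviepy_frame as an illustrative helper. ---
import numpy as np
from PIL import Image

def pil_to_moviepy_frame(pil_img, frame_size=(1280, 720)):
    canvas = Image.new("RGB", frame_size, (0, 0, 0))        # letterbox background
    thumb = pil_img.copy()
    thumb.thumbnail(frame_size, Image.Resampling.LANCZOS)   # fit inside the frame
    offset = ((frame_size[0] - thumb.width) // 2,
              (frame_size[1] - thumb.height) // 2)
    canvas.paste(thumb, offset)                             # center the image
    return np.ascontiguousarray(np.array(canvas, dtype=np.uint8))
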
@@ -702,15 +603,12 @@ class VisualEngine:
                 except Exception as e:
                     logger.error(f"S{scene_num} Ken Burns error: {e}", exc_info=False)
                 current_scene_mvpy_clip = clip_fx
-
             elif asset_type == 'video':
                 src_clip = None
                 try:
                     src_clip = VideoFileClip(
                         asset_path,
-                        target_resolution=(
-                            self.video_frame_size[1], self.video_frame_size[0]
-                        ) if self.video_frame_size else None,
+                        target_resolution=(self.video_frame_size[1], self.video_frame_size[0]) if self.video_frame_size else None,
                         audio=False
                     )
                     tmp_clip = src_clip
@@ -722,24 +620,15 @@ class VisualEngine:
                         tmp_clip = src_clip.loop(duration=scene_dur)
                     else:
                         tmp_clip = src_clip.set_duration(src_clip.duration)
-                        logger.info(
-                            f"S{scene_num} Video clip ({src_clip.duration:.2f}s) "
-                            f"shorter than target ({scene_dur:.2f}s)."
-                        )
+                        logger.info(f"S{scene_num} Video clip ({src_clip.duration:.2f}s) shorter than target ({scene_dur:.2f}s).")
                     current_scene_mvpy_clip = tmp_clip.set_duration(scene_dur)
                     if current_scene_mvpy_clip.size != list(self.video_frame_size):
                         current_scene_mvpy_clip = current_scene_mvpy_clip.resize(self.video_frame_size)
                 except Exception as e:
-                    logger.error(
-                        f"S{scene_num} Video load error '{asset_path}':{e}",
-                        exc_info=True
-                    )
+                    logger.error(f"S{scene_num} Video load error '{asset_path}':{e}", exc_info=True)
                     continue
                 finally:
-                    if (
-                        src_clip and src_clip is not current_scene_mvpy_clip and
-                        hasattr(src_clip, 'close')
-                    ):
+                    if src_clip and src_clip is not current_scene_mvpy_clip and hasattr(src_clip, 'close'):
                         src_clip.close()
             else:
                 logger.warning(f"S{scene_num} Unknown asset type '{asset_type}'. Skip.")
@@ -747,12 +636,7 @@ class VisualEngine:
 
             if current_scene_mvpy_clip and key_action:
                 try:
-                    to_dur = (
-                        min(
-                            current_scene_mvpy_clip.duration - 0.5,
-                            current_scene_mvpy_clip.duration * 0.8
-                        ) if current_scene_mvpy_clip.duration > 0.5 else current_scene_mvpy_clip.duration
-                    )
+                    to_dur = min(current_scene_mvpy_clip.duration - 0.5, current_scene_mvpy_clip.duration * 0.8) if current_scene_mvpy_clip.duration > 0.5 else current_scene_mvpy_clip.duration
                     to_start = 0.25
                     txt_c = TextClip(
                         f"Scene {scene_num}\n{key_action}",
@@ -762,13 +646,11 @@ class VisualEngine:
                         bg_color='rgba(10,10,20,0.7)',
                         method='caption',
                         align='West',
-                        size=(int(self.video_frame_size[0] * 0.9), None),
+                        size=(self.video_frame_size[0] * 0.9, None),
                         kerning=-1,
                         stroke_color='black',
                         stroke_width=1.5
-                    ).set_duration(to_dur).set_start(to_start).set_position(
-                        ('center', 0.92), relative=True
-                    )
+                    ).set_duration(to_dur).set_start(to_start).set_position(('center', 0.92), relative=True)
                     current_scene_mvpy_clip = CompositeVideoClip(
                         [current_scene_mvpy_clip, txt_c],
                         size=self.video_frame_size,
@@ -792,37 +674,23 @@ class VisualEngine:
         if not processed_clips:
             logger.warning("No clips processed. Abort.")
             return None
-
         td = 0.75
         try:
             logger.info(f"Concatenating {len(processed_clips)} clips.")
             if len(processed_clips) > 1:
-                final_clip = concatenate_videoclips(
-                    processed_clips,
-                    padding=-td if td > 0 else 0,
-                    method="compose"
-                )
+                final_clip = concatenate_videoclips(processed_clips, padding=-td if td > 0 else 0, method="compose")
             elif processed_clips:
                 final_clip = processed_clips[0]
-
             if not final_clip:
                 logger.error("Concatenation failed.")
                 return None
             logger.info(f"Concatenated dur:{final_clip.duration:.2f}s")
-
             if td > 0 and final_clip.duration > 0:
                 if final_clip.duration > td * 2:
                     final_clip = final_clip.fx(vfx.fadein, td).fx(vfx.fadeout, td)
                 else:
-                    final_clip = final_clip.fx(
-                        vfx.fadein, min(td, final_clip.duration / 2.0)
-                    )
-
-            if (
-                overall_narration_path and
-                os.path.exists(overall_narration_path) and
-                final_clip.duration > 0
-            ):
+                    final_clip = final_clip.fx(vfx.fadein, min(td, final_clip.duration / 2.0))
+            if overall_narration_path and os.path.exists(overall_narration_path) and final_clip.duration > 0:
                 try:
                     narration_clip = AudioFileClip(overall_narration_path)
                     final_clip = final_clip.set_audio(narration_clip)
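
# --- Example: the crossfade idiom used above. Concatenating with negative
# padding overlaps consecutive clips by td seconds, and method="compose" lets
# the overlapping frames blend; a sketch with crossfadein applied explicitly. ---
from moviepy.editor import concatenate_videoclips

def crossfade_concat(clips, td=0.75):
    faded = [clips[0]] + [c.crossfadein(td) for c in clips[1:]]
    return concatenate_videoclips(faded, padding=-td, method="compose")
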
@@ -831,7 +699,6 @@ class VisualEngine:
                     logger.error(f"Narration add error:{e}", exc_info=True)
             elif final_clip.duration <= 0:
                 logger.warning("Video no duration. No audio.")
-
             if final_clip and final_clip.duration > 0:
                 op = os.path.join(self.output_dir, output_filename)
                 logger.info(f"Writing video:{op} (Dur:{final_clip.duration:.2f}s)")
@@ -841,10 +708,7 @@ class VisualEngine:
                 codec='libx264',
                 preset='medium',
                 audio_codec='aac',
-                temp_audiofile=os.path.join(
-                    self.output_dir,
-                    f'temp-audio-{os.urandom(4).hex()}.m4a'
-                ),
+                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
                 remove_temp=True,
                 threads=os.cpu_count() or 2,
                 logger='bar',
@@ -860,14 +724,8 @@ class VisualEngine:
             logger.error(f"Video write error:{e}", exc_info=True)
             return None
         finally:
-            logger.debug(
-                "Closing all MoviePy clips in `assemble_animatic_from_assets` finally block."
-            )
-            clips_to_close = (
-                processed_clips +
-                ([narration_clip] if narration_clip else []) +
-                ([final_clip] if final_clip else [])
-            )
+            logger.debug("Closing all MoviePy clips in `assemble_animatic_from_assets` finally block.")
+            clips_to_close = processed_clips + ([narration_clip] if narration_clip else []) + ([final_clip] if final_clip else [])
             for clip_obj in clips_to_close:
                 if clip_obj and hasattr(clip_obj, 'close'):
                     try:
 