mgbam committed on
Commit
7ff521a
·
verified ·
1 Parent(s): ae86660

Update core/visual_engine.py

Browse files
Files changed (1) hide show
  1. core/visual_engine.py +46 -51
core/visual_engine.py CHANGED
@@ -1,10 +1,10 @@
1
  # core/visual_engine.py
2
  from PIL import Image, ImageDraw, ImageFont, ImageOps
3
  import base64
4
- import mimetypes
5
  import numpy as np
6
  import os
7
- import openai # Ensure this is OpenAI v1.x.x+
8
  import requests
9
  import io
10
  import time
@@ -28,63 +28,45 @@ except Exception as e_monkey_patch:
28
  print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}")
29
 
30
  logger = logging.getLogger(__name__)
31
- # logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging
32
 
33
  # --- External Service Client Imports ---
34
- ELEVENLABS_CLIENT_IMPORTED = False
35
- ElevenLabsAPIClient = None
36
- Voice = None
37
- VoiceSettings = None
38
  try:
39
  from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
40
  from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
41
- ElevenLabsAPIClient = ImportedElevenLabsClient
42
- Voice = ImportedVoice
43
- VoiceSettings = ImportedVoiceSettings
44
- ELEVENLABS_CLIENT_IMPORTED = True
45
- logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
46
- except ImportError:
47
- logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
48
- except Exception as e_eleven_import_general:
49
- logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")
50
 
51
- RUNWAYML_SDK_IMPORTED = False
52
- RunwayMLAPIClientClass = None
53
  try:
54
  from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
55
- RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass
56
- RUNWAYML_SDK_IMPORTED = True
57
  logger.info("RunwayML SDK (runwayml) imported successfully.")
58
- except ImportError:
59
- logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
60
- except Exception as e_runway_sdk_import_general:
61
- logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")
62
 
63
 
64
  class VisualEngine:
65
- DEFAULT_FONT_SIZE_PIL = 10
66
- PREFERRED_FONT_SIZE_PIL = 20
67
- VIDEO_OVERLAY_FONT_SIZE = 30
68
- VIDEO_OVERLAY_FONT_COLOR = 'white'
69
- DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'
70
- PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'
71
 
72
  def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
73
- self.output_dir = output_dir
74
- os.makedirs(self.output_dir, exist_ok=True)
75
  self.font_filename_pil_preference = "DejaVuSans-Bold.ttf"
76
  font_paths_to_try = [ self.font_filename_pil_preference, f"/usr/share/fonts/truetype/dejavu/{self.font_filename_pil_preference}", f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", f"/System/Library/Fonts/Supplemental/Arial.ttf", f"C:/Windows/Fonts/arial.ttf", f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"]
77
  self.resolved_font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
78
- self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL
79
- self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
80
  if self.resolved_font_path_pil:
81
  try: self.active_font_pil = ImageFont.truetype(self.resolved_font_path_pil, self.PREFERRED_FONT_SIZE_PIL); self.active_font_size_pil = self.PREFERRED_FONT_SIZE_PIL; logger.info(f"Pillow font: {self.resolved_font_path_pil} sz {self.active_font_size_pil}."); self.active_moviepy_font_name = 'DejaVu-Sans-Bold' if "dejavu" in self.resolved_font_path_pil.lower() else ('Liberation-Sans-Bold' if "liberation" in self.resolved_font_path_pil.lower() else self.DEFAULT_MOVIEPY_FONT)
82
  except IOError as e_font_load_io: logger.error(f"Pillow font IOError '{self.resolved_font_path_pil}': {e_font_load_io}. Default.")
83
  else: logger.warning("Preferred Pillow font not found. Default.")
84
  self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
85
  self.video_frame_size = (1280, 720)
86
- self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None
87
- self.elevenlabs_voice_id = default_elevenlabs_voice_id
88
  if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
89
  else: self.elevenlabs_voice_settings_obj = None
90
  self.pexels_api_key = None; self.USE_PEXELS = False
@@ -119,14 +101,28 @@ class VisualEngine:
119
  else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")
120
 
121
  def _image_to_data_uri(self, image_path):
122
- # (Implementation from before)
123
- try: mime_type,_=mimetypes.guess_type(image_path)
124
- if not mime_type:ext=os.path.splitext(image_path)[1].lower();mime_map={".png":"image/png",".jpg":"image/jpeg",".jpeg":"image/jpeg",".webp":"image/webp"};mime_type=mime_map.get(ext,"application/octet-stream");
125
- if mime_type=="application/octet-stream":logger.warning(f"Unknown MIME for {image_path}, using {mime_type}.")
126
- with open(image_path,"rb")as image_file:encoded_string=base64.b64encode(image_file.read()).decode('utf-8')
127
- data_uri=f"data:{mime_type};base64,{encoded_string}";logger.debug(f"Data URI for {os.path.basename(image_path)} (start): {data_uri[:100]}...");return data_uri
128
- except FileNotFoundError:logger.error(f"Img not found {image_path} for data URI.");return None
129
- except Exception as e:logger.error(f"Error converting {image_path} to data URI:{e}",exc_info=True);return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  def _map_resolution_to_runway_ratio(self, width, height):
132
  # (Implementation from before)
@@ -142,7 +138,7 @@ class VisualEngine:
142
  if hasattr(font_object_pil,'getbbox'):bb=font_object_pil.getbbox(text_content);w=bb[2]-bb[0];h=bb[3]-bb[1];return w,h if h>0 else dch
143
  elif hasattr(font_object_pil,'getsize'):w,h=font_object_pil.getsize(text_content);return w,h if h>0 else dch
144
  else:return int(len(text_content)*dch*0.6),int(dch*1.2)
145
- except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2) # Renamed e
146
 
147
  def _create_placeholder_image_content(self,text_description,filename,size=None):
148
  # (Corrected version from previous response)
@@ -250,7 +246,7 @@ class VisualEngine:
250
  if size_ph is None: size_ph = self.video_frame_size
251
  filepath_ph = os.path.join(self.output_dir, filename_ph)
252
  text_clip_ph = None
253
- try: # Ensure try block is here
254
  text_clip_ph = TextClip(text_desc_ph, fontsize=50, color='white', font=self.video_overlay_font,
255
  bg_color='black', size=size_ph, method='caption').set_duration(duration_ph)
256
  text_clip_ph.write_videofile(filepath_ph, fps=24, codec='libx264', preset='ultrafast', logger=None, threads=2)
@@ -259,9 +255,10 @@ class VisualEngine:
259
  except Exception as e_ph_vid:
260
  logger.error(f"Failed to create generic placeholder video '{filepath_ph}': {e_ph_vid}", exc_info=True)
261
  return None
262
- finally: # Ensure finally block is here
263
  if text_clip_ph and hasattr(text_clip_ph, 'close'):
264
- text_clip_ph.close()
 
265
 
266
  def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
267
  scene_data_dict, scene_identifier_fn_base,
@@ -326,7 +323,7 @@ class VisualEngine:
326
  logger.info(f"ElevenLabs audio (non-streamed) saved successfully to: {audio_filepath_narration}"); return audio_filepath_narration
327
  else: logger.error("No recognized audio generation method found on the ElevenLabs client instance."); return None
328
 
329
- if audio_stream_method_11l: # If a streaming method was identified
330
  params_for_voice_stream = {"voice_id": str(self.elevenlabs_voice_id)}
331
  if self.elevenlabs_voice_settings_obj:
332
  if hasattr(self.elevenlabs_voice_settings_obj, 'model_dump'): params_for_voice_stream["voice_settings"] = self.elevenlabs_voice_settings_obj.model_dump()
@@ -340,10 +337,8 @@ class VisualEngine:
340
  except AttributeError as ae_11l_sdk: logger.error(f"AttributeError with ElevenLabs SDK client: {ae_11l_sdk}. SDK version/methods might differ.", exc_info=True); return None
341
  except Exception as e_11l_general_audio: logger.error(f"General error during ElevenLabs audio generation: {e_11l_general_audio}", exc_info=True); return None
342
 
343
-
344
  def assemble_animatic_from_assets(self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24):
345
  # (Keep as in the version with robust image processing, C-contiguous array, debug saves, and pix_fmt)
346
- # ... (This extensive method is assumed to be largely correct from the previous iteration focusing on blank video issues)
347
  if not asset_data_list: logger.warning("No assets for animatic."); return None
348
  processed_moviepy_clips_list = []; narration_audio_clip_mvpy = None; final_video_output_clip = None
349
  logger.info(f"Assembling from {len(asset_data_list)} assets. Target Frame: {self.video_frame_size}.")
 
1
  # core/visual_engine.py
2
  from PIL import Image, ImageDraw, ImageFont, ImageOps
3
  import base64
4
+ import mimetypes # For Data URI
5
  import numpy as np
6
  import os
7
+ import openai # OpenAI v1.x.x+
8
  import requests
9
  import io
10
  import time
 
28
  print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}")
29
 
30
  logger = logging.getLogger(__name__)
31
+ # logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging during development
32
 
33
  # --- External Service Client Imports ---
34
+ ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
 
 
 
35
  try:
36
  from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
37
  from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
38
+ ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
39
+ ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
40
+ except ImportError: logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
41
+ except Exception as e_eleven_import_general: logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")
 
 
 
 
 
42
 
43
+ RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
 
44
  try:
45
  from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
46
+ RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
 
47
  logger.info("RunwayML SDK (runwayml) imported successfully.")
48
+ except ImportError: logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
49
+ except Exception as e_runway_sdk_import_general: logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")
 
 
50
 
51
 
52
  class VisualEngine:
53
+ DEFAULT_FONT_SIZE_PIL = 10; PREFERRED_FONT_SIZE_PIL = 20
54
+ VIDEO_OVERLAY_FONT_SIZE = 30; VIDEO_OVERLAY_FONT_COLOR = 'white'
55
+ DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'; PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'
 
 
 
56
 
57
  def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
58
+ self.output_dir = output_dir; os.makedirs(self.output_dir, exist_ok=True)
 
59
  self.font_filename_pil_preference = "DejaVuSans-Bold.ttf"
60
  font_paths_to_try = [ self.font_filename_pil_preference, f"/usr/share/fonts/truetype/dejavu/{self.font_filename_pil_preference}", f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", f"/System/Library/Fonts/Supplemental/Arial.ttf", f"C:/Windows/Fonts/arial.ttf", f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"]
61
  self.resolved_font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
62
+ self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL; self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
 
63
  if self.resolved_font_path_pil:
64
  try: self.active_font_pil = ImageFont.truetype(self.resolved_font_path_pil, self.PREFERRED_FONT_SIZE_PIL); self.active_font_size_pil = self.PREFERRED_FONT_SIZE_PIL; logger.info(f"Pillow font: {self.resolved_font_path_pil} sz {self.active_font_size_pil}."); self.active_moviepy_font_name = 'DejaVu-Sans-Bold' if "dejavu" in self.resolved_font_path_pil.lower() else ('Liberation-Sans-Bold' if "liberation" in self.resolved_font_path_pil.lower() else self.DEFAULT_MOVIEPY_FONT)
65
  except IOError as e_font_load_io: logger.error(f"Pillow font IOError '{self.resolved_font_path_pil}': {e_font_load_io}. Default.")
66
  else: logger.warning("Preferred Pillow font not found. Default.")
67
  self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
68
  self.video_frame_size = (1280, 720)
69
+ self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None; self.elevenlabs_voice_id = default_elevenlabs_voice_id
 
70
  if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
71
  else: self.elevenlabs_voice_settings_obj = None
72
  self.pexels_api_key = None; self.USE_PEXELS = False
 
101
  else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")
102
 
103
  def _image_to_data_uri(self, image_path):
  # Read the file at image_path and return it as a string of the form
  # "data:<mime_type>;base64,<payload>". Returns None (after logging an
  # error) when the file is missing or any other exception is raised.
  # The whole file is read into memory before it is base64-encoded.
104
+ # <<< CORRECTED METHOD >>>
105
+ try:
106
+ mime_type, _ = mimetypes.guess_type(image_path)
107
+ if not mime_type:
  # mimetypes could not guess: fall back to a small extension table,
  # defaulting to "application/octet-stream" for anything not listed.
108
+ ext = os.path.splitext(image_path)[1].lower()
109
+ mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".webp": "image/webp"}
110
+ mime_type = mime_map.get(ext, "application/octet-stream")
  # NOTE(review): indentation is not preserved in this diff view. The
  # warning below reads `ext`, which is only assigned in the fallback
  # branch, so this check presumably sits inside `if not mime_type:` —
  # confirm against the real file.
111
+ if mime_type == "application/octet-stream":
112
+ logger.warning(f"Could not determine MIME type for {image_path} from extension '{ext}', using default {mime_type}.")
113
+
114
+ with open(image_path, "rb") as image_file:
115
+ encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
116
+
117
+ data_uri = f"data:{mime_type};base64,{encoded_string}"
  # Only the first 100 characters of the URI are logged.
118
+ logger.debug(f"Generated data URI for {os.path.basename(image_path)} (MIME: {mime_type}, first 100 chars): {data_uri[:100]}...")
119
+ return data_uri
  # A missing file is reported without a traceback; every other failure
  # is logged with exc_info=True. Both paths return None to the caller.
120
+ except FileNotFoundError:
121
+ logger.error(f"Image file not found at path: {image_path} during data URI conversion.")
122
+ return None
123
+ except Exception as e_data_uri:
124
+ logger.error(f"Error converting image '{image_path}' to data URI: {e_data_uri}", exc_info=True)
125
+ return None
126
 
127
  def _map_resolution_to_runway_ratio(self, width, height):
128
  # (Implementation from before)
 
138
  if hasattr(font_object_pil,'getbbox'):bb=font_object_pil.getbbox(text_content);w=bb[2]-bb[0];h=bb[3]-bb[1];return w,h if h>0 else dch
139
  elif hasattr(font_object_pil,'getsize'):w,h=font_object_pil.getsize(text_content);return w,h if h>0 else dch
140
  else:return int(len(text_content)*dch*0.6),int(dch*1.2)
141
+ except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2)
142
 
143
  def _create_placeholder_image_content(self,text_description,filename,size=None):
144
  # (Corrected version from previous response)
 
246
  if size_ph is None: size_ph = self.video_frame_size
247
  filepath_ph = os.path.join(self.output_dir, filename_ph)
248
  text_clip_ph = None
249
+ try:
250
  text_clip_ph = TextClip(text_desc_ph, fontsize=50, color='white', font=self.video_overlay_font,
251
  bg_color='black', size=size_ph, method='caption').set_duration(duration_ph)
252
  text_clip_ph.write_videofile(filepath_ph, fps=24, codec='libx264', preset='ultrafast', logger=None, threads=2)
 
255
  except Exception as e_ph_vid:
256
  logger.error(f"Failed to create generic placeholder video '{filepath_ph}': {e_ph_vid}", exc_info=True)
257
  return None
258
+ finally:
259
  if text_clip_ph and hasattr(text_clip_ph, 'close'):
260
+ try: text_clip_ph.close() # Attempt to close
261
+ except Exception as e_close_ph_clip: logger.warning(f"Ignoring error closing placeholder TextClip: {e_close_ph_clip}")
262
 
263
  def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
264
  scene_data_dict, scene_identifier_fn_base,
 
323
  logger.info(f"ElevenLabs audio (non-streamed) saved successfully to: {audio_filepath_narration}"); return audio_filepath_narration
324
  else: logger.error("No recognized audio generation method found on the ElevenLabs client instance."); return None
325
 
326
+ if audio_stream_method_11l:
327
  params_for_voice_stream = {"voice_id": str(self.elevenlabs_voice_id)}
328
  if self.elevenlabs_voice_settings_obj:
329
  if hasattr(self.elevenlabs_voice_settings_obj, 'model_dump'): params_for_voice_stream["voice_settings"] = self.elevenlabs_voice_settings_obj.model_dump()
 
337
  except AttributeError as ae_11l_sdk: logger.error(f"AttributeError with ElevenLabs SDK client: {ae_11l_sdk}. SDK version/methods might differ.", exc_info=True); return None
338
  except Exception as e_11l_general_audio: logger.error(f"General error during ElevenLabs audio generation: {e_11l_general_audio}", exc_info=True); return None
339
 
 
340
  def assemble_animatic_from_assets(self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24):
341
  # (Keep as in the version with robust image processing, C-contiguous array, debug saves, and pix_fmt)
 
342
  if not asset_data_list: logger.warning("No assets for animatic."); return None
343
  processed_moviepy_clips_list = []; narration_audio_clip_mvpy = None; final_video_output_clip = None
344
  logger.info(f"Assembling from {len(asset_data_list)} assets. Target Frame: {self.video_frame_size}.")