Update core/visual_engine.py
core/visual_engine.py  +46 -51  CHANGED
@@ -1,10 +1,10 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont, ImageOps
 import base64
-import mimetypes
+import mimetypes # For Data URI
 import numpy as np
 import os
-import openai #
+import openai # OpenAI v1.x.x+
 import requests
 import io
 import time
@@ -28,63 +28,45 @@ except Exception as e_monkey_patch:
     print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}")

 logger = logging.getLogger(__name__)
-# logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging
+# logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging during development

 # --- External Service Client Imports ---
-ELEVENLABS_CLIENT_IMPORTED = False
-ElevenLabsAPIClient = None
-Voice = None
-VoiceSettings = None
+ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
 try:
     from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
     from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
-    ElevenLabsAPIClient = ImportedElevenLabsClient
-
-
-
-    logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
-except ImportError:
-    logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
-except Exception as e_eleven_import_general:
-    logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")
+    ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
+    ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
+except ImportError: logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
+except Exception as e_eleven_import_general: logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")

-RUNWAYML_SDK_IMPORTED = False
-RunwayMLAPIClientClass = None
+RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
 try:
     from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
-    RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass
-    RUNWAYML_SDK_IMPORTED = True
+    RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
     logger.info("RunwayML SDK (runwayml) imported successfully.")
-except ImportError:
-
-except Exception as e_runway_sdk_import_general:
-    logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")
+except ImportError: logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
+except Exception as e_runway_sdk_import_general: logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")


 class VisualEngine:
-    DEFAULT_FONT_SIZE_PIL = 10
-
-
-    VIDEO_OVERLAY_FONT_COLOR = 'white'
-    DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'
-    PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'
+    DEFAULT_FONT_SIZE_PIL = 10; PREFERRED_FONT_SIZE_PIL = 20
+    VIDEO_OVERLAY_FONT_SIZE = 30; VIDEO_OVERLAY_FONT_COLOR = 'white'
+    DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'; PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'

     def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
-        self.output_dir = output_dir
-        os.makedirs(self.output_dir, exist_ok=True)
+        self.output_dir = output_dir; os.makedirs(self.output_dir, exist_ok=True)
         self.font_filename_pil_preference = "DejaVuSans-Bold.ttf"
         font_paths_to_try = [ self.font_filename_pil_preference, f"/usr/share/fonts/truetype/dejavu/{self.font_filename_pil_preference}", f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", f"/System/Library/Fonts/Supplemental/Arial.ttf", f"C:/Windows/Fonts/arial.ttf", f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"]
         self.resolved_font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
-        self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL
-        self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
+        self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL; self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
         if self.resolved_font_path_pil:
             try: self.active_font_pil = ImageFont.truetype(self.resolved_font_path_pil, self.PREFERRED_FONT_SIZE_PIL); self.active_font_size_pil = self.PREFERRED_FONT_SIZE_PIL; logger.info(f"Pillow font: {self.resolved_font_path_pil} sz {self.active_font_size_pil}."); self.active_moviepy_font_name = 'DejaVu-Sans-Bold' if "dejavu" in self.resolved_font_path_pil.lower() else ('Liberation-Sans-Bold' if "liberation" in self.resolved_font_path_pil.lower() else self.DEFAULT_MOVIEPY_FONT)
             except IOError as e_font_load_io: logger.error(f"Pillow font IOError '{self.resolved_font_path_pil}': {e_font_load_io}. Default.")
         else: logger.warning("Preferred Pillow font not found. Default.")
         self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
         self.video_frame_size = (1280, 720)
-        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None
-        self.elevenlabs_voice_id = default_elevenlabs_voice_id
+        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None; self.elevenlabs_voice_id = default_elevenlabs_voice_id
         if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
         else: self.elevenlabs_voice_settings_obj = None
         self.pexels_api_key = None; self.USE_PEXELS = False
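Note: the consolidated guard blocks above follow the usual optional-dependency pattern: try the import once at module load, record a boolean flag, and have later code check the flag instead of re-importing. A minimal standalone sketch of that pattern (illustrative only, with a deliberately hypothetical package name, not part of this commit):

    import logging

    logger = logging.getLogger(__name__)

    OPTIONAL_SDK_IMPORTED = False   # mirrors ELEVENLABS_CLIENT_IMPORTED / RUNWAYML_SDK_IMPORTED
    try:
        import some_optional_sdk    # hypothetical package; any optional dependency works the same way
        OPTIONAL_SDK_IMPORTED = True
    except ImportError:
        logger.warning("some_optional_sdk not installed; dependent features stay disabled.")

    def feature_that_needs_sdk():
        if not OPTIONAL_SDK_IMPORTED:
            logger.info("Feature skipped: optional SDK unavailable.")
            return None
        return some_optional_sdk    # only touch the module behind the guard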
@@ -119,14 +101,28 @@ class VisualEngine:
         else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")

     def _image_to_data_uri(self, image_path):
-        #
-        try:
-
-
-
-
-
-
+        # <<< CORRECTED METHOD >>>
+        try:
+            mime_type, _ = mimetypes.guess_type(image_path)
+            if not mime_type:
+                ext = os.path.splitext(image_path)[1].lower()
+                mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".webp": "image/webp"}
+                mime_type = mime_map.get(ext, "application/octet-stream")
+                if mime_type == "application/octet-stream":
+                    logger.warning(f"Could not determine MIME type for {image_path} from extension '{ext}', using default {mime_type}.")
+
+            with open(image_path, "rb") as image_file:
+                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+
+            data_uri = f"data:{mime_type};base64,{encoded_string}"
+            logger.debug(f"Generated data URI for {os.path.basename(image_path)} (MIME: {mime_type}, first 100 chars): {data_uri[:100]}...")
+            return data_uri
+        except FileNotFoundError:
+            logger.error(f"Image file not found at path: {image_path} during data URI conversion.")
+            return None
+        except Exception as e_data_uri:
+            logger.error(f"Error converting image '{image_path}' to data URI: {e_data_uri}", exc_info=True)
+            return None

     def _map_resolution_to_runway_ratio(self, width, height):
         # (Implementation from before)
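Note: a data URI embeds the file bytes directly in a string of the form data:<mime>;base64,<payload>, which lets a locally generated still be passed inside a JSON request body instead of being uploaded separately. A rough usage sketch of the corrected helper (illustrative only; the path and assertion are examples, not part of this commit):

    from core.visual_engine import VisualEngine

    engine = VisualEngine(output_dir="temp_cinegen_media")
    uri = engine._image_to_data_uri("temp_cinegen_media/scene_01.png")  # example path
    if uri:
        # e.g. "data:image/png;base64,iVBORw0KGgo..." for a PNG input
        assert uri.startswith("data:image/png;base64,")
        print(f"Data URI length: {len(uri)} characters")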
@@ -142,7 +138,7 @@ class VisualEngine:
             if hasattr(font_object_pil,'getbbox'):bb=font_object_pil.getbbox(text_content);w=bb[2]-bb[0];h=bb[3]-bb[1];return w,h if h>0 else dch
             elif hasattr(font_object_pil,'getsize'):w,h=font_object_pil.getsize(text_content);return w,h if h>0 else dch
             else:return int(len(text_content)*dch*0.6),int(dch*1.2)
-        except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2)
+        except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2)

     def _create_placeholder_image_content(self,text_description,filename,size=None):
         # (Corrected version from previous response)
@@ -250,7 +246,7 @@ class VisualEngine:
         if size_ph is None: size_ph = self.video_frame_size
         filepath_ph = os.path.join(self.output_dir, filename_ph)
         text_clip_ph = None
-        try:
+        try:
             text_clip_ph = TextClip(text_desc_ph, fontsize=50, color='white', font=self.video_overlay_font,
                                     bg_color='black', size=size_ph, method='caption').set_duration(duration_ph)
             text_clip_ph.write_videofile(filepath_ph, fps=24, codec='libx264', preset='ultrafast', logger=None, threads=2)
@@ -259,9 +255,10 @@ class VisualEngine:
         except Exception as e_ph_vid:
             logger.error(f"Failed to create generic placeholder video '{filepath_ph}': {e_ph_vid}", exc_info=True)
             return None
-        finally:
+        finally:
             if text_clip_ph and hasattr(text_clip_ph, 'close'):
-                text_clip_ph.close()
+                try: text_clip_ph.close() # Attempt to close
+                except Exception as e_close_ph_clip: logger.warning(f"Ignoring error closing placeholder TextClip: {e_close_ph_clip}")

     def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
                              scene_data_dict, scene_identifier_fn_base,
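Note: the guarded close() above keeps a failure while releasing the TextClip from masking whatever error the try block actually raised. The same pattern in isolation (illustrative only, reusing the MoviePy calls already present in this file; the output filename is an example):

    from moviepy.editor import TextClip

    clip = None
    try:
        clip = TextClip("Placeholder", fontsize=50, color='white', bg_color='black',
                        size=(1280, 720), method='caption').set_duration(2)
        clip.write_videofile("placeholder_demo.mp4", fps=24, codec='libx264', preset='ultrafast', logger=None)
    except Exception as e_render:
        print(f"Placeholder render failed: {e_render}")
    finally:
        if clip and hasattr(clip, 'close'):
            try: clip.close()  # release readers/subprocesses
            except Exception as e_close: print(f"Ignoring close error: {e_close}")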
@@ -326,7 +323,7 @@ class VisualEngine:
                     logger.info(f"ElevenLabs audio (non-streamed) saved successfully to: {audio_filepath_narration}"); return audio_filepath_narration
                 else: logger.error("No recognized audio generation method found on the ElevenLabs client instance."); return None

-            if audio_stream_method_11l:
+            if audio_stream_method_11l:
                 params_for_voice_stream = {"voice_id": str(self.elevenlabs_voice_id)}
                 if self.elevenlabs_voice_settings_obj:
                     if hasattr(self.elevenlabs_voice_settings_obj, 'model_dump'): params_for_voice_stream["voice_settings"] = self.elevenlabs_voice_settings_obj.model_dump()
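Note: VoiceSettings in the v1.x ElevenLabs SDK is a pydantic model, so the model_dump() check above turns it into the plain dict that the streaming call expects. A standalone sketch of that conversion (illustrative only; the dict() branch is an added fallback for older pydantic installs and is not taken from this commit):

    from elevenlabs import VoiceSettings

    settings = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
    params = {"voice_id": "Rachel"}
    if hasattr(settings, 'model_dump'):   # pydantic v2
        params["voice_settings"] = settings.model_dump()
    elif hasattr(settings, 'dict'):       # pydantic v1 fallback
        params["voice_settings"] = settings.dict()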
@@ -340,10 +337,8 @@ class VisualEngine:
         except AttributeError as ae_11l_sdk: logger.error(f"AttributeError with ElevenLabs SDK client: {ae_11l_sdk}. SDK version/methods might differ.", exc_info=True); return None
         except Exception as e_11l_general_audio: logger.error(f"General error during ElevenLabs audio generation: {e_11l_general_audio}", exc_info=True); return None

-
     def assemble_animatic_from_assets(self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24):
         # (Keep as in the version with robust image processing, C-contiguous array, debug saves, and pix_fmt)
-        # ... (This extensive method is assumed to be largely correct from the previous iteration focusing on blank video issues)
         if not asset_data_list: logger.warning("No assets for animatic."); return None
         processed_moviepy_clips_list = []; narration_audio_clip_mvpy = None; final_video_output_clip = None
         logger.info(f"Assembling from {len(asset_data_list)} assets. Target Frame: {self.video_frame_size}.")