Update core/visual_engine.py
core/visual_engine.py (CHANGED, +50 -60)
@@ -1,7 +1,7 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont, ImageOps
 import base64
-import mimetypes
+import mimetypes # For Data URI
 import numpy as np
 import os
 import openai # OpenAI v1.x.x+
@@ -11,36 +11,42 @@ import time
 import random
 import logging

+# --- MoviePy Imports ---
 from moviepy.editor import (ImageClip, VideoFileClip, concatenate_videoclips, TextClip,
                             CompositeVideoClip, AudioFileClip)
 import moviepy.video.fx.all as vfx

-
-
+# --- MONKEY PATCH for Pillow/MoviePy compatibility ---
+try:
+    if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'): # Pillow 9+
         if not hasattr(Image, 'ANTIALIAS'): Image.ANTIALIAS = Image.Resampling.LANCZOS
-    elif hasattr(Image, 'LANCZOS'):
+    elif hasattr(Image, 'LANCZOS'): # Pillow 8
         if not hasattr(Image, 'ANTIALIAS'): Image.ANTIALIAS = Image.LANCZOS
     elif not hasattr(Image, 'ANTIALIAS'):
-        print("WARNING: Pillow version lacks common Resampling or ANTIALIAS. MoviePy effects might fail.")
-except Exception as
+        print("WARNING: Pillow version lacks common Resampling attributes or ANTIALIAS. MoviePy effects might fail or look different.")
+except Exception as e_monkey_patch:
+    print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}")

 logger = logging.getLogger(__name__)
-# logger.setLevel(logging.DEBUG) # Uncomment for
+# logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging during development

+# --- External Service Client Imports ---
 ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
 try:
     from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
     from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
     ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
-    ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components imported.")
-except
+    ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
+except ImportError: logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
+except Exception as e_eleven_import_general: logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")

 RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
 try:
     from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
     RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
-    logger.info("RunwayML SDK imported.")
-except
+    logger.info("RunwayML SDK (runwayml) imported successfully.")
+except ImportError: logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
+except Exception as e_runway_sdk_import_general: logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")


 class VisualEngine:
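For context, the monkey patch introduced in this hunk exists because Pillow 10 removed the Image.ANTIALIAS constant that older MoviePy releases still reference when resizing frames. A standalone sketch of the same shim, assuming only that Pillow is installed (nothing here is specific to this repository):

# Pillow compatibility shim: re-create Image.ANTIALIAS on Pillow versions that dropped it.
from PIL import Image

if not hasattr(Image, "ANTIALIAS"):
    if hasattr(Image, "Resampling"):      # Pillow 9.1+ exposes filters on the Resampling enum
        Image.ANTIALIAS = Image.Resampling.LANCZOS
    elif hasattr(Image, "LANCZOS"):       # older Pillow exposes the filter directly
        Image.ANTIALIAS = Image.LANCZOS

# After the shim, code written against the old constant keeps working, e.g.:
# resized = img.resize((640, 360), Image.ANTIALIAS)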
@@ -60,8 +66,7 @@ class VisualEngine:
         else: logger.warning("Preferred Pillow font not found. Default.")
         self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
         self.video_frame_size = (1280, 720)
-        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None
-        self.elevenlabs_voice_id = default_elevenlabs_voice_id # Set initial voice ID from constructor
+        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None; self.elevenlabs_voice_id = default_elevenlabs_voice_id
         if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
         else: self.elevenlabs_voice_settings_obj = None
         self.pexels_api_key = None; self.USE_PEXELS = False
@@ -72,31 +77,13 @@ class VisualEngine:
         logger.info("VisualEngine initialized.")

     def set_openai_api_key(self, api_key_value): self.openai_api_key = api_key_value; self.USE_AI_IMAGE_GENERATION = bool(api_key_value); logger.info(f"DALL-E status: {'Ready' if self.USE_AI_IMAGE_GENERATION else 'Disabled'}")
-
-    # <<< CORRECTED METHOD SIGNATURE AND LOGIC >>>
     def set_elevenlabs_api_key(self, api_key_value, voice_id_from_secret=None):
-        self.elevenlabs_api_key = api_key_value
-
-        if voice_id_from_secret: # If a specific voice ID is passed, update the instance's default
-            self.elevenlabs_voice_id = voice_id_from_secret
-            logger.info(f"ElevenLabs Voice ID updated to: {self.elevenlabs_voice_id} via set_elevenlabs_api_key.")
-        # If voice_id_from_secret is None, self.elevenlabs_voice_id retains the value from __init__
-
+        self.elevenlabs_api_key = api_key_value
+        if voice_id_from_secret: self.elevenlabs_voice_id = voice_id_from_secret; logger.info(f"11L Voice ID updated to: {self.elevenlabs_voice_id} via set_elevenlabs_api_key.")
         if api_key_value and ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient:
-            try:
-
-
-                logger.info(f"ElevenLabs Client service status: {'Ready' if self.USE_ELEVENLABS else 'Failed Initialization'} (Using Voice ID: {self.elevenlabs_voice_id})")
-            except Exception as e_11l_setkey_init:
-                logger.error(f"ElevenLabs client initialization error during set_elevenlabs_api_key: {e_11l_setkey_init}. Service Disabled.", exc_info=True)
-                self.USE_ELEVENLABS = False
-                self.elevenlabs_client_instance = None
-        else:
-            self.USE_ELEVENLABS = False
-            self.elevenlabs_client_instance = None
-            if not api_key_value: logger.info(f"ElevenLabs Service Disabled (API key not provided).")
-            elif not (ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient): logger.info(f"ElevenLabs Service Disabled (SDK issue).")
-
+            try: self.elevenlabs_client_instance = ElevenLabsAPIClient(api_key=api_key_value); self.USE_ELEVENLABS = bool(self.elevenlabs_client_instance); logger.info(f"11L Client: {'Ready' if self.USE_ELEVENLABS else 'Failed'} (Voice: {self.elevenlabs_voice_id})")
+            except Exception as e_11l_setkey_init: logger.error(f"11L client init error: {e_11l_setkey_init}. Disabled.", exc_info=True); self.USE_ELEVENLABS=False; self.elevenlabs_client_instance=None
+        else: self.USE_ELEVENLABS = False; logger.info(f"11L Disabled (key/SDK).")
     def set_pexels_api_key(self, api_key_value): self.pexels_api_key = api_key_value; self.USE_PEXELS = bool(api_key_value); logger.info(f"Pexels status: {'Ready' if self.USE_PEXELS else 'Disabled'}")
     def set_runway_api_key(self, api_key_value):
         self.runway_api_key = api_key_value
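The rewritten setter in this hunk keeps the voice ID chosen in __init__ unless one is explicitly passed in, and only instantiates the ElevenLabs client when both the API key and the imported SDK are available. A hypothetical caller is sketched below; the environment-variable names and the no-argument construction are illustrative assumptions, not taken from this repository:

import os
from core.visual_engine import VisualEngine

# Assumes the constructor's parameters (e.g. default_elevenlabs_voice_id) have defaults.
engine = VisualEngine()

engine.set_openai_api_key(os.getenv("OPENAI_API_KEY"))
# voice_id_from_secret is optional; when omitted, the voice ID set in __init__ is kept.
engine.set_elevenlabs_api_key(os.getenv("ELEVENLABS_API_KEY"),
                              voice_id_from_secret=os.getenv("ELEVENLABS_VOICE_ID"))
engine.set_pexels_api_key(os.getenv("PEXELS_API_KEY"))
engine.set_runway_api_key(os.getenv("RUNWAY_API_KEY"))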
@@ -113,35 +100,38 @@ class VisualEngine:
         else: logger.warning("RunwayML SDK not imported. Service disabled."); self.USE_RUNWAYML = False
         else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")

-    # ... (Rest of the methods: _image_to_data_uri, _map_resolution_to_runway_ratio, _get_text_dimensions,
-    # _create_placeholder_image_content, _search_pexels_image, _generate_video_clip_with_runwayml,
-    # _create_placeholder_video_content, generate_scene_asset, generate_narration_audio,
-    # assemble_animatic_from_assets - keep these as they were in the last fully corrected version
-    # that addressed the previous syntax errors and had robust image processing for MoviePy)
-
-    # For brevity, I'm re-pasting only the corrected _create_placeholder_image_content and _search_pexels_image
-    # and assuming the other long methods like generate_scene_asset and assemble_animatic_from_assets
-    # are taken from the previous "expertly crafted" full version which already had robust logic.
-    # Make sure to use the complete, most up-to-date versions of ALL methods.
-
     def _image_to_data_uri(self, image_path):
-        #
-        try:
-
-
-
-
-
-
+        # <<< CORRECTED METHOD >>>
+        try:
+            mime_type, _ = mimetypes.guess_type(image_path)
+            if not mime_type:
+                ext = os.path.splitext(image_path)[1].lower()
+                mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".webp": "image/webp"}
+                mime_type = mime_map.get(ext, "application/octet-stream")
+                if mime_type == "application/octet-stream":
+                    logger.warning(f"Could not determine MIME type for {image_path} from extension '{ext}', using default {mime_type}.")
+
+            with open(image_path, "rb") as image_file_handle:
+                image_binary_data = image_file_handle.read()
+
+            encoded_base64_string = base64.b64encode(image_binary_data).decode('utf-8')
+
+            data_uri_string = f"data:{mime_type};base64,{encoded_base64_string}"
+            logger.debug(f"Generated data URI for {os.path.basename(image_path)} (MIME: {mime_type}). Data URI starts with: {data_uri_string[:100]}...")
+            return data_uri_string
+        except FileNotFoundError:
+            logger.error(f"Image file not found at path: '{image_path}' when trying to create data URI.")
+            return None
+        except Exception as e_data_uri_conversion:
+            logger.error(f"Error converting image '{image_path}' to data URI: {e_data_uri_conversion}", exc_info=True)
+            return None

     def _map_resolution_to_runway_ratio(self, width, height):
-        # (Implementation from before)
         ratio_str=f"{width}:{height}";supported_ratios_gen4=["1280:720","720:1280","1104:832","832:1104","960:960","1584:672"];
         if ratio_str in supported_ratios_gen4:return ratio_str
         logger.warning(f"Res {ratio_str} not in Gen-4 list. Default 1280:720.");return "1280:720"

     def _get_text_dimensions(self, text_content, font_object_pil):
-        # (Implementation from before)
         dch=getattr(font_object_pil,'size',self.active_font_size_pil);
         if not text_content:return 0,dch
         try:
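The corrected _image_to_data_uri helper above guesses a MIME type, base64-encodes the file, and returns a data: URI string, e.g. for handing a locally generated still to an API that accepts inline images. A minimal standalone equivalent of the same idea, using only the standard library (the function name and usage line are illustrative):

import base64
import mimetypes
import os

def image_to_data_uri(image_path):
    # Prefer the stdlib guess; fall back to a small extension map like the method above.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type:
        ext = os.path.splitext(image_path)[1].lower()
        mime_type = {".png": "image/png", ".jpg": "image/jpeg",
                     ".jpeg": "image/jpeg", ".webp": "image/webp"}.get(ext, "application/octet-stream")
    with open(image_path, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"

# A PNG input yields a string beginning with "data:image/png;base64,iVBORw0..."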
@@ -271,7 +261,7 @@ class VisualEngine:
         except Exception as e_cl_phv: logger.warning(f"Ignoring error closing placeholder TextClip: {e_cl_phv}")

     def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
-                             scene_data_dict, scene_identifier_fn_base,
+                             scene_data_dict, scene_identifier_fn_base,
                              generate_as_video_clip_flag=False, runway_target_dur_val=5):
         # (Corrected DALL-E loop from previous response)
         base_name_asset, _ = os.path.splitext(scene_identifier_fn_base)
@@ -312,9 +302,9 @@ class VisualEngine:
         return asset_info_result

     def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
-        #
+        # (Corrected version from previous response)
         if not self.USE_ELEVENLABS or not self.elevenlabs_client_instance or not text_to_narrate:
-            logger.info("ElevenLabs conditions not met
+            logger.info("ElevenLabs conditions not met. Skipping audio generation.")
             return None
         audio_filepath_narration = os.path.join(self.output_dir, output_filename)
         try:
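Taken together, the methods touched by the last two hunks are the ones an application would call per scene and per script. A hypothetical call sequence using only the signatures visible in this diff; the argument values are placeholders, and the structure of the returned asset info is not asserted here:

# engine is an initialized VisualEngine with API keys already set via the setters above.
asset_info = engine.generate_scene_asset(
    image_generation_prompt_text="Rain-soaked neon alley, cinematic wide shot",
    motion_prompt_text_for_video="slow push-in along the alley",
    scene_data_dict={},                      # whatever per-scene metadata the app tracks
    scene_identifier_fn_base="scene_01.png",
    generate_as_video_clip_flag=False,       # the runway_target_dur_val only matters for video clips
    runway_target_dur_val=5,
)

narration_path = engine.generate_narration_audio(
    "Night falls over the city...",          # placeholder narration text
    output_filename="narration_overall.mp3",
)
# narration_path is None when the ElevenLabs key, client, or text is missing, per the guard above.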