mgbam committed on
Commit
a6c3f4c
·
verified ·
1 Parent(s): 4e3ee0b

Update core/visual_engine.py

Browse files
Files changed (1) hide show
  1. core/visual_engine.py +50 -60
core/visual_engine.py CHANGED
@@ -1,7 +1,7 @@
1
  # core/visual_engine.py
2
  from PIL import Image, ImageDraw, ImageFont, ImageOps
3
  import base64
4
- import mimetypes
5
  import numpy as np
6
  import os
7
  import openai # OpenAI v1.x.x+
@@ -11,36 +11,42 @@ import time
11
  import random
12
  import logging
13
 
 
14
  from moviepy.editor import (ImageClip, VideoFileClip, concatenate_videoclips, TextClip,
15
  CompositeVideoClip, AudioFileClip)
16
  import moviepy.video.fx.all as vfx
17
 
18
- try: # MONKEY PATCH for Pillow/MoviePy compatibility
19
- if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'):
 
20
  if not hasattr(Image, 'ANTIALIAS'): Image.ANTIALIAS = Image.Resampling.LANCZOS
21
- elif hasattr(Image, 'LANCZOS'):
22
  if not hasattr(Image, 'ANTIALIAS'): Image.ANTIALIAS = Image.LANCZOS
23
  elif not hasattr(Image, 'ANTIALIAS'):
24
- print("WARNING: Pillow version lacks common Resampling or ANTIALIAS. MoviePy effects might fail.")
25
- except Exception as e_mp: print(f"WARNING: ANTIALIAS monkey-patch error: {e_mp}")
 
26
 
27
  logger = logging.getLogger(__name__)
28
- # logger.setLevel(logging.DEBUG) # Uncomment for maximum verbosity
29
 
 
30
  ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
31
  try:
32
  from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
33
  from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
34
  ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
35
- ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components imported.")
36
- except Exception as e_11l_imp: logger.warning(f"ElevenLabs client import failed: {e_11l_imp}. Audio disabled.")
 
37
 
38
  RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
39
  try:
40
  from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
41
  RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
42
- logger.info("RunwayML SDK imported.")
43
- except Exception as e_rwy_imp: logger.warning(f"RunwayML SDK import failed: {e_rwy_imp}. RunwayML disabled.")
 
44
 
45
 
46
  class VisualEngine:
@@ -60,8 +66,7 @@ class VisualEngine:
60
  else: logger.warning("Preferred Pillow font not found. Default.")
61
  self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
62
  self.video_frame_size = (1280, 720)
63
- self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None
64
- self.elevenlabs_voice_id = default_elevenlabs_voice_id # Set initial voice ID from constructor
65
  if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
66
  else: self.elevenlabs_voice_settings_obj = None
67
  self.pexels_api_key = None; self.USE_PEXELS = False
@@ -72,31 +77,13 @@ class VisualEngine:
72
  logger.info("VisualEngine initialized.")
73
 
74
  def set_openai_api_key(self, api_key_value): self.openai_api_key = api_key_value; self.USE_AI_IMAGE_GENERATION = bool(api_key_value); logger.info(f"DALL-E status: {'Ready' if self.USE_AI_IMAGE_GENERATION else 'Disabled'}")
75
-
76
- # <<< CORRECTED METHOD SIGNATURE AND LOGIC >>>
77
  def set_elevenlabs_api_key(self, api_key_value, voice_id_from_secret=None):
78
- self.elevenlabs_api_key = api_key_value # Store the API key
79
-
80
- if voice_id_from_secret: # If a specific voice ID is passed, update the instance's default
81
- self.elevenlabs_voice_id = voice_id_from_secret
82
- logger.info(f"ElevenLabs Voice ID updated to: {self.elevenlabs_voice_id} via set_elevenlabs_api_key.")
83
- # If voice_id_from_secret is None, self.elevenlabs_voice_id retains the value from __init__
84
-
85
  if api_key_value and ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient:
86
- try:
87
- self.elevenlabs_client_instance = ElevenLabsAPIClient(api_key=api_key_value)
88
- self.USE_ELEVENLABS = bool(self.elevenlabs_client_instance)
89
- logger.info(f"ElevenLabs Client service status: {'Ready' if self.USE_ELEVENLABS else 'Failed Initialization'} (Using Voice ID: {self.elevenlabs_voice_id})")
90
- except Exception as e_11l_setkey_init:
91
- logger.error(f"ElevenLabs client initialization error during set_elevenlabs_api_key: {e_11l_setkey_init}. Service Disabled.", exc_info=True)
92
- self.USE_ELEVENLABS = False
93
- self.elevenlabs_client_instance = None
94
- else:
95
- self.USE_ELEVENLABS = False
96
- self.elevenlabs_client_instance = None
97
- if not api_key_value: logger.info(f"ElevenLabs Service Disabled (API key not provided).")
98
- elif not (ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient): logger.info(f"ElevenLabs Service Disabled (SDK issue).")
99
-
100
  def set_pexels_api_key(self, api_key_value): self.pexels_api_key = api_key_value; self.USE_PEXELS = bool(api_key_value); logger.info(f"Pexels status: {'Ready' if self.USE_PEXELS else 'Disabled'}")
101
  def set_runway_api_key(self, api_key_value):
102
  self.runway_api_key = api_key_value
@@ -113,35 +100,38 @@ class VisualEngine:
113
  else: logger.warning("RunwayML SDK not imported. Service disabled."); self.USE_RUNWAYML = False
114
  else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")
115
 
116
- # ... (Rest of the methods: _image_to_data_uri, _map_resolution_to_runway_ratio, _get_text_dimensions,
117
- # _create_placeholder_image_content, _search_pexels_image, _generate_video_clip_with_runwayml,
118
- # _create_placeholder_video_content, generate_scene_asset, generate_narration_audio,
119
- # assemble_animatic_from_assets - keep these as they were in the last fully corrected version
120
- # that addressed the previous syntax errors and had robust image processing for MoviePy)
121
-
122
- # For brevity, I'm re-pasting only the corrected _create_placeholder_image_content and _search_pexels_image
123
- # and assuming the other long methods like generate_scene_asset and assemble_animatic_from_assets
124
- # are taken from the previous "expertly crafted" full version which already had robust logic.
125
- # Make sure to use the complete, most up-to-date versions of ALL methods.
126
-
127
  def _image_to_data_uri(self, image_path):
128
- # (Implementation from before)
129
- try: mime_type,_=mimetypes.guess_type(image_path)
130
- if not mime_type:ext=os.path.splitext(image_path)[1].lower();mime_map={".png":"image/png",".jpg":"image/jpeg",".jpeg":"image/jpeg",".webp":"image/webp"};mime_type=mime_map.get(ext,"application/octet-stream");
131
- if mime_type=="application/octet-stream":logger.warning(f"Unknown MIME for {image_path}, using {mime_type}.")
132
- with open(image_path,"rb")as image_file:encoded_string=base64.b64encode(image_file.read()).decode('utf-8')
133
- data_uri=f"data:{mime_type};base64,{encoded_string}";logger.debug(f"Data URI for {os.path.basename(image_path)} (MIME:{mime_type}): {data_uri[:100]}...");return data_uri
134
- except FileNotFoundError:logger.error(f"Img not found {image_path} for data URI.");return None
135
- except Exception as e:logger.error(f"Error converting {image_path} to data URI:{e}",exc_info=True);return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
 
137
  def _map_resolution_to_runway_ratio(self, width, height):
138
- # (Implementation from before)
139
  ratio_str=f"{width}:{height}";supported_ratios_gen4=["1280:720","720:1280","1104:832","832:1104","960:960","1584:672"];
140
  if ratio_str in supported_ratios_gen4:return ratio_str
141
  logger.warning(f"Res {ratio_str} not in Gen-4 list. Default 1280:720.");return "1280:720"
142
 
143
  def _get_text_dimensions(self, text_content, font_object_pil):
144
- # (Implementation from before)
145
  dch=getattr(font_object_pil,'size',self.active_font_size_pil);
146
  if not text_content:return 0,dch
147
  try:
@@ -271,7 +261,7 @@ class VisualEngine:
271
  except Exception as e_cl_phv: logger.warning(f"Ignoring error closing placeholder TextClip: {e_cl_phv}")
272
 
273
  def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
274
- scene_data_dict, scene_identifier_fn_base, # Changed scene_data to scene_data_dict
275
  generate_as_video_clip_flag=False, runway_target_dur_val=5):
276
  # (Corrected DALL-E loop from previous response)
277
  base_name_asset, _ = os.path.splitext(scene_identifier_fn_base)
@@ -312,9 +302,9 @@ class VisualEngine:
312
  return asset_info_result
313
 
314
  def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
315
- # <<< CORRECTED VERSION OF THIS METHOD >>>
316
  if not self.USE_ELEVENLABS or not self.elevenlabs_client_instance or not text_to_narrate:
317
- logger.info("ElevenLabs conditions not met (service disabled, client not init, or no text). Skipping audio generation.")
318
  return None
319
  audio_filepath_narration = os.path.join(self.output_dir, output_filename)
320
  try:
 
1
  # core/visual_engine.py
2
  from PIL import Image, ImageDraw, ImageFont, ImageOps
3
  import base64
4
+ import mimetypes # For Data URI
5
  import numpy as np
6
  import os
7
  import openai # OpenAI v1.x.x+
 
11
  import random
12
  import logging
13
 
14
+ # --- MoviePy Imports ---
15
  from moviepy.editor import (ImageClip, VideoFileClip, concatenate_videoclips, TextClip,
16
  CompositeVideoClip, AudioFileClip)
17
  import moviepy.video.fx.all as vfx
18
 
19
+ # --- MONKEY PATCH for Pillow/MoviePy compatibility ---
20
try:
    # MoviePy still looks up Image.ANTIALIAS; only act when Pillow no longer
    # provides that attribute, and never overwrite an existing one.
    if not hasattr(Image, 'ANTIALIAS'):
        if hasattr(Image, 'Resampling') and hasattr(Image.Resampling, 'LANCZOS'):
            # Filters grouped under the Image.Resampling namespace.
            Image.ANTIALIAS = Image.Resampling.LANCZOS
        elif hasattr(Image, 'LANCZOS'):
            # Filter constant exposed directly on the Image module.
            Image.ANTIALIAS = Image.LANCZOS
        else:
            # Nothing suitable to alias; warn on stdout because the module
            # logger has not been created yet at this point of the import.
            print("WARNING: Pillow version lacks common Resampling attributes or ANTIALIAS. MoviePy effects might fail or look different.")
except Exception as patch_error:
    # The patch is best-effort: report the problem and let the import continue.
    print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {patch_error}")
29
 
30
  logger = logging.getLogger(__name__)
31
+ # logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging during development
32
 
33
+ # --- External Service Client Imports ---
34
  ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
35
  try:
36
  from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
37
  from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
38
  ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
39
+ ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
40
+ except ImportError: logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
41
+ except Exception as e_eleven_import_general: logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")
42
 
43
  RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
44
  try:
45
  from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
46
  RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
47
+ logger.info("RunwayML SDK (runwayml) imported successfully.")
48
+ except ImportError: logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
49
+ except Exception as e_runway_sdk_import_general: logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")
50
 
51
 
52
  class VisualEngine:
 
66
  else: logger.warning("Preferred Pillow font not found. Default.")
67
  self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
68
  self.video_frame_size = (1280, 720)
69
+ self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None; self.elevenlabs_voice_id = default_elevenlabs_voice_id
 
70
  if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
71
  else: self.elevenlabs_voice_settings_obj = None
72
  self.pexels_api_key = None; self.USE_PEXELS = False
 
77
  logger.info("VisualEngine initialized.")
78
 
79
  def set_openai_api_key(self, api_key_value): self.openai_api_key = api_key_value; self.USE_AI_IMAGE_GENERATION = bool(api_key_value); logger.info(f"DALL-E status: {'Ready' if self.USE_AI_IMAGE_GENERATION else 'Disabled'}")
 
 
80
  def set_elevenlabs_api_key(self, api_key_value, voice_id_from_secret=None):
81
+ self.elevenlabs_api_key = api_key_value
82
+ if voice_id_from_secret: self.elevenlabs_voice_id = voice_id_from_secret; logger.info(f"11L Voice ID updated to: {self.elevenlabs_voice_id} via set_elevenlabs_api_key.")
 
 
 
 
 
83
  if api_key_value and ELEVENLABS_CLIENT_IMPORTED and ElevenLabsAPIClient:
84
+ try: self.elevenlabs_client_instance = ElevenLabsAPIClient(api_key=api_key_value); self.USE_ELEVENLABS = bool(self.elevenlabs_client_instance); logger.info(f"11L Client: {'Ready' if self.USE_ELEVENLABS else 'Failed'} (Voice: {self.elevenlabs_voice_id})")
85
+ except Exception as e_11l_setkey_init: logger.error(f"11L client init error: {e_11l_setkey_init}. Disabled.", exc_info=True); self.USE_ELEVENLABS=False; self.elevenlabs_client_instance=None
86
+ else: self.USE_ELEVENLABS = False; logger.info(f"11L Disabled (key/SDK).")
 
 
 
 
 
 
 
 
 
 
 
87
  def set_pexels_api_key(self, api_key_value): self.pexels_api_key = api_key_value; self.USE_PEXELS = bool(api_key_value); logger.info(f"Pexels status: {'Ready' if self.USE_PEXELS else 'Disabled'}")
88
  def set_runway_api_key(self, api_key_value):
89
  self.runway_api_key = api_key_value
 
100
  else: logger.warning("RunwayML SDK not imported. Service disabled."); self.USE_RUNWAYML = False
101
  else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")
102
 
 
 
 
 
 
 
 
 
 
 
 
103
  def _image_to_data_uri(self, image_path):
104
+ # <<< CORRECTED METHOD >>>
105
+ try:
106
+ mime_type, _ = mimetypes.guess_type(image_path)
107
+ if not mime_type:
108
+ ext = os.path.splitext(image_path)[1].lower()
109
+ mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".webp": "image/webp"}
110
+ mime_type = mime_map.get(ext, "application/octet-stream")
111
+ if mime_type == "application/octet-stream":
112
+ logger.warning(f"Could not determine MIME type for {image_path} from extension '{ext}', using default {mime_type}.")
113
+
114
+ with open(image_path, "rb") as image_file_handle:
115
+ image_binary_data = image_file_handle.read()
116
+
117
+ encoded_base64_string = base64.b64encode(image_binary_data).decode('utf-8')
118
+
119
+ data_uri_string = f"data:{mime_type};base64,{encoded_base64_string}"
120
+ logger.debug(f"Generated data URI for {os.path.basename(image_path)} (MIME: {mime_type}). Data URI starts with: {data_uri_string[:100]}...")
121
+ return data_uri_string
122
+ except FileNotFoundError:
123
+ logger.error(f"Image file not found at path: '{image_path}' when trying to create data URI.")
124
+ return None
125
+ except Exception as e_data_uri_conversion:
126
+ logger.error(f"Error converting image '{image_path}' to data URI: {e_data_uri_conversion}", exc_info=True)
127
+ return None
128
 
129
  def _map_resolution_to_runway_ratio(self, width, height):
 
130
  ratio_str=f"{width}:{height}";supported_ratios_gen4=["1280:720","720:1280","1104:832","832:1104","960:960","1584:672"];
131
  if ratio_str in supported_ratios_gen4:return ratio_str
132
  logger.warning(f"Res {ratio_str} not in Gen-4 list. Default 1280:720.");return "1280:720"
133
 
134
  def _get_text_dimensions(self, text_content, font_object_pil):
 
135
  dch=getattr(font_object_pil,'size',self.active_font_size_pil);
136
  if not text_content:return 0,dch
137
  try:
 
261
  except Exception as e_cl_phv: logger.warning(f"Ignoring error closing placeholder TextClip: {e_cl_phv}")
262
 
263
  def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
264
+ scene_data_dict, scene_identifier_fn_base,
265
  generate_as_video_clip_flag=False, runway_target_dur_val=5):
266
  # (Corrected DALL-E loop from previous response)
267
  base_name_asset, _ = os.path.splitext(scene_identifier_fn_base)
 
302
  return asset_info_result
303
 
304
  def generate_narration_audio(self, text_to_narrate, output_filename="narration_overall.mp3"):
305
+ # (Corrected version from previous response)
306
  if not self.USE_ELEVENLABS or not self.elevenlabs_client_instance or not text_to_narrate:
307
+ logger.info("ElevenLabs conditions not met. Skipping audio generation.")
308
  return None
309
  audio_filepath_narration = os.path.join(self.output_dir, output_filename)
310
  try: