Update core/visual_engine.py
core/visual_engine.py  +46 -51  CHANGED
@@ -1,10 +1,10 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont, ImageOps
 import base64
-import mimetypes
+import mimetypes # For Data URI
 import numpy as np
 import os
-import openai #
+import openai # OpenAI v1.x.x+
 import requests
 import io
 import time
@@ -28,63 +28,45 @@ except Exception as e_monkey_patch:
     print(f"WARNING: An unexpected error occurred during Pillow ANTIALIAS monkey-patch: {e_monkey_patch}")

 logger = logging.getLogger(__name__)
-# logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging
+# logger.setLevel(logging.DEBUG) # Uncomment for verbose debugging during development

 # --- External Service Client Imports ---
-ELEVENLABS_CLIENT_IMPORTED = False
-ElevenLabsAPIClient = None
-Voice = None
-VoiceSettings = None
+ELEVENLABS_CLIENT_IMPORTED = False; ElevenLabsAPIClient = None; Voice = None; VoiceSettings = None
 try:
     from elevenlabs.client import ElevenLabs as ImportedElevenLabsClient
     from elevenlabs import Voice as ImportedVoice, VoiceSettings as ImportedVoiceSettings
-    ElevenLabsAPIClient = ImportedElevenLabsClient
-
-
-
-    logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
-except ImportError:
-    logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
-except Exception as e_eleven_import_general:
-    logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")
+    ElevenLabsAPIClient = ImportedElevenLabsClient; Voice = ImportedVoice; VoiceSettings = ImportedVoiceSettings
+    ELEVENLABS_CLIENT_IMPORTED = True; logger.info("ElevenLabs client components (SDK v1.x.x pattern) imported successfully.")
+except ImportError: logger.warning("ElevenLabs SDK not found (expected 'pip install elevenlabs>=1.0.0'). Audio generation will be disabled.")
+except Exception as e_eleven_import_general: logger.warning(f"General error importing ElevenLabs client components: {e_eleven_import_general}. Audio generation disabled.")

-RUNWAYML_SDK_IMPORTED = False
-RunwayMLAPIClientClass = None
+RUNWAYML_SDK_IMPORTED = False; RunwayMLAPIClientClass = None
 try:
     from runwayml import RunwayML as ImportedRunwayMLAPIClientClass
-    RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass
-    RUNWAYML_SDK_IMPORTED = True
+    RunwayMLAPIClientClass = ImportedRunwayMLAPIClientClass; RUNWAYML_SDK_IMPORTED = True
     logger.info("RunwayML SDK (runwayml) imported successfully.")
-except ImportError:
-
-except Exception as e_runway_sdk_import_general:
-    logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")
+except ImportError: logger.warning("RunwayML SDK not found (pip install runwayml). RunwayML video generation will be disabled.")
+except Exception as e_runway_sdk_import_general: logger.warning(f"General error importing RunwayML SDK: {e_runway_sdk_import_general}. RunwayML features disabled.")


 class VisualEngine:
-    DEFAULT_FONT_SIZE_PIL = 10
-
-
-    VIDEO_OVERLAY_FONT_COLOR = 'white'
-    DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'
-    PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'
+    DEFAULT_FONT_SIZE_PIL = 10; PREFERRED_FONT_SIZE_PIL = 20
+    VIDEO_OVERLAY_FONT_SIZE = 30; VIDEO_OVERLAY_FONT_COLOR = 'white'
+    DEFAULT_MOVIEPY_FONT = 'DejaVu-Sans-Bold'; PREFERRED_MOVIEPY_FONT = 'Liberation-Sans-Bold'

     def __init__(self, output_dir="temp_cinegen_media", default_elevenlabs_voice_id="Rachel"):
-        self.output_dir = output_dir
-        os.makedirs(self.output_dir, exist_ok=True)
+        self.output_dir = output_dir; os.makedirs(self.output_dir, exist_ok=True)
         self.font_filename_pil_preference = "DejaVuSans-Bold.ttf"
         font_paths_to_try = [ self.font_filename_pil_preference, f"/usr/share/fonts/truetype/dejavu/{self.font_filename_pil_preference}", f"/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf", f"/System/Library/Fonts/Supplemental/Arial.ttf", f"C:/Windows/Fonts/arial.ttf", f"/usr/local/share/fonts/truetype/mycustomfonts/arial.ttf"]
         self.resolved_font_path_pil = next((p for p in font_paths_to_try if os.path.exists(p)), None)
-        self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL
-        self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
+        self.active_font_pil = ImageFont.load_default(); self.active_font_size_pil = self.DEFAULT_FONT_SIZE_PIL; self.active_moviepy_font_name = self.DEFAULT_MOVIEPY_FONT
         if self.resolved_font_path_pil:
             try: self.active_font_pil = ImageFont.truetype(self.resolved_font_path_pil, self.PREFERRED_FONT_SIZE_PIL); self.active_font_size_pil = self.PREFERRED_FONT_SIZE_PIL; logger.info(f"Pillow font: {self.resolved_font_path_pil} sz {self.active_font_size_pil}."); self.active_moviepy_font_name = 'DejaVu-Sans-Bold' if "dejavu" in self.resolved_font_path_pil.lower() else ('Liberation-Sans-Bold' if "liberation" in self.resolved_font_path_pil.lower() else self.DEFAULT_MOVIEPY_FONT)
             except IOError as e_font_load_io: logger.error(f"Pillow font IOError '{self.resolved_font_path_pil}': {e_font_load_io}. Default.")
         else: logger.warning("Preferred Pillow font not found. Default.")
         self.openai_api_key = None; self.USE_AI_IMAGE_GENERATION = False; self.dalle_model = "dall-e-3"; self.image_size_dalle3 = "1792x1024"
         self.video_frame_size = (1280, 720)
-        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None
-        self.elevenlabs_voice_id = default_elevenlabs_voice_id
+        self.elevenlabs_api_key = None; self.USE_ELEVENLABS = False; self.elevenlabs_client_instance = None; self.elevenlabs_voice_id = default_elevenlabs_voice_id
         if VoiceSettings and ELEVENLABS_CLIENT_IMPORTED: self.elevenlabs_voice_settings_obj = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
         else: self.elevenlabs_voice_settings_obj = None
         self.pexels_api_key = None; self.USE_PEXELS = False
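Note: the consolidated guard blocks above follow the usual optional-dependency pattern: try the import once at module load, record a boolean flag, and have later code check the flag instead of re-importing. A minimal standalone sketch of that pattern (illustrative only, with a deliberately hypothetical package name, not part of this commit):

    import logging

    logger = logging.getLogger(__name__)

    OPTIONAL_SDK_IMPORTED = False   # mirrors ELEVENLABS_CLIENT_IMPORTED / RUNWAYML_SDK_IMPORTED
    try:
        import some_optional_sdk    # hypothetical package; any optional dependency works the same way
        OPTIONAL_SDK_IMPORTED = True
    except ImportError:
        logger.warning("some_optional_sdk not installed; dependent features stay disabled.")

    def feature_that_needs_sdk():
        if not OPTIONAL_SDK_IMPORTED:
            logger.info("Feature skipped: optional SDK unavailable.")
            return None
        return some_optional_sdk    # only touch the module behind the guard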
@@ -119,14 +101,28 @@ class VisualEngine:
         else: self.USE_RUNWAYML = False; self.runway_ml_sdk_client_instance = None; logger.info("RunwayML Disabled (no API key).")

     def _image_to_data_uri(self, image_path):
-        #
-        try:
-
-
-
-
-
-
+        # <<< CORRECTED METHOD >>>
+        try:
+            mime_type, _ = mimetypes.guess_type(image_path)
+            if not mime_type:
+                ext = os.path.splitext(image_path)[1].lower()
+                mime_map = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg", ".webp": "image/webp"}
+                mime_type = mime_map.get(ext, "application/octet-stream")
+                if mime_type == "application/octet-stream":
+                    logger.warning(f"Could not determine MIME type for {image_path} from extension '{ext}', using default {mime_type}.")
+
+            with open(image_path, "rb") as image_file:
+                encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+
+            data_uri = f"data:{mime_type};base64,{encoded_string}"
+            logger.debug(f"Generated data URI for {os.path.basename(image_path)} (MIME: {mime_type}, first 100 chars): {data_uri[:100]}...")
+            return data_uri
+        except FileNotFoundError:
+            logger.error(f"Image file not found at path: {image_path} during data URI conversion.")
+            return None
+        except Exception as e_data_uri:
+            logger.error(f"Error converting image '{image_path}' to data URI: {e_data_uri}", exc_info=True)
+            return None

     def _map_resolution_to_runway_ratio(self, width, height):
         # (Implementation from before)
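Note: a data URI embeds the file bytes directly in a string of the form data:<mime>;base64,<payload>, which lets a locally generated still be passed inside a JSON request body instead of being uploaded separately. A rough usage sketch of the corrected helper (illustrative only; the path and assertion are examples, not part of this commit):

    from core.visual_engine import VisualEngine

    engine = VisualEngine(output_dir="temp_cinegen_media")
    uri = engine._image_to_data_uri("temp_cinegen_media/scene_01.png")  # example path
    if uri:
        # e.g. "data:image/png;base64,iVBORw0KGgo..." for a PNG input
        assert uri.startswith("data:image/png;base64,")
        print(f"Data URI length: {len(uri)} characters")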
@@ -142,7 +138,7 @@ class VisualEngine:
             if hasattr(font_object_pil,'getbbox'):bb=font_object_pil.getbbox(text_content);w=bb[2]-bb[0];h=bb[3]-bb[1];return w,h if h>0 else dch
             elif hasattr(font_object_pil,'getsize'):w,h=font_object_pil.getsize(text_content);return w,h if h>0 else dch
             else:return int(len(text_content)*dch*0.6),int(dch*1.2)
-        except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2)
+        except Exception as e_getdim_inner:logger.warning(f"Error in _get_text_dimensions:{e_getdim_inner}");return int(len(text_content)*self.active_font_size_pil*0.6),int(self.active_font_size_pil*1.2)

     def _create_placeholder_image_content(self,text_description,filename,size=None):
         # (Corrected version from previous response)
@@ -250,7 +246,7 @@ class VisualEngine:
         if size_ph is None: size_ph = self.video_frame_size
         filepath_ph = os.path.join(self.output_dir, filename_ph)
         text_clip_ph = None
-        try:
+        try:
             text_clip_ph = TextClip(text_desc_ph, fontsize=50, color='white', font=self.video_overlay_font,
                                     bg_color='black', size=size_ph, method='caption').set_duration(duration_ph)
             text_clip_ph.write_videofile(filepath_ph, fps=24, codec='libx264', preset='ultrafast', logger=None, threads=2)
@@ -259,9 +255,10 @@ class VisualEngine:
         except Exception as e_ph_vid:
             logger.error(f"Failed to create generic placeholder video '{filepath_ph}': {e_ph_vid}", exc_info=True)
             return None
-        finally:
+        finally:
             if text_clip_ph and hasattr(text_clip_ph, 'close'):
-                text_clip_ph.close()
+                try: text_clip_ph.close() # Attempt to close
+                except Exception as e_close_ph_clip: logger.warning(f"Ignoring error closing placeholder TextClip: {e_close_ph_clip}")

     def generate_scene_asset(self, image_generation_prompt_text, motion_prompt_text_for_video,
                              scene_data_dict, scene_identifier_fn_base,
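Note: the guarded close() above keeps a failure while releasing the TextClip from masking whatever error the try block actually raised. The same pattern in isolation (illustrative only, reusing the MoviePy calls already present in this file; the output filename is an example):

    from moviepy.editor import TextClip

    clip = None
    try:
        clip = TextClip("Placeholder", fontsize=50, color='white', bg_color='black',
                        size=(1280, 720), method='caption').set_duration(2)
        clip.write_videofile("placeholder_demo.mp4", fps=24, codec='libx264', preset='ultrafast', logger=None)
    except Exception as e_render:
        print(f"Placeholder render failed: {e_render}")
    finally:
        if clip and hasattr(clip, 'close'):
            try: clip.close()  # release readers/subprocesses
            except Exception as e_close: print(f"Ignoring close error: {e_close}")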
@@ -326,7 +323,7 @@ class VisualEngine:
                     logger.info(f"ElevenLabs audio (non-streamed) saved successfully to: {audio_filepath_narration}"); return audio_filepath_narration
                 else: logger.error("No recognized audio generation method found on the ElevenLabs client instance."); return None

-            if audio_stream_method_11l:
+            if audio_stream_method_11l:
                 params_for_voice_stream = {"voice_id": str(self.elevenlabs_voice_id)}
                 if self.elevenlabs_voice_settings_obj:
                     if hasattr(self.elevenlabs_voice_settings_obj, 'model_dump'): params_for_voice_stream["voice_settings"] = self.elevenlabs_voice_settings_obj.model_dump()
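Note: VoiceSettings in the v1.x ElevenLabs SDK is a pydantic model, so the model_dump() check above turns it into the plain dict that the streaming call expects. A standalone sketch of that conversion (illustrative only; the dict() branch is an added fallback for older pydantic installs and is not taken from this commit):

    from elevenlabs import VoiceSettings

    settings = VoiceSettings(stability=0.60, similarity_boost=0.80, style=0.15, use_speaker_boost=True)
    params = {"voice_id": "Rachel"}
    if hasattr(settings, 'model_dump'):   # pydantic v2
        params["voice_settings"] = settings.model_dump()
    elif hasattr(settings, 'dict'):       # pydantic v1 fallback
        params["voice_settings"] = settings.dict()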
@@ -340,10 +337,8 @@ class VisualEngine:
         except AttributeError as ae_11l_sdk: logger.error(f"AttributeError with ElevenLabs SDK client: {ae_11l_sdk}. SDK version/methods might differ.", exc_info=True); return None
         except Exception as e_11l_general_audio: logger.error(f"General error during ElevenLabs audio generation: {e_11l_general_audio}", exc_info=True); return None

-
     def assemble_animatic_from_assets(self, asset_data_list, overall_narration_path=None, output_filename="final_video.mp4", fps=24):
         # (Keep as in the version with robust image processing, C-contiguous array, debug saves, and pix_fmt)
-        # ... (This extensive method is assumed to be largely correct from the previous iteration focusing on blank video issues)
         if not asset_data_list: logger.warning("No assets for animatic."); return None
         processed_moviepy_clips_list = []; narration_audio_clip_mvpy = None; final_video_output_clip = None
         logger.info(f"Assembling from {len(asset_data_list)} assets. Target Frame: {self.video_frame_size}.")