Update core/visual_engine.py

core/visual_engine.py  CHANGED  (+125 −98)
@@ -1,10 +1,13 @@
 # core/visual_engine.py
 from PIL import Image, ImageDraw, ImageFont
-from moviepy.editor import ImageClip, concatenate_videoclips
+from moviepy.editor import (ImageClip, concatenate_videoclips, TextClip,
+                            CompositeVideoClip, vfx)  # Added vfx for effects
+import moviepy.video.fx.all as vfx  # More explicit import for resize
+import numpy as np  # For converting PIL images to numpy arrays for moviepy
 import os
-import openai
-import requests
-import io
+import openai
+import requests
+import io
 
 class VisualEngine:
     def __init__(self, output_dir="temp_generated_media"):
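Both new imports bind the same name: `vfx` pulled from moviepy.editor and `import moviepy.video.fx.all as vfx` refer to the same effects module in moviepy 1.x, and the second binding is the one the later vfx.resize / vfx.fadein calls resolve to, so one of the two imports is redundant. A minimal sanity check, assuming moviepy 1.x:

from moviepy.editor import vfx as vfx_from_editor
import moviepy.video.fx.all as vfx

# In moviepy 1.x both names point at the module moviepy.video.fx.all,
# so keeping only one of the two imports behaves identically.
assert vfx_from_editor is vfx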
@@ -13,8 +16,13 @@ class VisualEngine:
 
         self.font_filename = "arial.ttf"
         self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
-        self.font_size_pil = 24
-
+        self.font_size_pil = 24  # For placeholder images
+        self.video_overlay_font_size = 36  # For text overlays on video
+        self.video_overlay_font_color = 'white'
+        # For video overlays, try to use a system font that moviepy/ImageMagick can find,
+        # or provide a path to a .ttf file for TextClip's font parameter.
+        self.video_overlay_font = 'Arial'  # Generic name ImageMagick might find; or use self.font_path_in_container
+
         try:
             self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
             print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
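The generic 'Arial' name only works if ImageMagick, which moviepy's TextClip shells out to, can resolve it; inside a slim container it often cannot, which is why the .ttf path is kept as a fallback. A quick way to check which font names are usable (a sketch, assuming moviepy 1.x with ImageMagick installed):

from moviepy.editor import TextClip

fonts = TextClip.list('font')  # asks ImageMagick for the font names it knows
print('Arial available:', any('Arial' in f for f in fonts))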
@@ -23,39 +31,27 @@
             self.font = ImageFont.load_default()
             self.font_size_pil = 11
 
-        # --- OpenAI API Client Setup ---
         self.openai_api_key = None
-        self.USE_AI_IMAGE_GENERATION = False
-
-        try:
-            # Try to get the API key from Hugging Face secrets (via Streamlit's secrets).
-            # This assumes app.py has loaded st.secrets["OPENAI_API_KEY"] into st.session_state.
-            # A better way for a library class is to pass the key in or have it set globally.
-            # For now, assume it will be set via a method, or directly if running outside a Streamlit context.
-            # In app.py, you would do: st.session_state.visual_engine.set_openai_api_key(st.secrets["OPENAI_API_KEY"])
-            pass  # Key will be set by the set_openai_api_key method
-        except Exception as e:
-            print(f"OpenAI API key not immediately available for VisualEngine: {e}")
-
-        # You can choose DALL-E 2 or DALL-E 3; DALL-E 3 is generally better.
-        # For DALL-E 3, the 'model' parameter is "dall-e-3".
-        # For DALL-E 2, the 'model' parameter is "dall-e-2" (or implicit in the older image-create call).
+        self.USE_AI_IMAGE_GENERATION = False
         self.dalle_model = "dall-e-3"
-        self.image_size = "1024x1024"
+        self.image_size = "1024x1024"
+        # For DALL-E 3, you might want a slightly larger video frame to accommodate 1024x1024 images.
+        self.video_frame_size = (1024, 576)  # 16:9; DALL-E images will be letterboxed or cropped if not 16:9.
+        # Or (1024, 1024) if you want square video frames.
 
     def set_openai_api_key(self, api_key):
+        # ... (remains the same) ...
         if api_key:
             self.openai_api_key = api_key
-            openai.api_key = self.openai_api_key
+            # openai.api_key = self.openai_api_key  # Older versions; the new client takes the key per call.
             self.USE_AI_IMAGE_GENERATION = True
             print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
         else:
             self.USE_AI_IMAGE_GENERATION = False
             print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
 
-
     def _get_text_dimensions(self, text_content, font_obj):
-        # ...
+        # ... (remains the same) ...
         if text_content == "" or text_content is None:
             return 0, self.font_size_pil
         try:
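The deleted try/except spells out the intended wiring: app.py reads the secret and hands it to the engine rather than the engine discovering it. A sketch of that call site (hypothetical app.py; assumes an OPENAI_API_KEY entry in Streamlit secrets):

# app.py (sketch)
import streamlit as st
from core.visual_engine import VisualEngine

if "visual_engine" not in st.session_state:
    st.session_state.visual_engine = VisualEngine(output_dir="temp_generated_media")

# With a key this enables DALL-E; without one the engine keeps using placeholders.
st.session_state.visual_engine.set_openai_api_key(st.secrets.get("OPENAI_API_KEY"))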
@@ -77,9 +73,8 @@ class VisualEngine:
             height_estimate = self.font_size_pil * 1.2
         return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
 
-
     def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
-        # ...
+        # ... (remains the same) ...
         img = Image.new('RGB', size, color=(30, 30, 60))
         draw = ImageDraw.Draw(img)
         padding = 30
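_get_text_dimensions falls back to estimating width as len(text) times an average character width; on Pillow 8+ the same measurement can be done exactly with textbbox. A sketch of that variant (hypothetical helper, not part of this commit):

from PIL import Image, ImageDraw

def measure_text(text, font):
    # textbbox returns (left, top, right, bottom) of the rendered string
    draw = ImageDraw.Draw(Image.new("RGB", (1, 1)))
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    return right - left, bottom - top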
@@ -126,103 +121,135 @@
             return None
         return filepath
 
-
     def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
+        # ... (DALL-E logic remains the same, including fallback to _create_placeholder_image_content) ...
         filepath = os.path.join(self.output_dir, scene_identifier_filename)
-
         if self.USE_AI_IMAGE_GENERATION and self.openai_api_key:
             try:
                 print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")
-
-                # Note: Prompts for DALL-E 3 are often best if they are quite descriptive.
-                # DALL-E 3 also automatically revises prompts to be more detailed if they are too short.
-                # You might want to consider passing the "revised_prompt" back to the UI if you display it.
-
-                # Using the newer client syntax for openai >= 1.0.0
-                client = openai.OpenAI(api_key=self.openai_api_key)  # Initialize client with key
-
+                client = openai.OpenAI(api_key=self.openai_api_key)
                 response = client.images.generate(
-                    model=self.dalle_model,
-                    prompt=image_prompt_text,
-                    n=1,  # Number of images to generate
-                    size=self.image_size,  # e.g., "1024x1024"
-                    quality="standard",  # or "hd" for DALL-E 3 (hd costs more)
-                    response_format="url"  # Get a URL to download the image
-                    # style="vivid"  # or "natural" for DALL-E 3
+                    model=self.dalle_model, prompt=image_prompt_text, n=1,
+                    size=self.image_size, quality="standard", response_format="url"
                 )
-
                 image_url = response.data[0].url
-                revised_prompt_dalle3 = response.data[0].revised_prompt
-                if revised_prompt_dalle3:
-                    print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")
-
-                # Download the image from the URL
-                image_response = requests.get(image_url, timeout=30)  # Added timeout
-                image_response.raise_for_status()  # Raise an exception for bad status codes
-
-                # Save the image
+                revised_prompt_dalle3 = response.data[0].revised_prompt
+                if revised_prompt_dalle3: print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")
+                image_response = requests.get(image_url, timeout=60)  # Increased timeout for image download
+                image_response.raise_for_status()
                 img_data = Image.open(io.BytesIO(image_response.content))
 
-                img_data.save(filepath)
+                # Ensure image is RGB before saving as PNG (some APIs might return RGBA)
+                if img_data.mode == 'RGBA':
+                    img_data = img_data.convert('RGB')
+
+                img_data.save(filepath)
                 print(f"AI Image (DALL-E) saved: {filepath}")
                 return filepath
             except openai.APIError as e:
                 print(f"OpenAI API Error: {e}")
-                print(f"Status Code: {e.status_code}, Error Type: {e.type}")
-                print(f"Message: {e.message}")
             except requests.exceptions.RequestException as e:
-                print(f"Error downloading DALL-E image: {e}")
+                print(f"Requests Error downloading DALL-E image: {e}")
             except Exception as e:
                 print(f"Generic error during DALL-E image generation: {e}")
-
-            # Fallback to placeholder if any AI generation error occurs
             print("Falling back to placeholder image due to DALL-E error.")
             return self._create_placeholder_image_content(
                 f"[DALL-E Failed] Prompt: {image_prompt_text[:150]}...",
-                scene_identifier_filename
+                scene_identifier_filename, size=self.video_frame_size  # Use video frame size for the placeholder
             )
-
-        else:  # Fallback to placeholder if AI generation is not enabled or API key missing
-            # print(f"AI image generation not enabled/ready. Creating placeholder for: {image_prompt_text[:70]}...")
-            return self._create_placeholder_image_content(image_prompt_text, scene_identifier_filename)
+        else:
+            return self._create_placeholder_image_content(
+                image_prompt_text, scene_identifier_filename, size=self.video_frame_size
+            )
 
 
-    def create_video_from_images(self, image_paths, output_filename="final_video.mp4", fps=24, duration_per_image=3):
-
-        if not image_paths:
-            print("No images provided to create video.")
-            return None
-        valid_image_paths = [p for p in image_paths if p and os.path.exists(p)]
-        if not valid_image_paths:
-            print("No valid image paths found to create video.")
-            return None
-
-        try:
-            clips = []
-            for m_path in valid_image_paths:
-                try:
-                    clip = ImageClip(m_path).set_duration(duration_per_image)
-                    clips.append(clip)
-                except Exception as e_clip:
-                    print(f"Error creating ImageClip for {m_path}: {e_clip}. Skipping.")
-            if not clips:
-                print("Could not create any ImageClips.")
-                return None
-            video_clip = concatenate_videoclips(clips, method="compose")
-            output_path = os.path.join(self.output_dir, output_filename)
-            print(f"Writing video to: {output_path}")
-            video_clip.write_videofile(
-                output_path, fps=fps, codec='libx264', audio_codec='aac',
-                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
-                remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
-            )
-            for clip_to_close in clips: clip_to_close.close()
-            if hasattr(video_clip, 'close'): video_clip.close()
-            print(f"Video successfully created: {output_path}")
-            return output_path
-        except Exception as e:
-            print(f"Error creating video: {e}")
-            return None
+    def create_video_from_images(self, image_data_list, output_filename="final_video.mp4", fps=24, duration_per_image=3):
+        """
+        Creates a video from a list of image file paths and associated text.
+        image_data_list: list of dictionaries, each like:
+            {'path': 'path/to/image.png', 'scene_num': 1, 'key_action': 'Some action'}
+        """
+        if not image_data_list:
+            print("No image data provided to create video.")
+            return None
+
+        print(f"Attempting to create video from {len(image_data_list)} images.")
+        processed_clips = []
+
+        for i, data in enumerate(image_data_list):
+            img_path = data.get('path')
+            scene_num = data.get('scene_num', i + 1)
+            key_action = data.get('key_action', '')
+
+            if not (img_path and os.path.exists(img_path)):
+                print(f"Image path invalid or not found: {img_path}. Skipping for video.")
+                continue
+            try:
+                # Load the image and resize it to fit video_frame_size, preserving aspect ratio (letterbox/pillarbox)
+                pil_image = Image.open(img_path)
+                pil_image.thumbnail(self.video_frame_size, Image.Resampling.LANCZOS)  # resizes in place
+
+                # Create a black background matching video_frame_size
+                background = Image.new('RGB', self.video_frame_size, (0, 0, 0))
+                # Paste the thumbnail onto the center of the background
+                paste_x = (self.video_frame_size[0] - pil_image.width) // 2
+                paste_y = (self.video_frame_size[1] - pil_image.height) // 2
+                background.paste(pil_image, (paste_x, paste_y))
+
+                # Convert the PIL image to a numpy array for MoviePy
+                frame_np = np.array(background)
+                img_clip = ImageClip(frame_np).set_duration(duration_per_image)
+
+                # Simple Ken Burns effect (slight zoom in); end_scale 1.05 = 5% zoom. Adjust for the desired effect.
+                end_scale = 1.05
+                img_clip = img_clip.fx(vfx.resize, lambda t: 1 + (end_scale - 1) * (t / duration_per_image))
+                # Keep it centered while zooming:
+                img_clip = img_clip.set_position('center')
+
+                # Add a text overlay with the scene number and key action
+                overlay_text = f"Scene {scene_num}\n{key_action}"
+                txt_clip = TextClip(overlay_text, fontsize=self.video_overlay_font_size,
+                                    color=self.video_overlay_font_color,
+                                    font=self.video_overlay_font,  # must be findable by ImageMagick
+                                    bg_color='rgba(0,0,0,0.5)',  # semi-transparent black background
+                                    size=(int(img_clip.w * 0.9), None),  # width 90% of the frame, height auto
+                                    method='caption',  # auto-wrap text
+                                    align='West',  # left-align
+                                    kerning=-1
+                                    ).set_duration(duration_per_image - 0.5).set_start(0.25)  # visible for most of the clip
+
+                txt_clip = txt_clip.set_position(('center', 0.85), relative=True)  # 85% from the top, horizontally centered
+
+                # Composite the image and the text
+                video_with_text_overlay = CompositeVideoClip([img_clip, txt_clip], size=self.video_frame_size)
+                processed_clips.append(video_with_text_overlay)
+
+            except Exception as e_clip:
+                print(f"Error processing image/creating clip for {img_path}: {e_clip}. Skipping.")
+
+        if not processed_clips:
+            print("No clips could be processed for the video.")
+            return None
+
+        # Concatenate with crossfade transitions: padding=-0.5 gives a 0.5 s crossfade and requires method="compose".
+        final_video_clip = concatenate_videoclips(processed_clips, padding=-0.5, method="compose").fx(vfx.fadein, 0.5).fx(vfx.fadeout, 0.5)
+
+        output_path = os.path.join(self.output_dir, output_filename)
+        print(f"Writing final video to: {output_path}")
+        try:
+            final_video_clip.write_videofile(
+                output_path, fps=fps, codec='libx264', audio_codec='aac',
+                temp_audiofile=os.path.join(self.output_dir, f'temp-audio-{os.urandom(4).hex()}.m4a'),
+                remove_temp=True, threads=os.cpu_count() or 2, logger='bar'
+            )
+            print(f"Video successfully created: {output_path}")
+            return output_path
+        except Exception as e:
+            print(f"Error writing final video file: {e}")
+            return None
+        finally:  # ensure clips are closed
+            for clip in processed_clips: clip.close()
+            if hasattr(final_video_clip, 'close'): final_video_clip.close()
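Timing note: with padding=-0.5 and method="compose", consecutive clips overlap by 0.5 s, so n clips of d seconds yield a video of n*d - 0.5*(n-1) seconds (three 3 s scenes give 8 s). An end-to-end sketch of driving the updated class (hypothetical scene data; prompts, filenames, and actions are illustrative only):

# sketch: generate one still per scene, then assemble the video
from core.visual_engine import VisualEngine

engine = VisualEngine(output_dir="temp_generated_media")
engine.set_openai_api_key(None)  # no key here, so the placeholder path is exercised

scenes = [
    {"scene_num": 1, "key_action": "Hero enters the archive", "prompt": "A vast neon-lit archive at night"},
    {"scene_num": 2, "key_action": "The map is revealed", "prompt": "Close-up of a glowing holographic map"},
    {"scene_num": 3, "key_action": "The chase begins", "prompt": "Rain-soaked rooftop chase at dawn"},
]

image_data_list = []
for s in scenes:
    path = engine.generate_image_visual(s["prompt"], f"scene_{s['scene_num']}.png")
    if path:
        image_data_list.append({"path": path, "scene_num": s["scene_num"], "key_action": s["key_action"]})

engine.create_video_from_images(image_data_list, output_filename="final_video.mp4", duration_per_image=3)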
|