Update core/visual_engine.py
Browse files- core/visual_engine.py +97 -87
core/visual_engine.py
CHANGED
@@ -2,18 +2,16 @@
|
|
2 |
from PIL import Image, ImageDraw, ImageFont
|
3 |
from moviepy.editor import ImageClip, concatenate_videoclips
|
4 |
import os
|
5 |
-
|
6 |
-
#
|
7 |
-
#
|
8 |
-
# import requests # For API-based image generation
|
9 |
|
10 |
class VisualEngine:
|
11 |
def __init__(self, output_dir="temp_generated_media"):
|
12 |
self.output_dir = output_dir
|
13 |
os.makedirs(self.output_dir, exist_ok=True)
|
14 |
|
15 |
-
|
16 |
-
self.font_filename = "arial.ttf" # Or your chosen font (e.g., DejaVuSans.ttf)
|
17 |
self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
|
18 |
self.font_size_pil = 24
|
19 |
|
@@ -21,36 +19,43 @@ class VisualEngine:
|
|
21 |
self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
|
22 |
print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
|
23 |
except IOError:
|
24 |
-
print(f"Warning: Could not load font from '{self.font_path_in_container}'. "
|
25 |
-
f"Placeholders will use default font.")
|
26 |
self.font = ImageFont.load_default()
|
27 |
self.font_size_pil = 11
|
28 |
|
29 |
-
# ---
|
30 |
-
|
31 |
-
|
32 |
-
# self.USE_AI_IMAGE_GENERATION = False # Set to True when implemented
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
#
|
45 |
-
#
|
46 |
-
#
|
47 |
-
|
48 |
-
#
|
49 |
-
|
50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
|
53 |
def _get_text_dimensions(self, text_content, font_obj):
|
|
|
54 |
if text_content == "" or text_content is None:
|
55 |
return 0, self.font_size_pil
|
56 |
try:
|
@@ -73,17 +78,16 @@ class VisualEngine:
|
|
73 |
return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
|
74 |
|
75 |
|
76 |
-
def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
|
77 |
-
#
|
78 |
img = Image.new('RGB', size, color=(30, 30, 60))
|
79 |
draw = ImageDraw.Draw(img)
|
80 |
-
padding = 30
|
81 |
max_text_width = size[0] - (2 * padding)
|
82 |
lines = []
|
83 |
if not text_description: text_description = "(No description provided for placeholder)"
|
84 |
words = text_description.split()
|
85 |
current_line = ""
|
86 |
-
|
87 |
for word in words:
|
88 |
test_line_candidate = current_line + word + " "
|
89 |
line_width, _ = self._get_text_dimensions(test_line_candidate.strip(), self.font)
|
@@ -99,16 +103,14 @@ class VisualEngine:
|
|
99 |
current_line = ""
|
100 |
if current_line.strip(): lines.append(current_line.strip())
|
101 |
if not lines: lines.append("(Text error in placeholder)")
|
102 |
-
|
103 |
_, single_line_height = self._get_text_dimensions("Tg", self.font)
|
104 |
if single_line_height == 0: single_line_height = self.font_size_pil
|
105 |
line_spacing_factor = 1.3
|
106 |
estimated_line_block_height = len(lines) * single_line_height * line_spacing_factor
|
107 |
y_text = (size[1] - estimated_line_block_height) / 2.0
|
108 |
if y_text < padding: y_text = float(padding)
|
109 |
-
|
110 |
for line_idx, line in enumerate(lines):
|
111 |
-
if line_idx >= 7 and len(lines) > 8:
|
112 |
draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
|
113 |
break
|
114 |
line_width, _ = self._get_text_dimensions(line, self.font)
|
@@ -116,7 +118,6 @@ class VisualEngine:
|
|
116 |
if x_text < padding: x_text = float(padding)
|
117 |
draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
|
118 |
y_text += single_line_height * line_spacing_factor
|
119 |
-
|
120 |
filepath = os.path.join(self.output_dir, filename)
|
121 |
try:
|
122 |
img.save(filepath)
|
@@ -127,49 +128,70 @@ class VisualEngine:
|
|
127 |
|
128 |
|
129 |
def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
|
130 |
-
"""
|
131 |
-
Generates an image visual. Tries AI generation if enabled,
|
132 |
-
otherwise falls back to a placeholder.
|
133 |
-
image_prompt_text: The detailed prompt from Gemini for image generation.
|
134 |
-
scene_identifier_filename: A unique filename for this image (e.g., "scene_1_visual.png").
|
135 |
-
"""
|
136 |
filepath = os.path.join(self.output_dir, scene_identifier_filename)
|
137 |
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
|
171 |
|
172 |
def create_video_from_images(self, image_paths, output_filename="final_video.mp4", fps=1, duration_per_image=3):
|
|
|
173 |
if not image_paths:
|
174 |
print("No images provided to create video.")
|
175 |
return None
|
@@ -178,31 +200,19 @@ class VisualEngine:
|
|
178 |
print("No valid image paths found to create video.")
|
179 |
return None
|
180 |
print(f"Attempting to create video from {len(valid_image_paths)} images.")
|
181 |
-
|
182 |
try:
|
183 |
clips = []
|
184 |
for m_path in valid_image_paths:
|
185 |
try:
|
186 |
-
|
187 |
-
img_for_clip = Image.open(m_path)
|
188 |
-
if img_for_clip.width > 1920 or img_for_clip.height > 1080:
|
189 |
-
img_for_clip.thumbnail((1920, 1080)) # Resize if too large
|
190 |
-
# Save back to a temp path or directly use the PIL image object if ImageClip supports it well
|
191 |
-
# For simplicity, let's assume ImageClip handles PIL Image objects
|
192 |
-
|
193 |
-
clip = ImageClip(m_path).set_duration(duration_per_image) # Using path directly for now
|
194 |
-
# clip = ImageClip(np.array(img_for_clip)).set_duration(duration_per_image) # If using PIL image
|
195 |
clips.append(clip)
|
196 |
except Exception as e_clip:
|
197 |
print(f"Error creating ImageClip for {m_path}: {e_clip}. Skipping.")
|
198 |
-
|
199 |
if not clips:
|
200 |
print("Could not create any ImageClips.")
|
201 |
return None
|
202 |
-
|
203 |
video_clip = concatenate_videoclips(clips, method="compose")
|
204 |
output_path = os.path.join(self.output_dir, output_filename)
|
205 |
-
|
206 |
print(f"Writing video to: {output_path}")
|
207 |
video_clip.write_videofile(
|
208 |
output_path, fps=fps, codec='libx264', audio_codec='aac',
|
|
|
2 |
from PIL import Image, ImageDraw, ImageFont
|
3 |
from moviepy.editor import ImageClip, concatenate_videoclips
|
4 |
import os
|
5 |
+
import openai # Import OpenAI library
|
6 |
+
import requests # To download images from URLs
|
7 |
+
import io # To handle image data in memory
|
|
|
8 |
|
9 |
class VisualEngine:
|
10 |
def __init__(self, output_dir="temp_generated_media"):
|
11 |
self.output_dir = output_dir
|
12 |
os.makedirs(self.output_dir, exist_ok=True)
|
13 |
|
14 |
+
self.font_filename = "arial.ttf"
|
|
|
15 |
self.font_path_in_container = f"/usr/local/share/fonts/truetype/mycustomfonts/{self.font_filename}"
|
16 |
self.font_size_pil = 24
|
17 |
|
|
|
19 |
self.font = ImageFont.truetype(self.font_path_in_container, self.font_size_pil)
|
20 |
print(f"Successfully loaded font: {self.font_path_in_container} for placeholders.")
|
21 |
except IOError:
|
22 |
+
print(f"Warning: Could not load font from '{self.font_path_in_container}'. Placeholders will use default font.")
|
|
|
23 |
self.font = ImageFont.load_default()
|
24 |
self.font_size_pil = 11
|
25 |
|
26 |
+
# --- OpenAI API Client Setup ---
|
27 |
+
self.openai_api_key = None
|
28 |
+
self.USE_AI_IMAGE_GENERATION = False # Default to False
|
|
|
29 |
|
30 |
+
try:
|
31 |
+
# Try to get API key from Hugging Face secrets (via Streamlit's secrets)
|
32 |
+
# This assumes app.py has loaded st.secrets["OPENAI_API_KEY"] into st.session_state
|
33 |
+
# A better way for a library class is to pass the key in or have it set globally
|
34 |
+
# For now, let's assume it will be set via a method or directly if running outside Streamlit context
|
35 |
+
# In app.py, you would do: st.session_state.visual_engine.set_openai_api_key(st.secrets["OPENAI_API_KEY"])
|
36 |
+
pass # Key will be set by set_openai_api_key method
|
37 |
+
except Exception as e:
|
38 |
+
print(f"OpenAI API key not immediately available for VisualEngine: {e}")
|
39 |
+
|
40 |
+
# You can choose DALL-E 2 or DALL-E 3. DALL-E 3 is generally better.
|
41 |
+
# For DALL-E 3, the 'model' parameter is "dall-e-3"
|
42 |
+
# For DALL-E 2, the 'model' parameter is "dall-e-2" (or implicitly if not specified for older image create)
|
43 |
+
self.dalle_model = "dall-e-3"
|
44 |
+
self.image_size = "1024x1024" # DALL-E 3 supports 1024x1024, 1792x1024, or 1024x1792
|
45 |
+
|
46 |
+
def set_openai_api_key(self, api_key):
    """Enable or disable DALL-E image generation depending on *api_key*.

    A truthy key is stored on the instance, mirrored to the module-level
    ``openai.api_key`` setting, and AI image generation is switched on.
    A falsy key (``None`` or ``""``) switches generation off and forgets any
    key stored by an earlier call, so the instance never claims "no key"
    while still holding one.

    Args:
        api_key: The OpenAI API key, or a falsy value to disable generation.
    """
    if not api_key:
        # Clear stale credentials so the flag and the stored key agree.
        self.openai_api_key = None
        self.USE_AI_IMAGE_GENERATION = False
        print("OpenAI API key not provided. AI Image Generation Disabled. Using placeholders.")
        return

    self.openai_api_key = api_key
    # Also set the library-wide default key for the openai package.
    openai.api_key = self.openai_api_key
    self.USE_AI_IMAGE_GENERATION = True
    print("OpenAI API key set. AI Image Generation Enabled with DALL-E.")
|
55 |
|
56 |
|
57 |
def _get_text_dimensions(self, text_content, font_obj):
|
58 |
+
# ... (this method remains the same as your last working version) ...
|
59 |
if text_content == "" or text_content is None:
|
60 |
return 0, self.font_size_pil
|
61 |
try:
|
|
|
78 |
return int(len(text_content) * avg_char_width), int(height_estimate if height_estimate > 0 else self.font_size_pil)
|
79 |
|
80 |
|
81 |
+
def _create_placeholder_image_content(self, text_description, filename, size=(1024, 576)):
|
82 |
+
# ... (this method remains the same as your last working version) ...
|
83 |
img = Image.new('RGB', size, color=(30, 30, 60))
|
84 |
draw = ImageDraw.Draw(img)
|
85 |
+
padding = 30
|
86 |
max_text_width = size[0] - (2 * padding)
|
87 |
lines = []
|
88 |
if not text_description: text_description = "(No description provided for placeholder)"
|
89 |
words = text_description.split()
|
90 |
current_line = ""
|
|
|
91 |
for word in words:
|
92 |
test_line_candidate = current_line + word + " "
|
93 |
line_width, _ = self._get_text_dimensions(test_line_candidate.strip(), self.font)
|
|
|
103 |
current_line = ""
|
104 |
if current_line.strip(): lines.append(current_line.strip())
|
105 |
if not lines: lines.append("(Text error in placeholder)")
|
|
|
106 |
_, single_line_height = self._get_text_dimensions("Tg", self.font)
|
107 |
if single_line_height == 0: single_line_height = self.font_size_pil
|
108 |
line_spacing_factor = 1.3
|
109 |
estimated_line_block_height = len(lines) * single_line_height * line_spacing_factor
|
110 |
y_text = (size[1] - estimated_line_block_height) / 2.0
|
111 |
if y_text < padding: y_text = float(padding)
|
|
|
112 |
for line_idx, line in enumerate(lines):
|
113 |
+
if line_idx >= 7 and len(lines) > 8:
|
114 |
draw.text(xy=(float(padding), y_text), text="...", fill=(200, 200, 130), font=self.font)
|
115 |
break
|
116 |
line_width, _ = self._get_text_dimensions(line, self.font)
|
|
|
118 |
if x_text < padding: x_text = float(padding)
|
119 |
draw.text(xy=(x_text, y_text), text=line, fill=(220, 220, 150), font=self.font)
|
120 |
y_text += single_line_height * line_spacing_factor
|
|
|
121 |
filepath = os.path.join(self.output_dir, filename)
|
122 |
try:
|
123 |
img.save(filepath)
|
|
|
128 |
|
129 |
|
130 |
def generate_image_visual(self, image_prompt_text, scene_identifier_filename):
    """Create the visual for one scene and return the path of the saved image.

    When AI generation is enabled and an API key is set, the prompt is sent
    to the OpenAI Images API (model ``self.dalle_model``, size
    ``self.image_size``), the returned URL is downloaded, and the image is
    written into ``self.output_dir``. In every other case (generation
    disabled, no key, or any error along the way) a text placeholder is
    rendered through ``_create_placeholder_image_content`` instead.

    Args:
        image_prompt_text: Prompt describing the image to generate.
        scene_identifier_filename: File name (not a full path) for the image
            inside ``self.output_dir``.

    Returns:
        The path of the saved AI image, or whatever
        ``_create_placeholder_image_content`` returns on the fallback path.
    """
    filepath = os.path.join(self.output_dir, scene_identifier_filename)

    if not (self.USE_AI_IMAGE_GENERATION and self.openai_api_key):
        # AI generation is not enabled or the key is missing: placeholder only.
        return self._create_placeholder_image_content(image_prompt_text, scene_identifier_filename)

    try:
        print(f"Generating DALL-E ({self.dalle_model}) image for: {image_prompt_text[:100]}...")

        # Client syntax for openai >= 1.0.0.
        client = openai.OpenAI(api_key=self.openai_api_key)
        response = client.images.generate(
            model=self.dalle_model,
            prompt=image_prompt_text,
            n=1,
            size=self.image_size,
            quality="standard",
            response_format="url",
        )

        first_result = response.data[0]
        image_url = first_result.url
        # The revised prompt may be absent/None; only log it when present.
        revised_prompt_dalle3 = first_result.revised_prompt
        if revised_prompt_dalle3:
            print(f"DALL-E 3 revised prompt: {revised_prompt_dalle3[:150]}...")

        # Download the generated image; the timeout keeps a stalled
        # download from hanging the caller.
        image_response = requests.get(image_url, timeout=30)
        image_response.raise_for_status()

        # Decode from memory and persist; the context manager releases the
        # decoder resources once the file is written.
        with Image.open(io.BytesIO(image_response.content)) as img_data:
            img_data.save(filepath)

        print(f"AI Image (DALL-E) saved: {filepath}")
        return filepath
    except openai.APIError as e:
        # Not every APIError carries an HTTP status (connection and timeout
        # errors do not), so read the attributes defensively. An
        # AttributeError raised here would escape this handler and skip the
        # placeholder fallback below.
        status_code = getattr(e, "status_code", None)
        error_type = getattr(e, "type", None)
        message = getattr(e, "message", str(e))
        print(f"OpenAI API Error generating image: {e}")
        print(f"Status Code: {status_code}, Error Type: {error_type}")
        print(f"Message: {message}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image from DALL-E URL: {e}")
    except Exception as e:
        # Best-effort boundary: any other failure still yields a placeholder.
        print(f"Generic error during DALL-E image generation or saving: {e}")

    # Reached only when one of the handlers above ran.
    print("Falling back to placeholder image due to DALL-E error.")
    return self._create_placeholder_image_content(
        f"[DALL-E Generation Failed] Original Prompt: {image_prompt_text}",
        scene_identifier_filename,
    )
|
191 |
|
192 |
|
193 |
def create_video_from_images(self, image_paths, output_filename="final_video.mp4", fps=1, duration_per_image=3):
|
194 |
+
# ... (this method remains the same as your last working version) ...
|
195 |
if not image_paths:
|
196 |
print("No images provided to create video.")
|
197 |
return None
|
|
|
200 |
print("No valid image paths found to create video.")
|
201 |
return None
|
202 |
print(f"Attempting to create video from {len(valid_image_paths)} images.")
|
|
|
203 |
try:
|
204 |
clips = []
|
205 |
for m_path in valid_image_paths:
|
206 |
try:
|
207 |
+
clip = ImageClip(m_path).set_duration(duration_per_image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
clips.append(clip)
|
209 |
except Exception as e_clip:
|
210 |
print(f"Error creating ImageClip for {m_path}: {e_clip}. Skipping.")
|
|
|
211 |
if not clips:
|
212 |
print("Could not create any ImageClips.")
|
213 |
return None
|
|
|
214 |
video_clip = concatenate_videoclips(clips, method="compose")
|
215 |
output_path = os.path.join(self.output_dir, output_filename)
|
|
|
216 |
print(f"Writing video to: {output_path}")
|
217 |
video_clip.write_videofile(
|
218 |
output_path, fps=fps, codec='libx264', audio_codec='aac',
|