Update core/prompt_engineering.py
Browse files- core/prompt_engineering.py +252 -237
core/prompt_engineering.py
CHANGED
@@ -5,291 +5,306 @@ def create_cinematic_treatment_prompt(user_idea, genre, mood, num_scenes=3, crea
|
|
5 |
"""
|
6 |
Generates a prompt for Gemini to create a full cinematic treatment, including
|
7 |
proactive suggestions for visual style, camera, sound, thematic elements,
|
8 |
-
and whether a scene is better as an image or a short video clip.
|
9 |
-
|
10 |
"""
|
11 |
-
|
12 |
-
"standard": "Provide solid, genre-appropriate suggestions. Recommend 'image' for most asset types unless strong motion is
|
13 |
-
"more_artistic": "
|
14 |
-
"experimental_narrative": "Feel free to suggest a minor unexpected narrative twist or a symbolic visual motif. If a scene has significant implied motion or
|
15 |
-
}
|
|
|
16 |
|
17 |
-
# Updated list of fields to request from Gemini
|
18 |
return f"""
|
19 |
-
You are an AI Creative Director and Master Storyteller,
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
48 |
{{
|
49 |
"scene_number": 1,
|
50 |
-
"scene_title": "
|
51 |
-
"emotional_beat": "
|
52 |
-
"setting_description": "
|
53 |
-
"characters_involved": ["
|
54 |
-
"character_focus_moment": "
|
55 |
-
"key_plot_beat": "
|
56 |
-
"suggested_dialogue_hook": "(
|
57 |
-
"PROACTIVE_visual_style_감독": "
|
58 |
-
"PROACTIVE_camera_work_감독": "
|
59 |
-
"PROACTIVE_sound_design_감독": "Ambient:
|
60 |
"suggested_asset_type_감독": "image",
|
61 |
"video_clip_motion_description_감독": "N/A",
|
62 |
"video_clip_duration_estimate_secs_감독": 0,
|
63 |
-
"image_generation_keywords_감독": "
|
64 |
-
"pexels_search_query_감독": "
|
65 |
}}
|
66 |
"""
|
67 |
|
68 |
def construct_dalle_prompt(scene_data, character_definitions=None, global_style_additions=""):
|
69 |
"""
|
70 |
-
Constructs
|
71 |
-
injecting character details, and global style preferences.
|
72 |
"""
|
73 |
-
scene_title = scene_data.get('scene_title', 'A dramatic moment')
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
current_scene_character_details.append(f"{char_name_clean} (depicted as: {char_visual_desc})")
|
91 |
else:
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
if
|
96 |
-
if len(
|
97 |
-
|
98 |
-
else:
|
99 |
-
character_narrative = f"The scene prominently features {', '.join(current_scene_character_details[:-1])} and {current_scene_character_details[-1]}."
|
100 |
|
101 |
-
|
102 |
if global_style_additions:
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
f"
|
108 |
-
f"
|
109 |
-
f"{
|
110 |
-
|
111 |
-
f"
|
112 |
-
f"
|
113 |
-
f"
|
114 |
-
f"
|
115 |
-
f"
|
116 |
-
f"
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
119 |
|
120 |
|
121 |
-
|
|
|
122 |
"""
|
123 |
-
Constructs a prompt for
|
124 |
-
|
|
|
125 |
"""
|
126 |
-
|
127 |
-
#
|
128 |
-
|
129 |
-
|
130 |
-
plot_beat = scene_data.get('key_plot_beat', '')
|
131 |
-
motion_desc = scene_data.get('video_clip_motion_description_감독', 'subtle ambient motion')
|
132 |
-
visual_style = scene_data.get('PROACTIVE_visual_style_감독', '')
|
133 |
-
camera_work = scene_data.get('PROACTIVE_camera_work_감독', 'dynamic camera movement')
|
134 |
-
emotional_beat = scene_data.get('emotional_beat', scene_title)
|
135 |
-
|
136 |
-
current_scene_character_details = []
|
137 |
-
characters_involved = scene_data.get('characters_involved', [])
|
138 |
-
if characters_involved:
|
139 |
-
for char_name in characters_involved:
|
140 |
-
char_lookup_key = char_name.strip().lower()
|
141 |
-
if character_definitions and char_lookup_key in character_definitions:
|
142 |
-
current_scene_character_details.append(f"{char_name.strip()} (as: {character_definitions[char_lookup_key]})")
|
143 |
-
else:
|
144 |
-
current_scene_character_details.append(char_name.strip())
|
145 |
|
146 |
-
|
147 |
-
|
148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
-
final_style_directive = visual_style
|
151 |
-
if global_style_additions:
|
152 |
-
final_style_directive = f"{visual_style}. Global style notes: {global_style_additions}."
|
153 |
-
|
154 |
-
prompt_parts = [
|
155 |
-
f"Generate a highly cinematic video clip for a scene titled '{scene_title}'.",
|
156 |
-
f"Setting: {setting_desc}.",
|
157 |
-
f"Key moment: {plot_beat}.",
|
158 |
-
character_narrative if character_narrative else "Focus on the environment and atmosphere if no specific characters are detailed.",
|
159 |
-
f"Primary Motion: {motion_desc}. This motion should be the central dynamic element of the clip.",
|
160 |
-
f"Visual Style & Mood: {final_style_directive}. Infuse with a strong sense of '{emotional_beat}'.",
|
161 |
-
f"Cinematography: Implement camera work described as '{camera_work}'. If specific shots like 'dolly zoom' or 'tracking shot' are mentioned, execute them clearly.",
|
162 |
-
f"Core visual keywords for styling and content: {base_keywords}.",
|
163 |
-
"The video should be photorealistic, with extreme detail, sophisticated lighting, rich textures, and palpable atmospheric effects.",
|
164 |
-
"Ensure high fidelity and smooth motion. The clip should feel like a shot from a major film production."
|
165 |
-
]
|
166 |
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
171 |
|
172 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
|
175 |
def create_narration_script_prompt_enhanced(story_scenes_data, overall_mood, overall_genre, voice_style="cinematic_trailer"):
|
176 |
-
|
177 |
-
for
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
f"
|
183 |
-
f"
|
184 |
-
f"
|
|
|
|
|
|
|
185 |
)
|
186 |
-
|
187 |
|
188 |
-
|
189 |
-
"cinematic_trailer": "deep, resonant, slightly epic, building anticipation
|
190 |
-
"documentary_neutral": "clear, informative, objective, and well-paced.",
|
191 |
-
"introspective_character": f"reflective, personal, possibly first-person
|
192 |
-
}
|
|
|
193 |
|
194 |
prompt = f"""
|
195 |
-
You are an
|
196 |
-
The animatic
|
197 |
|
198 |
--- SCENE TREATMENTS ---
|
199 |
-
{
|
200 |
--- END SCENE TREATMENTS ---
|
201 |
|
202 |
-
Overall
|
203 |
-
|
204 |
-
|
|
|
205 |
|
206 |
-
Your narration script
|
207 |
-
-
|
208 |
-
-
|
209 |
-
-
|
210 |
-
-
|
211 |
-
- If 'introspective_character',
|
212 |
-
- The output MUST be ONLY the narration script text, ready for text-to-speech.
|
|
|
213 |
|
214 |
-
Example (different story):
|
215 |
-
"
|
216 |
|
217 |
-
Craft your narration.
|
218 |
"""
|
219 |
-
return " ".join(prompt.split())
|
220 |
|
221 |
|
222 |
-
def create_scene_regeneration_prompt(
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
|
|
|
|
|
|
|
|
|
|
228 |
context_str += "\n"
|
229 |
|
230 |
-
|
231 |
return f"""
|
232 |
-
You are an AI Script Supervisor and Creative Consultant.
|
233 |
-
|
234 |
-
User Feedback for this scene: "{user_feedback}"
|
235 |
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
- 'scene_number' MUST NOT change.
|
242 |
-
- 'key_plot_beat'
|
243 |
-
- 'image_generation_keywords_감독'
|
244 |
-
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
|
|
249 |
"""
|
250 |
|
251 |
-
def create_visual_regeneration_prompt(
|
252 |
"""
|
253 |
-
|
254 |
"""
|
255 |
-
|
256 |
-
|
257 |
-
if
|
258 |
-
for
|
259 |
-
|
260 |
-
if
|
261 |
-
|
262 |
-
else:
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
You are an AI Art Director specializing in refining DALL-E 3 prompts for cinematic STILL IMAGES.
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
-
|
272 |
-
-
|
273 |
-
- {
|
274 |
-
-
|
275 |
-
- Director's Suggested
|
276 |
-
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
|
|
294 |
"""
|
295 |
-
return " ".join(
|
|
|
5 |
"""
|
6 |
Generates a prompt for Gemini to create a full cinematic treatment, including
|
7 |
proactive suggestions for visual style, camera, sound, thematic elements,
|
8 |
+
and whether a scene is better as an image or a short video clip for AI generation.
|
9 |
+
Creative Guidance options: "standard", "more_artistic", "experimental_narrative".
|
10 |
"""
|
11 |
+
guidance_map = {
|
12 |
+
"standard": "Provide solid, genre-appropriate suggestions. Recommend 'image' for most asset types unless strong, specific motion is essential to the scene's core impact.",
|
13 |
+
"more_artistic": "Emphasize artistic, unconventional, and visually striking suggestions for style and camera. Suggest unique color palettes or lighting. Be more liberal in recommending 'video_clip' if it can enhance the artistic vision with distinct motion.",
|
14 |
+
"experimental_narrative": "Feel free to suggest a minor unexpected narrative twist or a symbolic visual motif within one of the scenes. If a scene has significant implied motion, transformation, or a reveal best shown dynamically, recommend 'video_clip'."
|
15 |
+
}
|
16 |
+
guidance_description = guidance_map.get(creative_guidance, guidance_map["standard"])
|
17 |
|
|
|
18 |
return f"""
|
19 |
+
You are an AI Creative Director and Master Storyteller, tasked with developing a cinematic concept.
|
20 |
+
Your response MUST be a valid JSON list of scene objects only.
|
21 |
+
|
22 |
+
Project Details:
|
23 |
+
- Core Idea: "{user_idea}"
|
24 |
+
- Genre: "{genre}"
|
25 |
+
- Overall Mood: "{mood}"
|
26 |
+
- Number of Key Scenes: {num_scenes}
|
27 |
+
- Creative Direction Style: {creative_guidance} ({guidance_description})
|
28 |
+
|
29 |
+
For EACH of the {num_scenes} key scenes, provide exactly the following 16 fields:
|
30 |
+
1. `scene_number` (int): Sequential number (1, 2, ...).
|
31 |
+
2. `scene_title` (str): Short, evocative title (e.g., "The Crimson Dawn", "Whispers in the Wires").
|
32 |
+
3. `emotional_beat` (str): The primary emotion or feeling this scene should evoke (e.g., "Rising Tension", "Brief Respite", "Shocking Revelation").
|
33 |
+
4. `setting_description` (str): Vivid sensory details (sight, sound, atmosphere, time of day). Concisely establish where and when (40-70 words).
|
34 |
+
5. `characters_involved` (list of str): Names of characters central to this scene. List non-speaking entities if key (e.g., "Security Drone").
|
35 |
+
6. `character_focus_moment` (str): A key internal thought, subtle expression, or micro-action for the primary character(s) that reveals their state or advances their arc. If no specific character focus, describe the general atmospheric impact or a key object's significance.
|
36 |
+
7. `key_plot_beat` (str): The most critical plot development or character action in this scene (1-2 concise sentences). This should be suitable for a brief video overlay if needed.
|
37 |
+
8. `suggested_dialogue_hook` (str): One potent line of dialogue capturing the scene's essence or a character's voice. If silent, state "Silent scene" or describe key non-verbal communication.
|
38 |
+
9. `PROACTIVE_visual_style_감독` (str): Your detailed, proactive suggestion for this scene's visual style. Be specific: art movements, film references (e.g., "Blade Runner 2049"), color palettes, lighting techniques (e.g., "Chiaroscuro lighting with predominantly cool blues and teals, punctuated by a single warm amber light source. Neo-noir aesthetic with a touch of gothic architecture.").
|
39 |
+
10. `PROACTIVE_camera_work_감독` (str): Your proactive suggestion for impactful camera work. Describe a specific shot, angle, or short sequence (e.g., "Low-angle tracking shot following the character's feet, then tilting up to a dramatic reveal.", "Slow, unsettling push-in on the antagonist's unblinking eye.").
|
40 |
+
11. `PROACTIVE_sound_design_감독` (str): Key ambient sounds, specific SFX, and a suggestion for the musical mood/instrumentation (e.g., "Ambient: Distant, echoing drips and metallic groans. SFX: A sudden, sharp electronic static burst. Music: Brooding, minimalist synth score with a recurring, dissonant motif.").
|
41 |
+
12. `suggested_asset_type_감독` (str): Your recommendation for the primary visual asset for this scene: "image" (for a still concept art) or "video_clip" (for a short ~3-7 second AI-generated video, e.g., Runway Gen-4). Default to "image" unless distinct, describable motion is central to the scene's impact.
|
42 |
+
13. `video_clip_motion_description_감독` (str): If `suggested_asset_type_감독` is "video_clip", provide a concise (1-3 sentences) description of the primary motion to be generated (e.g., "The subject slowly turns their head to look directly at the camera. A subtle breeze ruffles their hair.", "Steam rises from a street vent as the camera slowly pans upwards."). Otherwise, "N/A". This description is key for text-to-video AI.
|
43 |
+
14. `video_clip_duration_estimate_secs_감독` (int): If `suggested_asset_type_감독` is "video_clip", estimate duration in seconds (typically 3-7 seconds, or 5/10 for Gen-4). Otherwise, 0.
|
44 |
+
15. `image_generation_keywords_감독` (str): A concise list of 5-8 powerful keywords extracted from all above details, suitable for AI image generation (e.g., DALL-E). Focus on nouns, strong adjectives, artistic styles. If a video, include keywords that might inform the *initial frame* or *style* if the video AI uses a seed image. (e.g., "cyberpunk cityscape, neon rain, lone figure silhouette, glowing advertisements, high contrast, cinematic lighting, atmospheric perspective").
|
45 |
+
16. `pexels_search_query_감독` (str): A concise, effective search query (2-4 words) for Pexels to find a relevant background or atmospheric stock photo/video (e.g., "rainy neon street," "vast desert sunset," "moody forest path").
|
46 |
+
|
47 |
+
Special Instruction for "experimental_narrative" Guidance:
|
48 |
+
If `creative_guidance` is "experimental_narrative", for ONLY ONE of the scenes, you may subtly alter `key_plot_beat` or add a symbolic element to `setting_description` to introduce an unexpected narrative twist or thematic layer. If you do this, add an additional field `director_note` (str) to THAT SCENE'S JSON OBJECT ONLY, briefly explaining your creative choice for the experimental element.
|
49 |
+
|
50 |
+
Ensure your entire response is a single, valid JSON list of objects. Each object must contain all 16 specified fields (plus the optional `director_note` if applicable).
|
51 |
+
Example of one scene object (ensure all 16 fields are present for every scene, values are illustrative):
|
52 |
{{
|
53 |
"scene_number": 1,
|
54 |
+
"scene_title": "First Light on Xylos",
|
55 |
+
"emotional_beat": "Awe and Trepidation",
|
56 |
+
"setting_description": "The twin suns of Xylos cast long, eerie shadows across a crystalline landscape. Jagged, purple rock formations pierce a sky the color of bruised plums. A faint, alien hum resonates through the thin air.",
|
57 |
+
"characters_involved": ["Captain Eva Rostova", "Exploration Drone Unit 7"],
|
58 |
+
"character_focus_moment": "Eva touches her helmet, her reflection showing wide eyes scanning the alien vista. A single, hesitant breath escapes her lips, misting in the cold.",
|
59 |
+
"key_plot_beat": "Eva takes the first human steps onto Xylos, her drone scouting ahead, as an unknown energy signature is detected nearby.",
|
60 |
+
"suggested_dialogue_hook": "Eva (comms, hushed): 'Ground control, Xylos actual. The eagles have landed... and it's breathtakingly strange.'",
|
61 |
+
"PROACTIVE_visual_style_감독": "Cinematic sci-fi realism. Style of 'Arrival' meets 'Prometheus'. Sharp focus, deep depth of field, lens flares from the twin suns. Palette dominated by cool purples, blues, and metallic greys, with warm highlights from Eva's suit lights.",
|
62 |
+
"PROACTIVE_camera_work_감독": "Initial sweeping wide shot establishing the alien landscape. Followed by a tight, over-the-shoulder shot as Eva steps from the lander, then a POV from the drone.",
|
63 |
+
"PROACTIVE_sound_design_감독": "Ambient: Low, resonant alien hum, wind whistling through crystals. SFX: Eva's suit servos, drone whirring. Music: Ethereal, slightly unsettling orchestral score with high strings and deep bass tones.",
|
64 |
"suggested_asset_type_감독": "image",
|
65 |
"video_clip_motion_description_감독": "N/A",
|
66 |
"video_clip_duration_estimate_secs_감독": 0,
|
67 |
+
"image_generation_keywords_감독": "alien planet Xylos, crystalline landscape, twin suns, female astronaut Eva, exploration drone, sci-fi realism, cinematic lighting, purple sky, awe-inspiring",
|
68 |
+
"pexels_search_query_감독": "alien landscape purple"
|
69 |
}}
|
70 |
"""
|
71 |
|
72 |
def _dalle_sentence(text):
    """Return *text* stripped and ending in exactly one terminal punctuation mark."""
    cleaned = str(text).strip()
    if not cleaned:
        return ""
    return cleaned if cleaned.endswith((".", "!", "?")) else f"{cleaned}."


def construct_dalle_prompt(scene_data, character_definitions=None, global_style_additions=""):
    """
    Constructs a detailed DALL-E prompt for generating a STILL IMAGE.

    Args:
        scene_data: Dict describing one scene. Reads 'scene_title',
            'image_generation_keywords_감독', 'setting_description',
            'key_plot_beat', 'PROACTIVE_visual_style_감독',
            'PROACTIVE_camera_work_감독', 'emotional_beat' and
            'characters_involved'. Missing, null, or empty fields fall back
            to generic defaults.
        character_definitions: Optional mapping of lower-cased character name
            to a visual description that is injected next to the name.
        global_style_additions: Optional extra style notes appended to the
            scene's own visual style.

    Returns:
        A single-line, whitespace-normalized prompt string.
    """
    def field(key, fallback):
        # `or` also covers keys that are present but null/empty, which would
        # otherwise be rendered into the prompt as "None" or a bare label.
        return scene_data.get(key) or fallback

    scene_title = field('scene_title', 'A dramatic cinematic moment')
    base_keywords = field('image_generation_keywords_감독', 'cinematic, highly detailed, photorealistic')
    setting_desc = field('setting_description', 'A visually rich environment.')
    plot_beat = field('key_plot_beat', 'A key action or event is happening.')
    visual_style_directive = field('PROACTIVE_visual_style_감독', 'Default cinematic visual style.')
    camera_perspective = field('PROACTIVE_camera_work_감독', 'Dynamic camera angle.')
    emotional_tone = field('emotional_beat', scene_title)

    character_details = []
    for raw_name in scene_data.get('characters_involved') or []:
        name = str(raw_name).strip()
        if not name:
            continue  # blank entries would produce "features  and X"
        description = character_definitions.get(name.lower()) if character_definitions else None
        if description:
            character_details.append(f"{name} (described as: {description})")
        else:
            character_details.append(name)

    if not character_details:
        character_narrative = ""
    elif len(character_details) == 1:
        character_narrative = f"The scene prominently features {character_details[0]}."
    else:
        character_narrative = (
            f"The scene features {', '.join(character_details[:-1])} and {character_details[-1]}."
        )

    # Each fragment is terminated exactly once here, so the templates below
    # must not add another period after an interpolated sentence.
    style_directive = _dalle_sentence(visual_style_directive)
    if global_style_additions:
        style_directive = (
            f"{style_directive} Additional overarching style notes: "
            f"{_dalle_sentence(global_style_additions)}"
        )

    # DALL-E 3 benefits from more descriptive, conversational prompts.
    prompt_elements = [
        "Create an ultra-detailed, photorealistic, and intensely cinematic digital painting or concept art still image.",
        f"Scene Title: '{scene_title}'.",
        f"Core Elements & Keywords: {_dalle_sentence(base_keywords)}",
        character_narrative,
        f"Setting Description: {_dalle_sentence(setting_desc)}",
        f"Key Moment / Plot Beat: {_dalle_sentence(plot_beat)}",
        f"Artistic & Visual Style: {style_directive}",
        f"Cinematography (for a still image): {_dalle_sentence(camera_perspective)} Consider composition, lighting, and perspective described.",
        f"Emotional Tone to Convey: {_dalle_sentence(emotional_tone)}",
        "Technical Execution: Achieve extreme detail, sophisticated and purposeful lighting that sculpts forms and defines mood (e.g., dramatic rim lighting, volumetric light, soft diffused light, harsh contrasts), rich and believable textures, and palpable atmospheric effects (e.g., mist, dust, lens flares, rain, heat haze).",
        "The final image should be of exceptional quality, suitable for a major film production's visual development phase or as a keyframe concept art.",
        "Ensure any specified characters are distinct and adhere to their provided descriptions. This is a still image, focus on a powerful, frozen moment.",
    ]

    # Drop empty parts (no characters), then collapse any embedded whitespace.
    joined = " ".join(part for part in prompt_elements if part)
    return " ".join(joined.split())
|
123 |
|
124 |
|
125 |
+
# <<< THIS IS THE FUNCTION YOUR app.py IS TRYING TO IMPORT >>>
|
126 |
+
# Runway Gen-4 promptText is <= 1000 characters.
_GEN4_PROMPT_CHAR_LIMIT = 1000
_GEN4_DEFAULT_MOTION = 'Subtle ambient motion and atmospheric effects.'
# Whole-word forms that indicate the camera itself moves. Matching whole words
# (not substrings) avoids false hits such as "expansive" (pan) or
# "soundtrack" (track) and catches inflections such as "rotating" or "moving".
_GEN4_CAMERA_MOTION_WORDS = frozenset({
    "pan", "pans", "panned", "panning",
    "tilt", "tilts", "tilted", "tilting",
    "dolly", "dollies", "dollied", "dollying",
    "zoom", "zooms", "zoomed", "zooming",
    "track", "tracks", "tracked", "tracking",
    "handheld",
    "crane", "cranes", "craned", "craning",
    "aerial", "aerials",
    "rotate", "rotates", "rotated", "rotating", "rotation",
    "orbit", "orbits", "orbited", "orbiting",
    "follow", "follows", "followed", "following",
    "move", "moves", "moved", "moving", "movement", "movements",
    "travel", "travels", "traveled", "traveling", "travelled", "travelling",
})


def _gen4_sentence(text):
    """Return *text* stripped and ending in exactly one terminal punctuation mark."""
    cleaned = str(text).strip()
    if not cleaned:
        return ""
    return cleaned if cleaned.endswith((".", "!", "?")) else f"{cleaned}."


def _gen4_mentions_camera_motion(text):
    """Return True when *text* contains a whole word that denotes camera movement."""
    words = "".join(ch if ch.isalpha() else " " for ch in text.lower()).split()
    return not _GEN4_CAMERA_MOTION_WORDS.isdisjoint(words)


def construct_text_to_video_prompt_for_gen4(scene_data, global_style_additions=""):
    """
    Constructs a text prompt specifically for Runway Gen-4 (image-to-video),
    focusing on MOTION as per their guidelines. The input image for Gen-4
    will provide the scene, characters, and base visual style.

    Args:
        scene_data: Dict describing one scene. Reads
            'video_clip_motion_description_감독' (primary motion) and
            'PROACTIVE_camera_work_감독' (camera suggestion).
        global_style_additions: Optional style notes; included as-is on the
            assumption that the caller curated them for motion.

    Returns:
        A single-line prompt of at most 1000 characters, cut at a word
        boundary when truncation is needed.
    """
    motion_description = str(scene_data.get('video_clip_motion_description_감독') or "").strip()
    # Scenes planned as still images carry the placeholder "N/A"; sending that
    # to the video model would be meaningless, so use the ambient default.
    if not motion_description or motion_description.upper() == "N/A":
        motion_description = _GEN4_DEFAULT_MOTION

    # The motion description should already refer to subjects generically
    # ("the subject", "they", "it"); characters are not re-described here.
    prompt_parts = [_gen4_sentence(motion_description)]

    # The camera suggestion may describe a static shot; only include it when
    # it implies actual camera movement.
    camera_work_suggestion = str(scene_data.get('PROACTIVE_camera_work_감독') or "").strip()
    if camera_work_suggestion and _gen4_mentions_camera_motion(camera_work_suggestion):
        prompt_parts.append(f"Camera: {_gen4_sentence(camera_work_suggestion)}")

    style_notes = str(global_style_additions or "").strip()
    if style_notes:
        prompt_parts.append(f"Style: {_gen4_sentence(style_notes)}")

    prompt = " ".join(" ".join(prompt_parts).split())  # Normalize whitespace
    if len(prompt) <= _GEN4_PROMPT_CHAR_LIMIT:
        return prompt

    clipped = prompt[:_GEN4_PROMPT_CHAR_LIMIT]
    if not prompt[_GEN4_PROMPT_CHAR_LIMIT].isspace():
        # The cut may have landed inside a word; back up to the previous space.
        head, sep, _ = clipped.rpartition(" ")
        if sep:
            clipped = head
    return clipped.rstrip()
|
177 |
|
178 |
|
179 |
def create_narration_script_prompt_enhanced(story_scenes_data, overall_mood, overall_genre, voice_style="cinematic_trailer"):
    """
    Generates a prompt for Gemini to write a voiceover narration script.

    Args:
        story_scenes_data: Iterable of scene dicts (may be None or a one-shot
            iterable); each contributes a short summary to the prompt.
        overall_mood: Mood label inserted into the prompt.
        overall_genre: Genre label inserted into the prompt.
        voice_style: One of "cinematic_trailer", "documentary_neutral" or
            "introspective_character"; unknown values use the
            "cinematic_trailer" characteristics.

    Returns:
        The prompt as a single whitespace-normalized line.
    """
    # Materialize once: the scenes are both iterated and counted, which would
    # fail for generators, and None is treated as "no scenes".
    scenes = list(story_scenes_data or [])
    scene_count = len(scenes)

    scene_summaries = [
        (
            f"Scene {scene.get('scene_number', index)} (Title: '{scene.get('scene_title','Untitled Scene')}', Beat: '{scene.get('emotional_beat','N/A')}', Asset Type: {scene.get('suggested_asset_type_감독','image')}):\n"
            f"  Setting: {scene.get('setting_description','Not specified.')}\n"
            f"  Plot Beat: {scene.get('key_plot_beat','Not specified.')}\n"
            f"  Character Focus: {scene.get('character_focus_moment','General atmosphere or unspecified.')}\n"
            f"  Dialogue Hook: {scene.get('suggested_dialogue_hook','(No dialogue specified)')}\n"
            f"  Director's Sound Hint: {scene.get('PROACTIVE_sound_design_감독','Not specified.')}"
        )
        for index, scene in enumerate(scenes, start=1)
    ]
    full_summary_of_scenes = "\n\n".join(scene_summaries)

    voice_style_details_map = {
        "cinematic_trailer": "deep, resonant, authoritative, perhaps slightly epic, building anticipation.",
        "documentary_neutral": "clear, informative, objective, calm, and well-paced.",
        "introspective_character": "reflective, personal, possibly first-person (choose a key character from the scenes if appropriate, or use a more omniscient but internal tone), echoing internal thoughts and observations.",
    }
    voice_style_description_text = voice_style_details_map.get(voice_style, voice_style_details_map["cinematic_trailer"])

    prompt = f"""
    You are an acclaimed voiceover scriptwriter crafting narration for a cinematic animatic.
    The animatic is composed of AI-generated still images and short video clips, based on the following scene treatments:

    --- SCENE TREATMENTS ---
    {full_summary_of_scenes}
    --- END SCENE TREATMENTS ---

    Overall Story Details:
    - Genre: {overall_genre}
    - Mood: {overall_mood}
    - Desired Voiceover Style: {voice_style} (Characteristics: {voice_style_description_text})

    Your Task: Write a compelling and concise narration script.
    - The script must weave a cohesive narrative thread that connects all provided scenes.
    - It should enhance the emotional impact and atmosphere, drawing inspiration from each scene's 'emotional_beat', 'character_focus_moment', and 'PROACTIVE_sound_design_감독' hints.
    - Aim for 1-3 impactful sentences per scene. For {scene_count} scenes, the total script length should be approximately {scene_count * 15} to {scene_count * 30} words. Brevity is key.
    - The narration should transcend simple description of on-screen action. Instead, offer insight, build tension or emotion, or evoke thematic depth.
    - If the voice style is 'introspective_character', adopt the perspective of a key character if one is apparent and consistently featured, or maintain a more omniscient internal monologue tone that reflects the story's core emotional journey.
    - The final output MUST be ONLY the narration script text itself, ready for text-to-speech.
    - Do NOT include scene numbers, scene titles, character names in parentheses, or any directives like "(Voiceover)", "Narrator:", "SFX:", etc.

    Example of desired output format (for a different story):
    "In the shadowed alleys, hope was a contraband currency. Echoes of a forgotten promise led Kaelan deeper into the neon labyrinth, each step a gamble against time. The city's digital heartbeat pulsed with secrets, some best left undisturbed..."

    Craft your narration now.
    """
    return " ".join(prompt.split())  # Normalize whitespace
|
230 |
|
231 |
|
232 |
+
def create_scene_regeneration_prompt(original_scene_data_dict, user_provided_feedback, full_story_context_list=None):
    """
    Generates a prompt for Gemini to regenerate a single scene's JSON object based on feedback.

    Args:
        original_scene_data_dict: The scene dict to revise; serialized into
            the prompt as indented JSON.
        user_provided_feedback: Free-text feedback quoted in the prompt.
        full_story_context_list: Optional list of all scene dicts; each is
            summarized as number, title and a plot beat capped at 60
            characters.

    Returns:
        The multi-line prompt string.
    """
    # ensure_ascii=False keeps the non-ASCII key suffix readable, so the JSON
    # keys shown to the model match the field names in the instructions below
    # instead of appearing as \uXXXX escapes.
    original_scene_json_str = json.dumps(original_scene_data_dict, indent=2, ensure_ascii=False)
    context_parts = [
        f"Original scene data (Scene Number {original_scene_data_dict.get('scene_number')} - Title: '{original_scene_data_dict.get('scene_title')}' ):\n{original_scene_json_str}\n\n"
    ]

    if full_story_context_list:
        context_parts.append("Full story context (abbreviated for reference):\n")
        for position, scene_ctx_item in enumerate(full_story_context_list, start=1):
            plot_beat = str(scene_ctx_item.get('key_plot_beat') or '')
            if len(plot_beat) > 60:
                # Only mark the beat as abbreviated when it really was cut.
                plot_beat = f"{plot_beat[:60]}..."
            context_parts.append(
                f"  Scene {scene_ctx_item.get('scene_number', position)} Title: '{scene_ctx_item.get('scene_title', 'Untitled')}', Plot Beat: '{plot_beat}'\n"
            )
        context_parts.append("\n")
    context_str = "".join(context_parts)

    return f"""
    You are an AI Script Supervisor and Creative Consultant, refining a cinematic scene treatment.
    Your response MUST be a single, valid JSON object for the revised scene.

    {context_str}
    User Feedback for improving this specific scene: "{user_provided_feedback}"

    Task: Regenerate ONLY the JSON object for this single scene, incorporating the user's feedback.
    - Adhere strictly to the original 16 field structure: (scene_number, scene_title, emotional_beat, setting_description, characters_involved, character_focus_moment, key_plot_beat, suggested_dialogue_hook, PROACTIVE_visual_style_감독, PROACTIVE_camera_work_감독, PROACTIVE_sound_design_감독, suggested_asset_type_감독, video_clip_motion_description_감독, video_clip_duration_estimate_secs_감독, image_generation_keywords_감독, pexels_search_query_감독).
    - The 'scene_number' field MUST NOT change.
    - Update fields like 'setting_description', 'key_plot_beat', 'PROACTIVE_visual_style_감독', 'video_clip_motion_description_감독', etc., as implied by the feedback.
    - 'image_generation_keywords_감독' and 'pexels_search_query_감독' MUST be updated if visual elements change.
    - If feedback implies a change in asset type (image vs. video_clip), update 'suggested_asset_type_감독'.
        - If new type is 'image', 'video_clip_motion_description_감독' should be "N/A" and 'video_clip_duration_estimate_secs_감독' should be 0.
        - If new type is 'video_clip', ensure 'video_clip_motion_description_감독' is descriptive and 'video_clip_duration_estimate_secs_감독' is appropriate (e.g., 3-7 seconds).
    - If the feedback suggests experimental narrative changes and the original scene had a `director_note`, you may update or remove that note. If introducing a new experimental element based on feedback, add or update the `director_note` field.

    Output ONLY the single, complete, and valid JSON object for the revised scene.
    """
|
264 |
|
265 |
+
def create_visual_regeneration_prompt(original_dalle_image_prompt, user_provided_feedback, scene_data_dict, character_definitions_map=None, global_style_additions_str=""):
    """
    Generates a prompt for Gemini to refine an existing DALL-E prompt for a STILL IMAGE.

    Args:
        original_dalle_image_prompt: The prompt that produced the image the
            user wants changed.
        user_provided_feedback: Free-text feedback on that image.
        scene_data_dict: Scene dict; reads 'scene_title',
            'setting_description', 'key_plot_beat', 'characters_involved',
            'PROACTIVE_visual_style_감독' and 'PROACTIVE_camera_work_감독'.
        character_definitions_map: Optional mapping of lower-cased character
            name to a visual description.
        global_style_additions_str: Optional global style notes; shown as
            'None' when empty.

    Returns:
        The prompt as a single whitespace-normalized line.
    """
    character_details_for_prompt = []
    for char_name_item in scene_data_dict.get('characters_involved') or []:
        clean_char_name = str(char_name_item).strip()
        if not clean_char_name:
            continue  # blank names would leave stray ", " separators
        lookup_key_char = clean_char_name.lower()
        if character_definitions_map and lookup_key_char in character_definitions_map:
            character_details_for_prompt.append(
                f"{clean_char_name} (described as: {character_definitions_map[lookup_key_char]})"
            )
        else:
            character_details_for_prompt.append(clean_char_name)

    if character_details_for_prompt:
        characters_summary = f"{', '.join(character_details_for_prompt)}."
    else:
        # The fallback sentence carries its own period; do not add another.
        characters_summary = 'None specifically detailed in character definitions.'
    characters_narrative_for_prompt = f" Characters to potentially feature or consider: {characters_summary}"

    full_prompt_for_gemini_refinement = f"""
    You are an AI Art Director specializing in iteratively refining DALL-E 3 prompts for cinematic STILL IMAGES.
    Your goal is to produce a new DALL-E 3 prompt string that incorporates user feedback to achieve a better visual.
    Your response MUST be ONLY the new DALL-E 3 prompt string.

    Context for the Scene (do not repeat this verbatim in the DALL-E prompt unless relevant to the feedback):
    - Scene Title: "{scene_data_dict.get('scene_title', 'N/A')}"
    - Setting: "{scene_data_dict.get('setting_description', 'N/A')}"
    - Key Plot Beat: "{scene_data_dict.get('key_plot_beat', 'N/A')}"
    - {characters_narrative_for_prompt}
    - Director's Suggested Visual Style for Scene: "{scene_data_dict.get('PROACTIVE_visual_style_감독', 'N/A')}"
    - Director's Suggested Camera for Scene: "{scene_data_dict.get('PROACTIVE_camera_work_감독', 'N/A')}"
    - Current Global Style Additions: "{global_style_additions_str if global_style_additions_str else 'None'}"

    The PREVIOUS DALL-E 3 prompt (which generated the image the user wants to change) was:
    "{original_dalle_image_prompt}"

    User Feedback on the visual generated by the PREVIOUS prompt:
    "{user_provided_feedback}"

    Your Task: Generate a NEW, revised DALL-E 3 prompt string for a STILL IMAGE.
    - This new prompt MUST directly address and incorporate the user's feedback to achieve the desired visual changes.
    - It should maintain or enhance the ultra-detailed, photorealistic, and intensely cinematic qualities.
    - The prompt must guide DALL-E 3 to create a stunning still image suitable for high-end film concept art.
    - Maintain core, unchanged scene elements (setting, characters, plot beat) unless the feedback explicitly requests changes to them.
    - Translate abstract feedback into concrete visual descriptions: specify lighting, color, composition, character appearance/pose/expression, atmosphere, textures, etc., as needed to fulfill the feedback.
    - If characters are mentioned in feedback, ensure their descriptions (if available in context) are reinforced or modified in the new prompt.
    - The output prompt should be a single, coherent block of text, optimized for DALL-E 3.

    Output ONLY the new, revised DALL-E 3 prompt string. Do not include any other explanatory text, preambles, or apologies.
    """
    return " ".join(full_prompt_for_gemini_refinement.split())  # Normalize whitespace
|