Update core/prompt_engineering.py
Browse files- core/prompt_engineering.py +141 -93
core/prompt_engineering.py
CHANGED
@@ -1,38 +1,37 @@
|
|
1 |
# core/prompt_engineering.py
|
2 |
import json
|
3 |
|
4 |
-
# create_story_breakdown_prompt can remain mostly the same, but ensure 'key_action' is descriptive.
|
5 |
-
# If key_action is very short, text overlays on video might not be useful.
|
6 |
-
# Consider making 'key_action' slightly more verbose or adding a 'video_overlay_text' field.
|
7 |
-
# For now, we'll use key_action.
|
8 |
-
|
9 |
def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
|
|
|
|
|
|
|
|
|
10 |
return f"""
|
11 |
You are an expert screenwriter and visual storyteller.
|
12 |
Based on the user's idea: "{user_idea}"
|
13 |
And considering the genre: "{genre}" and mood: "{mood}"
|
14 |
|
15 |
Generate a {num_scenes}-scene story breakdown. For each scene, provide:
|
16 |
-
1. scene_number (int)
|
17 |
-
2.
|
18 |
-
3.
|
19 |
-
4.
|
20 |
-
5.
|
21 |
-
6.
|
22 |
-
7.
|
23 |
-
8.
|
24 |
|
25 |
Output ONLY the JSON object for the list of scenes.
|
26 |
Example for one scene:
|
27 |
{{
|
28 |
"scene_number": 1,
|
29 |
-
"
|
30 |
-
"
|
31 |
-
"
|
32 |
-
"
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
}}
|
37 |
|
38 |
Provide the full JSON structure for {num_scenes} scenes in a list:
|
@@ -43,91 +42,140 @@ def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful",
|
|
43 |
]
|
44 |
"""
|
45 |
|
46 |
-
def create_image_prompt_from_scene_data(scene_data, character_definitions=None,
|
47 |
"""
|
48 |
-
Generates
|
49 |
-
|
50 |
-
|
51 |
-
|
|
|
|
|
52 |
"""
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
emotional_beat = scene_data.get('emotional_beat', '')
|
58 |
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
66 |
else:
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
if len(characters_str_parts) == 1:
|
74 |
-
characters_involved_str = f" The scene features {characters_str_parts[0]}."
|
75 |
else:
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
79 |
|
80 |
-
|
81 |
-
if style_reference_desc:
|
82 |
-
style_mod = f" Artistic style inspired by: {style_reference_desc}."
|
83 |
-
|
84 |
-
# Constructing a more robust prompt for DALL-E 3
|
85 |
-
# DALL-E 3 benefits from descriptive, story-like prompts.
|
86 |
-
full_prompt = f"""
|
87 |
-
Generate a highly detailed, photorealistic and cinematic image.
|
88 |
-
Image Description: {base_desc}
|
89 |
-
Visual Style: {visual_style}. {style_mod}
|
90 |
-
Camera Perspective: {camera_angle}.
|
91 |
-
Emotional Tone: {emotional_beat}.
|
92 |
-
Key elements to emphasize: Cinematic composition, dramatic lighting, depth of field, rich textures, and atmospheric effects.
|
93 |
-
Output a visually stunning image suitable for a film storyboard.
|
94 |
-
"""
|
95 |
-
# Removed "Output only the prompt string" as this function *is* the prompt string builder.
|
96 |
-
# DALL-E 3 does not need "Suitable for DALL-E 3 or Midjourney".
|
97 |
-
return " ".join(full_prompt.split()) # Cleans up extra whitespace
|
98 |
|
99 |
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
|
100 |
-
|
101 |
-
|
|
|
|
|
102 |
if full_story_context:
|
103 |
-
context_str += f"Full story context
|
|
|
104 |
return f"""
|
105 |
-
You are an expert script doctor.
|
106 |
{context_str}
|
107 |
-
The user wants to modify this scene based on the following feedback: "{user_feedback}"
|
|
|
108 |
Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.
|
109 |
-
Maintain the same JSON structure as the original
|
110 |
-
|
|
|
|
|
|
|
|
|
111 |
"""
|
112 |
|
113 |
-
def create_visual_regeneration_prompt(
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# core/prompt_engineering.py
|
2 |
import json
|
3 |
|
|
|
|
|
|
|
|
|
|
|
4 |
def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
|
5 |
+
"""
|
6 |
+
Generates a prompt for Gemini to break down a story idea into scenes.
|
7 |
+
Emphasizes concise, descriptive key_actions suitable for video overlays.
|
8 |
+
"""
|
9 |
return f"""
|
10 |
You are an expert screenwriter and visual storyteller.
|
11 |
Based on the user's idea: "{user_idea}"
|
12 |
And considering the genre: "{genre}" and mood: "{mood}"
|
13 |
|
14 |
Generate a {num_scenes}-scene story breakdown. For each scene, provide:
|
15 |
+
1. scene_number (int): Sequential number of the scene.
|
16 |
+
2. emotional_beat (str): A short title or phrase capturing the core emotion/theme of this scene (e.g., "Desperate Escape," "Betrayal Revealed," "A Glimmer of Hope").
|
17 |
+
3. setting_description (str): Vivid description of the location, atmosphere, and key visual elements (approx 30-50 words).
|
18 |
+
4. characters_involved (list of str): Names of characters present and active in the scene.
|
19 |
+
5. key_action (str): The single most important event or character action happening in this specific visual moment, described concisely for a potential video overlay (max 15-20 words).
|
20 |
+
6. dialogue_snippet (str): A brief, impactful line of dialogue spoken in this scene, if any.
|
21 |
+
7. visual_style_suggestion (str): Keywords for the overall visual style of this scene (e.g., "Dark and gritty, high contrast, Blade Runner-esque neon reflections"). This can be influenced by the overall story mood.
|
22 |
+
8. camera_angle_suggestion (str): A specific camera shot type or angle suggestion (e.g., "Low-angle shot emphasizing power," "Dutch angle for unease," "Extreme close-up on eyes").
|
23 |
|
24 |
Output ONLY the JSON object for the list of scenes.
|
25 |
Example for one scene:
|
26 |
{{
|
27 |
"scene_number": 1,
|
28 |
+
"emotional_beat": "Tense Standoff",
|
29 |
+
"setting_description": "A rain-slicked, neon-drenched alleyway in Neo-Kyoto. Broken holographic advertisements flicker erratically, casting distorted shadows. The air hangs heavy with the smell of ozone and despair.",
|
30 |
+
"characters_involved": ["Detective Kaito Tanaka", "Informant (shadowy figure)"],
|
31 |
+
"key_action": "Kaito cautiously approaches a nervous informant huddled in the shadows.",
|
32 |
+
"dialogue_snippet": "Informant: 'They know you're looking... You're not safe.'",
|
33 |
+
"visual_style_suggestion": "Neo-noir, cyberpunk, high contrast, deep shadows, vibrant neon reflections in puddles, film grain.",
|
34 |
+
"camera_angle_suggestion": "Medium shot from behind Kaito, focusing on the informant, creating suspense."
|
35 |
}}
|
36 |
|
37 |
Provide the full JSON structure for {num_scenes} scenes in a list:
|
|
|
42 |
]
|
43 |
"""
|
44 |
|
45 |
+
def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
    """
    Generates a detailed image prompt for DALL-E 3 based on structured scene data,
    injecting character descriptions and global style preferences.

    scene_data (dict): Details for a single scene. Keys read here: 'emotional_beat',
        'setting_description', 'key_action', 'characters_involved',
        'visual_style_suggestion', 'camera_angle_suggestion' and 'mood'.
        Missing, None or blank values fall back to generic defaults.
    character_definitions (dict): {'character_name_lower': 'description', ...}
        Looked up with the stripped, lower-cased character name.
    global_style_reference (str): User-defined global style keywords, appended
        after the scene's own style suggestion.

    Returns (str): The prompt as one line, with every whitespace run collapsed
        to a single space.
    """
    def _field(key, default):
        # Every field is followed by its own period in the template below, so a
        # trailing one is dropped here to avoid "..". None/blank -> default.
        value = scene_data.get(key)
        text = "" if value is None else str(value).strip().rstrip(".").rstrip()
        return text or default

    emotional_beat_title = _field('emotional_beat', 'A cinematic scene')  # Used as part of the core request
    setting_desc = _field('setting_description', 'A visually interesting setting')
    key_action_desc = _field('key_action', 'A significant moment unfolds')
    camera_instr = _field('camera_angle_suggestion', 'eye-level medium shot')
    mood = _field('mood', 'cinematic')

    # --- Character Injection ---
    raw_names = scene_data.get('characters_involved') or []
    if isinstance(raw_names, str):
        # A bare string would otherwise be iterated character by character.
        raw_names = [raw_names]
    definitions = character_definitions or {}
    character_prompt_segments = []
    for raw_name in raw_names:
        char_name_clean = str(raw_name).strip()
        if not char_name_clean:
            continue
        char_visual_desc = definitions.get(char_name_clean.lower())
        if char_visual_desc:
            character_prompt_segments.append(f"{char_name_clean} (described as: {char_visual_desc})")
        else:
            # Character is present but no specific visual definition provided
            character_prompt_segments.append(char_name_clean)

    if not character_prompt_segments:
        characters_narrative = ""
        # No characters: avoid a dangling "They" with nothing to refer to.
        action_sentence = f"The key action is: {key_action_desc}."
    else:
        if len(character_prompt_segments) == 1:
            characters_narrative = f"The primary focus is on {character_prompt_segments[0]}."
        else:
            characters_narrative = (
                f"The scene prominently features {', '.join(character_prompt_segments[:-1])} "
                f"and {character_prompt_segments[-1]}."
            )
        action_sentence = f"They are engaged in the following key action: {key_action_desc}."
    # --- End Character Injection ---

    # --- Style Aggregation ---
    final_style_directive = _field('visual_style_suggestion', 'cinematic, photorealistic')
    global_style = str(global_style_reference or "").strip().rstrip(".").rstrip()
    if global_style:  # User's global style preference
        final_style_directive += f", {global_style}"
    # --- End Style Aggregation ---

    # The kind of image is stated first, then the details, as a narrative.
    prompt = (
        "Create an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image. "
        f"The image should depict: '{emotional_beat_title}'. "
        f"Setting: {setting_desc}. "
        f"{characters_narrative} "  # Empty when the scene lists no characters
        f"{action_sentence} "
        f"Visual Style and Atmosphere: {final_style_directive}. "
        f"Camera Composition: {camera_instr}. "
        f"Emphasize: Dramatic lighting (consider {mood} mood), rich textures, depth of field, and strong atmospheric effects like mist, rain, or dust if appropriate to the setting. "
        "The overall image must feel like a high-quality still from a major motion picture or a AAA video game. "
        "Pay close attention to character details if provided, ensuring they are distinct and match their descriptions."
    )

    return " ".join(prompt.split())  # Normalize whitespace
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
    """
    Prompt for Gemini to regenerate scene script details based on user feedback.

    original_scene_data (dict): The scene to rewrite; serialized to JSON and embedded in the prompt.
        Its 'scene_number' value (None if absent) is quoted in the heading.
    user_feedback (str): Free-text change request, embedded verbatim inside double quotes.
    full_story_context: Optional JSON-serializable story data; added for reference only when truthy.

    Returns (str): The multi-line prompt text.
    json.dumps raises TypeError if either structure holds a non-serializable value.
    """
    # ensure_ascii=False keeps accented / non-Latin story text readable in the
    # prompt instead of turning it into \uXXXX escape sequences.
    scene_json = json.dumps(original_scene_data, indent=2, ensure_ascii=False)
    context_str = f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n{scene_json}\n\n"
    if full_story_context:
        story_json = json.dumps(full_story_context, indent=2, ensure_ascii=False)
        context_str += f"Full story context for reference:\n{story_json}\n\n"

    return f"""
    You are an expert script doctor and editor.
    {context_str}
    The user wants to modify this specific scene based on the following feedback: "{user_feedback}"

    Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.
    Maintain the exact same JSON structure as the original: (scene_number, emotional_beat, setting_description, characters_involved, key_action, dialogue_snippet, visual_style_suggestion, camera_angle_suggestion).
    The 'scene_number' must remain unchanged.
    The 'key_action' should be a concise descriptive sentence (max 15-20 words) suitable for a brief video overlay.
    If feedback pertains to characters, setting, action, dialogue, style, or camera, update those fields accordingly.
    Ensure the regenerated scene remains coherent with the overall story context if provided.
    Focus on making the changes impactful and clear.
    """
|
127 |
|
128 |
+
def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
    """
    Prompt for Gemini to rewrite an existing DALL-E image prompt based on user feedback and scene context.

    original_image_prompt_text (str): The prompt that produced the previous image; quoted verbatim.
    user_feedback_on_visuals (str): The user's requested visual changes; quoted verbatim.
    scene_data (dict): Scene details. Keys read here: 'scene_number', 'emotional_beat',
        'setting_description', 'key_action', 'characters_involved',
        'visual_style_suggestion' and 'camera_angle_suggestion'.
    character_definitions (dict): {'character_name_lower': 'description', ...}
    global_style_reference (str): User-defined global style keywords.

    Returns (str): The instruction prompt as one line, with every whitespace run
        collapsed to a single space.
    """
    def _field(key, default=''):
        # None is treated like a missing key, and a trailing period is dropped
        # because the summary below appends its own.
        value = scene_data.get(key)
        text = "" if value is None else str(value).strip().rstrip(".").rstrip()
        return text or default

    # 'characters_involved' may be absent, None, or a bare string; normalize
    # once so the summary and the detailed list below agree and never raise.
    raw_names = scene_data.get('characters_involved') or []
    if isinstance(raw_names, str):
        raw_names = [raw_names]
    stripped_names = (str(raw_name).strip() for raw_name in raw_names)
    character_names = [name for name in stripped_names if name]

    # Reconstruct parts of what the original prompt *might* have focused on, for context
    scene_context_summary = (
        f"Scene Number: {_field('scene_number', 'N/A')}. "
        f"Emotional Beat: {_field('emotional_beat')}. "
        f"Setting: {_field('setting_description')}. "
        f"Action: {_field('key_action')}. "
        f"Characters: {', '.join(character_names) or 'None specified'}. "
        f"Current Style Hint: {_field('visual_style_suggestion')}. "
        f"Current Camera Hint: {_field('camera_angle_suggestion')}."
    )

    definitions = character_definitions or {}
    character_details_for_prompt = []
    for char_name_clean in character_names:
        char_visual_desc = definitions.get(char_name_clean.lower())
        if char_visual_desc:
            character_details_for_prompt.append(f"{char_name_clean} (described as: {char_visual_desc})")
        else:
            character_details_for_prompt.append(char_name_clean)
    characters_narrative = f"Relevant characters and their descriptions: {', '.join(character_details_for_prompt) if character_details_for_prompt else 'None specified'}."

    full_prompt_for_gemini = f"""
    You are an AI assistant specializing in refining image generation prompts for DALL-E 3.
    The user wants to modify a visual concept for a cinematic scene.

    Original Scene Context:
    {scene_context_summary}
    {characters_narrative}
    Global Style Reference (if any): "{global_style_reference}"

    The DALL-E 3 prompt that was used to generate the previous image was:
    "{original_image_prompt_text}"

    The user provided the following feedback on the visual generated by that prompt:
    "{user_feedback_on_visuals}"

    Your task is to generate a new, revised DALL-E 3 prompt that incorporates the user's feedback to achieve the desired visual changes.
    The new prompt should be ultra-detailed, photorealistic, and highly cinematic.
    It should instruct DALL-E 3 to create an image that feels like a high-quality still from a major motion picture or AAA video game.
    Maintain the core elements of the scene (setting, characters, key action) unless the feedback explicitly asks to change them.
    Focus on translating the user's feedback into concrete visual descriptions related to composition, lighting, color, character appearance/pose, atmosphere, etc.
    Ensure character descriptions from the context are respected and reinforced if characters are mentioned.
    The prompt should be a single block of text.

    Output ONLY the new, revised DALL-E 3 prompt string.
    """
    # Collapse the template's newlines and indentation into single spaces.
    return " ".join(full_prompt_for_gemini.split())
|