mgbam committed on
Commit
1cb1db0
·
verified ·
1 Parent(s): 9840152

Update core/prompt_engineering.py

Browse files
Files changed (1) hide show
  1. core/prompt_engineering.py +86 -153
core/prompt_engineering.py CHANGED
@@ -1,181 +1,114 @@
1
  # core/prompt_engineering.py
2
  import json
3
 
 
4
  def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
5
- """
6
- Generates a prompt for Gemini to break down a story idea into scenes.
7
- Emphasizes concise, descriptive key_actions suitable for video overlays.
8
- """
9
  return f"""
10
- You are an expert screenwriter and visual storyteller.
11
- Based on the user's idea: "{user_idea}"
12
- And considering the genre: "{genre}" and mood: "{mood}"
13
-
14
- Generate a {num_scenes}-scene story breakdown. For each scene, provide:
15
- 1. scene_number (int): Sequential number of the scene.
16
- 2. emotional_beat (str): A short title or phrase capturing the core emotion/theme of this scene (e.g., "Desperate Escape," "Betrayal Revealed," "A Glimmer of Hope").
17
- 3. setting_description (str): Vivid description of the location, atmosphere, and key visual elements (approx 30-50 words).
18
- 4. characters_involved (list of str): Names of characters present and active in the scene.
19
- 5. key_action (str): The single most important event or character action happening in this specific visual moment, described concisely for a potential video overlay (max 15-20 words).
20
- 6. dialogue_snippet (str): A brief, impactful line of dialogue spoken in this scene, if any.
21
- 7. visual_style_suggestion (str): Keywords for the overall visual style of this scene (e.g., "Dark and gritty, high contrast, Blade Runner-esque neon reflections"). This can be influenced by the overall story mood.
22
- 8. camera_angle_suggestion (str): A specific camera shot type or angle suggestion (e.g., "Low-angle shot emphasizing power," "Dutch angle for unease," "Extreme close-up on eyes").
23
-
24
- Output ONLY the JSON object for the list of scenes.
25
- Example for one scene:
26
- {{
27
- "scene_number": 1,
28
- "emotional_beat": "Tense Standoff",
29
- "setting_description": "A rain-slicked, neon-drenched alleyway in Neo-Kyoto. Broken holographic advertisements flicker erratically, casting distorted shadows. The air hangs heavy with the smell of ozone and despair.",
30
- "characters_involved": ["Detective Kaito Tanaka", "Informant (shadowy figure)"],
31
- "key_action": "Kaito cautiously approaches a nervous informant huddled in the shadows.",
32
- "dialogue_snippet": "Informant: 'They know you're looking... You're not safe.'",
33
- "visual_style_suggestion": "Neo-noir, cyberpunk, high contrast, deep shadows, vibrant neon reflections in puddles, film grain.",
34
- "camera_angle_suggestion": "Medium shot from behind Kaito, focusing on the informant, creating suspense."
35
- }}
36
-
37
- Provide the full JSON structure for {num_scenes} scenes in a list:
38
- [
39
- {{scene1_details...}},
40
- {{scene2_details...}},
41
- ...
42
- ]
43
  """
44
 
45
- def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
46
- """
47
- Generates a detailed image prompt for DALL-E 3 based on structured scene data,
48
- injecting character descriptions and global style preferences.
49
 
50
- scene_data (dict): Contains details for a single scene.
51
- character_definitions (dict): {'character_name_lower': 'description', ...}
52
- global_style_reference (str): User-defined global style keywords.
53
- """
54
- scene_num = scene_data.get('scene_number', 'N/A')
55
- emotional_beat_title = scene_data.get('emotional_beat', 'A cinematic scene') # Used as part of the core request
56
  setting_desc = scene_data.get('setting_description', 'A visually interesting setting.')
57
  key_action_desc = scene_data.get('key_action', 'A significant moment unfolds.')
58
-
59
- # --- Character Injection ---
60
  characters_involved_in_scene = scene_data.get('characters_involved', [])
61
  character_prompt_segments = []
62
  if characters_involved_in_scene:
63
  for char_name_from_scene in characters_involved_in_scene:
64
- char_name_clean = char_name_from_scene.strip()
65
- char_lookup_key = char_name_clean.lower()
66
  if character_definitions and char_lookup_key in character_definitions:
67
- char_visual_desc = character_definitions[char_lookup_key]
68
- character_prompt_segments.append(f"{char_name_clean} (described as: {char_visual_desc})")
69
- else:
70
- character_prompt_segments.append(char_name_clean) # Character is present but no specific visual definition provided
71
-
72
  characters_narrative = ""
73
  if character_prompt_segments:
74
- if len(character_prompt_segments) == 1:
75
- characters_narrative = f"The primary focus is on {character_prompt_segments[0]}."
76
- else:
77
- characters_narrative = f"The scene prominently features {', '.join(character_prompt_segments[:-1])} and {character_prompt_segments[-1]}."
78
- # --- End Character Injection ---
79
-
80
- # --- Style Aggregation ---
81
- scene_specific_style = scene_data.get('visual_style_suggestion', 'cinematic, photorealistic')
82
- final_style_directive = scene_specific_style
83
- if global_style_reference: # User's global style preference
84
- final_style_directive += f", {global_style_reference}"
85
- # --- End Style Aggregation ---
86
-
87
- camera_instr = scene_data.get('camera_angle_suggestion', 'eye-level medium shot')
88
-
89
- # Constructing the DALL-E 3 prompt with more narrative flow
90
- # DALL-E 3 often works best if you tell it what kind of image you want first, then the details.
91
- prompt = (
92
- f"Create an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image. "
93
- f"The image should depict: '{emotional_beat_title}'. "
94
- f"Setting: {setting_desc}. "
95
- f"{characters_narrative} " # This will be empty if no characters, or list them with descriptions
96
- f"They are engaged in the following key action: {key_action_desc}. "
97
- f"Visual Style and Atmosphere: {final_style_directive}. "
98
- f"Camera Composition: {camera_instr}. "
99
- f"Emphasize: Dramatic lighting (consider {scene_data.get('mood','cinematic')} mood), rich textures, depth of field, and strong atmospheric effects like mist, rain, or dust if appropriate to the setting. "
100
- f"The overall image must feel like a high-quality still from a major motion picture or a AAA video game. "
101
- f"Pay close attention to character details if provided, ensuring they are distinct and match their descriptions."
102
- )
103
-
104
- return " ".join(prompt.split()) # Normalize whitespace
105
-
106
- def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
107
  """
108
- Prompt for Gemini to regenerate scene script details based on user feedback.
 
109
  """
110
- context_str = f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n{json.dumps(original_scene_data, indent=2)}\n\n"
111
- if full_story_context:
112
- context_str += f"Full story context for reference:\n{json.dumps(full_story_context, indent=2)}\n\n"
113
-
114
- return f"""
115
- You are an expert script doctor and editor.
116
- {context_str}
117
- The user wants to modify this specific scene based on the following feedback: "{user_feedback}"
 
 
 
118
 
119
- Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.
120
- Maintain the exact same JSON structure as the original: (scene_number, emotional_beat, setting_description, characters_involved, key_action, dialogue_snippet, visual_style_suggestion, camera_angle_suggestion).
121
- The 'scene_number' must remain unchanged.
122
- The 'key_action' should be a concise descriptive sentence (max 15-20 words) suitable for a brief video overlay.
123
- If feedback pertains to characters, setting, action, dialogue, style, or camera, update those fields accordingly.
124
- Ensure the regenerated scene remains coherent with the overall story context if provided.
125
- Focus on making the changes impactful and clear.
126
- """
127
 
128
- def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
129
- """
130
- Prompt for Gemini to rewrite an existing DALL-E image prompt based on user feedback and scene context.
131
- """
132
- # Reconstruct parts of what the original prompt *might* have focused on, for context
133
- scene_context_summary = (
134
- f"Scene Number: {scene_data.get('scene_number', 'N/A')}. "
135
- f"Emotional Beat: {scene_data.get('emotional_beat', '')}. "
136
- f"Setting: {scene_data.get('setting_description', '')}. "
137
- f"Action: {scene_data.get('key_action', '')}. "
138
- f"Characters: {', '.join(scene_data.get('characters_involved',[]))}. "
139
- f"Current Style Hint: {scene_data.get('visual_style_suggestion', '')}. "
140
- f"Current Camera Hint: {scene_data.get('camera_angle_suggestion', '')}."
141
- )
142
-
143
- character_details_for_prompt = []
144
- if scene_data.get('characters_involved'):
145
- for char_name_in_scene in scene_data.get('characters_involved', []):
146
- char_name_clean = char_name_in_scene.strip()
147
- char_lookup_key = char_name_clean.lower()
148
- if character_definitions and char_lookup_key in character_definitions:
149
- char_visual_desc = character_definitions[char_lookup_key]
150
- character_details_for_prompt.append(f"{char_name_clean} (described as: {char_visual_desc})")
151
- else:
152
- character_details_for_prompt.append(char_name_clean)
153
- characters_narrative = f"Relevant characters and their descriptions: {', '.join(character_details_for_prompt) if character_details_for_prompt else 'None specified'}."
154
 
 
 
155
 
156
- full_prompt_for_gemini = f"""
157
- You are an AI assistant specializing in refining image generation prompts for DALL-E 3.
158
- The user wants to modify a visual concept for a cinematic scene.
 
 
 
 
 
159
 
160
- Original Scene Context:
161
- {scene_context_summary}
162
- {characters_narrative}
163
- Global Style Reference (if any): "{global_style_reference}"
164
 
165
- The DALL-E 3 prompt that was used to generate the previous image was:
166
- "{original_image_prompt_text}"
 
167
 
168
- The user provided the following feedback on the visual generated by that prompt:
169
- "{user_feedback_on_visuals}"
170
 
171
- Your task is to generate a new, revised DALL-E 3 prompt that incorporates the user's feedback to achieve the desired visual changes.
172
- The new prompt should be ultra-detailed, photorealistic, and highly cinematic.
173
- It should instruct DALL-E 3 to create an image that feels like a high-quality still from a major motion picture or AAA video game.
174
- Maintain the core elements of the scene (setting, characters, key action) unless the feedback explicitly asks to change them.
175
- Focus on translating the user's feedback into concrete visual descriptions related to composition, lighting, color, character appearance/pose, atmosphere, etc.
176
- Ensure character descriptions from the context are respected and reinforced if characters are mentioned.
177
- The prompt should be a single block of text.
178
 
179
- Output ONLY the new, revised DALL-E 3 prompt string.
180
- """
181
- return " ".join(full_prompt_for_gemini.split())
 
 
 
 
 
 
 
 
1
  # core/prompt_engineering.py
2
  import json
3
 
4
# Prompt builder for the initial story-idea -> scene-list breakdown.
def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
    """Build the prompt that requests a JSON scene breakdown for a story idea.

    Args:
        user_idea: Free-text story idea; interpolated verbatim inside quotes.
        genre: Genre label interpolated into the prompt.
        mood: Mood label interpolated into the prompt.
        num_scenes: Number of scenes the prompt asks for.

    Returns:
        str: The prompt text. It lists the eight fields expected per scene
        (scene_number, emotional_beat, setting_description,
        characters_involved, key_action, dialogue_snippet,
        visual_style_suggestion, camera_angle_suggestion), shows one example
        scene object and asks for a JSON list only.
    """
    # Doubled braces ({{ }}) are emitted as literal braces by the f-string, so
    # the JSON example reaches the model with single braces.
    return f"""
    You are an expert screenwriter and visual storyteller. Based on: "{user_idea}", genre: "{genre}", mood: "{mood}".
    Generate a {num_scenes}-scene story breakdown. For each scene:
    1. scene_number (int)
    2. emotional_beat (str): Short title for the scene's core feeling.
    3. setting_description (str): Vivid description (30-50 words).
    4. characters_involved (list of str): Names of characters.
    5. key_action (str): Main event for video overlay (15-20 words).
    6. dialogue_snippet (str): Brief impactful dialogue.
    7. visual_style_suggestion (str): Keywords for visual style.
    8. camera_angle_suggestion (str): Specific camera shot.
    Output ONLY the JSON list of scenes. Example:
    {{ "scene_number": 1, "emotional_beat": "Tense Standoff", "setting_description": "Rain-slicked, neon-drenched alleyway...", "characters_involved": ["Detective Kaito", "Informant"], "key_action": "Kaito cautiously approaches a nervous informant.", "dialogue_snippet": "Informant: 'They know...'", "visual_style_suggestion": "Neo-noir, cyberpunk...", "camera_angle_suggestion": "Medium shot..."}}
    [{{"scene1_details..."}}, {{"scene2_details..."}}]
    """
21
 
 
 
 
 
22
 
23
# Image prompt builder: injects character definitions and style references.
def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
    """Build a single-line image-generation prompt from one scene's data.

    Args:
        scene_data: Dict for one scene. Keys read here: 'scene_number',
            'emotional_beat', 'setting_description', 'key_action',
            'characters_involved', 'visual_style_suggestion' and
            'camera_angle_suggestion'. Missing keys fall back to defaults.
        character_definitions: Optional mapping from lower-cased character
            name to a visual description. A scene character whose stripped,
            lower-cased name is a key gets its description appended.
        global_style_reference: Optional extra style keywords appended to the
            scene's own style suggestion.

    Returns:
        str: The prompt with all whitespace runs collapsed to single spaces.
    """
    emotional_beat_title = scene_data.get('emotional_beat', 'A cinematic scene')
    setting_desc = scene_data.get('setting_description', 'A visually interesting setting.')
    key_action_desc = scene_data.get('key_action', 'A significant moment unfolds.')

    # `or []` also covers an explicit None value for the key.
    character_prompt_segments = []
    for char_name_from_scene in scene_data.get('characters_involved') or []:
        char_name_clean = char_name_from_scene.strip()
        if not char_name_clean:
            # A blank name would otherwise render as "The main character is ."
            continue
        char_lookup_key = char_name_clean.lower()
        if character_definitions and char_lookup_key in character_definitions:
            character_prompt_segments.append(
                f"{char_name_clean} (described as: {character_definitions[char_lookup_key]})"
            )
        else:
            character_prompt_segments.append(char_name_clean)

    if not character_prompt_segments:
        characters_narrative = ""
    elif len(character_prompt_segments) == 1:
        characters_narrative = f"The main character is {character_prompt_segments[0]}."
    elif len(character_prompt_segments) == 2:
        # No serial comma for exactly two names ("A and B", not "A, and B").
        characters_narrative = (
            f"The scene features {character_prompt_segments[0]} and {character_prompt_segments[1]}."
        )
    else:
        leading_names = ', '.join(character_prompt_segments[:-1])
        characters_narrative = f"The scene features {leading_names}, and {character_prompt_segments[-1]}."

    narrative_prompt = (
        f"Scene Number: {scene_data.get('scene_number', 'N/A')}. Setting: {setting_desc}. "
        f"{characters_narrative} Key Action: {key_action_desc}. "
        f"Emotional Tone: {scene_data.get('emotional_beat', '')}."
    )

    style_instructions = f"Visual Style: {scene_data.get('visual_style_suggestion', 'cinematic, photorealistic')}."
    if global_style_reference:
        style_instructions += f" Specific style reference: {global_style_reference}."

    camera_instructions = f"Camera Perspective: {scene_data.get('camera_angle_suggestion', 'eye-level medium shot')}."

    full_prompt = (
        f"Generate an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image. "
        f"The image should depict: '{emotional_beat_title}'. Narrative Context: {narrative_prompt} "
        f"Artistic & Technical Instructions: {style_instructions} {camera_instructions} "
        f"Emphasize: Cinematic composition, dramatic lighting, rich textures, depth of field, strong atmospheric effects. "
        f"The image must feel like a high-quality film still. Pay close attention to character details."
    )
    # Collapse whitespace so an empty characters_narrative leaves no double space.
    return " ".join(full_prompt.split())
50
+
51
+
52
# --- Prompt for Narration Script ---
def create_narration_script_prompt(story_scenes_data, overall_mood, overall_genre):
    """Build the prompt asking Gemini for one continuous narration script.

    Args:
        story_scenes_data: Iterable of scene dicts. Keys read per scene:
            'scene_number', 'emotional_beat', 'setting_description',
            'key_action', 'characters_involved' and 'dialogue_snippet'.
            A missing 'scene_number' falls back to the 1-based position.
        overall_mood: Mood label interpolated into the prompt.
        overall_genre: Genre label interpolated into the prompt.

    Returns:
        str: A multi-line prompt. Line breaks are kept, so each scene summary
        stays a separate bulleted block between the SCENE SUMMARIES markers.
    """
    scenes_summary = []
    for position, scene in enumerate(story_scenes_data, start=1):
        # `or` fallbacks cover both a missing key and an explicit None/empty value.
        character_names = ', '.join(scene.get('characters_involved') or [])
        dialogue = scene.get('dialogue_snippet') or '(none)'
        scenes_summary.append(
            f"Scene {scene.get('scene_number', position)} ({scene.get('emotional_beat', '')}):\n"
            f"- Setting: {scene.get('setting_description', '')}\n"
            f"- Key Action: {scene.get('key_action', '')}\n"
            f"- Characters: {character_names}\n"
            f"- Implied Dialogue/Thought: {dialogue}"
        )
    full_summary_text = "\n\n".join(scenes_summary)

    # Assemble line by line: collapsing whitespace over the whole prompt would
    # erase the newlines that delimit the scene summaries built above.
    prompt_lines = [
        "You are a professional scriptwriter for documentary-style voiceovers and cinematic trailers.",
        "Given the following scene summaries for an animatic storyboard:",
        "",
        "--- SCENE SUMMARIES ---",
        full_summary_text,
        "--- END SCENE SUMMARIES ---",
        "",
        f"Overall Story Genre: {overall_genre}",
        f"Overall Story Mood: {overall_mood}",
        "",
        "Write a concise, engaging, and continuous narration script that flows smoothly across these scenes.",
        "The narration should enhance the visual storytelling, not just describe what's visible.",
        "It should set the tone, build suspense or emotion, and connect the scenes thematically.",
        "The tone of the narration should match the overall mood and genre.",
        "Keep the narration for each scene relatively brief (1-2 short sentences per scene on average).",
        "The total narration should be suitable for a short animatic (e.g., if 3 scenes at 4 seconds each, "
        "total ~12 seconds of video, so narration should be ~60-90 words max).",
        'Do not include scene numbers or explicit directives like "(Voiceover)" in the output. '
        "Just provide the pure narration text.",
        "Focus on evocative language.",
        "",
        "Example (if scenes were about a space discovery):",
        '"The red dust of Mars whispered secrets of a forgotten age. Deep within the chasm, an impossible '
        "structure pulsed with an alien light, beckoning humanity towards a destiny unknown, and perhaps, "
        'a truth too vast to comprehend."',
        "",
        "Output ONLY the narration script text.",
    ]
    return "\n".join(prompt_lines)
96
 
 
 
97
 
98
# Prompt for regenerating one scene's script details from user feedback.
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
    """Build the prompt asking the model to rewrite a single scene as JSON.

    Args:
        original_scene_data: Dict for the scene being revised; serialised with
            json.dumps, so it must be JSON-serialisable.
        user_feedback: Free-text change request, interpolated inside quotes.
        full_story_context: Optional JSON-serialisable story data (for example
            the list of all scenes) appended for reference when truthy.

    Returns:
        str: The prompt text containing the original scene JSON, the optional
        story context, the feedback and the regeneration instructions.
    """
    # ensure_ascii=False keeps non-ASCII names readable instead of \uXXXX escapes.
    scene_json = json.dumps(original_scene_data, indent=2, ensure_ascii=False)
    context_str = (
        f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n"
        f"{scene_json}\n\n"
    )
    if full_story_context:
        story_json = json.dumps(full_story_context, indent=2, ensure_ascii=False)
        context_str += f"Full story context:\n{story_json}\n\n"

    # context_str already opens with the "Original scene" label, so the role
    # line does not repeat it.
    return (
        f"Expert script doctor.\n{context_str}User feedback: \"{user_feedback}\"\n"
        "Regenerate ONLY the JSON for this single scene, incorporating feedback. "
        "Maintain structure. 'key_action' max 15-20 words."
    )
 
104
 
105
# Prompt for rewriting an existing DALL-E 3 image prompt from visual feedback.
def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
    """Build the prompt asking the model to revise a DALL-E 3 image prompt.

    Args:
        original_image_prompt_text: The image prompt used for the previous image.
        user_feedback_on_visuals: Free-text feedback on that image.
        scene_data: Dict for the scene. Keys read here: 'emotional_beat',
            'setting_description', 'key_action' and 'characters_involved'.
        character_definitions: Optional mapping from character name to a visual
            description. Keys are matched case-insensitively, ignoring
            surrounding whitespace.
        global_style_reference: Optional global style keywords, quoted verbatim.

    Returns:
        str: The prompt text. Its "Relevant characters:" section lists every
        scene character in scene order, with the description in parentheses
        when one is defined, or "None specified" when the scene has none.
    """
    # `or []` covers an explicit None; blank names are dropped.
    scene_characters = [
        name.strip() for name in scene_data.get('characters_involved') or [] if name.strip()
    ]

    scene_context_summary = (
        f"Scene: {scene_data.get('emotional_beat', '')}. Setting: {scene_data.get('setting_description', '')}. "
        f"Action: {scene_data.get('key_action', '')}. Characters: {', '.join(scene_characters)}."
    )

    # Normalise definition keys once so each lookup is a single dict access.
    definitions_by_key = {
        key.strip().lower(): description
        for key, description in (character_definitions or {}).items()
    }
    character_details = []
    for name in scene_characters:
        description = definitions_by_key.get(name.lower())
        character_details.append(f"{name} ({description})" if description else name)

    # The label is always present; only the listing falls back to a placeholder.
    listing = ", ".join(character_details) if character_details else "None specified"
    char_details_str = f"Relevant characters: {listing}."

    return (
        f"AI assistant for refining DALL-E 3 prompts. Original Scene Context: {scene_context_summary} {char_details_str} "
        f"Global Style: \"{global_style_reference}\". Original DALL-E 3 prompt was: \"{original_image_prompt_text}\". "
        f"User feedback on visual: \"{user_feedback_on_visuals}\". Generate a new, revised DALL-E 3 prompt. "
        f"It must be ultra-detailed, photorealistic, cinematic, film/game quality. Translate feedback into concrete visual descriptions. "
        f"Respect character descriptions. Output ONLY the new prompt string."
    )