mgbam committed on
Commit
5876552
·
verified ·
1 Parent(s): 990e23e

Update core/prompt_engineering.py

Browse files
Files changed (1) hide show
  1. core/prompt_engineering.py +141 -93
core/prompt_engineering.py CHANGED
@@ -1,38 +1,37 @@
1
  # core/prompt_engineering.py
2
  import json
3
 
4
- # create_story_breakdown_prompt can remain mostly the same, but ensure 'key_action' is descriptive.
5
- # If key_action is very short, text overlays on video might not be useful.
6
- # Consider making 'key_action' slightly more verbose or adding a 'video_overlay_text' field.
7
- # For now, we'll use key_action.
8
-
9
  def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
 
 
 
 
10
  return f"""
11
  You are an expert screenwriter and visual storyteller.
12
  Based on the user's idea: "{user_idea}"
13
  And considering the genre: "{genre}" and mood: "{mood}"
14
 
15
  Generate a {num_scenes}-scene story breakdown. For each scene, provide:
16
- 1. scene_number (int)
17
- 2. setting_description (str): Vivid description of the location and atmosphere (approx 20-40 words).
18
- 3. characters_involved (list of str): Names of characters in the scene.
19
- 4. key_action (str): The main event or action happening in one concise sentence (approx 10-20 words). This will be used for video overlays.
20
- 5. dialogue_snippet (str): A brief, impactful line of dialogue if applicable.
21
- 6. visual_style_suggestion (str): e.g., "Dark and gritty, high contrast, Blade Runner-esque neon"
22
- 7. camera_angle_suggestion (str): e.g., "Low-angle shot to emphasize power"
23
- 8. emotional_beat (str): The core emotion or turning point in the scene.
24
 
25
  Output ONLY the JSON object for the list of scenes.
26
  Example for one scene:
27
  {{
28
  "scene_number": 1,
29
- "setting_description": "A dimly lit, cluttered spaceship cockpit. Warning lights flash intermittently. Steam vents from a broken pipe.",
30
- "characters_involved": ["Captain Eva Rostova"],
31
- "key_action": "Eva frantically works at a console, trying to divert a catastrophic system failure.",
32
- "dialogue_snippet": "Eva: 'Come on, come on... don't do this to me now!'",
33
- "visual_style_suggestion": "Claustrophobic, practical lighting, lens flares, metallic sheens.",
34
- "camera_angle_suggestion": "Close-up on Eva's determined face, sweat beading on her forehead.",
35
- "emotional_beat": "Desperation and intense focus."
36
  }}
37
 
38
  Provide the full JSON structure for {num_scenes} scenes in a list:
@@ -43,91 +42,140 @@ def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful",
43
  ]
44
  """
45
 
46
def create_image_prompt_from_scene_data(scene_data, character_definitions=None, style_reference_desc=None):
    """
    Build a single-line image-generation prompt from one scene's structured data.

    Args:
        scene_data: dict for a single scene. Keys read: 'scene_number',
            'key_action', 'setting_description', 'characters_involved',
            'visual_style_suggestion', 'camera_angle_suggestion' and
            'emotional_beat'. Missing, None or blank values are tolerated.
        character_definitions: optional dict {lowercased character name: description}.
            A matching description is appended in parentheses after the name.
        style_reference_desc: optional textual description of a desired style.

    Returns:
        str: the prompt, with every run of whitespace collapsed to one space.
    """
    def _clean(key, fallback=""):
        # Drop a trailing period so the template's own "." never doubles up.
        raw = scene_data.get(key)
        text = "" if raw is None else str(raw).strip().rstrip(".").rstrip()
        return text or fallback

    # --- Characters: keep the scene's spelling, look descriptions up case-insensitively ---
    definitions = character_definitions or {}
    cast = []
    for raw_name in scene_data.get('characters_involved') or []:
        name = "" if raw_name is None else str(raw_name).strip()
        if not name:
            continue
        description = definitions.get(name.lower())
        cast.append(f"{name} ({description})" if description else name)

    # --- Image description: only include the sentences we actually have data for ---
    description_parts = []
    scene_number = scene_data.get('scene_number')
    if scene_number not in (None, ""):
        description_parts.append(f"Scene {scene_number}.")
    key_action = _clean('key_action')
    if key_action:
        description_parts.append(f"{key_action}.")
    if len(cast) == 1:
        description_parts.append(f"The scene features {cast[0]}.")
    elif cast:
        description_parts.append(f"The scene features {', '.join(cast[:-1])} and {cast[-1]}.")
    setting = _clean('setting_description')
    if setting:
        description_parts.append(f"Setting: {setting}.")

    lines = ["Generate a highly detailed, photorealistic and cinematic image."]
    if description_parts:
        lines.append("Image Description: Depict: " + " ".join(description_parts))
    lines.append(f"Visual Style: {_clean('visual_style_suggestion', 'cinematic')}.")
    if style_reference_desc:
        style_ref = str(style_reference_desc).strip().rstrip(".")
        lines.append(f"Artistic style inspired by: {style_ref}.")

    # Optional fields are skipped entirely rather than emitted as "Label: ."
    camera = _clean('camera_angle_suggestion')
    if camera:
        lines.append(f"Camera Perspective: {camera}.")
    beat = _clean('emotional_beat')
    if beat:
        lines.append(f"Emotional Tone: {beat}.")

    lines.append(
        "Key elements to emphasize: Cinematic composition, dramatic lighting, "
        "depth of field, rich textures, and atmospheric effects."
    )
    lines.append("Output a visually stunning image suitable for a film storyboard.")

    # Normalize whitespace so the prompt is one clean line.
    return " ".join(" ".join(lines).split())
98
 
99
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
    """
    Build the prompt asking the model to rewrite one scene's JSON from user feedback.

    Args:
        original_scene_data: dict for the scene being revised; embedded as JSON.
        user_feedback: the user's free-text change request.
        full_story_context: optional JSON-serializable story data, embedded for
            reference when truthy.

    Returns:
        str: the prompt text.

    Raises:
        TypeError: from json.dumps if either structure is not JSON-serializable.
    """
    # ensure_ascii=False keeps non-ASCII story text readable instead of \uXXXX escapes.
    sections = [
        "You are an expert script doctor.",
        "Original scene details:\n"
        + json.dumps(original_scene_data, indent=2, ensure_ascii=False),
    ]

    if full_story_context:
        # Fall back to 'N/A' so a missing key does not render as "number None".
        scene_number = original_scene_data.get('scene_number', 'N/A')
        sections.append(
            f"Full story context (this scene is number {scene_number}):\n"
            + json.dumps(full_story_context, indent=2, ensure_ascii=False)
        )

    sections.append(
        f'The user wants to modify this scene based on the following feedback: "{user_feedback}"'
    )
    sections.append(
        "Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.\n"
        "Maintain the same JSON structure as the original scene (scene_number, setting_description, "
        "characters_involved, key_action, dialogue_snippet, visual_style_suggestion, "
        "camera_angle_suggestion, emotional_beat).\n"
        "Ensure the scene_number remains the same. The 'key_action' should be a concise "
        "descriptive sentence (10-20 words)."
    )
    return "\n\n".join(sections) + "\n"
112
 
113
def create_visual_regeneration_prompt(original_image_prompt, user_feedback_on_visuals, scene_data):
    """
    Build the prompt asking the model to rewrite an image-generation prompt.

    Args:
        original_image_prompt: the image prompt used for the previous image.
        user_feedback_on_visuals: the user's free-text feedback on that image.
        scene_data: dict for the scene. Keys read: 'setting_description',
            'key_action', 'characters_involved', 'emotional_beat',
            'visual_style_suggestion' and 'camera_angle_suggestion'.
            Missing or None values are rendered as "not specified".

    Returns:
        str: the multi-line prompt text.
    """
    def _text(key):
        raw = scene_data.get(key)
        text = "" if raw is None else str(raw).strip()
        return text or "not specified"

    # Tolerate a missing/None list and non-string or blank entries.
    names = [
        str(raw_name).strip()
        for raw_name in (scene_data.get('characters_involved') or [])
        if raw_name is not None and str(raw_name).strip()
    ]
    characters = ", ".join(names) if names else "none listed"

    lines = [
        f'The previous detailed image generation prompt for a scene was: "{original_image_prompt}"',
        "The scene details are:",
        f"Setting: {_text('setting_description')}",
        f"Action: {_text('key_action')}",
        f"Characters: {characters}",
        f"Mood/Emotion: {_text('emotional_beat')}",
        f"Current Visual Style: {_text('visual_style_suggestion')}",
        f"Current Camera: {_text('camera_angle_suggestion')}",
        "",
        f'The user provided this feedback on the visuals: "{user_feedback_on_visuals}"',
        "",
        "Generate a new, refined, highly detailed, photorealistic and cinematic image generation prompt based on this feedback.",
        "The new prompt should aim to correct or enhance the visuals as per the user's request, while maintaining the core scene elements.",
        "Ensure the prompt is descriptive and suitable for generating a stunning image for a film storyboard with DALL-E 3.",
        "Focus on cinematic composition, dramatic lighting, depth of field, rich textures, and atmospheric effects.",
        "Output only the new prompt string.",
    ]
    return "\n".join(lines) + "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # core/prompt_engineering.py
2
  import json
3
 
 
 
 
 
 
4
  def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
5
+ """
6
+ Generates a prompt for Gemini to break down a story idea into scenes.
7
+ Emphasizes concise, descriptive key_actions suitable for video overlays.
8
+ """
9
  return f"""
10
  You are an expert screenwriter and visual storyteller.
11
  Based on the user's idea: "{user_idea}"
12
  And considering the genre: "{genre}" and mood: "{mood}"
13
 
14
  Generate a {num_scenes}-scene story breakdown. For each scene, provide:
15
+ 1. scene_number (int): Sequential number of the scene.
16
+ 2. emotional_beat (str): A short title or phrase capturing the core emotion/theme of this scene (e.g., "Desperate Escape," "Betrayal Revealed," "A Glimmer of Hope").
17
+ 3. setting_description (str): Vivid description of the location, atmosphere, and key visual elements (approx 30-50 words).
18
+ 4. characters_involved (list of str): Names of characters present and active in the scene.
19
+ 5. key_action (str): The single most important event or character action happening in this specific visual moment, described concisely for a potential video overlay (max 15-20 words).
20
+ 6. dialogue_snippet (str): A brief, impactful line of dialogue spoken in this scene, if any.
21
+ 7. visual_style_suggestion (str): Keywords for the overall visual style of this scene (e.g., "Dark and gritty, high contrast, Blade Runner-esque neon reflections"). This can be influenced by the overall story mood.
22
+ 8. camera_angle_suggestion (str): A specific camera shot type or angle suggestion (e.g., "Low-angle shot emphasizing power," "Dutch angle for unease," "Extreme close-up on eyes").
23
 
24
  Output ONLY the JSON object for the list of scenes.
25
  Example for one scene:
26
  {{
27
  "scene_number": 1,
28
+ "emotional_beat": "Tense Standoff",
29
+ "setting_description": "A rain-slicked, neon-drenched alleyway in Neo-Kyoto. Broken holographic advertisements flicker erratically, casting distorted shadows. The air hangs heavy with the smell of ozone and despair.",
30
+ "characters_involved": ["Detective Kaito Tanaka", "Informant (shadowy figure)"],
31
+ "key_action": "Kaito cautiously approaches a nervous informant huddled in the shadows.",
32
+ "dialogue_snippet": "Informant: 'They know you're looking... You're not safe.'",
33
+ "visual_style_suggestion": "Neo-noir, cyberpunk, high contrast, deep shadows, vibrant neon reflections in puddles, film grain.",
34
+ "camera_angle_suggestion": "Medium shot from behind Kaito, focusing on the informant, creating suspense."
35
  }}
36
 
37
  Provide the full JSON structure for {num_scenes} scenes in a list:
 
42
  ]
43
  """
44
 
45
def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
    """
    Generate a detailed, single-line DALL-E 3 image prompt from structured scene data,
    injecting character descriptions and a global style preference.

    Args:
        scene_data (dict): Details for a single scene. Keys read: 'emotional_beat',
            'setting_description', 'key_action', 'characters_involved',
            'visual_style_suggestion', 'camera_angle_suggestion' and 'mood'.
            Missing, None or blank values fall back to generic defaults.
        character_definitions (dict): {'character_name_lower': 'description', ...}.
            Names are matched case-insensitively after stripping whitespace.
        global_style_reference (str): User-defined global style keywords, appended
            to the scene's own style when non-empty.

    Returns:
        str: The prompt, with every run of whitespace collapsed to one space.
    """
    def _field(key, fallback):
        # Treat missing/None/blank alike and drop trailing periods, because the
        # template adds its own "." after every field.
        raw = scene_data.get(key)
        text = "" if raw is None else str(raw).strip().rstrip(".").rstrip()
        return text or fallback

    beat = _field('emotional_beat', 'A cinematic scene')
    setting = _field('setting_description', 'A visually interesting setting')
    action = _field('key_action', 'A significant moment unfolds')
    camera = _field('camera_angle_suggestion', 'eye-level medium shot')

    # --- Character injection ---
    names = scene_data.get('characters_involved') or []
    if isinstance(names, str):
        # A bare string would otherwise be iterated character by character.
        names = [names]
    definitions = character_definitions or {}
    segments = []
    for raw_name in names:
        name = "" if raw_name is None else str(raw_name).strip()
        if not name:
            continue
        visual_desc = definitions.get(name.lower())
        # Present but undefined characters are still named, just without a description.
        segments.append(f"{name} (described as: {visual_desc})" if visual_desc else name)

    if not segments:
        # No characters: avoid the dangling pronoun "They are engaged in ...".
        cast_sentence = ""
        action_sentence = f"Key action: {action}."
    else:
        if len(segments) == 1:
            cast_sentence = f"The primary focus is on {segments[0]}."
        else:
            cast_sentence = (
                f"The scene prominently features {', '.join(segments[:-1])} and {segments[-1]}."
            )
        action_sentence = f"They are engaged in the following key action: {action}."

    # --- Style aggregation: scene-specific style first, then the global preference ---
    style = _field('visual_style_suggestion', 'cinematic, photorealistic')
    global_style = str(global_style_reference or "").strip().rstrip(".").rstrip()
    if global_style:
        style += f", {global_style}"

    # NOTE: the scene schema requested by create_story_breakdown_prompt has no
    # 'mood' key, so this normally falls back to 'cinematic'.
    mood = scene_data.get('mood') or 'cinematic'

    # DALL-E 3 tends to do best when told the kind of image first, then the details.
    pieces = [
        "Create an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image.",
        f"The image should depict: '{beat}'.",
        f"Setting: {setting}.",
        cast_sentence,
        action_sentence,
        f"Visual Style and Atmosphere: {style}.",
        f"Camera Composition: {camera}.",
        f"Emphasize: Dramatic lighting (consider {mood} mood), rich textures, depth of field, "
        "and strong atmospheric effects like mist, rain, or dust if appropriate to the setting.",
        "The overall image must feel like a high-quality still from a major motion picture or a AAA video game.",
        "Pay close attention to character details if provided, ensuring they are distinct and match their descriptions.",
    ]
    return " ".join(" ".join(pieces).split())  # Normalize whitespace
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
    """
    Prompt for Gemini to regenerate one scene's script details based on user feedback.

    Args:
        original_scene_data (dict): The scene being revised; embedded as JSON.
        user_feedback (str): The user's free-text change request.
        full_story_context: Optional JSON-serializable story data, embedded for
            reference when truthy.

    Returns:
        str: The prompt text.

    Raises:
        TypeError: From json.dumps if either structure is not JSON-serializable.
    """
    # 'N/A' matches the fallback used by the other prompt builders in this file
    # and avoids rendering "Scene Number None".
    scene_number = original_scene_data.get('scene_number', 'N/A')

    # ensure_ascii=False keeps non-ASCII story text readable instead of \uXXXX escapes.
    sections = [
        "You are an expert script doctor and editor.",
        f"Original scene (Scene Number {scene_number}):\n"
        + json.dumps(original_scene_data, indent=2, ensure_ascii=False),
    ]
    if full_story_context:
        sections.append(
            "Full story context for reference:\n"
            + json.dumps(full_story_context, indent=2, ensure_ascii=False)
        )

    sections.append(
        f'The user wants to modify this specific scene based on the following feedback: "{user_feedback}"'
    )
    sections.append(
        "\n".join(
            [
                "Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.",
                "Maintain the exact same JSON structure as the original: (scene_number, emotional_beat, "
                "setting_description, characters_involved, key_action, dialogue_snippet, "
                "visual_style_suggestion, camera_angle_suggestion).",
                "The 'scene_number' must remain unchanged.",
                "The 'key_action' should be a concise descriptive sentence (max 15-20 words) "
                "suitable for a brief video overlay.",
                "If feedback pertains to characters, setting, action, dialogue, style, or camera, "
                "update those fields accordingly.",
                "Ensure the regenerated scene remains coherent with the overall story context if provided.",
                "Focus on making the changes impactful and clear.",
            ]
        )
    )
    return "\n\n".join(sections) + "\n"
127
 
128
def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
    """
    Prompt for Gemini to rewrite an existing DALL-E image prompt based on user
    feedback and scene context.

    Args:
        original_image_prompt_text (str): The prompt used for the previous image.
        user_feedback_on_visuals (str): The user's feedback on that image.
        scene_data (dict): Scene details. Keys read: 'scene_number', 'emotional_beat',
            'setting_description', 'key_action', 'characters_involved',
            'visual_style_suggestion' and 'camera_angle_suggestion'.
            Missing, None or blank values are rendered as a placeholder.
        character_definitions (dict): {'character_name_lower': 'description', ...}.
        global_style_reference (str): User-defined global style keywords.

    Returns:
        str: The prompt, with every run of whitespace collapsed to one space.
    """
    def _text(key, fallback="not specified"):
        # Drop trailing periods because each summary field is followed by its own ".".
        raw = scene_data.get(key)
        text = "" if raw is None else str(raw).strip().rstrip(".").rstrip()
        return text or fallback

    # Tolerate a missing/None list, a bare string, and non-string or blank entries.
    raw_names = scene_data.get('characters_involved') or []
    if isinstance(raw_names, str):
        raw_names = [raw_names]
    names = [
        str(raw_name).strip()
        for raw_name in raw_names
        if raw_name is not None and str(raw_name).strip()
    ]

    definitions = character_definitions or {}
    character_details = []
    for name in names:
        visual_desc = definitions.get(name.lower())
        character_details.append(
            f"{name} (described as: {visual_desc})" if visual_desc else name
        )

    # Summarize what the original prompt was likely built from, for context.
    scene_context_summary = (
        f"Scene Number: {_text('scene_number', 'N/A')}. "
        f"Emotional Beat: {_text('emotional_beat')}. "
        f"Setting: {_text('setting_description')}. "
        f"Action: {_text('key_action')}. "
        f"Characters: {', '.join(names) if names else 'none listed'}. "
        f"Current Style Hint: {_text('visual_style_suggestion')}. "
        f"Current Camera Hint: {_text('camera_angle_suggestion')}."
    )
    characters_narrative = (
        "Relevant characters and their descriptions: "
        f"{', '.join(character_details) if character_details else 'None specified'}."
    )

    sentences = [
        "You are an AI assistant specializing in refining image generation prompts for DALL-E 3.",
        "The user wants to modify a visual concept for a cinematic scene.",
        "Original Scene Context:",
        scene_context_summary,
        characters_narrative,
        f'Global Style Reference (if any): "{global_style_reference or ""}"',
        "The DALL-E 3 prompt that was used to generate the previous image was:",
        f'"{original_image_prompt_text}"',
        "The user provided the following feedback on the visual generated by that prompt:",
        f'"{user_feedback_on_visuals}"',
        "Your task is to generate a new, revised DALL-E 3 prompt that incorporates the user's "
        "feedback to achieve the desired visual changes.",
        "The new prompt should be ultra-detailed, photorealistic, and highly cinematic.",
        "It should instruct DALL-E 3 to create an image that feels like a high-quality still "
        "from a major motion picture or AAA video game.",
        "Maintain the core elements of the scene (setting, characters, key action) unless the "
        "feedback explicitly asks to change them.",
        "Focus on translating the user's feedback into concrete visual descriptions related to "
        "composition, lighting, color, character appearance/pose, atmosphere, etc.",
        "Ensure character descriptions from the context are respected and reinforced if "
        "characters are mentioned.",
        "The prompt should be a single block of text.",
        "Output ONLY the new, revised DALL-E 3 prompt string.",
    ]
    return " ".join(" ".join(sentences).split())