Spaces:

mgbam
/

CingenAI

Running

App Files Files Community

mgbam committed 9 days ago

Commit

1cb1db0

verified ·

1 Parent(s): 9840152

Update core/prompt_engineering.py

Browse files

Files changed (1) hide show

core/prompt_engineering.py +86 -153

core/prompt_engineering.py CHANGED Viewed

@@ -1,181 +1,114 @@
 # core/prompt_engineering.py
 import json
 def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
-    """
-    Generates a prompt for Gemini to break down a story idea into scenes.
-    Emphasizes concise, descriptive key_actions suitable for video overlays.
-    """
     return f"""
-    You are an expert screenwriter and visual storyteller.
-    Based on the user's idea: "{user_idea}"
-    And considering the genre: "{genre}" and mood: "{mood}"
-    Generate a {num_scenes}-scene story breakdown. For each scene, provide:
-    1. scene_number (int): Sequential number of the scene.
-    2. emotional_beat (str): A short title or phrase capturing the core emotion/theme of this scene (e.g., "Desperate Escape," "Betrayal Revealed," "A Glimmer of Hope").
-    3. setting_description (str): Vivid description of the location, atmosphere, and key visual elements (approx 30-50 words).
-    4. characters_involved (list of str): Names of characters present and active in the scene.
-    5. key_action (str): The single most important event or character action happening in this specific visual moment, described concisely for a potential video overlay (max 15-20 words).
-    6. dialogue_snippet (str): A brief, impactful line of dialogue spoken in this scene, if any.
-    7. visual_style_suggestion (str): Keywords for the overall visual style of this scene (e.g., "Dark and gritty, high contrast, Blade Runner-esque neon reflections"). This can be influenced by the overall story mood.
-    8. camera_angle_suggestion (str): A specific camera shot type or angle suggestion (e.g., "Low-angle shot emphasizing power," "Dutch angle for unease," "Extreme close-up on eyes").
-    Output ONLY the JSON object for the list of scenes.
-    Example for one scene:
-    {{
-      "scene_number": 1,
-      "emotional_beat": "Tense Standoff",
-      "setting_description": "A rain-slicked, neon-drenched alleyway in Neo-Kyoto. Broken holographic advertisements flicker erratically, casting distorted shadows. The air hangs heavy with the smell of ozone and despair.",
-      "characters_involved": ["Detective Kaito Tanaka", "Informant (shadowy figure)"],
-      "key_action": "Kaito cautiously approaches a nervous informant huddled in the shadows.",
-      "dialogue_snippet": "Informant: 'They know you're looking... You're not safe.'",
-      "visual_style_suggestion": "Neo-noir, cyberpunk, high contrast, deep shadows, vibrant neon reflections in puddles, film grain.",
-      "camera_angle_suggestion": "Medium shot from behind Kaito, focusing on the informant, creating suspense."
-    }}
-    Provide the full JSON structure for {num_scenes} scenes in a list:
-    [
-        {{scene1_details...}},
-        {{scene2_details...}},
-        ...
-    ]
     """
-def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
-    """
-    Generates a detailed image prompt for DALL-E 3 based on structured scene data,
-    injecting character descriptions and global style preferences.
-    scene_data (dict): Contains details for a single scene.
-    character_definitions (dict): {'character_name_lower': 'description', ...}
-    global_style_reference (str): User-defined global style keywords.
-    """
-    scene_num = scene_data.get('scene_number', 'N/A')
-    emotional_beat_title = scene_data.get('emotional_beat', 'A cinematic scene') # Used as part of the core request
     setting_desc = scene_data.get('setting_description', 'A visually interesting setting.')
     key_action_desc = scene_data.get('key_action', 'A significant moment unfolds.')
-    # --- Character Injection ---
     characters_involved_in_scene = scene_data.get('characters_involved', [])
     character_prompt_segments = []
     if characters_involved_in_scene:
         for char_name_from_scene in characters_involved_in_scene:
-            char_name_clean = char_name_from_scene.strip()
-            char_lookup_key = char_name_clean.lower()
             if character_definitions and char_lookup_key in character_definitions:
-                char_visual_desc = character_definitions[char_lookup_key]
-                character_prompt_segments.append(f"{char_name_clean} (described as: {char_visual_desc})")
-            else:
-                character_prompt_segments.append(char_name_clean) # Character is present but no specific visual definition provided
     characters_narrative = ""
     if character_prompt_segments:
-        if len(character_prompt_segments) == 1:
-            characters_narrative = f"The primary focus is on {character_prompt_segments[0]}."
-        else:
-            characters_narrative = f"The scene prominently features {', '.join(character_prompt_segments[:-1])} and {character_prompt_segments[-1]}."
-    # --- End Character Injection ---
-    # --- Style Aggregation ---
-    scene_specific_style = scene_data.get('visual_style_suggestion', 'cinematic, photorealistic')
-    final_style_directive = scene_specific_style
-    if global_style_reference: # User's global style preference
-        final_style_directive += f", {global_style_reference}"
-    # --- End Style Aggregation ---
-    camera_instr = scene_data.get('camera_angle_suggestion', 'eye-level medium shot')
-    # Constructing the DALL-E 3 prompt with more narrative flow
-    # DALL-E 3 often works best if you tell it what kind of image you want first, then the details.
-    prompt = (
-        f"Create an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image. "
-        f"The image should depict: '{emotional_beat_title}'. "
-        f"Setting: {setting_desc}. "
-        f"{characters_narrative} " # This will be empty if no characters, or list them with descriptions
-        f"They are engaged in the following key action: {key_action_desc}. "
-        f"Visual Style and Atmosphere: {final_style_directive}. "
-        f"Camera Composition: {camera_instr}. "
-        f"Emphasize: Dramatic lighting (consider {scene_data.get('mood','cinematic')} mood), rich textures, depth of field, and strong atmospheric effects like mist, rain, or dust if appropriate to the setting. "
-        f"The overall image must feel like a high-quality still from a major motion picture or a AAA video game. "
-        f"Pay close attention to character details if provided, ensuring they are distinct and match their descriptions."
-    )
-    return " ".join(prompt.split()) # Normalize whitespace
-def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
     """
-    Prompt for Gemini to regenerate scene script details based on user feedback.
     """
-    context_str = f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n{json.dumps(original_scene_data, indent=2)}\n\n"
-    if full_story_context:
-        context_str += f"Full story context for reference:\n{json.dumps(full_story_context, indent=2)}\n\n"
-    return f"""
-    You are an expert script doctor and editor.
-    {context_str}
-    The user wants to modify this specific scene based on the following feedback: "{user_feedback}"
-    Please regenerate ONLY the JSON object for this single scene, incorporating the feedback.
-    Maintain the exact same JSON structure as the original: (scene_number, emotional_beat, setting_description, characters_involved, key_action, dialogue_snippet, visual_style_suggestion, camera_angle_suggestion).
-    The 'scene_number' must remain unchanged.
-    The 'key_action' should be a concise descriptive sentence (max 15-20 words) suitable for a brief video overlay.
-    If feedback pertains to characters, setting, action, dialogue, style, or camera, update those fields accordingly.
-    Ensure the regenerated scene remains coherent with the overall story context if provided.
-    Focus on making the changes impactful and clear.
-    """
-def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
-    """
-    Prompt for Gemini to rewrite an existing DALL-E image prompt based on user feedback and scene context.
-    """
-    # Reconstruct parts of what the original prompt *might* have focused on, for context
-    scene_context_summary = (
-        f"Scene Number: {scene_data.get('scene_number', 'N/A')}. "
-        f"Emotional Beat: {scene_data.get('emotional_beat', '')}. "
-        f"Setting: {scene_data.get('setting_description', '')}. "
-        f"Action: {scene_data.get('key_action', '')}. "
-        f"Characters: {', '.join(scene_data.get('characters_involved',[]))}. "
-        f"Current Style Hint: {scene_data.get('visual_style_suggestion', '')}. "
-        f"Current Camera Hint: {scene_data.get('camera_angle_suggestion', '')}."
-    )
-    character_details_for_prompt = []
-    if scene_data.get('characters_involved'):
-        for char_name_in_scene in scene_data.get('characters_involved', []):
-            char_name_clean = char_name_in_scene.strip()
-            char_lookup_key = char_name_clean.lower()
-            if character_definitions and char_lookup_key in character_definitions:
-                char_visual_desc = character_definitions[char_lookup_key]
-                character_details_for_prompt.append(f"{char_name_clean} (described as: {char_visual_desc})")
-            else:
-                character_details_for_prompt.append(char_name_clean)
-    characters_narrative = f"Relevant characters and their descriptions: {', '.join(character_details_for_prompt) if character_details_for_prompt else 'None specified'}."
-    full_prompt_for_gemini = f"""
-    You are an AI assistant specializing in refining image generation prompts for DALL-E 3.
-    The user wants to modify a visual concept for a cinematic scene.
-    Original Scene Context:
-    {scene_context_summary}
-    {characters_narrative}
-    Global Style Reference (if any): "{global_style_reference}"
-    The DALL-E 3 prompt that was used to generate the previous image was:
-    "{original_image_prompt_text}"
-    The user provided the following feedback on the visual generated by that prompt:
-    "{user_feedback_on_visuals}"
-    Your task is to generate a new, revised DALL-E 3 prompt that incorporates the user's feedback to achieve the desired visual changes.
-    The new prompt should be ultra-detailed, photorealistic, and highly cinematic.
-    It should instruct DALL-E 3 to create an image that feels like a high-quality still from a major motion picture or AAA video game.
-    Maintain the core elements of the scene (setting, characters, key action) unless the feedback explicitly asks to change them.
-    Focus on translating the user's feedback into concrete visual descriptions related to composition, lighting, color, character appearance/pose, atmosphere, etc.
-    Ensure character descriptions from the context are respected and reinforced if characters are mentioned.
-    The prompt should be a single block of text.
-    Output ONLY the new, revised DALL-E 3 prompt string.
-    """
-    return " ".join(full_prompt_for_gemini.split())

 # core/prompt_engineering.py
 import json
def create_story_breakdown_prompt(user_idea, genre="sci-fi", mood="suspenseful", num_scenes=3):
    """Return the prompt that asks the model for a scene-by-scene story breakdown.

    The text interpolates the user's idea, the genre, the mood and the
    requested number of scenes, lists the eight fields expected for every
    scene, and closes with a sample scene object plus a list skeleton.

    Layout of the returned string: every prompt line is preceded by a
    newline and a four-space indent, and the text ends with one more
    newline-plus-indent.
    """
    # Sample scene shown to the model; kept as one prompt line.
    sample_scene = (
        '{ "scene_number": 1, "emotional_beat": "Tense Standoff", '
        '"setting_description": "Rain-slicked, neon-drenched alleyway...", '
        '"characters_involved": ["Detective Kaito", "Informant"], '
        '"key_action": "Kaito cautiously approaches a nervous informant.", '
        '"dialogue_snippet": "Informant: \'They know...\'", '
        '"visual_style_suggestion": "Neo-noir, cyberpunk...", '
        '"camera_angle_suggestion": "Medium shot..."}'
    )
    field_specs = (
        "1. scene_number (int)",
        "2. emotional_beat (str): Short title for the scene's core feeling.",
        "3. setting_description (str): Vivid description (30-50 words).",
        "4. characters_involved (list of str): Names of characters.",
        "5. key_action (str): Main event for video overlay (15-20 words).",
        "6. dialogue_snippet (str): Brief impactful dialogue.",
        "7. visual_style_suggestion (str): Keywords for visual style.",
        "8. camera_angle_suggestion (str): Specific camera shot.",
    )
    prompt_lines = [
        f'You are an expert screenwriter and visual storyteller. Based on: "{user_idea}", genre: "{genre}", mood: "{mood}".',
        f"Generate a {num_scenes}-scene story breakdown. For each scene:",
        *field_specs,
        "Output ONLY the JSON list of scenes. Example:",
        sample_scene,
        '[{"scene1_details..."}, {"scene2_details..."}]',
    ]
    # Newline + four spaces both separates the lines and brackets the text.
    line_break = "\n    "
    return line_break + line_break.join(prompt_lines) + line_break
def create_image_prompt_from_scene_data(scene_data, character_definitions=None, global_style_reference=""):
    """Compose a single-line image-generation prompt from one scene's data.

    scene_data is read with .get() for 'scene_number', 'emotional_beat',
    'setting_description', 'key_action', 'characters_involved',
    'visual_style_suggestion' and 'camera_angle_suggestion'; each has a
    fallback text. A character whose stripped, lower-cased name is a key
    of character_definitions is rendered with that description attached.
    global_style_reference, when non-empty, is appended to the style
    instructions. All whitespace runs in the result are collapsed to
    single spaces.
    """
    beat_title = scene_data.get('emotional_beat', 'A cinematic scene')
    setting = scene_data.get('setting_description', 'A visually interesting setting.')
    action = scene_data.get('key_action', 'A significant moment unfolds.')

    # One entry per listed character, with a description when one is defined.
    cast = []
    for raw_name in scene_data.get('characters_involved', []) or []:
        name = raw_name.strip()
        key = name.lower()
        if character_definitions and key in character_definitions:
            cast.append(f"{name} (described as: {character_definitions[key]})")
        else:
            cast.append(name)

    if not cast:
        cast_sentence = ""
    elif len(cast) == 1:
        cast_sentence = f"The main character is {cast[0]}."
    else:
        *leading, final = cast
        cast_sentence = f"The scene features {', '.join(leading)}, and {final}."

    narrative = " ".join([
        f"Scene Number: {scene_data.get('scene_number', 'N/A')}.",
        f"Setting: {setting}.",
        cast_sentence,
        f"Key Action: {action}.",
        f"Emotional Tone: {scene_data.get('emotional_beat', '')}.",
    ])

    style = f"Visual Style: {scene_data.get('visual_style_suggestion', 'cinematic, photorealistic')}."
    if global_style_reference:
        style = f"{style} Specific style reference: {global_style_reference}."
    camera = f"Camera Perspective: {scene_data.get('camera_angle_suggestion', 'eye-level medium shot')}."

    sentences = [
        "Generate an ultra-detailed, photorealistic, and highly cinematic digital painting or concept art image.",
        f"The image should depict: '{beat_title}'. Narrative Context: {narrative}",
        f"Artistic & Technical Instructions: {style} {camera}",
        "Emphasize: Cinematic composition, dramatic lighting, rich textures, depth of field, strong atmospheric effects.",
        "The image must feel like a high-quality film still. Pay close attention to character details.",
    ]
    # split()/join collapses every whitespace run, including the gap left by
    # an empty cast sentence.
    return " ".join(" ".join(sentences).split())
def create_narration_script_prompt(story_scenes_data, overall_mood, overall_genre):
    """Return a prompt asking the model for one continuous narration script.

    Each entry of story_scenes_data is summarised through .get() lookups
    (scene number falling back to its 1-based position, emotional beat,
    setting, key action, characters, dialogue snippet). The summaries are
    embedded between marker lines together with the overall genre and
    mood and the writing instructions. All whitespace runs, including
    the newlines inside the summaries, are collapsed to single spaces in
    the returned string.
    """
    def _summarize(position, scene):
        # Five-line digest of one scene; missing keys fall back to '' / '(none)'.
        return "\n".join([
            f"Scene {scene.get('scene_number', position)} ({scene.get('emotional_beat','')}):",
            f"- Setting: {scene.get('setting_description','')}",
            f"- Key Action: {scene.get('key_action','')}",
            f"- Characters: {', '.join(scene.get('characters_involved',[]))}",
            f"- Implied Dialogue/Thought: {scene.get('dialogue_snippet','(none)')}",
        ])

    summaries = "\n\n".join(
        _summarize(position, scene)
        for position, scene in enumerate(story_scenes_data, start=1)
    )

    segments = [
        "You are a professional scriptwriter for documentary-style voiceovers and cinematic trailers.",
        "Given the following scene summaries for an animatic storyboard:",
        "--- SCENE SUMMARIES ---",
        summaries,
        "--- END SCENE SUMMARIES ---",
        f"Overall Story Genre: {overall_genre}",
        f"Overall Story Mood: {overall_mood}",
        "Write a concise, engaging, and continuous narration script that flows smoothly across these scenes.",
        "The narration should enhance the visual storytelling, not just describe what's visible.",
        "It should set the tone, build suspense or emotion, and connect the scenes thematically.",
        "The tone of the narration should match the overall mood and genre.",
        "Keep the narration for each scene relatively brief (1-2 short sentences per scene on average).",
        "The total narration should be suitable for a short animatic (e.g., if 3 scenes at 4 seconds each, total ~12 seconds of video, so narration should be ~60-90 words max).",
        'Do not include scene numbers or explicit directives like "(Voiceover)" in the output. Just provide the pure narration text.',
        "Focus on evocative language.",
        "Example (if scenes were about a space discovery):",
        '"The red dust of Mars whispered secrets of a forgotten age. Deep within the chasm, an impossible structure pulsed with an alien light, beckoning humanity towards a destiny unknown, and perhaps, a truth too vast to comprehend."',
        "Output ONLY the narration script text.",
    ]
    # Flatten to a single line: every run of whitespace becomes one space.
    return " ".join("\n".join(segments).split())
def create_scene_regeneration_prompt(original_scene_data, user_feedback, full_story_context=None):
    """Return a prompt asking the model to rewrite one scene from user feedback.

    The prompt embeds the original scene as indented JSON (labelled with
    its 'scene_number'), optionally the whole story as indented JSON when
    full_story_context is truthy, then the quoted feedback and the
    regeneration instructions.
    """
    scene_json = json.dumps(original_scene_data, indent=2)
    sections = [
        "Expert script doctor. Original scene:\n",
        f"Original scene (Scene Number {original_scene_data.get('scene_number')}):\n{scene_json}\n\n",
    ]
    if full_story_context:
        story_json = json.dumps(full_story_context, indent=2)
        sections.append(f"Full story context:\n{story_json}\n\n")
    sections.append(f"User feedback: \"{user_feedback}\"\n")
    sections.append(
        "Regenerate ONLY the JSON for this single scene, incorporating feedback. "
        "Maintain structure. 'key_action' max 15-20 words."
    )
    return "".join(sections)
def create_visual_regeneration_prompt(original_image_prompt_text, user_feedback_on_visuals, scene_data, character_definitions=None, global_style_reference=""):
    """Return a prompt asking the model to revise an existing DALL-E 3 image prompt.

    Args:
        original_image_prompt_text: The image prompt that produced the
            visual the user is reacting to.
        user_feedback_on_visuals: The user's requested visual changes.
        scene_data: Mapping read with .get() for 'emotional_beat',
            'setting_description', 'key_action' and 'characters_involved'.
        character_definitions: Optional mapping of character name to a
            visual description. Names are matched against the scene's
            characters case-insensitively, ignoring surrounding whitespace.
        global_style_reference: Optional global style keywords.

    Returns:
        The prompt as a single string. Its character section always
        starts with "Relevant characters: " and reads "None specified."
        when no character in the scene has a definition.
    """
    # A present-but-None 'characters_involved' is treated as an empty list.
    involved = scene_data.get('characters_involved') or []
    scene_context_summary = (
        f"Scene: {scene_data.get('emotional_beat', '')}. Setting: {scene_data.get('setting_description', '')}. "
        f"Action: {scene_data.get('key_action', '')}. Characters: {', '.join(involved)}."
    )

    # Normalise definition keys once so each scene character is one dict
    # lookup instead of a rebuilt-list membership test per definition.
    definitions_by_key = {
        name.strip().lower(): desc
        for name, desc in (character_definitions or {}).items()
    }
    described = []
    already_listed = set()
    for raw_name in involved:
        display_name = raw_name.strip()
        key = display_name.lower()
        if key in definitions_by_key and key not in already_listed:
            already_listed.add(key)
            described.append(f"{display_name} ({definitions_by_key[key]})")

    # The label is emitted unconditionally. Previously the conditional
    # expression swallowed it whenever no definitions were supplied, and an
    # unmatched definitions dict left the label with nothing after it.
    if described:
        char_details_str = "Relevant characters: " + ", ".join(described) + "."
    else:
        char_details_str = "Relevant characters: None specified."

    return (f"AI assistant for refining DALL-E 3 prompts. Original Scene Context: {scene_context_summary} {char_details_str} "
            f"Global Style: \"{global_style_reference}\". Original DALL-E 3 prompt was: \"{original_image_prompt_text}\". "
            f"User feedback on visual: \"{user_feedback_on_visuals}\". Generate a new, revised DALL-E 3 prompt. "
            f"It must be ultra-detailed, photorealistic, cinematic, film/game quality. Translate feedback into concrete visual descriptions. "
            f"Respect character descriptions. Output ONLY the new prompt string.")