Upload 8 files
- cinematic_planning.py +100 -0
- evaluation.py +124 -0
- generation.py +230 -0
- main.py +169 -0
- prompt_template_control.py +48 -0
- requirements.txt +10 -0
- selected_storyboards.json +6 -0
- storyboard.py +85 -0
cinematic_planning.py
ADDED
@@ -0,0 +1,100 @@
import json
import os
from dotenv import load_dotenv
from openai import OpenAI

from generation import generate_video

# Load env for OpenAI
load_dotenv()
client = OpenAI()


def storyboard_to_pseudo_video(storyboard):
    return {
        "scene": storyboard["scene"],
        "characters": [
            {
                "id": "main",
                "emoji": "👧",
                "action": "walk",
                "path": "left_to_right",
                "emotion": storyboard["emotion"]
            }
        ],
        "duration_sec": 5,
        "camera": storyboard["shot_type"]
    }


# Generate natural language transition description
def generate_transition_description(previous_state, next_state, i):
    # You can replace this with GPT for smarter descriptions
    return f"Transition {i+1}: The character continues to walk through the {next_state['scene']} with a {next_state['characters'][0]['emotion']} expression."


# Convert pseudo-video spec to text prompt
def pseudo_video_to_prompt(pseudo_video):
    scene = pseudo_video["scene"]
    emotion = pseudo_video["characters"][0]["emotion"]
    camera = pseudo_video["camera"]
    action = pseudo_video["characters"][0]["action"]
    path = pseudo_video["characters"][0]["path"]
    duration = pseudo_video["duration_sec"]

    prompt = (
        f"Create a {duration}-second video showing a {emotion} scene in a {scene}. "
        f"A character (represented by emoji) performs the action '{action}' across the screen from {path.replace('_', ' ')}. "
        f"Use a {camera} to capture the atmosphere."
    )
    return prompt


# Iterative process
def build_scene_sequence(storyboard, model_id, num_keyframes=12):
    pseudo_video = storyboard_to_pseudo_video(storyboard)
    print("Pseudo-Video Spec:\n", json.dumps(pseudo_video, indent=2))

    previous_state = pseudo_video
    scene_sequence = []

    for i in range(num_keyframes):
        # 1️⃣ Generate transition text
        transition_text = generate_transition_description(previous_state, pseudo_video, i)

        # 2️⃣ Generate video prompt
        video_prompt = pseudo_video_to_prompt(pseudo_video)

        # 3️⃣ Generate video clip
        video_path = generate_video(video_prompt, model_id)

        # 4️⃣ Save this step
        scene_sequence.append({
            "transition_text": transition_text,
            "prompt": video_prompt,
            "video_path": video_path
        })

        # Optional: Update pseudo_video for next iteration if needed
        # Example: character moves deeper, emotion changes, etc.

    return scene_sequence


if __name__ == "__main__":
    os.makedirs("output", exist_ok=True)

    # Example storyboard
    storyboard = {
        "scene": "misty forest",
        "shot_type": "wide shot",
        "emotion": "mysterious"
    }

    scene_sequence = build_scene_sequence(storyboard, model_id="Veo-2", num_keyframes=3)

    print("\n--- Final Scene Sequence ---")
    for i, step in enumerate(scene_sequence):
        print(f"\nKeyframe {i+1}:")
        print("Transition:", step["transition_text"])
        print("Video:", step["video_path"])
evaluation.py
ADDED
@@ -0,0 +1,124 @@
import json
import numpy as np
import cv2
import torch

from dotenv import load_dotenv
from openai import OpenAI
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from skimage.metrics import structural_similarity as ssim


# Load env for OpenAI
load_dotenv()
client = OpenAI()

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


def evaluate_with_gpt4(storyboard, video_description):
    system_prompt = (
        "You are a film critic evaluating how well a video matches a storyboard.\n"
        "Rate each of the following from 1 to 10:\n"
        "- Story Consistency: Does the video follow the scene and emotion described?\n"
        "- Shot Variety: Does it use interesting or varied camera angles?\n"
        "- Relevance: Does it suit the intended purpose (role, setting, emotion)?\n\n"
        "Provide scores and brief justifications for each.\n\n"
        "Format output as:\n"
        "{\n"
        " \"story_consistency\": <score>,\n"
        " \"shot_variety\": <score>,\n"
        " \"relevance\": <score>,\n"
        " \"justification\": \"...\"\n"
        "}"
    )

    user_prompt = (
        f"Storyboard:\n"
        f"Scene: {storyboard['scene']}\n"
        f"Shot: {storyboard['shot_type']}\n"
        f"Emotion: {storyboard['emotion']}\n\n"
        f"Video Description:\n{video_description}"
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=0.3,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    content = response.choices[0].message.content.strip()
    # The model sometimes wraps its JSON in Markdown fences; keep only the JSON object
    if "{" in content:
        content = content[content.find("{"):content.rfind("}") + 1]
    return json.loads(content)


def compute_clip_similarity(image_path, text_prompt):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(text=[text_prompt], images=image, return_tensors="pt", padding=True)
    with torch.no_grad():
        outputs = clip_model(**inputs)
    # With a single caption, a softmax over one logit is always 1.0, so return the raw
    # image-text logit (cosine similarity scaled by CLIP's learned temperature) instead.
    similarity = outputs.logits_per_image[0, 0].item()
    return similarity


def compute_motion_score(video_path):
    cap = cv2.VideoCapture(video_path)
    prev_gray = None
    motion_values = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_gray is not None:
            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
                                                0.5, 3, 15, 3, 5, 1.2, 0)
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            motion_values.append(np.mean(magnitude))

        prev_gray = gray

    cap.release()
    return np.mean(motion_values) if motion_values else 0


def compute_temporal_coherence(video_path):
    cap = cv2.VideoCapture(video_path)
    prev_frame = None
    ssim_scores = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        if prev_frame is not None:
            score = ssim(prev_frame, gray)
            ssim_scores.append(score)

        prev_frame = gray

    cap.release()
    return np.mean(ssim_scores) if ssim_scores else 0


def evaluate_video(storyboard, video_description, video_path, thumbnail_path, text_prompt):
    gpt_eval = evaluate_with_gpt4(storyboard, video_description)
    clip_score = compute_clip_similarity(thumbnail_path, text_prompt)
    motion_score = compute_motion_score(video_path)
    coherence_score = compute_temporal_coherence(video_path)

    return {
        "gpt_eval": gpt_eval,
        "metrics": {
            "clip_similarity": clip_score,
            "motion_score": motion_score,
            "temporal_coherence": coherence_score
        }
    }
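A minimal usage sketch for evaluate_video (not part of the uploaded files; the clip path, thumbnail path, storyboard, and prompts below are assumptions for illustration):

import cv2
from evaluation import evaluate_video

video_path = "output/sample.mp4"  # assumed path of an already-generated clip

# Grab the first frame as a thumbnail for the CLIP image-text check
cap = cv2.VideoCapture(video_path)
ok, frame = cap.read()
cap.release()
if ok:
    cv2.imwrite("output/sample_thumbnail.png", frame)

storyboard = {"scene": "misty forest", "shot_type": "wide shot", "emotion": "mysterious"}
report = evaluate_video(
    storyboard=storyboard,
    video_description="A girl walks through a misty forest, seen in a wide shot.",
    video_path=video_path,
    thumbnail_path="output/sample_thumbnail.png",
    text_prompt="a mysterious wide shot of a misty forest",
)
print(report["metrics"])
print(report["gpt_eval"])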
generation.py
ADDED
@@ -0,0 +1,230 @@
import uuid

import torch
from diffusers.utils import export_to_video
from diffusers import AutoencoderKLWan, WanPipeline
from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler

import os
import time
import requests
import json

from PIL import Image as PIL_Image
from google import genai
from google.genai import types
from google.cloud import aiplatform
from google.cloud import storage
import matplotlib.pyplot as plt
import mediapy as media


def wan_text_to_video(prompt, negative_prompt=None):
    # Available models: Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers
    # model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
    model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
    vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
    flow_shift = 5.0  # 5.0 for 720P, 3.0 for 480P
    scheduler = UniPCMultistepScheduler(prediction_type='flow_prediction', use_flow_sigmas=True, num_train_timesteps=1000, flow_shift=flow_shift)
    pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
    pipe.scheduler = scheduler
    pipe.to("cuda" if torch.cuda.is_available() else "cpu")

    # Example prompt (previously hard-coded here, which overrode the caller's argument):
    # "A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the
    #  dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through
    #  the window."

    # Fall back to a generic negative prompt when the caller does not provide one
    if not negative_prompt:
        negative_prompt = ("Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, "
                           "images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, "
                           "incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, "
                           "misshapen limbs, fused fingers, still picture, messy background, three legs, many people in "
                           "the background, walking backwards")

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=720,
        width=1280,
        num_frames=81,
        guidance_scale=5.0,
    ).frames[0]
    export_to_video(output, "output.mp4", fps=16)

    return "output.mp4"


def gcp_veo(prompt: str = "a cat reading a book"):
    PROJECT_ID = "gcp-credit-applying-to-g-suite"
    LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")
    BUCKET_NAME = "dante-test-123456-output"
    OUTPUT_GCS_PATH = f"gs://{BUCKET_NAME}/videos/output_{int(time.time())}.mp4"

    # Initialize Vertex AI
    aiplatform.init(project=PROJECT_ID, location=LOCATION)

    # Initialize Generative AI client
    client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

    # Video Generation Pipeline
    video_model = "veo-2.0-generate-001"
    # video_model = "veo-3.0-generate-preview"
    aspect_ratio = "16:9"

    operation = client.models.generate_videos(
        model=video_model,
        prompt=prompt,
        config=types.GenerateVideosConfig(
            aspect_ratio=aspect_ratio,
            output_gcs_uri=OUTPUT_GCS_PATH,
            number_of_videos=1,
            duration_seconds=5,
            person_generation="allow_adult",
            enhance_prompt=True,
        ),
    )

    # Poll until operation is complete
    print("Generating video...")
    while not operation.done:
        time.sleep(15)
        operation = client.operations.get(operation)
        print(f"Operation status: {operation}")

    # Error Handling
    if operation.error:
        raise Exception(f"Video generation failed: {operation.error}")

    # Get the generated video URI
    if operation.response and operation.result.generated_videos:
        video_uri = operation.result.generated_videos[0].video.uri
        print(f"Video generated at: {video_uri}")

        # Download the video from GCS to local
        storage_client = storage.Client(project=PROJECT_ID)
        bucket = storage_client.bucket(BUCKET_NAME)
        blob_name = video_uri.replace(f"gs://{BUCKET_NAME}/", "")
        blob = bucket.blob(blob_name)

        local_output_path = f"output/sample-{uuid.uuid1()}.mp4"

        # Ensure local directory exists
        os.makedirs(os.path.dirname(local_output_path), exist_ok=True)

        # Download the video
        blob.download_to_filename(local_output_path)
        print(f"Video downloaded to: {local_output_path}")

        # Delete the file from GCS
        blob.delete()
        print(f"Video deleted from GCS: {video_uri}")

        return local_output_path
    else:
        raise Exception("No video generated or response is empty")


def hailuo_text_to_video(
        prompt: str,
        model: str = "T2V-01-Director",
        output_file_name: str = "output.mp4",
        # Assumed env var name; pass api_key explicitly if your key lives elsewhere
        api_key: str = os.environ.get("MINIMAX_API_KEY", "")
) -> str:
    def invoke_video_generation() -> str:
        print("-----------------Submit video generation task-----------------")
        url = "https://api.minimaxi.chat/v1/video_generation"
        payload = json.dumps({
            "prompt": prompt,
            "model": model
        })
        headers = {
            'authorization': 'Bearer ' + api_key,
            'content-type': 'application/json',
        }

        response = requests.request("POST", url, headers=headers, data=payload)
        print(response.text)
        task_id = response.json()['task_id']
        print("Video generation task submitted successfully, task ID: " + task_id)
        return task_id

    def query_video_generation(task_id: str):
        url = "https://api.minimaxi.chat/v1/query/video_generation?task_id=" + task_id
        headers = {
            'authorization': 'Bearer ' + api_key
        }
        response = requests.request("GET", url, headers=headers)
        status = response.json()['status']
        if status == 'Preparing':
            print("...Preparing...")
            return "", 'Preparing'
        elif status == 'Queueing':
            print("...In the queue...")
            return "", 'Queueing'
        elif status == 'Processing':
            print("...Generating...")
            return "", 'Processing'
        elif status == 'Success':
            return response.json()['file_id'], "Finished"
        elif status == 'Fail':
            return "", "Fail"
        else:
            return "", "Unknown"

    def fetch_video_result(file_id: str):
        print("---------------Video generated successfully, downloading now---------------")
        url = "https://api.minimaxi.chat/v1/files/retrieve?file_id=" + file_id
        headers = {
            'authorization': 'Bearer ' + api_key,
        }

        response = requests.request("GET", url, headers=headers)
        print(response.text)

        download_url = response.json()['file']['download_url']
        print("Video download link: " + download_url)
        with open(output_file_name, 'wb') as f:
            f.write(requests.get(download_url).content)
        print("The video has been downloaded to: " + os.getcwd() + '/' + output_file_name)

    task_id = invoke_video_generation()
    print("-----------------Video generation task submitted -----------------")
    while True:
        time.sleep(10)

        file_id, status = query_video_generation(task_id)
        if file_id != "":
            fetch_video_result(file_id)
            print("---------------Successful---------------")
            break
        elif status == "Fail" or status == "Unknown":
            print("---------------Failed---------------")
            break

    return os.getcwd() + '/' + output_file_name


def generate_video(prompt, model_id, negative_prompt=None):
    video_path = None
    if model_id == "Wan2.1":
        video_path = wan_text_to_video(prompt, negative_prompt)
    elif model_id == "SkyReels-V2":
        raise ValueError("SkyReels-V2 model not yet implemented.")
    elif model_id == "Veo-2":
        video_path = gcp_veo(prompt)
    elif model_id == "T2V-01-Director":
        video_path = hailuo_text_to_video(prompt)
    return video_path


# Only available for cuda / cpu
# wan_text_to_video()


# if __name__ == "__main__":
#     try:
#         local_path = gcp_veo_3(
#             prompt="a cat reading a book",
#             local_output_path="output/cat_reading_book.mp4"
#         )
#         print(f"Success! Video saved at: {local_path}")
#     except Exception as e:
#         print(f"Error: {e}")
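A minimal usage sketch for the generate_video dispatcher above (not part of the uploaded files). "Veo-2" assumes GCP credentials for the hard-coded project and bucket are available; "Wan2.1" runs locally but is very slow without a GPU. The prompt below is illustrative only:

from generation import generate_video

clip_path = generate_video(
    prompt="A girl walks into a misty forest at night, wide shot, mysterious mood.",
    model_id="Veo-2",
)
print("Saved clip:", clip_path)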
main.py
ADDED
@@ -0,0 +1,169 @@
import gradio as gr
import json

from cinematic_planning import build_scene_sequence
from generation import generate_video
from prompt_template_control import generate_video_prompt_with_template
from storyboard import generate_multiple_storyboards


def save_storyboard_choice(choice: str) -> str:
    # gr.Radio returns the selected choice as a JSON string (see update_storyboards below);
    # save it as one JSON line (append mode)
    if not choice:
        return "⚠️ No storyboard selected."
    storyboard = json.loads(choice)
    with open("selected_storyboards.json", "a") as f:
        f.write(json.dumps(storyboard) + "\n")
    return f"✅ Saved your selection to selected_storyboards.json:\n\n{json.dumps(storyboard, indent=2)}"


# Connect button
def run_pseudo_video_workflow(scene, shot_type, emotion, model_choice, num_keyframes):
    # Build storyboard dict
    storyboard = {
        "scene": scene,
        "shot_type": shot_type,
        "emotion": emotion
    }

    # Call your iterative builder
    scene_sequence = build_scene_sequence(
        storyboard, model_choice, num_keyframes=num_keyframes
    )

    # Format result as text
    result_text = ""
    for i, step in enumerate(scene_sequence):
        result_text += f"\nKeyframe {i + 1}:\n"
        result_text += f"Transition: {step['transition_text']}\n"
        result_text += f"Video Path: {step['video_path']}\n"

    return result_text


if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("# 🎥 Video Generator")

        # Video Generator Interface
        with gr.Row():
            with gr.Column():
                video_prompt = gr.Textbox(label="Enter your video prompt")
                negative_prompt = gr.Textbox(label="Enter your negative prompt (optional: Wan2.1 only)")
                model_choice = gr.Radio(
                    choices=["SkyReels-V2", "Wan2.1", "Veo-2", "T2V-01-Director"],
                    label="Choose the video generation model"
                )
                generate_btn = gr.Button("Generate Video")
            with gr.Column():
                video_output = gr.Video(label="Generated Video")

        generate_btn.click(
            generate_video,
            inputs=[video_prompt, model_choice, negative_prompt],
            outputs=video_output
        )

        # Divider
        gr.Markdown("---")

        # Narrative to Storyboard interface
        gr.Markdown("# 🎬 Narrative to Storyboard Grounding")
        narrative_input = gr.Textbox(label="Enter your narrative")
        generate_storyboards_btn = gr.Button("Generate 5 Storyboards")
        storyboards_output = gr.Radio(
            choices=[],
            label="Select your preferred storyboard"
        )
        save_choice_btn = gr.Button("Save Selection")
        save_output = gr.Textbox(label="Save Output", interactive=False)

        # Generate the storyboards
        def update_storyboards(narrative):
            cards = generate_multiple_storyboards(narrative)
            # gr.Radio choices must be plain strings, so serialize each storyboard dict to JSON
            return gr.update(choices=[json.dumps(card) for card in cards])

        generate_storyboards_btn.click(
            update_storyboards,
            inputs=narrative_input,
            outputs=storyboards_output
        )

        # Save the choice
        save_choice_btn.click(
            save_storyboard_choice,
            inputs=storyboards_output,
            outputs=save_output
        )

        gr.Markdown("---")

        # Prompt Injection + Template Control
        gr.Markdown("# 🎥 Prompt Injection + Template Control (LLM + T2V)")

        # Modular controls
        role_input = gr.Textbox(label="Role", placeholder="e.g., Product demo")
        setting_input = gr.Textbox(label="Setting", placeholder="e.g., Urban bar")
        emotion_input = gr.Textbox(label="Emotion", placeholder="e.g., Energetic")
        shot_input = gr.Textbox(label="Shot Type", placeholder="e.g., Front-facing")
        duration_input = gr.Textbox(label="Duration", placeholder="e.g., 5s loop")

        # Model selection
        model_choice = gr.Radio(
            choices=["SkyReels-V2", "Veo-2", "Runway", "T2V-01-Director"],
            label="Choose video generation model"
        )

        # Generate final natural language prompt
        generate_prompt_btn = gr.Button("Generate Final Prompt")
        final_prompt_output = gr.Textbox(label="Final Video Prompt", interactive=False)

        # Generate video
        generate_video_btn = gr.Button("Generate Video")
        video_output = gr.Video(label="Generated Video")

        # Connect callbacks
        generate_prompt_btn.click(
            generate_video_prompt_with_template,
            inputs=[role_input, setting_input, emotion_input, shot_input, duration_input],
            outputs=final_prompt_output
        )

        generate_video_btn.click(
            generate_video,
            inputs=[final_prompt_output, model_choice, negative_prompt],
            outputs=video_output
        )

        gr.Markdown("# 🎞️ Pseudo Video Workflow (Storyboard → Scene Builder)")

        # Storyboard inputs
        pseudo_scene_input = gr.Textbox(label="Scene", placeholder="e.g., Misty forest")
        pseudo_shot_input = gr.Textbox(label="Shot Type", placeholder="e.g., Wide shot")
        pseudo_emotion_input = gr.Textbox(label="Emotion", placeholder="e.g., Mysterious")

        pseudo_model_choice = gr.Radio(
            choices=["SkyReels-V2", "Wan2.1", "Veo-2", "T2V-01-Director"],
            label="Choose video generation model"
        )

        num_keyframes_input = gr.Slider(minimum=1, maximum=20, value=12, label="Number of Keyframes")

        run_pseudo_video_btn = gr.Button("Build Pseudo Video Workflow")

        pseudo_output = gr.Textbox(label="Workflow Result", lines=10)

        # Hook to Gradio button
        run_pseudo_video_btn.click(
            run_pseudo_video_workflow,
            inputs=[
                pseudo_scene_input,
                pseudo_shot_input,
                pseudo_emotion_input,
                pseudo_model_choice,
                num_keyframes_input
            ],
            outputs=pseudo_output
        )

    demo.launch()
prompt_template_control.py
ADDED
@@ -0,0 +1,48 @@
from openai import OpenAI
from dotenv import load_dotenv

# Env variable
load_dotenv()

# Initialize OpenAI client
client = OpenAI()


def generate_video_prompt_with_template(role: str, setting: str, emotion: str, shot: str, duration: str) -> str:
    system_prompt = (
        "You are a video director who converts structured metadata into detailed, natural language video prompts.\n"
        "Here are examples:\n\n"
        "Example 1:\n"
        "- Role: Product demo\n"
        "- Setting: Urban bar\n"
        "- Emotion: Energetic\n"
        "- Shot: Front-facing, 5s loop\n"
        "Output: \"Create a short 5-second video of a product demo in an energetic tone. "
        "The scene takes place in an urban bar setting, using a front-facing camera to capture the vibrant atmosphere.\"\n\n"
        "Example 2:\n"
        "- Role: Storytelling\n"
        "- Setting: Forest, misty\n"
        "- Emotion: Mysterious\n"
        "- Shot: Wide shot\n"
        "Output: \"Create a video showing a mysterious scene in a misty forest. Use a wide shot to capture the atmosphere and suspense.\"\n\n"
        "Now, create a natural language video prompt for the following:\n"
    )

    user_prompt = (
        f"- Role: {role}\n"
        f"- Setting: {setting}\n"
        f"- Emotion: {emotion}\n"
        f"- Shot: {shot}, {duration}\n"
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.3
    )

    final_prompt = response.choices[0].message.content.strip()
    return final_prompt
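A minimal example call for the template-controlled prompt builder (not part of the uploaded files; assumes OPENAI_API_KEY is set in .env, and the field values below are illustrative):

from prompt_template_control import generate_video_prompt_with_template

prompt = generate_video_prompt_with_template(
    role="Product demo",
    setting="Urban bar",
    emotion="Energetic",
    shot="Front-facing",
    duration="5s loop",
)
print(prompt)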
requirements.txt
ADDED
@@ -0,0 +1,10 @@
torch
diffusers
transformers
ftfy==6.3.1
gradio
google-genai
mediapy
google-cloud-aiplatform
google-cloud-storage
openai
python-dotenv
opencv-python
scikit-image
numpy
Pillow
requests
matplotlib
selected_storyboards.json
ADDED
@@ -0,0 +1,6 @@
{
  "scene": "A dense, shadowy forest shrouded in mist. The trees are tall and imposing, their branches intertwining overhead, creating a canopy that blocks out the moonlight. The ground is covered in a thick layer of fallen leaves, and the air is filled with the sound of distant rustling and the occasional hoot of an owl.",
  "shot_type": "Wide shot",
  "emotion": "Eerie and mysterious",
  "version": 4
}
storyboard.py
ADDED
@@ -0,0 +1,85 @@
import json
import re
from typing import List, Dict

from dotenv import load_dotenv
from openai import OpenAI


# Env variable
load_dotenv()

# Initialize OpenAI client
client = OpenAI()


def narrative_to_storyboard(narrative: str) -> Dict[str, str]:
    """
    Converts a narrative prompt into a structured storyboard dict
    with scene, shot type, and emotion using an LLM.
    """
    system_prompt = (
        "You are a professional storyboard artist and cinematographer. "
        "Given a narrative, extract and describe:\n"
        "- scene: The environment and visual setting\n"
        "- shot_type: The camera angle or framing (e.g., wide shot, close-up)\n"
        "- emotion: The overall mood or emotional tone\n\n"
        "Return the result as a JSON dictionary with keys: scene, shot_type, emotion."
    )

    user_prompt = f"Narrative: {narrative}"

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ],
        temperature=0.3
    )

    content = response.choices[0].message.content.strip()

    # Use regex to extract JSON block
    json_match = re.search(r"\{[\s\S]*\}", content)
    if json_match:
        json_str = json_match.group(0)
        try:
            parsed_json = json.loads(json_str)
            return parsed_json
        except json.JSONDecodeError as e:
            print(f"JSON decode error: {e}")
            return {"scene": "", "shot_type": "", "emotion": ""}
    else:
        print("No JSON block found in response.")
        return {"scene": "", "shot_type": "", "emotion": ""}


def generate_multiple_storyboards(narrative: str, num_versions: int = 5) -> List[Dict[str, str]]:
    """
    Generate multiple storyboards for the same narrative by calling the LLM-based
    narrative_to_storyboard() function multiple times with slight variations.
    """
    storyboards = []

    for i in range(num_versions):
        # Add variation to the narrative to encourage different outputs
        variant_narrative = f"{narrative}\nPlease provide a different creative version #{i+1}."
        storyboard = narrative_to_storyboard(variant_narrative)
        storyboard['version'] = i + 1  # Track version number
        storyboards.append(storyboard)

    return storyboards


if __name__ == "__main__":
    print("Testing Narrative to Storyboard...")
    narrative_text = "A girl walks into a dark forest on a misty night."
    # storyboard_output = narrative_to_storyboard(narrative_text)
    # print(storyboard_output)

    print("Generating 5 storyboards based on the narrative...")
    storyboard_list = generate_multiple_storyboards(narrative_text)
    for i, sb in enumerate(storyboard_list):
        print(f"Version {i + 1}: {sb}")