qqwjq1981 committed
Commit a5fc5ac · verified · 1 Parent(s): 9715c8a

Upload 8 files
cinematic_planning.py ADDED
@@ -0,0 +1,100 @@
+ import json
+ import os
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+ from generation import generate_video
+
+ # Load env for OpenAI
+ load_dotenv()
+ client = OpenAI()
+
+
+ def storyboard_to_pseudo_video(storyboard):
+     return {
+         "scene": storyboard["scene"],
+         "characters": [
+             {
+                 "id": "main",
+                 "emoji": "👧",
+                 "action": "walk",
+                 "path": "left_to_right",
+                 "emotion": storyboard["emotion"]
+             }
+         ],
+         "duration_sec": 5,
+         "camera": storyboard["shot_type"]
+     }
+
+
+ # Generate natural language transition description
+ def generate_transition_description(previous_state, next_state, i):
+     # You can replace this with GPT for smarter descriptions
+     return f"Transition {i+1}: The character continues to walk through the {next_state['scene']} with a {next_state['characters'][0]['emotion']} expression."
+
+
+ # Convert pseudo-video spec to text prompt
+ def pseudo_video_to_prompt(pseudo_video):
+     scene = pseudo_video["scene"]
+     emotion = pseudo_video["characters"][0]["emotion"]
+     camera = pseudo_video["camera"]
+     action = pseudo_video["characters"][0]["action"]
+     path = pseudo_video["characters"][0]["path"]
+     duration = pseudo_video["duration_sec"]
+
+     prompt = (
+         f"Create a {duration}-second video showing a {emotion} scene in a {scene}. "
+         f"A character (represented by emoji) performs the action '{action}' across the screen from {path.replace('_', ' ')}. "
+         f"Use a {camera} to capture the atmosphere."
+     )
+     return prompt
+
+
+ # Iterative process
+ def build_scene_sequence(storyboard, model_id, num_keyframes=12):
+     pseudo_video = storyboard_to_pseudo_video(storyboard)
+     print("Pseudo-Video Spec:\n", json.dumps(pseudo_video, indent=2))
+
+     previous_state = pseudo_video
+     scene_sequence = []
+
+     for i in range(num_keyframes):
+         # 1️⃣ Generate transition text
+         transition_text = generate_transition_description(previous_state, pseudo_video, i)
+
+         # 2️⃣ Generate video prompt
+         video_prompt = pseudo_video_to_prompt(pseudo_video)
+
+         # 3️⃣ Generate video clip
+         video_path = generate_video(video_prompt, model_id)
+
+         # 4️⃣ Save this step
+         scene_sequence.append({
+             "transition_text": transition_text,
+             "prompt": video_prompt,
+             "video_path": video_path
+         })
+
+         # Optional: Update pseudo_video for next iteration if needed
+         # Example: character moves deeper, emotion changes, etc.
+
+     return scene_sequence
+
+
+ if __name__ == "__main__":
+     os.makedirs("output", exist_ok=True)
+
+     # Example storyboard
+     storyboard = {
+         "scene": "misty forest",
+         "shot_type": "wide shot",
+         "emotion": "mysterious"
+     }
+
+     scene_sequence = build_scene_sequence(storyboard, model_id="Veo-2", num_keyframes=3)
+
+     print("\n--- Final Scene Sequence ---")
+     for i, step in enumerate(scene_sequence):
+         print(f"\nKeyframe {i+1}:")
+         print("Transition:", step["transition_text"])
+         print("Video:", step["video_path"])
evaluation.py ADDED
@@ -0,0 +1,124 @@
+ import json
+ import re
+ import numpy as np
+ import cv2
+ import torch
+
+ from dotenv import load_dotenv
+ from openai import OpenAI
+ from transformers import CLIPProcessor, CLIPModel
+ from PIL import Image
+ from skimage.metrics import structural_similarity as ssim
+
+
+ # Load env for OpenAI
+ load_dotenv()
+ client = OpenAI()
+
+ clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+
+ def evaluate_with_gpt4(storyboard, video_description):
+     system_prompt = (
+         "You are a film critic evaluating how well a video matches a storyboard.\n"
+         "Rate each of the following from 1 to 10:\n"
+         "- Story Consistency: Does the video follow the scene and emotion described?\n"
+         "- Shot Variety: Does it use interesting or varied camera angles?\n"
+         "- Relevance: Does it suit the intended purpose (role, setting, emotion)?\n\n"
+         "Provide scores and brief justifications for each.\n\n"
+         "Format output as:\n"
+         "{\n"
+         "  \"story_consistency\": <score>,\n"
+         "  \"shot_variety\": <score>,\n"
+         "  \"relevance\": <score>,\n"
+         "  \"justification\": \"...\"\n"
+         "}"
+     )
+
+     user_prompt = (
+         f"Storyboard:\n"
+         f"Scene: {storyboard['scene']}\n"
+         f"Shot: {storyboard['shot_type']}\n"
+         f"Emotion: {storyboard['emotion']}\n\n"
+         f"Video Description:\n{video_description}"
+     )
+
+     response = client.chat.completions.create(
+         model="gpt-4o",
+         temperature=0.3,
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt}
+         ]
+     )
+
+     content = response.choices[0].message.content.strip()
+     # The model sometimes wraps the JSON in a Markdown code fence; extract the JSON block if so.
+     json_match = re.search(r"\{[\s\S]*\}", content)
+     return json.loads(json_match.group(0) if json_match else content)
+
+
+ def compute_clip_similarity(image_path, text_prompt):
+     image = Image.open(image_path).convert("RGB")
+     inputs = clip_processor(text=[text_prompt], images=image, return_tensors="pt", padding=True)
+     with torch.no_grad():
+         outputs = clip_model(**inputs)
+     # With a single prompt, softmax over one logit is always 1.0, so return the cosine
+     # similarity instead (raw image-text logit divided by CLIP's learned logit scale).
+     logits_per_image = outputs.logits_per_image
+     similarity = (logits_per_image / clip_model.logit_scale.exp()).item()
+     return similarity
+
+
+ def compute_motion_score(video_path):
+     cap = cv2.VideoCapture(video_path)
+     prev_gray = None
+     motion_values = []
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+         if prev_gray is not None:
+             flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None,
+                                                 0.5, 3, 15, 3, 5, 1.2, 0)
+             magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
+             motion_values.append(np.mean(magnitude))
+
+         prev_gray = gray
+
+     cap.release()
+     return np.mean(motion_values) if motion_values else 0
+
+
+ def compute_temporal_coherence(video_path):
+     cap = cv2.VideoCapture(video_path)
+     prev_frame = None
+     ssim_scores = []
+
+     while cap.isOpened():
+         ret, frame = cap.read()
+         if not ret:
+             break
+
+         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+         if prev_frame is not None:
+             score = ssim(prev_frame, gray)
+             ssim_scores.append(score)
+
+         prev_frame = gray
+
+     cap.release()
+     return np.mean(ssim_scores) if ssim_scores else 0
+
+
+ def evaluate_video(storyboard, video_description, video_path, thumbnail_path, text_prompt):
+     gpt_eval = evaluate_with_gpt4(storyboard, video_description)
+     clip_score = compute_clip_similarity(thumbnail_path, text_prompt)
+     motion_score = compute_motion_score(video_path)
+     coherence_score = compute_temporal_coherence(video_path)
+
+     return {
+         "gpt_eval": gpt_eval,
+         "metrics": {
+             "clip_similarity": clip_score,
+             "motion_score": motion_score,
+             "temporal_coherence": coherence_score
+         }
+     }
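`evaluate_video` combines the GPT-4o rubric with the three numeric metrics and expects a thumbnail image for the CLIP check. A minimal usage sketch, assuming a clip already produced by `generation.generate_video` (the file names and description text below are placeholders):

    import cv2
    from evaluation import evaluate_video

    # Use the first frame of the clip as the thumbnail for CLIP similarity
    cap = cv2.VideoCapture("output/sample.mp4")
    ok, frame = cap.read()
    cap.release()
    if ok:
        cv2.imwrite("output/sample_thumb.png", frame)

    storyboard = {"scene": "misty forest", "shot_type": "wide shot", "emotion": "mysterious"}
    report = evaluate_video(
        storyboard,
        video_description="A character walks through a misty forest in a wide shot.",
        video_path="output/sample.mp4",
        thumbnail_path="output/sample_thumb.png",
        text_prompt="a mysterious wide shot of a misty forest",
    )
    print(report["metrics"])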
generation.py ADDED
@@ -0,0 +1,230 @@
+ import uuid
+
+ import torch
+ from diffusers.utils import export_to_video
+ from diffusers import AutoencoderKLWan, WanPipeline
+ from diffusers.schedulers.scheduling_unipc_multistep import UniPCMultistepScheduler
+
+ import os
+ import time
+ import requests
+ import json
+
+ from PIL import Image as PIL_Image
+ from google import genai
+ from google.genai import types
+ from google.cloud import aiplatform
+ from google.cloud import storage
+ import matplotlib.pyplot as plt
+ import mediapy as media
+
+
+ def wan_text_to_video(prompt, negative_prompt):
+     # Available models: Wan-AI/Wan2.1-T2V-14B-Diffusers, Wan-AI/Wan2.1-T2V-1.3B-Diffusers
+     # model_id = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
+     model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+     vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
+     flow_shift = 5.0  # 5.0 for 720P, 3.0 for 480P
+     scheduler = UniPCMultistepScheduler(prediction_type='flow_prediction', use_flow_sigmas=True, num_train_timesteps=1000, flow_shift=flow_shift)
+     pipe = WanPipeline.from_pretrained(model_id, vae=vae, torch_dtype=torch.bfloat16)
+     pipe.scheduler = scheduler
+     pipe.to("cuda" if torch.cuda.is_available() else "cpu")
+
+     # Fall back to example prompts only when none are provided, so the caller's
+     # prompt and negative prompt are not silently overwritten.
+     if prompt is None:
+         prompt = ("A cat and a dog baking a cake together in a kitchen. The cat is carefully measuring flour, while the "
+                   "dog is stirring the batter with a wooden spoon. The kitchen is cozy, with sunlight streaming through "
+                   "the window.")
+     if negative_prompt is None:
+         negative_prompt = ("Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, "
+                            "images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, "
+                            "incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, "
+                            "misshapen limbs, fused fingers, still picture, messy background, three legs, many people in "
+                            "the background, walking backwards")
+
+     output = pipe(
+         prompt=prompt,
+         negative_prompt=negative_prompt,
+         height=720,
+         width=1280,
+         num_frames=81,
+         guidance_scale=5.0,
+     ).frames[0]
+     export_to_video(output, "output.mp4", fps=16)
+
+     return "output.mp4"
+
+
+ def gcp_veo(prompt: str = "a cat reading a book"):
+     PROJECT_ID = "gcp-credit-applying-to-g-suite"
+     LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")
+     BUCKET_NAME = "dante-test-123456-output"
+     OUTPUT_GCS_PATH = f"gs://{BUCKET_NAME}/videos/output_{int(time.time())}.mp4"
+
+     # Initialize Vertex AI
+     aiplatform.init(project=PROJECT_ID, location=LOCATION)
+
+     # Initialize Generative AI client
+     client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)
+
+     # Video generation pipeline
+     video_model = "veo-2.0-generate-001"
+     # video_model = "veo-3.0-generate-preview"
+     aspect_ratio = "16:9"
+
+     operation = client.models.generate_videos(
+         model=video_model,
+         prompt=prompt,
+         config=types.GenerateVideosConfig(
+             aspect_ratio=aspect_ratio,
+             output_gcs_uri=OUTPUT_GCS_PATH,
+             number_of_videos=1,
+             duration_seconds=5,
+             person_generation="allow_adult",
+             enhance_prompt=True,
+         ),
+     )
+
+     # Poll until the operation is complete
+     print("Generating video...")
+     while not operation.done:
+         time.sleep(15)
+         operation = client.operations.get(operation)
+         print(f"Operation status: {operation}")
+
+     # Error handling
+     if operation.error:
+         raise Exception(f"Video generation failed: {operation.error}")
+
+     # Get the generated video URI
+     if operation.response and operation.result.generated_videos:
+         video_uri = operation.result.generated_videos[0].video.uri
+         print(f"Video generated at: {video_uri}")
+
+         # Download the video from GCS to local storage
+         storage_client = storage.Client(project=PROJECT_ID)
+         bucket = storage_client.bucket(BUCKET_NAME)
+         blob_name = video_uri.replace(f"gs://{BUCKET_NAME}/", "")
+         blob = bucket.blob(blob_name)
+
+         local_output_path = f"output/sample-{uuid.uuid1()}.mp4"
+
+         # Ensure the local directory exists
+         os.makedirs(os.path.dirname(local_output_path), exist_ok=True)
+
+         # Download the video
+         blob.download_to_filename(local_output_path)
+         print(f"Video downloaded to: {local_output_path}")
+
+         # Delete the file from GCS
+         blob.delete()
+         print(f"Video deleted from GCS: {video_uri}")
+
+         return local_output_path
+     else:
+         raise Exception("No video generated or response is empty")
+
+
+ def hailuo_text_to_video(
+     prompt: str,
+     model: str = "T2V-01-Director",
+     output_file_name: str = "output.mp4",
+     api_key: str = ""
+ ) -> str:
+     def invoke_video_generation() -> str:
+         print("-----------------Submit video generation task-----------------")
+         url = "https://api.minimaxi.chat/v1/video_generation"
+         payload = json.dumps({
+             "prompt": prompt,
+             "model": model
+         })
+         headers = {
+             'authorization': 'Bearer ' + api_key,
+             'content-type': 'application/json',
+         }
+
+         response = requests.request("POST", url, headers=headers, data=payload)
+         print(response.text)
+         task_id = response.json()['task_id']
+         print("Video generation task submitted successfully, task ID: " + task_id)
+         return task_id
+
+     def query_video_generation(task_id: str):
+         url = "https://api.minimaxi.chat/v1/query/video_generation?task_id=" + task_id
+         headers = {
+             'authorization': 'Bearer ' + api_key
+         }
+         response = requests.request("GET", url, headers=headers)
+         status = response.json()['status']
+         if status == 'Preparing':
+             print("...Preparing...")
+             return "", 'Preparing'
+         elif status == 'Queueing':
+             print("...In the queue...")
+             return "", 'Queueing'
+         elif status == 'Processing':
+             print("...Generating...")
+             return "", 'Processing'
+         elif status == 'Success':
+             return response.json()['file_id'], "Finished"
+         elif status == 'Fail':
+             return "", "Fail"
+         else:
+             return "", "Unknown"
+
+     def fetch_video_result(file_id: str):
+         print("---------------Video generated successfully, downloading now---------------")
+         url = "https://api.minimaxi.chat/v1/files/retrieve?file_id=" + file_id
+         headers = {
+             'authorization': 'Bearer ' + api_key,
+         }
+
+         response = requests.request("GET", url, headers=headers)
+         print(response.text)
+
+         download_url = response.json()['file']['download_url']
+         print("Video download link: " + download_url)
+         with open(output_file_name, 'wb') as f:
+             f.write(requests.get(download_url).content)
+         print("The video has been downloaded to: " + os.getcwd() + '/' + output_file_name)
+
+     task_id = invoke_video_generation()
+     print("-----------------Video generation task submitted-----------------")
+     while True:
+         time.sleep(10)
+
+         file_id, status = query_video_generation(task_id)
+         if file_id != "":
+             fetch_video_result(file_id)
+             print("---------------Successful---------------")
+             break
+         elif status == "Fail" or status == "Unknown":
+             print("---------------Failed---------------")
+             break
+
+     return os.getcwd() + '/' + output_file_name
+
+
+ def generate_video(prompt, model_id, negative_prompt=None):
+     video_path = None
+     if model_id == "Wan2.1":
+         video_path = wan_text_to_video(prompt, negative_prompt)
+     elif model_id == "SkyReels-V2":
+         raise ValueError("SkyReels-V2 model not yet implemented.")
+     elif model_id == "Veo-2":
+         video_path = gcp_veo(prompt)
+     elif model_id == "T2V-01-Director":
+         video_path = hailuo_text_to_video(prompt)
+     return video_path
+
+
+ # Local Wan generation is only available on CUDA / CPU
+ # wan_text_to_video()
+
+
+ # if __name__ == "__main__":
+ #     try:
+ #         local_path = gcp_veo(prompt="a cat reading a book")
+ #         print(f"Success! Video saved at: {local_path}")
+ #     except Exception as e:
+ #         print(f"Error: {e}")
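`generate_video` is the single dispatch point the rest of the repo calls: it routes the prompt to Wan2.1, Veo-2, or the Hailuo API by `model_id` and returns a local file path, or `None` for an unrecognized id. A minimal sketch of calling it directly (assumes the Veo-2 path, i.e. Vertex AI credentials plus the hard-coded project and bucket are configured; the prompt text is illustrative):

    from generation import generate_video

    path = generate_video(
        prompt="A slow wide shot of a misty forest at dawn.",
        model_id="Veo-2",
    )
    if path is None:
        raise SystemExit("Unrecognized model_id.")
    print("Clip saved to:", path)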
main.py ADDED
@@ -0,0 +1,169 @@
+ from typing import Dict
+
+ import gradio as gr
+ import json
+
+ from cinematic_planning import build_scene_sequence
+ from generation import generate_video
+ from prompt_template_control import generate_video_prompt_with_template
+ from storyboard import generate_multiple_storyboards
+
+
+ def save_storyboard_choice(choice):
+     # The selected Radio choice arrives as a JSON string; append it as one JSON object per line
+     if not choice:
+         return "⚠️ No storyboard selected."
+     parsed: Dict[str, str] = json.loads(choice) if isinstance(choice, str) else choice
+     with open("selected_storyboards.json", "a") as f:
+         f.write(json.dumps(parsed) + "\n")
+     return f"✅ Saved your selection to selected_storyboards.json:\n\n{json.dumps(parsed, indent=2)}"
+
+
+ # Pseudo-video workflow callback
+ def run_pseudo_video_workflow(scene, shot_type, emotion, model_choice, num_keyframes):
+     # Build storyboard dict
+     storyboard = {
+         "scene": scene,
+         "shot_type": shot_type,
+         "emotion": emotion
+     }
+
+     # Call the iterative builder
+     scene_sequence = build_scene_sequence(
+         storyboard, model_choice, num_keyframes=int(num_keyframes)
+     )
+
+     # Format the result as text
+     result_text = ""
+     for i, step in enumerate(scene_sequence):
+         result_text += f"\nKeyframe {i + 1}:\n"
+         result_text += f"Transition: {step['transition_text']}\n"
+         result_text += f"Video Path: {step['video_path']}\n"
+
+     return result_text
+
+
+ if __name__ == "__main__":
+     with gr.Blocks() as demo:
+         gr.Markdown("# 🎥 Video Generator")
+
+         # Video Generator interface
+         with gr.Row():
+             with gr.Column():
+                 video_prompt = gr.Textbox(label="Enter your video prompt")
+                 negative_prompt = gr.Textbox(label="Enter your negative prompt (optional: Wan2.1 only)")
+                 model_choice = gr.Radio(
+                     choices=["SkyReels-V2", "Wan2.1", "Veo-2", "T2V-01-Director"],
+                     label="Choose the video generation model"
+                 )
+                 generate_btn = gr.Button("Generate Video")
+             with gr.Column():
+                 video_output = gr.Video(label="Generated Video")
+
+         generate_btn.click(
+             generate_video,
+             inputs=[video_prompt, model_choice, negative_prompt],
+             outputs=video_output
+         )
+
+         # Divider
+         gr.Markdown("---")
+
+         # Narrative to Storyboard interface
+         gr.Markdown("# 🎬 Narrative to Storyboard Grounding")
+         narrative_input = gr.Textbox(label="Enter your narrative")
+         generate_storyboards_btn = gr.Button("Generate 5 Storyboards")
+         storyboards_output = gr.Radio(
+             choices=[],
+             label="Select your preferred storyboard"
+         )
+         save_choice_btn = gr.Button("Save Selection")
+         save_output = gr.Textbox(label="Save Output", interactive=False)
+
+         # Generate the storyboards
+         def update_storyboards(narrative):
+             cards = generate_multiple_storyboards(narrative)
+             # Radio choices must be plain values, so serialize each storyboard dict to a JSON string
+             return gr.update(choices=[json.dumps(card) for card in cards])
+
+         generate_storyboards_btn.click(
+             update_storyboards,
+             inputs=narrative_input,
+             outputs=storyboards_output
+         )
+
+         # Save the choice
+         save_choice_btn.click(
+             save_storyboard_choice,
+             inputs=storyboards_output,
+             outputs=save_output
+         )
+
+         gr.Markdown("---")
+
+         # Prompt Injection + Template Control
+         gr.Markdown("# 🎥 Prompt Injection + Template Control (LLM + T2V)")
+
+         # Modular controls
+         role_input = gr.Textbox(label="Role", placeholder="e.g., Product demo")
+         setting_input = gr.Textbox(label="Setting", placeholder="e.g., Urban bar")
+         emotion_input = gr.Textbox(label="Emotion", placeholder="e.g., Energetic")
+         shot_input = gr.Textbox(label="Shot Type", placeholder="e.g., Front-facing")
+         duration_input = gr.Textbox(label="Duration", placeholder="e.g., 5s loop")
+
+         # Model selection (note: this rebinds model_choice/video_output for this section;
+         # the earlier click handlers keep references to the original components)
+         model_choice = gr.Radio(
+             choices=["SkyReels-V2", "Veo-2", "Runway", "T2V-01-Director"],
+             label="Choose video generation model"
+         )
+
+         # Generate the final natural language prompt
+         generate_prompt_btn = gr.Button("Generate Final Prompt")
+         final_prompt_output = gr.Textbox(label="Final Video Prompt", interactive=False)
+
+         # Generate video
+         generate_video_btn = gr.Button("Generate Video")
+         video_output = gr.Video(label="Generated Video")
+
+         # Connect callbacks
+         generate_prompt_btn.click(
+             generate_video_prompt_with_template,
+             inputs=[role_input, setting_input, emotion_input, shot_input, duration_input],
+             outputs=final_prompt_output
+         )
+
+         generate_video_btn.click(
+             generate_video,
+             inputs=[final_prompt_output, model_choice, negative_prompt],
+             outputs=video_output
+         )
+
+         gr.Markdown("# 🎞️ Pseudo Video Workflow (Storyboard → Scene Builder)")
+
+         # Storyboard inputs
+         pseudo_scene_input = gr.Textbox(label="Scene", placeholder="e.g., Misty forest")
+         pseudo_shot_input = gr.Textbox(label="Shot Type", placeholder="e.g., Wide shot")
+         pseudo_emotion_input = gr.Textbox(label="Emotion", placeholder="e.g., Mysterious")
+
+         pseudo_model_choice = gr.Radio(
+             choices=["SkyReels-V2", "Wan2.1", "Veo-2", "T2V-01-Director"],
+             label="Choose video generation model"
+         )
+
+         num_keyframes_input = gr.Slider(minimum=1, maximum=20, value=12, step=1, label="Number of Keyframes")
+
+         run_pseudo_video_btn = gr.Button("Build Pseudo Video Workflow")
+
+         pseudo_output = gr.Textbox(label="Workflow Result", lines=10)
+
+         # Hook to the Gradio button
+         run_pseudo_video_btn.click(
+             run_pseudo_video_workflow,
+             inputs=[
+                 pseudo_scene_input,
+                 pseudo_shot_input,
+                 pseudo_emotion_input,
+                 pseudo_model_choice,
+                 num_keyframes_input
+             ],
+             outputs=pseudo_output
+         )
+
+     demo.launch()
prompt_template_control.py ADDED
@@ -0,0 +1,48 @@
+ from openai import OpenAI
+ from dotenv import load_dotenv
+
+ # Env variable
+ load_dotenv()
+
+ # Initialize OpenAI client
+ client = OpenAI()
+
+
+ def generate_video_prompt_with_template(role: str, setting: str, emotion: str, shot: str, duration: str) -> str:
+     system_prompt = (
+         "You are a video director who converts structured metadata into detailed, natural language video prompts.\n"
+         "Here are examples:\n\n"
+         "Example 1:\n"
+         "- Role: Product demo\n"
+         "- Setting: Urban bar\n"
+         "- Emotion: Energetic\n"
+         "- Shot: Front-facing, 5s loop\n"
+         "Output: \"Create a short 5-second video of a product demo in an energetic tone. "
+         "The scene takes place in an urban bar setting, using a front-facing camera to capture the vibrant atmosphere.\"\n\n"
+         "Example 2:\n"
+         "- Role: Storytelling\n"
+         "- Setting: Forest, misty\n"
+         "- Emotion: Mysterious\n"
+         "- Shot: Wide shot\n"
+         "Output: \"Create a video showing a mysterious scene in a misty forest. Use a wide shot to capture the atmosphere and suspense.\"\n\n"
+         "Now, create a natural language video prompt for the following:\n"
+     )
+
+     user_prompt = (
+         f"- Role: {role}\n"
+         f"- Setting: {setting}\n"
+         f"- Emotion: {emotion}\n"
+         f"- Shot: {shot}, {duration}\n"
+     )
+
+     response = client.chat.completions.create(
+         model="gpt-4o",
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt}
+         ],
+         temperature=0.3
+     )
+
+     final_prompt = response.choices[0].message.content.strip()
+     return final_prompt
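`generate_video_prompt_with_template` is a few-shot call: the system prompt carries two worked examples and the structured fields arrive as the user turn. A minimal usage sketch (field values are illustrative; `OPENAI_API_KEY` must be available via the environment or a `.env` file):

    from prompt_template_control import generate_video_prompt_with_template

    prompt = generate_video_prompt_with_template(
        role="Product demo",
        setting="Urban bar",
        emotion="Energetic",
        shot="Front-facing",
        duration="5s loop",
    )
    print(prompt)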
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch
+ diffusers
+ transformers
+ ftfy==6.3.1
+ gradio
+ google-genai
+ mediapy
+ google-cloud-aiplatform
+ google-cloud-storage
+ openai
+ python-dotenv
+ numpy
+ opencv-python
+ scikit-image
+ Pillow
+ requests
+ matplotlib
selected_storyboards.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "scene": "A dense, shadowy forest shrouded in mist. The trees are tall and imposing, their branches intertwining overhead, creating a canopy that blocks out the moonlight. The ground is covered in a thick layer of fallen leaves, and the air is filled with the sound of distant rustling and the occasional hoot of an owl.",
+   "shot_type": "Wide shot",
+   "emotion": "Eerie and mysterious",
+   "version": 4
+ }
storyboard.py ADDED
@@ -0,0 +1,85 @@
+ import json
+ import re
+ from typing import List, Dict
+
+ from dotenv import load_dotenv
+ from openai import OpenAI
+
+
+ # Env variable
+ load_dotenv()
+
+ # Initialize OpenAI client
+ client = OpenAI()
+
+
+ def narrative_to_storyboard(narrative: str) -> Dict[str, str]:
+     """
+     Converts a narrative prompt into a structured storyboard dict
+     with scene, shot type, and emotion using an LLM.
+     """
+     system_prompt = (
+         "You are a professional storyboard artist and cinematographer. "
+         "Given a narrative, extract and describe:\n"
+         "- scene: The environment and visual setting\n"
+         "- shot_type: The camera angle or framing (e.g., wide shot, close-up)\n"
+         "- emotion: The overall mood or emotional tone\n\n"
+         "Return the result as a JSON dictionary with keys: scene, shot_type, emotion."
+     )
+
+     user_prompt = f"Narrative: {narrative}"
+
+     response = client.chat.completions.create(
+         model="gpt-4o",
+         messages=[
+             {"role": "system", "content": system_prompt},
+             {"role": "user", "content": user_prompt}
+         ],
+         temperature=0.3
+     )
+
+     content = response.choices[0].message.content.strip()
+
+     # Use a regex to extract the JSON block
+     json_match = re.search(r"\{[\s\S]*\}", content)
+     if json_match:
+         json_str = json_match.group(0)
+         try:
+             parsed_json = json.loads(json_str)
+             return parsed_json
+         except json.JSONDecodeError as e:
+             print(f"JSON decode error: {e}")
+             return {"scene": "", "shot_type": "", "emotion": ""}
+     else:
+         print("No JSON block found in response.")
+         return {"scene": "", "shot_type": "", "emotion": ""}
+
+
+ def generate_multiple_storyboards(narrative: str, num_versions: int = 5) -> List[Dict[str, str]]:
+     """
+     Generate multiple storyboards for the same narrative by calling the LLM-based
+     narrative_to_storyboard() function multiple times with slight variations.
+     """
+     storyboards = []
+
+     for i in range(num_versions):
+         # Add variation to the narrative to encourage different outputs
+         variant_narrative = f"{narrative}\nPlease provide a different creative version #{i + 1}."
+         storyboard = narrative_to_storyboard(variant_narrative)
+         storyboard['version'] = i + 1  # Track the version number
+         storyboards.append(storyboard)
+
+     return storyboards
+
+
+ if __name__ == "__main__":
+     print("Testing Narrative to Storyboard...")
+     narrative_text = "A girl walks into a dark forest on a misty night."
+     # storyboard_output = narrative_to_storyboard(narrative_text)
+     # print(storyboard_output)
+
+     print("Generating 5 storyboards from the narrative...")
+     storyboard_list = generate_multiple_storyboards(narrative_text)
+     for i, sb in enumerate(storyboard_list):
+         print(f"Version {i + 1}: {sb}")