Upload 7 files
- animate.py +18 -0
- app.py +46 -0
- compose.py +14 -0
- generate_image.py +17 -0
- requirements.txt +11 -0
- scene_planner.py +20 -0
- tts.py +16 -0
animate.py
ADDED
@@ -0,0 +1,18 @@
from moviepy.editor import ImageClip, AudioFileClip


def animate_scene(image_path, audio_path, idx):
    # Match the clip length to the narration audio.
    audio = AudioFileClip(audio_path)
    duration = audio.duration

    image_clip = (
        ImageClip(image_path)
        .set_duration(duration)
        .set_audio(audio)
        .resize(height=720)
        .crossfadein(1)
        .crossfadeout(1)
        # MoviePy clips have no zoom_in() method; a time-dependent resize
        # produces the intended slow Ken Burns style zoom instead.
        .resize(lambda t: 1 + 0.05 * t / duration)
    )

    out_path = f"assets/video/scene_{idx}.mp4"
    image_clip.write_videofile(out_path, fps=24, audio_codec="aac", verbose=False, logger=None)
    return out_path
app.py
ADDED
@@ -0,0 +1,46 @@
import os
import shutil

import gradio as gr
import spaces

from scene_planner import plan_scenes
from generate_image import generate_scene_image
from tts import generate_audio
from animate import animate_scene
from compose import compose_video

LANGUAGES = ["Gujarati", "Hindi", "English"]


@spaces.GPU
def full_pipeline(script, language):
    scenes = plan_scenes(script)

    # Start from a clean slate on every run.
    if os.path.exists("assets"):
        shutil.rmtree("assets")
    os.makedirs("assets/images")
    os.makedirs("assets/audio")
    os.makedirs("assets/video")

    video_segments = []
    for idx, scene in enumerate(scenes):
        prompt = scene["prompt"]
        line = scene["dialogue"]

        image_path = generate_scene_image(prompt, idx)
        audio_path = generate_audio(line, idx, language)
        video_path = animate_scene(image_path, audio_path, idx)
        video_segments.append(video_path)

    final_path = compose_video(video_segments)
    return final_path


with gr.Blocks() as demo:
    gr.Markdown("# 🧙‍♂️ Script to Cartoon Video Generator (Gujarati | Hindi | English)")
    with gr.Row():
        script_input = gr.Textbox(label="Enter Story or Script", lines=10)
        # A default value avoids passing None into the pipeline when the
        # user never touches the dropdown.
        lang_input = gr.Dropdown(choices=LANGUAGES, value="English", label="Select Narration Language")
    gen_btn = gr.Button("Generate Cartoon Video")
    output_video = gr.Video(label="Final Video")

    gen_btn.click(full_pipeline, inputs=[script_input, lang_input], outputs=output_video)

demo.launch()
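To try the app outside Hugging Face Spaces (a sketch, assuming the dependencies in requirements.txt install cleanly and a CUDA GPU is available for SDXL; the @spaces.GPU decorator should be a harmless no-op off-platform):

    pip install -r requirements.txt
    python app.py

Gradio then serves the UI at http://127.0.0.1:7860 by default.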
compose.py
ADDED
@@ -0,0 +1,14 @@
import os

from moviepy.editor import VideoFileClip, concatenate_videoclips


def compose_video(video_paths):
    # Skip any segment that failed to render rather than crashing the whole run.
    clips = [VideoFileClip(path) for path in video_paths if os.path.exists(path)]

    final = concatenate_videoclips(clips, method="compose")
    out_path = "assets/final_video.mp4"
    final.write_videofile(out_path, fps=24, audio_codec="aac", verbose=False, logger=None)
    return out_path
generate_image.py
ADDED
@@ -0,0 +1,17 @@
import torch
from diffusers import StableDiffusionXLPipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# float16 weights are only reliable on GPU; fall back to float32 on CPU.
# (SDXL pipelines ship without a safety checker, so there is nothing to disable.)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/sdxl-turbo",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    variant="fp16",
).to(device)


def generate_scene_image(prompt, idx):
    # SDXL-Turbo is distilled for 1-4 step sampling with guidance disabled;
    # the diffusers defaults (50 steps, guidance on) are slow and off-spec here.
    image = pipe(prompt, num_inference_steps=2, guidance_scale=0.0).images[0]
    out_path = f"assets/images/scene_{idx}.png"
    image.save(out_path)
    return out_path
requirements.txt
ADDED
@@ -0,0 +1,11 @@
gradio
transformers
accelerate
torch
pillow
moviepy
ffmpeg-python
diffusers
bark
scipy
pyttsx3
spaces
scene_planner.py
ADDED
@@ -0,0 +1,20 @@
import json

from transformers import pipeline

# Mistral-7B-Instruct is a causal (decoder-only) model, so the task is
# "text-generation", not "text2text-generation".
scene_splitter = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")


def plan_scenes(script):
    prompt = (
        "Split the following story into 10-15 scenes. For each scene, return a JSON with 'prompt' "
        "(scene image description) and 'dialogue' (narration to speak). Make the prompt cartoon-friendly.\nStory:\n" + script
    )

    # return_full_text=False keeps the prompt out of the generated output.
    response = scene_splitter(prompt, max_new_tokens=1024, do_sample=False, return_full_text=False)[0]["generated_text"]

    try:
        scenes = json.loads(response)
    except json.JSONDecodeError:
        # Fallback: one scene per sentence when the model output is not valid JSON.
        scenes = [
            {"prompt": line.strip(), "dialogue": line.strip()}
            for line in script.split(".")
            if line.strip()
        ]
    return scenes[:15]
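For reference, full_pipeline in app.py reads scene['prompt'] and scene['dialogue'] from each entry, so a successful model response is expected to parse into a list of this shape (values here are purely illustrative):

    [
        {"prompt": "cartoon wizard waving a wand in a sunny forest",
         "dialogue": "Once upon a time, a wizard lived deep in the woods."},
        {"prompt": "cartoon village waking up at sunrise",
         "dialogue": "Every morning he walked down to the village."}
    ]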
tts.py
ADDED
@@ -0,0 +1,16 @@
from bark import SAMPLE_RATE, generate_audio as bark_tts
from scipy.io.wavfile import write as write_wav

# Bark picks the language from the voice preset; it has no lang= argument.
# There is no published Gujarati preset, so Gujarati falls back to a Hindi voice.
VOICE_PRESETS = {
    "english": "v2/en_speaker_6",
    "hindi": "v2/hi_speaker_0",
    "gujarati": "v2/hi_speaker_0",
}


def generate_audio(text, idx, language):
    out_path = f"assets/audio/scene_{idx}.wav"
    try:
        preset = VOICE_PRESETS.get(language.lower(), "v2/en_speaker_6")
        # bark.generate_audio returns a float numpy array at SAMPLE_RATE,
        # which must be written out as a WAV file, not raw bytes.
        audio = bark_tts(text, history_prompt=preset)
        write_wav(out_path, SAMPLE_RATE, audio)
    except Exception:
        # Offline fallback using whatever system TTS voices are installed.
        import pyttsx3

        engine = pyttsx3.init()
        engine.setProperty("rate", 150)
        engine.save_to_file(text, out_path)
        engine.runAndWait()
    return out_path
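A note on language coverage: Bark's published voice presets include Hindi and English but not Gujarati, which is why the preset table in tts.py reuses a Hindi voice for Gujarati narration, and the pyttsx3 fallback is likewise limited to the system's installed voices. A Gujarati-capable TTS engine could be swapped in behind the same generate_audio(text, idx, language) signature without touching the rest of the pipeline.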