Munaf1987 commited on
Commit
139ec19
·
verified ·
1 Parent(s): 542c2bc

Upload 7 files

Browse files
Files changed (7) hide show
  1. animate.py +18 -0
  2. app.py +46 -0
  3. compose.py +14 -0
  4. generate_image.py +17 -0
  5. requirements.txt +11 -0
  6. scene_planner.py +20 -0
  7. tts.py +16 -0
animate.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from moviepy.editor import ImageClip, AudioFileClip


def animate_scene(image_path, audio_path, idx):
    """Turn one still image + its narration audio into a scene video clip.

    The clip lasts exactly as long as the audio, is resized to 720p height,
    fades in/out over 1 s, and gets a slow Ken Burns style zoom.

    Parameters
    ----------
    image_path : str
        Path to the scene's PNG frame.
    audio_path : str
        Path to the scene's narration WAV.
    idx : int
        Scene index, used to name the output file.

    Returns
    -------
    str
        Path of the written mp4 (``assets/video/scene_{idx}.mp4``).
    """
    audio = AudioFileClip(audio_path)
    duration = audio.duration

    image_clip = (
        ImageClip(image_path)
        .set_duration(duration)
        .set_audio(audio)
        .resize(height=720)
        .fx(lambda clip: clip.crossfadein(1).crossfadeout(1))
        # BUG FIX: MoviePy clips have no `.zoom_in()` method (AttributeError).
        # Emulate the intended 5% zoom with a time-dependent resize that
        # scales linearly from 1.0x at t=0 to 1.05x at the end of the clip.
        .resize(lambda t: 1 + 0.05 * t / duration)
    )

    out_path = f"assets/video/scene_{idx}.mp4"
    image_clip.write_videofile(out_path, fps=24, audio_codec="aac",
                               verbose=False, logger=None)
    # Release the underlying file handles.
    image_clip.close()
    audio.close()
    return out_path
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from scene_planner import plan_scenes
3
+ from generate_image import generate_scene_image
4
+ from tts import generate_audio
5
+ from animate import animate_scene
6
+ from compose import compose_video
7
+ import os
8
+ import shutil
9
+ import spaces
10
+
11
+ LANGUAGES = ["Gujarati", "Hindi", "English"]
12
+
13
@spaces.GPU
def full_pipeline(script, language):
    """Run the whole script-to-cartoon pipeline and return the final video path."""
    scenes = plan_scenes(script)

    # Start every run from a clean workspace.
    if os.path.exists("assets"):
        shutil.rmtree("assets")
    for subdir in ("images", "audio", "video"):
        os.makedirs(os.path.join("assets", subdir))

    video_segments = []
    for scene_no, scene in enumerate(scenes):
        # One image, one narration track, one animated segment per scene.
        image_path = generate_scene_image(scene['prompt'], scene_no)
        audio_path = generate_audio(scene['dialogue'], scene_no, language)
        video_segments.append(animate_scene(image_path, audio_path, scene_no))

    return compose_video(video_segments)
35
+
36
# Gradio UI: a story textbox plus language dropdown feed `full_pipeline`,
# and the resulting mp4 path is rendered in a Video component.
with gr.Blocks() as demo:
    gr.Markdown("# 🧙‍♂️ Script to Cartoon Video Generator (Gujarati | Hindi | English)")
    with gr.Row():
        script_input = gr.Textbox(label="Enter Story or Script", lines=10)
        lang_input = gr.Dropdown(choices=LANGUAGES, label="Select Narration Language")
    gen_btn = gr.Button("Generate Cartoon Video")
    output_video = gr.Video(label="Final Video")

    # Event wiring must happen inside the Blocks context; Gradio displays the
    # file path returned by full_pipeline in the Video component.
    gen_btn.click(full_pipeline, inputs=[script_input, lang_input], outputs=output_video)

demo.launch()
compose.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from moviepy.editor import VideoFileClip, concatenate_videoclips
import os


def compose_video(video_paths):
    """Concatenate the per-scene clips into the final video.

    Parameters
    ----------
    video_paths : list[str]
        Scene mp4 paths in playback order; paths that do not exist are skipped.

    Returns
    -------
    str
        Path of the stitched video (``assets/final_video.mp4``).

    Raises
    ------
    ValueError
        If none of the given paths exist — `concatenate_videoclips` fails
        with an opaque error on an empty list, so fail loudly instead.
    """
    # Import hoisted out of the loop (was re-imported on every iteration).
    clips = [VideoFileClip(path) for path in video_paths if os.path.exists(path)]
    if not clips:
        raise ValueError("compose_video: no scene clips found to concatenate")

    final = concatenate_videoclips(clips, method="compose")
    out_path = "assets/final_video.mp4"
    final.write_videofile(out_path, fps=24, audio_codec="aac",
                          verbose=False, logger=None)
    # Close readers so the scene files are not left open.
    for clip in clips:
        clip.close()
    final.close()
    return out_path
generate_image.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import StableDiffusionXLPipeline
2
+ import torch
3
+ from PIL import Image
4
+ import os
5
+
6
# Load SDXL-Turbo once at import time so every scene reuses the same weights.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/sdxl-turbo",
    # NOTE(review): fp16 weights on a CPU fallback are slow and can error on
    # half-precision ops — confirm this Space always has a GPU.
    torch_dtype=torch.float16,
    variant="fp16"
).to("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): StableDiffusionXLPipeline has no `safety_checker` component
# (that is an SD 1.x attribute); this assignment just sets an unused attribute.
# Presumably intended to disable NSFW filtering — verify against diffusers docs.
pipe.safety_checker = None
12
+
13
def generate_scene_image(prompt, idx, num_inference_steps=2, guidance_scale=0.0):
    """Generate one cartoon frame with SDXL-Turbo and save it as a PNG.

    SDXL-Turbo is distilled for 1-4 sampling steps WITHOUT classifier-free
    guidance; the diffusers defaults (50 steps, guidance 5.0) are both ~25x
    slower and off-distribution for this model, so pin turbo-appropriate
    values (overridable via the new defaulted parameters).

    Parameters
    ----------
    prompt : str
        Scene image description produced by the scene planner.
    idx : int
        Scene index, used to name the output file.
    num_inference_steps : int, optional
        Denoising steps; keep within 1-4 for SDXL-Turbo.
    guidance_scale : float, optional
        CFG scale; SDXL-Turbo was trained without guidance, so 0.0.

    Returns
    -------
    str
        Path of the saved PNG (``assets/images/scene_{idx}.png``).
    """
    image = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
    ).images[0]
    out_path = f"assets/images/scene_{idx}.png"
    image.save(out_path)
    return out_path
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ transformers
3
+ accelerate
4
+ torch
5
+ pillow
6
+ moviepy
7
+ ffmpeg-python
8
+ diffusers
9
+ git+https://github.com/suno-ai/bark.git
10
+ pyttsx3
11
+ spaces
scene_planner.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json

from transformers import pipeline

# BUG FIX: Mistral-7B-Instruct is a decoder-only (causal) model; loading it
# under the "text2text-generation" task (meant for encoder-decoder models)
# fails. Use "text-generation" instead.
scene_splitter = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")


def plan_scenes(script):
    """Split a story into at most 15 scene dicts.

    Each scene dict has a 'prompt' key (image description) and a 'dialogue'
    key (narration). If the model does not return valid JSON, falls back to
    one scene per sentence of the input script.

    Parameters
    ----------
    script : str
        The user's story or script.

    Returns
    -------
    list[dict]
        Up to 15 scene dicts with 'prompt' and 'dialogue' keys.
    """
    prompt = (
        "Split the following story into 10-15 scenes. For each scene, return a JSON with 'prompt' "
        "(scene image description) and 'dialogue' (narration to speak). Make the prompt cartoon-friendly.\nStory:\n" + script
    )

    # return_full_text=False strips the echoed prompt, which would otherwise
    # make json.loads fail on every response.
    response = scene_splitter(
        prompt, max_new_tokens=1024, do_sample=False, return_full_text=False
    )[0]['generated_text']

    try:
        scenes = json.loads(response)
        if not isinstance(scenes, list):
            # A bare JSON object would break the list slice below.
            raise ValueError("model returned JSON that is not a list of scenes")
    except (json.JSONDecodeError, ValueError):
        # Fallback: one scene per sentence, using the sentence as both the
        # image prompt and the narration line.
        scenes = [
            {"prompt": line.strip(), "dialogue": line.strip()}
            for line in script.split(".") if line.strip()
        ]
    return scenes[:15]
tts.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import wave

import numpy as np
from bark import SAMPLE_RATE, generate_audio as bark_tts


def generate_audio(text, idx, language):
    """Synthesize narration for one scene and save it as a WAV file.

    Tries Bark first; if Bark fails for any reason, falls back to the
    offline pyttsx3 engine (best-effort, so the pipeline keeps going).

    Parameters
    ----------
    text : str
        The narration line to speak.
    idx : int
        Scene index, used to name the output file.
    language : str
        Narration language from the UI. NOTE: Bark's generate_audio has no
        language parameter — it infers language from the text itself — and
        the pyttsx3 fallback ignores it too; kept for interface stability.

    Returns
    -------
    str
        Path of the written WAV (``assets/audio/scene_{idx}.wav``).
    """
    out_path = f"assets/audio/scene_{idx}.wav"
    try:
        # BUG FIX: bark.generate_audio(text, lang=...) raised TypeError (no
        # such kwarg), so the Bark path never actually ran.
        samples = bark_tts(text)
        # BUG FIX: Bark returns a float32 numpy array in [-1, 1], not WAV
        # bytes; writing it raw produced a headerless, unplayable file.
        # Convert to 16-bit PCM and write a proper WAV container.
        pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
        with wave.open(out_path, "wb") as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)          # 16-bit samples
            wav_file.setframerate(SAMPLE_RATE)
            wav_file.writeframes(pcm.tobytes())
    except Exception:
        # Deliberate best-effort fallback: any Bark failure (missing model
        # weights, OOM, ...) degrades to basic offline TTS.
        import pyttsx3
        engine = pyttsx3.init()
        engine.setProperty('rate', 150)
        engine.save_to_file(text, out_path)
        engine.runAndWait()
    return out_path