Spaces:

DEMONMO
/

het

Runtime error

App Files Files Community

DEMONMO committed on May 16

Commit

b9d776d

verified ·

1 Parent(s): 225c8e1

Create video_generator.py

Browse files

Files changed (1) hide show

video_generator.py +240 -0

video_generator.py ADDED Viewed

	@@ -0,0 +1,240 @@

+import torch
+from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
+from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
+from diffusers.utils import export_to_video
+pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-dev", torch_dtype=torch.bfloat16)
+pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=torch.bfloat16)
+pipe.to("cuda")
+pipe_upsample.to("cuda")
+pipe.vae.enable_tiling()
+prompt = "The video depicts a winding mountain road covered in snow, with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the solitude and beauty of a winter drive through a mountainous region."
+negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted"
+expected_height, expected_width = 704, 512
+downscale_factor = 2 / 3
+num_frames = 121
+# Part 1. Generate video at smaller resolution
+downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+latents = pipe(
+    conditions=None,
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=downscaled_width,
+    height=downscaled_height,
+    num_frames=num_frames,
+    num_inference_steps=30,
+    generator=torch.Generator().manual_seed(0),
+    output_type="latent",
+).frames
+# Part 2. Upscale generated video using latent upsampler with fewer inference steps
+# The available latent upsampler upscales the height/width by 2x
+upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+upscaled_latents = pipe_upsample(
+    latents=latents,
+    output_type="latent"
+).frames
+# Part 3. Denoise the upscaled video with few steps to improve texture (optional, but recommended)
+video = pipe(
+    prompt=prompt,
+    negative_prompt=negative_prompt,
+    width=upscaled_width,
+    height=upscaled_height,
+    num_frames=num_frames,
+    denoise_strength=0.4,  # Effectively, 4 inference steps out of 10
+    num_inference_steps=10,
+    latents=upscaled_latents,
+    decode_timestep=0.05,
+    image_cond_noise_scale=0.025,
+    generator=torch.Generator().manual_seed(0),
+    output_type="pil",
+).frames[0]
+# Part 4. Downscale the video to the expected resolution
+video = [frame.resize((expected_width, expected_height)) for frame in video]
+export_to_video(video, "output.mp4", fps=24)
+import torch
+import gradio as gr
+from diffusers import LTXConditionPipeline, LTXLatentUpsamplePipeline
+from diffusers.pipelines.ltx.pipeline_ltx_condition import LTXVideoCondition
+from diffusers.utils import export_to_video
+def generate_video(
+    prompt,
+    negative_prompt,
+    expected_height,
+    expected_width,
+    downscale_factor,
+    num_frames,
+    num_inference_steps,
+    denoise_strength,
+    seed,
+    progress=gr.Progress()
+):
+    # Initialize pipelines (move this outside the function for production)
+    progress(0.1, desc="Loading models...")
+    pipe = LTXConditionPipeline.from_pretrained("Lightricks/LTX-Video-0.9.7-dev", torch_dtype=torch.bfloat16)
+    pipe_upsample = LTXLatentUpsamplePipeline.from_pretrained("Lightricks/ltxv-spatial-upscaler-0.9.7", vae=pipe.vae, torch_dtype=torch.bfloat16)
+    pipe.to("cuda")
+    pipe_upsample.to("cuda")
+    pipe.vae.enable_tiling()
+    # Part 1. Generate video at smaller resolution
+    progress(0.2, desc="Generating initial video...")
+    downscaled_height, downscaled_width = int(expected_height * downscale_factor), int(expected_width * downscale_factor)
+    generator = torch.Generator().manual_seed(seed)
+    latents = pipe(
+        conditions=None,
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=downscaled_width,
+        height=downscaled_height,
+        num_frames=num_frames,
+        num_inference_steps=num_inference_steps,
+        generator=generator,
+        output_type="latent",
+    ).frames
+    # Part 2. Upscale generated video
+    progress(0.5, desc="Upscaling video...")
+    upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
+    upscaled_latents = pipe_upsample(
+        latents=latents,
+        output_type="latent"
+    ).frames
+    # Part 3. Denoise the upscaled video
+    progress(0.7, desc="Refining video quality...")
+    video = pipe(
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        width=upscaled_width,
+        height=upscaled_height,
+        num_frames=num_frames,
+        denoise_strength=denoise_strength,
+        num_inference_steps=10,
+        latents=upscaled_latents,
+        decode_timestep=0.05,
+        image_cond_noise_scale=0.025,
+        generator=generator,
+        output_type="pil",
+    ).frames[0]
+    # Part 4. Downscale the video to the expected resolution
+    progress(0.9, desc="Finalizing video...")
+    video = [frame.resize((expected_width, expected_height)) for frame in video]
+    # Save and return video
+    output_path = "output.mp4"
+    export_to_video(video, output_path, fps=24)
+    return output_path
+# Create Gradio interface
+with gr.Blocks(title="LTX Video Generator") as demo:
+    gr.Markdown("# LTX Video Generator")
+    gr.Markdown("Generate videos from text prompts using Lightricks' LTX model")
+    with gr.Row():
+        with gr.Column():
+            prompt = gr.Textbox(
+                label="Prompt",
+                value="The video depicts a winding mountain road covered in snow, with a single vehicle traveling along it. The road is flanked by steep, rocky cliffs and sparse vegetation. The landscape is characterized by rugged terrain and a river visible in the distance. The scene captures the solitude and beauty of a winter drive through a mountainous region.",
+                lines=4
+            )
+            negative_prompt = gr.Textbox(
+                label="Negative Prompt",
+                value="worst quality, inconsistent motion, blurry, jittery, distorted",
+                lines=2
+            )
+            with gr.Row():
+                expected_height = gr.Slider(
+                    label="Output Height",
+                    minimum=256,
+                    maximum=1024,
+                    step=64,
+                    value=704
+                )
+                expected_width = gr.Slider(
+                    label="Output Width",
+                    minimum=256,
+                    maximum=1024,
+                    step=64,
+                    value=512
+                )
+            with gr.Row():
+                downscale_factor = gr.Slider(
+                    label="Initial Downscale Factor",
+                    minimum=0.3,
+                    maximum=0.9,
+                    step=0.05,
+                    value=2/3
+                )
+                num_frames = gr.Slider(
+                    label="Number of Frames",
+                    minimum=24,
+                    maximum=240,
+                    step=1,
+                    value=121
+                )
+            with gr.Row():
+                num_inference_steps = gr.Slider(
+                    label="Inference Steps",
+                    minimum=10,
+                    maximum=50,
+                    step=1,
+                    value=30
+                )
+                denoise_strength = gr.Slider(
+                    label="Denoise Strength",
+                    minimum=0.1,
+                    maximum=0.9,
+                    step=0.05,
+                    value=0.4
+                )
+                seed = gr.Number(
+                    label="Seed",
+                    value=0,
+                    precision=0
+                )
+            submit_btn = gr.Button("Generate Video", variant="primary")
+        with gr.Column():
+            output_video = gr.Video(label="Generated Video")
+    submit_btn.click(
+        fn=generate_video,
+        inputs=[
+            prompt,
+            negative_prompt,
+            expected_height,
+            expected_width,
+            downscale_factor,
+            num_frames,
+            num_inference_steps,
+            denoise_strength,
+            seed
+        ],
+        outputs=output_video
+    )
+if __name__ == "__main__":
+    demo.launch()