Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -444,16 +444,41 @@ def process(
|
|
444 |
input_image, prompt, t2v=False, n_prompt="", seed=31337,
|
445 |
total_second_length=60, latent_window_size=9, steps=25,
|
446 |
cfg=1.0, gs=10.0, rs=0.0, gpu_memory_preservation=6,
|
447 |
-
use_teacache=True, mp4_crf=16
|
448 |
):
|
449 |
global stream
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
if t2v:
|
451 |
-
|
452 |
-
|
453 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
else:
|
|
|
455 |
if isinstance(input_image, dict) and "composite" in input_image:
|
456 |
-
# Handle uploaded image with alpha channel
|
457 |
composite_rgba_uint8 = input_image["composite"]
|
458 |
rgb_uint8 = composite_rgba_uint8[:, :, :3]
|
459 |
mask_uint8 = composite_rgba_uint8[:, :, 3]
|
@@ -467,7 +492,6 @@ def process(
|
|
467 |
elif input_image is None:
|
468 |
raise ValueError("Please provide an input image or enable Text to Video mode")
|
469 |
else:
|
470 |
-
# Handle regular RGB image
|
471 |
input_image = input_image.astype(np.uint8)
|
472 |
|
473 |
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
|
@@ -668,10 +692,10 @@ with block:
|
|
668 |
with gr.Group():
|
669 |
total_second_length = gr.Slider(
|
670 |
label="Duration (Seconds)",
|
671 |
-
minimum=
|
672 |
-
maximum=
|
673 |
value=2,
|
674 |
-
step=
|
675 |
info='Length of generated video'
|
676 |
)
|
677 |
steps = gr.Slider(
|
@@ -687,16 +711,22 @@ with block:
|
|
687 |
minimum=1.0,
|
688 |
maximum=32.0,
|
689 |
value=10.0,
|
690 |
-
step=1,
|
691 |
info='8-12 recommended'
|
692 |
)
|
693 |
-
|
694 |
-
label="Video Quality",
|
695 |
-
|
696 |
-
|
697 |
-
|
698 |
-
|
699 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
700 |
)
|
701 |
|
702 |
gr.Markdown("### Advanced")
|
@@ -793,7 +823,8 @@ with block:
|
|
793 |
input_image, prompt, t2v, n_prompt, seed,
|
794 |
total_second_length, latent_window_size,
|
795 |
steps, cfg, gs, rs, gpu_memory_preservation,
|
796 |
-
use_teacache, mp4_crf
|
|
|
797 |
]
|
798 |
|
799 |
start_button.click(
|
@@ -816,4 +847,14 @@ with block:
|
|
816 |
queue=False
|
817 |
)
|
818 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
819 |
block.launch(share=True)
|
|
|
444 |
input_image, prompt, t2v=False, n_prompt="", seed=31337,
|
445 |
total_second_length=60, latent_window_size=9, steps=25,
|
446 |
cfg=1.0, gs=10.0, rs=0.0, gpu_memory_preservation=6,
|
447 |
+
use_teacache=True, mp4_crf=16, quality_radio="640x360", aspect_ratio="1:1"
|
448 |
):
|
449 |
global stream
|
450 |
+
quality_map = {
|
451 |
+
"360p": (640, 360),
|
452 |
+
"480p": (854, 480),
|
453 |
+
"540p": (960, 540),
|
454 |
+
"720p": (1280, 720),
|
455 |
+
"640x360": (640, 360), # fallback for default
|
456 |
+
}
|
457 |
+
# Aspect ratio map: (width, height)
|
458 |
+
aspect_map = {
|
459 |
+
"1:1": (1, 1),
|
460 |
+
"3:4": (3, 4),
|
461 |
+
"4:3": (4, 3),
|
462 |
+
"16:9": (16, 9),
|
463 |
+
"9:16": (9, 16),
|
464 |
+
}
|
465 |
+
selected_quality = quality_map.get(quality_radio, (640, 360))
|
466 |
+
base_width, base_height = selected_quality
|
467 |
+
|
468 |
if t2v:
|
469 |
+
# Use aspect ratio to determine final width/height
|
470 |
+
ar_w, ar_h = aspect_map.get(aspect_ratio, (1, 1))
|
471 |
+
if ar_w >= ar_h:
|
472 |
+
target_height = base_height
|
473 |
+
target_width = int(round(target_height * ar_w / ar_h))
|
474 |
+
else:
|
475 |
+
target_width = base_width
|
476 |
+
target_height = int(round(target_width * ar_h / ar_w))
|
477 |
+
input_image = np.ones((target_height, target_width, 3), dtype=np.uint8) * 255
|
478 |
+
print(f"Using blank white image for text-to-video mode, {target_width}x{target_height} ({aspect_ratio})")
|
479 |
else:
|
480 |
+
target_width, target_height = selected_quality
|
481 |
if isinstance(input_image, dict) and "composite" in input_image:
|
|
|
482 |
composite_rgba_uint8 = input_image["composite"]
|
483 |
rgb_uint8 = composite_rgba_uint8[:, :, :3]
|
484 |
mask_uint8 = composite_rgba_uint8[:, :, 3]
|
|
|
492 |
elif input_image is None:
|
493 |
raise ValueError("Please provide an input image or enable Text to Video mode")
|
494 |
else:
|
|
|
495 |
input_image = input_image.astype(np.uint8)
|
496 |
|
497 |
yield None, None, '', '', gr.update(interactive=False), gr.update(interactive=True)
|
|
|
692 |
with gr.Group():
|
693 |
total_second_length = gr.Slider(
|
694 |
label="Duration (Seconds)",
|
695 |
+
minimum=1,
|
696 |
+
maximum=10,
|
697 |
value=2,
|
698 |
+
step=1,
|
699 |
info='Length of generated video'
|
700 |
)
|
701 |
steps = gr.Slider(
|
|
|
711 |
minimum=1.0,
|
712 |
maximum=32.0,
|
713 |
value=10.0,
|
714 |
+
step=0.1,
|
715 |
info='8-12 recommended'
|
716 |
)
|
717 |
+
quality_radio = gr.Radio(
|
718 |
+
label="Video Quality (Resolution)",
|
719 |
+
choices=["360p", "480p", "540p", "720p"],
|
720 |
+
value="640x360",
|
721 |
+
info="Choose output video resolution"
|
722 |
+
)
|
723 |
+
# Aspect ratio dropdown, hidden by default
|
724 |
+
aspect_ratio = gr.Dropdown(
|
725 |
+
label="Aspect Ratio",
|
726 |
+
choices=["1:1", "3:4", "4:3", "16:9", "9:16"],
|
727 |
+
value="1:1",
|
728 |
+
visible=False,
|
729 |
+
info="Only applies to Text to Video mode"
|
730 |
)
|
731 |
|
732 |
gr.Markdown("### Advanced")
|
|
|
823 |
input_image, prompt, t2v, n_prompt, seed,
|
824 |
total_second_length, latent_window_size,
|
825 |
steps, cfg, gs, rs, gpu_memory_preservation,
|
826 |
+
use_teacache, 16, # mp4_crf default
|
827 |
+
quality_radio, aspect_ratio
|
828 |
]
|
829 |
|
830 |
start_button.click(
|
|
|
847 |
queue=False
|
848 |
)
|
849 |
|
850 |
+
# Show/hide aspect ratio dropdown based on t2v checkbox
|
851 |
+
def show_aspect_ratio(t2v_checked):
|
852 |
+
return gr.update(visible=bool(t2v_checked))
|
853 |
+
t2v.change(
|
854 |
+
fn=show_aspect_ratio,
|
855 |
+
inputs=[t2v],
|
856 |
+
outputs=[aspect_ratio],
|
857 |
+
queue=False
|
858 |
+
)
|
859 |
+
|
860 |
block.launch(share=True)
|