Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,145 Bytes
26557da |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import torch
from data.video import save_video
from wan_loader import load_wan_pipe
from models.set_condition_branch import set_stand_in
from preprocessor import FaceProcessor
import argparse
# Default negative prompt, kept verbatim (it is passed to the pipeline at runtime).
DEFAULT_NEGATIVE_PROMPT = "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"


def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this inference script.

    Returns:
        An ``argparse.ArgumentParser`` declaring every option that
        :func:`main` reads, with the same flags and defaults as before.
    """
    parser = argparse.ArgumentParser()

    # Input assets.
    parser.add_argument(
        "--ip_image",
        type=str,
        default="test/input/first_frame.png",
        help="Input face image path or URL",
    )
    parser.add_argument(
        "--reference_video",
        type=str,
        default="test/input/pose.mp4",
        help="reference_video path",
    )
    parser.add_argument(
        "--reference_image",
        default="test/input/first_frame.png",
        type=str,
        # Previously a copy-paste of the --reference_video help text.
        help="Reference image path",
    )

    # Generation controls.
    parser.add_argument(
        "--vace_scale",
        type=float,
        default=0.8,
        help="Scaling factor for VACE.",
    )
    parser.add_argument(
        "--prompt",
        type=str,
        default="一个女人举起双手",
        help="Text prompt for video generation",
    )
    parser.add_argument(
        "--negative_prompt",
        type=str,
        default=DEFAULT_NEGATIVE_PROMPT,
        help="Negative prompt to avoid unwanted features",
    )
    parser.add_argument(
        "--seed", type=int, default=0, help="Random seed for reproducibility"
    )
    parser.add_argument(
        "--num_inference_steps",
        type=int,
        default=20,
        help="Number of inference steps",
    )
    parser.add_argument("--tiled", action="store_true", help="Enable tiled mode")

    # Output settings.
    parser.add_argument(
        "--output",
        type=str,
        default="test/output/woman.mp4",
        help="Output video file path",
    )
    parser.add_argument(
        "--fps", type=int, default=25, help="Frames per second for output video"
    )
    parser.add_argument(
        "--quality", type=int, default=9, help="Output video quality (1-9)"
    )

    # Checkpoint locations.
    parser.add_argument(
        "--vace_path",
        type=str,
        default="checkpoints/VACE/",
        help="Path to base model checkpoint",
    )
    parser.add_argument(
        "--stand_in_path",
        type=str,
        default="checkpoints/Stand-In/Stand-In_wan2.1_T2V_14B_ver1.0.ckpt",
        help="Path to LoRA weights checkpoint",
    )
    parser.add_argument(
        "--antelopv2_path",
        type=str,
        default="checkpoints/antelopev2",
        help="Path to AntelopeV2 model checkpoint",
    )
    return parser


def main(argv=None) -> None:
    """Parse the command line, run the pipeline once and save the video.

    Steps, in order: process ``--ip_image`` with ``FaceProcessor``; load the
    pipeline with ``load_wan_pipe`` (``use_vace=True``, bfloat16); apply the
    ``--stand_in_path`` checkpoint via ``set_stand_in``; call the pipeline
    with the prompt/reference options; write the result with ``save_video``.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    face_processor = FaceProcessor(antelopv2_path=args.antelopv2_path)
    ip_image = face_processor.process(args.ip_image)

    pipe = load_wan_pipe(
        base_path=args.vace_path, use_vace=True, torch_dtype=torch.bfloat16
    )
    set_stand_in(
        pipe,
        model_path=args.stand_in_path,
    )

    video = pipe(
        prompt=args.prompt,
        vace_video=args.reference_video,
        vace_reference_image=args.reference_image,
        negative_prompt=args.negative_prompt,
        vace_scale=args.vace_scale,
        seed=args.seed,
        ip_image=ip_image,
        num_inference_steps=args.num_inference_steps,
        tiled=args.tiled,
    )
    save_video(video, args.output, fps=args.fps, quality=args.quality)


# Guarded so that importing this module neither parses the importer's
# sys.argv nor loads any checkpoints.
if __name__ == "__main__":
    main()
|