# NOTE: hosting-page header residue ("Spaces: Running on Zero"); not part of the configuration.
# Pretrained model paths
dtype: "bf16"
text_encoder_path: pretrained_models/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.pth
# NOTE(review): a plain `None` is loaded by YAML as the string "None", not as
# null. Left unchanged because the consumer may depend on that string — confirm
# before switching to `null`.
image_encoder_path: None
# Comma-separated list of the six checkpoint shards. Kept on a single line on
# purpose: folding it would insert spaces into the value.
dit_path: pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00001-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00002-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00003-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00004-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00005-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00006-of-00006.safetensors
vae_path: pretrained_models/Wan2.1-T2V-14B/Wan2.1_VAE.pth
wav2vec_path: pretrained_models/wav2vec2-base-960h
exp_path: pretrained_models/OmniAvatar-14B
# You can set `num_persistent_param_in_dit` to a small number to reduce the
# VRAM required. Written as an explicit null (the original left it empty,
# which loads as the same value).
num_persistent_param_in_dit: null
reload_cfg: true
sp_size: 1
# Data parameters
seed: 42
# Candidate size pairs, one list per `max_hw` setting (presumably the list
# whose suffix matches `max_hw` is the one used — confirm against the loader).
image_sizes_720:
  - [400, 720]
  - [720, 720]
  - [720, 400]
image_sizes_1280:
  - [720, 720]
  - [528, 960]
  - [960, 528]
  - [720, 1280]
  - [1280, 720]
max_hw: 720  # 720: 480p; 1280: 720p
max_tokens: 30000
seq_len: 200
overlap_frame: 13  # must be 1 + 4*n
guidance_scale: 4.5
# Written as an explicit null (the original left it empty, which loads as the
# same value).
audio_scale: null
num_steps: 16
fps: 25
sample_rate: 16000
negative_prompt: "Vivid color tones, background/camera moving quickly, screen switching, subtitles and special effects, mutation, overexposed, static, blurred details, subtitles, style, work, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fingers merging, motionless image, chaotic background, three legs, crowded background with many people, walking backward"
silence_duration_s: 0.3
use_fsdp: false
# The larger this value is, the faster the speed, but the worse the visual
# quality. 0.14 is noted as another value. TODO: check value.
tea_cache_l1_thresh: 0