Spaces:

alexnasa
/

OmniAvatar

Running on Zero

App Files Files Community

OmniAvatar / configs /inference.yaml

alexnasa

Upload 37 files

468a4ed verified about 1 month ago

raw

history blame contribute delete

2.12 kB

	# Pretrained model paths
	dtype: "bf16"
	text_encoder_path: pretrained_models/Wan2.1-T2V-14B/models_t5_umt5-xxl-enc-bf16.pth
	# NOTE(review): an unquoted `None` is read by YAML as the string "None", not as null.
	# Left unchanged here; confirm which of the two the consumer expects.
	image_encoder_path: None
	# Comma-separated list of the six DiT safetensors shards (kept on one line so the value is unchanged).
	dit_path: pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00001-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00002-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00003-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00004-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00005-of-00006.safetensors,pretrained_models/Wan2.1-T2V-14B/diffusion_pytorch_model-00006-of-00006.safetensors
	vae_path: pretrained_models/Wan2.1-T2V-14B/Wan2.1_VAE.pth
	wav2vec_path: pretrained_models/wav2vec2-base-960h
	exp_path: pretrained_models/OmniAvatar-14B
	# You can set `num_persistent_param_in_dit` to a small number to reduce VRAM required.
	# Explicit null: same parsed value as the previous bare empty entry, but unambiguous.
	num_persistent_param_in_dit: null

	reload_cfg: true
	sp_size: 1

	# Data parameters
	seed: 42
	# Candidate size pairs, grouped by the `max_hw` value named in each key's suffix.
	# NOTE(review): whether each pair is [height, width] or [width, height] is not visible here; confirm against the consumer.
	image_sizes_720: [[400, 720], [720, 720], [720, 400]]
	image_sizes_1280: [[720, 720], [528, 960], [960, 528], [720, 1280], [1280, 720]]
	max_hw: 720  # 720: 480p; 1280: 720p
	max_tokens: 30000
	seq_len: 200
	overlap_frame: 13  # must be 1 + 4*n
	guidance_scale: 4.5
	# Explicit null: same parsed value as the previous bare empty entry, but unambiguous.
	audio_scale: null
	num_steps: 16
	fps: 25
	sample_rate: 16000
	negative_prompt: "Vivid color tones, background/camera moving quickly, screen switching, subtitles and special effects, mutation, overexposed, static, blurred details, subtitles, style, work, painting, image, still, overall grayish, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn face, deformed, disfigured, malformed limbs, fingers merging, motionless image, chaotic background, three legs, crowded background with many people, walking backward"
	silence_duration_s: 0.3
	use_fsdp: false
	# The larger this value is, the faster the speed, but the worse the visual quality.
	# 0.14 is noted as a candidate value. TODO: check value.
	tea_cache_l1_thresh: 0