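# Page header captured together with this file (not part of the configuration):
#   avatar caption: "k-l-lambda's picture"
#   commit message: "commit more checkpoints."  (commit id, apparently: fdb02ef)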
---
# Training configuration for experiment "stage2-9k".
#
# NOTE(review): the copy this was restored from had every key at column 0,
# which made the section keys (data, solver, validation, ...) parse as null
# and produced two top-level `seed` keys. The nesting below is reconstructed
# from the alphabetical key order of the dump (each nested run is sorted and
# the next key resumes the parent's sort order) — confirm against the
# consuming training script before relying on it.

# Pretrained weights / resume settings (paths relative to the working dir).
base_model_path: ./pretrained_weights/stable-diffusion-v1-5
checkpointing_steps: 2000

# Dataset and sampling settings.
data:
  # Two-element pair; presumably a [min, max] range — TODO confirm.
  crop_scale:
    - 1
    - 1
  do_center_crop: false
  # Metadata JSON files, one per dataset.
  meta_paths:
    - /workspace/develop/video/data/ubc_tiktok-dropout0.03/ubc-meta.json
    - /workspace/develop/video/data/tiktok-dance/good-meta.json
    - /workspace/develop/video/data/20240321/meta.json
    - /workspace/develop/video/data/20240327/meta.json
    - /workspace/develop/video/data/20240506/meta.json
    - /workspace/develop/video/data/20240509/meta.json
  n_sample_frames: 24
  # Augmentation parameters; names suggest they apply to the reference
  # image — units and exact meaning are defined by the consumer (TODO confirm).
  ref_augment:
    pan:
      - 0.04
      - 0.04
    rotate: 2
    scale:
      - 0.9
      - 1.2
  sample_rate: 4
  train_bs: 1
  train_height: 960
  train_width: 640

enable_zero_snr: true
exp_name: stage2-9k
image_encoder_path: ./pretrained_weights/sd-image-variations-diffusers/image_encoder
mm_path: ./pretrained_weights/mm_sd_v15_v2.ckpt
noise_offset: 0.05

# Keyword arguments for the noise scheduler (passed through by name,
# judging from the key — verify against the training script).
noise_scheduler_kwargs:
  beta_end: 0.012
  beta_schedule: linear
  beta_start: 0.00085
  clip_sample: false
  num_train_timesteps: 1000
  steps_offset: 1

output_dir: /workspace/camus/train
resume_from_checkpoint: latest
save_dir: /workspace/camus/train/20240513-stage2-9k
save_model_epoch_interval: 1
# Top-level seed; distinct from validation.metric.seed below.
seed: 12580
snr_gamma: 5.0

# Optimizer / training-loop settings.
solver:
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  adam_weight_decay: 0.01
  enable_xformers_memory_efficient_attention: true
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 1.0e-05
  lr_scheduler: constant
  lr_warmup_steps: 1
  max_grad_norm: 1.0
  max_train_steps: 160000
  mixed_precision: fp16
  scale_lr: false
  use_8bit_adam: true

# Stage-1 checkpoint location and step (presumably the weights this stage
# starts from — TODO confirm).
stage1_ckpt_dir: /workspace/camus/train/20240510-stage1-9k
stage1_ckpt_step: 75392
uncond_ratio: 0.1
vae_model_path: ./pretrained_weights/sd-vae-ft-mse

val:
  validation_steps: 1000

# Validation inputs and settings.
validation:
  metric:
    # Two-element pair; presumably [start, end) frame indices — TODO confirm.
    generate_frame_range:
      - 30
      - 54
    guidance_scale: 2.8
    ref_frame: 29
    # Seed for the metric run; separate key from the top-level `seed`.
    seed: 42
    steps: 30
    videos:
      - configs/inference/metric/oATCBbieJIB8u3QAMAUwvMi9ymEOIc1AoDOajA.mp4
      - configs/inference/metric/oonQq0HjAC7ExkJlRSMBBs1q3EIiQgFveLD7fD.mp4
      - configs/inference/metric/os0aLDIkagGgAfAFQsfICCWMuoL8jm3IgJ0Wey.mp4
      - configs/inference/metric/oYflAvAyfAIFRf3yQDrLRDCWcEDoFENF9tBEgg.mp4
  pose_range:
    - 0
    - 24
  # Each entry is a pair: [reference image, pose video].
  test_cases:
    - - ./configs/inference/ref_images/anyone-3.png
      - ./configs/inference/pose_videos/demo18.mp4
    - - ./configs/inference/ref_images/anyone-3-partial.png
      - ./configs/inference/pose_videos/demo6.mp4
    - - ./configs/inference/ref_images/anyone-2.png
      - ./configs/inference/pose_videos/demo11.mp4
    - - ./configs/inference/ref_images/anyone-1.png
      - ./configs/inference/pose_videos/demo11.mp4
    - - ./configs/inference/ref_images/anyone-1.png
      - ./configs/inference/pose_videos/demo6.mp4
    - - ./configs/inference/ref_images/anyone-5.png
      - ./configs/inference/pose_videos/demo11.mp4
    - - ./configs/inference/ref_images/anyone-11.png
      - ./configs/inference/pose_videos/demo15.mp4
  uniform_along_time: false

weight_dtype: fp16