# Evaluation / inference configuration.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# stripped. Top-level placement of `project_root`, `max_frames` and
# `lrc_upsample_factor`, and `num_channels` sitting under `model`, follow from
# the `${...}` references below. The remaining nesting depths (notably
# `dataset` / `sample_kwargs` under `evaluation`, and `phonemizer_checkpoint`
# under `data.train_dataset`) are reconstructed from key order -- confirm
# against the code that loads this file.
#
# `${name}` / `${a.b}` are interpolations of other keys in this file
# (presumably OmegaConf/Hydra-style -- confirm against the loader).

# Base directory used to build the `${project_root}/public/...` paths below.
project_root: "."

evaluation:
  checkpoint_path: ""
  test_set_path: ""
  negative_style_prompt: ${project_root}/public/vocal.npy
  num_samples: null
  batch_size: 1
  random_crop_style: false
  vae_type: 'diffrhythm'
  num_style_secs: 30
  ignore_style: false
  use_prompt_style: false
  dataset:
    pattern: "placeholder"
    shuffle: false
    resample_by_duration_threshold: null
    always_crop_from_beginning: true
    always_use_style_index: 0
  sample_kwargs:
    batch_infer_num: 1
    cfg_range:
      - 0.05
      - 1
    # fix_dual_cfg: true
    dual_cfg:
      - 4.7
      - 2.5
    steps: 50

model:
  # Shared channel count; reused below via ${model.num_channels}.
  num_channels: 64
  cfm:
    max_frames: ${max_frames}
    num_channels: ${model.num_channels}
    dual_drop_prob: [0.1, 0.5]
    no_edit: true
  dit:
    max_frames: ${max_frames}
    mel_dim: ${model.num_channels}
    dim: 1408
    depth: 16
    heads: 32
    ff_mult: 4
    text_dim: 512
    conv_layers: 4
    grad_ckpt: true
    use_implicit_duration: true

data:
  train_dataset:
    max_frames: ${max_frames}
    multiple_styles: true
    sampling_rate: 44100
    shuffle: true
    # File name is spelled "silience" in the original path; kept verbatim.
    silence_latent_path: ${project_root}/public/silience_latent.pt
    # tokenizer_path and phonemizer_checkpoint point at the same file.
    tokenizer_path: ${project_root}/public/en_us_cmudict_ipa_forward.pt
    lrc_upsample_factor: ${lrc_upsample_factor}
    filler: average_sparse
    phonemizer_checkpoint: ${project_root}/public/en_us_cmudict_ipa_forward.pt

# General settings
# max_frames is referenced by model.cfm, model.dit and data.train_dataset;
# lrc_upsample_factor is referenced by data.train_dataset.
max_frames: 5000
lrc_upsample_factor: 4
seed: 42