# Model / training configuration.
# NOTE(review): units and exact semantics of each key are defined by the
# consuming code, which is not visible here; hedged notes below should be
# confirmed against it.
model_name: "silero_vad"

# spec: spectral front-end settings.
# Presumably win_size / hop_size are in samples (240 / 80 samples would be
# 30 ms / 10 ms at sample_rate 8000) - TODO confirm.
sample_rate: 8000
nfft: 512
win_size: 240
hop_size: 80
win_type: hann

# model: encoder / decoder dimensions.
encoder_in_channels: 64
encoder_hidden_channels: 128
encoder_out_channels: 64
encoder_kernel_size: 3
encoder_num_layers: 3
decoder_hidden_size: 64
decoder_num_layers: 2

# lsnr: local SNR settings (bounds presumably in dB, per the key names).
n_frame: 3
min_local_snr_db: -15
max_local_snr_db: 30
norm_tau: 1.0

# data: SNR range for the data pipeline (presumably the noise-mixing range
# in dB - verify against the dataset code).
min_snr_db: -10
max_snr_db: 20

# train: optimisation settings.
lr: 0.001
lr_scheduler: "CosineAnnealingLR"
# Presumably forwarded as keyword arguments to the scheduler named in
# lr_scheduler; the entries must be nested under this key to form a mapping.
lr_scheduler_kwargs:
  T_max: 250000
  eta_min: 0.0001
max_epochs: 100
clip_grad_norm: 10.0
seed: 1234
num_workers: 4
batch_size: 128
eval_steps: 25000