#!/usr/bin/env bash
#
# Staged pipeline driver. The stages run by this script are:
#   1 prepare data, 2 make vad segments, 3 train model,
#   4 test model, 5 collect files, 6 clear file_dir.
# Only stages N with stage <= N <= stop_stage are executed.
# Settings are overridden with `--<variable_name> <value>` arguments.
#
# The here-document below is fed to the no-op `:` builtin, so it is never
# executed; it only records example invocations.

: <<'END'

bash run.sh --stage 2 --stop_stage 2 --system_version centos \
--file_folder_name cnn-vad-by-webrtcvad-nx-dns3 \
--final_model_name cnn-vad-by-webrtcvad-nx-dns3 \
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"

bash run.sh --stage 3 --stop_stage 3 --system_version centos \
--file_folder_name cnn-vad-by-webrtcvad-nx-dns3 \
--final_model_name cnn-vad-by-webrtcvad-nx-dns3 \
--noise_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav" \
--speech_patterns "/data/tianxing/HuggingDatasets/nx_noise/data/speech/dns3-speech/**/*.wav \
/data/tianxing/HuggingDatasets/nx_noise/data/speech/nx-speech2/**/*.wav"

END

# Default settings. Any variable assigned in this section can be overridden
# on the command line as `--<name> <value>`.

# Platform selector used to choose the Python interpreter
# (the script distinguishes "windows", "centos" and "ubuntu").
system_version="windows"
# When "true", progress messages are echoed.
verbose=true

# Only stages N with stage <= N <= stop_stage are executed.
stage=0
stop_stage=9

# Directory the stages cd into; all other paths are derived from it.
work_dir="$(pwd)"
# Name of the per-run folder under work_dir (placeholder default).
file_folder_name=file_folder_name
# Name of the packaged model folder / zip (placeholder default).
final_model_name=final_model_name
config_file="yaml/config.yaml"
# Passed as --limit to the evaluation step.
limit=10

# Glob patterns handed to the data preparation step as plain strings;
# quoted so it is obvious the shell must not expand them.
noise_patterns='/data/tianxing/HuggingDatasets/nx_noise/data/noise/**/*.wav'
speech_patterns='/data/tianxing/HuggingDatasets/nx_noise/data/speech/**/*.wav'

# Passed as --max_count to the data preparation step.
max_count=-1

# The settings below are not referenced by any stage in this script; they are
# kept so the corresponding --options are still accepted.
nohup_name=nohup.out

batch_size=64
max_epochs=200
save_top_k=10
patience=5

#######################################
# Parse leading `--name value` pairs and assign each value to the shell
# variable `name` (dashes in the option name are mapped to underscores).
# Only variables that are already set are accepted. Parsing stops at the
# first empty argument or the first argument not starting with `--`.
# Globals:   writes the variable named by each option; sets
#            parsed_option_args to the number of arguments consumed.
# Arguments: the command-line arguments to parse.
# Outputs:   error messages on stderr.
# Returns:   0 on success. Exits the shell with status 1 on an unknown or
#            malformed option name, a missing value, or a value other than
#            "true"/"false" for a variable currently holding "true"/"false".
#######################################
parse_options() {
  local opt_name opt_old
  parsed_option_args=0

  while [[ -n "${1:-}" ]]; do
    case "$1" in
      --*)
        opt_name="${1#--}"
        opt_name="${opt_name//-/_}"

        # Reject anything that is not a plain identifier before using it for
        # indirect expansion, then require the variable to exist already.
        if ! [[ "${opt_name}" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] \
          || [[ -z "${!opt_name+xxx}" ]]; then
          echo "$0: invalid option $1" 1>&2
          exit 1
        fi

        # Without this check `shift 2` would fail without shifting and the
        # loop would never terminate.
        if (($# < 2)); then
          echo "$0: option $1 requires a value" 1>&2
          exit 1
        fi

        # A variable whose current value is a boolean only accepts booleans.
        opt_old="${!opt_name}"
        if [[ "${opt_old}" == "true" || "${opt_old}" == "false" ]] \
          && [[ "$2" != "true" && "$2" != "false" ]]; then
          echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
          exit 1
        fi

        # printf -v stores the value verbatim; nothing in it is evaluated.
        printf -v "${opt_name}" '%s' "$2"
        shift 2
        parsed_option_args=$((parsed_option_args + 2))
        ;;
      *)
        break
        ;;
    esac
  done
}

parse_options "$@"
# Drop the consumed options so that "$@" holds only the remaining arguments.
shift "${parsed_option_args}"

# Paths derived from the (possibly overridden) settings.

# Per-run folder holding the datasets and the training output.
file_dir="${work_dir}/${file_folder_name}"
# Staging folder that the collect stage zips up.
final_model_dir="${work_dir}/../../trained_models/${final_model_name}"
evaluation_audio_dir="${file_dir}/evaluation_audio"

train_dataset="${file_dir}/train.jsonl"
valid_dataset="${file_dir}/valid.jsonl"

train_vad_dataset="${file_dir}/train-vad.jsonl"
valid_vad_dataset="${file_dir}/valid-vad.jsonl"

# Compare against the literal "true" instead of executing the variable's
# value as a command.
[[ "${verbose}" == "true" ]] && echo "system_version: ${system_version}"
[[ "${verbose}" == "true" ]] && echo "file_folder_name: ${file_folder_name}"

#######################################
# Choose the Python interpreter for a platform and make the `python3`
# command used by the stages resolve to it.
# A shell function is used instead of `alias`, because bash does not expand
# aliases in non-interactive shells unless expand_aliases is enabled.
# Globals:   sets python_bin (empty for an unrecognised platform); defines
#            the function python3 when python_bin is executable.
# Arguments: $1 - platform name: windows, centos or ubuntu.
# Outputs:   a warning on stderr when the interpreter is not executable, in
#            which case `python3` keeps resolving through PATH.
#######################################
select_python() {
  case "${1:-}" in
    windows)
      python_bin='D:/Users/tianx/PycharmProjects/virtualenv/nx_denoise/Scripts/python.exe'
      ;;
    centos | ubuntu)
      python_bin='/data/local/bin/nx_denoise/bin/python3'
      ;;
    *)
      python_bin=''
      ;;
  esac

  if [[ -n "${python_bin}" && -x "${python_bin}" ]]; then
    python3() { "${python_bin}" "$@"; }
  elif [[ -n "${python_bin}" ]]; then
    echo "$0: ${python_bin} is not executable; using python3 from PATH" 1>&2
  fi
}

select_python "${system_version}"

#######################################
# Stage 1: run step_1_prepare_data.py from work_dir, passing the noise and
# speech patterns, the train/valid dataset paths and max_count.
# Globals:   verbose, work_dir, noise_patterns, speech_patterns,
#            train_dataset, valid_dataset, max_count (all read).
# Returns:   exits the shell with status 1 if work_dir cannot be entered or
#            the Python step fails, so later stages never run on missing
#            output.
#######################################
stage_1_prepare_data() {
  [[ "${verbose}" == "true" ]] && echo "stage 1: prepare data"
  cd "${work_dir}" || exit 1
  python3 step_1_prepare_data.py \
    --noise_patterns "${noise_patterns}" \
    --speech_patterns "${speech_patterns}" \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --max_count "${max_count}" || exit 1
}

if [[ "${stage}" -le 1 && "${stop_stage}" -ge 1 ]]; then
  stage_1_prepare_data
fi

#######################################
# Stage 2: run step_2_make_vad_segments.py from work_dir, passing the
# train/valid dataset paths and the train/valid VAD dataset paths.
# Globals:   verbose, work_dir, train_dataset, valid_dataset,
#            train_vad_dataset, valid_vad_dataset (all read).
# Returns:   exits the shell with status 1 if work_dir cannot be entered or
#            the Python step fails.
#######################################
stage_2_make_vad_segments() {
  [[ "${verbose}" == "true" ]] && echo "stage 2: make vad segments"
  cd "${work_dir}" || exit 1
  python3 step_2_make_vad_segments.py \
    --train_dataset "${train_dataset}" \
    --valid_dataset "${valid_dataset}" \
    --train_vad_dataset "${train_vad_dataset}" \
    --valid_vad_dataset "${valid_vad_dataset}" || exit 1
}

if [[ "${stage}" -le 2 && "${stop_stage}" -ge 2 ]]; then
  stage_2_make_vad_segments
fi

#######################################
# Stage 3: run the training script from work_dir on the VAD datasets, with
# file_dir as the serialization directory and config_file as configuration.
# NOTE(review): this stage calls step_4_train_model.py while stage 4 calls
# step_3_evaluation.py; the file names are kept as found - confirm they match
# the scripts on disk.
# Globals:   verbose, work_dir, train_vad_dataset, valid_vad_dataset,
#            file_dir, config_file (all read).
# Returns:   exits the shell with status 1 if work_dir cannot be entered or
#            the Python step fails.
#######################################
stage_3_train_model() {
  [[ "${verbose}" == "true" ]] && echo "stage 3: train model"
  cd "${work_dir}" || exit 1
  python3 step_4_train_model.py \
    --train_dataset "${train_vad_dataset}" \
    --valid_dataset "${valid_vad_dataset}" \
    --serialization_dir "${file_dir}" \
    --config_file "${config_file}" || exit 1
}

if [[ "${stage}" -le 3 && "${stop_stage}" -ge 3 ]]; then
  stage_3_train_model
fi

#######################################
# Stage 4: run the evaluation script from work_dir on the validation
# dataset, using the model in "${file_dir}/best", writing audio to
# evaluation_audio_dir and passing limit as --limit.
# NOTE(review): the script is named step_3_evaluation.py although it runs as
# stage 4; the file name is kept as found - confirm it matches the script on
# disk.
# Globals:   verbose, work_dir, valid_dataset, file_dir,
#            evaluation_audio_dir, limit (all read).
# Returns:   exits the shell with status 1 if work_dir cannot be entered or
#            the Python step fails.
#######################################
stage_4_test_model() {
  [[ "${verbose}" == "true" ]] && echo "stage 4: test model"
  cd "${work_dir}" || exit 1
  python3 step_3_evaluation.py \
    --valid_dataset "${valid_dataset}" \
    --model_dir "${file_dir}/best" \
    --evaluation_audio_dir "${evaluation_audio_dir}" \
    --limit "${limit}" || exit 1
}

if [[ "${stage}" -le 4 && "${stop_stage}" -ge 4 ]]; then
  stage_4_test_model
fi

#######################################
# Stage 5: copy the files in "${file_dir}/best" and the evaluation_audio
# folder into final_model_dir, zip that folder next to itself as
# "${final_model_name}.zip" (an existing archive is first renamed to
# "${final_model_name}_backup.zip"), then remove the staging folder.
# Globals:   verbose, work_dir, file_dir, final_model_dir,
#            final_model_name (all read).
# Outputs:   a warning on stderr when there is no evaluation_audio folder.
# Returns:   exits the shell with status 1 when final_model_name is empty, a
#            directory cannot be entered or created, "${file_dir}/best" is
#            missing, or zip fails. In the zip-failure case the staging
#            folder is left in place so the collected model is not lost.
#######################################
stage_5_collect_files() {
  [[ "${verbose}" == "true" ]] && echo "stage 5: collect files"
  : "${final_model_name:?final_model_name must not be empty}"
  cd "${work_dir}" || exit 1

  if [[ ! -d "${file_dir}/best" ]]; then
    echo "$0: missing model directory: ${file_dir}/best" 1>&2
    exit 1
  fi

  mkdir -p "${final_model_dir}" || exit 1

  # Plain cp (no -r) as before: sub-directories of best are skipped, and its
  # exit status is not treated as fatal for that reason.
  cp "${file_dir}/best"/* "${final_model_dir}"
  if [[ -d "${file_dir}/evaluation_audio" ]]; then
    cp -r "${file_dir}/evaluation_audio" "${final_model_dir}"
  else
    echo "$0: no evaluation audio at ${file_dir}/evaluation_audio; skipping" 1>&2
  fi

  cd "${final_model_dir}/.." || exit 1

  if [[ -e "${final_model_name}.zip" ]]; then
    rm -rf -- "${final_model_name}_backup.zip"
    mv -- "${final_model_name}.zip" "${final_model_name}_backup.zip"
  fi

  # Only delete the staging folder once the archive has been written.
  zip -r "${final_model_name}.zip" "${final_model_name}" || exit 1
  rm -rf -- "${final_model_name}"
}

if [[ "${stage}" -le 5 && "${stop_stage}" -ge 5 ]]; then
  stage_5_collect_files
fi

#######################################
# Stage 6: delete the per-run folder file_dir.
# Globals:   verbose, work_dir, file_folder_name, file_dir (all read).
# Returns:   exits the shell with a non-zero status when work_dir cannot be
#            entered or when file_folder_name / file_dir is empty. With an
#            empty folder name file_dir collapses to "${work_dir}/", and
#            removing it would wipe the whole working directory.
#######################################
stage_6_clear_file_dir() {
  [[ "${verbose}" == "true" ]] && echo "stage 6: clear file_dir"
  cd "${work_dir}" || exit 1

  : "${file_folder_name:?file_folder_name must not be empty}"
  rm -rf -- "${file_dir:?}"
}

if [[ "${stage}" -le 6 && "${stop_stage}" -ge 6 ]]; then
  stage_6_clear_file_dir
fi