# Hugging Face page residue: uploader kevinwang676; commit message
# "Upload folder using huggingface_hub"; commit 1b6bcbc (verified).
import os
import shutil
from subfix.models.audio.speaker_diarization import Speech_Campplus_Speaker_Diarization
from subfix.utils import convert_files, get_files_by_ext
from subfix.utils.misc import merge_audio_vads
def _remove_dir_if_exists(path):
    """Delete the directory tree at *path* if it is present on disk."""
    if os.path.exists(path):
        shutil.rmtree(path)


def diarization_dir(args):
    """Run speaker diarization over a directory and write per-speaker audio.

    The audio under ``args.source_dir`` is converted twice with
    ``convert_files``: once to ``args.sample_rate`` and, from that copy, once
    to 16000. Each ``.wav`` found in the 16000 copy is passed to
    ``Speech_Campplus_Speaker_Diarization.infer``; for each of the first
    ``args.top_of_number`` speakers it returns, the matching segments are
    handed to ``merge_audio_vads`` together with the ``args.sample_rate``
    copy of the file, producing ``<name>_<speaker><ext>`` under
    ``args.target_dir``.

    Both intermediate directories live under
    ``<args.cache_dir>/subfix/origin`` and are removed before the run and
    again afterwards, including when the run fails part-way.

    Args:
        args: Namespace-like object providing ``source_dir``, ``target_dir``,
            ``cache_dir``, ``sample_rate``, ``min_seconds``,
            ``top_of_number``, ``interval`` and ``oracle_num``. An
            ``oracle_num`` equal to 0 is forwarded to ``infer`` as ``None``.
    """
    source_dir = args.source_dir
    target_dir = args.target_dir
    cache_dir = args.cache_dir
    sample_rate = args.sample_rate
    min_seconds = args.min_seconds
    top_of_number = args.top_of_number
    interval = args.interval

    # 0 means "no oracle speaker count": pass None to the model instead.
    oracle_value = int(args.oracle_num)
    oracle_num = None if oracle_value == 0 else oracle_value

    dir_16000 = os.path.join(cache_dir, 'subfix', 'origin', '16000')
    dir_sample_rate = os.path.join(cache_dir, 'subfix', 'origin', str(sample_rate))
    # NOTE(review): when sample_rate is 16000 both paths above are the same
    # directory, so the second convert_files call reads and writes one
    # location — confirm convert_files tolerates that.

    # Start from a clean cache so stale files from an earlier run are not
    # picked up by get_files_by_ext below.
    _remove_dir_if_exists(dir_16000)
    _remove_dir_if_exists(dir_sample_rate)

    try:
        convert_files(source_dir, dir_sample_rate, sample_rate)
        convert_files(dir_sample_rate, dir_16000, 16000)

        files = get_files_by_ext(dir_16000, [".wav"])

        print("Start Speech_Campplus_Speaker_Diarization")
        SCSD = Speech_Campplus_Speaker_Diarization()

        for file_path in files:
            f_16000 = os.path.join(dir_16000, file_path)
            f_samplerate = os.path.join(dir_sample_rate, file_path)
            stem, ext = os.path.splitext(file_path)

            # Inference runs on the 16000 copy; audio is cut from the
            # sample_rate copy of the same relative path.
            result, topn, _topn_number = SCSD.infer(
                f_16000, min_seconds=min_seconds, oracle_num=oracle_num
            )

            for person in topn[:top_of_number]:
                save_path = os.path.join(target_dir, stem + f"_{person}" + ext)
                print("save:", save_path)

                # Each result item carries its speaker label at index 2; the
                # first two fields are what merge_audio_vads consumes
                # (presumably segment start/end — confirm against infer).
                vad_list = [item[:2] for item in result if item[2] == person]
                if vad_list:
                    merge_audio_vads(
                        f_samplerate, save_path, vad_list, interval=interval
                    )
    finally:
        # Always drop the intermediate copies, even if conversion,
        # inference or merging raised, so the cache does not accumulate.
        _remove_dir_if_exists(dir_16000)
        _remove_dir_if_exists(dir_sample_rate)