kevinwang676's picture
Upload folder using huggingface_hub
1b6bcbc verified
import librosa
import os
import soundfile
import subprocess
from concurrent.futures import ThreadPoolExecutor
from .ext_files import get_files_by_ext
def ffmpeg_installed():
try:
subprocess.run(["ffmpeg", "-version"],
capture_output=True,
check=True)
print("find ffmpeg installed, use ffmpeg")
return True
except Exception as e:
print("ffmpeg not found, use librosa")
return False
def convert_wav_ffmpeg(source_file : str,
target_file : str,
sample_rate : int,
number : int):
os.makedirs(os.path.dirname(target_file), exist_ok=True)
print(f"file {number} start convert")
cmd = ["ffmpeg", "-y", "-i", source_file, "-ar", f"{sample_rate}", "-ac", "1", "-v", "quiet", target_file]
subprocess.run(cmd)
def convert_wav_librosa(source_file : str,
target_file : str,
sample_rate : int,
number : int):
os.makedirs(os.path.dirname(target_file), exist_ok=True)
print(f"file {number} start convert")
data, sample_rate = librosa.load(source_file,
sr=sample_rate,
mono=True)
soundfile.write(target_file, data, sample_rate)
def convert_files(source_dir : str,
target_dir : str,
sample_rate : int,
max_threads = None,
force_librosa = False):
if max_threads == None:
max_threads = os.cpu_count()
ext_files = get_files_by_ext(source_dir, [".mp3","acc","wav"])
ffmpeg_installed_flag = (not force_librosa) and ffmpeg_installed()
os.makedirs(target_dir, exist_ok=True)
with ThreadPoolExecutor(max_workers=max_threads) as executor:
print(f"files count: {len(ext_files)}")
print(f"max_threads = {max_threads}")
for number, file in enumerate(ext_files, start=1):
source_path = os.path.join(source_dir, file)
target_path = os.path.join(target_dir, os.path.splitext(file)[0] + '.wav')
os.makedirs(os.path.dirname(target_path), exist_ok=True)
if not os.path.exists(target_path):
if ffmpeg_installed_flag:
executor.submit(convert_wav_ffmpeg,
source_path,
target_path,
sample_rate,
number)
else:
executor.submit(convert_wav_librosa,
source_path,
target_path,
sample_rate,
number)