import json
import logging
import os
from pathlib import Path
from typing import Optional

import librosa
import soundfile as sf
import taglib
from tqdm import tqdm

# Module-level logger, named after this module so its records can be filtered
# independently of other libraries.
logger = logging.getLogger(__name__)

# Configure the root logger to emit INFO and above. basicConfig() leaves an
# already-configured root logger (one that has handlers) untouched.
logging.basicConfig(level=logging.INFO)

class MusicDataPreprocessor:
    """Convert a tree of MP3/WAV files into mono WAVs plus a JSON manifest.

    Every supported file found under ``input_dir`` is decoded with librosa at
    ``TARGET_SAMPLE_RATE`` (mono), written to ``<output_dir>/audio/<stem>.wav``
    and described by one entry in
    ``<output_dir>/metadata/dataset_info.json``.
    """

    # Rate (Hz) every file is resampled to while it is loaded.
    TARGET_SAMPLE_RATE = 16000

    # Lower-case extensions (without the dot) that are picked up for processing.
    _SUPPORTED_FORMATS = ("mp3", "wav")

    def __init__(self, input_dir: str, output_dir: str):
        """Remember the two directories and create the output layout.

        Args:
            input_dir: Directory that is searched recursively for audio files.
            output_dir: Directory that receives the ``audio`` and ``metadata``
                sub-directories; it is created (with parents) if missing.
        """
        self.input_dir = Path(input_dir)
        self.output_dir = Path(output_dir)
        # One dict per successfully processed file, in processing order.
        self.metadata = []

        self.output_dir.mkdir(parents=True, exist_ok=True)
        (self.output_dir / "audio").mkdir(exist_ok=True)
        (self.output_dir / "metadata").mkdir(exist_ok=True)

    def extract_metadata(self, audio_path: Path) -> Optional[dict]:
        """Extract metadata from audio file (MP3 or WAV).

        The file is decoded with librosa (to measure its duration) and its
        tags are read with TagLib.

        Args:
            audio_path: Path of the audio file to inspect.

        Returns:
            A dict with the keys ``filename``, ``format``, ``duration``,
            ``genre``, ``title``, ``artist``, ``sample_rate`` and
            ``channels``, or ``None`` if the file could not be read (the
            error is logged, not raised).
        """
        try:
            samples, sample_rate = librosa.load(
                audio_path, sr=self.TARGET_SAMPLE_RATE
            )
            return self._build_metadata(audio_path, samples, sample_rate)
        except Exception as e:
            # Best-effort per file: one unreadable file must not abort a batch.
            logger.error("Error processing %s: %s", audio_path, e)
            return None

    def process_files(self):
        """Process all audio files (MP3 and WAV) in the input directory.

        Each file is decoded once, described, and re-written as a WAV file
        under ``<output_dir>/audio``. Files that fail to load or to save are
        logged and skipped. Afterwards the collected entries and per-format
        counters are written to ``<output_dir>/metadata/dataset_info.json``.
        """
        audio_files = self._discover_audio_files()

        # "other" is kept so the stats schema stays unchanged for consumers;
        # discovery filters by extension, so it always remains 0.
        formats_found = {"mp3": 0, "wav": 0, "other": 0}
        formats_processed = {"mp3": 0, "wav": 0}
        # Output stems already written in this run, used to avoid overwrites.
        used_stems = set()

        logger.info("Found %d audio files to process", len(audio_files))

        for audio_path in tqdm(audio_files, desc="Processing audio files"):
            file_ext = audio_path.suffix.lower()[1:]
            formats_found[file_ext] += 1

            # Decode once and reuse the samples for both the duration and the
            # output file instead of loading every input twice.
            try:
                samples, sample_rate = librosa.load(
                    audio_path, sr=self.TARGET_SAMPLE_RATE, mono=True
                )
                metadata = self._build_metadata(audio_path, samples, sample_rate)
            except Exception as e:
                logger.error("Error processing %s: %s", audio_path, e)
                continue

            output_stem = self._unique_stem(audio_path.stem, used_stems)
            output_audio_path = self.output_dir / "audio" / f"{output_stem}.wav"
            try:
                sf.write(output_audio_path, samples, sample_rate, format="WAV")
            except Exception as e:
                logger.error("Error saving %s: %s", audio_path, e)
                continue

            # Only count and record the file once its output really exists.
            used_stems.add(output_stem)
            formats_processed[file_ext] += 1
            metadata["processed_path"] = str(
                output_audio_path.relative_to(self.output_dir)
            )
            self.metadata.append(metadata)

        manifest_path = self.output_dir / "metadata" / "dataset_info.json"
        with open(manifest_path, "w", encoding="utf-8") as f:
            json.dump(
                {
                    "files": self.metadata,
                    "stats": {
                        "total_processed": len(self.metadata),
                        "formats_found": formats_found,
                        "formats_processed": formats_processed,
                    },
                },
                f,
                indent=2,
            )

        logger.info("Processed %d files successfully", len(self.metadata))
        logger.info(
            "Files found: MP3: %d, WAV: %d",
            formats_found["mp3"],
            formats_found["wav"],
        )
        logger.info(
            "Files processed: MP3: %d, WAV: %d",
            formats_processed["mp3"],
            formats_processed["wav"],
        )

    def _discover_audio_files(self) -> list:
        """Return the supported audio files under ``input_dir``, sorted.

        Extensions are compared case-insensitively, so ``.MP3`` is found and
        lookalikes such as ``.mpv`` are not. Directories and anything located
        inside ``output_dir`` are ignored, so a nested output directory is not
        re-ingested on a later run.
        """
        output_root = self.output_dir.resolve()
        found = []
        for path in sorted(self.input_dir.rglob("*")):
            if path.suffix.lower()[1:] not in self._SUPPORTED_FORMATS:
                continue
            if not path.is_file():
                continue
            if output_root in path.resolve().parents:
                continue
            found.append(path)
        return found

    def _build_metadata(self, audio_path: Path, samples, sample_rate) -> dict:
        """Build the manifest entry for one already-decoded file.

        Args:
            audio_path: Path of the source file (tags are read from it).
            samples: Audio samples as returned by ``librosa.load``.
            sample_rate: Sample rate ``samples`` were loaded at.

        Raises:
            Exception: Whatever TagLib or librosa raise; callers handle it.
        """
        audio_file = taglib.File(str(audio_path))
        try:
            tags = audio_file.tags
            return {
                "filename": audio_path.name,
                "format": audio_path.suffix.lower()[1:],
                "duration": float(
                    librosa.get_duration(y=samples, sr=sample_rate)
                ),
                "genre": self._first_tag(tags, "GENRE"),
                "title": self._first_tag(tags, "TITLE"),
                "artist": self._first_tag(tags, "ARTIST"),
                # Rate the samples were loaded at, not the file's native rate.
                "sample_rate": sample_rate,
                # Channel count as reported by TagLib for the source file.
                "channels": audio_file.channels,
            }
        finally:
            # Release the underlying file handle even when a lookup fails.
            audio_file.close()

    @staticmethod
    def _first_tag(tags, key: str, default: str = "unknown") -> str:
        """Return the first value stored under ``key`` in a tag mapping.

        ``tags`` maps tag names to lists of values; a missing key or an empty
        list yields ``default`` instead of raising.
        """
        values = tags.get(key)
        return values[0] if values else default

    @staticmethod
    def _unique_stem(stem: str, taken) -> str:
        """Return ``stem``, or ``stem_<n>`` if ``stem`` is already in ``taken``.

        ``taken`` is not modified; ``n`` is the smallest positive integer that
        gives an unused name.
        """
        if stem not in taken:
            return stem
        counter = 1
        while f"{stem}_{counter}" in taken:
            counter += 1
        return f"{stem}_{counter}"

def main(argv=None):
    """Command-line entry point: preprocess ``--input_dir`` into ``--output_dir``.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when the arguments are invalid or
            ``--input_dir`` is not an existing directory.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--input_dir", type=str, required=True, help="Directory containing music files")
    parser.add_argument("--output_dir", type=str, required=True, help="Directory to save processed files")
    args = parser.parse_args(argv)

    # Fail loudly on a mistyped path instead of silently writing an empty
    # dataset; checked before anything is created on disk.
    if not Path(args.input_dir).is_dir():
        parser.error(f"--input_dir is not a directory: {args.input_dir}")

    preprocessor = MusicDataPreprocessor(args.input_dir, args.output_dir)
    preprocessor.process_files()


if __name__ == "__main__":
    main()