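# Spaces:
# Sleeping
# Sleeping
# NOTE(review): the three lines above appear to be page-status text captured
# along with the code; they are not part of the program.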
import modal
from modal import App, Image

# Modal application object for this module.
app = App(name="ClassifierAudio")

# Container image: Debian slim plus the Python packages imported by the code
# below, and the ffmpeg binary that the yt-dlp "FFmpegExtractAudio"
# post-processor needs.  The package that provides `snapshot_download` is
# `huggingface_hub`, so that is what gets installed.
image = (
    Image.debian_slim()
    .pip_install(
        "torch",
        "huggingface_hub",
        "transformers",
        "speechbrain",
        "numpy",
        "torchaudio",
        "yt_dlp",
    )
    .apt_install("ffmpeg")
)

# Hugging Face repository id of the classifier that gets downloaded.
model_name = "warisqr7/accent-id-commonaccent_xlsr-en-english"

# Modal secret looked up by name.
# NOTE(review): presumably holds the Hugging Face access token -- confirm.
secrets = [modal.Secret.from_name("hf-secret")]

# Directory the model snapshot is downloaded into and later loaded from.
LOCAL_DIR = "model"
class Accentizer:
    """Download the audio of a clip and run a pretrained classifier on it.

    Expected call order: ``download_model()`` to fetch the model files,
    ``setup()`` to load the classifier, then ``classify(url)`` per clip.

    Third-party packages are imported inside the methods that use them, so
    importing this module does not require them to be installed.

    NOTE(review): no decorators are visible on this class or its methods; if
    the original carried Modal decorators they need to be restored by hand.
    """

    def download_model(self) -> None:
        """Download the ``model_name`` repository snapshot into ``LOCAL_DIR``."""
        import os

        from huggingface_hub import snapshot_download

        # Create and populate the *same* directory that setup() loads from.
        os.makedirs(LOCAL_DIR, exist_ok=True)
        snapshot_download(model_name, local_dir=LOCAL_DIR)

    def setup(self) -> None:
        """Load the classifier from ``LOCAL_DIR`` and bind the downloader.

        Sets ``self.classifier`` and ``self.download``; both are required by
        ``classify``.
        """
        try:
            # Current location of the helper in SpeechBrain.
            from speechbrain.inference.interfaces import foreign_class
        except ImportError:
            # Older SpeechBrain releases only provide the legacy module path.
            from speechbrain.pretrained.interfaces import foreign_class

        # Alias kept so existing callers of ``self.download`` keep working.
        self.download = self.download_audio
        self.classifier = foreign_class(
            source=LOCAL_DIR,
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
        )

    def download_audio(self, url: str, output_path: str = "audio") -> str:
        """Download the best audio stream of ``url`` and convert it to MP3.

        Args:
            url: Address of the media to download, passed to yt-dlp.
            output_path: Destination path *without* a file extension.

        Returns:
            The path of the converted file, ``f"{output_path}.mp3"``.
        """
        import yt_dlp

        codec = "mp3"  # or 'wav'
        ydl_opts = {
            "format": "bestaudio/best",
            # Explicit extension placeholder: the download lands at
            # "<output_path>.<source ext>" and the post-processor then writes
            # "<output_path>.mp3" next to it.
            "outtmpl": f"{output_path}.%(ext)s",
            "postprocessors": [
                {
                    "key": "FFmpegExtractAudio",
                    "preferredcodec": codec,
                    "preferredquality": "192",
                }
            ],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return f"{output_path}.{codec}"

    def classify(self, url: str) -> dict:
        """Download the audio at ``url`` and classify it.

        Args:
            url: Address of the media to classify.

        Returns:
            A dict with keys ``"label"``, ``"score"``, ``"index"`` and
            ``"out_prob"`` holding the four values returned by the
            classifier's ``classify_file``, unchanged.

        Raises:
            AttributeError: If ``setup()`` has not been called, because
                ``self.download`` and ``self.classifier`` are only set there.
        """
        # A replacement downloader installed on ``self.download`` may return
        # None (as the original downloader did); fall back to the file name
        # the default download produces.
        audio_path = self.download(url) or "audio.mp3"
        out_prob, score, index, text_lab = self.classifier.classify_file(audio_path)
        return {
            "label": text_lab,
            "score": score,
            "index": index,
            "out_prob": out_prob,
        }

    def wake_up(self) -> str:
        """Return the constant ``"ok"``; does no other work."""
        return "ok"