Spaces:
Sleeping
Sleeping
File size: 1,957 Bytes
ae09409 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import modal
from modal import App,Image
# Modal application object that the Accentizer class is registered on.
app = modal.App(name="ClassifierAudio")

# Container image: Python dependencies plus the ffmpeg binary for the
# audio-extraction post-processing step of the download.
# The code imports `huggingface_hub` (for snapshot_download), so that
# distribution is installed by name rather than relying on it arriving as a
# transitive dependency of `transformers`.
image = (
    Image.debian_slim()
    .pip_install(
        "torch",
        "huggingface_hub",
        "transformers",
        "speechbrain",
        "numpy",
        "torchaudio",
        "yt_dlp",
    )
    .apt_install("ffmpeg")
)

# Hugging Face repository holding the accent-identification model.
model_name = "warisqr7/accent-id-commonaccent_xlsr-en-english"

# Modal secret attached to the class below.
# NOTE(review): presumably provides the Hugging Face token — confirm.
secrets = [modal.Secret.from_name("hf-secret")]

# Directory the model snapshot is downloaded to and later loaded from.
LOCAL_DIR = "model"
@app.cls(image=image, secrets=secrets, timeout=1800)
class Accentizer:
    """Download the audio track of a URL and classify the speaker's accent.

    The model snapshot is stored in ``LOCAL_DIR`` by ``download_model`` and
    loaded from the same directory by ``setup``. ``classify`` is the remote
    entry point; ``wake_up`` is a trivial call that returns ``"ok"``.
    """

    @modal.build()
    def download_model(self):
        """Download the ``model_name`` snapshot into ``LOCAL_DIR``."""
        import os

        from huggingface_hub import snapshot_download

        # Create the very directory that is downloaded into and that setup()
        # reads from, instead of a second, separately spelled literal.
        os.makedirs(LOCAL_DIR, exist_ok=True)
        snapshot_download(model_name, local_dir=LOCAL_DIR)

    @modal.enter()
    def setup(self):
        """Load the classifier from ``LOCAL_DIR`` and keep it on the instance."""
        # SpeechBrain 1.0 moved the inference interfaces to
        # `speechbrain.inference`; fall back to the legacy location so the
        # class works with either release line.
        try:
            from speechbrain.inference.interfaces import foreign_class
        except ImportError:
            from speechbrain.pretrained.interfaces import foreign_class

        # Alias kept for backward compatibility with code using `self.download`.
        self.download = self.download_audio
        self.classifier = foreign_class(
            source=LOCAL_DIR,
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
        )

    def download_audio(self, url, output_path='audio'):
        """Download the best audio stream of ``url`` and convert it to MP3.

        Args:
            url: Address of the media to download with yt-dlp.
            output_path: Output base path without extension (``'audio'``
                yields ``audio.mp3``). A template that already contains
                ``%(ext)s`` is used unchanged.

        Returns:
            The path of the converted file. It is only meaningful when
            ``output_path`` contains no template field other than ``%(ext)s``.
        """
        import contextlib
        import os

        import yt_dlp

        codec = 'mp3'  # or 'wav'
        ext_field = '%(ext)s'

        # Spell the extension out in the template so the converted file name
        # is well defined rather than depending on how the post-processor
        # rewrites an extension-less name.
        if ext_field in output_path:
            template = output_path
        else:
            template = f"{output_path}.{ext_field}"
        final_path = template.replace(ext_field, codec)

        # The same path is reused for every request handled by a container;
        # drop the previous result so it can never be mistaken for this one.
        with contextlib.suppress(FileNotFoundError):
            os.remove(final_path)

        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': template,
            # Never reuse or resume a file left behind by an earlier request.
            'overwrites': True,
            'continuedl': False,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': codec,
                'preferredquality': '192',
            }],
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return final_path

    @modal.method()
    def classify(self, url):
        """Classify the accent heard in the audio found at ``url``.

        Args:
            url: Address of the media to download and classify.

        Returns:
            A dict with the keys ``"label"``, ``"score"``, ``"index"`` and
            ``"out_prob"``, holding the four values returned by the
            classifier's ``classify_file`` call, unmodified.

        Raises:
            FileNotFoundError: If the download produced no audio file.
        """
        import os

        # Use the path reported by the downloader instead of a hard-coded
        # file name that has to be kept in sync with its output template.
        audio_path = self.download_audio(url)
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(
                f"no audio file was produced at {audio_path!r} for {url!r}"
            )

        out_prob, score, index, text_lab = self.classifier.classify_file(audio_path)
        # NOTE(review): these values are returned exactly as the classifier
        # produced them; presumably some are tensors, which the caller must be
        # able to deserialize — confirm against the client.
        return {"label": text_lab, "score": score, "index": index, "out_prob": out_prob}

    @modal.method()
    def wake_up(self) -> str:
        """Return ``"ok"``; a cheap call with no other work."""
        return "ok"
|