# Spaces: DjallelBr/AccentDetector (Sleeping)
# File size: 1,957 Bytes
# ae09409

import modal
from modal import App,Image

# Modal application object; the class below registers itself on it via ``@app.cls``.
app = modal.App(name="ClassifierAudio")

# Container image: Python dependencies plus ffmpeg, which yt-dlp's
# FFmpegExtractAudio post-processor needs for the MP3 conversion.
# ``huggingface_hub`` is listed explicitly because the code imports it
# directly (the PyPI package named ``huggingface`` does not provide it).
# NOTE(review): versions are unpinned, so rebuilds may pick up newer releases.
image = (
    Image.debian_slim()
    .pip_install(
        "torch",
        "huggingface_hub",
        "transformers",
        "speechbrain",
        "numpy",
        "torchaudio",
        "yt_dlp",
    )
    .apt_install("ffmpeg")
)

# Hub repository id of the accent-classification model.
model_name = "warisqr7/accent-id-commonaccent_xlsr-en-english"

# Modal secret attached to the class (named "hf-secret").
secrets = [modal.Secret.from_name("hf-secret")]

# Directory the model snapshot is downloaded to and later loaded from.
LOCAL_DIR = "model"
@app.cls(image=image, secrets=secrets, timeout=1800)
class Accentizer:
    """Modal class that classifies the accent spoken in the audio of a URL.

    ``download_model`` stores the model snapshot in ``LOCAL_DIR``, ``setup``
    loads the SpeechBrain classifier from that directory, and ``classify``
    downloads a URL's audio with yt-dlp and runs the classifier on it.
    """

    @modal.build()
    def download_model(self):
        """Download the snapshot of ``model_name`` into ``LOCAL_DIR``."""
        import os

        from huggingface_hub import snapshot_download

        # Create exactly the directory the snapshot is written to, so the two
        # paths cannot drift apart.
        os.makedirs(LOCAL_DIR, exist_ok=True)
        snapshot_download(model_name, local_dir=LOCAL_DIR)

    @modal.enter()
    def setup(self):
        """Load the classifier from ``LOCAL_DIR`` and store it on the instance."""
        try:
            # SpeechBrain 1.0 moved the inference helpers to this module.
            from speechbrain.inference.interfaces import foreign_class
        except ImportError:
            # Older SpeechBrain releases only ship the legacy location.
            from speechbrain.pretrained.interfaces import foreign_class

        # Alias kept for backward compatibility with ``self.download(...)``.
        self.download = self.download_audio
        self.classifier = foreign_class(
            source=LOCAL_DIR,
            pymodule_file="custom_interface.py",
            classname="CustomEncoderWav2vec2Classifier",
        )

    def download_audio(self, url, output_path='audio'):
        """Download the best audio stream of ``url`` and convert it to MP3.

        Args:
            url: Address of the media to download; handed to yt-dlp unchanged.
            output_path: yt-dlp output template, given without a file
                extension.

        Returns:
            The path of the converted file, ``f"{output_path}.mp3"``.
            NOTE(review): assumes the post-processor appends the codec
            extension to the template, as the default ``'audio'`` relies on.

        Raises:
            Whatever ``yt_dlp.YoutubeDL.download`` raises on failure.
        """
        import yt_dlp

        codec = 'mp3'  # or 'wav'
        ydl_opts = {
            'format': 'bestaudio/best',
            'outtmpl': output_path,
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': codec,
                'preferredquality': '192',
            }],
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return f"{output_path}.{codec}"

    @modal.method()
    def classify(self, url):
        """Download the audio behind ``url`` and classify its accent.

        Args:
            url: Address of the media whose audio should be classified.

        Returns:
            A dict with the keys ``"label"``, ``"score"``, ``"index"`` and
            ``"out_prob"``, holding the four values returned by the
            classifier's ``classify_file`` unchanged.
            NOTE(review): these are presumably torch tensors (plus a list of
            label strings); confirm that callers can deserialize them.
        """
        # Use the path reported by the downloader instead of a second
        # hard-coded file name.
        audio_path = self.download_audio(url)

        out_prob, score, index, text_lab = self.classifier.classify_file(audio_path)
        return {"label": text_lab, "score": score, "index": index, "out_prob": out_prob}

    @modal.method()
    def wake_up(self) -> str:
        """Return ``"ok"``; a trivial call that does no work of its own."""
        return "ok"