Spaces:
Running
Running
File size: 4,962 Bytes
9552602 5dd29f5 b41b158 9552602 2109747 9552602 b41b158 9552602 b41b158 9552602 2109747 9552602 b41b158 9552602 2109747 9552602 2109747 9552602 2109747 9552602 2109747 0467024 2109747 0467024 2109747 43bdbc0 c0be2e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import gradio as gr
from speechbrain.inference import EncoderClassifier
import torch
import requests
import subprocess
import os
import uuid
import yt_dlp
model = None  # Cached classifier instance; filled in by get_model() on first use


def get_model():
    """Return the shared accent classifier, creating it on the first call.

    The classifier is built with ``EncoderClassifier.from_hparams`` from the
    ``Jzuluaga/accent-id-commonaccent_ecapa`` source and stored in the
    module-level ``model`` variable, so subsequent calls reuse that instance
    instead of loading it again.
    """
    global model
    # Guard clause: once the classifier exists, hand back the cached object.
    if model is not None:
        return model
    model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
def extract_id_from_url(url):
    """Return the last path segment of *url*, used as the video identifier.

    The query string (``?...``) and fragment (``#...``) are removed *before*
    the path is split, so a ``/`` inside a query value cannot be mistaken for
    a path separator. Trailing slashes are ignored so that
    ``.../share/<id>/`` yields ``<id>`` rather than an empty string.

    Args:
        url: Share URL (or bare identifier) as a string.

    Returns:
        The final non-empty path segment, or ``""`` if there is none.
    """
    # Strip query and fragment first; both may legally contain "/" characters.
    path = url.split("?", 1)[0].split("#", 1)[0]
    # Drop trailing slashes so the last segment is the id, not an empty string.
    return path.rstrip("/").rsplit("/", 1)[-1]
def fetch_loom_download_url(id):
    """Request the transcoded download URL for a Loom video.

    Sends a POST to Loom's ``transcoded-url`` endpoint for the given session
    id and returns the ``"url"`` field of the JSON response.

    Args:
        id: Loom video/session identifier. The parameter name is kept for
            backward compatibility even though it shadows the builtin.

    Returns:
        The download URL string when the response status is 200, otherwise
        ``None`` (the status code is printed).

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed, including when the timeout expires.
    """
    response = requests.post(
        url=f"https://www.loom.com/api/campaigns/sessions/{id}/transcoded-url",
        # Bound the wait so an unresponsive server cannot hang the caller.
        timeout=30,
    )
    if response.status_code != 200:
        print("Error while retrieving response: ", response.status_code)
        # Explicit failure value; the previous bare `exit` expression did nothing.
        return None
    return response.json()["url"]
def download_loom_video(url, filename):
    """Stream the video at *url* to *filename* on disk.

    The request is sent with a browser-like ``User-Agent`` header and the
    body is written in 8 KiB chunks so the whole video is never held in
    memory.

    Args:
        url: Direct download URL of the video.
        filename: Path of the file to create or overwrite.

    Returns:
        *filename* on success, or ``None`` if the request fails (the error is
        printed).
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # The timeout bounds the connection attempt and each wait for data,
        # so a stalled server cannot block this call forever.
        with requests.get(url, headers=headers, stream=True, timeout=30) as response:
            # Check the status before opening the file so an HTTP error does
            # not leave an empty file behind.
            response.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    except requests.exceptions.RequestException as e:
        print(f"Failed to download Loom video: {e}")
        return None
    print(f"Downloaded video to {filename}")
    return filename
def download_direct_mp4(url, filename):
    """Stream a directly linked video file at *url* to *filename*.

    Args:
        url: URL that serves the video file itself.
        filename: Path of the file to create or overwrite.

    Returns:
        *filename* on success, or ``None`` if anything goes wrong (the error
        is printed).
    """
    try:
        # Use the response as a context manager so the streamed connection is
        # always released, and set a timeout so a stalled server cannot hang
        # the call.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return filename
    except Exception as e:
        # Deliberately broad: this helper is best-effort and reports every
        # failure (network or file system) to the caller as None.
        print(f"Error downloading direct mp4: {e}")
        return None
def download_video_from_url(url):
    """Download the media behind *url* to a local file and return its path.

    Three cases are handled, in this order:

    1. URLs containing ``loom.com``: the video id is extracted, the download
       URL is requested from Loom, and the file is saved as
       ``LoomVideo_<id>.mp4``.
    2. URLs whose path ends in ``.mp4`` (case-insensitive, ignoring any query
       string or fragment): the file is streamed directly.
    3. Anything else is handed to ``yt_dlp``.

    Args:
        url: The URL supplied by the user.

    Returns:
        Path of the downloaded file, or ``None`` when a Loom or direct
        download fails. Errors raised by ``yt_dlp`` are not caught here.
    """
    if "loom.com" in url:
        video_id = extract_id_from_url(url)
        print(video_id)
        direct_url = fetch_loom_download_url(video_id)
        print(direct_url)
        if not direct_url:
            # No download URL was obtained; there is nothing to fetch.
            return None
        filename = f"LoomVideo_{video_id}.mp4"
        success = download_loom_video(direct_url, filename)
        print(success)
        return filename if success else None

    # Inspect only the path part so links such as ".../clip.mp4?token=..."
    # or ".../CLIP.MP4" are still recognised as direct files.
    path_part = url.split("?", 1)[0].split("#", 1)[0]
    if path_part.lower().endswith(".mp4"):
        filename = f"video_{uuid.uuid4()}.mp4"
        return download_direct_mp4(url, filename)

    # fallback to yt_dlp for youtube, vimeo, etc.
    out_path = f"video_{uuid.uuid4()}.mp4"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': out_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path
def extract_audio(video_file):
    """Convert *video_file* to a mono 16 kHz 16-bit PCM WAV file using ffmpeg.

    Args:
        video_file: Path of the input media file.

    Returns:
        Path of the newly created ``audio_<uuid>.wav`` file.

    Raises:
        ValueError: If *video_file* is empty or ``None``.
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries the end of ffmpeg's stderr output.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    if not video_file:
        raise ValueError("No input file to extract audio from")

    audio_path = f"audio_{uuid.uuid4()}.wav"
    cmd = [
        # "-y" (overwrite without asking) goes first with the other global options.
        "ffmpeg", "-y", "-i", video_file, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        audio_path,
    ]
    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Fail here with ffmpeg's own diagnostics instead of returning a path
        # to a file that was never written.
        stderr_tail = result.stderr.decode("utf-8", errors="replace")[-500:]
        raise RuntimeError(
            f"ffmpeg failed with exit code {result.returncode}: {stderr_tail}"
        )
    return audio_path
def _remove_quietly(path):
    """Delete *path*; a missing or undeletable file is ignored (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def classify_accent(input_file_or_url):
    """Classify the speaker's accent in a media file or at a URL.

    Args:
        input_file_or_url: Either a string starting with ``http`` (the media
            is downloaded first), a local file path, or an object with a
            ``name`` attribute holding the path.

    Returns:
        A 3-tuple of strings: the top predicted label, its confidence
        formatted as a percentage, and a newline-separated list of the three
        highest-scoring labels with their percentages.

    Raises:
        ValueError: If the URL could not be downloaded.
    """
    model = get_model()

    # Check if it's a URL
    downloaded = isinstance(input_file_or_url, str) and input_file_or_url.startswith("http")
    if downloaded:
        video_path = download_video_from_url(input_file_or_url)
        if video_path is None:
            # Stop here with a clear message rather than passing None to ffmpeg.
            raise ValueError(f"Could not download video from URL: {input_file_or_url}")
    else:
        video_path = input_file_or_url.name if hasattr(input_file_or_url, "name") else input_file_or_url

    audio_path = None
    try:
        audio_path = extract_audio(video_path)
        out_probs, top_prob, _, label = model.classify_file(audio_path)
    finally:
        # Temporary files are only needed until classification has returned;
        # remove them so repeated requests do not accumulate files on disk.
        if audio_path is not None:
            _remove_quietly(audio_path)
        if downloaded:
            # Only delete files this function downloaded, never the caller's file.
            _remove_quietly(video_path)

    # Compute the top-3 once and reuse it for both labels and scores.
    top3 = torch.topk(out_probs, 3)
    top_labels = model.hparams.label_encoder.decode_ndim(top3.indices.squeeze())
    confidences = top3.values.squeeze().tolist()
    result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
    return label[0], f"{top_prob.item()*100:.2f}%", result
# Gradio UI: inputs (file upload or URL) on the left, prediction outputs on the right.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown(
        "Upload a video or audio file, or paste a link (e.g. direct .mp4 URL or Loom video) to identify the speaker's accent."
    )
    with gr.Row():
        with gr.Column():
            input_file = gr.File(label="Upload video/audio file", file_types=[".mp4", ".wav", ".mp3"])
            url_input = gr.Textbox(label="...or paste a direct mp4 URL/loom link")
            submit_btn = gr.Button("Classify Accent")
        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Run classification on the URL if one was given, otherwise on the file.

        Returns the three strings shown in the output boxes; when neither
        input is provided the first box shows ``"No input"``.
        """
        # Pasted links often carry stray whitespace, which would make the URL
        # fail the "starts with http" check and be treated as a local path.
        url = (url or "").strip()
        if url:
            return classify_accent(url)
        if file:
            return classify_accent(file)
        return "No input", "", ""

    submit_btn.click(handle_inputs, inputs=[input_file, url_input], outputs=[label_output, confidence_output, top3_output])

if __name__ == "__main__":
    demo.launch(share=True)
|