Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,25 +3,43 @@ import os
|
|
3 |
import torchaudio
|
4 |
from speechbrain.pretrained import EncoderClassifier
|
5 |
|
6 |
-
def accent_detect(
|
7 |
-
#
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
os.system(f"ffmpeg -y -i '{video_path}' -ar 16000 -ac 1 -vn audio.wav")
|
10 |
|
11 |
-
# Check if audio.wav was created and is of reasonable size
|
12 |
if not os.path.exists("audio.wav") or os.path.getsize("audio.wav") < 1000:
|
13 |
-
return "Audio extraction failed. Please
|
14 |
|
15 |
-
# Load
|
16 |
accent_model = EncoderClassifier.from_hparams(
|
17 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
18 |
savedir="tmp_accent_model"
|
19 |
)
|
20 |
-
# Load audio
|
21 |
signal, fs = torchaudio.load("audio.wav")
|
22 |
if signal.shape[0] > 1:
|
23 |
signal = signal[0].unsqueeze(0)
|
24 |
-
# Predict accent
|
25 |
prediction = accent_model.classify_batch(signal)
|
26 |
pred_label = prediction[3][0]
|
27 |
pred_scores = prediction[1][0]
|
@@ -34,10 +52,13 @@ def accent_detect(video_path):
|
|
34 |
|
35 |
demo = gr.Interface(
|
36 |
fn=accent_detect,
|
37 |
-
inputs=
|
|
|
|
|
|
|
38 |
outputs="text",
|
39 |
title="🗣️ English Accent Classifier (Gradio Demo)",
|
40 |
-
description="
|
41 |
)
|
42 |
|
43 |
if __name__ == "__main__":
|
|
|
3 |
import torchaudio
|
4 |
from speechbrain.pretrained import EncoderClassifier
|
5 |
|
6 |
+
def accent_detect(video_link, video_file):
|
7 |
+
# Decide which input to use
|
8 |
+
video_path = None
|
9 |
+
|
10 |
+
# If a video file is uploaded, use it
|
11 |
+
if video_file is not None:
|
12 |
+
video_path = "uploaded_input.mp4"
|
13 |
+
with open(video_path, "wb") as f:
|
14 |
+
f.write(video_file.read())
|
15 |
+
# Else if a link is provided, try to download it
|
16 |
+
elif video_link and len(video_link.strip()) > 8:
|
17 |
+
# Use yt-dlp for YouTube or wget for direct link
|
18 |
+
if "youtube.com" in video_link or "youtu.be" in video_link:
|
19 |
+
os.system(f'yt-dlp -o input_video.mp4 "{video_link}"')
|
20 |
+
else:
|
21 |
+
os.system(f'wget -O input_video.mp4 "{video_link}"')
|
22 |
+
if os.path.exists("input_video.mp4") and os.path.getsize("input_video.mp4") > 0:
|
23 |
+
video_path = "input_video.mp4"
|
24 |
+
else:
|
25 |
+
return "Failed to download the video. Please check your link."
|
26 |
+
else:
|
27 |
+
return "Please upload a video file or provide a valid video link."
|
28 |
+
|
29 |
+
# Extract audio from video
|
30 |
os.system(f"ffmpeg -y -i '{video_path}' -ar 16000 -ac 1 -vn audio.wav")
|
31 |
|
|
|
32 |
if not os.path.exists("audio.wav") or os.path.getsize("audio.wav") < 1000:
|
33 |
+
return "Audio extraction failed. Please use a different video."
|
34 |
|
35 |
+
# Load model and classify accent
|
36 |
accent_model = EncoderClassifier.from_hparams(
|
37 |
source="speechbrain/lang-id-commonlanguage_ecapa",
|
38 |
savedir="tmp_accent_model"
|
39 |
)
|
|
|
40 |
signal, fs = torchaudio.load("audio.wav")
|
41 |
if signal.shape[0] > 1:
|
42 |
signal = signal[0].unsqueeze(0)
|
|
|
43 |
prediction = accent_model.classify_batch(signal)
|
44 |
pred_label = prediction[3][0]
|
45 |
pred_scores = prediction[1][0]
|
|
|
52 |
|
53 |
# Gradio UI wiring for the accent classifier.
# The two input components are listed in the same order as the parameters of
# accent_detect(video_link, video_file): link textbox first, file upload second.
demo = gr.Interface(
    title="🗣️ English Accent Classifier (Gradio Demo)",
    description="Paste a YouTube/direct MP4 link or upload a video file with English speech. The tool predicts the English accent and confidence.",
    fn=accent_detect,
    inputs=[
        # Optional URL input.
        gr.Textbox(
            label="YouTube or direct MP4 link (optional)",
            placeholder="https://youtube.com/yourvideo",
        ),
        # Alternative input: a video file uploaded by the user.
        gr.File(label="Or upload a video file (MP4, WEBM, etc.)"),
    ],
    # accent_detect returns a plain string, which is shown as text.
    outputs="text",
)
|
63 |
|
64 |
if __name__ == "__main__":
|