Sajidahamed committed on
Commit
7cebfba
·
verified ·
1 Parent(s): 98c4440

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -27
app.py CHANGED
@@ -1,32 +1,44 @@
1
-
2
- import streamlit as st
3
  import os
4
- import subprocess
5
  import torchaudio
6
  from speechbrain.pretrained import EncoderClassifier
7
 
8
- st.title("🗣️ English Accent Classifier (Proof of Concept)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
- url = st.text_input("Enter public video URL (YouTube or direct MP4):")
11
- if st.button("Analyze"):
12
- with st.spinner("Downloading video..."):
13
- if "youtube.com" in url or "youtu.be" in url:
14
- os.system(f'yt-dlp -o input_video.mp4 "{url}"')
15
- else:
16
- os.system(f'wget -O input_video.mp4 "{url}"')
17
- with st.spinner("Extracting audio..."):
18
- os.system("ffmpeg -y -i input_video.mp4 -ar 16000 -ac 1 -vn audio.wav")
19
- with st.spinner("Classifying accent..."):
20
- accent_model = EncoderClassifier.from_hparams(
21
- source="speechbrain/lang-id-commonlanguage_ecapa",
22
- savedir="tmp_accent_model"
23
- )
24
- signal, fs = torchaudio.load("audio.wav")
25
- if signal.shape[0] > 1:
26
- signal = signal[0].unsqueeze(0)
27
- prediction = accent_model.classify_batch(signal)
28
- pred_label = prediction[3][0]
29
- pred_scores = prediction[1][0]
30
- confidence = float(pred_scores.max()) * 100
31
- st.success(f"Predicted Accent: {pred_label} ({confidence:.1f}%)")
32
- st.info(f"The model is {confidence:.0f}% confident this is a {pred_label} English accent.")
 
1
+ import gradio as gr
 
2
  import os
 
3
  import torchaudio
4
  from speechbrain.pretrained import EncoderClassifier
5
 
6
# Message returned for every input/extraction problem (same text the UI
# has always shown).
_EXTRACTION_FAILED = "Audio extraction failed. Please check your file."

# Extracted WAV files smaller than this many bytes are treated as failed
# extractions.
_MIN_AUDIO_BYTES = 1000

# Classifier instance reused across calls; populated on first use.
_accent_model = None


def _get_accent_model():
    """Return the shared classifier, building it on the first call only."""
    global _accent_model
    if _accent_model is None:
        _accent_model = EncoderClassifier.from_hparams(
            source="speechbrain/lang-id-commonlanguage_ecapa",
            savedir="tmp_accent_model",
        )
    return _accent_model


def _extract_audio(video_path, audio_path):
    """Run ffmpeg to write 16 kHz mono audio; return True if it looks usable."""
    import subprocess

    # Argument list (no shell): paths with spaces or shell metacharacters
    # are passed to ffmpeg verbatim instead of being interpreted.
    command = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-ar", "16000",
        "-ac", "1",
        "-vn",
        audio_path,
    ]
    try:
        subprocess.run(command, check=False)
    except OSError:
        # ffmpeg is missing or could not be started.
        return False
    return (
        os.path.isfile(audio_path)
        and os.path.getsize(audio_path) >= _MIN_AUDIO_BYTES
    )


def accent_detect(video_file):
    """Extract the audio track of a video and classify the speech in it.

    Args:
        video_file: One of
            * a filesystem path (``str`` or ``os.PathLike``),
            * a tuple/list whose first item is such a path,
            * a binary file-like object exposing ``read()``,
            * ``None`` (nothing uploaded).

    Returns:
        A human-readable string: either the predicted label with its score,
        or a failure message when the input is missing/unreadable or the
        audio could not be extracted.
    """
    import tempfile

    if video_file is None:
        return _EXTRACTION_FAILED

    # A private scratch directory per call: no stale audio.wav from an
    # earlier run can be mistaken for this run's output, and simultaneous
    # calls cannot overwrite each other's files.
    with tempfile.TemporaryDirectory() as workdir:
        if isinstance(video_file, (tuple, list)):
            if not video_file:
                return _EXTRACTION_FAILED
            video_path = video_file[0]
        elif hasattr(video_file, "read"):
            video_path = os.path.join(workdir, "uploaded_input.mp4")
            with open(video_path, "wb") as out:
                out.write(video_file.read())
        else:
            video_path = video_file

        try:
            video_path = os.fspath(video_path)
        except TypeError:
            return _EXTRACTION_FAILED
        if not os.path.isfile(video_path):
            return _EXTRACTION_FAILED

        audio_path = os.path.join(workdir, "audio.wav")
        if not _extract_audio(video_path, audio_path):
            return _EXTRACTION_FAILED

        signal, _sample_rate = torchaudio.load(audio_path)

    # ffmpeg was asked for mono, but keep only the first channel if more
    # than one comes back.
    if signal.shape[0] > 1:
        signal = signal[0].unsqueeze(0)

    prediction = _get_accent_model().classify_batch(signal)
    # Indices follow the tuple returned by classify_batch: [1] is read as
    # the score and [3] as the text label, for the first batch item.
    pred_label = prediction[3][0]
    pred_scores = prediction[1][0]
    # NOTE(review): the score is used as-is; if the model emits
    # log-likelihoods this is not a true percentage -- confirm against the
    # model card.
    confidence = float(pred_scores.max()) * 100
    # NOTE(review): the checkpoint name suggests a language-ID model; the
    # "English accent" wording is kept unchanged -- confirm it is intended.
    explanation = f"Predicted Accent: {pred_label} ({confidence:.1f}%)\nThe model is {confidence:.0f}% confident this is a {pred_label} English accent."
    return explanation
34
+
35
# Gradio UI: a single video upload wired to accent_detect, with the result
# shown as plain text.
# The Video component is created without a `type=` keyword: it is not one of
# the component's documented parameters, and the component already passes the
# uploaded file to the callback as a file path.
demo = gr.Interface(
    fn=accent_detect,
    inputs=gr.Video(label="Upload a Video File (MP4, WEBM, etc.)"),
    outputs="text",
    title="🗣️ English Accent Classifier (Gradio Demo)",
    description="Upload a short video clip of English speech. This tool predicts the English accent and confidence.",
)

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()