Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,11 @@ import streamlit as st
|
|
2 |
import os
|
3 |
import tempfile
|
4 |
import requests
|
5 |
-
import subprocess
|
6 |
import random
|
7 |
import matplotlib.pyplot as plt
|
8 |
import torchaudio
|
9 |
import torch
|
|
|
10 |
|
11 |
# Load SpeechBrain
|
12 |
try:
|
@@ -17,7 +17,7 @@ try:
|
|
17 |
)
|
18 |
SPEECHBRAIN_LOADED = True
|
19 |
except Exception as e:
|
20 |
-
st.warning(f"
|
21 |
SPEECHBRAIN_LOADED = False
|
22 |
|
23 |
class AccentAnalyzer:
|
@@ -69,16 +69,24 @@ class AccentAnalyzer:
|
|
69 |
def analyze_accent(self, audio_path):
|
70 |
if not SPEECHBRAIN_LOADED:
|
71 |
return self._simulate_accent_classification(audio_path)
|
|
|
72 |
try:
|
73 |
signal, sr = torchaudio.load(audio_path)
|
74 |
-
|
75 |
-
|
|
|
|
|
76 |
if signal.shape[0] > 1:
|
77 |
signal = signal.mean(dim=0, keepdim=True)
|
78 |
-
|
|
|
|
|
|
|
|
|
79 |
probs = pred[0].squeeze(0).tolist()
|
80 |
labels = pred[1][0]
|
81 |
scores = {speechbrain_classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
|
|
|
82 |
if labels[0] == 'en':
|
83 |
result = self._simulate_accent_classification(audio_path)
|
84 |
result["all_scores"] = scores
|
@@ -93,50 +101,57 @@ class AccentAnalyzer:
|
|
93 |
st.warning(f"Fallback to simulation: {e}")
|
94 |
return self._simulate_accent_classification(audio_path)
|
95 |
|
96 |
-
def download_and_extract_audio(
|
97 |
temp_dir = tempfile.mkdtemp()
|
98 |
video_path = os.path.join(temp_dir, "video.mp4")
|
99 |
audio_path = os.path.join(temp_dir, "audio.wav")
|
100 |
|
101 |
-
if
|
102 |
-
|
103 |
-
|
104 |
-
stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
|
105 |
-
stream.download(output_path=temp_dir, filename="video.mp4")
|
106 |
else:
|
107 |
-
with requests.get(
|
108 |
r.raise_for_status()
|
109 |
with open(video_path, 'wb') as f:
|
110 |
for chunk in r.iter_content(chunk_size=8192):
|
111 |
f.write(chunk)
|
112 |
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
|
|
|
|
117 |
return audio_path
|
118 |
|
119 |
-
# Streamlit
|
120 |
-
st.
|
121 |
-
|
|
|
|
|
|
|
|
|
|
|
122 |
|
123 |
if st.button("Analyze"):
|
124 |
-
if not url:
|
125 |
-
st.error("Please enter a URL.")
|
126 |
else:
|
127 |
try:
|
128 |
-
|
129 |
-
|
130 |
-
|
|
|
131 |
|
132 |
-
st.
|
133 |
|
134 |
labels, values = zip(*results["all_scores"].items())
|
135 |
fig, ax = plt.subplots()
|
136 |
-
ax.bar(labels, values)
|
137 |
ax.set_ylabel('Confidence (%)')
|
138 |
ax.set_title('Accent/Language Confidence')
|
139 |
plt.xticks(rotation=45)
|
140 |
st.pyplot(fig)
|
|
|
141 |
except Exception as e:
|
142 |
-
st.error(f"
|
|
|
2 |
import os
|
3 |
import tempfile
|
4 |
import requests
|
|
|
5 |
import random
|
6 |
import matplotlib.pyplot as plt
|
7 |
import torchaudio
|
8 |
import torch
|
9 |
+
import ffmpeg
|
10 |
|
11 |
# Load SpeechBrain
|
12 |
try:
|
|
|
17 |
)
|
18 |
SPEECHBRAIN_LOADED = True
|
19 |
except Exception as e:
|
20 |
+
st.warning(f"Could not load SpeechBrain model: {e}. Using simulation.")
|
21 |
SPEECHBRAIN_LOADED = False
|
22 |
|
23 |
class AccentAnalyzer:
|
|
|
69 |
def analyze_accent(self, audio_path):
|
70 |
if not SPEECHBRAIN_LOADED:
|
71 |
return self._simulate_accent_classification(audio_path)
|
72 |
+
|
73 |
try:
|
74 |
signal, sr = torchaudio.load(audio_path)
|
75 |
+
duration = signal.shape[1] / sr
|
76 |
+
if duration < 1.0:
|
77 |
+
raise ValueError("Audio too short to analyze.")
|
78 |
+
|
79 |
if signal.shape[0] > 1:
|
80 |
signal = signal.mean(dim=0, keepdim=True)
|
81 |
+
if sr != 16000:
|
82 |
+
signal = torchaudio.transforms.Resample(sr, 16000)(signal)
|
83 |
+
signal = signal.unsqueeze(0) # [1, 1, time]
|
84 |
+
|
85 |
+
pred = speechbrain_classifier.classify_batch(signal)
|
86 |
probs = pred[0].squeeze(0).tolist()
|
87 |
labels = pred[1][0]
|
88 |
scores = {speechbrain_classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
|
89 |
+
|
90 |
if labels[0] == 'en':
|
91 |
result = self._simulate_accent_classification(audio_path)
|
92 |
result["all_scores"] = scores
|
|
|
101 |
st.warning(f"Fallback to simulation: {e}")
|
102 |
return self._simulate_accent_classification(audio_path)
|
103 |
|
104 |
+
def download_and_extract_audio(url_or_path, is_upload=False, timeout=30):
    """Fetch a media file and transcode it to a 16 kHz mono WAV file.

    Args:
        url_or_path: A direct media URL when ``is_upload`` is false, or a
            readable binary file-like object (for example the value returned
            by ``st.file_uploader``) when ``is_upload`` is true.
        is_upload: Selects how ``url_or_path`` is interpreted.
        timeout: Timeout in seconds handed to ``requests.get`` so a stalled
            server cannot block the app forever. Only used for URL input.

    Returns:
        Path to ``audio.wav`` inside a freshly created temporary directory.
        The caller owns that directory and should remove it when done.

    Raises:
        requests.RequestException: The download failed or returned an HTTP
            error status.
        RuntimeError: ffmpeg could not decode or convert the input.
    """
    import shutil

    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")

    try:
        if is_upload:
            # An upload object may already have been read once; rewind so we
            # never write an empty file.
            seekable = getattr(url_or_path, "seekable", None)
            if callable(seekable) and seekable():
                url_or_path.seek(0)
            with open(video_path, "wb") as f:
                # Stream in chunks instead of holding the whole upload in
                # memory a second time.
                shutil.copyfileobj(url_or_path, f)
        else:
            with requests.get(url_or_path, stream=True, timeout=timeout) as r:
                r.raise_for_status()
                with open(video_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)

        try:
            (
                ffmpeg
                .input(video_path)
                .output(audio_path, ar=16000, ac=1, format="wav")
                .run(quiet=True, overwrite_output=True)
            )
        except ffmpeg.Error as e:
            # quiet=True captures stderr; surface its last line because the
            # exception's own message carries no diagnostic detail.
            stderr = e.stderr.decode("utf-8", errors="replace") if e.stderr else ""
            lines = [ln for ln in stderr.splitlines() if ln.strip()]
            detail = lines[-1].strip() if lines else str(e)
            raise RuntimeError(f"ffmpeg could not extract audio: {detail}") from e
    except BaseException:
        # Do not leak the temporary directory when anything above fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise

    return audio_path
|
126 |
|
127 |
+
# --- Streamlit App ---
# Page layout: a URL field and a file uploader feed one "Analyze" action.
# NOTE(review): if the SpeechBrain import fails, the `st.warning` in the
# loader above runs before this call; some Streamlit versions require
# `set_page_config` to be the first Streamlit command - confirm.
st.set_page_config(page_title="Accent Analyzer", layout="wide")
st.title("🗣️ English Accent or Language Analyzer")

st.markdown("Upload a video/audio file or provide a direct `.mp4` or `.wav` URL:")

url = st.text_input("🔗 Enter Direct MP4/WAV URL:")
uploaded_file = st.file_uploader("📁 Or upload a file (MP4/WAV)", type=["mp4", "wav"])

if st.button("Analyze"):
    if not url and not uploaded_file:
        st.error("Please enter a valid URL or upload a file.")
    else:
        import shutil  # only needed for temp-dir cleanup below

        audio_path = None
        try:
            with st.spinner("Processing audio..."):
                # An uploaded file takes precedence over the URL field.
                audio_path = download_and_extract_audio(
                    uploaded_file if uploaded_file else url,
                    is_upload=bool(uploaded_file),
                )
                analyzer = AccentAnalyzer()
                results = analyzer.analyze_accent(audio_path)

            st.success(results["explanation"])

            scores = results["all_scores"]
            if scores:
                labels, values = zip(*scores.items())
                fig, ax = plt.subplots()
                try:
                    ax.bar(labels, values, color='skyblue')
                    ax.set_ylabel('Confidence (%)')
                    ax.set_title('Accent/Language Confidence')
                    # Rotate via the axes rather than global pyplot state.
                    ax.tick_params(axis='x', labelrotation=45)
                    st.pyplot(fig)
                finally:
                    # Figures stay registered with pyplot until closed; without
                    # this every rerun leaves one more figure in memory.
                    plt.close(fig)
            else:
                # zip(*{}.items()) cannot be unpacked into two names.
                st.info("No confidence scores were returned.")

        except Exception as e:
            st.error(f"Failed to analyze: {e}")
        finally:
            # The extracted audio lives in its own temporary directory;
            # remove it so repeated analyses do not fill the disk.
            if audio_path:
                shutil.rmtree(os.path.dirname(audio_path), ignore_errors=True)
|