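"""Streamlit app: extract audio from an uploaded or linked video, run SpeechBrain
language identification when available, and report a (simulated) English accent
classification."""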
import streamlit as st
import os
import tempfile
import requests
import random
import matplotlib.pyplot as plt
import torchaudio
import torch
import ffmpeg
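# Note: `ffmpeg` is the ffmpeg-python wrapper; it drives the ffmpeg binary installed on the system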
# Try loading SpeechBrain
try:
    from speechbrain.inference import EncoderClassifier
    classifier = EncoderClassifier.from_hparams(
        source="speechbrain/lang-id-commonlanguage_ecapa",
        savedir="pretrained_models/lang-id-commonlanguage_ecapa"
    )
    SB_READY = True
except Exception as e:
    st.warning(f"SpeechBrain model load failed ({e}). Falling back to simulation.")
    SB_READY = False
# Accent profiles for the simulated English-accent detection (feature lists are illustrative; only the keys are used)
accent_profiles = {
"American": ["rhotic", "flapped_t", "cot_caught_merger"],
"British": ["non_rhotic", "t_glottalization", "trap_bath_split"],
"Australian": ["non_rhotic", "flat_a", "high_rising_terminal"],
"Canadian": ["rhotic", "canadian_raising", "eh_tag"],
"Indian": ["retroflex_consonants", "monophthongization", "syllable_timing"]
}
def simulate_accent_classification():
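    """Fallback path: return a random accent from accent_profiles with a random
    confidence score (used when the SpeechBrain model is unavailable)."""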
    accent = random.choice(list(accent_profiles.keys()))
    confidence = random.uniform(75, 98)
    return {
        "accent": accent,
        "confidence": round(confidence, 2),
        "summary": f"Simulated detection: {accent} accent with {confidence:.2f}% confidence."
    }
def real_accent_classification(audio_path):
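    """Run SpeechBrain language ID on the audio file; if the top language is English,
    return a simulated accent result, otherwise report the detected language.
    Any failure falls back to pure simulation."""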
    try:
        signal, sr = torchaudio.load(audio_path)
        # Downmix to mono and resample to the 16 kHz input rate the model expects
        if signal.shape[0] > 1:
            signal = signal.mean(dim=0, keepdim=True)
        if sr != 16000:
            signal = torchaudio.transforms.Resample(sr, 16000)(signal)
        # signal is already shaped [1, time]; classify_batch expects a [batch, time] tensor
        pred = classifier.classify_batch(signal)
        probs = pred[0].squeeze(0).tolist()
        lang_scores = {classifier.hparams.label_encoder.ind2lab[i]: p * 100 for i, p in enumerate(probs)}
        top_lang = max(lang_scores, key=lang_scores.get)
        # The CommonLanguage label set uses full language names (e.g. "English"), not ISO codes
        if top_lang not in ("en", "English"):
            return {"accent": "Non-English", "confidence": lang_scores[top_lang], "summary": f"Detected language: {top_lang}"}
        # Simulate the accent breakdown once English is confirmed
        result = simulate_accent_classification()
        result["summary"] += " (Base language: English)"
        return result
    except Exception:
        return simulate_accent_classification()
def extract_audio(url_or_file, is_upload=False):
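    """Save the uploaded file (or download the URL) to a temp directory, then
    convert it to 16 kHz mono WAV with ffmpeg and return the WAV path."""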
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "input_video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")
    if is_upload:
        with open(video_path, "wb") as f:
            f.write(url_or_file.read())
    else:
        with requests.get(url_or_file, stream=True) as r:
            r.raise_for_status()
            with open(video_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    ffmpeg.input(video_path).output(audio_path, ar=16000, ac=1).run(overwrite_output=True, quiet=True)
    return audio_path
# --- Streamlit UI ---
st.set_page_config(page_title="English Accent Analyzer", layout="centered")
st.title("🗣️ English Accent Analyzer")
st.markdown("### 🎯 Objective:\nUpload or link a video/audio of a speaker. We’ll detect if they're speaking English and simulate the accent.")
url_input = st.text_input("🔗 Paste public Loom or direct MP4/WAV link:")
uploaded_file = st.file_uploader("📁 Or upload a video/audio file", type=["mp4", "wav"])
if st.button("Analyze"):
    if not url_input and not uploaded_file:
        st.error("Please provide a valid URL or upload a file.")
    else:
        with st.spinner("Analyzing..."):
            try:
                audio_path = extract_audio(uploaded_file if uploaded_file else url_input, is_upload=bool(uploaded_file))
                result = real_accent_classification(audio_path) if SB_READY else simulate_accent_classification()
                st.success(f"🎧 Detected Accent: **{result['accent']}**")
                st.metric("Confidence", f"{result['confidence']}%")
                st.markdown(f"📝 {result['summary']}")
            except Exception as e:
                st.error(f"❌ Error during analysis: {e}")