|
import streamlit as st |
|
import os |
|
import tempfile |
|
import requests |
|
import random |
|
import matplotlib.pyplot as plt |
|
import torchaudio |
|
import torch |
|
import ffmpeg |
|
|
|
|
|
# Load the SpeechBrain language-ID model once at startup. If anything goes
# wrong (package missing, no network for the model download, ...) we degrade
# gracefully to the simulated classifier instead of crashing the app.
try:
    from speechbrain.inference import EncoderClassifier

    # Pretrained ECAPA-TDNN language-ID model; weights are cached in savedir.
    classifier = EncoderClassifier.from_hparams(
        source="speechbrain/lang-id-commonlanguage_ecapa",
        savedir="pretrained_models/lang-id-commonlanguage_ecapa"
    )
    SB_READY = True
except Exception as e:
    # Surface the actual cause so a failed load is debuggable, not silent.
    st.warning(f" SpeechBrain model load failed ({e}). Falling back to simulation.")
    SB_READY = False
|
|
|
|
|
# Phonetic feature profiles for the accents this demo can report. The keys
# double as the label set for the simulated classifier below.
accent_profiles = {
    "American": ["rhotic", "flapped_t", "cot_caught_merger"],
    "British": ["non_rhotic", "t_glottalization", "trap_bath_split"],
    "Australian": ["non_rhotic", "flat_a", "high_rising_terminal"],
    "Canadian": ["rhotic", "canadian_raising", "eh_tag"],
    "Indian": ["retroflex_consonants", "monophthongization", "syllable_timing"]
}


def simulate_accent_classification():
    """Return a fake accent-detection result.

    Picks a random accent from ``accent_profiles`` and a random confidence
    in [75, 98). Used whenever the real model is unavailable or fails.
    """
    chosen = random.choice(list(accent_profiles.keys()))
    score = random.uniform(75, 98)
    return {
        "accent": chosen,
        # Reported confidence is rounded; the summary formats the raw value.
        "confidence": round(score, 2),
        "summary": f"Simulated detection: {chosen} accent with {score:.2f}% confidence.",
    }
|
|
|
def real_accent_classification(audio_path):
    """Run language ID on an audio file and report an (simulated) accent.

    Parameters
    ----------
    audio_path : str
        Path to an audio file readable by torchaudio.

    Returns
    -------
    dict
        Keys ``accent``, ``confidence`` (percent) and ``summary``. If the
        detected language is not English, ``accent`` is "Non-English".
        Otherwise the accent itself is simulated — the model only performs
        language identification, not accent identification.
    """
    try:
        # Normalize the input to what the model expects: mono, 16 kHz.
        signal, sr = torchaudio.load(audio_path)
        if signal.shape[0] > 1:
            signal = signal.mean(dim=0, keepdim=True)
        if sr != 16000:
            signal = torchaudio.transforms.Resample(sr, 16000)(signal)
        signal = signal.unsqueeze(0)

        pred = classifier.classify_batch(signal)
        probs = pred[0].squeeze(0).tolist()

        # Map every label to a percentage score and pick the top one.
        lang_scores = {classifier.hparams.label_encoder.ind2lab[i]: p * 100
                       for i, p in enumerate(probs)}
        top_lang = max(lang_scores, key=lang_scores.get)

        # BUGFIX: the CommonLanguage label set uses full language names
        # ("English"), not ISO codes, so `top_lang != "en"` rejected English
        # audio every time. Compare case-insensitively against both spellings.
        if top_lang.lower() not in ("en", "english"):
            return {
                "accent": "Non-English",
                "confidence": lang_scores[top_lang],
                "summary": f"Detected language: {top_lang}",
            }

        # Language ID says English; the accent label is still simulated.
        result = simulate_accent_classification()
        result["summary"] += " (Base language: English)"
        return result
    except Exception:
        # Deliberate best-effort fallback: any model/IO failure degrades to
        # the simulated classifier instead of surfacing an error in the UI.
        return simulate_accent_classification()
|
|
|
def extract_audio(url_or_file, is_upload=False):
    """Persist the input media and extract a 16 kHz mono WAV from it.

    Parameters
    ----------
    url_or_file : file-like or str
        An uploaded file object (when ``is_upload`` is True) or a direct
        download URL.
    is_upload : bool
        True when ``url_or_file`` is an uploaded file object.

    Returns
    -------
    str
        Path to the extracted WAV file. NOTE: the temp directory is
        intentionally not removed here — the caller still needs the file.

    Raises
    ------
    requests.HTTPError on a failed download; ffmpeg errors on bad media.
    """
    temp_dir = tempfile.mkdtemp()
    video_path = os.path.join(temp_dir, "input_video.mp4")
    audio_path = os.path.join(temp_dir, "audio.wav")

    if is_upload:
        with open(video_path, "wb") as f:
            f.write(url_or_file.read())
    else:
        # Stream the download in chunks; time out instead of hanging forever
        # on an unresponsive host (requests has no default timeout).
        with requests.get(url_or_file, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(video_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

    # Re-encode to mono 16 kHz WAV, the format the classifier expects.
    ffmpeg.input(video_path).output(audio_path, ar=16000, ac=1).run(overwrite_output=True, quiet=True)
    return audio_path
|
|
|
|
|
# ---------------------------------------------------------------------------
# Streamlit UI: collect a URL or an uploaded file, run the pipeline, report.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="English Accent Analyzer", layout="centered")
st.title("π£οΈ English Accent Analyzer")

st.markdown("### π― Objective:\nUpload or link a video/audio of a speaker. Weβll detect if they're speaking English and simulate the accent.")

url_input = st.text_input("π Paste public Loom or direct MP4/WAV link:")
uploaded_file = st.file_uploader("π Or upload a video/audio file", type=["mp4", "wav"])

if st.button(" Analyze"):
    has_upload = bool(uploaded_file)
    if not (url_input or uploaded_file):
        st.error("Please provide a valid URL or upload a file.")
    else:
        with st.spinner("Analyzing..."):
            try:
                media = uploaded_file if has_upload else url_input
                wav_path = extract_audio(media, is_upload=has_upload)
                if SB_READY:
                    result = real_accent_classification(wav_path)
                else:
                    result = simulate_accent_classification()

                st.success(f"π§ Detected Accent: **{result['accent']}**")
                st.metric("Confidence", f"{result['confidence']}%")
                st.markdown(f"π {result['summary']}")
            except Exception as e:
                st.error(f"β Error during analysis: {e}")
|
|