# Spaces: Sleeping / Sleeping  (hosting-page status text captured with this listing; it is not part of app.py)
# app.py - FIXED VERSION
import os
import sys
from urllib.parse import urlsplit

import streamlit as st
# Page-level settings. set_page_config has to be the first Streamlit command
# the script issues, so this stays ahead of every other st.* call.
_PAGE_SETTINGS = {
    "page_title": "English Language & Accent Detection",
    "page_icon": "π",
    "layout": "centered",
}
st.set_page_config(**_PAGE_SETTINGS)
# Runtime limits for the hosted (Streamlit Cloud) deployment.
# torch is imported here, after set_page_config, so the page configuration
# remains the first Streamlit command of the script.
import torch

# Avoid threading issues in the tokenizer layer.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Keep torch on a single thread to reduce CPU usage.
torch.set_num_threads(1)
# The pipeline helpers come from the project-local ``utils`` module. When that
# import fails, show the problem in the page and halt the script run rather
# than letting a raw traceback surface.
try:
    from utils import (
        download_video,
        extract_audio,
        analyze_speech,
        cleanup_files,
    )
except ImportError as import_error:
    st.error(f"β Import Error: {import_error}")
    st.info("This might be a deployment issue. Please check the logs.")
    st.stop()
# --- Page header and "how it works" panel ------------------------------------
# Markdown shown inside the collapsible help panel below.
_HOW_IT_WORKS_MD = """
## Two-Step Analysis Process:
### Step 1: Language Detection π
- **Detects what language** the speaker is using
- **Supports 107+ languages** using advanced AI models
- **Only proceeds to accent analysis** if English is detected
### Step 2: English Accent Analysis π― (Only if English detected)
- **16 different English accents** can be identified:
- American, British (England), Australian, Indian, Canadian
- Scottish, Irish, Welsh, South African, New Zealand
- Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
## Perfect for:
β **Recruitment screening** - Verify English language candidates
β **Language assessment** - Determine if applicant speaks English
β **Accent identification** - Identify specific English accent varieties
β **Call center hiring** - Screen for English-speaking candidates
## Requirements:
- Direct video file URL (MP4, AVI, MOV, etc.)
- Clear audio with minimal background noise
- At least 10-15 seconds of speech
- Single speaker preferred
"""

st.title("π English Language & Accent Detection Tool")
st.write("Upload a video to first detect if the speaker is speaking English, then analyze their English accent.")

# Tell users up front that the first model load is slow.
st.info("β οΈ **Note**: First-time model loading may take 2-3 minutes. Please be patient!")

# Collapsible description of the two-step process and input requirements.
with st.expander("βΉοΈ How this tool works"):
    st.write(_HOW_IT_WORKS_MD)
# Text box for the video link; the analyze button handler reads ``video_url``.
video_url = st.text_input(
    label="π Video URL:",
    placeholder="https://example.com/video.mp4",
    help="Enter a direct link to a video file",
)
# --- Analysis workflow -------------------------------------------------------
# Cut-offs (in percent) used to bucket a confidence score into
# high / moderate / low when choosing which banner to show.
_HIGH_CONFIDENCE = 80
_MODERATE_CONFIDENCE = 60

_DOWNLOAD_FAILURE_REASONS = (
    "- URL is not a direct link to a video file",
    "- Video is behind authentication/login",
    "- Server is blocking requests",
    "- URL is incorrect or video doesn't exist",
)

_EXTRACTION_FAILURE_REASONS = (
    "- Video file is corrupted",
    "- Video format not supported",
    "- Video has no audio track",
    "- FFmpeg is not properly installed",
)

_ANALYSIS_FAILURE_HINTS = (
    "- Try a different video with clearer audio",
    "- Ensure the video contains clear speech (any language)",
    "- Check that the audio is at least 10-15 seconds long",
    "- Verify the video URL is accessible",
)

# Filled in with str.format: ``language`` (display name) and ``confidence``.
_NON_ENGLISH_DETAILS_MD = """
**Detected Language:** {language}
**Detection Confidence:** {confidence:.1f}%
This tool first detects what language is being spoken before proceeding to accent analysis.
Since the speaker appears to be speaking **{language}** rather than English,
we cannot proceed with English accent detection.
**To get English accent analysis:**
- Provide a video where the speaker is clearly speaking English
- Ensure the audio quality is good
- Make sure there's at least 10-15 seconds of speech
"""

_ENGLISH_DETAILS_MD = """
### Factors affecting accuracy:
- Audio quality and clarity
- Background noise levels
- Speaker's accent strength
- Length of speech sample
- Speaking style and pace
### Supported English Accents:
American, British (England), Australian, Indian, Canadian, Scottish, Irish, Welsh,
South African, New Zealand, Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic
"""


def _is_http_url(url):
    """Return True when *url* is an absolute http(s) URL that names a host."""
    try:
        parts = urlsplit(url)
    except ValueError:
        # urlsplit rejects some malformed inputs (e.g. a broken IPv6 literal).
        return False
    return parts.scheme in ("http", "https") and bool(parts.netloc)


def _is_existing_file(path):
    """Return True when *path* is non-empty and exists on disk."""
    return bool(path) and os.path.exists(path)


def _show_bullets(heading, bullets):
    """Write *heading* followed by each entry of *bullets* on its own line."""
    st.write(heading)
    for bullet in bullets:
        st.write(bullet)


def _show_confidence_banner(confidence, high_msg, moderate_msg, low_msg):
    """Show a success / info / warning banner chosen by the confidence tier."""
    if confidence >= _HIGH_CONFIDENCE:
        st.success(high_msg)
    elif confidence >= _MODERATE_CONFIDENCE:
        st.info(moderate_msg)
    else:
        st.warning(low_msg)


def _render_non_english(language, lang_confidence):
    """Render the result panel for a speaker who is not speaking English."""
    language_name = language.title()
    st.error("β **Speaker is NOT speaking English**")

    language_col, confidence_col = st.columns(2)
    language_col.metric(label="Detected Language", value=language_name)
    confidence_col.metric(label="Confidence", value=f"{lang_confidence:.1f}%")

    st.info("π‘ **For English accent analysis, please provide a video where the speaker is speaking English.**")
    with st.expander("π About Language Detection"):
        st.write(
            _NON_ENGLISH_DETAILS_MD.format(
                language=language_name,
                confidence=lang_confidence,
            )
        )


def _render_english(accent, lang_confidence, accent_confidence):
    """Render accent metrics, confidence banners and the recruiter summary."""
    st.success("β **Speaker IS speaking English!**")

    language_col, accent_col, confidence_col = st.columns(3)
    language_col.metric(label="Language", value="English β ")
    accent_col.metric(label="Detected Accent", value=accent)
    confidence_col.metric(label="Accent Confidence", value=f"{accent_confidence:.1f}%")

    # How much to trust the accent label.
    _show_confidence_banner(
        accent_confidence,
        "π― High confidence accent prediction",
        "π€ Moderate confidence accent prediction",
        "β οΈ Low confidence accent prediction - results may be unreliable",
    )

    with st.expander("π Detailed Analysis Results"):
        st.write(_ENGLISH_DETAILS_MD)

    # The recruiter verdict is driven by the language confidence, not the
    # accent confidence.
    st.markdown("### π For Recruiters & HR:")
    _show_confidence_banner(
        lang_confidence,
        "β **CANDIDATE SPEAKS ENGLISH** - Suitable for English-speaking roles",
        "π€ **LIKELY SPEAKS ENGLISH** - May need additional assessment",
        "β οΈ **UNCERTAIN** - Recommend manual review or additional testing",
    )


def _render_results(is_english, language, accent, lang_confidence, accent_confidence):
    """Render the results section for one analyze_speech outcome."""
    st.markdown("---")
    st.markdown("### π― Analysis Results")
    if is_english:
        _render_english(accent, lang_confidence, accent_confidence)
    else:
        _render_non_english(language, lang_confidence)


def _run_analysis(url):
    """Download *url*, extract its audio, analyze it and render the outcome.

    Failures are reported in the page. The function returns early instead of
    calling st.stop(), so the sections rendered after the button (use cases,
    footer) still appear when a step fails. Temporary files are handed to
    cleanup_files on every exit path.
    """
    video_path = None
    audio_path = None
    try:
        with st.spinner("π₯ Downloading video..."):
            video_path = download_video(url)
        if not _is_existing_file(video_path):
            st.error("β **Video download failed!**")
            _show_bullets("**Possible reasons:**", _DOWNLOAD_FAILURE_REASONS)
            return
        st.success(f"β Video downloaded ({os.path.getsize(video_path):,} bytes)")

        with st.spinner("π΅ Extracting audio..."):
            audio_path = extract_audio(video_path)
        if not _is_existing_file(audio_path):
            st.error("β **Audio extraction failed!**")
            _show_bullets("**Possible reasons:**", _EXTRACTION_FAILURE_REASONS)
            return
        st.success(f"β Audio extracted ({os.path.getsize(audio_path):,} bytes)")

        try:
            with st.spinner("π§  Analyzing language and accent... This may take 2-3 minutes on first run..."):
                (
                    is_english,
                    language,
                    accent,
                    lang_confidence,
                    accent_confidence,
                ) = analyze_speech(audio_path)
            _render_results(is_english, language, accent, lang_confidence, accent_confidence)
        except Exception as analysis_error:
            # UI boundary: any analysis or rendering failure becomes an
            # in-page report with troubleshooting hints.
            st.error("β **Analysis failed!**")
            st.write("**Error details:**")
            st.code(str(analysis_error))
            _show_bullets("**Possible solutions:**", _ANALYSIS_FAILURE_HINTS)
    except Exception as unexpected_error:
        # UI boundary for the download / extraction steps.
        st.error("β **Unexpected error occurred:**")
        st.code(str(unexpected_error))
        st.write("Please try again with a different video or contact support if the issue persists.")
    finally:
        # Clean up temporary files, including after an early return.
        if video_path or audio_path:
            cleanup_files(video_path, audio_path)


if st.button("π Analyze Language & Accent", type="primary"):
    requested_url = video_url.strip()
    if not requested_url:
        st.warning("β οΈ Please enter a video URL first.")
    elif not _is_http_url(requested_url):
        # The URL is user-supplied; only hand http(s) links to the downloader.
        st.warning("Please enter a valid http:// or https:// video URL.")
    else:
        _run_analysis(requested_url)
# --- Use cases ---------------------------------------------------------------
# One markdown panel per column, left to right.
_USE_CASE_PANELS = (
    """
**π’ For Recruitment:**
- Screen English-speaking candidates
- Verify language requirements
- Identify accent preferences
- Filter initial applications
""",
    """
**π For Call Centers:**
- Assess English fluency
- Match accents to regions
- Quality control checks
- Training needs assessment
""",
)

st.markdown("---")
st.markdown("### π― Use Cases")
for panel_column, panel_markdown in zip(st.columns(2), _USE_CASE_PANELS):
    with panel_column:
        st.markdown(panel_markdown)
# --- Footer ------------------------------------------------------------------
_FOOTER_HTML = """
<div style='text-align: center; color: #666; font-size: 0.8em;'>
π This tool first detects if the speaker is speaking English, then analyzes their English accent.<br>
Perfect for recruitment screening and language assessment.<br>
Results are AI-generated estimates and may not always be 100% accurate.
</div>
"""

st.markdown("---")
# The footer is a styled <div>, so HTML rendering is enabled for this call.
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)