# app.py - FIXED VERSION import streamlit as st import os import sys # MUST BE FIRST STREAMLIT COMMAND st.set_page_config( page_title="English Language & Accent Detection", page_icon="đ", layout="centered" ) # STREAMLIT CLOUD OPTIMIZATIONS import torch torch.set_num_threads(1) # Reduce CPU usage os.environ['TOKENIZERS_PARALLELISM'] = 'false' # Avoid threading issues # Add error handling for imports try: from utils import download_video, extract_audio, analyze_speech, cleanup_files except ImportError as e: st.error(f"â Import Error: {e}") st.info("This might be a deployment issue. Please check the logs.") st.stop() st.title("đ English Language & Accent Detection Tool") st.write("Upload a video to first detect if the speaker is speaking English, then analyze their English accent.") # Add a warning for Streamlit Cloud users st.info("â ī¸ **Note**: First-time model loading may take 2-3 minutes. Please be patient!") # Information section with st.expander("âšī¸ How this tool works"): st.write(""" ## Two-Step Analysis Process: ### Step 1: Language Detection đ - **Detects what language** the speaker is using - **Supports 107+ languages** using advanced AI models - **Only proceeds to accent analysis** if English is detected ### Step 2: English Accent Analysis đ¯ (Only if English detected) - **16 different English accents** can be identified: - American, British (England), Australian, Indian, Canadian - Scottish, Irish, Welsh, South African, New Zealand - Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic ## Perfect for: â **Recruitment screening** - Verify English language candidates â **Language assessment** - Determine if applicant speaks English â **Accent identification** - Identify specific English accent varieties â **Call center hiring** - Screen for English-speaking candidates ## Requirements: - Direct video file URL (MP4, AVI, MOV, etc.) - Clear audio with minimal background noise - At least 10-15 seconds of speech - Single speaker preferred """) # URL input video_url = st.text_input( "đ Video URL:", placeholder="https://example.com/video.mp4", help="Enter a direct link to a video file" ) # Analysis button if st.button("đ Analyze Language & Accent", type="primary"): if not video_url.strip(): st.warning("â ī¸ Please enter a video URL first.") else: video_path = None audio_path = None try: # Download video with st.spinner("đĨ Downloading video..."): video_path = download_video(video_url.strip()) if not video_path or not os.path.exists(video_path): st.error("â **Video download failed!**") st.write("**Possible reasons:**") st.write("- URL is not a direct link to a video file") st.write("- Video is behind authentication/login") st.write("- Server is blocking requests") st.write("- URL is incorrect or video doesn't exist") st.stop() st.success(f"â Video downloaded ({os.path.getsize(video_path):,} bytes)") # Extract audio with st.spinner("đĩ Extracting audio..."): audio_path = extract_audio(video_path) if not audio_path or not os.path.exists(audio_path): st.error("â **Audio extraction failed!**") st.write("**Possible reasons:**") st.write("- Video file is corrupted") st.write("- Video format not supported") st.write("- Video has no audio track") st.write("- FFmpeg is not properly installed") st.stop() st.success(f"â Audio extracted ({os.path.getsize(audio_path):,} bytes)") # Analyze speech with st.spinner("đ§ Analyzing language and accent... This may take 2-3 minutes on first run..."): try: is_english, language, accent, lang_confidence, accent_confidence = analyze_speech(audio_path) # Display results st.markdown("---") st.markdown("### đ¯ Analysis Results") if not is_english: # NOT ENGLISH st.error("â **Speaker is NOT speaking English**") col1, col2 = st.columns(2) with col1: st.metric( label="Detected Language", value=language.title() ) with col2: st.metric( label="Confidence", value=f"{lang_confidence:.1f}%" ) st.info("đĄ **For English accent analysis, please provide a video where the speaker is speaking English.**") with st.expander("đ About Language Detection"): st.write(f""" **Detected Language:** {language.title()} **Detection Confidence:** {lang_confidence:.1f}% This tool first detects what language is being spoken before proceeding to accent analysis. Since the speaker appears to be speaking **{language.title()}** rather than English, we cannot proceed with English accent detection. **To get English accent analysis:** - Provide a video where the speaker is clearly speaking English - Ensure the audio quality is good - Make sure there's at least 10-15 seconds of speech """) else: # IS ENGLISH - Show accent results st.success("â **Speaker IS speaking English!**") # Main metrics col1, col2, col3 = st.columns(3) with col1: st.metric( label="Language", value="English â " ) with col2: st.metric( label="Detected Accent", value=accent ) with col3: st.metric( label="Accent Confidence", value=f"{accent_confidence:.1f}%" ) # Confidence interpretation if accent_confidence >= 80: st.success("đ¯ High confidence accent prediction") elif accent_confidence >= 60: st.info("đ¤ Moderate confidence accent prediction") else: st.warning("â ī¸ Low confidence accent prediction - results may be unreliable") # Detailed results with st.expander("đ Detailed Analysis Results"): st.write(f""" ### Factors affecting accuracy: - Audio quality and clarity - Background noise levels - Speaker's accent strength - Length of speech sample - Speaking style and pace ### Supported English Accents: American, British (England), Australian, Indian, Canadian, Scottish, Irish, Welsh, South African, New Zealand, Malaysian, Filipino, Singaporean, Hong Kong, Bermudian, South Atlantic """) # For recruiters st.markdown("### đ For Recruiters & HR:") if lang_confidence >= 80: st.success("â **CANDIDATE SPEAKS ENGLISH** - Suitable for English-speaking roles") elif lang_confidence >= 60: st.info("đ¤ **LIKELY SPEAKS ENGLISH** - May need additional assessment") else: st.warning("â ī¸ **UNCERTAIN** - Recommend manual review or additional testing") except Exception as e: st.error("â **Analysis failed!**") st.write("**Error details:**") st.code(str(e)) st.write("**Possible solutions:**") st.write("- Try a different video with clearer audio") st.write("- Ensure the video contains clear speech (any language)") st.write("- Check that the audio is at least 10-15 seconds long") st.write("- Verify the video URL is accessible") except Exception as e: st.error(f"â **Unexpected error occurred:**") st.code(str(e)) st.write("Please try again with a different video or contact support if the issue persists.") finally: # Clean up temporary files if video_path or audio_path: cleanup_files(video_path, audio_path) # Use cases section st.markdown("---") st.markdown("### đ¯ Use Cases") col1, col2 = st.columns(2) with col1: st.markdown(""" **đĸ For Recruitment:** - Screen English-speaking candidates - Verify language requirements - Identify accent preferences - Filter initial applications """) with col2: st.markdown(""" **đ For Call Centers:** - Assess English fluency - Match accents to regions - Quality control checks - Training needs assessment """) # Footer st.markdown("---") st.markdown( """