Spaces:

amirjamali
/

accent-detector

Sleeping

App Files Files Community

amirjamali committed on May 24

Commit

6252089

unverified ·

1 Parent(s): 6f0cad3

Update Streamlit app and Dockerfile; enable CORS, adjust max upload size, and enhance upload directory handling with improved error logging and debugging features

Browse files

Files changed (3) hide show

.streamlit/config.toml +3 -2
Dockerfile +10 -5
src/streamlit_app.py +152 -22

.streamlit/config.toml CHANGED Viewed

@@ -2,9 +2,10 @@
 port = 8501
 address = "0.0.0.0"
 headless = true
-enableCORS = false
-maxUploadSize = 200
 enableXsrfProtection = false
 [browser]
 gatherUsageStats = false

 port = 8501
 address = "0.0.0.0"
 headless = true
+enableCORS = true
+maxUploadSize = 150
 enableXsrfProtection = false
+enableWebsocketCompression = false
 [browser]
 gatherUsageStats = false

Dockerfile CHANGED Viewed

@@ -9,7 +9,9 @@ ENV PYTHONUNBUFFERED=1 \
     XDG_CACHE_HOME=/app/.cache \
     PYTHONIOENCODING=utf-8 \
     TOKENIZERS_PARALLELISM=false \
-    HF_HUB_DISABLE_SYMLINKS_WARNING=1
 WORKDIR /app
@@ -29,10 +31,12 @@ RUN apt-get update && \
     && rm -rf /var/lib/apt/lists/*
 # Create necessary directories with proper permissions
-RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface
-RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache
 # Create symbolic link to ensure both user and root can access cache
 RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
 # Copy requirements first (for better caching)
 COPY requirements.txt .
@@ -52,9 +56,10 @@ RUN mkdir -p .streamlit && \
     echo 'port = 8501' >> .streamlit/config.toml && \
     echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
     echo 'headless = true' >> .streamlit/config.toml && \
-    echo 'enableCORS = false' >> .streamlit/config.toml && \
-    echo 'maxUploadSize = 200' >> .streamlit/config.toml && \
     echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
     echo '' >> .streamlit/config.toml && \
     echo '[browser]' >> .streamlit/config.toml && \
     echo 'gatherUsageStats = false' >> .streamlit/config.toml && \

     XDG_CACHE_HOME=/app/.cache \
     PYTHONIOENCODING=utf-8 \
     TOKENIZERS_PARALLELISM=false \
+    HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
+    STREAMLIT_SERVER_MAX_UPLOAD_SIZE=150 \
+    STREAMLIT_CLIENT_TOOLCHAIN=vite
 WORKDIR /app
     && rm -rf /var/lib/apt/lists/*
 # Create necessary directories with proper permissions
+RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface /tmp/streamlit_uploads
+RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache /tmp/streamlit_uploads
 # Create symbolic link to ensure both user and root can access cache
 RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
+# Use alternative uploads directory with guaranteed permissions
+ENV STREAMLIT_UPLOADS_PATH=/tmp/streamlit_uploads
 # Copy requirements first (for better caching)
 COPY requirements.txt .
     echo 'port = 8501' >> .streamlit/config.toml && \
     echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
     echo 'headless = true' >> .streamlit/config.toml && \
+    echo 'enableCORS = true' >> .streamlit/config.toml && \
+    echo 'maxUploadSize = 150' >> .streamlit/config.toml && \
     echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
+    echo 'enableWebsocketCompression = false' >> .streamlit/config.toml && \
     echo '' >> .streamlit/config.toml && \
     echo '[browser]' >> .streamlit/config.toml && \
     echo 'gatherUsageStats = false' >> .streamlit/config.toml && \

src/streamlit_app.py CHANGED Viewed

@@ -498,10 +498,25 @@ def process_uploaded_audio(file_input):
         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
-        # Create an uploads directory if it doesn't exist - we'll need this regardless
         uploads_dir = os.path.join(os.getcwd(), "uploads")
         os.makedirs(uploads_dir, exist_ok=True)
         # Handle different input types
         if isinstance(file_input, str):
             # If it's already a file path
@@ -513,22 +528,55 @@ def process_uploaded_audio(file_input):
             file_extension = os.path.splitext(file_input.name)[1].lower()
             # Write the uploaded file to disk with proper extension in the uploads directory
-            temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
-            with open(temp_input_path, "wb") as f:
-                f.write(file_input.getbuffer())
         # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
             audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
             try:
                 subprocess.run(
-                    ['ffmpeg', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
                     check=True,
                     capture_output=True
                 )
-                # Remove the original video file
-                os.remove(temp_input_path)
             except subprocess.CalledProcessError as e:
                 st.error(f"Error extracting audio: {e}")
                 if e.stderr:
@@ -539,18 +587,37 @@ def process_uploaded_audio(file_input):
             if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
                 # Convert to WAV for better compatibility
                 audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
                 try:
-                    # Use a verbose ffmpeg command with detailed logging
                     process = subprocess.run(
-                        ['ffmpeg', '-i', temp_input_path, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', '-y', audio_path],
                         check=True,
                         capture_output=True
                     )
-                    # Verify the file was created
-                    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
-                        st.warning("Conversion produced an empty file. Using original file.")
-                        audio_path = temp_input_path
                 except subprocess.CalledProcessError as e:
                     st.warning(f"Conversion warning: {e}")
@@ -561,6 +628,7 @@ def process_uploaded_audio(file_input):
             else:
                 # For already WAV files, use them directly
                 audio_path = temp_input_path
         detector = AccentDetector()
         results = detector.analyze_audio(audio_path)
@@ -775,13 +843,19 @@ with tab2:
     if uploaded_file is not None:        # Show a preview of the audio
         st.markdown("#### Audio Preview:")
-        st.audio(uploaded_file)
-        st.markdown("#### Ready for Analysis")
-        col1, col2 = st.columns([1, 3])
-        with col1:
-            analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
-        with col2:
-            st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
         if analyze_button:
             with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
@@ -791,8 +865,7 @@ with tab2:
                     if file_size_mb > 190:  # Stay below the 200MB limit with some buffer
                         st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
                         st.info("Tip: Try trimming your audio to just the speech segment for better results.")
-                    else:
-                        # Create a progress bar to show processing stages
                         progress_bar = st.progress(0)
                         # Check the file type and inform user about processing steps
@@ -895,3 +968,60 @@ with st.expander("ℹ️ How It Works"):
     5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
     """)

         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
+        # Create a deterministic uploads directory with full permissions
         uploads_dir = os.path.join(os.getcwd(), "uploads")
         os.makedirs(uploads_dir, exist_ok=True)
+        # Try Streamlit's own upload path first if available
+        streamlit_uploads_path = os.environ.get('STREAMLIT_UPLOADS_PATH')
+        if streamlit_uploads_path and os.path.isdir(streamlit_uploads_path):
+            uploads_dir = streamlit_uploads_path
+            st.info(f"Using Streamlit's upload directory: {uploads_dir}")
+        # Make sure uploads directory has proper permissions
+        try:
+            os.chmod(uploads_dir, 0o777)  # Full permissions
+        except Exception as chmod_error:
+            st.warning(f"Could not set permissions on uploads directory: {str(chmod_error)}. Continuing anyway.")
+        # Log upload dir info for debugging
+        st.info(f"Upload directory: {uploads_dir} (exists: {os.path.exists(uploads_dir)}, writable: {os.access(uploads_dir, os.W_OK)})")
         # Handle different input types
         if isinstance(file_input, str):
             # If it's already a file path
             file_extension = os.path.splitext(file_input.name)[1].lower()
             # Write the uploaded file to disk with proper extension in the uploads directory
+            # Use a unique filename to avoid conflicts
+            safe_filename = ''.join(c if c.isalnum() or c in '._- ' else '_' for c in file_input.name)
+            temp_input_path = os.path.join(uploads_dir, f"uploaded_{timestamp}_{safe_filename}")
+            st.info(f"Saving uploaded file to: {temp_input_path}")
+            try:
+                # Write in chunks to handle large files better
+                chunk_size = 1024 * 1024  # 1MB chunks
+                buffer = file_input.getbuffer()
+                with open(temp_input_path, "wb") as f:
+                    for i in range(0, len(buffer), chunk_size):
+                        f.write(buffer[i:i+chunk_size])
+                # Verify file was written properly
+                if os.path.exists(temp_input_path):
+                    file_size = os.path.getsize(temp_input_path)
+                    st.success(f"File saved successfully: {file_size} bytes")
+                else:
+                    st.error(f"Failed to save file - file doesn't exist after writing")
+            except Exception as write_error:
+                st.error(f"Error writing uploaded file: {str(write_error)}")
+                # Try alternative temp directory as fallback
+                try:
+                    import tempfile
+                    temp_dir = tempfile.gettempdir()
+                    temp_input_path = os.path.join(temp_dir, f"uploaded_{timestamp}_{safe_filename}")
+                    st.warning(f"Trying alternative location: {temp_input_path}")
+                    with open(temp_input_path, "wb") as f:
+                        f.write(file_input.getbuffer())
+                except Exception as alt_write_error:
+                    st.error(f"Alternative write also failed: {str(alt_write_error)}")
+                    raise
         # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
             audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
             try:
+                # Add -y flag to overwrite output file if it exists
                 subprocess.run(
+                    ['ffmpeg', '-y', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
                     check=True,
                     capture_output=True
                 )
+                st.success(f"Audio extracted successfully to {audio_path}")
+                # Remove the original video file if extraction was successful
+                if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
+                    os.remove(temp_input_path)
             except subprocess.CalledProcessError as e:
                 st.error(f"Error extracting audio: {e}")
                 if e.stderr:
             if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
                 # Convert to WAV for better compatibility
                 audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
+                st.info(f"Converting {file_extension} to WAV format for analysis...")
                 try:
+                    # Use a verbose ffmpeg command with more options for compatibility
                     process = subprocess.run(
+                        [
+                            'ffmpeg', '-y', '-i', temp_input_path,
+                            '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
+                            # Add error handling flags
+                            '-err_detect', 'ignore_err',
+                            # Add buffers for better handling
+                            '-analyzeduration', '10000000', '-probesize', '10000000',
+                            audio_path
+                        ],
                         check=True,
                         capture_output=True
                     )
+                    # Verify the file was created successfully
+                    if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
+                        st.success(f"Audio converted successfully: {os.path.getsize(audio_path)} bytes")
+                        # If conversion was successful, remove the original file to save space
+                        os.remove(temp_input_path)
+                    else:
+                        st.warning("Conversion produced an empty file. Trying fallback conversion method...")
+                        # Try alternative conversion method - simpler command
+                        fallback_cmd = ['ffmpeg', '-y', '-i', temp_input_path, audio_path]
+                        subprocess.run(fallback_cmd, check=True, capture_output=True)
+                        if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
+                            st.warning("Fallback conversion also failed. Using original file.")
+                            audio_path = temp_input_path
                 except subprocess.CalledProcessError as e:
                     st.warning(f"Conversion warning: {e}")
             else:
                 # For already WAV files, use them directly
                 audio_path = temp_input_path
+                st.info(f"Using WAV file directly: {audio_path}")
         detector = AccentDetector()
         results = detector.analyze_audio(audio_path)
     if uploaded_file is not None:        # Show a preview of the audio
         st.markdown("#### Audio Preview:")
+        try:
+            st.audio(uploaded_file)
+            st.markdown("#### Ready for Analysis")
+            col1, col2 = st.columns([1, 3])
+            with col1:
+                analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
+            with col2:
+                st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
+        except Exception as preview_error:
+            st.warning(f"Could not preview audio: {str(preview_error)}")
+            # If preview fails, still allow analysis
+            analyze_button = st.button("Analyze Audio (Preview Failed)", type="primary")
+            st.caption("Proceeding with analysis might still work even if preview failed")
         if analyze_button:
             with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
                     if file_size_mb > 190:  # Stay below the 200MB limit with some buffer
                         st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
                         st.info("Tip: Try trimming your audio to just the speech segment for better results.")
+                    else:                        # Create a progress bar to show processing stages
                         progress_bar = st.progress(0)
                         # Check the file type and inform user about processing steps
     5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
     """)
+# Add debug function for troubleshooting HTTP errors
+def debug_http_errors():
+    """Print debug information for HTTP errors"""
+    st.warning("⚠️ HTTP 400 Error Debugging Mode")
+    st.markdown("""
+    ### Common HTTP 400 Error Causes:
+    1. **File size exceeds limits** (current limit: 150MB)
+    2. **File format incompatibility**
+    3. **Network interruption** during upload
+    4. **Server-side timeout** during processing
+    5. **Permissions issues** in container
+    """)
+    # Show environment info
+    st.subheader("Environment Information")
+    env_info = {
+        "STREAMLIT_UPLOADS_PATH": os.environ.get("STREAMLIT_UPLOADS_PATH", "Not set"),
+        "STREAMLIT_SERVER_MAX_UPLOAD_SIZE": os.environ.get("STREAMLIT_SERVER_MAX_UPLOAD_SIZE", "Not set"),
+        "Current directory": os.getcwd(),
+        "Python version": sys.version
+    }
+    for key, value in env_info.items():
+        st.code(f"{key}: {value}")
+    # Check if uploads directory is writable
+    uploads_dir = os.environ.get("STREAMLIT_UPLOADS_PATH", os.path.join(os.getcwd(), "uploads"))
+    os.makedirs(uploads_dir, exist_ok=True)
+    try:
+        test_file = os.path.join(uploads_dir, "test_write.txt")
+        with open(test_file, "w") as f:
+            f.write("Test write permission")
+        os.remove(test_file)
+        st.success(f"✓ Upload directory is writable: {uploads_dir}")
+    except Exception as e:
+        st.error(f"✗ Cannot write to upload directory: {str(e)}")
+    # Test ffmpeg
+    try:
+        result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
+        st.success(f"✓ FFmpeg is available")
+    except Exception as e:
+        st.error(f"✗ FFmpeg error: {str(e)}")
+# Add debug mode flag to the app
+debug_mode = False
+with st.expander("🔧 Troubleshooting Tools"):
+    debug_mode = st.checkbox("Enable Debug Mode for HTTP 400 Errors")
+    if debug_mode:
+        debug_http_errors()
+    # Add option for user to try different upload method
+    alt_upload = st.checkbox("Use alternative upload method (for HTTP 400 errors)")
+    if alt_upload:
+        st.info("Using alternative upload method that may bypass some HTTP 400 errors")