Spaces:

amirjamali
/

accent-detector

Sleeping

App Files Files Community

amirjamali committed on May 24

Commit

6f0cad3

unverified ·

1 Parent(s): ccf2172

Update Dockerfile and Streamlit app; add additional system dependencies and improve audio processing error handling

Browse files

Files changed (2) hide show

Dockerfile +4 -0
src/streamlit_app.py +64 -20

Dockerfile CHANGED Viewed

@@ -21,6 +21,10 @@ RUN apt-get update && \
     git \
     ffmpeg \
     libsndfile1 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

     git \
     ffmpeg \
     libsndfile1 \
+    libgl1-mesa-glx \
+    python3-tk \
+    libavcodec-extra \
+    libavformat-dev \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

src/streamlit_app.py CHANGED Viewed

@@ -491,10 +491,17 @@ def process_uploaded_audio(file_input):
     Args:
         file_input: Either a StreamlitUploadedFile object or a string path to a file
     """
     try:
         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
         # Handle different input types
         if isinstance(file_input, str):
             # If it's already a file path
@@ -505,15 +512,12 @@ def process_uploaded_audio(file_input):
             # If it's a StreamlitUploadedFile
             file_extension = os.path.splitext(file_input.name)[1].lower()
-            # Create an uploads directory if it doesn't exist
-            uploads_dir = os.path.join(os.getcwd(), "uploads")
-            os.makedirs(uploads_dir, exist_ok=True)
             # Write the uploaded file to disk with proper extension in the uploads directory
             temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
             with open(temp_input_path, "wb") as f:
                 f.write(file_input.getbuffer())
-              # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
             audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
@@ -527,7 +531,8 @@ def process_uploaded_audio(file_input):
                 os.remove(temp_input_path)
             except subprocess.CalledProcessError as e:
                 st.error(f"Error extracting audio: {e}")
-                st.error(f"ffmpeg output: {e.stderr.decode('utf-8')}")
                 raise
         else:
             # For audio files, process based on format
@@ -535,14 +540,23 @@ def process_uploaded_audio(file_input):
                 # Convert to WAV for better compatibility
                 audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
                 try:
-                    subprocess.run(
-                        ['ffmpeg', '-i', temp_input_path, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', audio_path],
                         check=True,
                         capture_output=True
                     )
-                    # Keep original file for reference but continue with WAV
                 except subprocess.CalledProcessError as e:
-                    st.warning(f"Conversion warning: {e}. Using original file.")
                     audio_path = temp_input_path
             else:
                 # For already WAV files, use them directly
@@ -552,18 +566,47 @@ def process_uploaded_audio(file_input):
         results = detector.analyze_audio(audio_path)
         # Clean up
-        if os.path.exists(audio_path):
             os.remove(audio_path)
         return results
     except Exception as e:
-        st.error(f"Error processing audio: {str(e)}")
-        if 'temp_input_path' in locals() and os.path.exists(temp_input_path):
             os.remove(temp_input_path)
-        if 'audio_path' in locals() and os.path.exists(audio_path):
             os.remove(audio_path)
         raise
     return results
 # --- Streamlit App ---
@@ -690,14 +733,15 @@ with tab1:
                             # Show explanation in a box
                             st.markdown("### Expert Analysis")
                             st.info(results['explanation'])
-                        with col2:
                             if results['audio_viz']:
                                 try:
                                     st.pyplot(results['audio_viz'])
                                 except Exception as viz_error:
                                     st.warning("Could not display visualization due to torchvision issue.")
-                                    st.info("Audio analysis was successful even though visualization failed.")                            # Show audio playback
                             st.audio(audio_path)
                         # Clean up files
@@ -763,15 +807,15 @@ with tab2:
                         # First save the file to a known location to bypass 403 errors
                         # Create an uploads directory if it doesn't exist
                         uploads_dir = os.path.join(os.getcwd(), "uploads")
-                        os.makedirs(uploads_dir, exist_ok=True)
-                          # Save the file first to avoid streaming it multiple times
                         temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
                         with open(temp_file_path, "wb") as f:
                             f.write(uploaded_file.getbuffer())
                         progress_bar.progress(50, text="Analyzing audio...")
-                        # Process using the saved file path directly                        results = process_uploaded_audio(temp_file_path)
                         progress_bar.progress(100, text="Analysis complete!")
                         # Display results

     Args:
         file_input: Either a StreamlitUploadedFile object or a string path to a file
     """
+    audio_path = None
+    temp_input_path = None
     try:
         # Create a unique filename based on timestamp
         timestamp = str(int(time.time()))
+        # Create an uploads directory if it doesn't exist - we'll need this regardless
+        uploads_dir = os.path.join(os.getcwd(), "uploads")
+        os.makedirs(uploads_dir, exist_ok=True)
         # Handle different input types
         if isinstance(file_input, str):
             # If it's already a file path
             # If it's a StreamlitUploadedFile
             file_extension = os.path.splitext(file_input.name)[1].lower()
             # Write the uploaded file to disk with proper extension in the uploads directory
             temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
             with open(temp_input_path, "wb") as f:
                 f.write(file_input.getbuffer())
+        # For MP4 files, extract the audio using ffmpeg
         if file_extension == ".mp4":
             st.info("Extracting audio from video file...")
             audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
                 os.remove(temp_input_path)
             except subprocess.CalledProcessError as e:
                 st.error(f"Error extracting audio: {e}")
+                if e.stderr:
+                    st.error(f"FFmpeg output: {e.stderr.decode('utf-8')}")
                 raise
         else:
             # For audio files, process based on format
                 # Convert to WAV for better compatibility
                 audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
                 try:
+                    # Use a verbose ffmpeg command with detailed logging
+                    process = subprocess.run(
+                        ['ffmpeg', '-i', temp_input_path, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', '-y', audio_path],
                         check=True,
                         capture_output=True
                     )
+                    # Verify the file was created
+                    if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
+                        st.warning("Conversion produced an empty file. Using original file.")
+                        audio_path = temp_input_path
                 except subprocess.CalledProcessError as e:
+                    st.warning(f"Conversion warning: {e}")
+                    if e.stderr:
+                        st.warning(f"FFmpeg error: {e.stderr.decode('utf-8')}")
+                    st.info("Using original file instead.")
                     audio_path = temp_input_path
             else:
                 # For already WAV files, use them directly
         results = detector.analyze_audio(audio_path)
         # Clean up
+        if audio_path and audio_path != temp_input_path and os.path.exists(audio_path):
             os.remove(audio_path)
         return results
     except Exception as e:
+        error_msg = str(e)
+        st.error(f"Error processing audio: {error_msg}")
+        # Add detailed debugging info
+        import traceback
+        st.error(f"Error details: {traceback.format_exc()}")
+        # Show file info if available
+        if temp_input_path and os.path.exists(temp_input_path):
+            st.info(f"Input file exists: {temp_input_path}, size: {os.path.getsize(temp_input_path)} bytes")
             os.remove(temp_input_path)
+        else:
+            if temp_input_path:
+                st.warning(f"Input file does not exist: {temp_input_path}")
+        if audio_path and os.path.exists(audio_path):
+            st.info(f"Audio file exists: {audio_path}, size: {os.path.getsize(audio_path)} bytes")
             os.remove(audio_path)
+        else:
+            if audio_path:
+                st.warning(f"Audio file does not exist: {audio_path}")
+        # Check for common error types
+        if "ffmpeg" in error_msg.lower():
+            st.warning("FFmpeg error detected. The audio conversion failed.")
+            st.info("Try a different audio format or check if FFmpeg is installed correctly.")
+        elif "permission" in error_msg.lower():
+            st.warning("Permission error detected.")
+            st.info("Check that the uploads directory is writable.")
+        elif "no such file" in error_msg.lower():
+            st.warning("File not found error detected.")
+            st.info("The file may have been moved, deleted, or not saved correctly.")
         raise
     return results
 # --- Streamlit App ---
                             # Show explanation in a box
                             st.markdown("### Expert Analysis")
                             st.info(results['explanation'])
+                          with col2:
                             if results['audio_viz']:
                                 try:
                                     st.pyplot(results['audio_viz'])
                                 except Exception as viz_error:
                                     st.warning("Could not display visualization due to torchvision issue.")
+                                    st.info("Audio analysis was successful even though visualization failed.")
+                            # Show audio playback
                             st.audio(audio_path)
                         # Clean up files
                         # First save the file to a known location to bypass 403 errors
                         # Create an uploads directory if it doesn't exist
                         uploads_dir = os.path.join(os.getcwd(), "uploads")
+                        os.makedirs(uploads_dir, exist_ok=True)                        # Save the file first to avoid streaming it multiple times
                         temp_file_path = os.path.join(uploads_dir, f"temp_{int(time.time())}_{uploaded_file.name}")
                         with open(temp_file_path, "wb") as f:
                             f.write(uploaded_file.getbuffer())
                         progress_bar.progress(50, text="Analyzing audio...")
+                        # Process using the saved file path directly
+                        results = process_uploaded_audio(temp_file_path)
                         progress_bar.progress(100, text="Analysis complete!")
                         # Display results