amirjamali committed on
Commit
6252089
·
unverified ·
1 Parent(s): 6f0cad3

Update Streamlit app and Dockerfile; enable CORS, adjust max upload size, and enhance upload directory handling with improved error logging and debugging features

Browse files
Files changed (3) hide show
  1. .streamlit/config.toml +3 -2
  2. Dockerfile +10 -5
  3. src/streamlit_app.py +152 -22
.streamlit/config.toml CHANGED
@@ -2,9 +2,10 @@
2
  port = 8501
3
  address = "0.0.0.0"
4
  headless = true
5
- enableCORS = false
6
- maxUploadSize = 200
7
  enableXsrfProtection = false
 
8
 
9
  [browser]
10
  gatherUsageStats = false
 
2
  port = 8501
3
  address = "0.0.0.0"
4
  headless = true
5
+ enableCORS = true
6
+ maxUploadSize = 150
7
  enableXsrfProtection = false
8
+ enableWebsocketCompression = false
9
 
10
  [browser]
11
  gatherUsageStats = false
Dockerfile CHANGED
@@ -9,7 +9,9 @@ ENV PYTHONUNBUFFERED=1 \
9
  XDG_CACHE_HOME=/app/.cache \
10
  PYTHONIOENCODING=utf-8 \
11
  TOKENIZERS_PARALLELISM=false \
12
- HF_HUB_DISABLE_SYMLINKS_WARNING=1
 
 
13
 
14
  WORKDIR /app
15
 
@@ -29,10 +31,12 @@ RUN apt-get update && \
29
  && rm -rf /var/lib/apt/lists/*
30
 
31
  # Create necessary directories with proper permissions
32
- RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface
33
- RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache
34
  # Create symbolic link to ensure both user and root can access cache
35
  RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
 
 
36
 
37
  # Copy requirements first (for better caching)
38
  COPY requirements.txt .
@@ -52,9 +56,10 @@ RUN mkdir -p .streamlit && \
52
  echo 'port = 8501' >> .streamlit/config.toml && \
53
  echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
54
  echo 'headless = true' >> .streamlit/config.toml && \
55
- echo 'enableCORS = false' >> .streamlit/config.toml && \
56
- echo 'maxUploadSize = 200' >> .streamlit/config.toml && \
57
  echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
 
58
  echo '' >> .streamlit/config.toml && \
59
  echo '[browser]' >> .streamlit/config.toml && \
60
  echo 'gatherUsageStats = false' >> .streamlit/config.toml && \
 
9
  XDG_CACHE_HOME=/app/.cache \
10
  PYTHONIOENCODING=utf-8 \
11
  TOKENIZERS_PARALLELISM=false \
12
+ HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
13
+ STREAMLIT_SERVER_MAX_UPLOAD_SIZE=150 \
14
+ STREAMLIT_CLIENT_TOOLCHAIN=vite
15
 
16
  WORKDIR /app
17
 
 
31
  && rm -rf /var/lib/apt/lists/*
32
 
33
  # Create necessary directories with proper permissions
34
+ RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface /tmp/streamlit_uploads
35
+ RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache /tmp/streamlit_uploads
36
  # Create symbolic link to ensure both user and root can access cache
37
  RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
38
+ # Use alternative uploads directory with guaranteed permissions
39
+ ENV STREAMLIT_UPLOADS_PATH=/tmp/streamlit_uploads
40
 
41
  # Copy requirements first (for better caching)
42
  COPY requirements.txt .
 
56
  echo 'port = 8501' >> .streamlit/config.toml && \
57
  echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
58
  echo 'headless = true' >> .streamlit/config.toml && \
59
+ echo 'enableCORS = true' >> .streamlit/config.toml && \
60
+ echo 'maxUploadSize = 150' >> .streamlit/config.toml && \
61
  echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
62
+ echo 'enableWebsocketCompression = false' >> .streamlit/config.toml && \
63
  echo '' >> .streamlit/config.toml && \
64
  echo '[browser]' >> .streamlit/config.toml && \
65
  echo 'gatherUsageStats = false' >> .streamlit/config.toml && \
src/streamlit_app.py CHANGED
@@ -498,10 +498,25 @@ def process_uploaded_audio(file_input):
498
  # Create a unique filename based on timestamp
499
  timestamp = str(int(time.time()))
500
 
501
- # Create an uploads directory if it doesn't exist - we'll need this regardless
502
  uploads_dir = os.path.join(os.getcwd(), "uploads")
503
  os.makedirs(uploads_dir, exist_ok=True)
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  # Handle different input types
506
  if isinstance(file_input, str):
507
  # If it's already a file path
@@ -513,22 +528,55 @@ def process_uploaded_audio(file_input):
513
  file_extension = os.path.splitext(file_input.name)[1].lower()
514
 
515
  # Write the uploaded file to disk with proper extension in the uploads directory
516
- temp_input_path = os.path.join(uploads_dir, f"uploaded_audio_{timestamp}{file_extension}")
517
- with open(temp_input_path, "wb") as f:
518
- f.write(file_input.getbuffer())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
519
 
520
  # For MP4 files, extract the audio using ffmpeg
521
  if file_extension == ".mp4":
522
  st.info("Extracting audio from video file...")
523
  audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
524
  try:
 
525
  subprocess.run(
526
- ['ffmpeg', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
527
  check=True,
528
  capture_output=True
529
  )
530
- # Remove the original video file
531
- os.remove(temp_input_path)
 
 
532
  except subprocess.CalledProcessError as e:
533
  st.error(f"Error extracting audio: {e}")
534
  if e.stderr:
@@ -539,18 +587,37 @@ def process_uploaded_audio(file_input):
539
  if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
540
  # Convert to WAV for better compatibility
541
  audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
 
542
  try:
543
- # Use a verbose ffmpeg command with detailed logging
544
  process = subprocess.run(
545
- ['ffmpeg', '-i', temp_input_path, '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le', '-y', audio_path],
 
 
 
 
 
 
 
 
546
  check=True,
547
  capture_output=True
548
  )
549
 
550
- # Verify the file was created
551
- if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
552
- st.warning("Conversion produced an empty file. Using original file.")
553
- audio_path = temp_input_path
 
 
 
 
 
 
 
 
 
 
554
 
555
  except subprocess.CalledProcessError as e:
556
  st.warning(f"Conversion warning: {e}")
@@ -561,6 +628,7 @@ def process_uploaded_audio(file_input):
561
  else:
562
  # For already WAV files, use them directly
563
  audio_path = temp_input_path
 
564
 
565
  detector = AccentDetector()
566
  results = detector.analyze_audio(audio_path)
@@ -775,13 +843,19 @@ with tab2:
775
 
776
  if uploaded_file is not None: # Show a preview of the audio
777
  st.markdown("#### Audio Preview:")
778
- st.audio(uploaded_file)
779
- st.markdown("#### Ready for Analysis")
780
- col1, col2 = st.columns([1, 3])
781
- with col1:
782
- analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
783
- with col2:
784
- st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
 
 
 
 
 
 
785
 
786
  if analyze_button:
787
  with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
@@ -791,8 +865,7 @@ with tab2:
791
  if file_size_mb > 190: # Stay below the 200MB limit with some buffer
792
  st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
793
  st.info("Tip: Try trimming your audio to just the speech segment for better results.")
794
- else:
795
- # Create a progress bar to show processing stages
796
  progress_bar = st.progress(0)
797
 
798
  # Check the file type and inform user about processing steps
@@ -895,3 +968,60 @@ with st.expander("ℹ️ How It Works"):
895
 
896
  5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
897
  """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  # Create a unique filename based on timestamp
499
  timestamp = str(int(time.time()))
500
 
501
+ # Create a deterministic uploads directory with full permissions
502
  uploads_dir = os.path.join(os.getcwd(), "uploads")
503
  os.makedirs(uploads_dir, exist_ok=True)
504
 
505
+ # Try Streamlit's own upload path first if available
506
+ streamlit_uploads_path = os.environ.get('STREAMLIT_UPLOADS_PATH')
507
+ if streamlit_uploads_path and os.path.isdir(streamlit_uploads_path):
508
+ uploads_dir = streamlit_uploads_path
509
+ st.info(f"Using Streamlit's upload directory: {uploads_dir}")
510
+
511
+ # Make sure uploads directory has proper permissions
512
+ try:
513
+ os.chmod(uploads_dir, 0o777) # Full permissions
514
+ except Exception as chmod_error:
515
+ st.warning(f"Could not set permissions on uploads directory: {str(chmod_error)}. Continuing anyway.")
516
+
517
+ # Log upload dir info for debugging
518
+ st.info(f"Upload directory: {uploads_dir} (exists: {os.path.exists(uploads_dir)}, writable: {os.access(uploads_dir, os.W_OK)})")
519
+
520
  # Handle different input types
521
  if isinstance(file_input, str):
522
  # If it's already a file path
 
528
  file_extension = os.path.splitext(file_input.name)[1].lower()
529
 
530
  # Write the uploaded file to disk with proper extension in the uploads directory
531
+ # Use a unique filename to avoid conflicts
532
+ safe_filename = ''.join(c if c.isalnum() or c in '._- ' else '_' for c in file_input.name)
533
+ temp_input_path = os.path.join(uploads_dir, f"uploaded_{timestamp}_{safe_filename}")
534
+
535
+ st.info(f"Saving uploaded file to: {temp_input_path}")
536
+
537
+ try:
538
+ # Write in chunks to handle large files better
539
+ chunk_size = 1024 * 1024 # 1MB chunks
540
+ buffer = file_input.getbuffer()
541
+ with open(temp_input_path, "wb") as f:
542
+ for i in range(0, len(buffer), chunk_size):
543
+ f.write(buffer[i:i+chunk_size])
544
+
545
+ # Verify file was written properly
546
+ if os.path.exists(temp_input_path):
547
+ file_size = os.path.getsize(temp_input_path)
548
+ st.success(f"File saved successfully: {file_size} bytes")
549
+ else:
550
+ st.error(f"Failed to save file - file doesn't exist after writing")
551
+ except Exception as write_error:
552
+ st.error(f"Error writing uploaded file: {str(write_error)}")
553
+ # Try alternative temp directory as fallback
554
+ try:
555
+ import tempfile
556
+ temp_dir = tempfile.gettempdir()
557
+ temp_input_path = os.path.join(temp_dir, f"uploaded_{timestamp}_{safe_filename}")
558
+ st.warning(f"Trying alternative location: {temp_input_path}")
559
+ with open(temp_input_path, "wb") as f:
560
+ f.write(file_input.getbuffer())
561
+ except Exception as alt_write_error:
562
+ st.error(f"Alternative write also failed: {str(alt_write_error)}")
563
+ raise
564
 
565
  # For MP4 files, extract the audio using ffmpeg
566
  if file_extension == ".mp4":
567
  st.info("Extracting audio from video file...")
568
  audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
569
  try:
570
+ # Add -y flag to overwrite output file if it exists
571
  subprocess.run(
572
+ ['ffmpeg', '-y', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
573
  check=True,
574
  capture_output=True
575
  )
576
+ st.success(f"Audio extracted successfully to {audio_path}")
577
+ # Remove the original video file if extraction was successful
578
+ if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
579
+ os.remove(temp_input_path)
580
  except subprocess.CalledProcessError as e:
581
  st.error(f"Error extracting audio: {e}")
582
  if e.stderr:
 
587
  if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
588
  # Convert to WAV for better compatibility
589
  audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
590
+ st.info(f"Converting {file_extension} to WAV format for analysis...")
591
  try:
592
+ # Use a verbose ffmpeg command with more options for compatibility
593
  process = subprocess.run(
594
+ [
595
+ 'ffmpeg', '-y', '-i', temp_input_path,
596
+ '-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
597
+ # Add error handling flags
598
+ '-err_detect', 'ignore_err',
599
+ # Add buffers for better handling
600
+ '-analyzeduration', '10000000', '-probesize', '10000000',
601
+ audio_path
602
+ ],
603
  check=True,
604
  capture_output=True
605
  )
606
 
607
+ # Verify the file was created successfully
608
+ if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
609
+ st.success(f"Audio converted successfully: {os.path.getsize(audio_path)} bytes")
610
+ # If conversion was successful, remove the original file to save space
611
+ os.remove(temp_input_path)
612
+ else:
613
+ st.warning("Conversion produced an empty file. Trying fallback conversion method...")
614
+ # Try alternative conversion method - simpler command
615
+ fallback_cmd = ['ffmpeg', '-y', '-i', temp_input_path, audio_path]
616
+ subprocess.run(fallback_cmd, check=True, capture_output=True)
617
+
618
+ if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
619
+ st.warning("Fallback conversion also failed. Using original file.")
620
+ audio_path = temp_input_path
621
 
622
  except subprocess.CalledProcessError as e:
623
  st.warning(f"Conversion warning: {e}")
 
628
  else:
629
  # For already WAV files, use them directly
630
  audio_path = temp_input_path
631
+ st.info(f"Using WAV file directly: {audio_path}")
632
 
633
  detector = AccentDetector()
634
  results = detector.analyze_audio(audio_path)
 
843
 
844
  if uploaded_file is not None: # Show a preview of the audio
845
  st.markdown("#### Audio Preview:")
846
+ try:
847
+ st.audio(uploaded_file)
848
+ st.markdown("#### Ready for Analysis")
849
+ col1, col2 = st.columns([1, 3])
850
+ with col1:
851
+ analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
852
+ with col2:
853
+ st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
854
+ except Exception as preview_error:
855
+ st.warning(f"Could not preview audio: {str(preview_error)}")
856
+ # If preview fails, still allow analysis
857
+ analyze_button = st.button("Analyze Audio (Preview Failed)", type="primary")
858
+ st.caption("Proceeding with analysis might still work even if preview failed")
859
 
860
  if analyze_button:
861
  with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
 
865
  if file_size_mb > 190: # Stay below the 200MB limit with some buffer
866
  st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
867
  st.info("Tip: Try trimming your audio to just the speech segment for better results.")
868
+ else: # Create a progress bar to show processing stages
 
869
  progress_bar = st.progress(0)
870
 
871
  # Check the file type and inform user about processing steps
 
968
 
969
  5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
970
  """)
971
+
972
+ # Add debug function for troubleshooting HTTP errors
973
+ def debug_http_errors():
974
+ """Print debug information for HTTP errors"""
975
+ st.warning("⚠️ HTTP 400 Error Debugging Mode")
976
+ st.markdown("""
977
+ ### Common HTTP 400 Error Causes:
978
+ 1. **File size exceeds limits** (current limit: 150MB)
979
+ 2. **File format incompatibility**
980
+ 3. **Network interruption** during upload
981
+ 4. **Server-side timeout** during processing
982
+ 5. **Permissions issues** in container
983
+ """)
984
+
985
+ # Show environment info
986
+ st.subheader("Environment Information")
987
+ env_info = {
988
+ "STREAMLIT_UPLOADS_PATH": os.environ.get("STREAMLIT_UPLOADS_PATH", "Not set"),
989
+ "STREAMLIT_SERVER_MAX_UPLOAD_SIZE": os.environ.get("STREAMLIT_SERVER_MAX_UPLOAD_SIZE", "Not set"),
990
+ "Current directory": os.getcwd(),
991
+ "Python version": sys.version
992
+ }
993
+
994
+ for key, value in env_info.items():
995
+ st.code(f"{key}: {value}")
996
+
997
+ # Check if uploads directory is writable
998
+ uploads_dir = os.environ.get("STREAMLIT_UPLOADS_PATH", os.path.join(os.getcwd(), "uploads"))
999
+ os.makedirs(uploads_dir, exist_ok=True)
1000
+
1001
+ try:
1002
+ test_file = os.path.join(uploads_dir, "test_write.txt")
1003
+ with open(test_file, "w") as f:
1004
+ f.write("Test write permission")
1005
+ os.remove(test_file)
1006
+ st.success(f"✓ Upload directory is writable: {uploads_dir}")
1007
+ except Exception as e:
1008
+ st.error(f"✗ Cannot write to upload directory: {str(e)}")
1009
+
1010
+ # Test ffmpeg
1011
+ try:
1012
+ result = subprocess.run(["ffmpeg", "-version"], capture_output=True, text=True)
1013
+ st.success(f"✓ FFmpeg is available")
1014
+ except Exception as e:
1015
+ st.error(f"✗ FFmpeg error: {str(e)}")
1016
+
1017
+ # Add debug mode flag to the app
1018
+ debug_mode = False
1019
+ with st.expander("🔧 Troubleshooting Tools"):
1020
+ debug_mode = st.checkbox("Enable Debug Mode for HTTP 400 Errors")
1021
+ if debug_mode:
1022
+ debug_http_errors()
1023
+
1024
+ # Add option for user to try different upload method
1025
+ alt_upload = st.checkbox("Use alternative upload method (for HTTP 400 errors)")
1026
+ if alt_upload:
1027
+ st.info("Using alternative upload method that may bypass some HTTP 400 errors")