Spaces:
Sleeping
Sleeping
Update Streamlit app and Dockerfile; enable CORS, adjust max upload size, and enhance upload directory handling with improved error logging and debugging features
Browse files
- .streamlit/config.toml +3 -2
- Dockerfile +10 -5
- src/streamlit_app.py +152 -22
.streamlit/config.toml
CHANGED
@@ -2,9 +2,10 @@
|
|
2 |
port = 8501
|
3 |
address = "0.0.0.0"
|
4 |
headless = true
|
5 |
-
enableCORS =
|
6 |
-
maxUploadSize =
|
7 |
enableXsrfProtection = false
|
|
|
8 |
|
9 |
[browser]
|
10 |
gatherUsageStats = false
|
|
|
2 |
port = 8501
|
3 |
address = "0.0.0.0"
|
4 |
headless = true
|
5 |
+
enableCORS = true
|
6 |
+
maxUploadSize = 150
|
7 |
enableXsrfProtection = false
|
8 |
+
enableWebsocketCompression = false
|
9 |
|
10 |
[browser]
|
11 |
gatherUsageStats = false
|
Dockerfile
CHANGED
@@ -9,7 +9,9 @@ ENV PYTHONUNBUFFERED=1 \
|
|
9 |
XDG_CACHE_HOME=/app/.cache \
|
10 |
PYTHONIOENCODING=utf-8 \
|
11 |
TOKENIZERS_PARALLELISM=false \
|
12 |
-
HF_HUB_DISABLE_SYMLINKS_WARNING=1
|
|
|
|
|
13 |
|
14 |
WORKDIR /app
|
15 |
|
@@ -29,10 +31,12 @@ RUN apt-get update && \
|
|
29 |
&& rm -rf /var/lib/apt/lists/*
|
30 |
|
31 |
# Create necessary directories with proper permissions
|
32 |
-
RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface
|
33 |
-
RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache
|
34 |
# Create symbolic link to ensure both user and root can access cache
|
35 |
RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
|
|
|
|
|
36 |
|
37 |
# Copy requirements first (for better caching)
|
38 |
COPY requirements.txt .
|
@@ -52,9 +56,10 @@ RUN mkdir -p .streamlit && \
|
|
52 |
echo 'port = 8501' >> .streamlit/config.toml && \
|
53 |
echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
|
54 |
echo 'headless = true' >> .streamlit/config.toml && \
|
55 |
-
echo 'enableCORS =
|
56 |
-
echo 'maxUploadSize =
|
57 |
echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
|
|
|
58 |
echo '' >> .streamlit/config.toml && \
|
59 |
echo '[browser]' >> .streamlit/config.toml && \
|
60 |
echo 'gatherUsageStats = false' >> .streamlit/config.toml && \
|
|
|
9 |
XDG_CACHE_HOME=/app/.cache \
|
10 |
PYTHONIOENCODING=utf-8 \
|
11 |
TOKENIZERS_PARALLELISM=false \
|
12 |
+
HF_HUB_DISABLE_SYMLINKS_WARNING=1 \
|
13 |
+
STREAMLIT_SERVER_MAX_UPLOAD_SIZE=150 \
|
14 |
+
STREAMLIT_CLIENT_TOOLCHAIN=vite
|
15 |
|
16 |
WORKDIR /app
|
17 |
|
|
|
31 |
&& rm -rf /var/lib/apt/lists/*
|
32 |
|
33 |
# Create necessary directories with proper permissions
|
34 |
+
RUN mkdir -p /app/tmp_model /tmp/matplotlib /app/uploads /app/.cache/huggingface /app/.streamlit /app/.config /root/.cache/huggingface /tmp/streamlit_uploads
|
35 |
+
RUN chmod -R 777 /app/uploads /app/tmp_model /tmp/matplotlib /app/.cache /app/.streamlit /app/.config /root/.cache /tmp/streamlit_uploads
|
36 |
# Create symbolic link to ensure both user and root can access cache
|
37 |
RUN ln -sf /app/.cache/huggingface /root/.cache/huggingface
|
38 |
+
# Use alternative uploads directory with guaranteed permissions
|
39 |
+
ENV STREAMLIT_UPLOADS_PATH=/tmp/streamlit_uploads
|
40 |
|
41 |
# Copy requirements first (for better caching)
|
42 |
COPY requirements.txt .
|
|
|
56 |
echo 'port = 8501' >> .streamlit/config.toml && \
|
57 |
echo 'address = "0.0.0.0"' >> .streamlit/config.toml && \
|
58 |
echo 'headless = true' >> .streamlit/config.toml && \
|
59 |
+
echo 'enableCORS = true' >> .streamlit/config.toml && \
|
60 |
+
echo 'maxUploadSize = 150' >> .streamlit/config.toml && \
|
61 |
echo 'enableXsrfProtection = false' >> .streamlit/config.toml && \
|
62 |
+
echo 'enableWebsocketCompression = false' >> .streamlit/config.toml && \
|
63 |
echo '' >> .streamlit/config.toml && \
|
64 |
echo '[browser]' >> .streamlit/config.toml && \
|
65 |
echo 'gatherUsageStats = false' >> .streamlit/config.toml && \
|
src/streamlit_app.py
CHANGED
@@ -498,10 +498,25 @@ def process_uploaded_audio(file_input):
|
|
498 |
# Create a unique filename based on timestamp
|
499 |
timestamp = str(int(time.time()))
|
500 |
|
501 |
-
# Create
|
502 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
503 |
os.makedirs(uploads_dir, exist_ok=True)
|
504 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
505 |
# Handle different input types
|
506 |
if isinstance(file_input, str):
|
507 |
# If it's already a file path
|
@@ -513,22 +528,55 @@ def process_uploaded_audio(file_input):
|
|
513 |
file_extension = os.path.splitext(file_input.name)[1].lower()
|
514 |
|
515 |
# Write the uploaded file to disk with proper extension in the uploads directory
|
516 |
-
|
517 |
-
|
518 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
519 |
|
520 |
# For MP4 files, extract the audio using ffmpeg
|
521 |
if file_extension == ".mp4":
|
522 |
st.info("Extracting audio from video file...")
|
523 |
audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
|
524 |
try:
|
|
|
525 |
subprocess.run(
|
526 |
-
['ffmpeg', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
|
527 |
check=True,
|
528 |
capture_output=True
|
529 |
)
|
530 |
-
|
531 |
-
|
|
|
|
|
532 |
except subprocess.CalledProcessError as e:
|
533 |
st.error(f"Error extracting audio: {e}")
|
534 |
if e.stderr:
|
@@ -539,18 +587,37 @@ def process_uploaded_audio(file_input):
|
|
539 |
if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
|
540 |
# Convert to WAV for better compatibility
|
541 |
audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
|
|
|
542 |
try:
|
543 |
-
# Use a verbose ffmpeg command with
|
544 |
process = subprocess.run(
|
545 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
546 |
check=True,
|
547 |
capture_output=True
|
548 |
)
|
549 |
|
550 |
-
# Verify the file was created
|
551 |
-
if
|
552 |
-
st.
|
553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
554 |
|
555 |
except subprocess.CalledProcessError as e:
|
556 |
st.warning(f"Conversion warning: {e}")
|
@@ -561,6 +628,7 @@ def process_uploaded_audio(file_input):
|
|
561 |
else:
|
562 |
# For already WAV files, use them directly
|
563 |
audio_path = temp_input_path
|
|
|
564 |
|
565 |
detector = AccentDetector()
|
566 |
results = detector.analyze_audio(audio_path)
|
@@ -775,13 +843,19 @@ with tab2:
|
|
775 |
|
776 |
if uploaded_file is not None: # Show a preview of the audio
|
777 |
st.markdown("#### Audio Preview:")
|
778 |
-
|
779 |
-
|
780 |
-
|
781 |
-
|
782 |
-
|
783 |
-
|
784 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
785 |
|
786 |
if analyze_button:
|
787 |
with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
|
@@ -791,8 +865,7 @@ with tab2:
|
|
791 |
if file_size_mb > 190: # Stay below the 200MB limit with some buffer
|
792 |
st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
|
793 |
st.info("Tip: Try trimming your audio to just the speech segment for better results.")
|
794 |
-
else:
|
795 |
-
# Create a progress bar to show processing stages
|
796 |
progress_bar = st.progress(0)
|
797 |
|
798 |
# Check the file type and inform user about processing steps
|
@@ -895,3 +968,60 @@ with st.expander("ℹ️ How It Works"):
|
|
895 |
|
896 |
5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
|
897 |
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
498 |
# Create a unique filename based on timestamp
|
499 |
timestamp = str(int(time.time()))
|
500 |
|
501 |
+
# Create a deterministic uploads directory with full permissions
|
502 |
uploads_dir = os.path.join(os.getcwd(), "uploads")
|
503 |
os.makedirs(uploads_dir, exist_ok=True)
|
504 |
|
505 |
+
# Prefer the directory named by the STREAMLIT_UPLOADS_PATH environment variable, if it exists
|
506 |
+
streamlit_uploads_path = os.environ.get('STREAMLIT_UPLOADS_PATH')
|
507 |
+
if streamlit_uploads_path and os.path.isdir(streamlit_uploads_path):
|
508 |
+
uploads_dir = streamlit_uploads_path
|
509 |
+
st.info(f"Using Streamlit's upload directory: {uploads_dir}")
|
510 |
+
|
511 |
+
# Make sure uploads directory has proper permissions
|
512 |
+
try:
|
513 |
+
os.chmod(uploads_dir, 0o777) # Full permissions
|
514 |
+
except Exception as chmod_error:
|
515 |
+
st.warning(f"Could not set permissions on uploads directory: {str(chmod_error)}. Continuing anyway.")
|
516 |
+
|
517 |
+
# Log upload dir info for debugging
|
518 |
+
st.info(f"Upload directory: {uploads_dir} (exists: {os.path.exists(uploads_dir)}, writable: {os.access(uploads_dir, os.W_OK)})")
|
519 |
+
|
520 |
# Handle different input types
|
521 |
if isinstance(file_input, str):
|
522 |
# If it's already a file path
|
|
|
528 |
file_extension = os.path.splitext(file_input.name)[1].lower()
|
529 |
|
530 |
# Write the uploaded file to disk with proper extension in the uploads directory
|
531 |
+
# Use a unique filename to avoid conflicts
|
532 |
+
safe_filename = ''.join(c if c.isalnum() or c in '._- ' else '_' for c in file_input.name)
|
533 |
+
temp_input_path = os.path.join(uploads_dir, f"uploaded_{timestamp}_{safe_filename}")
|
534 |
+
|
535 |
+
st.info(f"Saving uploaded file to: {temp_input_path}")
|
536 |
+
|
537 |
+
try:
|
538 |
+
# Write in chunks to handle large files better
|
539 |
+
chunk_size = 1024 * 1024 # 1MB chunks
|
540 |
+
buffer = file_input.getbuffer()
|
541 |
+
with open(temp_input_path, "wb") as f:
|
542 |
+
for i in range(0, len(buffer), chunk_size):
|
543 |
+
f.write(buffer[i:i+chunk_size])
|
544 |
+
|
545 |
+
# Verify file was written properly
|
546 |
+
if os.path.exists(temp_input_path):
|
547 |
+
file_size = os.path.getsize(temp_input_path)
|
548 |
+
st.success(f"File saved successfully: {file_size} bytes")
|
549 |
+
else:
|
550 |
+
st.error(f"Failed to save file - file doesn't exist after writing")
|
551 |
+
except Exception as write_error:
|
552 |
+
st.error(f"Error writing uploaded file: {str(write_error)}")
|
553 |
+
# Try alternative temp directory as fallback
|
554 |
+
try:
|
555 |
+
import tempfile
|
556 |
+
temp_dir = tempfile.gettempdir()
|
557 |
+
temp_input_path = os.path.join(temp_dir, f"uploaded_{timestamp}_{safe_filename}")
|
558 |
+
st.warning(f"Trying alternative location: {temp_input_path}")
|
559 |
+
with open(temp_input_path, "wb") as f:
|
560 |
+
f.write(file_input.getbuffer())
|
561 |
+
except Exception as alt_write_error:
|
562 |
+
st.error(f"Alternative write also failed: {str(alt_write_error)}")
|
563 |
+
raise
|
564 |
|
565 |
# For MP4 files, extract the audio using ffmpeg
|
566 |
if file_extension == ".mp4":
|
567 |
st.info("Extracting audio from video file...")
|
568 |
audio_path = os.path.join(uploads_dir, f"extracted_audio_{timestamp}.wav")
|
569 |
try:
|
570 |
+
# Add -y flag to overwrite output file if it exists
|
571 |
subprocess.run(
|
572 |
+
['ffmpeg', '-y', '-i', temp_input_path, '-vn', '-acodec', 'pcm_s16le', '-ar', '16000', '-ac', '1', audio_path],
|
573 |
check=True,
|
574 |
capture_output=True
|
575 |
)
|
576 |
+
st.success(f"Audio extracted successfully to {audio_path}")
|
577 |
+
# Remove the original video file if extraction was successful
|
578 |
+
if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
|
579 |
+
os.remove(temp_input_path)
|
580 |
except subprocess.CalledProcessError as e:
|
581 |
st.error(f"Error extracting audio: {e}")
|
582 |
if e.stderr:
|
|
|
587 |
if file_extension in [".mp3", ".m4a", ".ogg", ".flac"]:
|
588 |
# Convert to WAV for better compatibility
|
589 |
audio_path = os.path.join(uploads_dir, f"converted_audio_{timestamp}.wav")
|
590 |
+
st.info(f"Converting {file_extension} to WAV format for analysis...")
|
591 |
try:
|
592 |
+
# Use a verbose ffmpeg command with more options for compatibility
|
593 |
process = subprocess.run(
|
594 |
+
[
|
595 |
+
'ffmpeg', '-y', '-i', temp_input_path,
|
596 |
+
'-ar', '16000', '-ac', '1', '-c:a', 'pcm_s16le',
|
597 |
+
# Add error handling flags
|
598 |
+
'-err_detect', 'ignore_err',
|
599 |
+
# Raise the analysis duration and probe size limits used when detecting the input format
|
600 |
+
'-analyzeduration', '10000000', '-probesize', '10000000',
|
601 |
+
audio_path
|
602 |
+
],
|
603 |
check=True,
|
604 |
capture_output=True
|
605 |
)
|
606 |
|
607 |
+
# Verify the file was created successfully
|
608 |
+
if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
|
609 |
+
st.success(f"Audio converted successfully: {os.path.getsize(audio_path)} bytes")
|
610 |
+
# If conversion was successful, remove the original file to save space
|
611 |
+
os.remove(temp_input_path)
|
612 |
+
else:
|
613 |
+
st.warning("Conversion produced an empty file. Trying fallback conversion method...")
|
614 |
+
# Try alternative conversion method - simpler command
|
615 |
+
fallback_cmd = ['ffmpeg', '-y', '-i', temp_input_path, audio_path]
|
616 |
+
subprocess.run(fallback_cmd, check=True, capture_output=True)
|
617 |
+
|
618 |
+
if not os.path.exists(audio_path) or os.path.getsize(audio_path) == 0:
|
619 |
+
st.warning("Fallback conversion also failed. Using original file.")
|
620 |
+
audio_path = temp_input_path
|
621 |
|
622 |
except subprocess.CalledProcessError as e:
|
623 |
st.warning(f"Conversion warning: {e}")
|
|
|
628 |
else:
|
629 |
# For already WAV files, use them directly
|
630 |
audio_path = temp_input_path
|
631 |
+
st.info(f"Using WAV file directly: {audio_path}")
|
632 |
|
633 |
detector = AccentDetector()
|
634 |
results = detector.analyze_audio(audio_path)
|
|
|
843 |
|
844 |
if uploaded_file is not None: # Show a preview of the audio
|
845 |
st.markdown("#### Audio Preview:")
|
846 |
+
try:
|
847 |
+
st.audio(uploaded_file)
|
848 |
+
st.markdown("#### Ready for Analysis")
|
849 |
+
col1, col2 = st.columns([1, 3])
|
850 |
+
with col1:
|
851 |
+
analyze_button = st.button("Analyze Audio", type="primary", use_container_width=True)
|
852 |
+
with col2:
|
853 |
+
st.caption("Tip: 15-30 seconds of clear speech works best for accent detection")
|
854 |
+
except Exception as preview_error:
|
855 |
+
st.warning(f"Could not preview audio: {str(preview_error)}")
|
856 |
+
# If preview fails, still allow analysis
|
857 |
+
analyze_button = st.button("Analyze Audio (Preview Failed)", type="primary")
|
858 |
+
st.caption("Proceeding with analysis might still work even if preview failed")
|
859 |
|
860 |
if analyze_button:
|
861 |
with st.spinner("Analyzing audio... (this may take 15-30 seconds)"):
|
|
|
865 |
if file_size_mb > 190: # Stay below the 200MB limit with some buffer
|
866 |
st.error(f"File size ({file_size_mb:.1f}MB) is too large. Maximum allowed is 190MB.")
|
867 |
st.info("Tip: Try trimming your audio to just the speech segment for better results.")
|
868 |
+
else: # Create a progress bar to show processing stages
|
|
|
869 |
progress_bar = st.progress(0)
|
870 |
|
871 |
# Check the file type and inform user about processing steps
|
|
|
968 |
|
969 |
5. **Analysis Summary**: An explanation is generated describing accent characteristics relevant for hiring evaluations.
|
970 |
""")
|
971 |
+
|
972 |
+
# Add debug function for troubleshooting HTTP errors
|
973 |
+
def debug_http_errors():
    """Render diagnostics that help troubleshoot HTTP 400 upload failures.

    Shows a list of common causes, the upload-related environment settings,
    whether the upload directory can be created and written to, and whether
    the ``ffmpeg`` executable runs successfully. All results are reported
    through Streamlit widgets; failed checks are displayed rather than raised,
    and nothing is returned.
    """
    # Imported locally so this helper does not depend on module-level imports.
    import sys
    import tempfile

    # Report the limit Streamlit is actually enforcing instead of a constant.
    max_upload_mb = st.get_option("server.maxUploadSize")

    st.warning("⚠️ HTTP 400 Error Debugging Mode")
    st.markdown(
        "### Common HTTP 400 Error Causes:\n"
        f"1. **File size exceeds limits** (current limit: {max_upload_mb}MB)\n"
        "2. **File format incompatibility**\n"
        "3. **Network interruption** during upload\n"
        "4. **Server-side timeout** during processing\n"
        "5. **Permissions issues** in container\n"
    )

    # Show environment info
    st.subheader("Environment Information")
    env_info = {
        "STREAMLIT_UPLOADS_PATH": os.environ.get("STREAMLIT_UPLOADS_PATH", "Not set"),
        "STREAMLIT_SERVER_MAX_UPLOAD_SIZE": os.environ.get("STREAMLIT_SERVER_MAX_UPLOAD_SIZE", "Not set"),
        "Current directory": os.getcwd(),
        "Python version": sys.version,
    }

    for key, value in env_info.items():
        st.code(f"{key}: {value}")

    # Check if uploads directory is writable
    uploads_dir = os.environ.get("STREAMLIT_UPLOADS_PATH", os.path.join(os.getcwd(), "uploads"))

    try:
        # Creating the directory is part of the check: a failure here is
        # exactly the kind of permission problem this tool should surface.
        os.makedirs(uploads_dir, exist_ok=True)
        # A uniquely named, self-deleting probe file avoids collisions between
        # concurrent sessions and never leaves a stray file behind.
        with tempfile.NamedTemporaryFile(
            mode="w", dir=uploads_dir, prefix="test_write_", suffix=".txt"
        ) as probe:
            probe.write("Test write permission")
    except OSError as e:
        st.error(f"✗ Cannot write to upload directory: {str(e)}")
    else:
        st.success(f"✓ Upload directory is writable: {uploads_dir}")

    # Test ffmpeg
    try:
        result = subprocess.run(
            ["ffmpeg", "-version"], capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers a missing or non-executable binary;
        # SubprocessError covers the timeout.
        st.error(f"✗ FFmpeg error: {str(e)}")
    else:
        # Being able to start ffmpeg is not enough; it must also exit cleanly.
        if result.returncode == 0:
            st.success("✓ FFmpeg is available")
        else:
            st.error(f"✗ FFmpeg error: exited with code {result.returncode}")
|
1016 |
+
|
1017 |
+
# Add debug mode flag to the app
|
1018 |
+
debug_mode = False
|
1019 |
+
with st.expander("🔧 Troubleshooting Tools"):
|
1020 |
+
debug_mode = st.checkbox("Enable Debug Mode for HTTP 400 Errors")
|
1021 |
+
if debug_mode:
|
1022 |
+
debug_http_errors()
|
1023 |
+
|
1024 |
+
# Add option for user to try different upload method
|
1025 |
+
alt_upload = st.checkbox("Use alternative upload method (for HTTP 400 errors)")
|
1026 |
+
if alt_upload:
|
1027 |
+
st.info("Using alternative upload method that may bypass some HTTP 400 errors")
|