Spaces:

amirjamali
/

accent-detector

Sleeping

App Files Community

amirjamali committed on 20 days ago

Commit

1b3a125

unverified ·

1 Parent(s): 7eff467

Refactor Dockerfile for improved dependency installation and update Streamlit app to handle SpeechBrain imports with fallbacks for better compatibility

Browse files

Files changed (3) hide show

Dockerfile +12 -3
requirements.txt +2 -1
src/streamlit_app.py +55 -15

Dockerfile CHANGED Viewed

@@ -1,4 +1,4 @@
-FROM python:3.9-slim
 WORKDIR /app
@@ -34,9 +34,18 @@ ENV PIP_RETRIES=3
 # Copy requirements and install Python dependencies
 COPY requirements.txt ./
 RUN pip install --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt || \
-    (sleep 2 && pip install --no-cache-dir -r requirements.txt)
 # Copy source code
 COPY src/ ./src/

+FROM python:3.9
 WORKDIR /app
 # Copy requirements and install Python dependencies
 COPY requirements.txt ./
+# First install torch and torchaudio separately for better compatibility
 RUN pip install --upgrade pip && \
+    pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu
+# Then install the rest of the requirements with retries
+RUN pip install --no-cache-dir -r requirements.txt || \
+    (sleep 2 && pip install --no-cache-dir -r requirements.txt) || \
+    (sleep 5 && pip install --no-cache-dir -r requirements.txt --use-deprecated=legacy-resolver)
+# Install SpeechBrain directly using Git for better compatibility
+RUN pip install git+https://github.com/speechbrain/speechbrain@v0.5.14
 # Copy source code
 COPY src/ ./src/

requirements.txt CHANGED Viewed

@@ -1,6 +1,7 @@
 streamlit==1.31.0
 yt_dlp==2023.11.16
-speechbrain==0.5.15
 torch==2.0.1
 torchaudio==2.0.2
 # Pin transformers to version that has AutoProcessor

 streamlit==1.31.0
 yt_dlp==2023.11.16
+# Use a specific stable version of SpeechBrain
+speechbrain==0.5.14
 torch==2.0.1
 torchaudio==2.0.2
 # Pin transformers to version that has AutoProcessor

src/streamlit_app.py CHANGED Viewed

@@ -6,7 +6,30 @@ import librosa
 import numpy as np
 import torch
 import sys
-from speechbrain.inference.classifiers import EncoderClassifier
 # Handle potential compatibility issues with transformers
 try:
     from transformers import AutoProcessor, AutoModelForAudioClassification
@@ -117,10 +140,19 @@ def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
 class AccentDetector:
     def __init__(self):
         # Initialize the language identification model
-        self.lang_id = EncoderClassifier.from_hparams(
-            source="speechbrain/lang-id-commonlanguage_ecapa",
-            savedir="tmp_model"
-        )
           # Initialize the English accent classifier - using VoxLingua107 for now
         # In production, you'd use a more specialized accent model
         try:
@@ -133,24 +165,32 @@ class AccentDetector:
                 # Fall back to using feature_extractor directly if AutoProcessor is not available
                 from transformers import AutoFeatureExtractor
                 self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
-            self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
             self.have_accent_model = True
         except Exception as e:
             st.warning(f"Could not load accent model: {str(e)}")
             self.have_accent_model = False
     def is_english(self, audio_path, threshold=0.7):
         """
         Determine if the speech is English and return confidence score
         """
-        out_prob, score, index, lang = self.lang_id.classify_file(audio_path)
-        score = float(score)
-        # Check if language is English (slightly fuzzy match)
-        is_english = "eng" in lang.lower() or "en-" in lang.lower() or lang.lower() == "en"
-        return is_english, lang, score
     def classify_accent(self, audio_path):
         """

 import numpy as np
 import torch
 import sys
+# Global flag for SpeechBrain availability
+HAS_SPEECHBRAIN = False
+# Handle SpeechBrain import with fallbacks for different versions
+try:
+    # Try the new path first (SpeechBrain 1.0+)
+    from speechbrain.inference.classifiers import EncoderClassifier
+    HAS_SPEECHBRAIN = True
+except ImportError:
+    try:
+        # Try the legacy path
+        from speechbrain.pretrained.interfaces import EncoderClassifier
+        HAS_SPEECHBRAIN = True
+    except ImportError:
+        try:
+            # Try the very old path
+            from speechbrain.pretrained import EncoderClassifier
+            HAS_SPEECHBRAIN = True
+        except ImportError:
+            # If all fail, we'll handle this later in the code
+            st.error("⚠️ Unable to import SpeechBrain. Limited functionality available.")
+            EncoderClassifier = None
 # Handle potential compatibility issues with transformers
 try:
     from transformers import AutoProcessor, AutoModelForAudioClassification
 class AccentDetector:
     def __init__(self):
         # Initialize the language identification model
+        try:
+            if EncoderClassifier is not None:
+                self.lang_id = EncoderClassifier.from_hparams(
+                    source="speechbrain/lang-id-commonlanguage_ecapa",
+                    savedir="tmp_model"
+                )
+                self.have_lang_id = True
+            else:
+                st.error("SpeechBrain not available. Language identification disabled.")
+                self.have_lang_id = False
+        except Exception as e:
+            st.error(f"Error loading language ID model: {str(e)}")
+            self.have_lang_id = False
           # Initialize the English accent classifier - using VoxLingua107 for now
         # In production, you'd use a more specialized accent model
         try:
                 # Fall back to using feature_extractor directly if AutoProcessor is not available
                 from transformers import AutoFeatureExtractor
                 self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
+                  self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
             self.have_accent_model = True
         except Exception as e:
             st.warning(f"Could not load accent model: {str(e)}")
             self.have_accent_model = False
     def is_english(self, audio_path, threshold=0.7):
         """
         Determine if the speech is English and return confidence score
         """
+        if not hasattr(self, 'have_lang_id') or not self.have_lang_id:
+            # If language ID model is not available, assume English
+            st.warning("Language identification is not available. Assuming English speech.")
+            return True, "en", 1.0
+        try:
+            out_prob, score, index, lang = self.lang_id.classify_file(audio_path)
+            score = float(score)
+            # Check if language is English (slightly fuzzy match)
+            is_english = "eng" in lang.lower() or "en-" in lang.lower() or lang.lower() == "en"
+            return is_english, lang, score
+        except Exception as e:
+            st.warning(f"Error identifying language: {str(e)}. Assuming English speech.")
+            return True, "en", 0.5
     def classify_accent(self, audio_path):
         """