amirjamali committed on
Commit
1b3a125
·
unverified ·
1 Parent(s): 7eff467

Refactor Dockerfile for improved dependency installation, and update Streamlit app to handle SpeechBrain imports with fallbacks for better compatibility.

Browse files
Files changed (3) hide show
  1. Dockerfile +12 -3
  2. requirements.txt +2 -1
  3. src/streamlit_app.py +55 -15
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.9-slim
2
 
3
  WORKDIR /app
4
 
@@ -34,9 +34,18 @@ ENV PIP_RETRIES=3
34
 
35
  # Copy requirements and install Python dependencies
36
  COPY requirements.txt ./
 
 
37
  RUN pip install --upgrade pip && \
38
- pip install --no-cache-dir -r requirements.txt || \
39
- (sleep 2 && pip install --no-cache-dir -r requirements.txt)
 
 
 
 
 
 
 
40
 
41
  # Copy source code
42
  COPY src/ ./src/
 
1
+ FROM python:3.9
2
 
3
  WORKDIR /app
4
 
 
34
 
35
  # Copy requirements and install Python dependencies
36
  COPY requirements.txt ./
37
+
38
+ # First install torch and torchaudio separately for better compatibility
39
  RUN pip install --upgrade pip && \
40
+ pip install torch==2.0.1 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cpu
41
+
42
+ # Then install the rest of the requirements with retries
43
+ RUN pip install --no-cache-dir -r requirements.txt || \
44
+ (sleep 2 && pip install --no-cache-dir -r requirements.txt) || \
45
+ (sleep 5 && pip install --no-cache-dir -r requirements.txt --use-deprecated=legacy-resolver)
46
+
47
+ # Install SpeechBrain directly using Git for better compatibility
48
+ RUN pip install git+https://github.com/speechbrain/speechbrain@v0.5.14
49
 
50
  # Copy source code
51
  COPY src/ ./src/
requirements.txt CHANGED
@@ -1,6 +1,7 @@
1
  streamlit==1.31.0
2
  yt_dlp==2023.11.16
3
- speechbrain==0.5.15
 
4
  torch==2.0.1
5
  torchaudio==2.0.2
6
  # Pin transformers to version that has AutoProcessor
 
1
  streamlit==1.31.0
2
  yt_dlp==2023.11.16
3
+ # Use a specific stable version of SpeechBrain
4
+ speechbrain==0.5.14
5
  torch==2.0.1
6
  torchaudio==2.0.2
7
  # Pin transformers to version that has AutoProcessor
src/streamlit_app.py CHANGED
@@ -6,7 +6,30 @@ import librosa
6
  import numpy as np
7
  import torch
8
  import sys
9
- from speechbrain.inference.classifiers import EncoderClassifier
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # Handle potential compatibility issues with transformers
11
  try:
12
  from transformers import AutoProcessor, AutoModelForAudioClassification
@@ -117,10 +140,19 @@ def extract_audio(video_path="video.mp4", audio_path="audio.wav"):
117
  class AccentDetector:
118
  def __init__(self):
119
  # Initialize the language identification model
120
- self.lang_id = EncoderClassifier.from_hparams(
121
- source="speechbrain/lang-id-commonlanguage_ecapa",
122
- savedir="tmp_model"
123
- )
 
 
 
 
 
 
 
 
 
124
  # Initialize the English accent classifier - using VoxLingua107 for now
125
  # In production, you'd use a more specialized accent model
126
  try:
@@ -133,24 +165,32 @@ class AccentDetector:
133
  # Fall back to using feature_extractor directly if AutoProcessor is not available
134
  from transformers import AutoFeatureExtractor
135
  self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
136
-
137
- self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
138
  self.have_accent_model = True
139
  except Exception as e:
140
  st.warning(f"Could not load accent model: {str(e)}")
141
  self.have_accent_model = False
142
-
143
  def is_english(self, audio_path, threshold=0.7):
144
  """
145
  Determine if the speech is English and return confidence score
146
  """
147
- out_prob, score, index, lang = self.lang_id.classify_file(audio_path)
148
- score = float(score)
149
-
150
- # Check if language is English (slightly fuzzy match)
151
- is_english = "eng" in lang.lower() or "en-" in lang.lower() or lang.lower() == "en"
152
-
153
- return is_english, lang, score
 
 
 
 
 
 
 
 
 
154
 
155
  def classify_accent(self, audio_path):
156
  """
 
6
  import numpy as np
7
  import torch
8
  import sys
9
+
10
+ # Global flag for SpeechBrain availability
11
+ HAS_SPEECHBRAIN = False
12
+
13
+ # Handle SpeechBrain import with fallbacks for different versions
14
+ try:
15
+ # Try the new path first (SpeechBrain 1.0+)
16
+ from speechbrain.inference.classifiers import EncoderClassifier
17
+ HAS_SPEECHBRAIN = True
18
+ except ImportError:
19
+ try:
20
+ # Try the legacy path
21
+ from speechbrain.pretrained.interfaces import EncoderClassifier
22
+ HAS_SPEECHBRAIN = True
23
+ except ImportError:
24
+ try:
25
+ # Try the very old path
26
+ from speechbrain.pretrained import EncoderClassifier
27
+ HAS_SPEECHBRAIN = True
28
+ except ImportError:
29
+ # If all fail, we'll handle this later in the code
30
+ st.error("⚠️ Unable to import SpeechBrain. Limited functionality available.")
31
+ EncoderClassifier = None
32
+
33
  # Handle potential compatibility issues with transformers
34
  try:
35
  from transformers import AutoProcessor, AutoModelForAudioClassification
 
140
  class AccentDetector:
141
  def __init__(self):
142
  # Initialize the language identification model
143
+ try:
144
+ if EncoderClassifier is not None:
145
+ self.lang_id = EncoderClassifier.from_hparams(
146
+ source="speechbrain/lang-id-commonlanguage_ecapa",
147
+ savedir="tmp_model"
148
+ )
149
+ self.have_lang_id = True
150
+ else:
151
+ st.error("SpeechBrain not available. Language identification disabled.")
152
+ self.have_lang_id = False
153
+ except Exception as e:
154
+ st.error(f"Error loading language ID model: {str(e)}")
155
+ self.have_lang_id = False
156
  # Initialize the English accent classifier - using VoxLingua107 for now
157
  # In production, you'd use a more specialized accent model
158
  try:
 
165
  # Fall back to using feature_extractor directly if AutoProcessor is not available
166
  from transformers import AutoFeatureExtractor
167
  self.processor = AutoFeatureExtractor.from_pretrained(self.model_name)
168
+ self.model = AutoModelForAudioClassification.from_pretrained(self.model_name)
 
169
  self.have_accent_model = True
170
  except Exception as e:
171
  st.warning(f"Could not load accent model: {str(e)}")
172
  self.have_accent_model = False
173
+
174
  def is_english(self, audio_path, threshold=0.7):
175
  """
176
  Determine if the speech is English and return confidence score
177
  """
178
+ if not hasattr(self, 'have_lang_id') or not self.have_lang_id:
179
+ # If language ID model is not available, assume English
180
+ st.warning("Language identification is not available. Assuming English speech.")
181
+ return True, "en", 1.0
182
+
183
+ try:
184
+ out_prob, score, index, lang = self.lang_id.classify_file(audio_path)
185
+ score = float(score)
186
+
187
+ # Check if language is English (slightly fuzzy match)
188
+ is_english = "eng" in lang.lower() or "en-" in lang.lower() or lang.lower() == "en"
189
+
190
+ return is_english, lang, score
191
+ except Exception as e:
192
+ st.warning(f"Error identifying language: {str(e)}. Assuming English speech.")
193
+ return True, "en", 0.5
194
 
195
  def classify_accent(self, audio_path):
196
  """