daihui.zhang committed on
Commit
1bf4992
·
1 Parent(s): 3a0633a

fix words missing

Browse files
pyproject.toml CHANGED
@@ -7,6 +7,7 @@ requires-python = ">=3.11"
7
  dependencies = [
8
  "av>=14.2.0",
9
  "fastapi>=0.115.12",
 
10
  "librosa>=0.11.0",
11
  "numpy>=2.1.3",
12
  "onnxruntime>=1.21.0",
 
7
  dependencies = [
8
  "av>=14.2.0",
9
  "fastapi>=0.115.12",
10
+ "funasr>=1.2.6",
11
  "librosa>=0.11.0",
12
  "numpy>=2.1.3",
13
  "onnxruntime>=1.21.0",
requirements.txt CHANGED
@@ -184,3 +184,4 @@ websockets==15.0.1
184
  # via trans (pyproject.toml)
185
  wordninja==2.0.0
186
  # via trans (pyproject.toml)
 
 
184
  # via trans (pyproject.toml)
185
  wordninja==2.0.0
186
  # via trans (pyproject.toml)
187
+ funasr==1.2.6
transcribe/whisper_llm_serve.py CHANGED
@@ -180,9 +180,13 @@ class WhisperTranscriptionService:
180
 
181
  while not self._translate_thread_stop.is_set():
182
  audio_buffer = self._vad_frame_queue.get()
183
- if audio_buffer is None or len(audio_buffer) < int(self.sample_rate):
184
  time.sleep(0.2)
185
  continue
 
 
 
 
186
 
187
  logger.debug(f"audio buffer size: {len(audio_buffer) / self.sample_rate:.2f}s")
188
  # try:
 
180
 
181
  while not self._translate_thread_stop.is_set():
182
  audio_buffer = self._vad_frame_queue.get()
183
+ if audio_buffer is None:
184
  time.sleep(0.2)
185
  continue
186
+ if len(audio_buffer) < int(self.sample_rate):
187
+ silence_audio = np.zeros(self.sample_rate, dtype=np.float32)
188
+ silence_audio[-len(audio_buffer):] = audio_buffer
189
+ audio_buffer = silence_audio
190
 
191
  logger.debug(f"audio buffer size: {len(audio_buffer) / self.sample_rate:.2f}s")
192
  # try:
uv.lock CHANGED
The diff for this file is too large to render. See raw diff