Xin Zhang committed on
Commit
ea1c85a
·
1 Parent(s): 0672a0f

[fix]: update parameter.

Browse files
Files changed (1) hide show
  1. transcribe/pipelines/pipe_vad.py +8 -8
transcribe/pipelines/pipe_vad.py CHANGED
@@ -20,7 +20,7 @@ class VadPipe(BasePipe):
20
  self._status = 'END'
21
  self.last_state_change_offset = 0
22
  self.adaptive_ctrl = AdaptiveSilenceController()
23
-
24
 
25
  def reset(self):
26
  self._offset = 0
@@ -38,7 +38,7 @@ class VadPipe(BasePipe):
38
  # speech_pad_ms=10
39
  min_silence_duration_ms = 150,
40
  # speech_pad_ms = 30,
41
- max_speech_duration_s=5.0,
42
  )
43
  cls.vac.reset_states()
44
 
@@ -57,23 +57,23 @@ class VadPipe(BasePipe):
57
  if end_frame:
58
  relative_end_frame = max(0, end_frame - self._offset)
59
  return relative_start_frame, relative_end_frame
60
-
61
  def update_silence_ms(self):
62
  min_silence = self.adaptive_ctrl.get_adaptive_silence_ms()
63
- logging.debug(f"🫠 update_silence_ms :{min_silence} ")
64
  self.vac.min_silence_duration_ms = min_silence
65
-
66
  def process(self, in_data: MetaItem) -> MetaItem:
67
  if self._offset == 0:
68
  self.vac.reset_states()
69
-
70
  # silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
71
  source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
72
  speech_data = self._process_speech_chunk(source_audio)
73
 
74
  if speech_data: # 表示有音频的变化点出现
75
  self.update_silence_ms()
76
- rel_start_frame, rel_end_frame = speech_data
77
  if rel_start_frame is not None and rel_end_frame is None:
78
  self._status = "START" # 语音开始
79
  target_audio = source_audio[rel_start_frame:]
@@ -82,7 +82,7 @@ class VadPipe(BasePipe):
82
  silence_len = (self._offset + rel_start_frame - self.last_state_change_offset) / self.sample_rate * 1000
83
  self.adaptive_ctrl.update_silence(silence_len)
84
  self.last_state_change_offset = self._offset + rel_start_frame
85
-
86
  logging.debug("🫸 Speech start frame: {}".format(rel_start_frame))
87
  elif rel_start_frame is None and rel_end_frame is not None:
88
  self._status = "END" # 音频结束
 
20
  self._status = 'END'
21
  self.last_state_change_offset = 0
22
  self.adaptive_ctrl = AdaptiveSilenceController()
23
+
24
 
25
  def reset(self):
26
  self._offset = 0
 
38
  # speech_pad_ms=10
39
  min_silence_duration_ms = 150,
40
  # speech_pad_ms = 30,
41
+ max_speech_duration_s=20.0,
42
  )
43
  cls.vac.reset_states()
44
 
 
57
  if end_frame:
58
  relative_end_frame = max(0, end_frame - self._offset)
59
  return relative_start_frame, relative_end_frame
60
+
61
  def update_silence_ms(self):
62
  min_silence = self.adaptive_ctrl.get_adaptive_silence_ms()
63
+ logging.error(f"🫠 update_silence_ms :{min_silence} => current: {self.vac.min_silence_duration_ms} ")
64
  self.vac.min_silence_duration_ms = min_silence
65
+
66
  def process(self, in_data: MetaItem) -> MetaItem:
67
  if self._offset == 0:
68
  self.vac.reset_states()
69
+
70
  # silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
71
  source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
72
  speech_data = self._process_speech_chunk(source_audio)
73
 
74
  if speech_data: # 表示有音频的变化点出现
75
  self.update_silence_ms()
76
+ rel_start_frame, rel_end_frame = speech_data
77
  if rel_start_frame is not None and rel_end_frame is None:
78
  self._status = "START" # 语音开始
79
  target_audio = source_audio[rel_start_frame:]
 
82
  silence_len = (self._offset + rel_start_frame - self.last_state_change_offset) / self.sample_rate * 1000
83
  self.adaptive_ctrl.update_silence(silence_len)
84
  self.last_state_change_offset = self._offset + rel_start_frame
85
+
86
  logging.debug("🫸 Speech start frame: {}".format(rel_start_frame))
87
  elif rel_start_frame is None and rel_end_frame is not None:
88
  self._status = "END" # 音频结束