Xin Zhang committed
Commit: ea1c85a
1 Parent(s): 0672a0f
[fix]: update parameter.
transcribe/pipelines/pipe_vad.py
CHANGED
@@ -20,7 +20,7 @@ class VadPipe(BasePipe):
         self._status = 'END'
         self.last_state_change_offset = 0
         self.adaptive_ctrl = AdaptiveSilenceController()
-
+
 
     def reset(self):
         self._offset = 0
@@ -38,7 +38,7 @@ class VadPipe(BasePipe):
             # speech_pad_ms=10
             min_silence_duration_ms = 150,
             # speech_pad_ms = 30,
-            max_speech_duration_s=
+            max_speech_duration_s=20.0,
         )
         cls.vac.reset_states()
 
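The change above pins max_speech_duration_s to 20.0 seconds, i.e. a hard cap on how long a single detected speech segment may run before the VAD forces a cut. Below is only a minimal sketch of what such a cap means, assuming a 16 kHz stream; the helper name and bookkeeping are illustrative, not code from pipe_vad.py:

    # Hypothetical helper: not project code, just the meaning of the 20 s cap.
    SAMPLE_RATE = 16_000          # assumed stream sample rate
    MAX_SPEECH_DURATION_S = 20.0  # value introduced by this commit

    def should_force_segment_end(segment_start_frame: int, current_frame: int) -> bool:
        """Return True once an open speech segment exceeds the configured cap."""
        duration_s = (current_frame - segment_start_frame) / SAMPLE_RATE
        return duration_s >= MAX_SPEECH_DURATION_S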
@@ -57,23 +57,23 @@ class VadPipe(BasePipe):
         if end_frame:
             relative_end_frame = max(0, end_frame - self._offset)
         return relative_start_frame, relative_end_frame
-
+
     def update_silence_ms(self):
         min_silence = self.adaptive_ctrl.get_adaptive_silence_ms()
-        logging.
+        logging.error(f"🫠 update_silence_ms :{min_silence} => current: {self.vac.min_silence_duration_ms} ")
         self.vac.min_silence_duration_ms = min_silence
-
+
     def process(self, in_data: MetaItem) -> MetaItem:
         if self._offset == 0:
             self.vac.reset_states()
-
+
         # silence_audio_100ms = np.zeros(int(0.1*self.sample_rate))
         source_audio = np.frombuffer(in_data.source_audio, dtype=np.float32)
         speech_data = self._process_speech_chunk(source_audio)
 
         if speech_data:  # a speech change point was detected
             self.update_silence_ms()
-            rel_start_frame, rel_end_frame = speech_data
+            rel_start_frame, rel_end_frame = speech_data
             if rel_start_frame is not None and rel_end_frame is None:
                 self._status = "START"  # speech start
                 target_audio = source_audio[rel_start_frame:]
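This hunk also completes the update_silence_ms() wiring: the adaptive value from AdaptiveSilenceController is logged and pushed into vac.min_silence_duration_ms before each chunk is classified. The controller itself is not part of this diff; the following is only a sketch of the kind of logic it could contain, assuming it averages recently observed silence gaps and clamps the result. The class name, window size, and bounds are all assumptions:

    from collections import deque

    class AdaptiveSilenceControllerSketch:
        """Illustrative stand-in for AdaptiveSilenceController; not the project's code."""

        def __init__(self, default_ms: float = 150.0, lo_ms: float = 100.0, hi_ms: float = 600.0):
            self.default_ms = default_ms
            self.lo_ms = lo_ms
            self.hi_ms = hi_ms
            self._gaps = deque(maxlen=20)  # recent silence gaps in ms

        def update_silence(self, silence_ms: float) -> None:
            # Called when a silence gap ends, mirroring adaptive_ctrl.update_silence(...).
            self._gaps.append(silence_ms)

        def get_adaptive_silence_ms(self) -> float:
            # Mirrors adaptive_ctrl.get_adaptive_silence_ms(): fall back to the
            # default until there is history, then use a clamped fraction of the
            # average observed gap.
            if not self._gaps:
                return self.default_ms
            avg = sum(self._gaps) / len(self._gaps)
            return min(self.hi_ms, max(self.lo_ms, 0.5 * avg))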
@@ -82,7 +82,7 @@ class VadPipe(BasePipe):
                 silence_len = (self._offset + rel_start_frame - self.last_state_change_offset) / self.sample_rate * 1000
                 self.adaptive_ctrl.update_silence(silence_len)
                 self.last_state_change_offset = self._offset + rel_start_frame
-
+
                 logging.debug("🫸 Speech start frame: {}".format(rel_start_frame))
             elif rel_start_frame is None and rel_end_frame is not None:
                 self._status = "END"  # speech end
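For reference, silence_len in the last hunk converts a frame distance into milliseconds: the absolute frame of the new speech start minus the offset of the previous state change, divided by the sample rate, times 1000. A quick check of the arithmetic with made-up numbers and an assumed 16 kHz sample rate:

    # All numbers below are hypothetical; only the formula matches the diff.
    sample_rate = 16_000
    last_state_change_offset = 32_000   # absolute frame of the previous state change
    offset = 35_200                     # absolute frame where the current chunk starts
    rel_start_frame = 1_600             # speech start relative to the current chunk

    silence_len = (offset + rel_start_frame - last_state_change_offset) / sample_rate * 1000
    print(silence_len)                  # 300.0 -> 300 ms passed to update_silence(...)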
|