Update modules/vad/silero_vad.py
Browse files: modules/vad/silero_vad.py +19 -7
modules/vad/silero_vad.py
CHANGED
|
@@ -215,13 +215,6 @@ class SileroVAD:
|
|
| 215 |
|
| 216 |
return np.concatenate([audio[chunk["start"]: chunk["end"]] for chunk in chunks])
|
| 217 |
|
| 218 |
-
def get_chunk_index(self, time: float) -> int:
|
| 219 |
-
sample = int(time * self.sampling_rate)
|
| 220 |
-
return min(
|
| 221 |
-
bisect.bisect(self.chunk_end_sample, sample),
|
| 222 |
-
len(self.chunk_end_sample) - 1,
|
| 223 |
-
)
|
| 224 |
-
|
| 225 |
@staticmethod
|
| 226 |
def format_timestamp(
|
| 227 |
seconds: float,
|
|
@@ -260,5 +253,24 @@ class SileroVAD:
|
|
| 260 |
segment["start"] = ts_map.get_original_time(segment["start"])
|
| 261 |
segment["end"] = ts_map.get_original_time(segment["end"])
|
| 262 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
return segments
|
| 264 |
|
|
|
|
| 215 |
|
| 216 |
return np.concatenate([audio[chunk["start"]: chunk["end"]] for chunk in chunks])
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
@staticmethod
|
| 219 |
def format_timestamp(
|
| 220 |
seconds: float,
|
|
|
|
| 253 |
segment["start"] = ts_map.get_original_time(segment["start"])
|
| 254 |
segment["end"] = ts_map.get_original_time(segment["end"])
|
| 255 |
|
| 256 |
+
for segment in segments:
|
| 257 |
+
if segment.words:
|
| 258 |
+
words = []
|
| 259 |
+
for word in segment.words:
|
| 260 |
+
# Ensure the word start and end times are resolved to the same chunk.
|
| 261 |
+
middle = (word.start + word.end) / 2
|
| 262 |
+
chunk_index = ts_map.get_chunk_index(middle)
|
| 263 |
+
word.start = ts_map.get_original_time(word.start, chunk_index)
|
| 264 |
+
word.end = ts_map.get_original_time(word.end, chunk_index)
|
| 265 |
+
words.append(word)
|
| 266 |
+
|
| 267 |
+
segment["start"] = words[0].start
|
| 268 |
+
segment["end"] = words[-1].end
|
| 269 |
+
segment["words"] = words
|
| 270 |
+
|
| 271 |
+
else:
|
| 272 |
+
segment["start"] = ts_map.get_original_time(segment["start"])
|
| 273 |
+
segment["end"] = ts_map.get_original_time(segment["end"])
|
| 274 |
+
|
| 275 |
return segments
|
| 276 |
|