Spaces:
Running
on
Zero
Running
on
Zero
Update whisper_cs.py (#27)
Browse files
- Update whisper_cs.py (21d88d41c32fdb31a5e1c97551c235825afa4583)
Co-authored-by: Sarah Solito <ssolito@users.noreply.huggingface.co>
- whisper_cs.py +5 -0
whisper_cs.py
CHANGED
@@ -46,6 +46,7 @@ def convert_to_mono(input_path):
|
|
46 |
audio = AudioSegment.from_file(input_path)
|
47 |
base, ext = os.path.splitext(input_path)
|
48 |
output_path = f"{base}_merged.wav"
|
|
|
49 |
mono = audio.set_channels(1)
|
50 |
mono.export(output_path, format="wav")
|
51 |
return output_path
|
@@ -60,6 +61,7 @@ def format_audio(audio_path):
|
|
60 |
input_audio = torch.mean(input_audio, dim=0, keepdim=True)
|
61 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
62 |
input_audio = resampler(input_audio)
|
|
|
63 |
return input_audio.squeeze(), 16000
|
64 |
|
65 |
def assign_timestamps(asr_segments, audio_path):
|
@@ -228,10 +230,13 @@ asr_pipe = pipeline(
|
|
228 |
def diarization(audio_path):
|
229 |
diarization_result = diarization_pipeline(audio_path)
|
230 |
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
|
|
231 |
return diarized_segments
|
232 |
|
233 |
def asr(audio_path):
|
|
|
234 |
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
|
|
235 |
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
236 |
asr_segments = assign_timestamps(asr_segments, audio_path)
|
237 |
return asr_segments
|
|
|
46 |
audio = AudioSegment.from_file(input_path)
|
47 |
base, ext = os.path.splitext(input_path)
|
48 |
output_path = f"{base}_merged.wav"
|
49 |
+
print('output_path',output_path)
|
50 |
mono = audio.set_channels(1)
|
51 |
mono.export(output_path, format="wav")
|
52 |
return output_path
|
|
|
61 |
input_audio = torch.mean(input_audio, dim=0, keepdim=True)
|
62 |
resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
|
63 |
input_audio = resampler(input_audio)
|
64 |
+
print('resampled')
|
65 |
return input_audio.squeeze(), 16000
|
66 |
|
67 |
def assign_timestamps(asr_segments, audio_path):
|
|
|
230 |
def diarization(audio_path):
|
231 |
diarization_result = diarization_pipeline(audio_path)
|
232 |
diarized_segments = list(diarization_result.itertracks(yield_label=True))
|
233 |
+
print('diarized_segments',diarized_segments)
|
234 |
return diarized_segments
|
235 |
|
236 |
def asr(audio_path):
|
237 |
+
print(f"[DEBUG] Starting ASR on audio: {audio_path}")
|
238 |
asr_result = asr_pipe(audio_path, return_timestamps=True)
|
239 |
+
print(f"[DEBUG] Raw ASR result: {asr_result}")
|
240 |
asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
|
241 |
asr_segments = assign_timestamps(asr_segments, audio_path)
|
242 |
return asr_segments
|