federicocosta1989 ssolito committed on
Commit
069f914
·
verified ·
1 Parent(s): 8cdbd94

Update whisper_cs.py (#27)

Browse files

- Update whisper_cs.py (21d88d41c32fdb31a5e1c97551c235825afa4583)


Co-authored-by: Sarah Solito <ssolito@users.noreply.huggingface.co>

Files changed (1) hide show
  1. whisper_cs.py +5 -0
whisper_cs.py CHANGED
@@ -46,6 +46,7 @@ def convert_to_mono(input_path):
46
  audio = AudioSegment.from_file(input_path)
47
  base, ext = os.path.splitext(input_path)
48
  output_path = f"{base}_merged.wav"
 
49
  mono = audio.set_channels(1)
50
  mono.export(output_path, format="wav")
51
  return output_path
@@ -60,6 +61,7 @@ def format_audio(audio_path):
60
  input_audio = torch.mean(input_audio, dim=0, keepdim=True)
61
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
62
  input_audio = resampler(input_audio)
 
63
  return input_audio.squeeze(), 16000
64
 
65
  def assign_timestamps(asr_segments, audio_path):
@@ -228,10 +230,13 @@ asr_pipe = pipeline(
228
  def diarization(audio_path):
229
  diarization_result = diarization_pipeline(audio_path)
230
  diarized_segments = list(diarization_result.itertracks(yield_label=True))
 
231
  return diarized_segments
232
 
233
  def asr(audio_path):
 
234
  asr_result = asr_pipe(audio_path, return_timestamps=True)
 
235
  asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
236
  asr_segments = assign_timestamps(asr_segments, audio_path)
237
  return asr_segments
 
46
  audio = AudioSegment.from_file(input_path)
47
  base, ext = os.path.splitext(input_path)
48
  output_path = f"{base}_merged.wav"
49
+ print('output_path',output_path)
50
  mono = audio.set_channels(1)
51
  mono.export(output_path, format="wav")
52
  return output_path
 
61
  input_audio = torch.mean(input_audio, dim=0, keepdim=True)
62
  resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
63
  input_audio = resampler(input_audio)
64
+ print('resampled')
65
  return input_audio.squeeze(), 16000
66
 
67
  def assign_timestamps(asr_segments, audio_path):
 
230
  def diarization(audio_path):
231
  diarization_result = diarization_pipeline(audio_path)
232
  diarized_segments = list(diarization_result.itertracks(yield_label=True))
233
+ print('diarized_segments',diarized_segments)
234
  return diarized_segments
235
 
236
  def asr(audio_path):
237
+ print(f"[DEBUG] Starting ASR on audio: {audio_path}")
238
  asr_result = asr_pipe(audio_path, return_timestamps=True)
239
+ print(f"[DEBUG] Raw ASR result: {asr_result}")
240
  asr_segments = hf_chunks_to_whisperx_segments(asr_result['chunks'])
241
  asr_segments = assign_timestamps(asr_segments, audio_path)
242
  return asr_segments