fffiloni committed on
Commit
c4f5bac
·
verified ·
1 Parent(s): bf8f862

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +6 -15
gradio_app.py CHANGED
@@ -102,6 +102,8 @@ def separate_dnr_video(video_path):
102
 
103
  return dialog_video, effect_video, music_video
104
 
 
 
105
  @spaces.GPU()
106
  def separate_speakers_video(video_path):
107
  audio_path, video = extract_audio_from_video(video_path)
@@ -123,29 +125,18 @@ def separate_speakers_video(video_path):
123
 
124
  output_files = []
125
  for i in range(ests_speech.shape[0]):
126
- audio_np = ests_speech[i].cpu().numpy().astype('float32')
127
-
128
- # Ensure shape is [samples, channels]
129
- if audio_np.ndim == 1:
130
- audio_np = audio_np[:, None]
131
 
132
- audio_filename = f"speaker_{i+1}.wav"
133
- separated_audio_path = os.path.join(output_dir, audio_filename)
134
-
135
- # Explicitly set format/subtype
136
- sf.write(separated_audio_path, audio_np, TARGET_SR, format='WAV', subtype='PCM_16')
137
-
138
- # Attach to video
139
  out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
140
  attach_audio_to_video(video, separated_audio_path, out_video_path)
141
  output_files.append(out_video_path)
142
 
143
- # Return only existing video files
144
  return output_files + [None] * (MAX_SPEAKERS - len(output_files))
145
 
146
 
147
 
148
-
149
  # --- Gradio UI ---
150
  with gr.Blocks() as demo:
151
  gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
@@ -197,4 +188,4 @@ with gr.Blocks() as demo:
197
  vsep_btn.click(separate_speakers_video, inputs=vsep_input, outputs=vsep_outputs)
198
 
199
  if __name__ == "__main__":
200
- demo.launch()
 
102
 
103
  return dialog_video, effect_video, music_video
104
 
105
+
106
+
107
  @spaces.GPU()
108
  def separate_speakers_video(video_path):
109
  audio_path, video = extract_audio_from_video(video_path)
 
125
 
126
  output_files = []
127
  for i in range(ests_speech.shape[0]):
128
+ separated_audio_path = os.path.join(output_dir, f"speaker_{i+1}.wav")
129
+ torchaudio.save(separated_audio_path, ests_speech[i].unsqueeze(0).cpu(), TARGET_SR)
 
 
 
130
 
131
+ # Attach audio back to video
 
 
 
 
 
 
132
  out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
133
  attach_audio_to_video(video, separated_audio_path, out_video_path)
134
  output_files.append(out_video_path)
135
 
 
136
  return output_files + [None] * (MAX_SPEAKERS - len(output_files))
137
 
138
 
139
 
 
140
  # --- Gradio UI ---
141
  with gr.Blocks() as demo:
142
  gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
 
188
  vsep_btn.click(separate_speakers_video, inputs=vsep_input, outputs=vsep_outputs)
189
 
190
  if __name__ == "__main__":
191
+ demo.launch(ssr_mode=False)