fffiloni committed on
Commit
bf8f862
·
verified ·
1 Parent(s): 2abb303

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +18 -13
gradio_app.py CHANGED
@@ -121,23 +121,28 @@ def separate_speakers_video(video_path):
121
  output_dir = os.path.join("output_sep_video", session_id)
122
  os.makedirs(output_dir, exist_ok=True)
123
 
124
- output_videos = []
125
  for i in range(ests_speech.shape[0]):
126
- separated_audio_path = os.path.join(output_dir, f"speaker_{i+1}.wav")
127
- audio_np = ests_speech[i].cpu().numpy()
 
 
 
 
 
 
 
 
128
  sf.write(separated_audio_path, audio_np, TARGET_SR, format='WAV', subtype='PCM_16')
129
 
130
- speaker_video_path = os.path.join(output_dir, f"speaker_{i+1}_video.mp4")
131
- final_video = attach_audio_to_video(video, separated_audio_path, speaker_video_path)
132
- output_videos.append(final_video)
 
 
 
 
133
 
134
- updates = []
135
- for i in range(MAX_SPEAKERS):
136
- if i < len(output_videos):
137
- updates.append(gr.update(value=output_videos[i], visible=True, label=f"Speaker {i+1}"))
138
- else:
139
- updates.append(gr.update(value=None, visible=False))
140
- return updates
141
 
142
 
143
 
 
121
  output_dir = os.path.join("output_sep_video", session_id)
122
  os.makedirs(output_dir, exist_ok=True)
123
 
124
+ output_files = []
125
  for i in range(ests_speech.shape[0]):
126
+ audio_np = ests_speech[i].cpu().numpy().astype('float32')
127
+
128
+ # Ensure shape is [samples, channels]
129
+ if audio_np.ndim == 1:
130
+ audio_np = audio_np[:, None]
131
+
132
+ audio_filename = f"speaker_{i+1}.wav"
133
+ separated_audio_path = os.path.join(output_dir, audio_filename)
134
+
135
+ # Explicitly set format/subtype
136
  sf.write(separated_audio_path, audio_np, TARGET_SR, format='WAV', subtype='PCM_16')
137
 
138
+ # Attach to video
139
+ out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
140
+ attach_audio_to_video(video, separated_audio_path, out_video_path)
141
+ output_files.append(out_video_path)
142
+
143
+ # Return only existing video files
144
+ return output_files + [None] * (MAX_SPEAKERS - len(output_files))
145
 
 
 
 
 
 
 
 
146
 
147
 
148