Spaces:
Sleeping
Sleeping
Update gradio_app.py
Browse files- gradio_app.py +6 -15
gradio_app.py
CHANGED
@@ -102,6 +102,8 @@ def separate_dnr_video(video_path):
|
|
102 |
|
103 |
return dialog_video, effect_video, music_video
|
104 |
|
|
|
|
|
105 |
@spaces.GPU()
|
106 |
def separate_speakers_video(video_path):
|
107 |
audio_path, video = extract_audio_from_video(video_path)
|
@@ -123,29 +125,18 @@ def separate_speakers_video(video_path):
|
|
123 |
|
124 |
output_files = []
|
125 |
for i in range(ests_speech.shape[0]):
|
126 |
-
|
127 |
-
|
128 |
-
# Ensure shape is [samples, channels]
|
129 |
-
if audio_np.ndim == 1:
|
130 |
-
audio_np = audio_np[:, None]
|
131 |
|
132 |
-
|
133 |
-
separated_audio_path = os.path.join(output_dir, audio_filename)
|
134 |
-
|
135 |
-
# Explicitly set format/subtype
|
136 |
-
sf.write(separated_audio_path, audio_np, TARGET_SR, format='WAV', subtype='PCM_16')
|
137 |
-
|
138 |
-
# Attach to video
|
139 |
out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
|
140 |
attach_audio_to_video(video, separated_audio_path, out_video_path)
|
141 |
output_files.append(out_video_path)
|
142 |
|
143 |
-
# Return only existing video files
|
144 |
return output_files + [None] * (MAX_SPEAKERS - len(output_files))
|
145 |
|
146 |
|
147 |
|
148 |
-
|
149 |
# --- Gradio UI ---
|
150 |
with gr.Blocks() as demo:
|
151 |
gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
|
@@ -197,4 +188,4 @@ with gr.Blocks() as demo:
|
|
197 |
vsep_btn.click(separate_speakers_video, inputs=vsep_input, outputs=vsep_outputs)
|
198 |
|
199 |
if __name__ == "__main__":
|
200 |
-
demo.launch()
|
|
|
102 |
|
103 |
return dialog_video, effect_video, music_video
|
104 |
|
105 |
+
|
106 |
+
|
107 |
@spaces.GPU()
|
108 |
def separate_speakers_video(video_path):
|
109 |
audio_path, video = extract_audio_from_video(video_path)
|
|
|
125 |
|
126 |
output_files = []
|
127 |
for i in range(ests_speech.shape[0]):
|
128 |
+
separated_audio_path = os.path.join(output_dir, f"speaker_{i+1}.wav")
|
129 |
+
torchaudio.save(separated_audio_path, ests_speech[i].unsqueeze(0).cpu(), TARGET_SR)
|
|
|
|
|
|
|
130 |
|
131 |
+
# Attach audio back to video
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
out_video_path = os.path.join(output_dir, f"speaker_{i+1}.mp4")
|
133 |
attach_audio_to_video(video, separated_audio_path, out_video_path)
|
134 |
output_files.append(out_video_path)
|
135 |
|
|
|
136 |
return output_files + [None] * (MAX_SPEAKERS - len(output_files))
|
137 |
|
138 |
|
139 |
|
|
|
140 |
# --- Gradio UI ---
|
141 |
with gr.Blocks() as demo:
|
142 |
gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
|
|
|
188 |
vsep_btn.click(separate_speakers_video, inputs=vsep_input, outputs=vsep_outputs)
|
189 |
|
190 |
if __name__ == "__main__":
|
191 |
+
demo.launch(ssr_mode=False)
|