fffiloni committed on
Commit
143b464
·
verified ·
1 Parent(s): 9eaa2e4

Update gradio_app.py

Browse files
Files changed (1) hide show
  1. gradio_app.py +18 -1
gradio_app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import uuid
3
  import torch
4
  import torchaudio
@@ -110,6 +111,17 @@ def separate_dnr_video(video_path):
110
 
111
  return dialog_video, effect_video, music_video
112
 
 
 
 
 
 
 
 
 
 
 
 
113
  @spaces.GPU()
114
  def separate_speakers_video(video_path):
115
  audio_path, video = extract_audio_from_video(video_path)
@@ -121,8 +133,12 @@ def separate_speakers_video(video_path):
121
 
122
  output_videos = []
123
  for i, audio_file in enumerate(output_files):
 
 
 
 
124
  speaker_video_path = os.path.join(output_dir, f"speaker_{i+1}_video.mp4")
125
- video_with_sep_audio = attach_audio_to_video(video, audio_file, speaker_video_path)
126
  output_videos.append(video_with_sep_audio)
127
 
128
  updates = []
@@ -135,6 +151,7 @@ def separate_speakers_video(video_path):
135
 
136
 
137
 
 
138
  # --- Gradio UI ---
139
  with gr.Blocks() as demo:
140
  gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")
 
1
  import os
2
+ import subprocess
3
  import uuid
4
  import torch
5
  import torchaudio
 
111
 
112
  return dialog_video, effect_video, music_video
113
 
114
def convert_to_ffmpeg_friendly(input_wav, output_wav):
    """Re-encode ``input_wav`` into ``output_wav`` by invoking ffmpeg.

    The command asks ffmpeg for an output sample rate of ``TARGET_SR``
    (``-ar``), a single audio channel (``-ac 1``) and the ``s16`` sample
    format (``-sample_fmt s16``). An existing ``output_wav`` is overwritten
    without prompting (``-y``).

    Args:
        input_wav: Path of the audio file handed to ffmpeg via ``-i``.
        output_wav: Path ffmpeg writes the re-encoded audio to.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status (``check=True``).
    """
    # Output options must sit between the input and the output path.
    output_options = [
        "-ar", str(TARGET_SR),
        "-ac", "1",
        "-sample_fmt", "s16",
    ]
    command = ["ffmpeg", "-y", "-i", input_wav, *output_options, output_wav]
    subprocess.run(command, check=True)
123
+
124
+
125
  @spaces.GPU()
126
  def separate_speakers_video(video_path):
127
  audio_path, video = extract_audio_from_video(video_path)
 
133
 
134
  output_videos = []
135
  for i, audio_file in enumerate(output_files):
136
+ # Re-encode to ensure ffmpeg/moviepy compatibility
137
+ reencoded_path = os.path.join(output_dir, f"speaker_{i+1}_final.wav")
138
+ convert_to_ffmpeg_friendly(audio_file, reencoded_path)
139
+
140
  speaker_video_path = os.path.join(output_dir, f"speaker_{i+1}_video.mp4")
141
+ video_with_sep_audio = attach_audio_to_video(video, reencoded_path, speaker_video_path)
142
  output_videos.append(video_with_sep_audio)
143
 
144
  updates = []
 
151
 
152
 
153
 
154
+
155
  # --- Gradio UI ---
156
  with gr.Blocks() as demo:
157
  gr.Markdown("# TIGER: Time-frequency Interleaved Gain Extraction and Reconstruction for Efficient Speech Separation")