OpenSound committed on
Commit
30a186b
·
verified ·
1 Parent(s): 76620d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -245,8 +245,9 @@ def process_audio(test_wav, enroll_wav):
245
 
246
  # List of demo audio files
247
  demo_audio_files = [
248
- ("Test Demo 1", "test1.wav", "test1_enroll.wav"),
249
- ("Test Demo 2", "test2.wav", "test2_enroll.wav")
 
250
  ]
251
 
252
  def update_audio_input(choice):
@@ -264,24 +265,31 @@ css = """
264
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
265
  with gr.Column(elem_id="col-container"):
266
  gr.Markdown("""
267
- # SoloSpeech: Enhancing Intelligibility and Quality in Target Speech Extraction through a Cascaded Generative Pipeline
268
- 👋 Introduction: Extract the target voice from mixture speech given an enrollment speech.
269
 
270
  💑 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
271
 
272
- 🔗 Learn more about 🎯**SoloSpeech** on the [SoloSpeech Repo](https://github.com/WangHelin1997/SoloSpeech/).
273
 
274
  """)
275
 
276
  with gr.Tab("Target Speech Extraction"):
277
  with gr.Row():
278
- mixture_input = gr.Audio(label="Upload Mixture Audio", type="filepath", value="test2.wav")
 
 
279
 
280
- with gr.Row():
281
- enroll_input = gr.Audio(label="Upload Enrollment Audio (Speaker Audio)", type="filepath", value="test2_enroll.wav")
 
 
 
 
 
282
 
283
- with gr.Row():
284
- extract_button = gr.Button("Extract", variant="primary")
285
  # extract_button = gr.Button("Extract", scale=1)
286
 
287
  with gr.Row():
@@ -291,7 +299,7 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
291
  demo_selector = gr.Dropdown(
292
  label="Select Test Demo",
293
  choices=[name for name, _, _ in demo_audio_files],
294
- value="Test Demo 2"
295
  )
296
 
297
  # Update audio inputs when selecting from dropdown
 
245
 
246
  # List of demo audio files
247
  demo_audio_files = [
248
+ ("Demo1: Extract male speaker from a mixture of multiple male speakers", "examples/test1.wav", "examples/test1_enroll.wav"),
249
+ ("Demo2: Extract female speaker from a mixture of multiple female speakers", "examples/test2.wav", "examples/test2_enroll.wav"),
250
+ ("Demo3: Extract male rapper from music with complex vocals", "examples/test_3_mixture.mp3", "examples/test_3_speaker.mp3"),
251
  ]
252
 
253
  def update_audio_input(choice):
 
265
  with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
266
  with gr.Column(elem_id="col-container"):
267
  gr.Markdown("""
268
+ # SoloSpeech: A High-Quality, State-of-the-Art Target Speech Extraction Model
269
+ 👋 Introduction: Extract the target voice from mixture speech given an enrollment speech.
270
 
271
  💑 To extract sound effects or music from audio, try using [SoloAudio](https://huggingface.co/spaces/OpenSound/SoloAudio).
272
 
273
+ 🔗 Learn more about this project on the [🎯SoloSpeech Repo](https://github.com/WangHelin1997/SoloSpeech/).
274
 
275
  """)
276
 
277
  with gr.Tab("Target Speech Extraction"):
278
  with gr.Row():
279
+ mixture_input = gr.Audio(label="Upload Mixture Audio",
280
+ type="filepath",
281
+ value="test1.wav")
282
 
283
+ with gr.Row(equal_height=True):
284
+ enroll_input = gr.Audio(label="Upload Enrollment/Speaker Audio",
285
+ info='A short audio clip containing only the target speaker.',
286
+ type="filepath",
287
+ value="test1_enroll.wav",
288
+ scale=4,
289
+ )
290
 
291
+ # with gr.Row():
292
+ extract_button = gr.Button("Extract", variant="primary", scale=1)
293
  # extract_button = gr.Button("Extract", scale=1)
294
 
295
  with gr.Row():
 
299
  demo_selector = gr.Dropdown(
300
  label="Select Test Demo",
301
  choices=[name for name, _, _ in demo_audio_files],
302
+ value="Demo1: Extract male speaker from a mixture of multiple male speakers"
303
  )
304
 
305
  # Update audio inputs when selecting from dropdown