fffiloni committed on
Commit
258a0ab
·
verified ·
1 Parent(s): b8350da
Files changed (1) hide show
  1. app.py +54 -7
app.py CHANGED
@@ -49,6 +49,16 @@ def save_spectrogram_image(spectrogram, filename):
49
 
50
  @spaces.GPU
51
  def infer(prompt, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
52
  pipeline = AuffusionPipeline.from_pretrained("auffusion/auffusion")
53
  prompt = prompt
54
  output = pipeline(prompt=prompt)
@@ -59,6 +69,21 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
59
 
60
  @spaces.GPU
61
  def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # Load your audio file
64
  input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
@@ -165,6 +190,22 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
165
 
166
  @spaces.GPU
167
  def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  # Load your audio file
170
  input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
@@ -353,7 +394,8 @@ with gr.Blocks(css=css) as demo:
353
  submit_btn.click(
354
  fn = infer,
355
  inputs = [prompt],
356
- outputs = [audio_out]
 
357
  )
358
 
359
  with gr.Tab("Audio-to-Audio"):
@@ -381,7 +423,8 @@ with gr.Blocks(css=css) as demo:
381
  submit_btn_img2img.click(
382
  fn = infer_img2img,
383
  inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
384
- outputs = [audio_out_img2img, input_spectrogram, output_spectrogram]
 
385
  )
386
 
387
  with gr.Tab("Audio InPainting"):
@@ -419,25 +462,29 @@ with gr.Blocks(css=css) as demo:
419
  audio_in_inp.upload(
420
  fn = load_input_spectrogram,
421
  inputs = [audio_in_inp],
422
- outputs = [audio_in_spec]
 
423
  )
424
 
425
  audio_in_inp.stop_recording(
426
  fn = load_input_spectrogram,
427
  inputs = [audio_in_inp],
428
- outputs = [audio_in_spec]
 
429
  )
430
 
431
  preview_mask_btn.click(
432
  fn = preview_masked_area,
433
  inputs = [audio_in_inp, mask_start_point, mask_end_point],
434
- outputs = [masked_spec_preview]
 
435
  )
436
 
437
  submit_btn_inp.click(
438
  fn = infer_inp,
439
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
440
- outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
 
441
  )
442
 
443
- demo.queue().launch(show_api=False, show_error=True)
 
49
 
50
  @spaces.GPU
51
  def infer(prompt, progress=gr.Progress(track_tqdm=True)):
52
+ """
53
+ Generate audio from a textual prompt using AuffusionPipeline.
54
+
55
+ Args:
56
+ prompt (str): Text description of the desired audio content.
57
+ progress (gr.Progress, optional): Progress tracker for UI feedback.
58
+
59
+ Returns:
60
+ str: The file path to the generated WAV audio file.
61
+ """
62
  pipeline = AuffusionPipeline.from_pretrained("auffusion/auffusion")
63
  prompt = prompt
64
  output = pipeline(prompt=prompt)
 
69
 
70
  @spaces.GPU
71
  def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
72
+ """
73
+ Perform audio-to-audio transformation with image-to-image style generation.
74
+
75
+ Args:
76
+ prompt (str): Text prompt guiding the audio transformation.
77
+ audio_path (str): File path to the input WAV audio reference.
78
+ desired_strength (float): Strength of prompt influence in [0.0, 1.0].
79
+ progress (gr.Progress, optional): Progress tracker for UI feedback.
80
+
81
+ Returns:
82
+ tuple:
83
+ - str: File path of the generated output WAV audio.
84
+ - str: File path of the input spectrogram image (PNG).
85
+ - str: File path of the output spectrogram image (PNG).
86
+ """
87
 
88
  # Load your audio file
89
  input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
 
190
 
191
  @spaces.GPU
192
  def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
193
+ """
194
+ Perform audio inpainting with masked spectrogram region guided by a prompt.
195
+
196
+ Args:
197
+ prompt (str): Text prompt describing the desired inpainted audio content.
198
+ audio_path (str): File path to the input WAV audio reference.
199
+ mask_start_point (int): Start index of the mask region in the spectrogram.
200
+ mask_end_point (int): End index of the mask region in the spectrogram.
201
+ progress (gr.Progress, optional): Progress tracker for UI feedback.
202
+
203
+ Returns:
204
+ tuple:
205
+ - str: File path of the generated inpainted output WAV audio.
206
+ - str: File path of the input spectrogram image (PNG).
207
+ - PIL.Image.Image: The output spectrogram image with inpainted region (PIL image).
208
+ """
209
 
210
  # Load your audio file
211
  input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
 
394
  submit_btn.click(
395
  fn = infer,
396
  inputs = [prompt],
397
+ outputs = [audio_out],
398
+ show_api=True
399
  )
400
 
401
  with gr.Tab("Audio-to-Audio"):
 
423
  submit_btn_img2img.click(
424
  fn = infer_img2img,
425
  inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
426
+ outputs = [audio_out_img2img, input_spectrogram, output_spectrogram],
427
+ show_api=True
428
  )
429
 
430
  with gr.Tab("Audio InPainting"):
 
462
  audio_in_inp.upload(
463
  fn = load_input_spectrogram,
464
  inputs = [audio_in_inp],
465
+ outputs = [audio_in_spec],
466
+ show_api=False
467
  )
468
 
469
  audio_in_inp.stop_recording(
470
  fn = load_input_spectrogram,
471
  inputs = [audio_in_inp],
472
+ outputs = [audio_in_spec],
473
+ show_api=False
474
  )
475
 
476
  preview_mask_btn.click(
477
  fn = preview_masked_area,
478
  inputs = [audio_in_inp, mask_start_point, mask_end_point],
479
+ outputs = [masked_spec_preview],
480
+ show_api=False
481
  )
482
 
483
  submit_btn_inp.click(
484
  fn = infer_inp,
485
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
486
+ outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp],
487
+ show_api=False
488
  )
489
 
490
+ demo.queue().launch(ssr_mode=False, mcp_server=True, show_error=True)