Spaces:
Running
on
Zero
Running
on
Zero
MCP ready
Browse files
app.py
CHANGED
@@ -49,6 +49,16 @@ def save_spectrogram_image(spectrogram, filename):
|
|
49 |
|
50 |
@spaces.GPU
|
51 |
def infer(prompt, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
pipeline = AuffusionPipeline.from_pretrained("auffusion/auffusion")
|
53 |
prompt = prompt
|
54 |
output = pipeline(prompt=prompt)
|
@@ -59,6 +69,21 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
|
|
59 |
|
60 |
@spaces.GPU
|
61 |
def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# Load your audio file
|
64 |
input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
|
@@ -165,6 +190,22 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
|
|
165 |
|
166 |
@spaces.GPU
|
167 |
def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
# Load your audio file
|
170 |
input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
|
@@ -353,7 +394,8 @@ with gr.Blocks(css=css) as demo:
|
|
353 |
submit_btn.click(
|
354 |
fn = infer,
|
355 |
inputs = [prompt],
|
356 |
-
outputs = [audio_out]
|
|
|
357 |
)
|
358 |
|
359 |
with gr.Tab("Audio-to-Audio"):
|
@@ -381,7 +423,8 @@ with gr.Blocks(css=css) as demo:
|
|
381 |
submit_btn_img2img.click(
|
382 |
fn = infer_img2img,
|
383 |
inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
|
384 |
-
outputs = [audio_out_img2img, input_spectrogram, output_spectrogram]
|
|
|
385 |
)
|
386 |
|
387 |
with gr.Tab("Audio InPainting"):
|
@@ -419,25 +462,29 @@ with gr.Blocks(css=css) as demo:
|
|
419 |
audio_in_inp.upload(
|
420 |
fn = load_input_spectrogram,
|
421 |
inputs = [audio_in_inp],
|
422 |
-
outputs = [audio_in_spec]
|
|
|
423 |
)
|
424 |
|
425 |
audio_in_inp.stop_recording(
|
426 |
fn = load_input_spectrogram,
|
427 |
inputs = [audio_in_inp],
|
428 |
-
outputs = [audio_in_spec]
|
|
|
429 |
)
|
430 |
|
431 |
preview_mask_btn.click(
|
432 |
fn = preview_masked_area,
|
433 |
inputs = [audio_in_inp, mask_start_point, mask_end_point],
|
434 |
-
outputs = [masked_spec_preview]
|
|
|
435 |
)
|
436 |
|
437 |
submit_btn_inp.click(
|
438 |
fn = infer_inp,
|
439 |
inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
|
440 |
-
outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
|
|
|
441 |
)
|
442 |
|
443 |
-
demo.queue().launch(
|
|
|
49 |
|
50 |
@spaces.GPU
|
51 |
def infer(prompt, progress=gr.Progress(track_tqdm=True)):
|
52 |
+
"""
|
53 |
+
Generate audio from a textual prompt using AuffusionPipeline.
|
54 |
+
|
55 |
+
Args:
|
56 |
+
prompt (str): Text description of the desired audio content.
|
57 |
+
progress (gr.Progress, optional): Progress tracker for UI feedback.
|
58 |
+
|
59 |
+
Returns:
|
60 |
+
str: The file path to the generated WAV audio file.
|
61 |
+
"""
|
62 |
pipeline = AuffusionPipeline.from_pretrained("auffusion/auffusion")
|
63 |
prompt = prompt
|
64 |
output = pipeline(prompt=prompt)
|
|
|
69 |
|
70 |
@spaces.GPU
|
71 |
def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
|
72 |
+
"""
|
73 |
+
Transform an input audio clip, guided by a text prompt, using image-to-image-style generation.
|
74 |
+
|
75 |
+
Args:
|
76 |
+
prompt (str): Text prompt guiding the audio transformation.
|
77 |
+
audio_path (str): File path to the input WAV audio reference.
|
78 |
+
desired_strength (float): Strength of prompt influence in [0.0, 1.0].
|
79 |
+
progress (gr.Progress, optional): Progress tracker for UI feedback.
|
80 |
+
|
81 |
+
Returns:
|
82 |
+
tuple:
|
83 |
+
- str: File path of the generated output WAV audio.
|
84 |
+
- str: File path of the input spectrogram image (PNG).
|
85 |
+
- str: File path of the output spectrogram image (PNG).
|
86 |
+
"""
|
87 |
|
88 |
# Load your audio file
|
89 |
input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
|
|
|
190 |
|
191 |
@spaces.GPU
|
192 |
def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.Progress(track_tqdm=True)):
|
193 |
+
"""
|
194 |
+
Perform audio inpainting on a masked spectrogram region, guided by a text prompt.
|
195 |
+
|
196 |
+
Args:
|
197 |
+
prompt (str): Text prompt describing the desired inpainted audio content.
|
198 |
+
audio_path (str): File path to the input WAV audio reference.
|
199 |
+
mask_start_point (int): Start index of the mask region in the spectrogram.
|
200 |
+
mask_end_point (int): End index of the mask region in the spectrogram.
|
201 |
+
progress (gr.Progress, optional): Progress tracker for UI feedback.
|
202 |
+
|
203 |
+
Returns:
|
204 |
+
tuple:
|
205 |
+
- str: File path of the generated inpainted output WAV audio.
|
206 |
+
- str: File path of the input spectrogram image (PNG).
|
207 |
+
- PIL.Image.Image: The output spectrogram image containing the inpainted region.
|
208 |
+
"""
|
209 |
|
210 |
# Load your audio file
|
211 |
input_audio, original_sr = librosa.load(audio_path, sr=None) # Load with original sampling rate
|
|
|
394 |
submit_btn.click(
|
395 |
fn = infer,
|
396 |
inputs = [prompt],
|
397 |
+
outputs = [audio_out],
|
398 |
+
show_api=True
|
399 |
)
|
400 |
|
401 |
with gr.Tab("Audio-to-Audio"):
|
|
|
423 |
submit_btn_img2img.click(
|
424 |
fn = infer_img2img,
|
425 |
inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
|
426 |
+
outputs = [audio_out_img2img, input_spectrogram, output_spectrogram],
|
427 |
+
show_api=True
|
428 |
)
|
429 |
|
430 |
with gr.Tab("Audio InPainting"):
|
|
|
462 |
audio_in_inp.upload(
|
463 |
fn = load_input_spectrogram,
|
464 |
inputs = [audio_in_inp],
|
465 |
+
outputs = [audio_in_spec],
|
466 |
+
show_api=False
|
467 |
)
|
468 |
|
469 |
audio_in_inp.stop_recording(
|
470 |
fn = load_input_spectrogram,
|
471 |
inputs = [audio_in_inp],
|
472 |
+
outputs = [audio_in_spec],
|
473 |
+
show_api=False
|
474 |
)
|
475 |
|
476 |
preview_mask_btn.click(
|
477 |
fn = preview_masked_area,
|
478 |
inputs = [audio_in_inp, mask_start_point, mask_end_point],
|
479 |
+
outputs = [masked_spec_preview],
|
480 |
+
show_api=False
|
481 |
)
|
482 |
|
483 |
submit_btn_inp.click(
|
484 |
fn = infer_inp,
|
485 |
inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
|
486 |
+
outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp],
|
487 |
+
show_api=False
|
488 |
)
|
489 |
|
490 |
+
demo.queue().launch(ssr_mode=False, mcp_server=True, show_error=True)
|