"""Gradio demo: generate a sound clip from a text description of a cooking action.

Loads the ``cvssp/audioldm-m-full`` AudioLDM pipeline on the CPU at import
time, wraps it in a single-textbox Gradio interface, and launches the app.
"""

import gradio as gr
import numpy as np
# Import the submodule explicitly: a bare ``import scipy`` does not guarantee
# that ``scipy.io.wavfile`` is loaded on every SciPy version. This statement
# still binds the top-level name ``scipy``.
import scipy.io.wavfile
import torch
from diffusers import AudioLDMPipeline

# Sample rate written into the WAV header.
# NOTE(review): presumably matches the model's native output rate; confirm
# against the AudioLDM documentation before changing.
SAMPLE_RATE_HZ = 16000

# Largest magnitude representable in signed 16-bit PCM.
INT16_MAX = 32767

# Loaded once at import time and shared by every request.
pipe = AudioLDMPipeline.from_pretrained(
    "cvssp/audioldm-m-full",
    torch_dtype=torch.float32,
).to("cpu")


def generate_audio_from_description(
    description: str,
    output_path: str = "output.wav",
) -> str:
    """Generate audio for *description* and save it as a 16-bit PCM WAV file.

    Args:
        description: Text prompt passed to the AudioLDM pipeline.
        output_path: Destination of the WAV file. The default is a fixed
            relative path, so successive calls that rely on it overwrite
            the same file.

    Returns:
        ``output_path``, so Gradio can serve the file it points to.
    """
    audio = pipe(description, num_inference_steps=50).audios[0]

    # Clip before scaling: a sample outside [-1, 1] would otherwise overflow
    # the int16 cast instead of saturating.
    clipped = np.clip(audio, -1.0, 1.0)
    audio_np = (clipped * INT16_MAX).astype(np.int16)

    scipy.io.wavfile.write(output_path, rate=SAMPLE_RATE_HZ, data=audio_np)
    return output_path


iface = gr.Interface(
    fn=generate_audio_from_description,
    inputs=gr.Textbox(lines=2, placeholder="e.g., Stirring onions in a hot pan"),
    outputs=gr.Audio(label="Generated Audio", type="filepath"),
    title="🍳 Cooking Sound Generator",
    description="Enter a cooking action. Get the sound it would make.",
)

iface.launch()