# MusicGen + Gradio + GPT Demo App (CPU-Optimized with MCP Server)
import os

import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
from openai import OpenAI
from transformers import AutoProcessor, MusicgenForConditionalGeneration

# Force CPU device (no GPU required).
device = torch.device("cpu")

# Load MusicGen model onto CPU.
model_name = "facebook/musicgen-small"
model = MusicgenForConditionalGeneration.from_pretrained(model_name).to(device)
processor = AutoProcessor.from_pretrained(model_name)

# Initialize OpenAI client (set OPENAI_API_KEY in HF Spaces Secrets).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


def refine_prompt(user_input: str) -> str:
    """Ask GPT to rewrite *user_input* into a richer music-generation prompt.

    Returns the model's reply with surrounding whitespace stripped.
    Propagates any exception raised by the OpenAI client (auth, network).
    """
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a music assistant. Make the user's input more descriptive for an AI music generator."},
            {"role": "user", "content": user_input},
        ],
    )
    return completion.choices[0].message.content.strip()


def generate_music(prompt: str, max_new_tokens: int = 128):
    """Generate a short audio clip for *prompt* with MusicGen on CPU.

    ``max_new_tokens`` is kept small by default so CPU inference stays fast.

    Returns ``(sampling_rate, audio)`` where *audio* is a 1-D float32 array
    in [-1.0, 1.0], suitable for a Gradio ``Audio(type="numpy")`` component.
    Side effect: writes a 16-bit PCM WAV to ``/tmp/output.wav``.
    """
    inputs = processor(text=[prompt], return_tensors="pt").to(device)
    audio_values = model.generate(**inputs, max_new_tokens=max_new_tokens)
    sampling_rate = model.config.audio_encoder.sampling_rate

    audio = audio_values[0].cpu().numpy()
    # BUG FIX: Gradio expects (samples,) or (samples, channels); MusicGen's
    # output for one item is (channels, samples), so a raw (1, N) array would
    # be misread as a single N-channel sample. Squeeze singleton axes and,
    # for genuinely multi-channel output, keep the first channel only
    # (matching what the WAV export below already did).
    audio = np.squeeze(audio)
    if audio.ndim > 1:
        audio = audio[0]

    # Peak-normalize to [-1.0, 1.0] for Gradio playback.
    # BUG FIX: guard against an all-zero (silent) clip, which previously
    # divided by zero and produced NaNs.
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak
    audio = audio.astype(np.float32)

    # Save a 16-bit mono PCM copy for download (/tmp is writable on Spaces).
    int_audio = (audio * 32767).astype(np.int16)
    scipy.io.wavfile.write("/tmp/output.wav", sampling_rate, int_audio)

    return sampling_rate, audio


def main(user_input: str, max_new_tokens: int):
    """Gradio callback: refine the prompt via GPT, then synthesize audio.

    Returns (refined prompt text, (sampling_rate, audio) tuple for the
    Audio component, path to the downloadable .wav file).
    """
    detailed_prompt = refine_prompt(user_input)
    sampling_rate, audio = generate_music(detailed_prompt, max_new_tokens)
    return detailed_prompt, (sampling_rate, audio), "/tmp/output.wav"


# Build Gradio UI.
demo = gr.Blocks()
with demo:
    gr.Markdown("""# 🎵 AI Music Generator
Enter a music idea or mood and get a short AI-generated track. (CPU mode)""")
    user_input = gr.Textbox(label="Describe the mood or style of music")
    max_tokens = gr.Slider(32, 256, value=128, step=32, label="Length (tokens) for CPU")
    generate_btn = gr.Button("Generate Music")
    refined_output = gr.Textbox(label="Enhanced Prompt by GPT")
    audio_output = gr.Audio(label="Generated Audio", type="numpy")
    download_wav = gr.File(label="Download .wav file")
    generate_btn.click(
        main,
        inputs=[user_input, max_tokens],
        outputs=[refined_output, audio_output, download_wav],
    )

if __name__ == "__main__":
    # BUG FIX: `gradio.mcp_server.MCPServer` does not exist in Gradio, so the
    # original import crashed at startup. The supported way to expose a Gradio
    # app as an MCP server is `launch(mcp_server=True)` (requires the
    # `gradio[mcp]` extra to be installed).
    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)