# This is a Gradio app that provides a simple interface to generate images and
# audio using HuggingFace Spaces.
import functools
import json

import gradio as gr
# gradio_client is installed as a dependency of gradio; its Client.predict()
# is the API that the keyword/api_name calls below are written against.
from gradio_client import Client

_IMAGE_SPACE = "inoculatemedia/SanaSprint"
_TTS_SPACE = "ysharma/Dia-1.6B"

# MCP (SSE) endpoints of the default Spaces, keyed by Space ID so that the two
# entries no longer overwrite each other. The app itself talks to the Spaces
# through gradio_client (see get_client); this mapping is kept as reference
# configuration for MCP-capable tools.
# NOTE(review): hosts assume the "<owner>-<space>.hf.space" direct-URL scheme
# (lower-cased, punctuation replaced by "-") -- confirm against each Space.
mcpServers = {
    _IMAGE_SPACE: {
        "url": "https://inoculatemedia-sanasprint.hf.space/gradio_api/mcp/sse",
    },
    _TTS_SPACE: {
        "url": "https://ysharma-dia-1-6b.hf.space/gradio_api/mcp/sse",
    },
}


@functools.lru_cache(maxsize=16)
def _connect(space_id: str) -> Client:
    """Open a gradio_client connection to *space_id* (memoized, bounded)."""
    return Client(space_id)


def get_client(space_id: str) -> Client:
    """Get or create a Gradio client for the specified space.

    Clients are cached per Space ID so repeated requests reuse one connection.

    Args:
        space_id: HuggingFace Space ID to use, e.g. "owner/space-name"

    Returns:
        A gradio_client.Client connected to the Space.

    Raises:
        gr.Error: If space_id is empty or only whitespace.
    """
    cleaned = (space_id or "").strip()
    if not cleaned:
        raise gr.Error("Space ID must not be empty.")
    return _connect(cleaned)


def _first_file(result) -> str:
    """Return the file path or URL of the first output of a predict() call.

    The output shape of a remote endpoint is not visible from this file, so
    the common shapes are accepted: a bare string, a dict carrying "path" or
    "url", or a tuple/list whose first element is one of those.

    Raises:
        gr.Error: If no file location can be found in the result.
    """
    if isinstance(result, (list, tuple)):
        if not result:
            raise gr.Error("The Space returned no outputs.")
        result = result[0]
    if isinstance(result, dict):
        result = result.get("path") or result.get("url")
    if not isinstance(result, str) or not result:
        raise gr.Error("The Space returned an unexpected result format.")
    return result


def generate_image(prompt: str, space_id: str = "inoculatemedia/SanaSprint") -> str:
    """Generate an image from a text prompt using a HuggingFace Space.

    Args:
        prompt: Text prompt describing the image to generate
        space_id: HuggingFace Space ID to use

    Returns:
        File path or URL of the generated image, suitable for gr.Image.
    """
    client = get_client(space_id)
    # NOTE(review): parameter names target the "/infer" endpoint of the
    # default Space; other Spaces may expose a different signature.
    result = client.predict(
        prompt=prompt,
        model_size="1.6B",
        seed=0,
        randomize_seed=True,
        width=1024,
        height=1024,
        guidance_scale=4.5,
        num_inference_steps=2,
        api_name="/infer",
    )
    return _first_file(result)


def run_dia_tts(prompt: str, space_id: str = "ysharma/Dia-1.6B") -> str:
    """Text-to-Speech Synthesis.

    Args:
        prompt: Text prompt describing the conversation between speakers S1, S2
        space_id: HuggingFace Space ID to use

    Returns:
        File path or URL of the generated audio, suitable for gr.Audio.
    """
    client = get_client(space_id)
    # NOTE(review): parameter names target the "/generate_audio" endpoint of
    # the default Space; other Spaces may expose a different signature.
    result = client.predict(
        text_input=prompt,
        audio_prompt_input=None,
        max_new_tokens=3072,
        cfg_scale=3,
        temperature=1.3,
        top_p=0.95,
        cfg_filter_top_k=30,
        speed_factor=0.94,
        api_name="/generate_audio",
    )
    return _first_file(result)


# Create a Gradio interface for generating images
image_interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Text Prompt"),
        gr.Textbox(label="Space ID", value=_IMAGE_SPACE),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Image Generation",
    description="Generate images using a specified HuggingFace Space.",
)

# Create a Gradio interface for text-to-speech synthesis
tts_interface = gr.Interface(
    fn=run_dia_tts,
    inputs=[
        gr.Textbox(label="Text Prompt"),
        gr.Textbox(label="Space ID", value=_TTS_SPACE),
    ],
    outputs=gr.Audio(label="Generated Audio"),
    title="Text-to-Speech Synthesis",
    description="Generate audio from text using a specified HuggingFace Space.",
)

# Combine the interfaces into a tabbed interface
with gr.Blocks() as demo:
    with gr.Tab("Generate Image"):
        image_interface.render()
    with gr.Tab("Text-to-Speech"):
        tts_interface.render()

# Launch the Gradio app
demo.launch(show_error=True, show_api=True)