# This is a Gradio app that provides a simple interface to generate images and audio using HuggingFace Spaces.
import gradio as gr
import json

# MCP (Model Context Protocol) SSE endpoints of the HuggingFace Spaces this app
# relies on, keyed by a unique server name.
#
# Every key must be unique: a dict literal silently keeps only the last value
# of a repeated key, which is how the SanaSprint entry used to get dropped.
#
# Space hosts use the "<owner>-<space>.hf.space" subdomain form (lowercase,
# with "/" and "." replaced by "-").
# NOTE(review): confirm each host against the Space's "Embed this Space" dialog.
mcpServers = {
    "gradio-sanasprint": {
        "url": "https://inoculatemedia-sanasprint.hf.space/gradio_api/mcp/sse",
    },
    # The bare "gradio" key still names the Dia Space, which is the entry it
    # resolved to before, so existing mcpServers["gradio"] lookups keep working.
    "gradio": {
        "url": "https://ysharma-dia-1-6b.hf.space/gradio_api/mcp/sse",
    },
}

# Function to get or create the MCP client configuration for the specified space
def get_client(space_id: str):
    """Get or create the MCP client launch configuration for a Space.

    Despite the name, the value returned is a plain configuration dict, not a
    callable client. Entries created here describe how to start the
    ``mcp-remote`` bridge (via ``npx``) against the Space's SSE endpoint, and
    they are stored in the module-level ``mcpServers`` registry under
    ``space_id`` so later calls return the same dict.

    Args:
        space_id: HuggingFace Space ID to use, e.g. "owner/space-name"

    Returns:
        The registry entry for ``space_id``. Entries created by this function
        have the keys "command" and "args"; entries that were already present
        in ``mcpServers`` are returned unchanged.

    Raises:
        ValueError: If ``space_id`` is empty or only whitespace.
    """
    space_id = space_id.strip()
    if not space_id:
        raise ValueError("space_id must be a non-empty HuggingFace Space ID")

    if space_id not in mcpServers:
        # Point the entry at the requested Space itself. Previously every new
        # entry reused mcpServers["gradio"]["url"], so all Spaces shared one
        # endpoint no matter which ID was asked for.
        # NOTE(review): assumes the host is the Space ID lowercased with every
        # non-alphanumeric character replaced by "-" — confirm for IDs with
        # unusual characters.
        subdomain = "".join(
            ch if ch.isascii() and ch.isalnum() else "-"
            for ch in space_id.lower()
        )
        mcpServers[space_id] = {
            "command": "npx",
            "args": [
                "mcp-remote",
                f"https://{subdomain}.hf.space/gradio_api/mcp/sse",
                "--transport",
                "sse-only",
            ],
        }
    return mcpServers[space_id]

# Spaces already loaded by _load_space, keyed by Space ID, so each Space is
# only loaded once per process.
_loaded_spaces = {}


def _load_space(space_id: str):
    """Return the callable app for a Space, loading it on first use."""
    space_id = space_id.strip()
    if not space_id:
        raise ValueError("space_id must be a non-empty HuggingFace Space ID")
    if space_id not in _loaded_spaces:
        # gr.load builds a gr.Blocks app from the remote Space; a Blocks object
        # can be called like a function to run one of the Space's endpoints.
        _loaded_spaces[space_id] = gr.load(name=space_id, src="spaces")
    return _loaded_spaces[space_id]


def _first_output(result):
    """Return the first value when an endpoint produced several outputs."""
    if isinstance(result, (list, tuple)):
        return result[0] if result else None
    return result


# Function to generate an image using a specified HuggingFace Space
def generate_image(prompt: str, space_id: str = "inoculatemedia/SanaSprint") -> str:
    """Generate an image from a text prompt using a HuggingFace Space.

    The previous implementation called ``client["command"]``, which is the
    string "npx" in the configuration dict, so every request failed before
    reaching the Space. The Space is now loaded as a callable app instead.

    Args:
        prompt: Text prompt describing the image to generate
        space_id: HuggingFace Space ID to use

    Returns:
        The first output of the Space's "infer" endpoint.
        NOTE(review): presumably a file path or URL of the image — confirm.

    Raises:
        ValueError: If ``space_id`` is empty or only whitespace.
    """
    space = _load_space(space_id)
    # A loaded Blocks app takes its inputs positionally, and api_name is given
    # without the leading "/".
    # NOTE(review): assumes the "infer" endpoint takes exactly these eight
    # inputs in this order — confirm on the Space's "Use via API" page.
    result = space(
        prompt,
        "1.6B",  # model_size
        0,       # seed
        True,    # randomize_seed
        1024,    # width
        1024,    # height
        4.5,     # guidance_scale
        2,       # num_inference_steps
        api_name="infer",
    )
    # Keep only the first output, e.g. when the endpoint also returns a seed.
    return _first_output(result)


# Function to perform text-to-speech synthesis using a specified HuggingFace Space
def run_dia_tts(prompt: str, space_id: str = "ysharma/Dia-1.6B") -> str:
    """Synthesize speech from a text prompt using a HuggingFace Space.

    As with generate_image, the earlier version tried to call the string
    "npx" and could not succeed; the Space is now loaded as a callable app.

    Args:
        prompt: Text prompt describing the conversation between speakers S1, S2
        space_id: HuggingFace Space ID to use

    Returns:
        The first output of the Space's "generate_audio" endpoint.
        NOTE(review): presumably a file path or URL of the audio — confirm.

    Raises:
        ValueError: If ``space_id`` is empty or only whitespace.
    """
    space = _load_space(space_id)
    # NOTE(review): assumes the "generate_audio" endpoint takes exactly these
    # eight inputs in this order — confirm on the Space's "Use via API" page.
    result = space(
        prompt,  # text_input
        None,    # audio_prompt_input
        3072,    # max_new_tokens
        3,       # cfg_scale
        1.3,     # temperature
        0.95,    # top_p
        30,      # cfg_filter_top_k
        0.94,    # speed_factor
        api_name="generate_audio",
    )
    return _first_output(result)

# Image tab: a text prompt and a Space ID go in, the generated image comes out.
# The components are created in the same order as before (inputs first, then
# the output) and handed to gr.Interface, which wires them to generate_image.
image_inputs = [
    gr.Textbox(label="Text Prompt"),
    gr.Textbox(label="Space ID", value="inoculatemedia/SanaSprint"),
]
image_output = gr.Image(label="Generated Image")
image_interface = gr.Interface(
    fn=generate_image,
    inputs=image_inputs,
    outputs=image_output,
    title="Image Generation",
    description="Generate images using a specified HuggingFace Space.",
)

# Text-to-speech tab: a text prompt and a Space ID go in, audio comes out.
# The components are created in the same order as before (inputs first, then
# the output) and handed to gr.Interface, which wires them to run_dia_tts.
tts_inputs = [
    gr.Textbox(label="Text Prompt"),
    gr.Textbox(label="Space ID", value="ysharma/Dia-1.6B"),
]
tts_output = gr.Audio(label="Generated Audio")
tts_interface = gr.Interface(
    fn=run_dia_tts,
    inputs=tts_inputs,
    outputs=tts_output,
    title="Text-to-Speech Synthesis",
    description="Generate audio from text using a specified HuggingFace Space.",
)

# Build the top-level app: one tab per interface, rendered in a fixed order.
with gr.Blocks() as demo:
    for tab_label, tab_interface in (
        ("Generate Image", image_interface),
        ("Text-to-Speech", tts_interface),
    ):
        with gr.Tab(tab_label):
            tab_interface.render()

# Start the app as soon as this module runs; errors are shown in the UI and
# the API docs are exposed.
launch_options = {"show_error": True, "show_api": True}
demo.launch(**launch_options)