import gradio as gr
import numpy as np
import cv2
import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    AutoencoderKL,
    EulerAncestralDiscreteScheduler,
)
import spaces

# 🌟 Set device and precision
device = "cuda"
precision = torch.float16

# 🏗️ Load the ControlNet models for Canny and Depth
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0", torch_dtype=precision
)
controlnet_depth = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0", torch_dtype=precision
)
controlnet = [controlnet_canny, controlnet_depth]

# When testing with another base model, change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

# Scheduler
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

# Stable Diffusion XL pipeline with multi-ControlNet (Canny + Depth)
pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_canny_depth.to(device)


# 🎨 Image generation function from the Canny and depth inputs
@spaces.GPU
def generate_image(prompt, canny_input, depth_input, guidance,
                   canny_conditioning_scale, depth_conditioning_scale):
    # The image list must match the order of the ControlNet list above
    # ([canny, depth]), with one conditioning scale per ControlNet.
    # This is a text-to-image ControlNet pipeline: conditioning comes from the
    # control images, and there is no img2img denoising-strength parameter.
    result = pipe_canny_depth(
        prompt=prompt,
        image=[canny_input, depth_input],
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=[
            float(canny_conditioning_scale),
            float(depth_conditioning_scale),
        ],
    ).images[0]
    return result


# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")
    with gr.Row():
        with gr.Column():
            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
            canny_conditioning_scale = gr.Slider(
                0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale"
            )
        with gr.Column():
            depth_input = gr.Image(label="Upload Depth (Z-Buffer) Screenshot", type="pil")
            depth_conditioning_scale = gr.Slider(
                0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale"
            )
    with gr.Row():
        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., futuristic building at sunset")
        generate_img_button = gr.Button("Generate from Image")
    with gr.Row():
        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
    with gr.Row():
        result_output = gr.Image(label="Generated Styled Image")

    # 🔗 Generate Button Action
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, canny_input, depth_input, guidance,
                canny_conditioning_scale, depth_conditioning_scale],
        outputs=[result_output],
    )

# 🚀 Launch the app
demo.launch(share=True)
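

# ──────────────────────────────────────────────────────────────────────────
# Appendix: a minimal preprocessing sketch, not wired into the UI above.
# The app assumes the user uploads an already-processed Canny map (the depth
# input needs no helper, since a Z-buffer screenshot from a 3D tool is
# already a usable depth map). If you want to accept raw screenshots
# instead, a helper along these lines could be defined before
# generate_image() and applied to the upload there. The name to_canny and
# the 100/200 thresholds are illustrative assumptions; it uses the cv2 and
# numpy imports from the top of the file.
# ──────────────────────────────────────────────────────────────────────────
def to_canny(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    """Convert a raw screenshot into the white-on-black edge map the Canny ControlNet expects."""
    # Grayscale first, since Canny operates on a single-channel 8-bit image.
    gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)
    # Replicate the edge map to 3 channels, matching the pipeline's RGB input.
    return Image.fromarray(np.stack([edges] * 3, axis=-1))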