import gradio as gr
import numpy as np
import cv2
import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionXLControlNetPipeline,
    AutoencoderKL,
    EulerAncestralDiscreteScheduler,
)
import spaces

# 🌟 Set device and precision
device = "cuda"
precision = torch.float16

# 🏗️ Load the ControlNet models for Canny and Depth
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0", torch_dtype=precision
)
controlnet_depth = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0", torch_dtype=precision
)
controlnet = [controlnet_canny, controlnet_depth]

# When testing with another base model, change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

# Scheduler
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler"
)

# Stable Diffusion XL pipeline with multi-ControlNet (Canny + Depth)
pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_canny_depth.to(device)


# 🎨 Image generation function from the Canny and depth inputs
@spaces.GPU
def generate_image(prompt, canny_input, depth_input, guidance,
                   canny_conditioning_scale, depth_conditioning_scale):
    # The image list must match the order of the ControlNet list above
    # ([canny, depth]), with one conditioning scale per ControlNet.
    # This is a text-to-image ControlNet pipeline: conditioning comes from the
    # control images, and there is no img2img denoising-strength parameter.
    result = pipe_canny_depth(
        prompt=prompt,
        image=[canny_input, depth_input],
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=[
            float(canny_conditioning_scale),
            float(depth_conditioning_scale),
        ],
    ).images[0]
    return result


# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")
    with gr.Row():
        with gr.Column():
            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
            canny_conditioning_scale = gr.Slider(
                0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale"
            )
        with gr.Column():
            depth_input = gr.Image(label="Upload Depth (Z-Buffer) Screenshot", type="pil")
            depth_conditioning_scale = gr.Slider(
                0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale"
            )
    with gr.Row():
        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., futuristic building at sunset")
        generate_img_button = gr.Button("Generate from Image")
    with gr.Row():
        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
    with gr.Row():
        result_output = gr.Image(label="Generated Styled Image")

    # 🔗 Generate Button Action
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, canny_input, depth_input, guidance,
                canny_conditioning_scale, depth_conditioning_scale],
        outputs=[result_output],
    )

# 🚀 Launch the app
demo.launch(share=True)
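

# ──────────────────────────────────────────────────────────────────────────
# Appendix: a minimal preprocessing sketch, not wired into the UI above.
# The app assumes the user uploads an already-processed Canny map (the depth
# input needs no helper, since a Z-buffer screenshot from a 3D tool is
# already a usable depth map). If you want to accept raw screenshots
# instead, a helper along these lines could be defined before
# generate_image() and applied to the upload there. The name to_canny and
# the 100/200 thresholds are illustrative assumptions; it uses the cv2 and
# numpy imports from the top of the file.
# ──────────────────────────────────────────────────────────────────────────
def to_canny(image: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    """Convert a raw screenshot into the white-on-black edge map the Canny ControlNet expects."""
    # Grayscale first, since Canny operates on a single-channel 8-bit image.
    gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)
    # Replicate the edge map to 3 channels, matching the pipeline's RGB input.
    return Image.fromarray(np.stack([edges] * 3, axis=-1))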