import gradio as gr
from diffusers import (
    AutoencoderKL,
    ControlNetModel,
    EulerAncestralDiscreteScheduler,
    StableDiffusionXLControlNetPipeline,
    StableDiffusionXLPipeline,
)
import torch
import numpy as np
import cv2
from PIL import Image
import spaces


# 🌟 Auto-detect device (CPU/GPU) and pick a matching precision
device = "cuda" if torch.cuda.is_available() else "cpu"
precision = torch.float16 if device == "cuda" else torch.float32

# 🏗️ Load ControlNet model for Canny edge detection
# (alternative checkpoint: diffusers/controlnet-canny-sdxl-1.0)
controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0",
    torch_dtype=precision
)

# When testing with another base model, change the VAE to match it as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

# Euler Ancestral scheduler, shared by both pipelines below
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

# Stable Diffusion Model with ControlNet
pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_cn.to(device)
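
# Assumption: on GPUs with limited VRAM, diffusers' built-in offloading could be
# used instead of moving the whole pipeline onto the device, e.g.:
#   pipe_cn.enable_model_cpu_offload()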

# Stable Diffusion Model without ControlNet
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe.to(device)
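
# Note: this loads the SDXL base weights a second time; on recent diffusers
# versions the two pipelines can share components instead, e.g.:
#   pipe = StableDiffusionXLPipeline.from_pipe(pipe_cn)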


# 📸 Edge detection using OpenCV's Canny (pure CPU work, so no GPU decorator needed)
def apply_canny(image, low_threshold, high_threshold):
    image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    # Stack the single-channel edge map into a 3-channel image for ControlNet
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    return Image.fromarray(image)
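
# Assumption (not in the original app): SDXL is trained around 1024x1024, so
# resizing the screenshot to a nearby multiple-of-8 resolution before edge
# detection, e.g. input_image.resize((1024, 1024)), often improves results.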


# 🎨 Image generation function: styled render from an uploaded screenshot
@spaces.GPU
def generate_image(prompt, input_image, low_threshold, high_threshold, guidance, controlnet_conditioning_scale):
    if input_image is None:
        raise gr.Error("Please upload an image first.")

    # Apply edge detection to extract the structure of the screenshot
    edge_detected = apply_canny(input_image, low_threshold, high_threshold)

    # Generate a styled image with ControlNet, conditioned on the edge map
    result = pipe_cn(
        prompt=prompt,
        image=edge_detected,
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    ).images[0]
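    # Assumption: for reproducible outputs, a seeded generator could be passed to
    # the call above, e.g. generator=torch.Generator(device=device).manual_seed(0)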
    
    return edge_detected, result


# 🎨 Image generation function: styled render from the prompt alone (no ControlNet)
@spaces.GPU
def generate_prompt(prompt, guidance):

    # Generate a styled image from the prompt
    result = pipe(
        prompt=prompt,
        num_inference_steps=30,
        guidance_scale=guidance,
    ).images[0]

    # No edge map in this path, so clear the edge output slot
    return None, result


# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Upload 3D Screenshot", type="pil")
            prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
            
            low_threshold = gr.Slider(50, 150, value=100, label="Canny Edge Low Threshold")
            high_threshold = gr.Slider(100, 200, value=150, label="Canny Edge High Threshold")
            
            guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Prompt Adherence)")
            controlnet_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="ControlNet Conditioning Scale")
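            # ControlNet scale controls how strictly the edge map constrains the
            # output: 0 ignores the edges entirely, 1 follows them closely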

            with gr.Row():
                generate_img_button = gr.Button("Generate from Image")
                generate_prompt_button = gr.Button("Generate from Prompt")
            

        with gr.Column():
            edge_output = gr.Image(label="Edge Detected Image")
            result_output = gr.Image(label="Generated Styled Image")

    # 🔗 Generate Button Action
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, input_image, low_threshold, high_threshold, guidance, controlnet_conditioning_scale],
        outputs=[edge_output, result_output]
    )

    generate_prompt_button.click(
        fn=generate_prompt,
        inputs=[prompt, guidance],
        outputs=[edge_output, result_output]
    )


# 🚀 Launch the app (share=True creates a temporary public link when running
# locally; a hosted Space serves the app on its own)
demo.launch(share=True)