import gradio as gr
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler
import torch
import numpy as np
import cv2
from PIL import Image
import spaces


# 🌟 Set device and precision
device = "cuda"
precision = torch.float16

# 🏗️ Load ControlNet models for Canny and Depth
controlnet_canny = ControlNetModel.from_pretrained(
    "xinsir/controlnet-canny-sdxl-1.0",
    torch_dtype=precision
)

controlnet_depth = ControlNetModel.from_pretrained(
    "xinsir/controlnet-depth-sdxl-1.0",
    torch_dtype=precision
)

# Combine both ControlNets (Multi-ControlNet); the list order must match the
# order of the control images and conditioning scales passed at inference time.
controlnet = [controlnet_canny, controlnet_depth]

# When testing with another base model, you need to change the VAE as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)

# Scheduler
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")

# Stable Diffusion Model with ControlNet
pipe_canny_depth = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_canny_depth.to(device)
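
# Optional (not part of the original app): if GPU memory is tight, model CPU
# offload can be enabled instead of moving the whole pipeline to the GPU.
# Requires `accelerate`:
# pipe_canny_depth.enable_model_cpu_offload()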


# 🎨 Generate a styled image from the Canny and Depth control images
@spaces.GPU
def generate_image(prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale):
    
    # Generate styled image using ControlNet
    result = pipe_canny_depth(
        prompt=prompt,
        image=[canny_input, depth_input],
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=[float(canny_conditioning_scale), float(depth_conditioning_scale)],
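        # NOTE: `strength` only applies to img2img variants such as
        # StableDiffusionXLControlNetImg2ImgPipeline; the text-to-image
        # ControlNet pipeline used here does not consume this argument.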
        strength=strength
    ).images[0]
    
    return result
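

# --- Optional preprocessing sketch (not wired into the UI below) ---
# The app expects an already-prepared Canny edge image. If only a raw
# screenshot is available, an edge map in the layout ControlNet expects
# (white edges on a black background, replicated to three channels) can be
# derived with OpenCV. `make_canny_image` and its default thresholds are
# illustrative assumptions, not part of the original app.
def make_canny_image(screenshot: Image.Image, low: int = 100, high: int = 200) -> Image.Image:
    gray = cv2.cvtColor(np.array(screenshot.convert("RGB")), cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(gray, low, high)                      # single-channel edge map
    return Image.fromarray(np.stack([edges] * 3, axis=-1))  # replicate to RGB for ControlNet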


# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")

    with gr.Row():
        with gr.Column():
            canny_input = gr.Image(label="Upload Canny Screenshot", type="pil")
            canny_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Canny Conditioning Scale")

        with gr.Column():        
            depth_input = gr.Image(label="Upload Depth (ZBuffer) Screenshot", type="pil")
            depth_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="Depth Conditioning Scale")
    
    with gr.Row():
        prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., futuristic building at sunset")
        generate_img_button = gr.Button("Generate from Image")
    with gr.Row():
        strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength")
        guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")            

    with gr.Row():
        result_output = gr.Image(label="Generated Styled Image")

    # 🔗 Generate Button Action
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, canny_input, depth_input, strength, guidance, canny_conditioning_scale, depth_conditioning_scale],
        outputs=[result_output]
    )


# 🚀 Launch the app
demo.launch(share=True)