import gradio as gr
from diffusers import ControlNetModel, StableDiffusionXLPipeline, StableDiffusionXLControlNetPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler
import torch
import numpy as np
import cv2
from PIL import Image
import spaces
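# `spaces` is the Hugging Face Spaces helper library; its @spaces.GPU decorator
# requests a ZeroGPU slice for the duration of each decorated call.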
# 🌟 Device and precision (SDXL effectively requires a GPU, so CUDA is hard-coded)
device = "cuda"
precision = torch.float16  # fp16 halves memory; pair it with the fp16-safe VAE below
# ๐Ÿ—๏ธ Load ControlNet model for Canny edge detection
# xinsir/controlnet-canny-sdxl-1.0
# diffusers/controlnet-canny-sdxl-1.0
controlnet = ControlNetModel.from_pretrained(
"xinsir/controlnet-canny-sdxl-1.0",
torch_dtype=precision
)
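# The Canny ControlNet conditions SDXL on an edge map, so generated images keep
# the input's outlines and geometry while restyling everything else.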
# When testing with another base model, swap the VAE to match it as well.
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=precision)
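# The stock SDXL VAE overflows to NaN in float16; this patched VAE is
# numerically stable at fp16, which is why it replaces the pipeline default.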
# Scheduler
eulera_scheduler = EulerAncestralDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder="scheduler")
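# Loading the scheduler from the base checkpoint reuses its timestep config;
# Euler Ancestral injects fresh noise at each step, trading exact determinism
# for more varied samples.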
# Stable Diffusion Model with ControlNet
pipe_cn = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe_cn.to(device)
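# Optional diffusers memory savers (trade speed for VRAM), if needed:
#   pipe_cn.enable_vae_slicing()          # decode latents in slices
#   pipe_cn.enable_model_cpu_offload()    # use *instead of* .to(device)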
# Stable Diffusion Model without ControlNet
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    vae=vae,
    torch_dtype=precision,
    scheduler=eulera_scheduler,
)
pipe.to(device)
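# Note: this loads the SDXL weights a second time. On recent diffusers
# versions (assumed >= 0.27), the two pipelines could share components instead:
#   pipe = StableDiffusionXLPipeline.from_pipe(pipe_cn)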
# 📸 Edge detection function using OpenCV (Canny)
def apply_canny(image, low_threshold, high_threshold):
    # Pure CPU work, so no @spaces.GPU allocation is needed here
    image = np.array(image.convert("RGB"))  # drop any alpha channel
    image = cv2.Canny(image, low_threshold, high_threshold)
    # Canny returns a single-channel map; stack it to 3 channels for the pipeline
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    return Image.fromarray(image)
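# Threshold rule of thumb: gradients above high_threshold become strong edges;
# those between the two thresholds are kept only if connected to a strong edge,
# so a high:low ratio of roughly 2:1 to 3:1 usually works well.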
# 🎨 Image generation function from image
@spaces.GPU
def generate_image(prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale):
    # Apply edge detection
    edge_detected = apply_canny(input_image, low_threshold, high_threshold)
    # Generate styled image using ControlNet
    # (`strength` applies only to img2img pipelines, so it is not passed here)
    result = pipe_cn(
        prompt=prompt,
        image=edge_detected,
        num_inference_steps=30,
        guidance_scale=guidance,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
    ).images[0]
    return edge_detected, result
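# controlnet_conditioning_scale multiplies the ControlNet residuals added to
# the UNet: ~0.5 follows the edges loosely, ~1.0 enforces them strictly.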
# 🎨 Image generation function from prompt
@spaces.GPU
def generate_prompt(prompt, strength, guidance):
    # Generate styled image from the prompt alone
    # (`strength` applies only to img2img pipelines, so it is not passed here)
    result = pipe(
        prompt=prompt,
        num_inference_steps=30,
        guidance_scale=guidance,
    ).images[0]
    # No edge map exists in this path, so clear the edge output
    return None, result
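# guidance_scale is classifier-free guidance: at each step the model predicts
# noise with and without the prompt and extrapolates between them,
#   noise = uncond + guidance_scale * (text_cond - uncond),
# so higher values follow the prompt more literally at the cost of variety.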
# 🖥️ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 🏗️ 3D Screenshot to Styled Render with ControlNet")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Upload 3D Screenshot", type="pil")
            prompt = gr.Textbox(label="Style Prompt", placeholder="e.g., Futuristic building in sunset")
            low_threshold = gr.Slider(50, 150, value=100, label="Canny Edge Low Threshold")
            high_threshold = gr.Slider(100, 200, value=150, label="Canny Edge High Threshold")
            strength = gr.Slider(0.1, 1.0, value=0.7, label="Denoising Strength (img2img only; unused here)")
            guidance = gr.Slider(1, 20, value=7.5, label="Guidance Scale (Creativity)")
            controlnet_conditioning_scale = gr.Slider(0, 1, value=0.5, step=0.01, label="ControlNet Conditioning Scale")
            with gr.Row():
                generate_img_button = gr.Button("Generate from Image")
                generate_prompt_button = gr.Button("Generate from Prompt")
        with gr.Column():
            edge_output = gr.Image(label="Edge Detected Image")
            result_output = gr.Image(label="Generated Styled Image")
    # 🔗 Generate button actions
    generate_img_button.click(
        fn=generate_image,
        inputs=[prompt, input_image, low_threshold, high_threshold, strength, guidance, controlnet_conditioning_scale],
        outputs=[edge_output, result_output]
    )
    generate_prompt_button.click(
        fn=generate_prompt,
        inputs=[prompt, strength, guidance],
        outputs=[edge_output, result_output]
    )
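# For reproducible outputs, a seeded generator could be passed to either
# pipeline call (sketch; a `seed` input would need to be wired into the UI):
#   generator = torch.Generator(device).manual_seed(seed)
#   result = pipe(prompt=prompt, generator=generator, ...).images[0]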
# 🚀 Launch the app (share=True is ignored when running on Hugging Face Spaces)
demo.launch(share=True)