|
import spaces |
|
from diffusers import ( |
|
StableDiffusionXLPipeline, |
|
EulerDiscreteScheduler, |
|
UNet2DConditionModel, |
|
AutoencoderTiny, |
|
) |
|
import torch |
|
import os |
|
from huggingface_hub import hf_hub_download |
|
from compel import Compel, ReturnedEmbeddingsType |
|
from gradio_promptweighting import PromptWeighting |
|
|
|
|
|
from PIL import Image |
|
import gradio as gr |
|
import time |
|
from safetensors.torch import load_file |
|
import time |
|
import tempfile |
|
from pathlib import Path |
|
|
|
|
|
def _env_flag(name: str) -> bool:
    """Return True only when environment variable *name* is exactly "1"."""
    return os.environ.get(name, "0") == "1"


# Hugging Face repositories and the checkpoint file the app is assembled from.
BASE = "stabilityai/stable-diffusion-xl-base-1.0"
REPO = "ByteDance/SDXL-Lightning"
CHECKPOINT = "sdxl_lightning_2step_unet.safetensors"
taesd_model = "madebyollin/taesdxl"

# Optional features; each one stays off unless its variable is set to "1".
SFAST_COMPILE = _env_flag("SFAST_COMPILE")
SAFETY_CHECKER = _env_flag("SAFETY_CHECKER")
USE_TAESD = _env_flag("USE_TAESD")

# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
torch_device = device  # second name for the same device object
torch_dtype = torch.float16

# Echo the effective settings to stdout at start-up.
for _label, _setting in (
    ("SAFETY_CHECKER", SAFETY_CHECKER),
    ("SFAST_COMPILE", SFAST_COMPILE),
    ("USE_TAESD", USE_TAESD),
    ("device", device),
):
    print(f"{_label}: {_setting}")
|
|
|
|
|
# Build the UNet from the base repository's "unet" config, move it to the GPU
# in half precision, then load the SDXL-Lightning checkpoint weights into it.
unet = UNet2DConditionModel.from_config(BASE, subfolder="unet")
unet = unet.to("cuda", torch.float16)

_lightning_ckpt_path = hf_hub_download(REPO, CHECKPOINT)
# The state dict is passed straight through (not bound to a name) so the
# loaded tensors are not kept alive at module level after loading.
unet.load_state_dict(load_file(_lightning_ckpt_path, device="cuda"))

# Assemble the SDXL pipeline around the UNet prepared above.
# NOTE(review): the device is hard-coded to "cuda" here although `device` is
# computed earlier in the file — confirm this is intentional for the
# `spaces` GPU runtime before unifying the two.
pipe = StableDiffusionXLPipeline.from_pretrained(
    BASE,
    unet=unet,
    torch_dtype=torch.float16,
    variant="fp16",
    safety_checker=False,
)
pipe = pipe.to("cuda")

# Re-assert the UNet dtype after the pipeline has been moved.
unet = unet.to(dtype=torch.float16)
|
|
|
# Prompt-weighting encoder built on both tokenizer / text-encoder pairs of the
# pipeline. Pooled output is requested from the second encoder only.
_tokenizers = [pipe.tokenizer, pipe.tokenizer_2]
_text_encoders = [pipe.text_encoder, pipe.text_encoder_2]

compel = Compel(
    tokenizer=_tokenizers,
    text_encoder=_text_encoders,
    returned_embeddings_type=(
        ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED
    ),
    requires_pooled=[False, True],
)
|
|
|
|
|
# Optionally replace the pipeline's VAE with the tiny autoencoder named by
# `taesd_model`.
if USE_TAESD:
    _tiny_vae = AutoencoderTiny.from_pretrained(
        taesd_model,
        torch_dtype=torch_dtype,
        use_safetensors=True,
    )
    pipe.vae = _tiny_vae.to(device)

# Rebuild the scheduler as an Euler discrete scheduler from the existing
# scheduler config, overriding only the timestep spacing.
_scheduler_config = pipe.scheduler.config
pipe.scheduler = EulerDiscreteScheduler.from_config(
    _scheduler_config,
    timestep_spacing="trailing",
)

# Silence the per-call progress bar.
pipe.set_progress_bar_config(disable=True)
|
# The NSFW filter is opt-in: its models are only imported and loaded when the
# SAFETY_CHECKER flag is enabled, and `check_nsfw_images` only exists then.
if SAFETY_CHECKER:
    from safety_checker import StableDiffusionSafetyChecker
    from transformers import CLIPFeatureExtractor

    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    )
    safety_checker = safety_checker.to(device)

    feature_extractor = CLIPFeatureExtractor.from_pretrained(
        "openai/clip-vit-base-patch32"
    )

    def check_nsfw_images(
        images: list[Image.Image],
    ) -> tuple[list[Image.Image], list[bool]]:
        """Run the safety checker over *images*.

        Args:
            images: PIL images to screen.

        Returns:
            A pair ``(images, flags)``: the input list, unchanged, and
            whatever the safety checker returned for it.
            NOTE(review): the annotation says one bool per image; the checker
            comes from the local ``safety_checker`` module — confirm there.
        """
        clip_batch = feature_extractor(images, return_tensors="pt").to(device)
        pixel_values = clip_batch.pixel_values.to(torch_device)
        flags = safety_checker(images=[images], clip_input=pixel_values)
        return images, flags
|
|
|
|
|
# Optional stable-fast compilation of the pipeline, enabled via SFAST_COMPILE.
if SFAST_COMPILE:
    from sfast.compilers.diffusion_pipeline_compiler import compile, CompilationConfig

    config = CompilationConfig.Default()

    # Each accelerator is switched on only if its package can be imported.
    try:
        import xformers
    except ImportError:
        print("xformers not installed, skip")
    else:
        config.enable_xformers = True

    try:
        import triton
    except ImportError:
        print("Triton not installed, skip")
    else:
        config.enable_triton = True

    # CUDA graphs are enabled unconditionally.
    config.enable_cuda_graph = True

    pipe = compile(pipe, config)
|
|
|
|
|
@spaces.GPU
def predict(prompt, prompt_w, guidance_scale, seed=1231231):
    """Generate one image for *prompt* plus optional weighted prompt terms.

    Args:
        prompt: Free-text prompt. ``None`` is treated as an empty string.
        prompt_w: Iterable of dicts with ``"prompt"`` and ``"scale"`` keys, as
            produced by the prompt-weighting component. Entries with an empty
            ``"prompt"`` are skipped. ``None`` is treated as no entries.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Seed for the random generator handed to the pipeline.

    Returns:
        ``pathlib.Path`` of a temporary JPEG holding the generated image, or a
        blank 512x512 RGB ``PIL.Image`` when the safety checker is enabled and
        flags the result.
    """
    # torch.manual_seed seeds the global RNG and returns the default generator.
    generator = torch.manual_seed(seed)
    started_at = time.perf_counter()

    # Each weighted term is rendered as "(text)scale" and appended to the
    # prompt. The `or` guards keep a missing component value (None) from
    # raising TypeError during iteration or concatenation.
    weighted_terms = " ".join(
        f"({term['prompt']}){term['scale']}"
        for term in (prompt_w or [])
        if term["prompt"]
    )
    full_prompt = (prompt or "") + " " + weighted_terms

    # Encode the prompt and an empty negative prompt as one batch:
    # row 0 is the positive conditioning, row 1 the negative.
    conditioning, pooled = compel([full_prompt, ""])

    results = pipe(
        prompt_embeds=conditioning[0:1],
        pooled_prompt_embeds=pooled[0:1],
        negative_prompt_embeds=conditioning[1:2],
        negative_pooled_prompt_embeds=pooled[1:2],
        generator=generator,
        num_inference_steps=2,
        guidance_scale=guidance_scale,
        output_type="pil",
    )
    print(f"Pipe took {time.perf_counter() - started_at} seconds")

    if SAFETY_CHECKER:
        _, has_nsfw_concepts = check_nsfw_images(results.images)
        if any(has_nsfw_concepts):
            gr.Warning("NSFW content detected.")
            # Blank placeholder instead of the flagged image.
            return Image.new("RGB", (512, 512))

    image = results.images[0]
    # delete=False: the file has to outlive this call so the caller can read
    # it by path; nothing in this function removes it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as tmpfile:
        image.save(tmpfile, "JPEG", quality=80, optimize=True, progressive=True)
    return Path(tmpfile.name)
|
|
|
|
|
# Page stylesheet handed to gr.Blocks:
#   #container  - centred wrapper capped at 80rem (the outer column's elem_id)
#   #intro      - full-width, centred intro block (the Markdown's elem_id)
#   .generating - hidden entirely
#                 NOTE(review): presumably Gradio's in-progress indicator
#                 class — confirm against the Gradio version in use.
css = """
#container{
    margin: 0 auto;
    max-width: 80rem;
}
#intro{
    max-width: 100%;
    margin: 0 auto;
}
.generating {
    display: none
}
"""
|
# Gradio UI: prompt controls on the left, generated image on the right.
# NOTE(review): the nesting of the layout containers below was inferred from
# the statement order — confirm it against the intended layout.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="container"):
        gr.Markdown(
            """
            # SDXL-Lightning- Text To Image 2-Steps
            **Model**: https://huggingface.co/ByteDance/SDXL-Lightning
            """,
            elem_id="intro",
        )

        with gr.Row():
            # Left column: prompt, weighted terms, advanced options, button.
            with gr.Column():
                with gr.Group():
                    prompt = gr.Textbox(
                        label="Prompt",
                        placeholder="Insert your prompt here:",
                        max_lines=1,
                    )
                    prompt_w = PromptWeighting(
                        show_label=False,
                        min=0,
                        max=3,
                        step=0.005,
                    )

                with gr.Accordion("Advanced options", open=True):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=12013012031030,
                        step=1,
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=20.0,
                        step=0.1,
                        value=0.0,
                    )

                generate_bt = gr.Button("Generate")

            # Right column: the output image, served from a file path.
            with gr.Column():
                image = gr.Image(type="filepath")

        # Order matches predict(prompt, prompt_w, guidance_scale, seed).
        inputs = [prompt, prompt_w, guidance_scale, seed]
        outputs = [image]

        # Any edit to a control, or a click on the button, regenerates the
        # image; "always_last" trigger mode is requested for these events.
        gr.on(
            triggers=[
                prompt.input,
                prompt_w.input,
                generate_bt.click,
                guidance_scale.input,
                seed.input,
            ],
            fn=predict,
            inputs=inputs,
            outputs=outputs,
            trigger_mode="always_last",
            show_progress="hidden",
            show_api=False,
        )

demo.queue(api_open=False)
demo.launch()
|
|