"""Gradio demo for the SDXL base + refiner ensemble-of-experts pipeline.

The base model denoises the first ~80% of the schedule and hands its
latents to the refiner, which finishes the remaining ~20%.
"""

import gradio as gr
import torch
from diffusers import DiffusionPipeline

# fp16 kernels are unavailable/very slow on CPU, so pair the dtype with the
# device: half precision on CUDA, full precision on CPU.
if torch.cuda.is_available():
    device = "cuda"
    dtype = torch.float16
else:
    device = "cpu"
    dtype = torch.float32

# Load both experts; the refiner shares the base's text_encoder_2 and VAE
# to avoid loading those weights twice.
base = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=dtype,
    variant="fp16",
    use_safetensors=True,
    cache_dir="./local_model_cache",
)
base.to(device)

refiner = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0",
    text_encoder_2=base.text_encoder_2,
    vae=base.vae,
    torch_dtype=dtype,
    use_safetensors=True,
    variant="fp16",
    cache_dir="./local_model_cache",
)
refiner.to(device)

# Total denoising steps and the fraction handled by the base model (80/20
# split between base and refiner).
n_steps = 40
high_noise_frac = 0.8


def create_image(prompt):
    """Generate an image from *prompt* using the base+refiner ensemble.

    Args:
        prompt: Text description of the desired image.

    Returns:
        A PIL.Image produced by the refiner stage.
    """
    # Base model runs steps [0, high_noise_frac) and emits latents rather
    # than a decoded image, so the refiner can continue in latent space.
    latents = base(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_end=high_noise_frac,
        output_type="latent",
    ).images
    # Refiner picks up at high_noise_frac and decodes the final image.
    image = refiner(
        prompt=prompt,
        num_inference_steps=n_steps,
        denoising_start=high_noise_frac,
        image=latents,
    ).images[0]
    return image


demo = gr.Interface(
    fn=create_image,
    inputs=["text"],
    outputs=["image"],
)

if __name__ == "__main__":
    demo.launch()