"""Gradio app that generates images with a Stable Diffusion XL pipeline.

The pipeline is loaded once at import time. Prompts that look long are encoded
with Compel (no truncation) and passed to the pipeline as embeddings; all other
prompts go through the pipeline's standard text handling.
"""

import gc
import os
import random

# NOTE(review): `spaces` is kept ahead of `torch`, matching the original import
# order -- confirm whether the Spaces runtime depends on that ordering.
import spaces
import gradio as gr
import numpy as np
import PIL.Image
from PIL import Image
from diffusers import StableDiffusionXLPipeline
from diffusers import EulerAncestralDiscreteScheduler
import torch
from compel import Compel, ReturnedEmbeddingsType

# Check if CUDA is available
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")

# Initialize the pipeline ONCE at startup
print("Loading pipeline...")
pipe = StableDiffusionXLPipeline.from_pretrained(
    "dhead/wai-nsfw-illustrious-sdxl-v140-sdxl",
    torch_dtype=torch.float16,
    variant="fp16",
    use_safetensors=True,
    low_cpu_mem_usage=True,
)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Reduce GPU memory pressure: prefer model CPU offload, otherwise fall back to
# attention slicing.
# NOTE(review): `infer` also calls `pipe.to("cuda")` on every request, which
# may defeat the offload enabled here, and the `elif` branch only runs when the
# pipeline lacks `enable_model_cpu_offload`. Left unchanged -- confirm intent.
if hasattr(pipe, "enable_model_cpu_offload"):
    pipe.enable_model_cpu_offload()
elif hasattr(pipe, "enable_attention_slicing"):
    pipe.enable_attention_slicing()

print("Pipeline loaded successfully!")

# Compel instance for long prompt processing; created lazily by
# initialize_compel().
compel = None

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1216

# Default prompt
DEFAULT_PROMPT = "Detailed illustration, realistic style, portrait of a beautiful Japanese woman, wearing an elegant traditional Japanese uniform, neatly tailored with intricate patterns and subtle textures, serene expression, soft natural lighting, standing gracefully in a traditional Japanese garden with cherry blossom petals gently falling in the background, cinematic quality, ultra-detailed, high-resolution, warm tones"


def initialize_compel():
    """Return the shared Compel instance, creating it on first use.

    The instance is built from the module-level pipeline's two tokenizers and
    two text encoders and cached in the module-level ``compel`` variable.

    Returns:
        The Compel instance, or ``None`` if construction failed. A failure is
        printed and not cached, so the next call tries again.
    """
    global compel
    if compel is not None:
        return compel

    try:
        compel = Compel(
            tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
            text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
            requires_pooled=[False, True],
            truncate_long_prompts=False,
        )
    except Exception as e:
        print(f"Failed to initialize Compel: {e}")
        compel = None
    return compel


def process_long_prompt(prompt, negative_prompt=""):
    """Encode a prompt / negative prompt pair with Compel.

    Args:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text; defaults to an empty string.

    Returns:
        A ``(conditioning, pooled)`` tuple in which index 0 along the first
        dimension belongs to ``prompt`` and index 1 to ``negative_prompt``, or
        ``(None, None)`` when Compel is unavailable or encoding fails so the
        caller can fall back to standard prompt handling.
    """
    comp = initialize_compel()
    if comp is None:
        return None, None

    try:
        # The embeddings are only used for inference, so skip autograd
        # bookkeeping while the text encoders run.
        with torch.no_grad():
            conditioning, pooled = comp([prompt, negative_prompt])
    except Exception as e:
        print(f"Long prompt processing failed: {e}, falling back to standard processing")
        return None, None
    return conditioning, pooled


@spaces.GPU(duration=90)  # Increased duration for stability
def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps):
    """Generate one image for the given prompt and settings.

    Args:
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        seed: Seed for the random generator; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a random seed in ``[0, MAX_SEED]``.
        width: Output width in pixels.
        height: Output height in pixels.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of steps forwarded to the pipeline.

    Returns:
        The generated PIL image. If generation raises, the error is printed
        and a solid-colour placeholder of the requested size is returned
        instead: dark grey for ``RuntimeError``, dark red for anything else.
    """
    # UI sliders can deliver floats; the generator seed, the pipeline sizes and
    # the placeholder image all need plain ints.
    width = int(width)
    height = int(height)
    num_inference_steps = int(num_inference_steps)

    try:
        # Move pipeline to GPU inside the GPU-decorated function
        pipe.to("cuda")

        # Ensure all components use the expected dtype
        pipe.text_encoder = pipe.text_encoder.to(dtype=torch.float16)
        pipe.text_encoder_2 = pipe.text_encoder_2.to(dtype=torch.float16)
        pipe.vae = pipe.vae.to(dtype=torch.float16)
        pipe.unet = pipe.unet.to(dtype=torch.float16)

        # Heuristic: more than 60 whitespace-separated words or more than 300
        # characters routes the prompt through Compel.
        use_long_prompt = len(prompt.split()) > 60 or len(prompt) > 300

        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
        generator = torch.Generator(device="cuda").manual_seed(seed)

        # Standard processing is the default; it is replaced by precomputed
        # embeddings only when long prompt processing succeeds.
        prompt_kwargs = {"prompt": prompt, "negative_prompt": negative_prompt}
        if use_long_prompt:
            print("Using long prompt processing...")
            conditioning, pooled = process_long_prompt(prompt, negative_prompt)
            if conditioning is not None:
                prompt_kwargs = {
                    "prompt_embeds": conditioning[0:1],
                    "pooled_prompt_embeds": pooled[0:1],
                    "negative_prompt_embeds": conditioning[1:2],
                    "negative_pooled_prompt_embeds": pooled[1:2],
                }

        return pipe(
            **prompt_kwargs,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            width=width,
            height=height,
            generator=generator,
        ).images[0]

    except RuntimeError as e:
        print(f"Runtime error during generation: {e}")
        # Return a blank image in place of the result
        return Image.new('RGB', (width, height), color=(50, 50, 50))

    except Exception as e:
        print(f"Unexpected error: {e}")
        return Image.new('RGB', (width, height), color=(100, 0, 0))

    finally:
        # Runs on success and on failure. Collect Python garbage first so the
        # memory of dropped tensors can be released by the cache flush.
        gc.collect()
        torch.cuda.empty_cache()


css = """
/* Main container styling */
#col-container {
    margin: 0 auto;
    max-width: 1024px;
}

/* Gradient background for the entire app */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #f5576c 75%, #ffc947 100%);
    min-height: 100vh;
}

/* Main block styling with semi-transparent background */
.contain {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 20px;
    padding: 20px;
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37);
    backdrop-filter: blur(4px);
    border: 1px solid rgba(255, 255, 255, 0.18);
}

/* Input field styling */
.gr-text-input {
    background: rgba(255, 255, 255, 0.9) !important;
    border: 2px solid rgba(102, 126, 234, 0.3) !important;
    border-radius: 10px !important;
}

/* Button styling */
.gr-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: bold !important;
    transition: all 0.3s ease !important;
}

.gr-button:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
}

/* Accordion styling */
.gr-accordion {
    background: rgba(255, 255, 255, 0.8) !important;
    border-radius: 10px !important;
    margin-top: 10px !important;
}

/* Result image container */
.gr-image {
    border-radius: 15px !important;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.1) !important;
}

/* Slider styling */
.gr-slider {
    background: rgba(255, 255, 255, 0.8) !important;
}

/* Additional styling for headers */
h1, h2, h3 {
    color: #333 !important;
    text-align: center;
}

/* Markdown text styling */
.gr-markdown {
    text-align: center;
    margin-bottom: 20px;
}
"""

print("Building Gradio interface...")

# Build the Gradio interface
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            """
            # 🎨 Stable Diffusion XL Image Generator
            ### Create stunning AI-generated images with advanced controls
            """
        )

        # Badge section (the HTML payload is empty in this file)
        gr.HTML(
            """
            """
        )

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt (long prompts are automatically supported)",
                container=False,
                value=DEFAULT_PROMPT,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(format="png", label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                value="monochrome, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn,",
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=MAX_IMAGE_SIZE,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=7,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=28,
                    step=1,
                    value=28,
                )

    # Connect the run button to the inference function
    run_button.click(
        fn=infer,
        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result],
    )

print("Starting Gradio app...")

# Launch the app - CRITICAL: This must be at the module level for Spaces
demo.queue(max_size=20)
demo.launch()