# No need for dynamic package installation since we're using requirements.txt
import subprocess
import sys

# Optional: check whether flash-attn is importable (but don't fail if it isn't)
try:
    import flash_attn
    print("Flash attention is available")
except ImportError:
    print("Flash attention not available - using standard attention (this is fine)")

# Ensure PEFT is available for LoRA
try:
    import peft
    print("PEFT library is available")
except ImportError:
    print("Installing PEFT for LoRA support...")
    subprocess.run([sys.executable, "-m", "pip", "install", "peft>=0.7.0"], check=True)
    import peft
    print("PEFT installed successfully")

import os
import time
from os import path

import gradio as gr
import spaces
import torch
from diffusers import FluxPipeline
from diffusers.pipelines.stable_diffusion import safety_checker
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

# No special attention mechanisms needed - the model will work fine without them

# Setup and initialization: cache everything next to this script
cache_path = path.join(path.dirname(path.abspath(__file__)), "models")
PERSISTENT_DIR = os.environ.get("PERSISTENT_DIR", ".")
os.environ["TRANSFORMERS_CACHE"] = cache_path
os.environ["HF_HUB_CACHE"] = cache_path
os.environ["HF_HOME"] = cache_path

torch.backends.cuda.matmul.allow_tf32 = True
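# The header above assumes dependencies come from requirements.txt. A minimal
# sketch of that file, inferred from the imports in this script (version pins
# are illustrative assumptions, not taken from the original):
#
#   torch
#   diffusers
#   transformers
#   accelerate
#   gradio
#   spaces
#   huggingface_hub
#   peft>=0.7.0
#   timm
#   sentencepiece
#   Pillow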
# Initialize the Florence caption models
print("Initializing Florence models...")
florence_models = {}
florence_processors = {}

# Florence-2 needs the timm library for its vision backbone
try:
    import timm
    print("timm library is available")
except ImportError:
    print("ERROR: timm library not found. Installing...")
    subprocess.run([sys.executable, "-m", "pip", "install", "timm"], check=True)
    import timm
    print("timm installed successfully")

# Initialize Florence models with better error handling
model_configs = [
    ('gokaygokay/Florence-2-Flux-Large', True),   # Primary model
    ('gokaygokay/Florence-2-Flux', False),        # Fallback model
]

for model_name, is_primary in model_configs:
    try:
        print(f"Loading {model_name}...")
        florence_models[model_name] = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            low_cpu_mem_usage=True,  # Reduce peak RAM while loading
        ).eval()
        florence_processors[model_name] = AutoProcessor.from_pretrained(
            model_name,
            trust_remote_code=True
        )
        print(f"Successfully loaded {model_name}")
        # If the primary model loaded, there is no need for the fallback
        if is_primary:
            break
    except Exception as e:
        print(f"Warning: Could not load {model_name}: {e}")
        if is_primary:
            print("Attempting to load fallback model...")
        continue

if not florence_models:
    print("WARNING: No Florence models could be loaded. Caption generation will not be available.")
    print("Users will need to manually enter image descriptions.")
else:
    print(f"Successfully loaded {len(florence_models)} Florence model(s)")


def filter_prompt(prompt):
    """Very simple keyword-based content filter."""
    inappropriate_keywords = [
        "sex",
    ]
    prompt_lower = prompt.lower()
    for keyword in inappropriate_keywords:
        if keyword in prompt_lower:
            return False, "The prompt contains inappropriate content."
    return True, prompt
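# The substring check in filter_prompt also flags innocent words that merely
# contain a keyword (e.g. "Sussex"). A word-boundary regex is one possible
# refinement; this sketch is illustrative, and filter_prompt_strict is a
# hypothetical helper that the rest of the script does not call.
import re

def filter_prompt_strict(prompt, keywords=("sex",)):
    """Stricter variant of filter_prompt that matches whole words only."""
    for keyword in keywords:
        # \b anchors the keyword at word boundaries, so "Sussex" no longer matches
        if re.search(rf"\b{re.escape(keyword)}\b", prompt, re.IGNORECASE):
            return False, "The prompt contains inappropriate content."
    return True, prompt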
class timer:
    """Context manager that logs how long the wrapped block takes."""

    def __init__(self, method_name="timed process"):
        self.method = method_name

    def __enter__(self):
        self.start = time.time()
        print(f"{self.method} starts")

    def __exit__(self, exc_type, exc_val, exc_tb):
        end = time.time()
        print(f"{self.method} took {round(end - self.start, 2)}s")
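# Example usage of the timer context manager (illustrative only):
#
#   with timer("FLUX inference"):
#       image = pipe(prompt="a cat").images[0]  # prints "FLUX inference took N.NNs"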
# Model initialization
if not path.exists(cache_path):
    os.makedirs(cache_path, exist_ok=True)

print("Loading FLUX pipeline...")
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",
    torch_dtype=torch.bfloat16
)

# Track whether LoRA was loaded successfully
LORA_LOADED = False

print("Loading LoRA weights...")
try:
    # Method 1: download the weights file first, then load from the local path
    lora_path = hf_hub_download(
        "ByteDance/Hyper-SD",
        "Hyper-FLUX.1-dev-8steps-lora.safetensors"
    )
    pipe.load_lora_weights(lora_path)
    pipe.fuse_lora(lora_scale=0.125)
    LORA_LOADED = True
    print("LoRA weights loaded and fused successfully (Method 1)")
except Exception as e1:
    print(f"Method 1 failed: {e1}")
    try:
        # Method 2: load directly from the repo by weight name
        pipe.load_lora_weights(
            "ByteDance/Hyper-SD",
            weight_name="Hyper-FLUX.1-dev-8steps-lora.safetensors"
        )
        pipe.fuse_lora(lora_scale=0.125)
        LORA_LOADED = True
        print("LoRA weights loaded and fused successfully (Method 2)")
    except Exception as e2:
        print(f"Method 2 failed: {e2}")
        print("WARNING: Could not load LoRA weights. Continuing without LoRA.")
        print("The model will still work but may require more inference steps for good quality.")
        print("Recommended: use 20-30 inference steps instead of 8.")

pipe.to(device="cuda", dtype=torch.bfloat16)
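# The fused weights above are Hyper-SD's 8-step distillation LoRA for
# FLUX.1-dev, which is why 8 inference steps become viable when it loads.
# The lora_scale=0.125 value mirrors the upstream ByteDance/Hyper-SD usage
# example rather than tuning done in this script.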
# Safety checker initialization
# Note: FluxPipeline does not consume this attribute itself, so the checker is
# attached here but generated images are not filtered through it automatically.
try:
    pipe.safety_checker = safety_checker.StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    )
except Exception as e:
    print(f"Warning: Could not load safety checker: {e}")
    pipe.safety_checker = None


@spaces.GPU
def generate_caption(image, model_name='gokaygokay/Florence-2-Flux-Large'):
    if not florence_models:
        gr.Warning("Caption models are not loaded. Please refresh the page.")
        return "Caption generation unavailable - please describe your image manually"

    # Guard against clicking the caption button before uploading an image
    if image is None:
        gr.Warning("Please upload an image first.")
        return ""

    # Fall back to whichever model did load if the requested one isn't available
    if model_name not in florence_models:
        model_name = list(florence_models.keys())[0]
        print(f"Using fallback model: {model_name}")

    image = Image.fromarray(image)
    task_prompt = "<DESCRIPTION>"  # task token expected by the Florence-2-Flux models
    prompt = task_prompt + "Describe this image in great detail."

    if image.mode != "RGB":
        image = image.convert("RGB")

    model = florence_models[model_name]
    processor = florence_processors[model_name]

    inputs = processor(text=prompt, images=image, return_tensors="pt")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
        repetition_penalty=1.10,
    )
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height)
    )
    return parsed_answer["<DESCRIPTION>"]
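# As written, the Florence model runs on CPU. Since @spaces.GPU attaches a GPU
# for the duration of the call, an optional (untested here) variant would move
# the model and tensors over before generating:
#
#   model = florence_models[model_name].to("cuda")
#   inputs = {k: v.to("cuda") for k, v in inputs.items()}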
@spaces.GPU
def process_and_save_image(height, width, steps, scales, prompt, seed):
    is_safe, filtered_prompt = filter_prompt(prompt)
    if not is_safe:
        gr.Warning("The prompt contains inappropriate content.")
        return None

    with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16), timer("inference"):
        try:
            generated_image = pipe(
                prompt=[filtered_prompt],
                generator=torch.Generator().manual_seed(int(seed)),
                num_inference_steps=int(steps),
                guidance_scale=float(scales),
                height=int(height),
                width=int(width),
                max_sequence_length=256
            ).images[0]
            return generated_image
        except Exception as e:
            print(f"Error in image generation: {str(e)}")
            gr.Warning(f"Error generating image: {str(e)}")
            return None


def get_random_seed():
    return torch.randint(0, 1000000, (1,)).item()


def update_seed():
    return get_random_seed()


# CSS styles
css = """
footer {display: none !important}
.gradio-container {
    max-width: 1200px;
    margin: auto;
}
.contain {
    background: rgba(255, 255, 255, 0.05);
    border-radius: 12px;
    padding: 20px;
}
.generate-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    border: none !important;
    color: white !important;
}
.generate-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(0,0,0,0.2);
}
.title {
    text-align: center;
    font-size: 2.5em;
    font-weight: bold;
    margin-bottom: 1em;
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
}
.tabs {
    margin-top: 20px;
    border-radius: 10px;
    overflow: hidden;
}
.tab-nav {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%);
    padding: 10px;
}
.tab-nav button {
    color: white;
    border: none;
    padding: 10px 20px;
    margin: 0 5px;
    border-radius: 5px;
    transition: all 0.3s ease;
}
.tab-nav button.selected {
    background: rgba(255, 255, 255, 0.2);
}
.image-upload-container {
    border: 2px dashed #4B79A1;
    border-radius: 10px;
    padding: 20px;
    text-align: center;
    transition: all 0.3s ease;
}
.image-upload-container:hover {
    border-color: #283E51;
    background: rgba(75, 121, 161, 0.1);
}
.primary-btn {
    background: linear-gradient(90deg, #4B79A1 0%, #283E51 100%) !important;
    font-size: 1.2em !important;
    padding: 12px 20px !important;
    margin-top: 20px !important;
}
hr {
    border: none;
    border-top: 1px solid rgba(75, 121, 161, 0.2);
    margin: 20px 0;
}
.input-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 20px;
}
.output-section {
    background: rgba(255, 255, 255, 0.03);
    border-radius: 12px;
    padding: 20px;
}
.example-images {
    display: grid;
    grid-template-columns: repeat(4, 1fr);
    gap: 10px;
    margin-bottom: 20px;
}
.example-images img {
    width: 100%;
    height: 150px;
    object-fit: cover;
    border-radius: 8px;
    cursor: pointer;
    transition: transform 0.2s;
}
.example-images img:hover {
    transform: scale(1.05);
}
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    # Page header and subtitle. The original markup was lost during extraction;
    # the tags below are an assumption built on the .title class defined in the CSS.
    gr.HTML('<div class="title">FLUX VisionReply</div>')
    gr.HTML('<div style="text-align: center;">Upload an image (Image2Text2Image)</div>')

    with gr.Row():
        # Left column: input section
        with gr.Column(scale=3):
            # Image upload section
            input_image = gr.Image(
                label="Upload Image (Optional)",
                type="numpy",
                elem_classes=["image-upload-container"]
            )

            # Example image gallery
            example_images = [
                "5.jpg",
                "6.jpg",
                "2.jpg",
                "3.jpg",
                "1.jpg",
                "4.jpg",
            ]
            gr.Examples(
                examples=example_images,
                inputs=input_image,
                label="Example Images",
                examples_per_page=4
            )

            # Florence model selection - hidden from the UI
            available_models = list(florence_models.keys()) if florence_models else []
            florence_model = gr.Dropdown(
                choices=available_models,
                label="Caption Model",
                value=available_models[0] if available_models else None,
                visible=False
            )

            # Caption button - only interactive if caption models are available
            if florence_models:
                caption_button = gr.Button(
                    "🔍 Generate Caption from Image",
                    elem_classes=["generate-btn"]
                )
            else:
                caption_button = gr.Button(
                    "⚠️ Caption Generation Unavailable - Enter Description Manually",
                    elem_classes=["generate-btn"],
                    interactive=False
                )

            # Divider (exact markup was lost; a plain <hr> matches the hr rule in the CSS)
            gr.HTML('<hr>')

            # Text prompt section
            prompt = gr.Textbox(
                label="Image Description",
                placeholder="Enter text description or use generated caption above...",
                lines=3
            )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    height = gr.Slider(
                        label="Height",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                    width = gr.Slider(
                        label="Width",
                        minimum=256,
                        maximum=1152,
                        step=64,
                        value=1024
                    )
                with gr.Row():
                    # Default steps depend on whether the Hyper-SD LoRA is active
                    default_steps = 8 if LORA_LOADED else 20
                    steps = gr.Slider(
                        label="Inference Steps" + (" (LoRA Enabled)" if LORA_LOADED else " (No LoRA - More Steps Recommended)"),
                        minimum=6,
                        maximum=50,
                        step=1,
                        value=default_steps
                    )
                    scales = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.0,
                        maximum=5.0,
                        step=0.1,
                        value=3.5
                    )
                seed = gr.Number(
                    label="Seed",
                    value=get_random_seed(),
                    precision=0
                )
                randomize_seed = gr.Button(
                    "🎲 Randomize Seed",
                    elem_classes=["generate-btn"]
                )

            generate_btn = gr.Button(
                "✨ Generate Image",
                elem_classes=["generate-btn", "primary-btn"]
            )

        # Right column: output section
        with gr.Column(scale=4):
            output = gr.Image(
                label="Generated Image",
                elem_classes=["output-image"]
            )

    # Event handlers
    if florence_models:
        caption_button.click(
            generate_caption,
            inputs=[input_image, florence_model],
            outputs=[prompt]
        )

    generate_btn.click(
        process_and_save_image,
        inputs=[height, width, steps, scales, prompt, seed],
        outputs=[output]
    )

    randomize_seed.click(
        update_seed,
        outputs=[seed]
    )

    generate_btn.click(
        update_seed,
        outputs=[seed]
    )
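# Note: generate_btn has two .click listeners registered on purpose - Gradio
# fires every listener attached to the same event, so each generation also
# rolls a fresh seed into the Seed field for the next run.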
if __name__ == "__main__":
    demo.launch(allowed_paths=[PERSISTENT_DIR])