"""Gradio demo for 8-step image editing with Qwen-Image-Edit.

The app loads the Qwen-Image-Edit pipeline with a Lightning LoRA fused in,
optionally rewrites the user's edit instruction through a hosted LLM (using a
Hugging Face token supplied by the user), and shows the edited images together
with a small HTML box describing which prompt was actually used.
"""

import base64
import html
import json
import logging
import os
import random

# Opt out of telemetry *before* the third-party imports below, so that any
# library which reads these flags at import time actually sees them.
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')

import gradio as gr  # noqa: E402
import numpy as np  # noqa: E402
import spaces  # noqa: E402
import torch  # noqa: E402
from diffusers import QwenImageEditPipeline  # noqa: E402
from diffusers.utils import is_xformers_available  # noqa: E402
from huggingface_hub import InferenceClient  # noqa: E402
from PIL import Image  # noqa: E402

# NOTE(review): DEBUG is very verbose for a public demo - consider INFO.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Largest seed offered by the UI / drawn when "Randomize Seed" is ticked.
MAX_SEED = np.iinfo(np.int32).max

# Model used for prompt rewriting and the inference provider that serves it.
REWRITE_MODEL = "Qwen/Qwen3-235B-A22B-Instruct-2507"
REWRITE_PROVIDER = "cerebras"


def get_caption_language(prompt):
    """Return ``'zh'`` if *prompt* contains a CJK Unified Ideograph, else ``'en'``."""
    # U+4E00..U+9FFF is the CJK Unified Ideographs block.
    if any('\u4e00' <= char <= '\u9fff' for char in prompt):
        return 'zh'
    return 'en'


def _extract_rewritten(raw_reply):
    """Pull the rewritten instruction out of the model's reply.

    The system prompt asks for ``{"Rewritten": "..."}``. When the reply (with
    or without a surrounding code fence) contains such an object, the value of
    ``Rewritten`` is returned; otherwise the reply text itself is used. In
    both cases the result is stripped and flattened onto one line.
    """
    text = raw_reply.strip()
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        try:
            payload = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            payload = None
        if isinstance(payload, dict):
            candidate = payload.get("Rewritten")
            if isinstance(candidate, str) and candidate.strip():
                text = candidate
    return text.strip().replace("\n", " ")


def _request_rewrite(original_prompt, system_prompt, hf_token):
    """Ask the hosted LLM to rewrite *original_prompt*; raise on any failure.

    Raises:
        ValueError: if the model returns no usable text.
        Exception: whatever the inference client raises (network, auth, ...).
    """
    client = InferenceClient(
        provider=REWRITE_PROVIDER,
        api_key=hf_token.strip(),
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]
    completion = client.chat.completions.create(
        model=REWRITE_MODEL,
        messages=messages,
        max_tokens=512,
    )
    reply = completion.choices[0].message.content
    if not reply or not reply.strip():
        raise ValueError("The rewriting model returned an empty reply.")
    rewritten = _extract_rewritten(reply)
    if not rewritten:
        raise ValueError("The rewriting model returned an empty instruction.")
    return rewritten


def polish_prompt(original_prompt, system_prompt, hf_token):
    """Rewrite the prompt using a Hugging Face InferenceClient (best effort).

    Requires a user-provided HF token for API access. This function never
    raises: when the token is missing or the API call fails, a Gradio warning
    is shown and *original_prompt* is returned unchanged.

    Args:
        original_prompt: The instruction typed by the user.
        system_prompt: System message steering the rewriting model.
        hf_token: Hugging Face API token supplied by the user.

    Returns:
        The rewritten instruction on a single line, or *original_prompt* on
        any failure.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt

    try:
        return _request_rewrite(original_prompt, system_prompt, hf_token)
    except Exception:
        # Deliberate best-effort boundary: log the cause, keep the app running.
        logger.exception("Error during Hugging Face API call")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt


SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.

## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.

## 2. Task Types

### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.

### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.

### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.

### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**: `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"` if applicable.

## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
    "Rewritten": "..."
}
'''

dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)

# Load LoRA weights for acceleration
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
pipe.fuse_lora()

if is_xformers_available():
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception:
        # xformers is an optional speed-up; the app still works without it.
        logger.exception("xformers not available or failed to load.")
else:
    print("xformers not available or failed to load.")


def _render_prompt_info(variant, title, rows=(), note=None):
    """Build the HTML status box shown under the gallery.

    Args:
        variant: CSS modifier class (``success``, ``warning``, ``error`` or
            ``default``) matching the ``.prompt-info-box`` rules in
            ``custom_css``.
        title: Heading text.
        rows: Iterable of ``(label, value)`` pairs; an empty label renders the
            value on its own.
        note: Optional small-print line.

    All dynamic text is HTML-escaped because it comes from the user, from the
    rewriting model or from exception messages.
    """
    parts = [
        f'<div class="prompt-info-box {variant}">',
        f"<h3>{html.escape(title)}</h3>",
    ]
    for label, value in rows:
        prefix = f"<strong>{html.escape(label)}:</strong> " if label else ""
        parts.append(f"<p>{prefix}{html.escape(str(value))}</p>")
    if note:
        parts.append(f'<p class="note">{html.escape(note)}</p>')
    parts.append("</div>")
    return "\n".join(parts)


def _prepare_prompt(original_prompt, rewrite_prompt, hf_token):
    """Return ``(prompt_to_use, status_html)`` for one generation request."""
    if not rewrite_prompt:
        return original_prompt, _render_prompt_info(
            "default", "📝 Original Prompt", [("", original_prompt)]
        )

    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt, _render_prompt_info(
            "warning",
            "⚠️ Prompt Rewriting Skipped",
            [("Original", original_prompt)],
            note="HF Token required for enhancement",
        )

    try:
        # Call the raising helper (not polish_prompt) so that a failed API
        # call is reported as a failure instead of a silent "success".
        rewritten_prompt = _request_rewrite(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
    except Exception as e:
        logger.exception("Prompt rewriting failed")
        gr.Warning(f"Prompt rewriting failed: {str(e)}")
        return original_prompt, _render_prompt_info(
            "error",
            "❌ Enhancement Failed",
            [("Original", original_prompt), ("Error", str(e))],
        )

    return rewritten_prompt, _render_prompt_info(
        "success",
        "✨ Enhanced Successfully",
        [("Original", original_prompt), ("Enhanced", rewritten_prompt)],
    )


@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit *image* according to *prompt* and return the results.

    Requires a user-provided HF token for prompt rewriting.

    Args:
        image: Input image passed to the pipeline (the UI supplies a PIL image).
        prompt: Edit instruction typed by the user.
        seed: Seed for the random generator; replaced when *randomize_seed*.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]``.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: Whether to rewrite *prompt* through the hosted LLM.
        hf_token: Hugging Face token used only for the rewriting call.
        num_images_per_prompt: Number of images to generate.
        progress: Gradio progress tracker (mirrors tqdm output in the UI).

    Returns:
        ``(edited_images, seed, prompt_info)`` - the generated images, the
        seed actually used, and an HTML snippet describing the prompt used.

    Raises:
        gr.Error: if no input image was provided.
    """
    if image is None:
        raise gr.Error("Please upload an input image before generating an edit.")

    negative_prompt = " "
    rewritten_prompt, prompt_info = _prepare_prompt(prompt, rewrite_prompt, hf_token)

    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # Slider values may arrive as floats; the generator and pipeline need ints.
    seed = int(seed)

    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
    ).images

    return edited_images, seed, prompt_info


examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.""",
]

# Custom CSS for enhanced visual design
custom_css = """
/* Gradient background */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #fecfef 75%, #fecfef 100%);
    min-height: 100vh;
}

/* Main container styling */
.container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    padding: 2rem !important;
}

/* Card-like sections */
.gr-box {
    background: rgba(255, 255, 255, 0.95) !important;
    backdrop-filter: blur(10px) !important;
    border-radius: 20px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.5) !important;
    padding: 1.5rem !important;
    margin-bottom: 1.5rem !important;
}

/* Header styling */
h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem !important;
    font-weight: 800 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

h2 {
    color: #4a5568 !important;
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    margin-bottom: 1rem !important;
}

/* Button styling */
.gr-button-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    padding: 0.8rem 2rem !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5) !important;
}

/* Input fields styling */
.gr-input, .gr-text-input, .gr-slider, .gr-dropdown {
    border-radius: 10px !important;
    border: 2px solid #e2e8f0 !important;
    background: white !important;
    transition: all 0.3s ease !important;
}

.gr-input:focus, .gr-text-input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

/* Accordion styling */
.gr-accordion {
    background: rgba(255, 255, 255, 0.8) !important;
    border-radius: 12px !important;
    border: 1px solid rgba(102, 126, 234, 0.2) !important;
    overflow: hidden !important;
}

/* Gallery styling */
.gr-gallery {
    border-radius: 12px !important;
    overflow: hidden !important;
}

/* Prompt info boxes */
.prompt-info-box {
    padding: 1.5rem;
    border-radius: 12px;
    margin: 1rem 0;
    animation: fadeIn 0.5s ease;
}

.prompt-info-box h3 {
    margin: 0 0 0.75rem 0;
    font-size: 1.2rem;
    font-weight: 600;
}

.prompt-info-box p {
    margin: 0.5rem 0;
    line-height: 1.6;
}

.prompt-info-box.success {
    background: linear-gradient(135deg, #d4f4dd 0%, #e3f9e5 100%);
    border-left: 4px solid #48bb78;
}

.prompt-info-box.warning {
    background: linear-gradient(135deg, #fef5e7 0%, #fff9ec 100%);
    border-left: 4px solid #f6ad55;
}

.prompt-info-box.error {
    background: linear-gradient(135deg, #fed7d7 0%, #fee5e5 100%);
    border-left: 4px solid #fc8181;
}

.prompt-info-box.default {
    background: linear-gradient(135deg, #e6f3ff 0%, #f0f7ff 100%);
    border-left: 4px solid #667eea;
}

.prompt-info-box .note {
    font-size: 0.9rem;
    color: #718096;
    font-style: italic;
}

/* Checkbox styling */
.gr-checkbox {
    background: white !important;
    border-radius: 8px !important;
    padding: 0.5rem !important;
}

/* Token input field */
input[type="password"] {
    font-family: monospace !important;
    letter-spacing: 0.05em !important;
}

/* Info badges */
.gr-markdown p {
    color: #4a5568;
    line-height: 1.6;
}

.gr-markdown a {
    color: #667eea !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: color 0.3s ease !important;
}

.gr-markdown a:hover {
    color: #764ba2 !important;
    text-decoration: underline !important;
}

/* Animation */
@keyframes fadeIn {
    from {
        opacity: 0;
        transform: translateY(10px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Slider styling */
.gr-slider input[type="range"] {
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
}

/* Group styling */
.gr-group {
    background: rgba(249, 250, 251, 0.8) !important;
    border-radius: 12px !important;
    padding: 1rem !important;
    margin-top: 1rem !important;
}

/* Loading spinner customization */
.gr-loading {
    color: #667eea !important;
}

/* Example buttons */
.gr-examples button {
    background: white !important;
    border: 2px solid #e2e8f0 !important;
    border-radius: 8px !important;
    padding: 0.5rem 1rem !important;
    transition: all 0.3s ease !important;
}

.gr-examples button:hover {
    border-color: #667eea !important;
    background: rgba(102, 126, 234, 0.05) !important;
}
"""

with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Nano-Banana")
    gr.Markdown("✨ **Ultra-fast 8-step image editing with AI-powered prompt enhancement**")
    gr.Markdown("🔐 **Secure prompt rewriting with your [Hugging Face token](https://huggingface.co/settings/tokens)**")

    with gr.Row():
        # Left column: inputs and generation settings.
        with gr.Column(scale=1):
            with gr.Group():
                input_image = gr.Image(
                    label="📸 Input Image",
                    type="pil",
                    elem_classes="gr-box"
                )
                prompt = gr.Text(
                    label="✏️ Edit Instruction",
                    placeholder="e.g. Add a dog to the right side, change the sky to sunset...",
                    lines=3,
                    elem_classes="gr-box"
                )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0
                )
                randomize_seed = gr.Checkbox(label="🎲 Randomize Seed", value=True)
                with gr.Row():
                    true_guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=4.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8
                    )
                    num_images_per_prompt = gr.Slider(
                        label="Images per Prompt",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=1
                    )

            run_button = gr.Button("🚀 Generate Edit", variant="primary", size="lg")

        # Right column: results and prompt-rewriting controls.
        with gr.Column(scale=1):
            result = gr.Gallery(
                label="🖼️ Output Images",
                show_label=True,
                columns=2,
                rows=2,
                elem_classes="gr-box"
            )

            # Prompt display component
            prompt_info = gr.HTML(visible=False)

            with gr.Group():
                rewrite_toggle = gr.Checkbox(
                    label="🤖 Enable AI Prompt Enhancement",
                    value=False,
                    interactive=True
                )
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Your token is secure and only used for API calls. Get yours from HuggingFace settings.",
                    elem_classes="gr-box"
                )

    def toggle_token_visibility(checked):
        """Show the token field only while prompt enhancement is enabled."""
        return gr.update(visible=checked)

    rewrite_toggle.change(
        toggle_token_visibility,
        inputs=[rewrite_toggle],
        outputs=[hf_token_input]
    )

    # Examples section
    gr.Examples(
        examples=examples,
        inputs=prompt,
        label="💡 Example Prompts"
    )

    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )

    # Show prompt info box after processing
    def set_prompt_visible():
        """Reveal the (initially hidden) prompt info box."""
        return gr.update(visible=True)

    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

if __name__ == "__main__":
    demo.launch()