# Nano-Banana

# Running on Zero

# File size: 16,809 Bytes

import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from diffusers.utils import is_xformers_available
import os
import base64
import json
from huggingface_hub import InferenceClient
import logging

#############################
# Opt out of Gradio analytics and Hugging Face Hub telemetry. setdefault()
# leaves any value already present in the environment untouched.
# NOTE(review): these are set after the imports at the top of the file;
# libraries that read the variables at import time may not see them — confirm.
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
# Configure root logging at DEBUG level and create this module's logger.
# NOTE(review): DEBUG is verbose for a deployed app — confirm it is intended.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
#############################

def get_caption_language(prompt):
    """Return 'zh' if the prompt holds a CJK Unified Ideograph, else 'en'.

    Only the U+4E00..U+9FFF block is checked; any other script (including
    an empty prompt) is reported as 'en'.
    """
    cjk_first, cjk_last = '\u4e00', '\u9fff'
    contains_cjk = any(cjk_first <= ch <= cjk_last for ch in prompt)
    return 'zh' if contains_cjk else 'en'

def _extract_rewritten(reply_text):
    """Return the "Rewritten" value from a JSON reply, or the reply itself.

    The text between the first '{' and the last '}' is parsed as JSON, so
    replies wrapped in prose or code fences are still handled. If no JSON
    object with a non-empty string under "Rewritten" is found, *reply_text*
    is returned unchanged.
    """
    start = reply_text.find("{")
    end = reply_text.rfind("}")
    if start == -1 or end <= start:
        return reply_text
    try:
        payload = json.loads(reply_text[start:end + 1])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return reply_text
    if not isinstance(payload, dict):
        return reply_text
    rewritten = payload.get("Rewritten")
    if isinstance(rewritten, str) and rewritten.strip():
        return rewritten
    return reply_text


def polish_prompt(original_prompt, system_prompt, hf_token):
    """
    Rewrites the prompt using a Hugging Face InferenceClient.
    Requires user-provided HF token for API access.

    Args:
        original_prompt: The user's instruction, sent as the "user" message.
        system_prompt: Instructions for the rewriting model, sent as the
            "system" message.
        hf_token: Hugging Face token used as the API key.

    Returns:
        The rewritten prompt on a single line. When the model answers with a
        JSON object of the form {"Rewritten": "..."}, only that value is
        returned; otherwise the model's reply text is returned. If the token
        is missing or the API call fails, ``original_prompt`` is returned
        unchanged and a Gradio warning is shown.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt
    client = InferenceClient(
        provider="cerebras",
        api_key=hf_token,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt}
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        reply = completion.choices[0].message.content
        # Flatten first: a raw newline inside a JSON string is invalid JSON,
        # and replacing it with a space makes such replies parseable.
        flattened = reply.strip().replace("\n", " ")
        # Without this step the JSON wrapper itself would be used as the
        # image-edit prompt.
        polished_prompt = _extract_rewritten(flattened)
        # Escaped "\n" sequences decode to real newlines; flatten again.
        return polished_prompt.strip().replace("\n", " ")
    except Exception:
        # Best-effort feature: any failure falls back to the original prompt.
        logger.exception("Error during Hugging Face API call")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt

# System prompt for the prompt-rewriting model. infer() passes it to
# polish_prompt() as the "system" message. Its final section asks the model
# to reply with a JSON object whose "Rewritten" key holds the new instruction.
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:  
  `"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`  
  if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
  "Rewritten": "..."
}
'''

# Module-level model setup: everything below runs once, at import time.
# The pipeline is loaded in bfloat16 and placed on CUDA when available,
# otherwise on the CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype).to(device)

# Load LoRA weights for acceleration
# (the weight file name indicates an 8-step variant, matching the default of
# 8 inference steps used by infer() and the UI slider).
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning", weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
)
# Fuse the loaded LoRA into the pipeline right after loading it.
pipe.fuse_lora()

# Use xformers memory-efficient attention only when the library is available;
# otherwise just report it and continue with the default attention.
if is_xformers_available():
    pipe.enable_xformers_memory_efficient_attention()
else:
    print("xformers not available or failed to load.")

@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Edit ``image`` according to ``prompt`` and build the prompt status panel.
    Requires user-provided HF token for prompt rewriting.

    Args:
        image: Input image, passed to the pipeline as its first argument.
        prompt: Edit instruction typed by the user.
        seed: Seed for the torch generator; replaced when ``randomize_seed``
            is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]``.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: Rewrite the prompt with ``polish_prompt`` before
            editing; skipped (with a warning) when no token is supplied.
        hf_token: Hugging Face token used only for prompt rewriting.
        num_images_per_prompt: Number of images to generate.
        progress: Gradio progress tracker declared with ``track_tqdm=True``;
            not referenced directly in the body.

    Returns:
        Tuple ``(edited_images, seed, prompt_info)``: the generated images,
        the seed actually used, and an HTML snippet describing which prompt
        was applied.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Function-scope import keeps this block self-contained.
    import html

    if image is None:
        # Fail early with a readable message instead of an obscure error
        # from inside the pipeline.
        raise gr.Error("Please upload an input image before generating an edit.")

    original_prompt = prompt  # Save original prompt for display
    negative_prompt = " "
    prompt_info = ""  # Initialize info text

    # The status panel is rendered as raw HTML, so everything that comes from
    # the user or the rewriting model must be escaped before interpolation.
    safe_original = html.escape(str(original_prompt))

    # Handle prompt rewriting with status messages
    if rewrite_prompt:
        if not (hf_token or "").strip():
            gr.Warning("HF Token is required for prompt rewriting but was not provided!")
            prompt_info = f"""<div class="prompt-info-box warning">
<h3>⚠️ Prompt Rewriting Skipped</h3>
<p><strong>Original:</strong> {safe_original}</p>
<p class="note">HF Token required for enhancement</p>
</div>"""
            rewritten_prompt = original_prompt
        else:
            try:
                rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, hf_token)
                safe_rewritten = html.escape(str(rewritten_prompt))
                prompt_info = f"""<div class="prompt-info-box success">
<h3>✨ Enhanced Successfully</h3>
<p><strong>Original:</strong> {safe_original}</p>
<p><strong>Enhanced:</strong> {safe_rewritten}</p>
</div>"""
            except Exception as e:
                gr.Warning(f"Prompt rewriting failed: {str(e)}")
                rewritten_prompt = original_prompt
                safe_error = html.escape(str(e))
                prompt_info = f"""<div class="prompt-info-box error">
<h3>❌ Enhancement Failed</h3>
<p><strong>Original:</strong> {safe_original}</p>
<p class="note">Error: {safe_error}</p>
</div>"""
    else:
        rewritten_prompt = original_prompt
        prompt_info = f"""<div class="prompt-info-box default">
<h3>📝 Original Prompt</h3>
<p>{safe_original}</p>
</div>"""

    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver numbers as floats; manual_seed() and the step /
    # image counts need integers.
    seed = int(seed)
    generator = torch.Generator(device=device).manual_seed(seed)

    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
    ).images

    return edited_images, seed, prompt_info

# Largest signed 32-bit integer. Used as the upper bound of the seed slider
# and of the random seed drawn in infer().
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions shown in the UI through gr.Examples; selecting one
# fills the prompt textbox.
examples = [
    "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
    "Remove the blue sky and replace it with a dark night cityscape.",
    """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font."""
]

# Custom CSS for enhanced visual design.
# Passed to gr.Blocks(css=...) below. The `.gr-box` class is attached to
# components through elem_classes, and the `.prompt-info-box` variants
# (success / warning / error / default) style the HTML panel built by infer().
custom_css = """
/* Gradient background */
.gradio-container {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #fecfef 75%, #fecfef 100%);
    min-height: 100vh;
}

/* Main container styling */
.container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    padding: 2rem !important;
}

/* Card-like sections */
.gr-box {
    background: rgba(255, 255, 255, 0.95) !important;
    backdrop-filter: blur(10px) !important;
    border-radius: 20px !important;
    box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
    border: 1px solid rgba(255, 255, 255, 0.5) !important;
    padding: 1.5rem !important;
    margin-bottom: 1.5rem !important;
}

/* Header styling */
h1 {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    font-size: 3rem !important;
    font-weight: 800 !important;
    text-align: center;
    margin-bottom: 0.5rem !important;
    text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}

h2 {
    color: #4a5568 !important;
    font-size: 1.5rem !important;
    font-weight: 600 !important;
    margin-bottom: 1rem !important;
}

/* Button styling */
.gr-button-primary {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    font-weight: 600 !important;
    font-size: 1.1rem !important;
    padding: 0.8rem 2rem !important;
    border-radius: 12px !important;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
    transition: all 0.3s ease !important;
}

.gr-button-primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5) !important;
}

/* Input fields styling */
.gr-input, .gr-text-input, .gr-slider, .gr-dropdown {
    border-radius: 10px !important;
    border: 2px solid #e2e8f0 !important;
    background: white !important;
    transition: all 0.3s ease !important;
}

.gr-input:focus, .gr-text-input:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}

/* Accordion styling */
.gr-accordion {
    background: rgba(255, 255, 255, 0.8) !important;
    border-radius: 12px !important;
    border: 1px solid rgba(102, 126, 234, 0.2) !important;
    overflow: hidden !important;
}

/* Gallery styling */
.gr-gallery {
    border-radius: 12px !important;
    overflow: hidden !important;
}

/* Prompt info boxes */
.prompt-info-box {
    padding: 1.5rem;
    border-radius: 12px;
    margin: 1rem 0;
    animation: fadeIn 0.5s ease;
}

.prompt-info-box h3 {
    margin: 0 0 0.75rem 0;
    font-size: 1.2rem;
    font-weight: 600;
}

.prompt-info-box p {
    margin: 0.5rem 0;
    line-height: 1.6;
}

.prompt-info-box.success {
    background: linear-gradient(135deg, #d4f4dd 0%, #e3f9e5 100%);
    border-left: 4px solid #48bb78;
}

.prompt-info-box.warning {
    background: linear-gradient(135deg, #fef5e7 0%, #fff9ec 100%);
    border-left: 4px solid #f6ad55;
}

.prompt-info-box.error {
    background: linear-gradient(135deg, #fed7d7 0%, #fee5e5 100%);
    border-left: 4px solid #fc8181;
}

.prompt-info-box.default {
    background: linear-gradient(135deg, #e6f3ff 0%, #f0f7ff 100%);
    border-left: 4px solid #667eea;
}

.prompt-info-box .note {
    font-size: 0.9rem;
    color: #718096;
    font-style: italic;
}

/* Checkbox styling */
.gr-checkbox {
    background: white !important;
    border-radius: 8px !important;
    padding: 0.5rem !important;
}

/* Token input field */
input[type="password"] {
    font-family: monospace !important;
    letter-spacing: 0.05em !important;
}

/* Info badges */
.gr-markdown p {
    color: #4a5568;
    line-height: 1.6;
}

.gr-markdown a {
    color: #667eea !important;
    text-decoration: none !important;
    font-weight: 500 !important;
    transition: color 0.3s ease !important;
}

.gr-markdown a:hover {
    color: #764ba2 !important;
    text-decoration: underline !important;
}

/* Animation */
@keyframes fadeIn {
    from {
        opacity: 0;
        transform: translateY(10px);
    }
    to {
        opacity: 1;
        transform: translateY(0);
    }
}

/* Slider styling */
.gr-slider input[type="range"] {
    background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
}

/* Group styling */
.gr-group {
    background: rgba(249, 250, 251, 0.8) !important;
    border-radius: 12px !important;
    padding: 1rem !important;
    margin-top: 1rem !important;
}

/* Loading spinner customization */
.gr-loading {
    color: #667eea !important;
}

/* Example buttons */
.gr-examples button {
    background: white !important;
    border: 2px solid #e2e8f0 !important;
    border-radius: 8px !important;
    padding: 0.5rem 1rem !important;
    transition: all 0.3s ease !important;
}

.gr-examples button:hover {
    border-color: #667eea !important;
    background: rgba(102, 126, 234, 0.05) !important;
}
"""

# Build the Gradio UI: a two-column layout with inputs and settings on the
# left, results and prompt-enhancement controls on the right, then the event
# wiring that connects the controls to infer().
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Nano-Banana")
    gr.Markdown("✨ **Ultra-fast 8-step image editing with AI-powered prompt enhancement**")
    gr.Markdown("🔐 **Secure prompt rewriting with your [Hugging Face token](https://huggingface.co/settings/tokens)**")
    
    with gr.Row():
        # Left column: input image, edit instruction and advanced settings.
        with gr.Column(scale=1):
            with gr.Group():
                input_image = gr.Image(
                    label="📸 Input Image", 
                    type="pil",
                    elem_classes="gr-box"
                )
                prompt = gr.Text(
                    label="✏️ Edit Instruction", 
                    placeholder="e.g. Add a dog to the right side, change the sky to sunset...",
                    lines=3,
                    elem_classes="gr-box"
                )
                
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    # Also an output of infer(), so the slider shows the seed
                    # that was actually used.
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0
                    )
                    randomize_seed = gr.Checkbox(label="🎲 Randomize Seed", value=True)
                    
                    with gr.Row():
                        # NOTE(review): the UI default here is 4.0 while
                        # infer()'s own default is 1.0 — confirm which is intended.
                        true_guidance_scale = gr.Slider(
                            label="Guidance Scale",
                            minimum=1.0,
                            maximum=5.0,
                            step=0.1,
                            value=4.0
                        )
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=4,
                            maximum=16,
                            step=1,
                            value=8
                        )
                    
                    num_images_per_prompt = gr.Slider(
                        label="Images per Prompt",
                        minimum=1,
                        maximum=4,
                        step=1,
                        value=1
                    )
                
                run_button = gr.Button("🚀 Generate Edit", variant="primary", size="lg")
        
        # Right column: output gallery, prompt status panel, rewrite controls.
        with gr.Column(scale=1):
            result = gr.Gallery(
                label="🖼️ Output Images", 
                show_label=True, 
                columns=2,
                rows=2,
                elem_classes="gr-box"
            )
            
            # Prompt display component
            # Starts hidden; set_prompt_visible() below reveals it on the
            # first run, and infer() fills it with HTML.
            prompt_info = gr.HTML(visible=False)
            
            with gr.Group():
                rewrite_toggle = gr.Checkbox(
                    label="🤖 Enable AI Prompt Enhancement", 
                    value=False, 
                    interactive=True
                )
                # Hidden until the enhancement checkbox is ticked.
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Your token is secure and only used for API calls. Get yours from HuggingFace settings.",
                    elem_classes="gr-box"
                )
                
                # Show the token field only while prompt enhancement is enabled.
                def toggle_token_visibility(checked):
                    return gr.update(visible=checked)
                
                rewrite_toggle.change(
                    toggle_token_visibility,
                    inputs=[rewrite_toggle],
                    outputs=[hf_token_input]
                )
    
    # Examples section
    gr.Examples(
        examples=examples,
        inputs=prompt,
        label="💡 Example Prompts"
    )
    
    # Run infer() when the button is clicked or the prompt is submitted.
    # The inputs list follows infer()'s positional parameter order.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )
    
    # Show prompt info box after processing
    # Registered on the same two triggers as infer(), with queue=False.
    def set_prompt_visible():
        return gr.update(visible=True)
    
    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()