import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import QwenImageEditPipeline
from diffusers.utils import is_xformers_available
import os
import base64
import json
from huggingface_hub import InferenceClient
import logging
#############################
# Logging: configure the root logger at DEBUG and create this module's logger.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Telemetry opt-outs. setdefault() leaves any value that is already present in
# the environment untouched, so a deployment can still override these.
# NOTE(review): these run after gradio / diffusers / huggingface_hub have been
# imported; if any of them reads its variable at import time, the default set
# here comes too late -- confirm, and move above the imports if so.
os.environ.setdefault('HF_HUB_DISABLE_TELEMETRY', '1')
os.environ.setdefault('GRADIO_ANALYTICS_ENABLED', 'False')
#############################
def get_caption_language(prompt):
    """Return 'zh' if *prompt* contains a CJK Unified Ideograph, else 'en'.

    Only the U+4E00..U+9FFF block is checked; any single character inside
    that range is enough to classify the whole prompt as Chinese.
    """
    cjk_first, cjk_last = '\u4e00', '\u9fff'
    contains_cjk = any(cjk_first <= ch <= cjk_last for ch in prompt)
    return 'zh' if contains_cjk else 'en'
def _extract_rewritten(reply):
    """Unwrap a ``{"Rewritten": "..."}`` reply; return *reply* unchanged otherwise.

    The model may wrap the JSON object in a Markdown code fence or surround it
    with extra text, so fences are dropped and only the outermost ``{...}``
    span is parsed.
    """
    unfenced = reply.replace("```json", "").replace("```", "")
    start = unfenced.find("{")
    end = unfenced.rfind("}")
    if start == -1 or end < start:
        return reply
    try:
        # strict=False tolerates raw newlines inside the JSON string value.
        payload = json.loads(unfenced[start:end + 1], strict=False)
    except ValueError:
        return reply
    rewritten = payload.get("Rewritten") if isinstance(payload, dict) else None
    return rewritten if isinstance(rewritten, str) else reply


def polish_prompt(original_prompt, system_prompt, hf_token):
    """Rewrite *original_prompt* with a chat model via Hugging Face InferenceClient.

    Args:
        original_prompt: The user's edit instruction.
        system_prompt: System message sent ahead of the user prompt.
        hf_token: User-provided Hugging Face token used as the API key.

    Returns:
        The rewritten instruction on a single line.  If the reply is a JSON
        object with a ``"Rewritten"`` string, only that string is returned.
        When the token is missing, the API call fails, or the reply is empty,
        a Gradio warning is shown and *original_prompt* is returned unchanged.
    """
    if not hf_token or not hf_token.strip():
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        return original_prompt

    client = InferenceClient(
        provider="cerebras",
        api_key=hf_token,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": original_prompt},
    ]
    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen3-235B-A22B-Instruct-2507",
            messages=messages,
            max_tokens=512,
        )
        reply = completion.choices[0].message.content
        polished_prompt = _extract_rewritten(reply).strip().replace("\n", " ")
    except Exception:
        # Best effort: any API or parsing failure falls back to the original prompt.
        logger.exception("Error during Hugging Face API call")
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt

    if not polished_prompt:
        # An empty rewrite would send an empty instruction to the image pipeline.
        gr.Warning("Failed to rewrite prompt. Using original.")
        return original_prompt
    return polished_prompt
# System prompt for the edit-instruction rewriter; infer() passes it to
# polish_prompt() as the system message. It asks the model to reply with a
# JSON object whose "Rewritten" key holds the improved instruction.
# NOTE(review): the section numbering inside the prompt jumps from "## 2" to
# "## 4"; left untouched because this text is sent to the model verbatim.
SYSTEM_PROMPT_EDIT = '''
# Edit Instruction Rewriter
You are a professional edit instruction rewriter. Your task is to generate a precise, concise, and visually achievable instruction based on the user's intent and the input image.
## 1. General Principles
- Keep the rewritten instruction **concise** and clear.
- Avoid contradictions, vagueness, or unachievable instructions.
- Maintain the core logic of the original instruction; only enhance clarity and feasibility.
- Ensure new added elements or modifications align with the image's original context and art style.
## 2. Task Types
### Add, Delete, Replace:
- When the input is detailed, only refine grammar and clarity.
- For vague instructions, infer minimal but sufficient details.
- For replacement, use the format: `"Replace X with Y"`.
### Text Editing (e.g., text replacement):
- Enclose text content in quotes, e.g., `Replace "abc" with "xyz"`.
- Preserving the original structure and language—**do not translate** or alter style.
### Human Editing (e.g., change a person's face/hair):
- Preserve core visual identity (gender, ethnic features).
- Describe expressions in subtle and natural terms.
- Maintain key clothing or styling details unless explicitly replaced.
### Style Transformation:
- If a style is specified, e.g., `Disco style`, rewrite it to encapsulate the essential visual traits.
- Use a fixed template for **coloring/restoration**:
`"Restore old photograph, remove scratches, reduce noise, enhance details, high resolution, realistic, natural skin tones, clear facial features, no distortion, vintage photo restoration"`
if applicable.
## 4. Output Format
Please provide the rewritten instruction in a clean `json` format as:
{
"Rewritten": "..."
}
'''
# Weights are loaded in bfloat16; the pipeline is placed on CUDA when torch
# reports a usable GPU and on the CPU otherwise.
dtype = torch.bfloat16
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

pipe = QwenImageEditPipeline.from_pretrained("Qwen/Qwen-Image-Edit", torch_dtype=dtype)
pipe = pipe.to(device)

# Load the 8-step Lightning LoRA and fuse it into the pipeline for acceleration.
pipe.load_lora_weights(
    "lightx2v/Qwen-Image-Lightning",
    weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors",
)
pipe.fuse_lora()

# Memory-efficient attention is optional and only enabled when xformers is installed.
if not is_xformers_available():
    print("xformers not available or failed to load.")
else:
    pipe.enable_xformers_memory_efficient_attention()
def _prompt_info_html(variant, title, rows, note=None):
    """Render the prompt-status panel as an HTML fragment.

    The fragment uses the ``prompt-info-box`` class plus *variant* (``success``,
    ``warning``, ``error`` or ``default``) as a modifier class.  All dynamic text
    is HTML-escaped because it may come from the user or from the rewriting model.

    Args:
        variant: Modifier class appended after ``prompt-info-box``.
        title: Heading text.
        rows: Iterable of ``(label, text)`` pairs; a falsy label renders the
            text without a bold prefix.
        note: Optional trailing remark rendered with the ``note`` class.

    Returns:
        The HTML string.
    """
    import html  # local import keeps this block independent of the file header

    parts = [
        f'<div class="prompt-info-box {variant}">',
        f"<h3>{html.escape(title)}</h3>",
    ]
    for label, text in rows:
        escaped = html.escape(str(text))
        if label:
            parts.append(f"<p><strong>{html.escape(label)}:</strong> {escaped}</p>")
        else:
            parts.append(f"<p>{escaped}</p>")
    if note:
        parts.append(f'<p class="note">{html.escape(str(note))}</p>')
    parts.append("</div>")
    return "".join(parts)


@spaces.GPU(duration=60)
def infer(
    image,
    prompt,
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=8,
    rewrite_prompt=False,
    hf_token="",
    num_images_per_prompt=1,
    progress=gr.Progress(track_tqdm=True),
):
    """Edit *image* according to *prompt*, optionally rewriting the prompt first.

    Args:
        image: Input image handed to the pipeline; required.
        prompt: The user's edit instruction.
        seed: Seed for the torch generator (ignored when *randomize_seed* is set).
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` instead of using *seed*.
        true_guidance_scale: Forwarded to the pipeline as ``true_cfg_scale``.
        num_inference_steps: Number of denoising steps.
        rewrite_prompt: Rewrite the prompt through ``polish_prompt`` before editing.
        hf_token: User-provided HF token; required only when *rewrite_prompt* is set.
        num_images_per_prompt: How many images to generate.
        progress: Gradio progress tracker (tracks tqdm output).

    Returns:
        ``(edited_images, seed, prompt_info)``: the pipeline's ``.images`` output,
        the seed actually used, and an HTML fragment describing which prompt was used.

    Raises:
        gr.Error: If no input image was provided.
    """
    if image is None:
        # Fail with a readable message instead of an error from inside the pipeline.
        raise gr.Error("Please upload an input image before generating.")

    original_prompt = prompt  # kept for display next to the rewritten version
    negative_prompt = " "
    token = (hf_token or "").strip()  # the token may arrive as None

    if not rewrite_prompt:
        rewritten_prompt = original_prompt
        prompt_info = _prompt_info_html(
            "default", "📝 Original Prompt", [(None, original_prompt)]
        )
    elif not token:
        gr.Warning("HF Token is required for prompt rewriting but was not provided!")
        rewritten_prompt = original_prompt
        prompt_info = _prompt_info_html(
            "warning",
            "⚠️ Prompt Rewriting Skipped",
            [("Original", original_prompt)],
            note="HF Token required for enhancement",
        )
    else:
        try:
            rewritten_prompt = polish_prompt(original_prompt, SYSTEM_PROMPT_EDIT, token)
        except Exception as e:
            gr.Warning(f"Prompt rewriting failed: {str(e)}")
            rewritten_prompt = original_prompt
            prompt_info = _prompt_info_html(
                "error",
                "❌ Enhancement Failed",
                [("Original", original_prompt), ("Error", str(e))],
            )
        else:
            if not rewritten_prompt or rewritten_prompt == original_prompt:
                # polish_prompt returns the original prompt when the API call
                # fails, so an unchanged result must not be reported as success.
                rewritten_prompt = original_prompt
                prompt_info = _prompt_info_html(
                    "warning",
                    "⚠️ Prompt Not Enhanced",
                    [("Original", original_prompt)],
                    note="The rewriter returned no changes (the request may have "
                    "failed); using the original prompt.",
                )
            else:
                prompt_info = _prompt_info_html(
                    "success",
                    "✨ Enhanced Successfully",
                    [("Original", original_prompt), ("Enhanced", rewritten_prompt)],
                )

    # Generate images
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    seed = int(seed)  # sliders may deliver floats; manual_seed needs an int
    generator = torch.Generator(device=device).manual_seed(seed)
    edited_images = pipe(
        image,
        prompt=rewritten_prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=int(num_images_per_prompt),
    ).images
    return edited_images, seed, prompt_info
# Upper bound for seeds: the largest signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Sample edit instructions offered as clickable examples in the UI.
examples = [
    'Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.',
    "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
    'Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.',
    'Remove the blue sky and replace it with a dark night cityscape.',
    'Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used for "通义" and position it to the top left with a light heading-style font.',
]
# Custom CSS passed to gr.Blocks(css=...) below. Besides general theming it
# defines the .prompt-info-box family (success / warning / error / default
# variants plus a .note style) for the prompt status panel, and the .gr-box
# class that several components opt into via elem_classes.
custom_css = """
/* Gradient background */
.gradio-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 25%, #f093fb 50%, #fecfef 75%, #fecfef 100%);
min-height: 100vh;
}
/* Main container styling */
.container {
max-width: 1400px !important;
margin: 0 auto !important;
padding: 2rem !important;
}
/* Card-like sections */
.gr-box {
background: rgba(255, 255, 255, 0.95) !important;
backdrop-filter: blur(10px) !important;
border-radius: 20px !important;
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1) !important;
border: 1px solid rgba(255, 255, 255, 0.5) !important;
padding: 1.5rem !important;
margin-bottom: 1.5rem !important;
}
/* Header styling */
h1 {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
font-size: 3rem !important;
font-weight: 800 !important;
text-align: center;
margin-bottom: 0.5rem !important;
text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
}
h2 {
color: #4a5568 !important;
font-size: 1.5rem !important;
font-weight: 600 !important;
margin-bottom: 1rem !important;
}
/* Button styling */
.gr-button-primary {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
border: none !important;
color: white !important;
font-weight: 600 !important;
font-size: 1.1rem !important;
padding: 0.8rem 2rem !important;
border-radius: 12px !important;
box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
transition: all 0.3s ease !important;
}
.gr-button-primary:hover {
transform: translateY(-2px) !important;
box-shadow: 0 6px 20px rgba(102, 126, 234, 0.5) !important;
}
/* Input fields styling */
.gr-input, .gr-text-input, .gr-slider, .gr-dropdown {
border-radius: 10px !important;
border: 2px solid #e2e8f0 !important;
background: white !important;
transition: all 0.3s ease !important;
}
.gr-input:focus, .gr-text-input:focus {
border-color: #667eea !important;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}
/* Accordion styling */
.gr-accordion {
background: rgba(255, 255, 255, 0.8) !important;
border-radius: 12px !important;
border: 1px solid rgba(102, 126, 234, 0.2) !important;
overflow: hidden !important;
}
/* Gallery styling */
.gr-gallery {
border-radius: 12px !important;
overflow: hidden !important;
}
/* Prompt info boxes */
.prompt-info-box {
padding: 1.5rem;
border-radius: 12px;
margin: 1rem 0;
animation: fadeIn 0.5s ease;
}
.prompt-info-box h3 {
margin: 0 0 0.75rem 0;
font-size: 1.2rem;
font-weight: 600;
}
.prompt-info-box p {
margin: 0.5rem 0;
line-height: 1.6;
}
.prompt-info-box.success {
background: linear-gradient(135deg, #d4f4dd 0%, #e3f9e5 100%);
border-left: 4px solid #48bb78;
}
.prompt-info-box.warning {
background: linear-gradient(135deg, #fef5e7 0%, #fff9ec 100%);
border-left: 4px solid #f6ad55;
}
.prompt-info-box.error {
background: linear-gradient(135deg, #fed7d7 0%, #fee5e5 100%);
border-left: 4px solid #fc8181;
}
.prompt-info-box.default {
background: linear-gradient(135deg, #e6f3ff 0%, #f0f7ff 100%);
border-left: 4px solid #667eea;
}
.prompt-info-box .note {
font-size: 0.9rem;
color: #718096;
font-style: italic;
}
/* Checkbox styling */
.gr-checkbox {
background: white !important;
border-radius: 8px !important;
padding: 0.5rem !important;
}
/* Token input field */
input[type="password"] {
font-family: monospace !important;
letter-spacing: 0.05em !important;
}
/* Info badges */
.gr-markdown p {
color: #4a5568;
line-height: 1.6;
}
.gr-markdown a {
color: #667eea !important;
text-decoration: none !important;
font-weight: 500 !important;
transition: color 0.3s ease !important;
}
.gr-markdown a:hover {
color: #764ba2 !important;
text-decoration: underline !important;
}
/* Animation */
@keyframes fadeIn {
from {
opacity: 0;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* Slider styling */
.gr-slider input[type="range"] {
background: linear-gradient(90deg, #667eea 0%, #764ba2 100%) !important;
}
/* Group styling */
.gr-group {
background: rgba(249, 250, 251, 0.8) !important;
border-radius: 12px !important;
padding: 1rem !important;
margin-top: 1rem !important;
}
/* Loading spinner customization */
.gr-loading {
color: #667eea !important;
}
/* Example buttons */
.gr-examples button {
background: white !important;
border: 2px solid #e2e8f0 !important;
border-radius: 8px !important;
padding: 0.5rem 1rem !important;
transition: all 0.3s ease !important;
}
.gr-examples button:hover {
border-color: #667eea !important;
background: rgba(102, 126, 234, 0.05) !important;
}
"""
# Gradio UI: the left column holds the inputs and generation settings, the right
# column holds the results and the prompt-rewriting controls.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened source -- confirm it matches the intended layout.
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎨 Nano-Banana")
    gr.Markdown("✨ **Ultra-fast 8-step image editing with AI-powered prompt enhancement**")
    gr.Markdown("🔐 **Secure prompt rewriting with your [Hugging Face token](https://huggingface.co/settings/tokens)**")
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                # Delivered to infer() as a PIL image (type="pil").
                input_image = gr.Image(
                    label="📸 Input Image",
                    type="pil",
                    elem_classes="gr-box"
                )
                prompt = gr.Text(
                    label="✏️ Edit Instruction",
                    placeholder="e.g. Add a dog to the right side, change the sky to sunset...",
                    lines=3,
                    elem_classes="gr-box"
                )
            with gr.Accordion("⚙️ Advanced Settings", open=False):
                # Also an output of infer(), so the seed actually used is written back here.
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0
                )
                randomize_seed = gr.Checkbox(label="🎲 Randomize Seed", value=True)
                with gr.Row():
                    # NOTE(review): the UI default of 4.0 differs from infer()'s own
                    # default of 1.0 for true_guidance_scale -- confirm which is intended.
                    true_guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=1.0,
                        maximum=5.0,
                        step=0.1,
                        value=4.0
                    )
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=4,
                        maximum=16,
                        step=1,
                        value=8
                    )
                num_images_per_prompt = gr.Slider(
                    label="Images per Prompt",
                    minimum=1,
                    maximum=4,
                    step=1,
                    value=1
                )
            run_button = gr.Button("🚀 Generate Edit", variant="primary", size="lg")
        with gr.Column(scale=1):
            result = gr.Gallery(
                label="🖼️ Output Images",
                show_label=True,
                columns=2,
                rows=2,
                elem_classes="gr-box"
            )
            # Prompt display component; hidden until a run is triggered.
            prompt_info = gr.HTML(visible=False)
            with gr.Group():
                rewrite_toggle = gr.Checkbox(
                    label="🤖 Enable AI Prompt Enhancement",
                    value=False,
                    interactive=True
                )
                # Hidden until the rewrite checkbox is ticked.
                hf_token_input = gr.Textbox(
                    label="🔑 Hugging Face API Token",
                    type="password",
                    placeholder="hf_xxxxxxxxxxxxxxxx",
                    visible=False,
                    info="Your token is secure and only used for API calls. Get yours from HuggingFace settings.",
                    elem_classes="gr-box"
                )

    def toggle_token_visibility(checked):
        # Show the token field exactly when prompt enhancement is enabled.
        return gr.update(visible=checked)

    rewrite_toggle.change(
        toggle_token_visibility,
        inputs=[rewrite_toggle],
        outputs=[hf_token_input]
    )
    # Examples section: choosing an example fills the edit-instruction box.
    gr.Examples(
        examples=examples,
        inputs=prompt,
        label="💡 Example Prompts"
    )
    # Run infer() when the button is clicked or the prompt is submitted.
    # The inputs list follows infer()'s positional parameter order.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            input_image,
            prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            rewrite_toggle,
            hf_token_input,
            num_images_per_prompt
        ],
        outputs=[result, seed, prompt_info]
    )

    # Make the prompt info box visible when a run is triggered. These are
    # separate listeners on the same two events as infer(); they are not
    # chained after it.
    def set_prompt_visible():
        return gr.update(visible=True)

    run_button.click(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )
    prompt.submit(
        fn=set_prompt_visible,
        inputs=None,
        outputs=[prompt_info],
        queue=False
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()