# Hugging Face Space: ysharma/guided-placement-nano-banana (status: Running)
# File size: 12,129 Bytes

import gradio as gr
from gradio_image_annotation import image_annotator
import fal_client
from PIL import Image
import io
import base64
import numpy as np
import os

def _upload_image(client, image):
    """Upload *image* through *client* and return the value the upload call yields.

    Accepts a numpy array (converted to a uint8 PIL image), a file path
    string, or any object with a PIL-style ``save(fp, format=...)`` method.
    Non-path inputs are encoded as PNG in memory before upload.
    """
    if isinstance(image, str):
        # A string is treated as a path on disk and uploaded as-is.
        return client.upload_file(image)
    if isinstance(image, np.ndarray):
        image = Image.fromarray(image.astype('uint8'))
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return client.upload(buffer.getvalue(), "image/png")


def process_images(annotated_image, second_image, user_api_key=None, progress=gr.Progress()):
    """
    Process the annotated image and second image using fal API.

    Args:
        annotated_image: Annotator payload; a dict read via its "image" and
            "boxes" keys. Only the first box is used.
        second_image: Image holding the object to insert (numpy array, file
            path, or PIL-style image).
        user_api_key: Optional FAL key typed by the user. When blank, the
            FAL_KEY environment variable is used instead.
        progress: Gradio progress tracker, called with a fraction and a
            description.

    Returns:
        A ``(output_url, message)`` tuple. ``output_url`` is ``None`` whenever
        validation fails or the API call errors; ``message`` then explains why.
    """
    # Check if annotated_image is provided
    if annotated_image is None:
        return None, "Please provide the first image and draw an annotation box"

    # Check if second_image is provided (could be None or numpy array)
    if second_image is None or (isinstance(second_image, np.ndarray) and second_image.size == 0):
        return None, "Please provide the second image"

    # Check if annotation box exists
    boxes = annotated_image.get("boxes")
    if not boxes:
        return None, "Please draw an annotation box on the first image"

    # Extract bounding box coordinates from the first (and only) box
    box = boxes[0]
    xmin = box.get("xmin")
    ymin = box.get("ymin")
    xmax = box.get("xmax")
    ymax = box.get("ymax")
    if None in (xmin, ymin, xmax, ymax):
        # An incomplete box would otherwise put the text "None" into the prompt.
        return None, "Please draw an annotation box on the first image"

    # Construct the dynamic prompt with the actual box coordinates
    prompt = f"""add the <central object in the second image> in the first image only inside an imaginary box defined by pixels values "xmin": {xmin}, "ymin": {ymin}, "xmax": {xmax}, "ymax": {ymax}. Take care of shadows, lighting, style, and general concept of objects as per the first image."""

    progress(0.2, desc="Gradio is preparing your images...")

    # Resolve the API key - prioritize user input, then environment variable.
    # The key is handed to a per-call client rather than written into
    # os.environ: the environment is shared by the whole process, so swapping
    # it per request could expose one caller's key to a concurrent request.
    api_key = (user_api_key or "").strip() or os.environ.get("FAL_KEY", "")
    if not api_key:
        return None, "⚠️ No FAL API key found. Please either:\n1. Duplicate this app and set your FAL_KEY as a secret, or\n2. Enter your FAL API key in the field provided above."

    try:
        client = fal_client.SyncClient(key=api_key)

        # Upload both inputs so they can be passed to the model
        uploaded_file1 = _upload_image(client, annotated_image["image"])
        uploaded_file2 = _upload_image(client, second_image)

        progress(0.4, desc="Processing with nano-banana...")

        # Setup progress callback
        def on_queue_update(update):
            if isinstance(update, fal_client.InProgress):
                # InProgress updates don't have a progress attribute, just show we're processing
                progress(0.6, desc="nano-banana is working on your image...")
                # Optionally log any messages if they exist
                if hasattr(update, 'logs') and update.logs:
                    for log in update.logs:
                        print(log.get("message", ""))

        # Call fal API with the dynamic prompt including box coordinates
        result = client.subscribe(
            "fal-ai/nano-banana/edit",
            arguments={
                "prompt": prompt,
                "image_urls": [f"{uploaded_file1}", f"{uploaded_file2}"]
            },
            with_logs=True,
            on_queue_update=on_queue_update,
        )

        progress(0.95, desc="Finalizing...")

        # Extract the result image URL
        if result and result.get("images"):
            output_url = result["images"][0]["url"]
            description = result.get("description", "Image processed successfully!")
            progress(1.0, desc="Complete!")
            return output_url, description
        return None, "Failed to generate image. Please check your API key or try again."

    except Exception as e:
        # UI boundary: turn any failure into a readable status message
        # instead of letting the exception escape the handler.
        details = str(e)
        error_message = details.lower()

        # Check for authentication errors
        if "401" in error_message or "unauthorized" in error_message or "api key" in error_message:
            return None, f"⚠️ API Authentication Error: Invalid or missing FAL API key.\n\nPlease either:\n1. Duplicate this app and set your FAL_KEY as a secret, or\n2. Enter your valid FAL API key in the field provided above.\n\nGet your API key at: https://fal.ai"

        # Check for rate limit errors
        if "429" in error_message or "rate limit" in error_message:
            return None, "⚠️ Rate limit exceeded. Please wait a moment and try again, or use your own API key for higher limits."

        # Check for server errors
        if "500" in error_message or "502" in error_message or "503" in error_message:
            return None, f"⚠️ FAL API server error. The service might be temporarily unavailable.\n\nPlease either:\n1. Try again in a few moments, or\n2. Use your own API key by entering it in the field above.\n\nError details: {details}"

        # Generic error with fallback message
        return None, f"⚠️ Error occurred: {details}\n\nIf the error persists, please either:\n1. Duplicate this app and set your FAL_KEY as a secret, or\n2. Enter your FAL API key in the field provided above.\n\nGet your API key at: https://fal.ai"


# Create the Gradio interface
# Main page of the app: navbar, instructions, optional API-key field,
# the two image inputs, the generate button, and the output widgets.
with gr.Blocks(theme='ocean') as demo:
    # Add navigation bar
    navbar = gr.Navbar(
        value=[
            ("Documentation", "https://docs.fal.ai"),
            ("FAL.AI nano-banana", "https://fal.ai/models/fal-ai/nano-banana/edit/api"),
            ("Learn more about Gradio Navbar", "https://www.gradio.app/guides/multipage-apps#customizing-the-navbar")
        ],
        visible=True,
        main_page_name="🎨 guided nano banana"
    )
    
    gr.HTML(
        """
        <h1><center>Guide Your Nano Banana👉🍌</center></h1>
        
        <b>How to use:</b><br>
        1. Upload or capture the first image and draw a box where you want to place an object<br>
        2. Upload the second image containing the object you want to insert<br>
        3. Click "Generate Composite Image" and wait for the Gradio and Nano-Banana to blend the images<br>
        
        The Gradio app will intelligently place the object from the second image into the boxed area of the first image,
        taking care of lighting, shadows, and proper integration.
        """
    )
    
    # API Key input section
    with gr.Row():
        with gr.Column():
            with gr.Accordion("🔑 API Configuration (Optional)", open=False):
                gr.Markdown(
                    """
                    **Note:** If you're experiencing API errors or want to use your own FAL account:
                    - Enter your FAL API key below, or
                    - [Duplicate this Space](https://huggingface.co/spaces) and set FAL_KEY as a secret
                    - Get your API key at [fal.ai](https://fal.ai)
                    """
                )
                api_key_input = gr.Textbox(
                    label="FAL API Key",
                    placeholder="Enter your FAL key (optional)",
                    type="password",
                    interactive=True,
                    info="Your key will be used only for this session and won't be stored"
                )
    
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Step 1: Annotate First Image")
                    # Image annotator for first image (image_annotator is
                    # imported at the top of the file)
                    first_image = image_annotator(
                        value=None,
                        label="Draw a box where you want to place the object",
                        image_type="pil",
                        single_box=True,  # Only allow one box
                        disable_edit_boxes=True,  
                        show_download_button=False,
                        show_share_button=False,
                        box_thickness=3,
                        box_selected_thickness=4,
                        show_label=True,
                    )
                    
                with gr.Column(scale=1):
                    gr.Markdown("### Step 2: Upload Second Image")
                    # Regular image input for second image
                    second_image = gr.Image(
                        label="Image containing the object to insert",
                        type="numpy",
                        height=400,
                    )
                    # Generate button
                    generate_btn = gr.Button("Step 3: 🚀 Generate Composite Image", variant="primary", size="lg")
    
    # Output section
    with gr.Column():
        output_image = gr.Image(
            label="Generated Composite Image",
            type="filepath",
            height=500,
        )
        status_text = gr.Textbox(
            label="Status",
            placeholder="Results will appear here...",
            lines=3,
        )
        
    # Connect the button to the processing function; its two return values
    # fill the output image and the status box, in that order.
    generate_btn.click(
        fn=process_images,
        inputs=[first_image, second_image, api_key_input],
        outputs=[output_image, status_text],
        show_progress=True,
    )

# Secondary page, registered on the app under the name "Tips" at "/tips".
with demo.route("Tips", "/tips"):
    gr.Markdown(
        """
        # ℹ️ Tips for Best Results
        - **Box Placement**: Draw the box exactly where you want the object to appear
        - **Image Quality**: Use high-resolution images for better results
        - **Object Selection**: The second image should clearly show the object you want to insert
        - **Lighting**: Images with similar lighting conditions work best
        - **Processing Time**: Generation typically takes 10-30 seconds
        - **API Key**: If you encounter errors, try using your own FAL API key
        """
    )

    # Different navbar for the Tips page: the main page is labelled "Home" here
    navbar = gr.Navbar(
        visible=True,
        main_page_name="Home",
    )  
    
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    # NOTE(review): ssr_mode=False presumably turns off Gradio's server-side
    # rendering — confirm against the installed Gradio version.
    demo.launch(ssr_mode=False)