File size: 5,849 Bytes
67867a6
 
68c4f35
67867a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68c4f35
67867a6
68c4f35
67867a6
68c4f35
67867a6
 
 
 
 
 
 
 
 
 
 
68c4f35
67867a6
 
 
68c4f35
67867a6
68c4f35
 
67867a6
 
68c4f35
 
 
 
67867a6
 
 
 
68c4f35
 
 
 
 
67867a6
68c4f35
 
67867a6
68c4f35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67867a6
 
 
 
 
68c4f35
 
67867a6
 
68c4f35
 
67867a6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import tempfile
from PIL import Image
import gradio as gr
from google import genai
from google.genai import types

def save_binary_file(file_name, data):
    """Write the bytes in *data* to *file_name*, replacing any existing content."""
    with open(file_name, mode="wb") as sink:
        sink.write(data)

def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
    """Send an image plus a text instruction to Gemini and collect the reply.

    Args:
        text: Prompt sent alongside the uploaded image.
        file_name: Path of the image file to upload.
        api_key: Gemini API key. When empty or whitespace-only, the
            ``GEMINI_API_KEY`` environment variable is used instead.
        model: Model name passed to the streaming generate call.

    Returns:
        A ``(image_path, text_response)`` tuple. ``image_path`` is the path of
        a temporary ``.png`` file holding the first inline image received, or
        ``None`` when the stream carried no image. ``text_response`` is the
        text of the chunks seen before the image, each followed by a newline.
        The temporary file is not deleted here; the caller owns it.
    """
    stripped_key = api_key.strip() if api_key else ""
    client = genai.Client(api_key=stripped_key or os.environ.get("GEMINI_API_KEY"))
    uploaded = client.files.upload(file=file_name)

    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_uri(file_uri=uploaded.uri, mime_type=uploaded.mime_type),
                types.Part.from_text(text=text),
            ],
        ),
    ]
    generate_content_config = types.GenerateContentConfig(
        temperature=1,
        top_p=0.95,
        top_k=40,
        max_output_tokens=8192,
        response_modalities=["image", "text"],
        response_mime_type="text/plain",
    )

    text_chunks = []
    image_path = None
    stream = client.models.generate_content_stream(
        model=model, contents=contents, config=generate_content_config
    )
    for chunk in stream:
        if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
            continue
        # Look at every part, not just the first one, so an image that follows
        # a text part in the same chunk is not missed.
        image_part = next(
            (part for part in chunk.candidates[0].content.parts if part.inline_data),
            None,
        )
        if image_part is not None:
            # Create the temp file only once image data actually exists, so a
            # text-only reply does not leave an empty file behind. Writing
            # through the same handle avoids reopening a file that is still open.
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                tmp.write(image_part.inline_data.data)
                image_path = tmp.name
            break
        # chunk.text may be None/empty for chunks without text; skip those
        # instead of failing on string concatenation.
        if chunk.text:
            text_chunks.append(chunk.text)

    text_response = "".join(piece + "\n" for piece in text_chunks)
    return image_path, text_response

def process_image_and_prompt(composite_pil, prompt, gemini_api_key):
    """Validate the UI inputs, run the Gemini edit and return the outputs.

    Args:
        composite_pil: The uploaded image as a PIL image, or ``None`` when
            nothing was uploaded.
        prompt: Edit instruction typed by the user.
        gemini_api_key: API key typed by the user; may be empty when the
            ``GEMINI_API_KEY`` environment variable is set.

    Returns:
        ``(image, "")`` when the model produced an image (RGBA results are
        converted to RGB), otherwise ``(None, text_response)``.

    Raises:
        gr.Error: On missing inputs (with the specific validation message) or
            when any step of the processing fails (prefixed with ``Erro:``).
    """
    # Validation errors are raised outside the try block so they reach the UI
    # with their own message and duration instead of being re-wrapped below.
    if composite_pil is None:
        raise gr.Error("Carregue uma imagem primeiro.", duration=5)
    if not prompt:
        raise gr.Error("Digite um prompt antes de gerar.", duration=5)
    if not gemini_api_key and not os.environ.get("GEMINI_API_KEY"):
        raise gr.Error("Insira uma chave API Gemini ou configure a variável GEMINI_API_KEY.", duration=10)

    composite_path = None
    image_path = None
    try:
        # Reserve a temp path, close the handle, then save: writing to a path
        # whose handle is still open fails on some platforms.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            composite_path = tmp.name
        composite_pil.save(composite_path)

        image_path, text_response = generate(text=prompt, file_name=composite_path, api_key=gemini_api_key)

        if not image_path:
            return None, text_response

        # Image.open is lazy; convert()/copy() force the pixel data into
        # memory so the temp file can be removed in the finally block.
        with Image.open(image_path) as generated:
            if generated.mode == "RGBA":
                result_img = generated.convert("RGB")
            else:
                result_img = generated.copy()
        return result_img, ""
    except gr.Error:
        raise
    except Exception as e:
        raise gr.Error(f"Erro: {e}", duration=5) from e
    finally:
        # Best-effort cleanup of both temp files; a failed delete must not
        # mask the real result or error.
        for leftover in (composite_path, image_path):
            if leftover and os.path.exists(leftover):
                try:
                    os.remove(leftover)
                except OSError:
                    pass

# Modern interface: builds the Gradio Blocks layout and wires up the events.
# Styling comes from the external "style.css" file plus the Soft theme.
with gr.Blocks(css="style.css", theme=gr.themes.Soft(), title="Gemini Image Editor") as demo:
    # Page header (logo, title, subtitle) rendered as raw HTML.
    gr.HTML(
    """
    <div class="header">
        <img src="https://www.gstatic.com/lamda/images/gemini_favicon_f069958c85030456e93de685481c559f160ea06b.png" alt="Logo" class="logo">
        <h1>Gemini Image Editor</h1>
        <p class="subtitle">Edite imagens com IA de forma simples e poderosa</p>
    </div>
    """
    )
    
    with gr.Row(elem_classes="main-container"):
        # Left column: user inputs (image, prompt, API key) and action buttons.
        with gr.Column(scale=1, elem_classes="input-section"):
            # type="pil" means the handler receives the upload as a PIL image.
            image_input = gr.Image(type="pil", label="Carregar Imagem (PNG)", elem_classes="image-upload")
            prompt_input = gr.Textbox(placeholder="Digite o prompt (ex: 'add a hat')", label="Prompt", elem_classes="prompt-box")
            # Password-type textbox so the key is masked in the UI.
            gemini_api_key = gr.Textbox(placeholder="Chave API Gemini", label="API Key", type="password", elem_classes="api-key-box")
            with gr.Row():
                submit_btn = gr.Button("Gerar", variant="primary", elem_classes="submit-btn")
                clear_btn = gr.Button("Limpar", variant="secondary", elem_classes="clear-btn")
        
        # Right column: generated image and any text returned instead of an image.
        with gr.Column(scale=2, elem_classes="output-section"):
            output_image = gr.Image(label="Resultado", elem_classes="output-image")
            output_text = gr.Textbox(label="Mensagem", placeholder="Resultados de texto aparecem aqui", interactive=False, elem_classes="output-text")

    # Collapsed-by-default help section with usage and setup instructions.
    with gr.Accordion("ℹ️ Como Usar e Configurar", open=False, elem_classes="info-accordion"):
        gr.Markdown("""
        ### Como Usar
        1. Faça upload de uma imagem PNG.
        2. Digite um prompt em inglês (ex: "remove the background").
        3. Insira sua chave API Gemini ou configure a variável `GEMINI_API_KEY`.
        4. Clique em "Gerar" e veja o resultado!

        ### Configuração
        - Obtenha sua chave API em <a href="https://aistudio.google.com/apikey">Google AI Studio</a>.
        - Duplique este projeto em <a href="https://huggingface.co/spaces/ameerazam08/Gemini-Image-Edit?duplicate=true">Hugging Face</a>.
        - Contato: <a href="https://www.linkedin.com/in/dheiver-santos/">Dheiver Santos</a>.
        """)

    gr.Markdown("### Exemplos", elem_classes="examples-header")
    # Each example row is [image path, prompt, API key]; the key is left empty.
    # NOTE(review): the example images are .webp while the UI text asks for PNG.
    examples = [
        ["data/1.webp", "change text to 'AMEER'", ""],
        ["data/2.webp", "remove the spoon from hand only", ""],
        ["data/3.webp", "change text to 'Make it'", ""],
    ]
    # cache_examples=False: example outputs are not cached.
    gr.Examples(examples=examples, inputs=[image_input, prompt_input, gemini_api_key], outputs=[output_image, output_text],
                fn=process_image_and_prompt, cache_examples=False)

    # Events
    # "Gerar" runs the edit handler; its two return values fill the outputs.
    submit_btn.click(fn=process_image_and_prompt, inputs=[image_input, prompt_input, gemini_api_key], outputs=[output_image, output_text])
    # "Limpar" resets all five components, including the API key field.
    clear_btn.click(fn=lambda: (None, "", "", None, ""), inputs=[], outputs=[image_input, prompt_input, gemini_api_key, output_image, output_text])

# Enable the request queue (max_size=50) and start the app at import time.
demo.queue(max_size=50).launch()