File size: 7,832 Bytes
c7eeb94
7ef3905
 
 
 
17c1398
 
 
c7eeb94
17c1398
7ef3905
17c1398
 
c7eeb94
7ef3905
17c1398
 
 
7ef3905
 
17c1398
 
 
 
7ef3905
 
17c1398
 
 
7ef3905
17c1398
 
ea295fd
17c1398
ea295fd
17c1398
ea295fd
e51fcbf
 
 
ea295fd
17c1398
 
 
ea295fd
17c1398
7ef3905
 
17c1398
7ef3905
ea295fd
7ef3905
 
17c1398
 
ea295fd
7ef3905
17c1398
 
 
7ef3905
17c1398
 
ea295fd
17c1398
7ef3905
 
17c1398
98b1f6d
17c1398
ea295fd
7ef3905
17c1398
 
 
 
 
 
 
 
 
ea295fd
 
 
 
 
17c1398
 
ea295fd
 
17c1398
 
 
 
 
ea295fd
 
 
 
 
 
 
 
 
 
 
17c1398
 
 
 
 
 
 
 
 
 
ea295fd
17c1398
 
 
 
 
 
 
 
 
 
 
7ef3905
17c1398
 
 
 
 
 
 
 
 
ea295fd
17c1398
ea295fd
17c1398
7ef3905
 
 
ea295fd
17c1398
ea295fd
 
17c1398
7ef3905
17c1398
 
 
ea295fd
17c1398
 
 
 
7ef3905
17c1398
7ef3905
 
ea295fd
7ef3905
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import gradio as gr
import requests
import base64
import os
import io
from PIL import Image

# --- OpenAI API Call Logic ---

# (connect, read) timeouts in seconds for OpenAI requests. Image generation is
# slow, so the read timeout is generous, but it is bounded so that a stalled
# connection cannot hang the UI forever.
_OPENAI_REQUEST_TIMEOUT = (10, 300)


def _describe_error_response(response) -> str:
    """Return the body of a failed *response* formatted for the status message."""
    try:
        return f"\nAPI Error Details: {response.json()}"
    except ValueError:
        # Body is not JSON. Every requests/json decode error derives from
        # ValueError, so this works regardless of the installed requests version.
        return f"\nRaw Response Text: {response.text}"
    except Exception as json_e:
        return f"\nError parsing JSON response: {json_e}\nRaw Response Text: {response.text}"


def call_openai_image_api(prompt: str, api_key: str, input_image: Image.Image | None = None):
    """
    Generate a new image, or edit ``input_image``, through the OpenAI Images API.

    The edit endpoint is used when ``input_image`` is given, otherwise the
    generation endpoint is used. All failures are reported through the
    returned status message; this function does not raise.

    Args:
        prompt: The text prompt for image generation or editing. Must contain
            at least one non-whitespace character.
        api_key: The OpenAI API key. Surrounding whitespace is ignored.
        input_image: A PIL Image object for editing, or None for generation.

    Returns:
        A tuple containing:
        - original_image (PIL.Image or None): ``input_image`` as passed in
          (None when generating or when input validation fails).
        - result_image (PIL.Image or None): The generated/edited image, or None on error.
        - status_message (str): "Success!" or a description of the error.
    """
    # Validate before touching the network. A whitespace-only key or prompt is
    # as unusable as an empty one, and a key with stray whitespace would
    # produce an invalid Authorization header.
    api_key = (api_key or "").strip()
    if not api_key:
        return None, None, "Error: OpenAI API Key is missing."
    if not prompt or not prompt.strip():
        return None, None, "Error: Prompt cannot be empty."

    headers = {"Authorization": f"Bearer {api_key}"}
    model = "gpt-image-1"
    size = "1024x1024"

    # Parameters shared by both endpoints.
    params = {"prompt": prompt, "model": model, "size": size}
    # GPT image models always return base64 and reject `response_format`;
    # only the DALL-E models need base64 output to be requested explicitly.
    if model.startswith("dall-e"):
        params["response_format"] = "b64_json"

    response = None  # Stays None if the request fails before a response arrives

    try:
        # Compare against None explicitly: truthiness is ambiguous for array-like
        # inputs and would bypass the type check below.
        if input_image is not None:
            # --- Image Editing ---
            if not isinstance(input_image, Image.Image):
                return None, None, "Error: Invalid image provided for editing."

            # Serialize the PIL image to an in-memory PNG for the multipart upload
            byte_stream = io.BytesIO()
            input_image.save(byte_stream, format="PNG")
            byte_stream.seek(0)  # Rewind so the upload starts at the first byte

            files = {
                "image": ("input_image.png", byte_stream, "image/png"),
            }
            api_url = "https://api.openai.com/v1/images/edits"
            print("Calling OpenAI Image Edit API...")  # Debug print
            response = requests.post(
                api_url,
                headers=headers,
                files=files,
                data=params,
                timeout=_OPENAI_REQUEST_TIMEOUT,
            )
        else:
            # --- Image Generation ---
            payload = {**params, "n": 1}  # Generate one image
            api_url = "https://api.openai.com/v1/images/generations"
            print("Calling OpenAI Image Generation API...")  # Debug print
            # `json=` serializes the payload and sets the Content-Type header
            response = requests.post(
                api_url,
                headers=headers,
                json=payload,
                timeout=_OPENAI_REQUEST_TIMEOUT,
            )

        print(f"API Response Status Code: {response.status_code}")  # Debug print
        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)

        # Process successful response
        response_data = response.json()
        entries = response_data.get("data")
        if not isinstance(entries, list) or not entries:
            return input_image, None, f"Error: Unexpected API response format - 'data' array missing or empty: {response_data}"
        img_b64 = entries[0].get("b64_json")
        if not img_b64:
            return input_image, None, f"Error: Unexpected API response format - 'b64_json' key missing: {response_data}"

        img_bytes = base64.b64decode(img_b64)
        result_image = Image.open(io.BytesIO(img_bytes))
        # Image.open is lazy; decode now so a corrupt payload is reported here
        # instead of failing later while the UI renders the image.
        result_image.load()
        print("Image processed successfully.")  # Debug print
        return input_image, result_image, "Success!"

    except requests.exceptions.RequestException as e:
        error_message = f"API Request Error: {e}"
        # A response only exists when the server answered (e.g. an HTTP 4xx/5xx)
        if response is not None:
            error_message += _describe_error_response(response)
        print(error_message)  # Debug print
        return input_image, None, error_message
    except Exception as e:
        error_message = f"An unexpected error occurred: {e}"
        print(error_message)  # Debug print
        return input_image, None, error_message


# --- Gradio Interface Setup ---

# Look up the API key in the environment once, at import time, and prepare the
# hint shown next to the key input field.
api_key_env = os.getenv("OPENAI_API_KEY")
if api_key_env:
    api_key_present_info = "OpenAI API Key found in environment variables."
else:
    api_key_present_info = "OpenAI API Key not found in environment variables. Please enter it below."

def process_image_request(prompt_input, api_key_input, uploaded_image):
    """
    Gradio click handler: resolve the API key and delegate to the API call.

    The key typed into the UI takes precedence; the environment variable
    value is used only when the input field is empty.

    Returns the (original image, result image, status message) triple produced
    by call_openai_image_api, in the order expected by the output components.
    The original image is None when generating and the uploaded image when
    editing.
    """
    effective_key = api_key_input or api_key_env
    return call_openai_image_api(prompt_input, effective_key, uploaded_image)


# Assemble the UI with Blocks, which gives explicit control over rows and columns
with gr.Blocks() as demo:
    # Page heading and short usage hint
    gr.Markdown("# OpenAI GPT-Image-1 Text-to-Image Demo")
    gr.Markdown("Enter a prompt to generate an image, or upload an image and enter a prompt to edit it.")

    with gr.Row():
        # Left column: everything the user provides
        with gr.Column(scale=1):
            prompt_input = gr.Textbox(
                label="Image Description (Prompt)",
                lines=3,
                placeholder="e.g., A futuristic cityscape at sunset",
            )
            gr.Markdown(f"*{api_key_present_info}*")
            api_key_input = gr.Textbox(
                label="OpenAI API Key",
                type="password",  # Mask the key while typing
                placeholder="Enter your key if not set in environment",
            )
            uploaded_image_input = gr.Image(
                type="pil",  # Hand the handler a PIL image
                label="Upload Image to Edit (Optional)",
            )
            submit_button = gr.Button("Generate / Edit Image")

        # Right column: status text above the before/after images
        with gr.Column(scale=2):
            status_output = gr.Textbox(label="Status", interactive=False)
            with gr.Row():
                original_image_output = gr.Image(
                    label="Original Image",
                    interactive=False,
                )
                result_image_output = gr.Image(
                    label="Generated / Edited Image",
                    interactive=False,
                )

    # Wire the button to the handler; the output order matches its return tuple
    submit_button.click(
        fn=process_image_request,
        inputs=[prompt_input, api_key_input, uploaded_image_input],
        outputs=[original_image_output, result_image_output, status_output],
    )

# Start the app only when this file is run as a script
if __name__ == "__main__":
    demo.launch()