# ImageGen / text2image.py
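"""Gradio demo that sends an uploaded image to the Gemini API and returns a
Traditional Chinese description of its contents (image ➜ text)."""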
import os
import logging
from io import BytesIO
from PIL import Image
import gradio as gr
from google import genai
from google.genai import types

# Configure logging
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Initialise the Gemini API client
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)
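# GEMINI_API_KEY must be provided via the environment (e.g. a Hugging Face
# Space secret or `export GEMINI_API_KEY=...`); without it the Gemini calls
# below cannot authenticate.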

# "Image-to-text" feature: describe an uploaded image
def explain_image(image: Image.Image):
    # Convert the PIL Image into raw PNG bytes, the format Gemini expects
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_data = buffered.getvalue()
    # Prepare the request contents: the image bytes plus a text prompt
    # (google-genai represents both as types.Part)
    contents = [
        types.Part.from_bytes(data=image_data, mime_type="image/png"),
        types.Part.from_text(text="請用繁體中文說明這張圖片的內容。"),
    ]
    # Call the Gemini model
    response = client.models.generate_content(
        model="gemini-1.5-flash",
        contents=contents,
        config=types.GenerateContentConfig(response_modalities=["TEXT"])
    )
    # Return the text of the first candidate answer
    explanation = response.candidates[0].content.parts[0].text
    logging.info("圖片說明成功取得。")
    return explanation
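
# Standalone usage sketch (hypothetical file name, outside the Gradio UI):
#   from PIL import Image
#   print(explain_image(Image.open("sample.png")))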

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")
    image_input = gr.Image(type="pil", label="上傳圖片")
    explain_button = gr.Button("解釋圖片")
    output_text = gr.Textbox(label="圖片說明", lines=5)
    explain_button.click(fn=explain_image, inputs=image_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()