"""Gradio front end that asks Gemini to describe an uploaded image in Traditional Chinese."""

import logging
import os
from io import BytesIO

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

# Configure logging. The log messages contain Chinese text, so the file
# encoding is pinned to UTF-8 rather than left to the platform default.
logging.basicConfig(
    filename='app.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    encoding='utf-8',
)

# Initialise the Gemini API client from the GEMINI_API_KEY environment variable.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
client = genai.Client(api_key=GEMINI_API_KEY)


def explain_image(image: Image.Image) -> str:
    """Return a Traditional Chinese description of *image* produced by Gemini.

    Args:
        image: The PIL image supplied by the Gradio image component. Gradio
            passes ``None`` when the user has not uploaded anything.

    Returns:
        The text of the model's answer.

    Raises:
        gr.Error: If no image was provided, or the model returned no text.
        Exception: Any error raised by the Gemini client is logged and
            re-raised unchanged.
    """
    # Guard clause: clicking the button with no upload would otherwise fail
    # on ``image.save`` with an AttributeError.
    if image is None:
        raise gr.Error("請先上傳圖片。")

    # Serialise the PIL image to PNG bytes for the API request.
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_data = buffered.getvalue()

    # Build the multimodal prompt: the image bytes followed by the instruction.
    contents = [
        types.Part.from_bytes(data=image_data, mime_type="image/png"),
        types.Part.from_text(text="請用繁體中文說明這張圖片的內容。"),
    ]

    # Call the Gemini model; record failures in the log before propagating them.
    try:
        response = client.models.generate_content(
            model="gemini-1.5-flash",
            contents=contents,
            config=types.GenerateContentConfig(response_modalities=["TEXT"]),
        )
    except Exception:
        logging.exception("圖片說明取得失敗。")
        raise

    # ``response.text`` is used instead of indexing candidates/parts directly,
    # which would raise on an empty or blocked response.
    explanation = response.text
    if not explanation:
        logging.warning("Gemini 未回傳任何文字內容。")
        raise gr.Error("模型沒有回傳說明，請換一張圖片再試一次。")

    logging.info("圖片說明成功取得。")
    return explanation


# Gradio interface: image upload -> button -> text output.
with gr.Blocks() as demo:
    gr.Markdown("## 🧠 Gemini 圖片解釋器(圖 ➜ 文)")
    image_input = gr.Image(type="pil", label="上傳圖片")
    explain_button = gr.Button("解釋圖片")
    output_text = gr.Textbox(label="圖片說明", lines=5)

    explain_button.click(fn=explain_image, inputs=image_input, outputs=output_text)

if __name__ == "__main__":
    demo.launch()