# llllkkkkkk / app.py
# AIRider's picture
# Create app.py
# 5625f85 verified
import os
import openai
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from dotenv import load_dotenv
import torch
from PIL import Image # PIL을 μ‚¬μš©ν•˜μ—¬ 이미지λ₯Ό μ—΄κΈ° μœ„ν•΄ μΆ”κ°€
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Read the OpenAI API key and fail fast when it is unusable. An empty value
# (for example a bare "OPENAI_API_KEY=" entry) is rejected the same way as a
# missing variable, instead of being accepted here and only failing later
# when the first API request is made.
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    raise ValueError("OPENAI_API_KEY ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")
openai.api_key = API_KEY  # Configure the OpenAI client with the key.

# Load the BLIP processor and captioning model from the same checkpoint.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Use the GPU when CUDA is available, otherwise fall back to the CPU, and
# move the model to the chosen device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip_model.to(device)
# Chat-completion helper with adjustable sampling (temperature, top_p).
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    """Send one system/user message pair to the chat model and return its reply.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling value, forwarded to the API.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped. If the request raises ``openai.OpenAIError``, a string
        beginning with ``"OpenAI API Error: "`` is returned instead of the
        exception being propagated.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        reply_text = completion.choices[0].message['content']
        return reply_text.strip()
    except openai.OpenAIError as api_error:
        # Errors are reported as text so the UI shows them in the output box.
        return f"OpenAI API Error: {api_error}"
# Caption an image with BLIP, then ask the chat model for a styled description.
def generate_blog_post_in_korean(image_path, user_input, style):
    """Generate a description from an image and the user's own note.

    The image is captioned with the module-level BLIP model; the caption and
    ``user_input`` are then combined with style-specific instructions and sent
    to ``call_api``.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        user_input: The user's free-text description of the photo.
        style: One of the two style labels handled below. The first selects
            the fact-only prompt with lower sampling values (0.2 / 0.7); the
            second selects the warmer prompt with higher values (0.7 / 0.9).

    Returns:
        The string returned by ``call_api`` for the assembled prompt.

    Raises:
        ValueError: If ``style`` is not one of the two supported labels.
    """
    # Resolve the style before any model work so an unsupported value fails
    # immediately with a clear error instead of leaving the prompt and the
    # sampling parameters undefined after the caption has been computed.
    if style == "사싀적인":
        instructions = (
            "이 두 μ„€λͺ…을 기반으둜 μžˆλŠ” κ·ΈλŒ€λ‘œμ˜ μ‚¬μ‹€λ§Œ κ°„κ²°ν•˜κ³  μ •ν™•ν•˜κ²Œ λ¬˜μ‚¬ν•΄ μ£Όμ„Έμš”. "
            "λΆˆν•„μš”ν•œ λ°°κ²½ μ„€λͺ…μ΄λ‚˜ 좔둠은 ν”Όν•˜κ³ , μž₯면에 λŒ€ν•œ μ •ν™•ν•œ μ •λ³΄λ§Œ μ œκ³΅ν•΄ μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: 'ν…Œμ΄λΈ” μœ„μ— μ—¬λŸ¬ κ·Έλ¦‡μ˜ 된μž₯μ°Œκ°œμ™€ λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ 놓여져 μžˆλ‹€. "
            "쀑앙에 λšλ°°κΈ°μ— λ‹΄κΈ΄ 된μž₯μ°Œκ°œκ°€ 있고, κ·Έ μ˜†μ—λŠ” 각쒅 λ°˜μ°¬λ“€μ΄ 놓여 μžˆμŠ΅λ‹ˆλ‹€.'"
        )
        temperature = 0.2
        top_p = 0.7
    elif style == "감성적인":
        instructions = (
            "이 두 μ„€λͺ…을 μ°Έκ³ ν•΄μ„œ 일상적이고 λ”°λœ»ν•œ λΆ„μœ„κΈ°μ˜ κΈ€λ‘œ ν‘œν˜„ν•΄ μ£Όμ„Έμš”. "
            "좔가적인 μ„€λͺ…μ΄λ‚˜ λ°°κ²½λ³΄λ‹€λŠ” μž₯λ©΄κ³Ό 감정을 μžμ—°μŠ€λŸ½κ²Œ μ „λ‹¬ν•˜λŠ” 글을 써 μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: '된μž₯μ°Œκ°œκ°€ 놓인 ν…Œμ΄λΈ”μ—λŠ” λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ μ •κ°ˆν•˜κ²Œ μ°¨λ €μ Έ μžˆμŠ΅λ‹ˆλ‹€. "
            "λœ¨λˆν•œ 된μž₯μ°Œκ°œμ—μ„œλŠ” κ΅¬μˆ˜ν•œ ν–₯이 풍기고, κ·Έ μ˜†μ—λŠ” 고기와 μ±„μ†Œκ°€ 듬뿍 λ‹΄κΈ΄ λ°˜μ°¬λ“€μ΄ 놓여 μžˆμ–΄μš”. "
            "λ°₯κ³Ό ν•¨κ»˜ λ¨ΉκΈ° 쒋은 μŒμ‹λ“€μ΄ μ€€λΉ„λ˜μ–΄ 있고, μ§‘μ—μ„œ μ •μ„±μŠ€λŸ½κ²Œ λ§Œλ“  λ”°λœ»ν•œ λŠλ‚Œμ΄ λ“­λ‹ˆλ‹€.'"
        )
        temperature = 0.7
        top_p = 0.9
    else:
        raise ValueError(f"Unsupported description style: {style!r}")

    # Open the image inside a context manager so the file handle is released,
    # and keep a detached in-memory RGB copy for the captioning model
    # (RGB conversion follows the usage shown in the BLIP examples).
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # Caption the image with BLIP on the configured device.
    inputs = blip_processor(image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

    # The caption/user-input header is shared by both styles; only the
    # instruction text that follows it differs.
    combined_prompt = (
        f"이미지 μ„€λͺ…: {image_caption}\n"
        f"μ‚¬μš©μž μž…λ ₯: {user_input}\n\n"
        f"{instructions}"
    )

    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)
# ν•˜λ‚˜μ˜ μ΄λ―Έμ§€λ§Œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
def generate_blog_post_single(image, desc, style):
if image is not None and desc.strip() != "":
result = generate_blog_post_in_korean(image, desc, style)
return result
else:
return "" # 이미지가 μ—†κ±°λ‚˜ μ„€λͺ…이 μ—†μœΌλ©΄ 빈 λ¬Έμžμ—΄ λ°˜ν™˜
# Gradio UI: one uploaded file, one free-text description and a style choice
# go in; a single text box with the generated description comes out.
upload_field = gr.File(label="이미지 μ—…λ‘œλ“œ")  # gr.File is used rather than gr.Image
description_field = gr.Textbox(
    label="사진에 λŒ€ν•œ μ„€λͺ… μž…λ ₯",
    placeholder="사진 μ„€λͺ…을 μž…λ ₯ν•˜μ„Έμš”",
)
style_field = gr.Radio(
    ["사싀적인", "감성적인"],
    label="μ„€λͺ… μŠ€νƒ€μΌ 선택",
    value="사싀적인",  # preselected option
)
result_field = gr.Textbox(label="이미지 μ„€λͺ… κ²°κ³Ό")

iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[upload_field, description_field, style_field],
    outputs=result_field,
    title="이미지 μ„€λͺ… 생성기",
    description="ν•˜λ‚˜μ˜ 이미지와 ν…μŠ€νŠΈλ₯Ό λ°”νƒ•μœΌλ‘œ μ΅œμƒμ˜ ν•œκ΅­μ–΄λ‘œ ν‘œν˜„ν•©λ‹ˆλ‹€.",
    allow_flagging="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)