# Hosting-page header captured together with this file (Spaces status: Running;
# file size: 5,285 bytes; revision 5625f85). The viewer's line-number gutter
# (1-106) followed here; none of this is program text.
import os
import openai
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from dotenv import load_dotenv
import torch
from PIL import Image # PILμ μ¬μ©νμ¬ μ΄λ―Έμ§λ₯Ό μ΄κΈ° μν΄ μΆκ°
# Load environment variables from the .env file.
load_dotenv()

# Read the OpenAI API key and fail fast when it is missing. An empty value
# (e.g. a bare "OPENAI_API_KEY=" line) is rejected too, so the problem shows
# up at startup rather than on the first API request.
API_KEY = os.getenv("OPENAI_API_KEY")
if not API_KEY:
    raise ValueError("OPENAI_API_KEY νκ²½ λ³μκ° μ€μ λμ§ μμμ΅λλ€.")
openai.api_key = API_KEY  # configure the OpenAI client with the key

# Load the BLIP image-captioning processor and model from the same checkpoint.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

# Use the GPU when CUDA is available, otherwise the CPU, and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
blip_model.to(device)
# Chat-completion helper (temperature and top_p are adjustable per call).
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    """Send one system/user message pair to the chat model and return its reply.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Token limit forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        top_p: ``top_p`` value forwarded to the API.

    Returns:
        The stripped content of the first choice. If the call raises
        ``openai.OpenAIError``, a string of the form
        ``"OpenAI API Error: <message>"`` is returned instead of raising.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    try:
        # NOTE(review): `openai.ChatCompletion` is the pre-1.0 interface of the
        # openai package -- confirm the pinned version before upgrading.
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
        )
        reply = completion.choices[0].message['content']
        return reply.strip()
    except openai.OpenAIError as err:
        # Errors are reported as text so the UI can display them.
        return f"OpenAI API Error: {err}"
# Caption the image with BLIP, then ask the chat model for a styled description.
def generate_blog_post_in_korean(image_path, user_input, style):
    """Generate a description of an image in the requested style.

    The image is captioned with the BLIP model, the caption is combined with
    the user's text into a style-specific prompt, and that prompt is sent to
    ``call_api`` with style-specific ``temperature`` / ``top_p`` values.

    Args:
        image_path: Image source handed to ``PIL.Image.open``.
        user_input: Free-text description supplied by the user.
        style: One of the two style labels compared against below.

    Returns:
        The string returned by ``call_api`` (the model's reply, or its
        error-message string).

    Raises:
        ValueError: If ``style`` is not one of the two supported labels.
    """
    # 1. Open the image and convert it to an RGB PIL image. The context manager
    #    releases the underlying file; convert() returns an independent copy,
    #    and RGB keeps palette / alpha / grayscale inputs usable by BLIP.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # 2. Generate the image caption (BLIP).
    inputs = blip_processor(image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

    # 3. Choose the prompt and temperature/top_p according to the style.
    if style == "μ¬μ€μ μΈ":
        combined_prompt = (
            f"μ΄λ―Έμ§ μ€λͺ: {image_caption}\n"
            f"μ¬μ©μ μλ ₯: {user_input}\n\n"
            "μ΄ λ μ€λͺμ κΈ°λ°μΌλ‘ μλ κ·Έλλ‘μ μ¬μ€λ§ κ°κ²°νκ³ μ ννκ² λ¬μ¬ν΄ μ£ΌμΈμ. "
            "λΆνμν λ°°κ²½ μ€λͺμ΄λ μΆλ‘ μ νΌνκ³ , μ₯λ©΄μ λν μ νν μ λ³΄λ§ μ κ³΅ν΄ μ£ΌμΈμ.\n\n"
            "μμ: 'νμ΄λΈ μμ μ¬λ¬ κ·Έλ¦μ λμ₯μ°κ°μ λ€μν μμλ€μ΄ λμ¬μ Έ μλ€. "
            "μ€μμ λλ°°κΈ°μ λ΄κΈ΄ λμ₯μ°κ°κ° μκ³ , κ·Έ μμλ κ°μ’λ°μ°¬λ€μ΄ λμ¬ μμ΅λλ€.'"
        )
        temperature = 0.2  # stay as close to the facts as possible
        top_p = 0.7  # limit the diversity of predictions
    elif style == "κ°μ±μ μΈ":
        combined_prompt = (
            f"μ΄λ―Έμ§ μ€λͺ: {image_caption}\n"
            f"μ¬μ©μ μλ ₯: {user_input}\n\n"
            "μ΄ λ μ€λͺμ μ°Έκ³ ν΄μ μΌμμ μ΄κ³ λ°λ»ν λΆμκΈ°μ κΈλ‘ ννν΄ μ£ΌμΈμ. "
            "μΆκ°μ μΈ μ€λͺμ΄λ 배경보λ€λ μ₯λ©΄κ³Ό κ°μ μ μμ°μ€λ½κ² μ λ¬νλ κΈμ μ¨ μ£ΌμΈμ.\n\n"
            "μμ: 'λμ₯μ°κ°κ° λμΈ νμ΄λΈμλ λ€μν μμλ€μ΄ μ κ°νκ² μ°¨λ €μ Έ μμ΅λλ€. "
            "λ¨λν λμ₯μ°κ°μμλ ꡬμν ν₯μ΄ νκΈ°κ³ , κ·Έ μμλ κ³ κΈ°μ μ±μκ° λ¬λΏ λ΄κΈ΄ λ°μ°¬λ€μ΄ λμ¬ μμ΄μ. "
            "λ°₯κ³Ό ν¨κ» λ¨ΉκΈ° μ’μ μμλ€μ΄ μ€λΉλμ΄ μκ³ , μ§μμ μ μ±μ€λ½κ² λ§λ λ°λ»ν λλμ΄ λλλ€.'"
        )
        temperature = 0.7  # more creative, emotional wording
        top_p = 0.9  # allow diversity for richer expression
    else:
        # Previously an unknown style fell through and crashed with
        # UnboundLocalError on `combined_prompt`; report it explicitly.
        raise ValueError(f"Unsupported style: {style!r}")

    # 4. Generate the description with the chat model.
    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)
# Entry point for the UI: handles exactly one image per call.
def generate_blog_post_single(image, desc, style):
    """Return the generated description for one image, or "" when input is missing.

    Args:
        image: The uploaded image value from the UI, or None when nothing was uploaded.
        desc: The user's text description.
        style: Style label forwarded to ``generate_blog_post_in_korean``.

    Returns:
        An empty string when ``image`` is None or ``desc`` is blank after
        stripping whitespace; otherwise the result of
        ``generate_blog_post_in_korean(image, desc, style)``.
    """
    # Guard clause: no image, or a blank description, yields an empty string.
    if image is None or not desc.strip():
        return ""
    return generate_blog_post_in_korean(image, desc, style)
# Gradio interface configuration: takes a single uploaded file, one text
# description and a style choice, and shows the generated text in a textbox.
iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[
        gr.File(label="μ΄λ―Έμ§ μλ‘λ"),  # changed from gr.Image to gr.File
        gr.Textbox(label="μ¬μ§μ λν μ€λͺμλ ₯", placeholder="μ¬μ§ μ€λͺμ μλ ₯νμΈμ"),
        gr.Radio(["μ¬μ€μ μΈ", "κ°μ±μ μΈ"], label="μ€λͺμ€νμΌ μ ν", value="μ¬μ€μ μΈ")  # `default` argument changed to `value`
    ],
    outputs=gr.Textbox(label="μ΄λ―Έμ§ μ€λͺκ²°κ³Ό"),
    title="μ΄λ―Έμ§ μ€λͺμμ±κΈ°",
    description="νλμ μ΄λ―Έμ§μ νμ€νΈλ₯Ό λ°νμΌλ‘ μ΅μμ νκ΅μ΄λ‘ ννν©λλ€.",
    allow_flagging="never"
)
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)