File size: 5,285 Bytes
5625f85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import openai
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from dotenv import load_dotenv
import torch
from PIL import Image  # Added so that images can be opened with PIL

# Load environment variables from the .env file.
load_dotenv()

# Read the OpenAI API key and fail fast when it is unusable. An empty or
# whitespace-only value is rejected along with a missing one, because it would
# otherwise only surface as an error on the first API request.
API_KEY = os.getenv("OPENAI_API_KEY")
if API_KEY is None or not API_KEY.strip():
    raise ValueError("OPENAI_API_KEY ν™˜κ²½ λ³€μˆ˜κ°€ μ„€μ •λ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€.")

openai.api_key = API_KEY  # Hand the key to the OpenAI client library

# Load the BLIP image-captioning processor and model from a single checkpoint
# name, then move the model to CUDA when it is available and to the CPU otherwise.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
blip_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
blip_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
blip_model.to(device)

# Chat-completion helper for the OpenAI API with adjustable sampling settings.
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    """Send one system/user message pair to the chat model and return the reply.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Forwarded to the API as ``temperature``.
        top_p: Forwarded to the API as ``top_p``.

    Returns:
        The stripped content of the first returned choice. When the call
        raises ``openai.OpenAIError`` the error is not propagated; a string of
        the form ``"OpenAI API Error: <error text>"`` is returned instead.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    request_options = {
        "model": "gpt-4o-mini",
        "messages": conversation,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        completion = openai.ChatCompletion.create(**request_options)
        first_reply = completion.choices[0].message
        return first_reply['content'].strip()
    except openai.OpenAIError as err:
        return "OpenAI API Error: " + str(err)

# Describe one image in the requested style by combining a BLIP caption with
# the user's own text and sending both to the chat model.
def generate_blog_post_in_korean(image_path, user_input, style):
    """Generate a styled description of an image.

    Args:
        image_path: Path or file object that ``PIL.Image.open`` can open.
        user_input: The user's own description; embedded in the prompt.
        style: One of the two style labels handled below. It selects the
            instruction text and the ``temperature`` / ``top_p`` values.

    Returns:
        The string returned by ``call_api`` for the assembled prompt.

    Raises:
        ValueError: If ``style`` is not one of the supported labels.
    """
    # 1. Resolve the style first, so an unsupported value fails immediately
    #    with a clear error instead of an unbound-variable error after the
    #    (comparatively expensive) captioning step has already run.
    if style == "사싀적인":
        instruction = (
            "이 두 μ„€λͺ…을 기반으둜 μžˆλŠ” κ·ΈλŒ€λ‘œμ˜ μ‚¬μ‹€λ§Œ κ°„κ²°ν•˜κ³  μ •ν™•ν•˜κ²Œ λ¬˜μ‚¬ν•΄ μ£Όμ„Έμš”. "
            "λΆˆν•„μš”ν•œ λ°°κ²½ μ„€λͺ…μ΄λ‚˜ 좔둠은 ν”Όν•˜κ³ , μž₯면에 λŒ€ν•œ μ •ν™•ν•œ μ •λ³΄λ§Œ μ œκ³΅ν•΄ μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: 'ν…Œμ΄λΈ” μœ„μ— μ—¬λŸ¬ κ·Έλ¦‡μ˜ 된μž₯μ°Œκ°œμ™€ λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ 놓여져 μžˆλ‹€. "
            "쀑앙에 λšλ°°κΈ°μ— λ‹΄κΈ΄ 된μž₯μ°Œκ°œκ°€ 있고, κ·Έ μ˜†μ—λŠ” 각쒅 λ°˜μ°¬λ“€μ΄ 놓여 μžˆμŠ΅λ‹ˆλ‹€.'"
        )
        temperature = 0.2  # Low value: stay close to the facts
        top_p = 0.7  # Restrain sampling diversity
    elif style == "감성적인":
        instruction = (
            "이 두 μ„€λͺ…을 μ°Έκ³ ν•΄μ„œ 일상적이고 λ”°λœ»ν•œ λΆ„μœ„κΈ°μ˜ κΈ€λ‘œ ν‘œν˜„ν•΄ μ£Όμ„Έμš”. "
            "좔가적인 μ„€λͺ…μ΄λ‚˜ λ°°κ²½λ³΄λ‹€λŠ” μž₯λ©΄κ³Ό 감정을 μžμ—°μŠ€λŸ½κ²Œ μ „λ‹¬ν•˜λŠ” 글을 써 μ£Όμ„Έμš”.\n\n"
            "μ˜ˆμ‹œ: '된μž₯μ°Œκ°œκ°€ 놓인 ν…Œμ΄λΈ”μ—λŠ” λ‹€μ–‘ν•œ μŒμ‹λ“€μ΄ μ •κ°ˆν•˜κ²Œ μ°¨λ €μ Έ μžˆμŠ΅λ‹ˆλ‹€. "
            "λœ¨λˆν•œ 된μž₯μ°Œκ°œμ—μ„œλŠ” κ΅¬μˆ˜ν•œ ν–₯이 풍기고, κ·Έ μ˜†μ—λŠ” 고기와 μ±„μ†Œκ°€ 듬뿍 λ‹΄κΈ΄ λ°˜μ°¬λ“€μ΄ 놓여 μžˆμ–΄μš”. "
            "λ°₯κ³Ό ν•¨κ»˜ λ¨ΉκΈ° 쒋은 μŒμ‹λ“€μ΄ μ€€λΉ„λ˜μ–΄ 있고, μ§‘μ—μ„œ μ •μ„±μŠ€λŸ½κ²Œ λ§Œλ“  λ”°λœ»ν•œ λŠλ‚Œμ΄ λ“­λ‹ˆλ‹€.'"
        )
        temperature = 0.7  # Higher value: freer, more expressive wording
        top_p = 0.9  # Allow more diversity for richer wording
    else:
        raise ValueError(f"Unsupported style: {style!r}")

    # 2. Open the image and copy it into an RGB image. ``convert`` loads the
    #    pixel data, so the underlying file can be closed right away instead
    #    of staying open for the lifetime of the process.
    with Image.open(image_path) as opened_image:
        image = opened_image.convert("RGB")

    # 3. Caption the image with BLIP.
    inputs = blip_processor(image, return_tensors="pt").to(device)
    generated_ids = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(generated_ids[0], skip_special_tokens=True)

    # 4. Assemble the prompt: caption and user text first, then the
    #    style-specific instruction chosen above.
    combined_prompt = (
        f"이미지 μ„€λͺ…: {image_caption}\n"
        f"μ‚¬μš©μž μž…λ ₯: {user_input}\n\n"
    ) + instruction

    # 5. Ask the chat model for the final description.
    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)

# ν•˜λ‚˜μ˜ μ΄λ―Έμ§€λ§Œ μ²˜λ¦¬ν•˜λŠ” ν•¨μˆ˜
def generate_blog_post_single(image, desc, style):
    """Generate a description for a single image, or nothing if input is missing.

    Args:
        image: The uploaded image, passed on unchanged to
            ``generate_blog_post_in_korean``; ``None`` when nothing was given.
        desc: The user's text description of the image; may be ``None``.
        style: Style label, passed on unchanged.

    Returns:
        The result of ``generate_blog_post_in_korean``, or an empty string
        when ``image`` is ``None`` or ``desc`` is ``None``, empty or only
        whitespace.
    """
    # Guard clause: both an image and a non-blank description are required.
    # A ``None`` description is treated like an empty one rather than raising
    # AttributeError on ``.strip()``.
    if image is None or desc is None or not desc.strip():
        return ""
    return generate_blog_post_in_korean(image, desc, style)

# Gradio interface: takes a single uploaded image, a text description and a
# style choice, and shows the generated text in one output box.
image_upload = gr.File(label="이미지 μ—…λ‘œλ“œ")  # gr.File is used here instead of gr.Image
description_box = gr.Textbox(
    label="사진에 λŒ€ν•œ μ„€λͺ… μž…λ ₯",
    placeholder="사진 μ„€λͺ…을 μž…λ ₯ν•˜μ„Έμš”",
)
style_choice = gr.Radio(
    ["사싀적인", "감성적인"],
    label="μ„€λͺ… μŠ€νƒ€μΌ 선택",
    value="사싀적인",  # Initial selection is given through `value`
)
result_box = gr.Textbox(label="이미지 μ„€λͺ… κ²°κ³Ό")

iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[image_upload, description_box, style_choice],
    outputs=result_box,
    title="이미지 μ„€λͺ… 생성기",
    description="ν•˜λ‚˜μ˜ 이미지와 ν…μŠ€νŠΈλ₯Ό λ°”νƒ•μœΌλ‘œ μ΅œμƒμ˜ ν•œκ΅­μ–΄λ‘œ ν‘œν˜„ν•©λ‹ˆλ‹€.",
    allow_flagging="never",
)

# Launch the interface only when this file is run as a script.
if __name__ == "__main__":
    iface.launch(share=True)