import os
import openai
import gradio as gr
from transformers import BlipProcessor, BlipForConditionalGeneration
from dotenv import load_dotenv
import torch
from PIL import Image  # PIL을 사용하여 이미지를 열기 위해 추가

# Load environment variables from the .env file.
load_dotenv()

# Read and validate the OpenAI API key.
# An unset key AND an empty / whitespace-only key (e.g. a bare
# "OPENAI_API_KEY=" line) are both rejected, so a blank value fails fast at
# startup instead of being handed to the OpenAI client.
API_KEY = os.getenv("OPENAI_API_KEY")
if API_KEY is None or not API_KEY.strip():
    raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

openai.api_key = API_KEY  # Register the key with the OpenAI client module.

# Choose the compute device: CUDA when torch reports it available, else CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the BLIP image-captioning processor and model, then place the model
# on the selected device.
blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)

# Chat-completion helper (requests the "gpt-4o-mini" model); temperature and
# top_p can be tuned per call.
def call_api(content, system_message, max_tokens=500, temperature=0.6, top_p=1.0):
    """Send one system + user message pair to the chat model and return the reply.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Forwarded to the API as ``temperature``.
        top_p: Forwarded to the API as ``top_p``.

    Returns:
        The content of the first returned choice with surrounding whitespace
        stripped, or a string starting with ``"OpenAI API Error: "`` when the
        request raises ``openai.OpenAIError``.
    """
    conversation = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": content},
    ]
    request_options = {
        "model": "gpt-4o-mini",
        "messages": conversation,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }
    try:
        completion = openai.ChatCompletion.create(**request_options)
        reply_text = completion.choices[0].message['content']
        return reply_text.strip()
    except openai.OpenAIError as err:
        # API failures are reported as text rather than raised to the caller.
        return "OpenAI API Error: " + str(err)

# Per-style generation presets: style label -> (instruction text, temperature, top_p).
# The instruction text is appended after the caption / user-input header lines.
_STYLE_PRESETS = {
    "사실적인": (
        (
            "이 두 설명을 기반으로 있는 그대로의 사실만 간결하고 정확하게 묘사해 주세요. "
            "불필요한 배경 설명이나 추론은 피하고, 장면에 대한 정확한 정보만 제공해 주세요.\n\n"
            "예시: '테이블 위에 여러 그릇의 된장찌개와 다양한 음식들이 놓여져 있다. "
            "중앙에 뚝배기에 담긴 된장찌개가 있고, 그 옆에는 각종 반찬들이 놓여 있습니다.'"
        ),
        0.2,  # stay as close to the facts as possible
        0.7,  # restrain sampling diversity
    ),
    "감성적인": (
        (
            "이 두 설명을 참고해서 일상적이고 따뜻한 분위기의 글로 표현해 주세요. "
            "추가적인 설명이나 배경보다는 장면과 감정을 자연스럽게 전달하는 글을 써 주세요.\n\n"
            "예시: '된장찌개가 놓인 테이블에는 다양한 음식들이 정갈하게 차려져 있습니다. "
            "뜨끈한 된장찌개에서는 구수한 향이 풍기고, 그 옆에는 고기와 채소가 듬뿍 담긴 반찬들이 놓여 있어요. "
            "밥과 함께 먹기 좋은 음식들이 준비되어 있고, 집에서 정성스럽게 만든 따뜻한 느낌이 듭니다.'"
        ),
        0.7,  # more creative, emotional wording
        0.9,  # allow more diversity for richer expression
    ),
}


def generate_blog_post_in_korean(image_path, user_input, style):
    """Caption an image with BLIP and turn it into a styled description.

    Args:
        image_path: Value handed to ``PIL.Image.open`` (the uploaded file).
        user_input: The user's own description of the picture; inserted into
            the prompt verbatim.
        style: One of the keys of ``_STYLE_PRESETS`` ("사실적인" or "감성적인").

    Returns:
        Whatever ``call_api`` returns for the combined prompt (the generated
        text, or its error string when the API call fails).

    Raises:
        ValueError: If ``style`` is not a supported style label.
    """
    # Validate the style up front: an unknown value previously surfaced as an
    # UnboundLocalError only after the captioning work had already been done.
    try:
        instructions, temperature, top_p = _STYLE_PRESETS[style]
    except (KeyError, TypeError):
        supported = ", ".join(_STYLE_PRESETS)
        raise ValueError(
            f"Unsupported style {style!r}; expected one of: {supported}"
        ) from None

    # 1. Open the image. The context manager closes the underlying file, and
    #    convert("RGB") yields a fully loaded 3-channel copy so RGBA, palette
    #    or greyscale uploads are normalised before reaching the processor.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    # 2. Generate the image caption with BLIP.
    inputs = blip_processor(image, return_tensors="pt").to(device)
    out = blip_model.generate(**inputs)
    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

    # 3. Build the prompt: caption + user text header, then the style's instructions.
    combined_prompt = (
        f"이미지 설명: {image_caption}\n"
        f"사용자 입력: {user_input}\n\n"
    ) + instructions

    # 4. Ask the chat model for the final description.
    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)

# Handler that processes a single image.
def generate_blog_post_single(image, desc, style):
    """Generate a description for one image, or return "" when input is missing.

    Args:
        image: The uploaded image value; ``None`` means nothing was uploaded.
        desc: The user's description text.
        style: Style label forwarded to ``generate_blog_post_in_korean``.

    Returns:
        The generated text, or an empty string when there is no image or the
        description is missing / blank.
    """
    # Guard against a missing description (None) as well as blank text, so
    # .strip() is never called on None.
    if image is None or desc is None or not desc.strip():
        return ""
    return generate_blog_post_in_korean(image, desc, style)

# Gradio interface: takes exactly one image and one description.
# Input components are created in the order File, Textbox, Radio and bound to
# names so the Interface call below stays short.
_image_upload = gr.File(label="이미지 업로드")  # gr.File is used instead of gr.Image
_description_box = gr.Textbox(
    label="사진에 대한 설명 입력",
    placeholder="사진 설명을 입력하세요",
)
_style_selector = gr.Radio(
    ["사실적인", "감성적인"],
    label="설명 스타일 선택",
    value="사실적인",  # initial selection is given via `value` (not `default`)
)
_result_box = gr.Textbox(label="이미지 설명 결과")

iface = gr.Interface(
    fn=generate_blog_post_single,
    inputs=[_image_upload, _description_box, _style_selector],
    outputs=_result_box,
    title="이미지 설명 생성기",
    description="하나의 이미지와 텍스트를 바탕으로 최상의 한국어로 표현합니다.",
    allow_flagging="never",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch(share=True)