Spaces:

AIRider
/

llllkkkkkk

Running

App Files Files Community

llllkkkkkk / app.py

AIRider

Create app.py

5625f85 verified 11 months ago

raw

history blame contribute delete

5.29 kB

	import os
	import openai
	import gradio as gr
	from transformers import BlipProcessor, BlipForConditionalGeneration
	from dotenv import load_dotenv
	import torch
	from PIL import Image # PIL을 사용하여 이미지를 열기 위해 추가

	# Load environment variables from a local .env file, if one is present.
	load_dotenv()

	# Read the OpenAI API key and fail fast when it is unusable. An empty or
	# whitespace-only value (e.g. a bare "OPENAI_API_KEY=" line in .env) is
	# rejected too: it would pass an "is None" check and only fail later, when
	# the first request is made.
	API_KEY = os.getenv("OPENAI_API_KEY")
	if not API_KEY or not API_KEY.strip():
	    raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

	openai.api_key = API_KEY  # module-level key used by the openai calls below

	# Load the BLIP image-captioning processor and model once at import time and
	# move the model to the GPU when CUDA is available, otherwise keep it on CPU.
	blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
	blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	blip_model.to(device)

	# OpenAI chat-completion helper (model and sampling parameters are configurable).
	def call_api(
	    content,
	    system_message,
	    max_tokens=500,
	    temperature=0.6,
	    top_p=1.0,
	    model="gpt-4o-mini",
	):
	    """Send one system + user message pair to the OpenAI chat API.

	    Args:
	        content: Text sent as the "user" message.
	        system_message: Text sent as the "system" message.
	        max_tokens: Upper bound on generated tokens, forwarded to the API.
	        temperature: Sampling temperature, forwarded to the API.
	        top_p: Nucleus-sampling value, forwarded to the API.
	        model: Name of the chat model to call. Defaults to "gpt-4o-mini",
	            the model this helper previously had hard-coded.

	    Returns:
	        The whitespace-stripped text of the first returned choice, or a
	        string beginning with "OpenAI API Error: " when the OpenAI library
	        raises an error (the error is reported as text, not re-raised).
	    """
	    # NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the
	    # openai package - confirm the pinned package version still provides it.
	    try:
	        response = openai.ChatCompletion.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": system_message},
	                {"role": "user", "content": content},
	            ],
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )
	    except openai.OpenAIError as e:
	        return f"OpenAI API Error: {str(e)}"

	    # Treat a missing (None) message body as empty text instead of failing
	    # on None.strip().
	    text = response.choices[0].message['content']
	    return (text or "").strip()

	# Per-style prompt instructions and sampling parameters.
	def _style_settings(style):
	    """Return (instructions, temperature, top_p) for a description style.

	    Raises:
	        ValueError: If ``style`` is not one of the two supported styles.
	    """
	    if style == "사실적인":
	        instructions = (
	            "이 두 설명을 기반으로 있는 그대로의 사실만 간결하고 정확하게 묘사해 주세요. "
	            "불필요한 배경 설명이나 추론은 피하고, 장면에 대한 정확한 정보만 제공해 주세요.\n\n"
	            "예시: '테이블 위에 여러 그릇의 된장찌개와 다양한 음식들이 놓여져 있다. "
	            "중앙에 뚝배기에 담긴 된장찌개가 있고, 그 옆에는 각종 반찬들이 놓여 있습니다.'"
	        )
	        # Low temperature / narrower top_p: stay close to the stated facts.
	        return instructions, 0.2, 0.7
	    if style == "감성적인":
	        instructions = (
	            "이 두 설명을 참고해서 일상적이고 따뜻한 분위기의 글로 표현해 주세요. "
	            "추가적인 설명이나 배경보다는 장면과 감정을 자연스럽게 전달하는 글을 써 주세요.\n\n"
	            "예시: '된장찌개가 놓인 테이블에는 다양한 음식들이 정갈하게 차려져 있습니다. "
	            "뜨끈한 된장찌개에서는 구수한 향이 풍기고, 그 옆에는 고기와 채소가 듬뿍 담긴 반찬들이 놓여 있어요. "
	            "밥과 함께 먹기 좋은 음식들이 준비되어 있고, 집에서 정성스럽게 만든 따뜻한 느낌이 듭니다.'"
	        )
	        # Higher temperature / wider top_p: allow more expressive wording.
	        return instructions, 0.7, 0.9
	    raise ValueError(f"지원하지 않는 스타일입니다: {style!r}")


	def generate_blog_post_in_korean(image_path, user_input, style):
	    """Caption an image with BLIP and turn it into a styled description.

	    Args:
	        image_path: Image location handed to ``PIL.Image.open``.
	            NOTE(review): presumably whatever the gr.File input yields
	            (path string or temp-file object) - confirm for the installed
	            Gradio version.
	        user_input: Free-text description supplied by the user; embedded
	            verbatim in the prompt.
	        style: "사실적인" (factual) or "감성적인" (emotional).

	    Returns:
	        The string returned by ``call_api`` for the combined prompt.

	    Raises:
	        ValueError: If ``style`` is not a supported style. Previously this
	            surfaced as an UnboundLocalError after captioning had already
	            run.
	    """
	    # Resolve the style first so an unsupported value fails before the
	    # comparatively expensive captioning step.
	    instructions, temperature, top_p = _style_settings(style)

	    # 1. Open the image, normalise it to RGB (palette, alpha and greyscale
	    #    uploads become 3-channel), and release the file handle right away.
	    with Image.open(image_path) as source_image:
	        image = source_image.convert("RGB")

	    # 2. Generate an image caption with BLIP.
	    inputs = blip_processor(image, return_tensors="pt").to(device)
	    out = blip_model.generate(**inputs)
	    image_caption = blip_processor.decode(out[0], skip_special_tokens=True)

	    # 3. Combine the caption, the user's text and the style instructions.
	    combined_prompt = (
	        f"이미지 설명: {image_caption}\n"
	        f"사용자 입력: {user_input}\n\n"
	        f"{instructions}"
	    )

	    # 4. Ask the chat model for the final description.
	    system_message = "You are an AI assistant that generates either factual or emotional descriptions based on image descriptions and user input."
	    return call_api(combined_prompt, system_message, temperature=temperature, top_p=top_p)

	# Gradio callback: handles a single image plus its description.
	def generate_blog_post_single(image, desc, style):
	    """Generate a description for one image, or "" when input is incomplete.

	    Args:
	        image: The uploaded image value; ``None`` when nothing was uploaded.
	        desc: The user's description text. ``None``, an empty string, or a
	            whitespace-only string all count as "no description".
	        style: Description style, passed through to
	            ``generate_blog_post_in_korean``.

	    Returns:
	        The generated description, or an empty string when the image or the
	        description is missing.
	    """
	    # Guard clause: a None description is treated like an empty one instead
	    # of raising AttributeError on None.strip().
	    if image is None or not (desc or "").strip():
	        return ""
	    return generate_blog_post_in_korean(image, desc, style)

	# Gradio UI: one uploaded image, one free-text description, one style choice.
	def _build_interface():
	    """Assemble the Gradio interface that wraps generate_blog_post_single."""
	    # The image arrives through a generic file upload (gr.File), not gr.Image.
	    image_upload = gr.File(label="이미지 업로드")
	    description_box = gr.Textbox(label="사진에 대한 설명 입력", placeholder="사진 설명을 입력하세요")
	    # The initial selection is given through `value`.
	    style_choice = gr.Radio(["사실적인", "감성적인"], label="설명 스타일 선택", value="사실적인")
	    result_box = gr.Textbox(label="이미지 설명 결과")

	    return gr.Interface(
	        fn=generate_blog_post_single,
	        inputs=[image_upload, description_box, style_choice],
	        outputs=result_box,
	        title="이미지 설명 생성기",
	        description="하나의 이미지와 텍스트를 바탕으로 최상의 한국어로 표현합니다.",
	        allow_flagging="never",
	    )


	iface = _build_interface()

	# Start the app (with a public share link) only when run as a script.
	if __name__ == "__main__":
	    iface.launch(share=True)