|
|
|
import asyncio |
|
import os |
|
from typing import Union |
|
|
|
import numpy as np |
|
from loguru import logger |
|
from openai import AsyncOpenAI |
|
from PIL import Image |
|
|
|
from encode_image import encode_image |
|
from string_utils import StringUtils |
|
|
|
# Disable Pillow's decompression-bomb guard so arbitrarily large images can be
# opened. NOTE(review): this removes a safety limit against maliciously large
# images — confirm that all image inputs come from trusted sources.
Image.MAX_IMAGE_PIXELS = None

# API key read from the environment; defaults to "" when unset (requests will
# then fail at call time rather than at import time).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
|
|
|
|
|
class OpenAIService: |
|
def __init__(self): |
|
|
|
self.model_name = "o4-mini" |
|
self.temperature = 0.3 |
|
self.client = AsyncOpenAI(api_key=OPENAI_API_KEY) |
|
|
|
self.deprecated_temperature_models = [ |
|
"o4-mini", |
|
"o4", |
|
"o3-mini", |
|
"o3", |
|
] |
|
|
|
@staticmethod |
|
def encode_image(image: Union[str, np.ndarray]) -> str: |
|
return encode_image(image=image) |
|
|
|
def get_temperature(self, temperature: float | None) -> dict: |
|
return ( |
|
{ |
|
"temperature": temperature |
|
if temperature is not None |
|
else self.temperature |
|
} |
|
if self.model_name not in self.deprecated_temperature_models |
|
else {} |
|
) |
|
|
|
async def chat_with_text( |
|
self, |
|
prompt: str, |
|
return_as_json: bool = False, |
|
retry_left: int = 3, |
|
temperature: float | None = None, |
|
) -> str: |
|
""" |
|
Sends a text-based chat prompt to the OpenAI model. |
|
|
|
Args: |
|
prompt (str): User input text. |
|
return_as_json (bool): whether to generate output as a json object |
|
retry_left (int): number of retries left |
|
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic. |
|
|
|
Returns: |
|
str: Response from the model. |
|
""" |
|
|
|
model_kwargs = { |
|
"model": self.model_name, |
|
"messages": [ |
|
{"role": "system", "content": "You are a helpful assistant."}, |
|
{"role": "user", "content": prompt}, |
|
], |
|
**self.get_temperature(temperature=temperature), |
|
} |
|
|
|
if return_as_json: |
|
model_kwargs["response_format"] = {"type": "json_object"} |
|
|
|
try: |
|
response = await self.client.chat.completions.create(**model_kwargs) |
|
except Exception as e: |
|
if retry_left > 0: |
|
logger.warning(f"OpenAI API calling failed due to {e}. Retry!") |
|
await asyncio.sleep(1) |
|
return await self.chat_with_text( |
|
prompt=prompt, |
|
return_as_json=return_as_json, |
|
retry_left=retry_left - 1, |
|
temperature=temperature, |
|
) |
|
else: |
|
logger.error( |
|
f"OpenAI API calling failed due to {e}. Return empty string!" |
|
) |
|
return "" |
|
|
|
return response.choices[0].message.content |
|
|
|
async def chat_with_image( |
|
self, |
|
prompt: str, |
|
image: str, |
|
return_as_json: bool = False, |
|
retry_left: int = 3, |
|
temperature: float | None = None, |
|
) -> str: |
|
""" |
|
Sends an image along with a text prompt to the OpenAI model. |
|
|
|
Args: |
|
prompt (str): User input text. |
|
image_path (str): Path to the image file. |
|
return_as_json (bool): whether to generate output as a json object |
|
retry_left (int): number of retries left |
|
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic. |
|
|
|
Returns: |
|
str: Response from the model. |
|
""" |
|
if os.path.isfile(image): |
|
base64_image = self.encode_image(image=image) |
|
elif StringUtils.is_base64(image): |
|
base64_image = image |
|
else: |
|
raise Exception( |
|
"ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()" |
|
) |
|
|
|
model_kwargs = { |
|
"model": self.model_name, |
|
"messages": [ |
|
{ |
|
"role": "user", |
|
"content": [ |
|
{"type": "text", "text": prompt}, |
|
{ |
|
"type": "image_url", |
|
"image_url": { |
|
"url": f"data:image/jpeg;base64,{base64_image}" |
|
}, |
|
}, |
|
], |
|
} |
|
], |
|
**self.get_temperature(temperature=temperature), |
|
} |
|
|
|
if return_as_json: |
|
model_kwargs["response_format"] = {"type": "json_object"} |
|
|
|
try: |
|
response = await self.client.chat.completions.create(**model_kwargs) |
|
except Exception as e: |
|
if retry_left > 0: |
|
logger.warning(f"OpenAI API calling failed due to {e}. Retry!") |
|
await asyncio.sleep(1) |
|
return await self.chat_with_image( |
|
prompt=prompt, |
|
image=image, |
|
return_as_json=return_as_json, |
|
retry_left=retry_left - 1, |
|
temperature=temperature, |
|
) |
|
else: |
|
logger.error( |
|
f"OpenAI API calling failed due to {e}. Return empty string!" |
|
) |
|
return "" |
|
return response.choices[0].message.content |
|
|
|
async def chat_with_multiple_images( |
|
self, |
|
prompt: str, |
|
images: list[str], |
|
return_as_json: bool = False, |
|
retry_left: int = 3, |
|
temperature: float | None = None, |
|
) -> str: |
|
""" |
|
Sends multiple images along with a text prompt to the OpenAI model. |
|
Args: |
|
prompt (str): User input text. |
|
images (list[str]): List of base64 encoded images. |
|
return_as_json (bool): whether to generate output as a json object |
|
retry_left (int): number of retries left |
|
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic. |
|
Returns: |
|
list[str]: Responses from the model for each image. |
|
""" |
|
if len(images) == 0: |
|
logger.warning("OpenAI chats with multiple images mode without any images") |
|
|
|
base64_images = [] |
|
for image in images: |
|
if os.path.isfile(image): |
|
base64_images.append(self.encode_image(image=image)) |
|
elif StringUtils.is_base64(image): |
|
base64_images.append(image) |
|
else: |
|
raise Exception( |
|
"ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()" |
|
) |
|
|
|
model_kwargs = { |
|
"model": self.model_name, |
|
"messages": [ |
|
{ |
|
"role": "user", |
|
"content": [ |
|
{"type": "text", "text": prompt}, |
|
*[ |
|
{ |
|
"type": "image_url", |
|
"image_url": { |
|
"url": f"data:image/jpeg;base64,{base64_image}" |
|
}, |
|
} |
|
for base64_image in base64_images |
|
], |
|
], |
|
} |
|
], |
|
**self.get_temperature(temperature=temperature), |
|
} |
|
|
|
if return_as_json: |
|
model_kwargs["response_format"] = {"type": "json_object"} |
|
|
|
try: |
|
response = await self.client.chat.completions.create(**model_kwargs) |
|
except Exception as e: |
|
if retry_left > 0: |
|
logger.warning(f"OpenAI API calling failed due to {e}. Retry!") |
|
await asyncio.sleep(1) |
|
return await self.chat_with_multiple_images( |
|
prompt=prompt, |
|
images=images, |
|
return_as_json=return_as_json, |
|
retry_left=retry_left - 1, |
|
temperature=temperature, |
|
) |
|
else: |
|
logger.error( |
|
f"OpenAI API calling failed due to {e}. Return empty list!" |
|
) |
|
return "" |
|
|
|
return response.choices[0].message.content |
|
|
|
|
|
class LLMService:
    """Factory facade that hides which vendor backs the LLM service."""

    @classmethod
    def from_partner(cls):
        """Return the partner-backed LLM service (currently OpenAI)."""
        backend = OpenAIService
        return backend()
|
|