# %%writefile llm_service.py
import asyncio
import os
from typing import Union
import numpy as np
from loguru import logger
from openai import AsyncOpenAI
from PIL import Image
from encode_image import encode_image
from string_utils import StringUtils
Image.MAX_IMAGE_PIXELS = None  # Disable PIL's decompression-bomb check; use with caution
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
class OpenAIService:
def __init__(self):
# self.llm_settings = getattr(settings.llm, settings.llm.name)
self.model_name = "o4-mini" # settings.llm.openai.model
self.temperature = 0.3 # settings.llm.openai.temperature
self.client = AsyncOpenAI(api_key=OPENAI_API_KEY)
        # Reasoning models that do not accept the `temperature` parameter.
        # See: https://platform.openai.com/docs/models
        self.deprecated_temperature_models = [
            "o4-mini",
            "o4",
            "o3-mini",
            "o3",
        ]  # settings.llm.openai.deprecated_temperature_models
@staticmethod
def encode_image(image: Union[str, np.ndarray]) -> str:
return encode_image(image=image)
    def get_temperature(self, temperature: float | None) -> dict:
        # Models listed in `deprecated_temperature_models` reject `temperature`,
        # so omit the key entirely for them.
        if self.model_name in self.deprecated_temperature_models:
            return {}
        return {
            "temperature": temperature if temperature is not None else self.temperature
        }
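    # Illustration (hypothetical values): with model_name="o4-mini",
    # get_temperature(0.7) returns {} because o-series models reject the
    # parameter; with a temperature-capable model such as "gpt-4o" it would
    # return {"temperature": 0.7}, and get_temperature(None) would fall back
    # to the configured default, {"temperature": 0.3}.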
async def chat_with_text(
self,
prompt: str,
return_as_json: bool = False,
retry_left: int = 3, # settings.llm.openai.retry_left,
temperature: float | None = None,
) -> str:
"""
Sends a text-based chat prompt to the OpenAI model.
Args:
prompt (str): User input text.
            return_as_json (bool): Whether to request the output as a JSON object.
            retry_left (int): Number of retries left.
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
Returns:
str: Response from the model.
"""
model_kwargs = {
"model": self.model_name,
"messages": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": prompt},
],
**self.get_temperature(temperature=temperature),
}
if return_as_json:
model_kwargs["response_format"] = {"type": "json_object"}
try:
response = await self.client.chat.completions.create(**model_kwargs)
except Exception as e:
if retry_left > 0:
                logger.warning(f"OpenAI API call failed: {e}. Retrying...")
                await asyncio.sleep(1)  # brief back-off (e.g., rate limit or quota exhaustion)
return await self.chat_with_text(
prompt=prompt,
return_as_json=return_as_json,
retry_left=retry_left - 1,
temperature=temperature,
)
else:
                logger.error(
                    f"OpenAI API call failed: {e}. Returning an empty string."
                )
return ""
return response.choices[0].message.content
async def chat_with_image(
self,
prompt: str,
image: str,
return_as_json: bool = False,
retry_left: int = 3, # settings.llm.openai.retry_left,
temperature: float | None = None,
) -> str:
"""
Sends an image along with a text prompt to the OpenAI model.
Args:
prompt (str): User input text.
            image (str): Path to an image file, or a base64-encoded image string.
            return_as_json (bool): Whether to request the output as a JSON object.
            retry_left (int): Number of retries left.
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
Returns:
str: Response from the model.
"""
if os.path.isfile(image):
base64_image = self.encode_image(image=image)
elif StringUtils.is_base64(image):
base64_image = image
        else:
            # Originally raised via ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()
            raise ValueError(
                "Unsupported input image type: expected a file path or a base64-encoded string."
            )
model_kwargs = {
"model": self.model_name,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
},
},
],
}
],
**self.get_temperature(temperature=temperature),
}
if return_as_json:
model_kwargs["response_format"] = {"type": "json_object"}
try:
response = await self.client.chat.completions.create(**model_kwargs)
except Exception as e:
if retry_left > 0:
                logger.warning(f"OpenAI API call failed: {e}. Retrying...")
                await asyncio.sleep(1)  # brief back-off (e.g., rate limit or quota exhaustion)
return await self.chat_with_image(
prompt=prompt,
image=image,
return_as_json=return_as_json,
retry_left=retry_left - 1,
temperature=temperature,
)
else:
                logger.error(
                    f"OpenAI API call failed: {e}. Returning an empty string."
                )
return ""
return response.choices[0].message.content
async def chat_with_multiple_images(
self,
prompt: str,
images: list[str],
return_as_json: bool = False,
retry_left: int = 3, # settings.llm.openai.retry_left,
temperature: float | None = None,
) -> str:
"""
Sends multiple images along with a text prompt to the OpenAI model.
Args:
prompt (str): User input text.
            images (list[str]): List of image file paths or base64-encoded image strings.
            return_as_json (bool): Whether to request the output as a JSON object.
            retry_left (int): Number of retries left.
temperature (float | None): Controls randomness in the response. Lower values make responses more focused and deterministic.
Returns:
            str: Response from the model.
"""
if len(images) == 0:
logger.warning("OpenAI chats with multiple images mode without any images")
base64_images = []
for image in images:
if os.path.isfile(image):
base64_images.append(self.encode_image(image=image))
elif StringUtils.is_base64(image):
base64_images.append(image)
            else:
                # Originally raised via ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()
                raise ValueError(
                    "Unsupported input image type: expected a file path or a base64-encoded string."
                )
model_kwargs = {
"model": self.model_name,
"messages": [
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
*[
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
},
}
for base64_image in base64_images
],
],
}
],
**self.get_temperature(temperature=temperature),
}
if return_as_json:
model_kwargs["response_format"] = {"type": "json_object"}
try:
response = await self.client.chat.completions.create(**model_kwargs)
except Exception as e:
if retry_left > 0:
                logger.warning(f"OpenAI API call failed: {e}. Retrying...")
                await asyncio.sleep(1)  # brief back-off (e.g., rate limit or quota exhaustion)
return await self.chat_with_multiple_images(
prompt=prompt,
images=images,
return_as_json=return_as_json,
retry_left=retry_left - 1,
temperature=temperature,
)
else:
                logger.error(
                    f"OpenAI API call failed: {e}. Returning an empty string."
                )
return ""
return response.choices[0].message.content
class LLMService:
@classmethod
def from_partner(cls):
return OpenAIService()
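

# Usage sketch (illustrative; requires OPENAI_API_KEY in the environment, and
# "sample.jpg" is a hypothetical local file):
if __name__ == "__main__":

    async def _demo() -> None:
        service = LLMService.from_partner()
        # Plain text chat.
        answer = await service.chat_with_text(prompt="Say hello in one word.")
        print(answer)
        # Image chat: `image` may be a file path or a base64-encoded string.
        caption = await service.chat_with_image(
            prompt="Describe this image in one sentence.", image="sample.jpg"
        )
        print(caption)

    asyncio.run(_demo())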