# %%writefile llm_service.py
"""Async helpers for sending text and image prompts to OpenAI chat models."""
import asyncio
import os
from typing import Union

import numpy as np
from loguru import logger
from openai import AsyncOpenAI
from PIL import Image

from encode_image import encode_image
from string_utils import StringUtils

Image.MAX_IMAGE_PIXELS = None  # Removes the limit, use with caution
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")


class OpenAIService:
    """Async client for OpenAI chat completions with text and image prompts.

    Every ``chat_with_*`` method retries a failed API call and, once the
    retries are used up, logs the error and returns an empty string instead
    of raising.
    """

    def __init__(self):
        # self.llm_settings = getattr(settings.llm, settings.llm.name)
        self.model_name = "o4-mini"  # settings.llm.openai.model
        self.temperature = 0.3  # settings.llm.openai.temperature
        self.client = AsyncOpenAI(api_key=OPENAI_API_KEY)

        # Models for which ``get_temperature`` omits the temperature argument.
        # Follow the documentation: https://platform.openai.com/docs/models
        self.deprecated_temperature_models = [
            "o4-mini",
            "o4",
            "o3-mini",
            "o3",
        ]  # settings.llm.openai.deprecated_temperature_models

    @staticmethod
    def encode_image(image: Union[str, np.ndarray]) -> str:
        """Delegate to the module-level ``encode_image`` helper."""
        return encode_image(image=image)

    def get_temperature(self, temperature: float | None) -> dict:
        """Build the temperature keyword arguments for a completion request.

        Args:
            temperature (float | None): Per-call override; ``None`` falls back
                to ``self.temperature``.

        Returns:
            dict: ``{"temperature": value}``, or an empty dict when
            ``self.model_name`` is listed in
            ``self.deprecated_temperature_models``.
        """
        if self.model_name in self.deprecated_temperature_models:
            return {}
        return {
            "temperature": self.temperature if temperature is None else temperature
        }

    def _build_model_kwargs(
        self,
        messages: list[dict],
        return_as_json: bool,
        temperature: float | None,
    ) -> dict:
        """Assemble the keyword arguments for ``chat.completions.create``."""
        model_kwargs = {
            "model": self.model_name,
            "messages": messages,
            **self.get_temperature(temperature=temperature),
        }
        if return_as_json:
            model_kwargs["response_format"] = {"type": "json_object"}
        return model_kwargs

    def _to_base64(self, image: str) -> str:
        """Return ``image`` as base64 text, encoding it first if it is a file path.

        Raises:
            Exception: If ``image`` is neither an existing file nor a value
                accepted by ``StringUtils.is_base64``.
        """
        if os.path.isfile(image):
            return self.encode_image(image=image)
        if StringUtils.is_base64(image):
            return image
        raise Exception("ServiceAiError.UNSUPPORT_INPUT_IMAGE_TYPE.as_http_exception()")

    @staticmethod
    def _image_part(base64_image: str) -> dict:
        """Wrap base64 image data in an ``image_url`` message content part."""
        # NOTE(review): the MIME type is always image/jpeg, whatever the source
        # format was -- confirm that encode_image produces JPEG data.
        return {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
        }

    async def _request_completion(self, model_kwargs: dict, retry_left: int) -> str:
        """Call the chat-completions endpoint, retrying on failure.

        Args:
            model_kwargs (dict): Arguments passed to ``chat.completions.create``.
            retry_left (int): Extra attempts allowed after a failed call.

        Returns:
            str: Content of the first choice, or an empty string when the
            content is missing or every attempt failed.
        """
        while True:
            try:
                response = await self.client.chat.completions.create(**model_kwargs)
            except Exception as e:
                if retry_left <= 0:
                    logger.error(
                        f"OpenAI API calling failed due to {e}. Return empty string!"
                    )
                    return ""
                logger.warning(f"OpenAI API calling failed due to {e}. Retry!")
                await asyncio.sleep(1)  # quota out
                retry_left -= 1
            else:
                # ``content`` may be None; keep the declared ``str`` return type.
                return response.choices[0].message.content or ""

    async def chat_with_text(
        self,
        prompt: str,
        return_as_json: bool = False,
        retry_left: int = 3,  # settings.llm.openai.retry_left,
        temperature: float | None = None,
    ) -> str:
        """
        Sends a text-based chat prompt to the OpenAI model.

        Args:
            prompt (str): User input text.
            return_as_json (bool): whether to generate output as a json object
            retry_left (int): number of retries left
            temperature (float | None): Controls randomness in the response.
                Lower values make responses more focused and deterministic.

        Returns:
            str: Response from the model, or an empty string if every attempt
            failed.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ]
        model_kwargs = self._build_model_kwargs(
            messages=messages,
            return_as_json=return_as_json,
            temperature=temperature,
        )
        return await self._request_completion(model_kwargs, retry_left)

    async def chat_with_image(
        self,
        prompt: str,
        image: str,
        return_as_json: bool = False,
        retry_left: int = 3,  # settings.llm.openai.retry_left,
        temperature: float | None = None,
    ) -> str:
        """
        Sends an image along with a text prompt to the OpenAI model.

        Args:
            prompt (str): User input text.
            image (str): Path to an image file, or an already base64 encoded
                image.
            return_as_json (bool): whether to generate output as a json object
            retry_left (int): number of retries left
            temperature (float | None): Controls randomness in the response.
                Lower values make responses more focused and deterministic.

        Returns:
            str: Response from the model, or an empty string if every attempt
            failed.

        Raises:
            Exception: If ``image`` is neither an existing file nor base64 data.
        """
        base64_image = self._to_base64(image)
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    self._image_part(base64_image),
                ],
            }
        ]
        model_kwargs = self._build_model_kwargs(
            messages=messages,
            return_as_json=return_as_json,
            temperature=temperature,
        )
        return await self._request_completion(model_kwargs, retry_left)

    async def chat_with_multiple_images(
        self,
        prompt: str,
        images: list[str],
        return_as_json: bool = False,
        retry_left: int = 3,  # settings.llm.openai.retry_left,
        temperature: float | None = None,
    ) -> str:
        """
        Sends multiple images along with a text prompt to the OpenAI model.

        All images go into a single request, so one response is returned.

        Args:
            prompt (str): User input text.
            images (list[str]): Image file paths or base64 encoded images.
            return_as_json (bool): whether to generate output as a json object
            retry_left (int): number of retries left
            temperature (float | None): Controls randomness in the response.
                Lower values make responses more focused and deterministic.

        Returns:
            str: Response from the model, or an empty string if every attempt
            failed.

        Raises:
            Exception: If any entry of ``images`` is neither an existing file
                nor base64 data.
        """
        if not images:
            # Best effort: the request is still sent with the text prompt only.
            logger.warning("OpenAI chats with multiple images mode without any images")

        base64_images = [self._to_base64(image) for image in images]
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    *(self._image_part(encoded) for encoded in base64_images),
                ],
            }
        ]
        model_kwargs = self._build_model_kwargs(
            messages=messages,
            return_as_json=return_as_json,
            temperature=temperature,
        )
        return await self._request_completion(model_kwargs, retry_left)


class LLMService:
    """Factory for the LLM backend service."""

    @classmethod
    def from_partner(cls) -> OpenAIService:
        """Return a new ``OpenAIService`` instance."""
        return OpenAIService()