import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """Stream a chat completion from an OpenAI-compatible backend.

    Output is buffered and flushed on a newline or once ~150 characters
    accumulate, to improve incremental LaTeX rendering on the consumer side.

    Args:
        api_key: Bearer token for the backend.
        api_base: Base URL of the OpenAI-compatible API.
        message: The new user message to append to the conversation.
        system_prompt: System prompt placed first in the message list.
        model_name: Model identifier to request.
        chat_history: Prior messages as ``{"role", "content"}`` dicts,
            oldest first; inserted between the system prompt and ``message``.
        temperature: Sampling temperature.
        frequency_penalty: Frequency penalty forwarded to the API.
        presence_penalty: Presence penalty forwarded to the API.
        max_new_tokens: Generation cap, sent as the API's ``max_tokens``.
        tools: Optional tool/function definitions forwarded to the API.
        tool_choice: Optional tool-choice directive forwarded to the API.

    Yields:
        Buffered chunks of generated text. On failure, the exception is
        logged and a single ``"Error: ..."`` string is yielded instead of
        raising, so callers always get a consumable stream.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }
    # Only forward tool arguments when provided; some backends reject
    # explicit nulls for these fields.
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logger.info("[Gateway] Request to %s | Model: %s", api_base, model_name)

    try:
        stream = client.chat.completions.create(**request_args)
        buffer = ""
        for chunk in stream:
            # Some providers emit keep-alive/usage chunks with an empty
            # `choices` list; skip them instead of raising IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            buffer += delta
            # Flush on newline or once the buffer grows large, so partial
            # LaTeX expressions are less likely to be split mid-token.
            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""
        if buffer:
            yield buffer
    except Exception as e:
        logger.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"