# gpt-oss-120b-chatbot / gateway.py
import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)

def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
    """
    Send a streaming chat request to an OpenAI-compatible backend via the
    official OpenAI client and yield the response incrementally.

    Output is buffered and flushed on newlines (or once the buffer exceeds
    150 characters) so LaTeX expressions are less likely to be split across
    chunks, which improves rendering in the chat UI.
    """
    client = OpenAI(api_key=api_key, base_url=api_base)

    # Assemble the conversation: system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }
    # Tool definitions are optional; only forward them when provided.
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")
    try:
        stream = client.chat.completions.create(**request_args)
        collected = ""  # full response accumulated so far (useful for debugging)
        buffer = ""
        for chunk in stream:
            # Some backends emit keep-alive or usage chunks with no choices.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            collected += delta
            buffer += delta
            # Flush on a newline boundary, or once the buffer grows large,
            # to avoid splitting LaTeX mid-expression.
            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""
        if buffer:
            yield buffer
    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"
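

if __name__ == "__main__":
    # Minimal usage sketch (not part of the original module): streams a completion
    # from an OpenAI-compatible endpoint and prints chunks as they arrive.
    # The OPENAI_API_KEY / OPENAI_API_BASE environment variables and the model
    # name below are illustrative assumptions, not values from this repo.
    import os

    for piece in request_generation(
        api_key=os.environ.get("OPENAI_API_KEY", ""),
        api_base=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1"),
        message="Show the quadratic formula in LaTeX.",
        system_prompt="You are a helpful assistant.",
        model_name="gpt-oss-120b",
        max_new_tokens=256,
    ):
        print(piece, end="", flush=True)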