import logging
from typing import Generator, List, Optional

from openai import OpenAI

logging.basicConfig(level=logging.INFO)
def request_generation(
    api_key: str,
    api_base: str,
    message: str,
    system_prompt: str,
    model_name: str,
    chat_history: Optional[List[dict]] = None,
    temperature: float = 0.3,
    frequency_penalty: float = 0.0,
    presence_penalty: float = 0.0,
    max_new_tokens: int = 1024,
    tools: Optional[List[dict]] = None,
    tool_choice: Optional[str] = None,
) -> Generator[str, None, None]:
"""
Sends a streaming chat request to an OpenAI-compatible backend using the official OpenAI client.
Buffers output to improve LaTeX rendering.
"""
    client = OpenAI(api_key=api_key, base_url=api_base)

    # Assemble the message list: system prompt first, then any prior turns,
    # then the new user message.
    messages = [{"role": "system", "content": system_prompt}]
    if chat_history:
        messages.extend(chat_history)
    messages.append({"role": "user", "content": message})

    request_args = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "frequency_penalty": frequency_penalty,
        "presence_penalty": presence_penalty,
        "max_tokens": max_new_tokens,
        "stream": True,
    }

    # Tool definitions and tool choice are optional; attach them only when given.
    if tools:
        request_args["tools"] = tools
    if tool_choice:
        request_args["tool_choice"] = tool_choice

    logging.info(f"[Gateway] Request to {api_base} | Model: {model_name}")
    try:
        stream = client.chat.completions.create(**request_args)
        buffer = ""
        for chunk in stream:
            # Some backends emit keep-alive or usage-only chunks with no choices.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta.content or ""
            buffer += delta
            # Flush on a newline or once the buffer grows past ~150 characters,
            # so LaTeX fragments are less likely to be split mid-expression.
            if "\n" in buffer or len(buffer) > 150:
                yield buffer
                buffer = ""
        # Emit any trailing text left in the buffer after the stream ends.
        if buffer:
            yield buffer
    except Exception as e:
        logging.exception("[Gateway] Streaming failed")
        yield f"Error: {e}"