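"""Gradio chat UI that streams completions for gpt-oss-120b from an
OpenAI-compatible vLLM endpoint running on AMD MI300X hardware."""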
import logging
import os
import re

import gradio as gr
from openai import OpenAI

from gateway import request_generation
from utils import LATEX_DELIMS

# Endpoint configuration comes from the environment (Space settings).
openai_api_key = os.getenv("API_KEY")
openai_api_base = os.getenv("API_ENDPOINT")
MODEL = os.getenv("MODEL_NAME", "")

# OpenAI-compatible client for the vLLM endpoint; the streaming path below
# goes through gateway.request_generation instead.
client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)

MAX_NEW_TOKENS = int(os.getenv("MAX_NEW_TOKENS", "1024"))
CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", "20"))
QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", str(CONCURRENCY_LIMIT * 4)))

logging.basicConfig(level=logging.INFO)
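# gpt-oss emits its reasoning on an "analysis" channel followed by the answer
# after an "assistantfinal" marker; the helper below splits the two so the UI
# can render them separately.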
def format_analysis_response(text):
    """Split a raw completion into its reasoning and final-answer parts."""
    m = re.search(r"analysis(.*?)assistantfinal", text, re.DOTALL)
    if m:
        reasoning = m.group(1).strip()
        response = text.split("assistantfinal", 1)[-1].strip()
        return (
            f"**🤔 Analysis:**\n\n*{reasoning}*\n\n---\n\n"
            f"**💬 Response:**\n\n{response}"
        )
    return text.strip()
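# For example (a sketch of the raw channel format, using the markers above):
#   format_analysis_response("analysis Plan the reply. assistantfinal Hello!")
# returns:
#   "**🤔 Analysis:**\n\n*Plan the reply.*\n\n---\n\n**💬 Response:**\n\nHello!"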
def generate(message, history,
             system_prompt, temperature,
             frequency_penalty=0.0, presence_penalty=0.0,
             max_new_tokens=MAX_NEW_TOKENS):
    if not message.strip():
        yield "Please enter a prompt."
        return

    # Normalize history: gr.Chatbot(type="messages") yields dicts, but older
    # tuple-style (user, assistant) pairs are accepted defensively as well.
    msgs = []
    for h in history:
        if isinstance(h, dict):
            msgs.append(h)
        elif isinstance(h, (list, tuple)) and len(h) == 2:
            u, a = h
            if u:
                msgs.append({"role": "user", "content": u})
            if a:
                msgs.append({"role": "assistant", "content": a})

    logging.info(f"[User] {message}")
    logging.info(f"[System] {system_prompt} | Temp={temperature}")

    collected, buffer = "", ""
    yielded_once = False
    try:
        for delta in request_generation(
            api_key=openai_api_key, api_base=openai_api_base,
            message=message, system_prompt=system_prompt,
            model_name=MODEL, chat_history=msgs,
            temperature=temperature,
            frequency_penalty=frequency_penalty,
            presence_penalty=presence_penalty,
            max_new_tokens=max_new_tokens,
        ):
            if not delta:
                continue
            collected += delta
            buffer += delta

            # Emit the very first token immediately so the UI feels live,
            # then re-yield the full transcript on newlines or every ~150
            # chars to keep Gradio updates (and LaTeX re-rendering) cheap.
            if not yielded_once:
                yield delta
                buffer = ""
                yielded_once = True
                continue
            if "\n" in buffer or len(buffer) > 150:
                yield collected
                buffer = ""

        # Final flush: split reasoning from the answer and close an
        # unbalanced "$" so the LaTeX renderer doesn't swallow the tail.
        final = format_analysis_response(collected)
        if final.count("$") % 2:
            final += "$"
        yield final
    except Exception as e:
        logging.exception("Stream failed")
        yield f"❌ Error: {e}"
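# `gateway.request_generation` is not shown in this file; a minimal sketch of
# what it presumably wraps (vLLM's standard OpenAI-compatible streaming chat
# API), offered as an assumption rather than the actual gateway code:
#
#   def request_generation(*, api_key, api_base, message, system_prompt,
#                          model_name, chat_history, temperature,
#                          frequency_penalty, presence_penalty, max_new_tokens):
#       client = OpenAI(api_key=api_key, base_url=api_base)
#       stream = client.chat.completions.create(
#           model=model_name,
#           messages=[{"role": "system", "content": system_prompt},
#                     *chat_history,
#                     {"role": "user", "content": message}],
#           temperature=temperature,
#           frequency_penalty=frequency_penalty,
#           presence_penalty=presence_penalty,
#           max_tokens=max_new_tokens,
#           stream=True,
#       )
#       for chunk in stream:
#           delta = chunk.choices[0].delta.content
#           if delta:
#               yield delta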
chatbot_ui = gr.ChatInterface(
    fn=generate,
    type="messages",
    chatbot=gr.Chatbot(
        label="OSS vLLM Chatbot",
        type="messages",
        scale=2,
        height=600,
        latex_delimiters=LATEX_DELIMS,
    ),
    stop_btn=True,
    # These inputs are passed to generate() in order, after (message, history).
    additional_inputs=[
        gr.Textbox(label="System prompt", value="You are a helpful assistant.", lines=2),
        gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.7),
        gr.Slider(label="Frequency penalty", minimum=-2.0, maximum=2.0, step=0.1, value=0.0),
        gr.Slider(label="Presence penalty", minimum=-2.0, maximum=2.0, step=0.1, value=0.0),
        gr.Slider(label="Max new tokens", minimum=64, maximum=4096, step=64, value=MAX_NEW_TOKENS),
    ],
    examples=[
        ["Explain the difference between supervised and unsupervised learning."],
        ["Summarize the plot of Inception in two sentences."],
        ["Show me the LaTeX for the quadratic formula."],
        ["What are the advantages of the AMD Instinct MI300X GPU?"],
        ["Derive the gradient of the softmax cross-entropy loss."],
        ["Explain why ∂/∂x xⁿ = n·xⁿ⁻¹ holds."],
    ],
    title="GPT-OSS-120B on AMD MI300X",
    description="This Space is an alpha release demonstrating the gpt-oss-120b model running on AMD MI300X infrastructure. It is released under the Apache 2.0 license.",
)
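# LATEX_DELIMS (imported from utils) is expected to be a list of delimiter
# dicts in the shape gr.Chatbot's latex_delimiters accepts, e.g.
# {"left": "$$", "right": "$$", "display": True}; the exact contents live
# in utils.py.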
if __name__ == "__main__":
    chatbot_ui.queue(max_size=QUEUE_SIZE,
                     default_concurrency_limit=CONCURRENCY_LIMIT).launch()
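# Typical local launch (env var names from the config block above; the
# filename "app.py" is an assumption):
#   API_KEY=... API_ENDPOINT=http://localhost:8000/v1 MODEL_NAME=... python app.py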