import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the API token from the environment (created in Secrets as HF_TOKEN).
HF_TOKEN = os.getenv("HF_TOKEN")

# Inference client for the GPT-OSS-120B model.
client = InferenceClient(
    model="openai/gpt-oss-120b",
    token=HF_TOKEN,
)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream an assistant reply for *message* given the chat *history*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs supplied by Gradio.
    system_message : str
        System prompt prepended to the conversation.
    max_tokens : int
        Maximum number of new tokens to generate.
    temperature : float
        Sampling temperature forwarded to the model.
    top_p : float
        Nucleus-sampling cutoff forwarded to the model.

    Yields
    ------
    str
        The reply accumulated so far — Gradio's streaming protocol expects
        the full text on every yield, not just the newest token.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        # Skip empty slots so no blank messages are sent to the API.
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks carry no choices (e.g. keep-alive frames);
        # indexing them blindly would raise IndexError, so skip them.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        if delta:
            response += delta
            yield response


# Gradio chat UI.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=10240, value=4096, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
    title="GPT-OSS-120B на сайте GPT-ChatBot.ru",
    description="powered by openai/gpt-oss-120b",
)

if __name__ == "__main__":
    demo.launch()