import gradio as gr
from huggingface_hub import InferenceClient
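
# Load the system prompt that steers the assistant's replies.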
with open("system_prompt.txt", "r") as f:
    SYSTEM_PROMPT = f.read()
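
# Hugging Face Inference API client for the chat model.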
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
client = InferenceClient(MODEL_NAME)
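
# Default sampling settings for each completion request.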
MAX_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.95


def respond(message, history):
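    # Build an OpenAI-style message list, starting with the system prompt.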
messages = [{"role": "system", "content": SYSTEM_PROMPT}] |
|
|
|
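
    # Replay earlier turns; Gradio's tuple-style chat history yields
    # (user_message, assistant_message) pairs.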
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
messages.append({"role": "user", "content": message}) |
|
response = "" |
|
|
|
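
    # Stream the completion, yielding the accumulated text so the UI
    # updates as tokens arrive.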
    for chunk in client.chat_completion(
        messages,
        max_tokens=MAX_TOKENS,
        stream=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


demo = gr.ChatInterface(
    respond,
    title="BoundrAI",
)

if __name__ == "__main__":
    demo.launch()