Spaces:
Runtime error
Runtime error
File size: 1,989 Bytes
eb3516b 4959ed7 b74ae79 4959ed7 eb3516b 4959ed7 eb3516b 46c0344 eb3516b 4959ed7 2b2be0b 4959ed7 2b2be0b 4959ed7 eb3516b 2b2be0b eb3516b ae3ecf1 eb3516b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import gradio as gr
import sys
import json
from huggingface_hub import InferenceClient
# Model identifier handed to the inference client created on the next line.
MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
# Single module-level client, constructed at import time; predict() streams
# completions through it.
client = InferenceClient(model=MODEL)
# NOTE(review): not referenced in the visible part of this file — presumably a
# concurrency setting consumed further down; confirm before changing.
NUM_THREADS = 2
# NOTE(review): not referenced in the visible part of this file — presumably a
# switch for turning the demo off; confirm against the UI code.
DISABLED = False
def exception_handler(exception_type, exception, traceback):
    """Print an exception as a single ``TypeName: message`` line.

    Takes the three arguments passed to an exception hook. ``traceback`` is
    accepted but never read, so no stack frames are printed.
    """
    # print() applies str() to each argument, matching the "%s: %s" format.
    print(exception_type.__name__, exception, sep=": ")
# Route uncaught exceptions through exception_handler, which prints only the
# exception type and message.
sys.excepthook = exception_handler
# Cap traceback output at zero frames.
# NOTE(review): presumably meant to keep stack traces out of the logs —
# confirm this is still wanted, since it also hides them while debugging.
sys.tracebacklimit = 0
def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
    """Stream a model reply to ``inputs`` and yield UI updates per token.

    Args:
        inputs: The new user message; appended to the prompt as the last
            ``<|user|>`` turn.
        top_p: Forwarded to ``client.text_generation`` as ``top_p``.
        temperature: Forwarded to ``client.text_generation`` as
            ``temperature``.
        chat_counter: Running turn counter; incremented once per call and
            yielded back.
        chatbot: Not read by this function (kept for the caller's signature).
        history: Flat list of messages alternating user, assistant, user, ...
            It is mutated in place: the user message and the growing reply
            are appended to it.
        request: Not read by this function.

    Yields:
        A 6-tuple ``(pairs, history, chat_counter, status, update_a,
        update_b)`` where ``pairs`` is the history regrouped into
        ``(user, assistant)`` tuples, ``status`` is ``"200 OK"`` while
        streaming or ``"Error: ..."`` on failure, and the two updates set
        ``interactive`` to ``False`` while streaming and ``True`` on failure.
    """
    # Even indices in history are user turns, odd indices are assistant turns.
    prompt_parts = ["<|system|>You are a helpful assistant.<|end|>\n"]
    for i, msg in enumerate(history):
        role = "user" if i % 2 == 0 else "assistant"
        prompt_parts.append(f"<|{role}|>{msg}<|end|>\n")
    prompt_parts.append(f"<|user|>{inputs}<|end|>\n<|assistant|>")
    prompt = "".join(prompt_parts)

    chat_counter += 1
    history.append(inputs)
    partial_words = ""
    token_counter = 0
    try:
        for token in client.text_generation(prompt, max_new_tokens=200, temperature=temperature, top_p=top_p, stream=True):
            partial_words += token
            if token_counter == 0:
                # First token: open the assistant slot for this turn.
                history.append(partial_words)
            else:
                # Later tokens: overwrite the slot with the longer text.
                history[-1] = partial_words
            token_counter += 1
            pairs = list(zip(history[::2], history[1::2]))
            yield pairs, history, chat_counter, "200 OK", gr.update(interactive=False), gr.update(interactive=False)
    except Exception as e:
        print(f'error found: {e}')
        if token_counter == 0:
            # No reply was produced, so the user message just appended has no
            # partner. Drop it; otherwise every later call would mislabel the
            # roles, which are derived from list position.
            history.pop()
        # Show the conversation that is still intact instead of blanking the
        # chat window; the error text travels in the status field.
        pairs = list(zip(history[::2], history[1::2]))
        yield pairs, history, chat_counter, f"Error: {e}", gr.update(interactive=True), gr.update(interactive=True)
    print(json.dumps({"chat_counter": chat_counter, "partial_words": partial_words, "token_counter": token_counter}))
def reset_textbox():
    """Return two component updates: clear-and-lock, then lock.

    The first update sets ``value`` to the empty string and ``interactive``
    to ``False``; the second only sets ``interactive`` to ``False``. Which
    components receive them is decided by the caller's event wiring.
    """
    cleared_and_locked = gr.update(value='', interactive=False)
    locked = gr.update(interactive=False)
    return cleared_and_locked, locked
# Centered HTML heading string.
# NOTE(review): the heading names "GPT-4.1 mini" while MODEL in this file is
# "meta-llama/Meta-Llama-3-8B-Instruct" — confirm which one is intended.
title = """<h1 align="center">GPT-4.1 mini: Research Preview (Short-Term Availability)</h1>"""
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
|