# Hugging Face Spaces page capture — status banner read: "Runtime error"
import gradio as gr | |
import sys | |
import json | |
from huggingface_hub import InferenceClient | |
# Hosted model used for all completions.
MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
# Module-level side effect: client is created at import time.
# NOTE(review): no token is passed — presumably relies on ambient HF auth; confirm.
client = InferenceClient(model=MODEL)
# NOTE(review): NUM_THREADS and DISABLED are not referenced in the visible code —
# presumably consumed by launch/queue configuration further down; verify.
NUM_THREADS = 2
DISABLED = False
def exception_handler(exception_type, exception, traceback):
    """Minimal sys.excepthook replacement.

    Prints only ``ExceptionType: message`` to stdout and deliberately
    ignores the traceback argument (tracebacks are suppressed globally
    via ``sys.tracebacklimit = 0`` elsewhere in this module).
    """
    # f-string replaces the dated "%s: %s" % (...) formatting; output is identical.
    print(f"{exception_type.__name__}: {exception}")
# Install the minimal hook and suppress traceback printing process-wide,
# so uncaught errors surface as a single "Type: message" line.
sys.excepthook = exception_handler
sys.tracebacklimit = 0
def predict(inputs, top_p, temperature, chat_counter, chatbot, history, request: gr.Request):
    """Stream a chat completion and yield incremental Gradio UI updates.

    Builds a chat-template prompt from ``history`` (flat list alternating
    user/assistant messages), streams tokens from the Inference API, and
    yields after every token:
    ``(chatbot_pairs, history, chat_counter, status, textbox_update, button_update)``.

    NOTE(review): the ``chatbot`` parameter is accepted but never read in this
    body — presumably kept so the Gradio event signature matches; confirm.
    ``history`` is mutated in place (appended to) and shared with the caller.
    """
    prompt = "<|system|>You are a helpful assistant.<|end|>\n"
    # Even indices in history are user turns, odd indices assistant turns.
    for i, msg in enumerate(history):
        role = "user" if i % 2 == 0 else "assistant"
        prompt += f"<|{role}|>{msg}<|end|>\n"
    # Open the assistant turn so the model continues from here.
    prompt += f"<|user|>{inputs}<|end|>\n<|assistant|>"
    chat_counter += 1
    history.append(inputs)
    partial_words = ""
    token_counter = 0
    try:
        for token in client.text_generation(prompt, max_new_tokens=200, temperature=temperature, top_p=top_p, stream=True):
            partial_words += token
            # First token: append the growing reply; afterwards overwrite it in place.
            if token_counter == 0:
                history.append(partial_words)
            else:
                history[-1] = partial_words
            token_counter += 1
            # Pair up history as (user, assistant) tuples for the Chatbot component;
            # inputs stay non-interactive while streaming.
            yield [(history[i], history[i + 1]) for i in range(0, len(history) - 1, 2)], history, chat_counter, "200 OK", gr.update(interactive=False), gr.update(interactive=False)
    except Exception as e:
        # Best-effort error surface: empty chatbot pairs, status carries the error,
        # and the textbox/button are re-enabled so the user can retry.
        print(f'error found: {e}')
        yield [], history, chat_counter, f"Error: {e}", gr.update(interactive=True), gr.update(interactive=True)
    # One-line JSON trace of the finished (or failed) request for the Space logs.
    print(json.dumps({"chat_counter": chat_counter, "partial_words": partial_words, "token_counter": token_counter}))
def reset_textbox():
    """Clear the input textbox and disable both controls while a request runs."""
    cleared_box = gr.update(value='', interactive=False)
    disabled_button = gr.update(interactive=False)
    return cleared_box, disabled_button
# NOTE(review): the heading advertises "GPT-4.1 mini" but MODEL above is
# meta-llama/Meta-Llama-3-8B-Instruct — confirm which label is intended.
# (User-facing string left untouched.)
title = """<h1 align="center">GPT-4.1 mini: Research Preview (Short-Term Availability)</h1>"""
description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form: | |