# app.py — neuralworm's Hugging Face Space (revision ae40801, 799 bytes)
# NOTE(review): the lines above this file's imports were Hub web-page chrome
# ("raw / history / blame") captured by the scrape, not source code; they are
# preserved here as a comment so the module parses.
import gradio as gr
from huggingface_hub.utils import HfHubHTTPError
def predict(message, history):
    """Send *message* to the hosted Llama model and record the exchange.

    Parameters:
        message: the user's input text from the Textbox.
        history: list of (user, bot) tuples backing the Chatbot component.

    Returns:
        ("", history) — an empty string to clear the Textbox, and the
        updated history for the Chatbot.

    Raises:
        HfHubHTTPError: re-raised for any Hub HTTP error other than 504.
    """
    try:
        # Load the hosted-model interface ONCE and cache it on the function;
        # the original re-loaded it on every message, which is slow and can
        # trip Hub rate limits.
        if not hasattr(predict, "_chat_interface"):
            predict._chat_interface = gr.Interface.load(
                "models/meta-llama/Meta-Llama-3.1-8B"
            )
        response = predict._chat_interface.predict(message)
        history.append((message, response))
        return "", history
    except HfHubHTTPError as e:
        if e.response.status_code == 504:
            # Surface the overload in the chat transcript; the original
            # returned the error text as the Textbox value, which left
            # "Server overloaded..." stranded in the input box.
            history.append((message, "Server overloaded. Please try again later."))
            return "", history
        # Bare raise preserves the original traceback.
        raise
# Minimal chat UI: a Chatbot transcript, a Textbox for input, and a button
# that clears both. Submitting the Textbox calls predict(), whose two outputs
# clear the box and extend the transcript.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])
    msg.submit(predict, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    # Hugging Face Spaces discovers `demo` automatically; this guard makes
    # the app also runnable locally with `python app.py`.
    demo.launch()