Spaces:

alphaoumardev
/

Summerpro

Runtime error

File size: 1,921 Bytes

51e03da
6331ef2
ff380d5
 
 
51e03da
 
 
 
ff380d5
 
51e03da
 
 
ff380d5
 
51e03da
ff380d5
 
 
 
 
 
51e03da
ff380d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51e03da
ff380d5
 
 
51e03da
ff380d5
 
51e03da
ff380d5
 
51e03da
ff380d5

import os
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face access token read from the environment; os.getenv returns None
# when HUGGINGFACE_TOKEN is not set.
hf_token = os.getenv("HUGGINGFACE_TOKEN")

# Your fine-tuned model
model_id = "alphaoumardev/Llama3-8B-noryu-instruct"

# Authenticate with the token when loading the tokenizer and the model.
# `token=` is the current keyword for this; it replaces the deprecated
# `use_auth_token=`.
# NOTE(review): needs a transformers release that accepts `token=` -- confirm
# against the version pinned for this app.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
model.eval()  # inference only: switch off training-mode layers such as dropout

# Device setup: use CUDA when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

def chat(user_input, history=None):
    """Generate the assistant's reply to ``user_input`` and record both turns.

    Args:
        user_input: Text of the new user message.
        history: Conversation so far, as a list of
            ``{"role": ..., "content": ...}`` dicts. The list is extended in
            place with the new user turn and the generated assistant turn.
            ``None`` (the default) starts a new conversation.

    Returns:
        A ``(chat_history, history)`` tuple. ``chat_history`` is a list of
        ``(user_message, following_message)`` tuples built for the Chatbot
        display; ``history`` is the updated list of role/content dicts.
    """
    # Create the list per call: a mutable ``[]`` default is shared by every
    # call that omits ``history`` and would leak turns between conversations.
    if history is None:
        history = []
    history.append({"role": "user", "content": user_input})

    # Format the prompt: one "role: content" line per turn, followed by an
    # open assistant turn for the model to complete.
    prompt = "".join(f"{turn['role']}: {turn['content']}\n" for turn in history)
    prompt += "assistant:"

    # Tokenize and generate
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode only the tokens produced after the prompt. Decoding the whole
    # sequence and splitting on the last "assistant:" would cut any reply that
    # itself contains that text down to its tail.
    prompt_length = inputs["input_ids"].shape[-1]
    output_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

    # The plain-text prompt has no stop sequence, so the model may continue
    # with further "user:" / "assistant:" turns of its own; keep only the
    # reply to the real user message.
    for marker in ("\nuser:", "\nassistant:"):
        output_text = output_text.partition(marker)[0]
    assistant_reply = output_text.strip()
    history.append({"role": "assistant", "content": assistant_reply})

    # Gradio expects tuple list format for Chatbot display: pair each user
    # message with the entry that follows it.
    chat_history = [
        (turn["content"], history[i + 1]["content"])
        for i, turn in enumerate(history[:-1])
        if turn["role"] == "user"
    ]
    return chat_history, history

# Gradio Blocks UI: a chat display, the stored conversation, and a text box
# whose submit event runs `chat`.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    # memory of the conversation (starts as an empty list)
    state = gr.State([])
    txt = gr.Textbox(placeholder="Type your message...", show_label=False)

    # On submit, call chat(message, history) and route its two return values
    # to the chat display and back into the stored history.
    txt.submit(fn=chat, inputs=[txt, state], outputs=[chatbot, state])

demo.launch()