import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model
model_id = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)
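# Note: device_map="auto" requires the `accelerate` package, and a 7B model in
# bfloat16 needs roughly 15 GB of GPU memory. On CPU-only machines, dropping
# device_map and using torch_dtype=torch.float32 should work, just slowly.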

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Define the Gradio interface
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("## Zephyr-7B Unlimited Assistant")
        gr.Markdown(
            "This assistant is powered by the HuggingFaceH4/zephyr-7b-beta model.\n"
            "You can start chatting right away!"
        )
        # Optional; sign-in only functions on Hugging Face Spaces with OAuth enabled
        gr.LoginButton("🔐 Sign in to Hugging Face")

    chatbot = gr.Chatbot(label="🧠 Zephyr-7B Assistant")
    user_input = gr.Textbox(placeholder="Ask anything...", show_label=False)

    def chat(user_msg, history):
        # history arrives as a list of (user, assistant) tuples from gr.Chatbot.
        # Rebuild the conversation: system prompt, prior turns, then the new message.
        messages = [
            {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate."}
        ]
        for human, ai in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": ai})
        messages.append({"role": "user", "content": user_msg})

        # Format the prompt using the tokenizer's chat template
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
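        # With zephyr-7b-beta's chat template, the rendered prompt looks roughly like:
        #   <|system|>
        #   You are a friendly chatbot ...</s>
        #   <|user|>
        #   Ahoy!</s>
        #   <|assistant|>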

        # Generate a response. return_full_text=False makes the pipeline return
        # only the newly generated tokens rather than prompt + completion, so no
        # fragile splitting on "</s>" is needed to recover the reply.
        outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.7,
                       top_k=50, top_p=0.95, return_full_text=False)
        response = outputs[0]["generated_text"].strip()

        # Append new interaction
        history.append((user_msg, response))
        return history, ""

    user_input.submit(chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input])

demo.launch()
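# To expose a temporary public URL instead, call demo.launch(share=True).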