import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

# Load tokenizer and model
model_id = "HuggingFaceH4/zephyr-7b-beta"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# Define the Gradio interface
with gr.Blocks(fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("## Zephyr-7B Unlimited Assistant")
        gr.Markdown(
            "This assistant is powered by the HuggingFaceH4/zephyr-7b-beta model.\n"
            "You can start chatting right away!"
        )
        login_button = gr.LoginButton("🔐 Sign in to Hugging Face")  # Optional UI

    chatbot = gr.Chatbot(label="🧠 Zephyr-7B Assistant")
    user_input = gr.Textbox(placeholder="Ask anything...", show_label=False)

    def chat(user_msg, history):
        # Build the message list: system prompt, prior turns, then the new user message
        messages = [
            {
                "role": "system",
                "content": "You are a friendly chatbot who always responds in the style of a pirate.",
            }
        ]
        for human, ai in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": ai})
        messages.append({"role": "user", "content": user_msg})

        # Format the prompt using the tokenizer's chat template
        prompt = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Generate a response; the pipeline echoes the prompt back, so split on
        # the last <|assistant|> marker (from Zephyr's chat template) to keep
        # only the newly generated reply
        outputs = pipe(
            prompt,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
        )
        response = outputs[0]["generated_text"].split("<|assistant|>")[-1].strip()

        # Append the new interaction and clear the input box
        history.append((user_msg, response))
        return history, ""

    user_input.submit(chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input])
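
# ---------------------------------------------------------------------------
# Optional: streaming responses. This is a minimal sketch, not part of the
# original app: the handler name `chat_stream` is hypothetical, and it assumes
# transformers' TextIteratorStreamer, which yields decoded text chunks while
# model.generate() runs in a background thread. Gradio treats a generator
# handler as a streaming event and re-renders the chatbot on every yield. To
# try it, define this before the user_input.submit call and pass chat_stream
# in place of chat.
# ---------------------------------------------------------------------------
from threading import Thread

from transformers import TextIteratorStreamer


def chat_stream(user_msg, history):
    messages = [
        {
            "role": "system",
            "content": "You are a friendly chatbot who always responds in the style of a pirate.",
        }
    ]
    for human, ai in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": ai})
    messages.append({"role": "user", "content": user_msg})

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # skip_prompt drops the echoed prompt; skip_special_tokens drops markers
    # such as <|assistant|>, so no manual split is needed here
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95,
        ),
    ).start()

    # Grow the last assistant turn as chunks arrive, yielding partial history
    history.append((user_msg, ""))
    for chunk in streamer:
        history[-1] = (user_msg, history[-1][1] + chunk)
        yield history, ""


demo.launch()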