import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel
# Model and Tokenizer Loading
model_name = "microsoft/phi-2"
# device_map = {"": 0}  # alternative: pin the entire model to GPU 0
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)
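# device_map="auto" lets accelerate place layers across the available GPU/CPU
# memory, and fp16 weights roughly halve memory use versus full precision.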
# Load fine-tuned LoRA weights
fine_tuned_model_path = "piyushgrover/phi2-qlora-adapter-s18erav3"
model = PeftModel.from_pretrained(base_model, fine_tuned_model_path)
model = model.merge_and_unload() # Merge LoRA weights
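# Merging folds the LoRA deltas into the base weights and drops the PEFT
# wrappers, so inference below runs on a plain Transformers model.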
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
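# Phi-2's tokenizer ships without a dedicated pad token, so reuse EOS for padding.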
# Set up text generation pipeline
generator = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=500, truncation=True)
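# Optional sanity check: a one-off call to confirm the pipeline loads and generates.
# print(generator("### User:\nHello!\n\n### Assistant:\n", max_new_tokens=32)[0]["generated_text"])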
def chat(user_input, history):
    """Generates a response from the fine-tuned Phi-2 model with conversation memory."""
    # Replay prior turns in the same ### User / ### Assistant format the adapter was trained on
    formatted_history = ""
    for usr, bot in history:
        formatted_history += f"\n\n### User:\n{usr}\n\n### Assistant:\n{bot}"
    # Append the latest user message
    prompt = f"{formatted_history}\n\n### User:\n{user_input}\n\n### Assistant:\n"
    # Generate a response; max_new_tokens bounds the reply without counting the prompt,
    # so long histories do not exhaust the generation budget
    response = generator(prompt, max_new_tokens=128, do_sample=True, truncation=True)
    # Keep only the text after the final assistant marker, and drop any
    # follow-up "### User:" turn the model may have hallucinated
    answer = response[0]["generated_text"].split("### Assistant:\n")[-1]
    answer = answer.split("### User:")[0].strip()
    # gr.ChatInterface tracks the history itself, so just return the new answer
    return answer
# Create Gradio Chat Interface
chatbot = gr.ChatInterface(
    fn=chat,
    title="Fine-Tuned Phi-2 Conversational Chat Assistant",
    description="Chat with a fine-tuned Phi-2 model. It remembers the conversation!",
    theme="compact",
)
# Launch App
if __name__ == "__main__":
    chatbot.launch(debug=True)
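# Note: launch(share=True) would expose a temporary public URL when running
# locally; on Hugging Face Spaces the app is served automatically.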