File size: 2,355 Bytes
2674eef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1440d47
2674eef
 
 
1440d47
2674eef
 
 
 
 
d1255fb
2674eef
 
 
1440d47
8824bc9
 
1440d47
 
 
 
 
 
 
 
d1255fb
8824bc9
1440d47
8824bc9
 
 
 
 
 
 
2674eef
 
8824bc9
 
1440d47
8824bc9
2674eef
 
 
 
 
1440d47
 
2674eef
 
 
 
 
0f7c2dd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

# Model and tokenizer loading
model_name = "microsoft/phi-2"
fine_tuned_model_path = "piyushgrover/phi2-qlora-adapter-s18erav3"

# Base checkpoint, loaded in float16 with device placement left to
# device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    return_dict=True,
    low_cpu_mem_usage=True,
)

# Attach the fine-tuned LoRA adapter, then merge its weights into the base
# model so the pipeline below receives the merged model.
model = PeftModel.from_pretrained(
    base_model,
    fine_tuned_model_path,
).merge_and_unload()

# Tokenizer: reuse the EOS token for padding and pad on the right.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

# Text-generation pipeline used by the chat handler.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=500,
    truncation=True,
)


def chat(user_input, history=None):
    """Generate a single-turn reply from the fine-tuned Phi-2 model.

    Args:
        user_input: The latest user message. It is inserted verbatim into the
            ``### User:`` / ``### Assistant:`` prompt template.
        history: Earlier conversation turns supplied by the caller. Accepted
            so the function matches the chat-callback signature, but it is
            not read: each reply is generated from ``user_input`` alone.

    Returns:
        The text following the last ``### Assistant:`` marker in the
        generated output, with surrounding whitespace stripped.
    """
    # ``history`` defaults to None rather than a list literal so that no
    # single list object is shared between calls.
    prompt = f"\n\n### User:\n{user_input}\n\n### Assistant:\n"

    # The per-call max_length=128 takes the place of the max_length the
    # pipeline was constructed with.
    response = generator(prompt, max_length=128, do_sample=True, truncation=True)

    # Keep only what comes after the final assistant marker.
    # NOTE(review): presumably needed because "generated_text" echoes the
    # prompt before the completion - confirm against the pipeline settings.
    answer = response[0]["generated_text"].split("### Assistant:\n")[-1].strip()

    return answer


# Create the Gradio chat interface.
# NOTE: the description is user-facing and must match what `chat` does. The
# handler builds its prompt from the latest message only, so the UI does not
# advertise conversation memory.
chatbot = gr.ChatInterface(
    fn=chat,
    title="Fine-Tuned Phi-2 Conversational Chat Assistant",
    description=(
        "🚀 Chat with a fine-tuned Phi-2 model. "
        "Each message is answered on its own; earlier turns are not remembered."
    ),
    theme="compact",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    chatbot.launch(debug=True)