import gradio as gr
import base64
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# === Load and Embed Documents ===
loader = DirectoryLoader(
    "courses",
    glob="**/*.txt",
    loader_cls=TextLoader
)
raw_docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=700,
    chunk_overlap=100,
    separators=["\n###", "\n##", "\n\n", "\n", ".", " "]
)
docs = text_splitter.split_documents(raw_docs)
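# The splitter tries separators in order, so heading markers ("###", "##") break
# before paragraph and sentence boundaries; chunk sizes are measured in characters.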

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(docs, embedding=embedding_model)
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 4})
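# Note: without a persist_directory the index is rebuilt (and every chunk
# re-embedded) on each launch; passing persist_directory="db" to
# Chroma.from_documents would cache it on disk ("db" is an illustrative path).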


# === Prompt Template ===
custom_prompt_template = """
You are a helpful and knowledgeable course advisor at the University of Hertfordshire. Answer the student's question using only the information provided in the context below.

If the context does not contain the answer, politely respond that the information is not available.

Context:
{context}

Question:
{question}

Answer:
"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=custom_prompt_template
)

# === Load Falcon Model ===
model_name = "tiiuae/Falcon3-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    do_sample=False,         # greedy decoding for reproducible answers
    return_full_text=False   # return only the completion, not the echoed prompt
)
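# Assumption: no accelerator is configured, so generation runs on CPU; passing
# device=0 to pipeline() (or device_map="auto" at model load) would use a GPU.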

llm = HuggingFacePipeline(pipeline=generator)

# === Setup Retrieval QA Chain ===
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt}
)
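# "stuff" concatenates all k=4 retrieved chunks into one prompt, so k and
# chunk_size together bound the prompt length sent to the model.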

# === Avatar and Crest ===
avatar_img = "images/UH.png"        # Avatar shown beside bot messages
logo = "images/UH Crest.png"        # Crest image

# === Chat Logic with Course Memory ===
def chat_with_bot(message, history, course_state):
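    """Answer one message, remembering the student's MSc course across turns.

    Returns ("", history, course_state) to clear the textbox, update the chat
    history, and carry the course memory forward, matching the Gradio outputs
    [msg, chatbot, state].
    """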
    lower_msg = message.lower()

    # Handle an explicit course switch first, so a message like
    # "change course to MSc Data Science" isn't caught by the "msc" check below.
    if "change course to" in lower_msg:
        start = lower_msg.index("change course to") + len("change course to")
        course_state = message[start:].strip()  # keep the user's original casing
        response = f"🔁 Course changed. Now answering based on: **{course_state}**"
        history.append((message, response))
        return "", history, course_state
    elif "msc" in lower_msg:
        course_state = message.strip()  # Remember the course for later turns
        full_query = f"For the course '{course_state}': {message}"
    elif course_state:
        full_query = f"For the course '{course_state}': {message}"
    else:
        full_query = message  # No course memory yet

    try:
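        # Run the RAG chain, then strip any echoed prompt scaffold from the output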
        raw_output = qa_chain.run(full_query)
        response = raw_output.split("Answer:")[-1].strip()
        response = response.replace("<|assistant|>", "").strip()
    except Exception as e:
        response = f"⚠️ An error occurred: {str(e)}"

    history.append((message, response))
    return "", history, course_state


# === Build Gradio UI ===
initial_message = (
    "👋 Welcome! I'm your Assistant for the University of Hertfordshire.\n"
    "Struggling to find something on our website?\n"
    "Want to know anything about your MSc course?\n\n"
    "Simply ask and we can get started!\n\n"
    "⚠️ Please avoid sharing personal details in this chat.\n"
    "If personal details are ever needed, we’ll always ask for consent first."
)

with gr.Blocks(title="🎓 UH Academic Advisor", css="""
.message.user {
    background-color: #d2e5ff !important;
}
""") as demo:
    # Convert crest image to base64
    with open(logo, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

    # Logo header
    gr.Markdown(f"""
    <div style='display: flex; align-items: center; gap: 6px; line-height: 1;'>
        <img src="data:image/png;base64,{encoded_string}" style="height: 30px; margin-bottom: 2px;">
        <h1 style='font-size: 18px; margin: 0;'>University of Hertfordshire Course Advisor Chatbot</h1>
    </div>
    """)

    chatbot = gr.Chatbot(
        avatar_images=(None, avatar_img),
        value=[(None, initial_message)],  # None on the user side shows the welcome as a bot message
        show_copy_button=True
    )

    state = gr.State("")  # Keeps course memory in-session
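    # gr.State is scoped per browser session, so each visitor keeps an
    # independent course memory.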

    with gr.Row():
        msg = gr.Textbox(placeholder="Ask a question...", lines=1, scale=5)
        send_btn = gr.Button("Send", scale=1)

    msg.submit(chat_with_bot, [msg, chatbot, state], [msg, chatbot, state])
    send_btn.click(chat_with_bot, [msg, chatbot, state], [msg, chatbot, state])

# === Launch ===
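# demo.launch(share=True) would additionally create a temporary public URL.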
demo.launch()