Spaces:

ak0601
/

Precollege_bot

Sleeping

App Files Files Community

ak0601 commited on Mar 6

Commit

6a8bd7b

verified ·

1 Parent(s): c4897d1

Upload app.py

Browse files

Files changed (1) hide show

app.py +316 -243

app.py CHANGED Viewed

@@ -1,243 +1,316 @@
-import streamlit as st
-from langchain.memory import ConversationBufferMemory
-from llama_index.core.indices.query.schema import QueryBundle
-from llama_index.core import Document, VectorStoreIndex
-from llama_index.core.text_splitter import SentenceSplitter
-from llama_index.core.retrievers import QueryFusionRetriever
-from llama_index.retrievers.bm25 import BM25Retriever
-from llama_index.core.postprocessor import SentenceTransformerRerank
-from llama_index.core.prompts import PromptTemplate
-from llama_index.core.query_engine import RetrieverQueryEngine
-from llama_index.embeddings.gemini import GeminiEmbedding
-from llama_index.llms.gemini import Gemini
-from llama_index.core import Settings
-from llama_index.vector_stores.faiss import FaissVectorStore
-from llama_index.core import (
-    SimpleDirectoryReader,
-    load_index_from_storage,
-    VectorStoreIndex,
-    StorageContext,
-)
-from llama_index.core.node_parser import SemanticSplitterNodeParser
-import os
-import faiss
-import pickle
-import spacy
-# Load NLP model
-# nlp = spacy.load("en_core_web_sm")
-GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
-# Function to load documents
-def load_documents(filename="documents.pkl"):
-    with open(filename, "rb") as file:
-        return pickle.load(file)
-# Load stored documents
-loaded_docs = load_documents()
-# Function to split text into sentences
-# def spacy_sentence_splitter(text):
-#     doc = nlp(text)
-#     return [sent.text for sent in doc.sents]
-embed_model = GeminiEmbedding(model_name="models/embedding-001", use_async=False)
-splitter = SemanticSplitterNodeParser(
-    buffer_size=5, breakpoint_percentile_threshold=95, embed_model=embed_model
-)
-# splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50, separator="\n")
-nodes = splitter.get_nodes_from_documents([doc for doc in loaded_docs])
-chunked_documents = [Document(text=node.text, metadata=node.metadata) for node in nodes]
-# Process documents
-# chunked_documents = [
-#     Document(text=chunk_text, metadata=doc.metadata)
-#     for doc in loaded_docs for chunk_text in spacy_sentence_splitter(doc.text)
-# ]
-# Configure LLM and embeddings
-Settings.llm = Gemini(model="models/gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.5)
-dimension = 768
-faiss_index = faiss.IndexFlatL2(dimension)
-vector_store = FaissVectorStore(faiss_index=faiss_index)
-storage_context = StorageContext.from_defaults(vector_store=vector_store)
-# Build index
-index = VectorStoreIndex.from_documents(
-    documents=chunked_documents,
-    storage_context=storage_context,
-    embed_model=embed_model,
-    show_progress=True
-)
-index.storage_context.persist()
-# Initialize memory
-memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-def get_chat_history():
-    return memory.load_memory_variables({})["chat_history"]
-# Define chatbot prompt template
-prompt_template = PromptTemplate(
-    """You are a friendly college counselor with expertise in Indian technical institutes.
-    Previous conversation context (if any):\n{chat_history}\n\n
-    Available college information:\n{context_str}\n\n"
-    User query: {query_str}\n\n
-    Instructions:\n
-    1. Provide a brief, direct answer using only the information available above\n
-    2. If specific data is not available, clearly state that\n
-    3. Keep responses under 3 sentences when possible\n
-    4. If comparing colleges, use bullet points for clarity\n
-    5. Use a friendly, conversational tone\n
-    6. Always be interactive and ask follow-up questions\n
-    7. Always try to give answers in points each point should focus on single aspect of the response.\n
-    8. Always try to give conclusion of your answer in the end for the user to take a decision.\n
-    9. Whenever you don't know the answer to a particular query, just reply with conatct to the respective college mentor to the user. \n
-    10. Try to give answers in tabular format wherever needed.
-    Response:"""
-)
-# Configure retrieval and query engine
-vector_retriever = index.as_retriever(similarity_top_k=10)
-bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=10)
-hybrid_retriever = QueryFusionRetriever(
-    [vector_retriever, bm25_retriever],
-    similarity_top_k=10,
-    num_queries=10,
-    mode="reciprocal_rerank",
-    use_async=False
-)
-reranker = SentenceTransformerRerank(
-    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
-    top_n=10,
-)
-query_engine = RetrieverQueryEngine.from_args(
-    retriever=hybrid_retriever,
-    node_postprocessors=[reranker],
-    llm=Settings.llm,
-    verbose=True,
-    prompt_template=prompt_template,
-    use_async=False,
-)
-# Streamlit UI
-st.title("📚 Precollege Chatbot")
-st.write("Ask me anything about different colleges and their courses!")
-# Custom CSS for WhatsApp-like interface
-st.markdown("""
-<style>
-body {
-    background-color: #111b21;
-    color: #e9edef;
-}
-.stApp {
-    background-color: #111b21;
-}
-.chat-container {
-    padding: 10px;
-    color: #111b21;
-}
-.user-message {
-    background-color: #005c4b;
-    color: #e9edef;
-    padding: 10px 15px;
-    border-radius: 15px;
-    margin: 5px 0;
-    max-width: 70%;
-    margin-left: auto;
-    margin-right: 10px;
-}
-.ai-message {
-    background-color: #1f2c33;
-    color: #e9edef;
-    padding: 10px 15px;
-    border-radius: 15px;
-    margin: 5px 0;
-    max-width: 70%;
-    margin-right: auto;
-    margin-left: 10px;
-    box-shadow: 0 1px 2px rgba(255,255,255,0.1);
-}
-.message-container {
-    display: flex;
-    margin-bottom: 10px;
-}
-.stTextInput input {
-    border-radius: 20px;
-    padding: 10px 20px;
-    border: 1px solid #ccc;
-    background-color: #2a3942;
-    color: #e9edef;
-}
-.stButton button {
-    border-radius: 50%;  /* Make it circular */
-    width: 40px;
-    height: 40px;
-    padding: 0px;
-    background-color: #005c4b;
-    color: #e9edef;
-    font-size: 20px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    border: none;
-    cursor: pointer;
-}
-.stButton button:hover {
-    background-color: #00735e;
-}
-div[data-testid="stToolbar"] {
-    display: none;
-}
-.stMarkdown {
-    color: #e9edef;
-}
-header {
-    background-color: #202c33 !important;
-}
-</style>
-""", unsafe_allow_html=True)
-if "chat_history" not in st.session_state:
-    st.session_state.chat_history = []
-# Create a container for chat messages
-chat_container = st.container()
-# Create a form for input
-with st.form(key="message_form", clear_on_submit=True):
-    col1, col2 = st.columns([5,1])
-    with col1:
-        user_input = st.text_input("", placeholder="Type a message...", label_visibility="collapsed")
-    with col2:
-        submit_button = st.form_submit_button("➤")
-    if submit_button and user_input.strip():
-        chat_history = get_chat_history()
-        query_bundle = QueryBundle(query_str=f"{chat_history}\n\nUser: {user_input}")
-        response_obj = query_engine.query(query_bundle)
-        response_text = str(response_obj.response) if hasattr(response_obj, "response") else str(response_obj)
-        memory.save_context({"query_str": user_input}, {"response": response_text})
-        st.session_state.chat_history.append(("You", user_input))
-        st.session_state.chat_history.append(("AI", response_text))
-# Display chat history with custom styling
-with chat_container:
-    for role, message in st.session_state.chat_history:
-        message = message.replace("</div>", "").replace("<div>", "")  # Sanitize the message
-        if role == "You":
-            st.markdown(
-                f'<div class="message-container"><div class="user-message">{message}</div></div>',
-                unsafe_allow_html=True
-            )
-        else:
-            st.markdown(
-                f'<div class="message-container"><div class="ai-message">{message}</div></div>',
-                unsafe_allow_html=True
-            )

+import streamlit as st
+from langchain.memory import ConversationBufferMemory
+from llama_index.core.indices.query.schema import QueryBundle
+from llama_index.core import Document, VectorStoreIndex
+from llama_index.core.text_splitter import SentenceSplitter
+from llama_index.core.retrievers import QueryFusionRetriever
+from llama_index.retrievers.bm25 import BM25Retriever
+from llama_index.core.postprocessor import SentenceTransformerRerank
+from llama_index.core.prompts import PromptTemplate
+from llama_index.core.query_engine import RetrieverQueryEngine
+from llama_index.embeddings.gemini import GeminiEmbedding
+from llama_index.llms.gemini import Gemini
+from llama_index.core import Settings
+from llama_index.vector_stores.faiss import FaissVectorStore
+from llama_index.core import (
+    SimpleDirectoryReader,
+    load_index_from_storage,
+    VectorStoreIndex,
+    StorageContext,
+)
+from llama_index.core.node_parser import SemanticSplitterNodeParser
+import os
+import faiss
+import pickle
+import spacy
+# Load NLP model
+# nlp = spacy.load("en_core_web_sm")
+GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
+# Function to load documents
+def load_documents(filename="documents.pkl"):
+    with open(filename, "rb") as file:
+        return pickle.load(file)
+# Load stored documents
+loaded_docs = load_documents()
+# Function to split text into sentences
+# def spacy_sentence_splitter(text):
+#     doc = nlp(text)
+#     return [sent.text for sent in doc.sents]
+embed_model = GeminiEmbedding(model_name="models/embedding-001", use_async=False)
+splitter = SemanticSplitterNodeParser(
+    buffer_size=5, breakpoint_percentile_threshold=95, embed_model=embed_model
+)
+# splitter = SentenceSplitter(chunk_size=512, chunk_overlap=50, separator="\n")
+nodes = splitter.get_nodes_from_documents([doc for doc in loaded_docs])
+chunked_documents = [Document(text=node.text, metadata=node.metadata) for node in nodes]
+# Process documents
+# chunked_documents = [
+#     Document(text=chunk_text, metadata=doc.metadata)
+#     for doc in loaded_docs for chunk_text in spacy_sentence_splitter(doc.text)
+# ]
+# Configure LLM and embeddings
+Settings.llm = Gemini(model="models/gemini-2.0-flash", api_key=GOOGLE_API_KEY, temperature=0.5)
+dimension = 768
+faiss_index = faiss.IndexFlatL2(dimension)
+vector_store = FaissVectorStore(faiss_index=faiss_index)
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+# Build index
+index = VectorStoreIndex.from_documents(
+    documents=chunked_documents,
+    storage_context=storage_context,
+    embed_model=embed_model,
+    show_progress=True
+)
+index.storage_context.persist()
+# Initialize memory
+memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+def get_chat_history():
+    return memory.load_memory_variables({})["chat_history"]
+# Define chatbot prompt template
+prompt_template = PromptTemplate(
+    """You are a friendly college counselor with expertise in Indian technical institutes.
+    Previous conversation context (if any):\n{chat_history}\n\n
+    Available college information:\n{context_str}\n\n"
+    User query: {query_str}\n\n
+    Instructions:\n
+    1. Provide a brief, direct answer using only the information available above\n
+    2. If specific data is not available, clearly state that\n
+    3. Keep responses under 3 sentences when possible\n
+    4. If comparing colleges, use bullet points for clarity\n
+    5. Use a friendly, conversational tone\n
+    6. Always be interactive and ask follow-up questions\n
+    7. Always try to give answers in points each point should focus on single aspect of the response.\n
+    8. Always try to give conclusion of your answer in the end for the user to take a decision.\n
+    Response:"""
+)
+# Configure retrieval and query engine
+vector_retriever = index.as_retriever(similarity_top_k=10)
+bm25_retriever = BM25Retriever.from_defaults(index=index, similarity_top_k=10)
+hybrid_retriever = QueryFusionRetriever(
+    [vector_retriever, bm25_retriever],
+    similarity_top_k=10,
+    num_queries=10,
+    mode="reciprocal_rerank",
+    use_async=False
+)
+reranker = SentenceTransformerRerank(
+    model="cross-encoder/ms-marco-MiniLM-L-2-v2",
+    top_n=10,
+)
+query_engine = RetrieverQueryEngine.from_args(
+    retriever=hybrid_retriever,
+    node_postprocessors=[reranker],
+    llm=Settings.llm,
+    verbose=True,
+    prompt_template=prompt_template,
+    use_async=False,
+)
+# Streamlit UI
+st.title("📚 Precollege Chatbot")
+st.write("Ask me anything about different colleges and their courses!")
+# Custom CSS for WhatsApp-like interface
+st.markdown("""
+<style>
+body {
+    background-color: #111b21;
+    color: #e9edef;
+}
+.stApp {
+    background-color: #111b21;
+}
+.chat-container {
+    padding: 10px;
+    color: #111b21;
+}
+.user-message {
+    background-color: #005c4b;
+    color: #e9edef;
+    padding: 10px 15px;
+    border-radius: 15px;
+    margin: 5px 0;
+    max-width: 70%;
+    margin-left: auto;
+    margin-right: 10px;
+}
+.ai-message {
+    background-color: #1f2c33;
+    color: #e9edef;
+    padding: 10px 15px;
+    border-radius: 15px;
+    margin: 5px 0;
+    max-width: 70%;
+    margin-right: auto;
+    margin-left: 10px;
+    box-shadow: 0 1px 2px rgba(255,255,255,0.1);
+}
+.ai-message table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 10px 0;
+}
+.ai-message th, .ai-message td {
+    border: 1px solid #e9edef;
+    padding: 8px;
+    text-align: left;
+}
+.ai-message th {
+    background-color: #2a3942;
+}
+.message-container {
+    display: flex;
+    margin-bottom: 10px;
+}
+.stTextInput input {
+    border-radius: 20px;
+    padding: 10px 20px;
+    border: 1px solid #ccc;
+    background-color: #2a3942;
+    color: #e9edef;
+}
+.stButton button {
+    border-radius: 50%;  /* Make it circular */
+    width: 40px;
+    height: 40px;
+    padding: 0px;
+    background-color: #005c4b;
+    color: #e9edef;
+    font-size: 20px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    border: none;
+    cursor: pointer;
+}
+.stButton button:hover {
+    background-color: #00735e;
+}
+div[data-testid="stToolbar"] {
+    display: none;
+}
+.stMarkdown {
+    color: #e9edef;
+}
+header {
+    background-color: #202c33 !important;
+}
+.ai-message table.ai-table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 10px 0;
+    background-color: #2a3942;
+}
+.ai-message table.ai-table th,
+.ai-message table.ai-table td {
+    border: 1px solid #e9edef;
+    padding: 8px;
+    text-align: left;
+    color: #e9edef;
+}
+.ai-message table.ai-table th {
+    background-color: #005c4b;
+    font-weight: bold;
+}
+.ai-message table.ai-table tr:nth-child(even) {
+    background-color: #1f2c33;
+}
+</style>
+""", unsafe_allow_html=True)
+if "chat_history" not in st.session_state:
+    st.session_state.chat_history = []
+# Create a container for chat messages
+chat_container = st.container()
+# Create a form for input
+with st.form(key="message_form", clear_on_submit=True):
+    col1, col2 = st.columns([5,1])
+    with col1:
+        user_input = st.text_input("", placeholder="Type a message...", label_visibility="collapsed")
+    with col2:
+        submit_button = st.form_submit_button("➤")
+    if submit_button and user_input.strip():
+        chat_history = get_chat_history()
+        query_bundle = QueryBundle(query_str=f"{chat_history}\n\nUser: {user_input}")
+        response_obj = query_engine.query(query_bundle)
+        response_text = str(response_obj.response) if hasattr(response_obj, "response") else str(response_obj)
+        memory.save_context({"query_str": user_input}, {"response": response_text})
+        st.session_state.chat_history.append(("You", user_input))
+        st.session_state.chat_history.append(("AI", response_text))
+# Display chat history with custom styling
+with chat_container:
+    for role, message in st.session_state.chat_history:
+        message = message.replace("</div>", "").replace("<div>", "")  # Sanitize the message
+        if role == "You":
+            st.markdown(
+                f'<div class="message-container"><div class="user-message">{message}</div></div>',
+                unsafe_allow_html=True
+            )
+        else:
+            # Convert markdown tables to HTML tables with proper styling
+            if "|" in message and "-|-" in message:  # Detect markdown tables
+                # Split the message into lines
+                lines = message.split("\n")
+                table_html = []
+                in_table = False
+                formatted_lines = []
+                for line in lines:
+                    if "|" in line:
+                        if not in_table:
+                            in_table = True
+                            table_html.append('<table class="ai-table">')
+                            # Add header
+                            header = line.strip().strip("|").split("|")
+                            table_html.append("<tr>")
+                            for h in header:
+                                table_html.append(f"<th>{h.strip()}</th>")
+                            table_html.append("</tr>")
+                        elif "-|-" not in line:  # Skip separator line
+                            # Add row
+                            row = line.strip().strip("|").split("|")
+                            table_html.append("<tr>")
+                            for cell in row:
+                                table_html.append(f"<td>{cell.strip()}</td>")
+                            table_html.append("</tr>")
+                    else:
+                        if in_table:
+                            in_table = False
+                            table_html.append("</table>")
+                            formatted_lines.append("".join(table_html))
+                            table_html = []
+                        formatted_lines.append(line)
+                if in_table:
+                    table_html.append("</table>")
+                    formatted_lines.append("".join(table_html))
+                message = "\n".join(formatted_lines)
+            st.markdown(
+                f'<div class="message-container"><div class="ai-message">{message}</div></div>',
+                unsafe_allow_html=True
+            )