Spaces:
Running
Running
File size: 5,736 Bytes
ea1ba01 1c19c94 ea1ba01 542a0cb ea1ba01 542a0cb ea1ba01 1c19c94 ea1ba01 be1430e 1c19c94 886eee7 be1430e 542a0cb 886eee7 be1430e 886eee7 e31157e 8c09e73 886eee7 6968fd4 886eee7 111ed77 be1430e ea1ba01 27232ee 6cc3b6e 27232ee 886eee7 882e3b8 886eee7 ea1ba01 882e3b8 886eee7 1c19c94 882e3b8 22ea197 1c19c94 080213f 1c19c94 080213f 1c19c94 080213f 1c19c94 e735ca8 882e3b8 22ea197 1c19c94 080213f 1c19c94 080213f 8d70ef7 886eee7 882e3b8 886eee7 882e3b8 886eee7 ea1ba01 882e3b8 ea1ba01 886eee7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import streamlit as st
import os
from dotenv import load_dotenv
from langsmith import traceable
from datetime import datetime
from typing import List, Dict, Optional
from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, list_all_files, load_docs
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
from app.prompts import sahabat_prompt
from app.db import supabase
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder
load_dotenv()  # pull REPLICATE_API_TOKEN / Supabase credentials etc. from .env into the environment
# CONFIG
# Supabase storage bucket that holds the serialized FAISS vector store files.
BUCKET_NAME = "pnp-bot-storage-archive"
# Filename prefix shared by the vector store artifacts (.faiss / .pkl pair).
VECTOR_STORE_PREFIX = "vector_store"
# UTILITY
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest modification time (Unix seconds) among files in a bucket.

    Prefers each file's ``updated_at`` and falls back to ``created_at``.
    Files without a usable timestamp, or whose timestamp cannot be parsed,
    are skipped; parse failures are reported via ``print``.
    Returns 0.0 when no file yields a parseable timestamp.
    """
    newest = 0.0
    for entry in list_all_files(bucket_name):
        raw = entry.get("updated_at") or entry.get("created_at")
        if not raw:
            continue
        try:
            # ISO-8601 with a trailing 'Z' is not accepted by fromisoformat
            # on older Pythons, so normalize it to an explicit UTC offset.
            parsed = datetime.fromisoformat(raw.replace('Z', '+00:00')).timestamp()
        except Exception as exc:
            print(f"Gagal parsing waktu dari {entry.get('name')}: {exc}")
        else:
            if parsed > newest:
                newest = parsed
    return newest
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the latest ``updated_at`` of the stored vector-store files.

    Looks for files named ``<VECTOR_STORE_PREFIX>*.faiss`` / ``*.pkl`` in
    BUCKET_NAME. Returns None when fewer than two such files exist (the
    store is incomplete) or when the listing fails for any reason.
    """
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            item["updated_at"]
            for item in listing
            if item["name"].startswith(VECTOR_STORE_PREFIX)
            and item["name"].endswith((".faiss", ".pkl"))
        ]
        # Both halves of the FAISS store (.faiss + .pkl) must be present.
        return max(stamps) if len(stamps) >= 2 else None
    except Exception as exc:
        print(f"Error getting Supabase timestamp: {exc}")
        return None
def vector_store_is_outdated() -> bool:
    """Return True when the stored vector store should be rebuilt.

    The store is outdated when it does not exist (no timestamp available)
    or when any source file in the "pnp-bot-storage" bucket is newer than
    the stored vector-store files.
    """
    stored_iso = get_supabase_vector_store_timestamp()
    if stored_iso is None:
        return True
    stored_ts = datetime.fromisoformat(stored_iso.replace("Z", "+00:00")).timestamp()
    return get_latest_data_timestamp_from_files("pnp-bot-storage") > stored_ts
def reorder_embedding(docs):
    """Apply LongContextReorder to *docs* and return the reordered sequence."""
    return LongContextReorder().transform_documents(docs)
# RAG CHAIN
@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Assemble the RAG ConversationalRetrievalChain.

    Wires together a Replicate-hosted LLM, a top-6 retriever over
    *vector_store*, conversation buffer memory, and the project's
    ``sahabat_prompt``. Source documents are included in the chain output.
    """
    replicate_llm = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 10000},
    )
    retriever = vector_store.as_retriever(search_kwargs={"k": 6})
    # output_key="answer" tells the memory which chain output to record,
    # since return_source_documents=True makes the chain emit multiple keys.
    buffer = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    return ConversationalRetrievalChain.from_llm(
        replicate_llm,
        retriever=retriever,
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=buffer,
    )
def get_rag_chain(vector_store):
    """Public entry point: delegate to create_conversational_chain for *vector_store*."""
    return create_conversational_chain(vector_store)
# MAIN FUNCTION
@traceable(name="Main Chatbot RAG App")
def main():
    """App entry point: ensure a vector store is available, then run the chat UI.

    On the first run of a session (empty chat history) the vector store is
    rebuilt from the source documents when outdated, otherwise loaded from
    Supabase. On subsequent reruns the cached store in session_state is
    reused. The chat UI is only shown when a vector store is available.
    """
    # st.set_page_config must be the FIRST Streamlit command executed in the
    # script, so call it before initialize_session_state (which may itself
    # touch Streamlit APIs).
    st.set_page_config(
        page_title="PNP-Bot",
        page_icon="logo-pnp.ico",
    )
    initialize_session_state()
    vector_store = None
    if len(st.session_state["history"]) == 0:
        # Fresh session: decide between rebuilding and loading the store.
        if vector_store_is_outdated():
            with st.spinner("Memuat dan memproses dokumen..."):
                get_data()
                docs = load_docs()
                if len(docs) > 0:
                    reordered_docs = reorder_embedding(docs)
                    vector_store = process_documents(reordered_docs)
                    with st.spinner("Mengunggah vector store ke Supabase..."):
                        success = save_vector_store_to_supabase(
                            vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
                        )
                    if success:
                        print("✅ Vector store berhasil diunggah ke Supabase!")
                    else:
                        print("❌ Gagal mengunggah vector store ke Supabase.")
                else:
                    # No source documents: the chatbot still works, but
                    # answers without document context.
                    print("⚠️ Folder 'data/' kosong. Chatbot tetap bisa digunakan, tetapi tanpa konteks dokumen.")
                    vector_store = None
        else:
            with st.spinner("Memuat vector store dari Supabase..."):
                vector_store = load_vector_store_from_supabase(
                    supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
                )
            if vector_store:
                print("✅ Vector store berhasil dimuat dari Supabase!")
            else:
                print("❌ Gagal memuat vector store dari Supabase.")
    else:
        # Ongoing session: prefer the cached store, fall back to Supabase.
        vector_store = st.session_state.get("vector_store") or load_vector_store_from_supabase(
            supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
        )
    st.session_state["vector_store"] = vector_store
    if st.session_state["vector_store"] is not None:
        chain = create_conversational_chain(st.session_state["vector_store"])
        display_chat_history(chain)


if __name__ == "__main__":
    main()
|