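"""Streamlit entry point for PNP-Bot, a RAG chatbot.

Source documents are embedded into a FAISS vector store that is persisted to
Supabase storage and rebuilt whenever the underlying data bucket changes.
"""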
import streamlit as st
import os
from dotenv import load_dotenv
from langsmith import traceable
from datetime import datetime
from typing import List, Dict, Optional

from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, list_all_files, load_docs
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
from app.prompts import sahabat_prompt
from app.db import supabase
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder

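# Load environment variables from a local .env file (e.g. REPLICATE_API_TOKEN for the
# Replicate LLM and the Supabase credentials consumed in app.db).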
load_dotenv()


#  CONFIG

BUCKET_NAME = "pnp-bot-storage-archive"
VECTOR_STORE_PREFIX = "vector_store"


#  UTILITY

def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Get the latest timestamp from files in a Supabase storage bucket."""
    files = list_all_files(bucket_name)
    latest_time = 0.0
    for file in files:
        iso_time = file.get("updated_at") or file.get("created_at")
        if iso_time:
            try:
                timestamp = datetime.fromisoformat(iso_time.replace('Z', '+00:00')).timestamp()
                latest_time = max(latest_time, timestamp)
            except Exception as e:
                print(f"Gagal parsing waktu dari {file.get('name')}: {e}")
    return latest_time


def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Get the latest timestamp of vector store files in the Supabase storage."""
    try:
        response = supabase.storage.from_(BUCKET_NAME).list()
        timestamps = []
        for file in response:
            if file["name"].startswith(VECTOR_STORE_PREFIX) and (
                file["name"].endswith(".faiss") or file["name"].endswith(".pkl")
            ):
                timestamps.append(file["updated_at"])
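        # Both the .faiss index and the .pkl metadata file must be present.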
        if len(timestamps) >= 2:
            return max(timestamps)
        return None
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None


def vector_store_is_outdated() -> bool:
    """Check if vector store needs to be updated based on files in Supabase storage."""
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True
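    # Compare the newest file in the raw-data bucket ("pnp-bot-storage") against the
    # timestamp of the serialized vector store stored in BUCKET_NAME.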
    supabase_time = datetime.fromisoformat(supabase_timestamp.replace("Z", "+00:00")).timestamp()
    data_time = get_latest_data_timestamp_from_files("pnp-bot-storage")

    return data_time > supabase_time


def reorder_embedding(docs):
    """Reorder documents for long context retrieval."""
    reordering = LongContextReorder()
    return reordering.transform_documents(docs)



#  RAG CHAIN

@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Create a Conversational Retrieval Chain for RAG."""
    llm = Replicate(
        model="fauzi3007/sahabat-ai-replicate:c3fc398f441379bd3fb6a4498950f9302aa75b7a95e76978a689ceb5c4b4bf09",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 10000}
    )
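    # output_key="answer" tells the memory which chain output to persist, since
    # return_source_documents=True makes the chain return multiple keys.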
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, output_key="answer")
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain


def get_rag_chain(vector_store):
    """Return a Conversational Retrieval Chain for external use."""
    return create_conversational_chain(vector_store)



#  MAIN FUNCTION

@traceable(name="Main Chatbot RAG App")
def main():
    # st.set_page_config must be the first Streamlit command executed in the script.
    st.set_page_config(
        page_title="PNP-Bot",
        page_icon="logo-pnp.ico",
    )
    initialize_session_state()
    vector_store = None

    if len(st.session_state["history"]) == 0:
        if vector_store_is_outdated():
            with st.spinner("Memuat dan memproses dokumen..."):
                get_data()
                docs = load_docs()
                if len(docs) > 0:
                    reordered_docs = reorder_embedding(docs)
                    vector_store = process_documents(reordered_docs)

                    with st.spinner("Mengunggah vector store ke Supabase..."):
                        success = save_vector_store_to_supabase(vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
                        if success:
                            print("βœ… Vector store berhasil diunggah ke Supabase!")
                        else:
                            print("❌ Gagal mengunggah vector store ke Supabase.")
                else:
                    print("⚠️ Folder 'data/' kosong. Chatbot tetap bisa digunakan, tetapi tanpa konteks dokumen.")
                    vector_store = None
        else:
            with st.spinner("Memuat vector store dari Supabase..."):
                vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
                if vector_store:
                    print("βœ… Vector store berhasil dimuat dari Supabase!")
                else:
                    print("❌ Gagal memuat vector store dari Supabase.")
    else:
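        # A conversation is already in progress: reuse the cached vector store if present,
        # falling back to the copy stored in Supabase.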
        vector_store = st.session_state.get("vector_store") or load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)

    st.session_state["vector_store"] = vector_store

    if st.session_state["vector_store"] is not None:
        chain = create_conversational_chain(st.session_state["vector_store"])
        display_chat_history(chain)


if __name__ == "__main__":
    main()