Spaces:
Sleeping
Sleeping
Commit
·
eeef8f5
1
Parent(s):
2681dfc
simplify
Browse files
src/legisqa_local/app.py
CHANGED
|
@@ -36,10 +36,8 @@ def main():
|
|
| 36 |
setup_chromadb()
|
| 37 |
logger.info("✅ ChromaDB setup complete")
|
| 38 |
|
| 39 |
-
# Initialize vectorstore (load once and cache)
|
| 40 |
-
logger.info("π Initializing vectorstore...")
|
| 41 |
initialize_vectorstore()
|
| 42 |
-
logger.info("✅ Vectorstore initialization complete")
|
| 43 |
|
| 44 |
# Main content
|
| 45 |
st.title(":classical_building: LegisQA :classical_building:")
|
|
|
|
| 36 |
setup_chromadb()
|
| 37 |
logger.info("✅ ChromaDB setup complete")
|
| 38 |
|
| 39 |
+
# Initialize vectorstore (load once and cache in session state)
|
|
|
|
| 40 |
initialize_vectorstore()
|
|
|
|
| 41 |
|
| 42 |
# Main content
|
| 43 |
st.title(":classical_building: LegisQA :classical_building:")
|
src/legisqa_local/components/sidebar.py
CHANGED
|
@@ -3,28 +3,29 @@
|
|
| 3 |
import streamlit as st
|
| 4 |
import os
|
| 5 |
from legisqa_local.config.settings import get_chroma_config
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def render_chromadb_status():
|
| 9 |
"""Render ChromaDB status in sidebar"""
|
| 10 |
st.subheader("ποΈ Vector Database")
|
| 11 |
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
st.success("✅ ChromaDB Ready")
|
| 18 |
-
st.caption("π
|
|
|
|
|
|
|
| 19 |
st.caption(f"π Collection: {config['collection_name']}")
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
st.
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
st.error("❌ ChromaDB Configuration Error")
|
| 27 |
-
st.caption(f"Error: {str(e)[:50]}...")
|
| 28 |
|
| 29 |
|
| 30 |
def render_outreach_links():
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
import os
|
| 5 |
from legisqa_local.config.settings import get_chroma_config
|
| 6 |
+
from legisqa_local.core.vectorstore import get_vectorstore
|
| 7 |
|
| 8 |
|
| 9 |
def render_chromadb_status():
|
| 10 |
"""Render ChromaDB status in sidebar"""
|
| 11 |
st.subheader("ποΈ Vector Database")
|
| 12 |
|
| 13 |
+
vectorstore = get_vectorstore()
|
| 14 |
+
if vectorstore is not None:
|
| 15 |
+
try:
|
| 16 |
+
# Test the vectorstore to get document count
|
| 17 |
+
count = vectorstore._collection.count()
|
| 18 |
st.success("✅ ChromaDB Ready")
|
| 19 |
+
st.caption(f"π {count:,} documents loaded")
|
| 20 |
+
|
| 21 |
+
config = get_chroma_config()
|
| 22 |
st.caption(f"π Collection: {config['collection_name']}")
|
| 23 |
+
except Exception as e:
|
| 24 |
+
st.warning("⚠️ ChromaDB Loaded (verification failed)")
|
| 25 |
+
st.caption(f"Error: {str(e)[:50]}...")
|
| 26 |
+
else:
|
| 27 |
+
st.info("⏳ ChromaDB Loading...")
|
| 28 |
+
st.caption("Vectorstore is being initialized")
|
|
|
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
def render_outreach_links():
|
src/legisqa_local/core/rag.py
CHANGED
|
@@ -4,7 +4,7 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 4 |
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
| 5 |
|
| 6 |
from legisqa_local.core.llm import get_llm
|
| 7 |
-
from legisqa_local.core.vectorstore import
|
| 8 |
from legisqa_local.utils.formatting import format_docs
|
| 9 |
|
| 10 |
|
|
@@ -41,7 +41,15 @@ Query: {query}"""
|
|
| 41 |
|
| 42 |
def process_query(gen_config: dict, ret_config: dict, query: str):
|
| 43 |
"""Process a query using RAG"""
|
| 44 |
-
vectorstore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
llm = get_llm(gen_config)
|
| 46 |
vs_filter = get_vectorstore_filter(ret_config)
|
| 47 |
|
|
|
|
| 4 |
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
|
| 5 |
|
| 6 |
from legisqa_local.core.llm import get_llm
|
| 7 |
+
from legisqa_local.core.vectorstore import get_vectorstore, get_vectorstore_filter
|
| 8 |
from legisqa_local.utils.formatting import format_docs
|
| 9 |
|
| 10 |
|
|
|
|
| 41 |
|
| 42 |
def process_query(gen_config: dict, ret_config: dict, query: str):
|
| 43 |
"""Process a query using RAG"""
|
| 44 |
+
# Check if vectorstore is loaded
|
| 45 |
+
vectorstore = get_vectorstore()
|
| 46 |
+
if vectorstore is None:
|
| 47 |
+
return {
|
| 48 |
+
"aimessage": "⏳ Vectorstore is still loading. Please wait a moment and try again.",
|
| 49 |
+
"docs": [],
|
| 50 |
+
"query": query
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
llm = get_llm(gen_config)
|
| 54 |
vs_filter = get_vectorstore_filter(ret_config)
|
| 55 |
|
src/legisqa_local/core/vectorstore.py
CHANGED
|
@@ -11,65 +11,42 @@ logger = logging.getLogger(__name__)
|
|
| 11 |
|
| 12 |
|
| 13 |
def load_vectorstore():
|
| 14 |
-
"""Load and return the ChromaDB vectorstore
|
| 15 |
-
# Check if vectorstore is already loaded in session state
|
| 16 |
-
if hasattr(st, 'session_state') and hasattr(st.session_state, 'vectorstore'):
|
| 17 |
-
logger.debug("Using cached vectorstore from session state")
|
| 18 |
-
return st.session_state.vectorstore
|
| 19 |
-
|
| 20 |
logger.info("π Loading ChromaDB vectorstore...")
|
| 21 |
config = get_chroma_config()
|
| 22 |
emb_fn = load_embeddings()
|
| 23 |
|
| 24 |
-
# Debug logging to identify path issues
|
| 25 |
-
logger.info(f"π Vectorstore config:")
|
| 26 |
-
logger.info(f" persist_directory: {config['persist_directory']}")
|
| 27 |
-
logger.info(f" collection_name: {config['collection_name']}")
|
| 28 |
-
logger.info(f"π Environment variables:")
|
| 29 |
-
for key, value in os.environ.items():
|
| 30 |
-
if "CHROMA" in key:
|
| 31 |
-
logger.info(f" {key}={value}")
|
| 32 |
-
|
| 33 |
-
# Check if the directory actually exists
|
| 34 |
-
if not os.path.exists(config["persist_directory"]):
|
| 35 |
-
logger.error(f"❌ ChromaDB directory does not exist: {config['persist_directory']}")
|
| 36 |
-
# Try to find the correct path
|
| 37 |
-
if os.path.exists("/data/chromadb"):
|
| 38 |
-
logger.info(f"π§ Found ChromaDB at /data/chromadb, updating config")
|
| 39 |
-
config["persist_directory"] = "/data/chromadb"
|
| 40 |
-
os.environ["CHROMA_PERSIST_DIRECTORY"] = "/data/chromadb"
|
| 41 |
-
|
| 42 |
vectorstore = Chroma(
|
| 43 |
persist_directory=config["persist_directory"],
|
| 44 |
collection_name=config["collection_name"],
|
| 45 |
embedding_function=emb_fn,
|
| 46 |
)
|
| 47 |
|
| 48 |
-
|
| 49 |
-
if hasattr(st, 'session_state'):
|
| 50 |
-
st.session_state.vectorstore = vectorstore
|
| 51 |
-
logger.info("✅ Vectorstore loaded and cached in session state")
|
| 52 |
-
else:
|
| 53 |
-
logger.info("✅ Vectorstore loaded (session state not available)")
|
| 54 |
-
|
| 55 |
return vectorstore
|
| 56 |
|
| 57 |
|
| 58 |
def initialize_vectorstore():
|
| 59 |
-
"""Initialize the vectorstore at application startup"""
|
| 60 |
logger.info("π Initializing vectorstore at startup...")
|
| 61 |
-
vectorstore = load_vectorstore()
|
| 62 |
|
| 63 |
-
# Test the vectorstore with a simple query to ensure it's working
|
| 64 |
try:
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
| 66 |
collection = vectorstore._collection
|
| 67 |
count = collection.count()
|
| 68 |
-
logger.info(f"✅ Vectorstore initialized
|
| 69 |
-
|
| 70 |
except Exception as e:
|
| 71 |
-
logger.error(f"❌ Error
|
| 72 |
-
raise
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
def get_vectorstore_filter(ret_config: dict) -> dict:
|
|
|
|
| 11 |
|
| 12 |
|
| 13 |
def load_vectorstore():
|
| 14 |
+
"""Load and return the ChromaDB vectorstore"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
logger.info("π Loading ChromaDB vectorstore...")
|
| 16 |
config = get_chroma_config()
|
| 17 |
emb_fn = load_embeddings()
|
| 18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
vectorstore = Chroma(
|
| 20 |
persist_directory=config["persist_directory"],
|
| 21 |
collection_name=config["collection_name"],
|
| 22 |
embedding_function=emb_fn,
|
| 23 |
)
|
| 24 |
|
| 25 |
+
logger.info("✅ Vectorstore loaded successfully")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
return vectorstore
|
| 27 |
|
| 28 |
|
| 29 |
def initialize_vectorstore():
|
| 30 |
+
"""Initialize the vectorstore at application startup and cache in session state"""
|
| 31 |
logger.info("π Initializing vectorstore at startup...")
|
|
|
|
| 32 |
|
|
|
|
| 33 |
try:
|
| 34 |
+
vectorstore = load_vectorstore()
|
| 35 |
+
st.session_state.vectorstore = vectorstore
|
| 36 |
+
|
| 37 |
+
# Test the vectorstore to verify it's working
|
| 38 |
collection = vectorstore._collection
|
| 39 |
count = collection.count()
|
| 40 |
+
logger.info(f"✅ Vectorstore initialized and cached - {count} documents available")
|
| 41 |
+
|
| 42 |
except Exception as e:
|
| 43 |
+
logger.error(f"❌ Error initializing vectorstore: {e}")
|
| 44 |
+
# Don't raise - let the app continue and show loading message to users
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def get_vectorstore():
|
| 48 |
+
"""Get vectorstore from session state, or return None if not loaded"""
|
| 49 |
+
return getattr(st.session_state, 'vectorstore', None)
|
| 50 |
|
| 51 |
|
| 52 |
def get_vectorstore_filter(ret_config: dict) -> dict:
|