Spaces:
Sleeping
Sleeping
Commit
·
c45b00b
1
Parent(s):
9b6148b
update
Browse files- src/legisqa_local/app.py +4 -4
- src/legisqa_local/config/settings.py +41 -59
src/legisqa_local/app.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
|
| 3 |
import logging
|
| 4 |
import streamlit as st
|
| 5 |
-
from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment,
|
| 6 |
from legisqa_local.core.vectorstore import initialize_vectorstore
|
| 7 |
from legisqa_local.components.sidebar import render_sidebar
|
| 8 |
from legisqa_local.tabs.rag_tab import RAGTab
|
|
@@ -32,9 +32,9 @@ def main():
|
|
| 32 |
logger.info("✅ Environment setup complete")
|
| 33 |
|
| 34 |
# Setup ChromaDB (download if needed)
|
| 35 |
-
logger.info("💾
|
| 36 |
-
|
| 37 |
-
logger.info("✅ ChromaDB
|
| 38 |
|
| 39 |
# Initialize vectorstore (load once and cache in session state)
|
| 40 |
initialize_vectorstore()
|
|
|
|
| 2 |
|
| 3 |
import logging
|
| 4 |
import streamlit as st
|
| 5 |
+
from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment, inspect_chromadb
|
| 6 |
from legisqa_local.core.vectorstore import initialize_vectorstore
|
| 7 |
from legisqa_local.components.sidebar import render_sidebar
|
| 8 |
from legisqa_local.tabs.rag_tab import RAGTab
|
|
|
|
| 32 |
logger.info("✅ Environment setup complete")
|
| 33 |
|
| 34 |
# Setup ChromaDB (download if needed)
|
| 35 |
+
logger.info("💾 Inspecting ChromaDB...")
|
| 36 |
+
inspect_chromadb()
|
| 37 |
+
logger.info("✅ ChromaDB inspection complete")
|
| 38 |
|
| 39 |
# Initialize vectorstore (load once and cache in session state)
|
| 40 |
initialize_vectorstore()
|
src/legisqa_local/config/settings.py
CHANGED
|
@@ -58,73 +58,55 @@ def create_chroma_client():
|
|
| 58 |
return client
|
| 59 |
|
| 60 |
|
| 61 |
-
def inspect_chromadb(
|
| 62 |
"""Inspect ChromaDB collection to verify it's working correctly"""
|
| 63 |
logger.info("π === ChromaDB Collection Inspection ===")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
-
# Get collection
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
logger.info(f"π Available collections: {[c.name for c in collections]}")
|
| 77 |
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
logger.warning(f"⚠️ Collection '{collection_name}' not found, trying first available collection")
|
| 89 |
-
first_collection = collections[0]
|
| 90 |
-
logger.info(f"π Using collection '{first_collection.name}' instead")
|
| 91 |
-
collection = first_collection
|
| 92 |
-
count = collection.count()
|
| 93 |
-
logger.info(f"π Collection count: {count} items")
|
| 94 |
-
|
| 95 |
-
# Update environment variable for future use
|
| 96 |
-
os.environ["CHROMA_COLLECTION_NAME"] = first_collection.name
|
| 97 |
-
logger.info(f"🔧 Updated CHROMA_COLLECTION_NAME to: {first_collection.name}")
|
| 98 |
-
else:
|
| 99 |
-
logger.error(f"❌ Collection '{collection_name}' not found!")
|
| 100 |
-
logger.error(f"Available collections: {[c.name for c in collections]}")
|
| 101 |
-
return
|
| 102 |
-
|
| 103 |
-
# Process the collection (either the requested one or the fallback)
|
| 104 |
-
if count > 0:
|
| 105 |
-
# Get a sample item
|
| 106 |
-
logger.info("π Fetching sample items...")
|
| 107 |
-
sample = collection.get(limit=3, include=["documents", "metadatas"])
|
| 108 |
-
|
| 109 |
-
if sample.get('ids'):
|
| 110 |
-
logger.info(f"π Sample IDs: {sample['ids']}")
|
| 111 |
-
|
| 112 |
-
if sample.get('documents'):
|
| 113 |
-
logger.info(f"π Sample document (first 200 chars):")
|
| 114 |
-
logger.info(f" {sample['documents'][0][:200]}...")
|
| 115 |
-
|
| 116 |
-
if sample.get('metadatas'):
|
| 117 |
-
logger.info(f"🏷️ Sample metadata:")
|
| 118 |
-
for i, metadata in enumerate(sample['metadatas'][:2]):
|
| 119 |
-
logger.info(f" Item {i}: {metadata}")
|
| 120 |
-
else:
|
| 121 |
-
logger.warning("⚠️ Collection is empty!")
|
| 122 |
|
| 123 |
-
|
| 124 |
-
logger.error(f"❌ Import error during ChromaDB inspection: {e}")
|
| 125 |
-
except Exception as e:
|
| 126 |
-
logger.error(f"❌ Error inspecting ChromaDB: {e}")
|
| 127 |
-
logger.error(f"Exception type: {type(e).__name__}")
|
| 128 |
|
| 129 |
logger.info("π === ChromaDB Inspection Complete ===")
|
| 130 |
|
|
|
|
| 58 |
return client
|
| 59 |
|
| 60 |
|
| 61 |
+
def inspect_chromadb():
|
| 62 |
"""Inspect ChromaDB collection to verify it's working correctly"""
|
| 63 |
logger.info("π === ChromaDB Collection Inspection ===")
|
| 64 |
+
client = create_chroma_client()
|
| 65 |
+
|
| 66 |
+
# Get collection name from config
|
| 67 |
+
chroma_config = get_chroma_config()
|
| 68 |
+
collection_name = chroma_config["collection_name"]
|
| 69 |
|
| 70 |
+
logger.info(f"π Collection name: {collection_name}")
|
| 71 |
+
|
| 72 |
+
# List all collections
|
| 73 |
+
collections = client.list_collections()
|
| 74 |
+
logger.info(f"π Available collections: {[c.name for c in collections]}")
|
| 75 |
+
|
| 76 |
+
# Get the specific collection
|
| 77 |
+
if collection_name in [c.name for c in collections]:
|
| 78 |
+
collection = client.get_collection(name=collection_name)
|
| 79 |
+
logger.info(f"✅ Collection '{collection_name}' found")
|
| 80 |
|
| 81 |
+
# Get collection count
|
| 82 |
+
count = collection.count()
|
| 83 |
+
logger.info(f"π Collection count: {count} items")
|
| 84 |
+
else:
|
| 85 |
+
logger.error(f"❌ Collection '{collection_name}' not found!")
|
| 86 |
+
logger.error(f"Available collections: {[c.name for c in collections]}")
|
| 87 |
+
return
|
| 88 |
|
| 89 |
+
# Process the collection (either the requested one or the fallback)
|
| 90 |
+
if count > 0:
|
| 91 |
+
# Get a sample item
|
| 92 |
+
logger.info("π Fetching sample items...")
|
| 93 |
+
sample = collection.get(limit=3, include=["documents", "metadatas"])
|
| 94 |
|
| 95 |
+
if sample.get('ids'):
|
| 96 |
+
logger.info(f"π Sample IDs: {sample['ids']}")
|
|
|
|
| 97 |
|
| 98 |
+
if sample.get('documents'):
|
| 99 |
+
logger.info(f"π Sample document (first 200 chars):")
|
| 100 |
+
logger.info(f" {sample['documents'][0][:200]}...")
|
| 101 |
+
|
| 102 |
+
if sample.get('metadatas'):
|
| 103 |
+
logger.info(f"🏷️ Sample metadata:")
|
| 104 |
+
for i, metadata in enumerate(sample['metadatas'][:2]):
|
| 105 |
+
logger.info(f" Item {i}: {metadata}")
|
| 106 |
+
else:
|
| 107 |
+
logger.warning("⚠️ Collection is empty!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
|
| 109 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
logger.info("π === ChromaDB Inspection Complete ===")
|
| 112 |
|