gabrielaltay committed on
Commit
c45b00b
·
1 Parent(s): 9b6148b
src/legisqa_local/app.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  import logging
4
  import streamlit as st
5
- from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment, setup_chromadb
6
  from legisqa_local.core.vectorstore import initialize_vectorstore
7
  from legisqa_local.components.sidebar import render_sidebar
8
  from legisqa_local.tabs.rag_tab import RAGTab
@@ -32,9 +32,9 @@ def main():
32
  logger.info("✅ Environment setup complete")
33
 
34
  # Setup ChromaDB (download if needed)
35
- logger.info("💾 Setting up ChromaDB...")
36
- setup_chromadb()
37
- logger.info("✅ ChromaDB setup complete")
38
 
39
  # Initialize vectorstore (load once and cache in session state)
40
  initialize_vectorstore()
 
2
 
3
  import logging
4
  import streamlit as st
5
+ from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment, inspect_chromadb
6
  from legisqa_local.core.vectorstore import initialize_vectorstore
7
  from legisqa_local.components.sidebar import render_sidebar
8
  from legisqa_local.tabs.rag_tab import RAGTab
 
32
  logger.info("✅ Environment setup complete")
33
 
34
  # Setup ChromaDB (download if needed)
35
+ logger.info("💾 Inspecting ChromaDB...")
36
+ inspect_chromadb()
37
+ logger.info("✅ ChromaDB inspection complete")
38
 
39
  # Initialize vectorstore (load once and cache in session state)
40
  initialize_vectorstore()
src/legisqa_local/config/settings.py CHANGED
@@ -58,73 +58,55 @@ def create_chroma_client():
58
  return client
59
 
60
 
61
- def inspect_chromadb(client: chromadb.HttpClient):
62
  """Inspect ChromaDB collection to verify it's working correctly"""
63
  logger.info("🔍 === ChromaDB Collection Inspection ===")
 
 
 
 
 
64
 
65
- try:
66
- from chromadb import PersistentClient
 
 
 
 
 
 
 
 
67
 
68
- # Get collection name from config
69
- chroma_config = get_chroma_config()
70
- collection_name = chroma_config["collection_name"]
 
 
 
 
71
 
72
- logger.info(f"📋 Collection name: {collection_name}")
 
 
 
 
73
 
74
- # List all collections
75
- collections = client.list_collections()
76
- logger.info(f"📚 Available collections: {[c.name for c in collections]}")
77
 
78
- # Get the specific collection
79
- if collection_name in [c.name for c in collections]:
80
- collection = client.get_collection(name=collection_name)
81
- logger.info(f"✅ Collection '{collection_name}' found")
82
-
83
- # Get collection count
84
- count = collection.count()
85
- logger.info(f"📊 Collection count: {count} items")
86
- elif collections:
87
- # Try the first available collection if the requested one doesn't exist
88
- logger.warning(f"⚠️ Collection '{collection_name}' not found, trying first available collection")
89
- first_collection = collections[0]
90
- logger.info(f"🔄 Using collection '{first_collection.name}' instead")
91
- collection = first_collection
92
- count = collection.count()
93
- logger.info(f"📊 Collection count: {count} items")
94
-
95
- # Update environment variable for future use
96
- os.environ["CHROMA_COLLECTION_NAME"] = first_collection.name
97
- logger.info(f"🔧 Updated CHROMA_COLLECTION_NAME to: {first_collection.name}")
98
- else:
99
- logger.error(f"❌ Collection '{collection_name}' not found!")
100
- logger.error(f"Available collections: {[c.name for c in collections]}")
101
- return
102
-
103
- # Process the collection (either the requested one or the fallback)
104
- if count > 0:
105
- # Get a sample item
106
- logger.info("🔍 Fetching sample items...")
107
- sample = collection.get(limit=3, include=["documents", "metadatas"])
108
-
109
- if sample.get('ids'):
110
- logger.info(f"📝 Sample IDs: {sample['ids']}")
111
-
112
- if sample.get('documents'):
113
- logger.info(f"📄 Sample document (first 200 chars):")
114
- logger.info(f" {sample['documents'][0][:200]}...")
115
-
116
- if sample.get('metadatas'):
117
- logger.info(f"🏷️ Sample metadata:")
118
- for i, metadata in enumerate(sample['metadatas'][:2]):
119
- logger.info(f" Item {i}: {metadata}")
120
- else:
121
- logger.warning("⚠️ Collection is empty!")
122
 
123
- except ImportError as e:
124
- logger.error(f"❌ Import error during ChromaDB inspection: {e}")
125
- except Exception as e:
126
- logger.error(f"❌ Error inspecting ChromaDB: {e}")
127
- logger.error(f"Exception type: {type(e).__name__}")
128
 
129
  logger.info("🔍 === ChromaDB Inspection Complete ===")
130
 
 
58
  return client
59
 
60
 
61
+ def inspect_chromadb():
62
  """Inspect ChromaDB collection to verify it's working correctly"""
63
  logger.info("🔍 === ChromaDB Collection Inspection ===")
64
+ client = create_chroma_client()
65
+
66
+ # Get collection name from config
67
+ chroma_config = get_chroma_config()
68
+ collection_name = chroma_config["collection_name"]
69
 
70
+ logger.info(f"📋 Collection name: {collection_name}")
71
+
72
+ # List all collections
73
+ collections = client.list_collections()
74
+ logger.info(f"📚 Available collections: {[c.name for c in collections]}")
75
+
76
+ # Get the specific collection
77
+ if collection_name in [c.name for c in collections]:
78
+ collection = client.get_collection(name=collection_name)
79
+ logger.info(f"✅ Collection '{collection_name}' found")
80
 
81
+ # Get collection count
82
+ count = collection.count()
83
+ logger.info(f"📊 Collection count: {count} items")
84
+ else:
85
+ logger.error(f"❌ Collection '{collection_name}' not found!")
86
+ logger.error(f"Available collections: {[c.name for c in collections]}")
87
+ return
88
 
89
+ # Process the collection (either the requested one or the fallback)
90
+ if count > 0:
91
+ # Get a sample item
92
+ logger.info("🔍 Fetching sample items...")
93
+ sample = collection.get(limit=3, include=["documents", "metadatas"])
94
 
95
+ if sample.get('ids'):
96
+ logger.info(f"📝 Sample IDs: {sample['ids']}")
 
97
 
98
+ if sample.get('documents'):
99
+ logger.info(f"📄 Sample document (first 200 chars):")
100
+ logger.info(f" {sample['documents'][0][:200]}...")
101
+
102
+ if sample.get('metadatas'):
103
+ logger.info(f"🏷️ Sample metadata:")
104
+ for i, metadata in enumerate(sample['metadatas'][:2]):
105
+ logger.info(f" Item {i}: {metadata}")
106
+ else:
107
+ logger.warning("⚠️ Collection is empty!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+
 
 
 
 
110
 
111
  logger.info("🔍 === ChromaDB Inspection Complete ===")
112