gabrielaltay committed on
Commit
471185d
·
1 Parent(s): c1ea157

more logging

Browse files
src/legisqa_local/app.py CHANGED
@@ -1,5 +1,6 @@
1
  """Main Streamlit application for LegisQA"""
2
 
 
3
  import streamlit as st
4
  from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment, setup_chromadb
5
  from legisqa_local.components.sidebar import render_sidebar
@@ -7,17 +8,32 @@ from legisqa_local.tabs.rag_tab import RAGTab
7
  from legisqa_local.tabs.rag_sbs_tab import RAGSideBySideTab
8
  from legisqa_local.tabs.guide_tab import GuideTab
9
 
 
 
 
 
 
 
 
 
10
 
11
  def main():
12
  """Main application function"""
 
 
13
  # Configure Streamlit
14
  st.set_page_config(**STREAMLIT_CONFIG)
 
15
 
16
  # Setup environment
 
17
  setup_environment()
 
18
 
19
  # Setup ChromaDB (download if needed)
 
20
  setup_chromadb()
 
21
 
22
  # Main content
23
  st.title(":classical_building: LegisQA :classical_building:")
 
1
  """Main Streamlit application for LegisQA"""
2
 
3
+ import logging
4
  import streamlit as st
5
  from legisqa_local.config.settings import STREAMLIT_CONFIG, setup_environment, setup_chromadb
6
  from legisqa_local.components.sidebar import render_sidebar
 
8
  from legisqa_local.tabs.rag_sbs_tab import RAGSideBySideTab
9
  from legisqa_local.tabs.guide_tab import GuideTab
10
 
11
+ # Configure logging (should be done once at application startup)
12
+ logging.basicConfig(
13
+ level=logging.INFO,
14
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
15
+ force=True # Force reconfiguration if already configured
16
+ )
17
+ logger = logging.getLogger(__name__)
18
+
19
 
20
  def main():
21
  """Main application function"""
22
+ logger.info("πŸš€ Starting LegisQA application...")
23
+
24
  # Configure Streamlit
25
  st.set_page_config(**STREAMLIT_CONFIG)
26
+ logger.info("βœ… Streamlit configuration complete")
27
 
28
  # Setup environment
29
+ logger.info("πŸ”§ Setting up environment...")
30
  setup_environment()
31
+ logger.info("βœ… Environment setup complete")
32
 
33
  # Setup ChromaDB (download if needed)
34
+ logger.info("πŸ’Ύ Setting up ChromaDB...")
35
  setup_chromadb()
36
+ logger.info("βœ… ChromaDB setup complete")
37
 
38
  # Main content
39
  st.title(":classical_building: LegisQA :classical_building:")
src/legisqa_local/config/settings.py CHANGED
@@ -1,8 +1,11 @@
1
  """Application settings and configuration"""
2
 
3
  import os
 
4
  import streamlit as st
5
 
 
 
6
  # Streamlit configuration
7
  STREAMLIT_CONFIG = {
8
  "layout": "wide",
@@ -36,74 +39,109 @@ def get_chroma_config():
36
 
37
  def setup_chromadb():
38
  """Setup ChromaDB - use persistent storage (/data) or download from S3 if needed"""
 
 
39
  chroma_config = get_chroma_config()
40
  chroma_path = chroma_config["persist_directory"]
 
41
 
42
  # For HF Spaces with persistent storage, prefer /data directory
43
  persistent_chroma_path = "/data/chromadb"
 
 
44
  if os.path.exists("/data"):
45
- print("HF Spaces persistent storage detected at /data")
46
 
47
  # Check if ChromaDB exists in persistent storage
48
  if os.path.exists(persistent_chroma_path) and os.listdir(persistent_chroma_path):
49
- print(f"βœ… ChromaDB found in persistent storage: {persistent_chroma_path}")
50
  # Update environment variable to point to persistent storage
51
  os.environ["CHROMA_PERSIST_DIRECTORY"] = persistent_chroma_path
 
52
  return persistent_chroma_path
53
 
54
- # Download from S3 to persistent storage
 
 
55
  s3_bucket = os.getenv("CHROMA_S3_BUCKET", "")
56
  s3_prefix = os.getenv("CHROMA_S3_PREFIX", "")
57
 
 
 
 
58
  if s3_bucket and s3_prefix:
59
- print(f"πŸ“₯ Downloading ChromaDB from S3 to persistent storage...")
60
- print(f" Source: s3://{s3_bucket}/{s3_prefix}")
61
- print(f" Target: {persistent_chroma_path}")
62
 
63
  success = download_chromadb_from_s3(s3_bucket, s3_prefix, persistent_chroma_path)
64
  if success:
65
  # Update environment variable to point to persistent storage
66
  os.environ["CHROMA_PERSIST_DIRECTORY"] = persistent_chroma_path
 
67
  return persistent_chroma_path
 
 
68
  else:
69
- print("❌ No S3 configuration found (CHROMA_S3_BUCKET, CHROMA_S3_PREFIX)")
 
 
 
 
 
 
70
 
71
  # Fallback: check if ChromaDB exists at configured path (local development)
72
- if os.path.exists(chroma_path) and os.listdir(chroma_path):
73
- print(f"βœ… ChromaDB found at {chroma_path}")
74
- return chroma_path
 
 
 
 
 
 
75
 
76
- print(f"⚠️ Using default ChromaDB path: {chroma_path}")
 
77
  return chroma_path
78
 
79
  def download_chromadb_from_s3(bucket: str, prefix: str, local_path: str) -> bool:
80
  """Download ChromaDB from S3"""
 
 
81
  try:
82
  import subprocess
83
  import os
84
 
85
  # Ensure target directory exists
 
86
  os.makedirs(local_path, exist_ok=True)
87
 
88
  # Use AWS CLI to sync from S3 (no credentials needed for public buckets)
89
  s3_url = f"s3://{bucket}/{prefix}"
90
  cmd = ["aws", "s3", "sync", s3_url, local_path, "--no-sign-request"]
91
 
92
- print(f"Running: {' '.join(cmd)}")
93
  result = subprocess.run(cmd, capture_output=True, text=True)
94
 
95
  if result.returncode == 0:
96
- print("βœ… ChromaDB download from S3 complete!")
 
 
97
  return True
98
  else:
99
- print(f"❌ S3 download failed: {result.stderr}")
 
 
 
100
  return False
101
 
102
  except FileNotFoundError:
103
- print("❌ AWS CLI not found. Trying with boto3...")
104
  return download_chromadb_from_s3_boto3(bucket, prefix, local_path)
105
  except Exception as e:
106
- print(f"❌ Error downloading from S3: {e}")
107
  return False
108
 
109
  def download_chromadb_from_s3_boto3(bucket: str, prefix: str, local_path: str) -> bool:
@@ -114,12 +152,13 @@ def download_chromadb_from_s3_boto3(bucket: str, prefix: str, local_path: str) -
114
  from botocore.config import Config
115
  import os
116
 
117
- print("πŸ“¦ Using boto3 for S3 download...")
118
 
119
  # Create S3 client with no credentials (for public buckets)
120
  s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
121
 
122
  # List objects in the S3 prefix
 
123
  paginator = s3.get_paginator('list_objects_v2')
124
  pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
125
 
@@ -141,18 +180,18 @@ def download_chromadb_from_s3_boto3(bucket: str, prefix: str, local_path: str) -
141
  # Download file
142
  file_count += 1
143
  if file_count % 10 == 0:
144
- print(f"Downloaded {file_count} files...")
145
 
146
  s3.download_file(bucket, key, local_file_path)
147
 
148
- print(f"βœ… ChromaDB download from S3 (boto3) complete! Downloaded {file_count} files.")
149
  return True
150
 
151
  except ImportError:
152
- print("❌ boto3 not available. Please install: pip install boto3")
153
  return False
154
  except Exception as e:
155
- print(f"❌ Error downloading from S3 with boto3: {e}")
156
  return False
157
 
158
  # Embedding model configuration
 
1
  """Application settings and configuration"""
2
 
3
  import os
4
+ import logging
5
  import streamlit as st
6
 
7
+ logger = logging.getLogger(__name__)
8
+
9
  # Streamlit configuration
10
  STREAMLIT_CONFIG = {
11
  "layout": "wide",
 
39
 
40
  def setup_chromadb():
41
  """Setup ChromaDB - use persistent storage (/data) or download from S3 if needed"""
42
+ logger.info("=== ChromaDB Setup Starting ===")
43
+
44
  chroma_config = get_chroma_config()
45
  chroma_path = chroma_config["persist_directory"]
46
+ logger.info(f"Initial ChromaDB path: {chroma_path}")
47
 
48
  # For HF Spaces with persistent storage, prefer /data directory
49
  persistent_chroma_path = "/data/chromadb"
50
+
51
+ # Check if we're in HF Spaces with persistent storage
52
  if os.path.exists("/data"):
53
+ logger.info("πŸš€ HF Spaces persistent storage detected at /data")
54
 
55
  # Check if ChromaDB exists in persistent storage
56
  if os.path.exists(persistent_chroma_path) and os.listdir(persistent_chroma_path):
57
+ logger.info(f"βœ… ChromaDB found in persistent storage: {persistent_chroma_path}")
58
  # Update environment variable to point to persistent storage
59
  os.environ["CHROMA_PERSIST_DIRECTORY"] = persistent_chroma_path
60
+ logger.info(f"Updated CHROMA_PERSIST_DIRECTORY to: {persistent_chroma_path}")
61
  return persistent_chroma_path
62
 
63
+ # ChromaDB not found in persistent storage, try to download from S3
64
+ logger.info("ChromaDB not found in persistent storage, checking S3 configuration...")
65
+
66
  s3_bucket = os.getenv("CHROMA_S3_BUCKET", "")
67
  s3_prefix = os.getenv("CHROMA_S3_PREFIX", "")
68
 
69
+ logger.info(f"S3 Bucket: {s3_bucket}")
70
+ logger.info(f"S3 Prefix: {s3_prefix}")
71
+
72
  if s3_bucket and s3_prefix:
73
+ logger.info(f"πŸ“₯ Downloading ChromaDB from S3 to persistent storage...")
74
+ logger.info(f" Source: s3://{s3_bucket}/{s3_prefix}")
75
+ logger.info(f" Target: {persistent_chroma_path}")
76
 
77
  success = download_chromadb_from_s3(s3_bucket, s3_prefix, persistent_chroma_path)
78
  if success:
79
  # Update environment variable to point to persistent storage
80
  os.environ["CHROMA_PERSIST_DIRECTORY"] = persistent_chroma_path
81
+ logger.info(f"βœ… ChromaDB download successful! Updated path to: {persistent_chroma_path}")
82
  return persistent_chroma_path
83
+ else:
84
+ logger.error("❌ ChromaDB download from S3 failed!")
85
  else:
86
+ logger.error("❌ No S3 configuration found (CHROMA_S3_BUCKET, CHROMA_S3_PREFIX)")
87
+ logger.info("Available environment variables:")
88
+ for key, value in os.environ.items():
89
+ if "CHROMA" in key:
90
+ logger.info(f" {key}={value}")
91
+ else:
92
+ logger.info("No /data directory found (not in HF Spaces with persistent storage)")
93
 
94
  # Fallback: check if ChromaDB exists at configured path (local development)
95
+ logger.info(f"Checking fallback path: {chroma_path}")
96
+ if os.path.exists(chroma_path):
97
+ if os.listdir(chroma_path):
98
+ logger.info(f"βœ… ChromaDB found at {chroma_path}")
99
+ return chroma_path
100
+ else:
101
+ logger.warning(f"ChromaDB directory exists but is empty: {chroma_path}")
102
+ else:
103
+ logger.warning(f"ChromaDB directory does not exist: {chroma_path}")
104
 
105
+ logger.warning(f"⚠️ Using default ChromaDB path: {chroma_path}")
106
+ logger.info("=== ChromaDB Setup Complete ===")
107
  return chroma_path
108
 
109
  def download_chromadb_from_s3(bucket: str, prefix: str, local_path: str) -> bool:
110
  """Download ChromaDB from S3"""
111
+ logger.info(f"Starting S3 download: s3://{bucket}/{prefix} -> {local_path}")
112
+
113
  try:
114
  import subprocess
115
  import os
116
 
117
  # Ensure target directory exists
118
+ logger.info(f"Creating target directory: {local_path}")
119
  os.makedirs(local_path, exist_ok=True)
120
 
121
  # Use AWS CLI to sync from S3 (no credentials needed for public buckets)
122
  s3_url = f"s3://{bucket}/{prefix}"
123
  cmd = ["aws", "s3", "sync", s3_url, local_path, "--no-sign-request"]
124
 
125
+ logger.info(f"Running AWS CLI command: {' '.join(cmd)}")
126
  result = subprocess.run(cmd, capture_output=True, text=True)
127
 
128
  if result.returncode == 0:
129
+ logger.info("βœ… ChromaDB download from S3 (AWS CLI) complete!")
130
+ if result.stdout:
131
+ logger.info(f"AWS CLI output: {result.stdout}")
132
  return True
133
  else:
134
+ logger.error(f"❌ AWS CLI failed with return code {result.returncode}")
135
+ logger.error(f"AWS CLI stderr: {result.stderr}")
136
+ if result.stdout:
137
+ logger.error(f"AWS CLI stdout: {result.stdout}")
138
  return False
139
 
140
  except FileNotFoundError:
141
+ logger.warning("❌ AWS CLI not found. Trying with boto3...")
142
  return download_chromadb_from_s3_boto3(bucket, prefix, local_path)
143
  except Exception as e:
144
+ logger.error(f"❌ Error downloading from S3: {e}")
145
  return False
146
 
147
  def download_chromadb_from_s3_boto3(bucket: str, prefix: str, local_path: str) -> bool:
 
152
  from botocore.config import Config
153
  import os
154
 
155
+ logger.info("πŸ“¦ Using boto3 for S3 download...")
156
 
157
  # Create S3 client with no credentials (for public buckets)
158
  s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
159
 
160
  # List objects in the S3 prefix
161
+ logger.info(f"Listing objects in s3://{bucket}/{prefix}")
162
  paginator = s3.get_paginator('list_objects_v2')
163
  pages = paginator.paginate(Bucket=bucket, Prefix=prefix)
164
 
 
180
  # Download file
181
  file_count += 1
182
  if file_count % 10 == 0:
183
+ logger.info(f"Downloaded {file_count} files...")
184
 
185
  s3.download_file(bucket, key, local_file_path)
186
 
187
+ logger.info(f"βœ… ChromaDB download from S3 (boto3) complete! Downloaded {file_count} files.")
188
  return True
189
 
190
  except ImportError:
191
+ logger.error("❌ boto3 not available. Please install: pip install boto3")
192
  return False
193
  except Exception as e:
194
+ logger.error(f"❌ Error downloading from S3 with boto3: {e}")
195
  return False
196
 
197
  # Embedding model configuration