Spaces:

MuhammadNoman7600
/

derm-ai

Running

App Files Files Community

muhammadnoman76 committed on May 5

Commit

d2d11be

1 Parent(s): b3b4203

update

Browse files

Files changed (1) hide show

app/services/vector_database_search.py +28 -2

app/services/vector_database_search.py CHANGED Viewed

@@ -52,13 +52,17 @@ class VectorDatabaseSearch:
             split_docs = splitter.split_documents(docs)
             book_name = os.path.splitext(os.path.basename(pdf_path))[0]
             for doc in split_docs:
                 doc.metadata = {
                     "source": book_name,
                     "page": doc.metadata.get('page', 1),
                     "id": str(uuid.uuid4())
                 }
             self.vectorstore.add_documents(split_docs)
             print(f"Added {len(split_docs)} chunks from {book_name}")
             return True
@@ -87,13 +91,35 @@ class VectorDatabaseSearch:
     def get_book_info(self):
         """Retrieve list of unique book sources in the collection"""
         try:
             points = self.client.scroll(
                 collection_name=self.collection_name,
                 limit=1000,
-                with_payload=True
             )[0]
-            books = set(point.payload.get('source', '') for point in points if point.payload)
             return list(books)
         except Exception as e:
             print(f"Error retrieving book info: {e}")

             split_docs = splitter.split_documents(docs)
             book_name = os.path.splitext(os.path.basename(pdf_path))[0]
+            print(f"Processing {book_name} with {len(split_docs)} chunks")
             for doc in split_docs:
+                # Ensure metadata is stored in a consistent way
                 doc.metadata = {
                     "source": book_name,
                     "page": doc.metadata.get('page', 1),
                     "id": str(uuid.uuid4())
                 }
+            # Add documents to vector store
             self.vectorstore.add_documents(split_docs)
             print(f"Added {len(split_docs)} chunks from {book_name}")
             return True
     def get_book_info(self):
         """Retrieve list of unique book sources in the collection"""
         try:
+            # First check if the collection exists
+            collections = self.client.get_collections()
+            if not any(c.name == self.collection_name for c in collections.collections):
+                print(f"Collection {self.collection_name} does not exist yet")
+                return []
+            # Get all points with payload from the collection
             points = self.client.scroll(
                 collection_name=self.collection_name,
                 limit=1000,
+                with_payload=True,
+                with_vectors=False  # We don't need vector data
             )[0]
+            # Debug information
+            print(f"Retrieved {len(points)} points from collection")
+            # Extract unique book sources from payloads
+            books = set()
+            for point in points:
+                # Check if payload exists and has 'metadata' field with 'source'
+                if hasattr(point, 'payload') and point.payload:
+                    # Check different possible payload structures
+                    if 'metadata' in point.payload and 'source' in point.payload['metadata']:
+                        books.add(point.payload['metadata']['source'])
+                    elif 'source' in point.payload:
+                        books.add(point.payload['source'])
+            print(f"Found {len(books)} unique books")
             return list(books)
         except Exception as e:
             print(f"Error retrieving book info: {e}")