Spaces:
Running
Running
File size: 1,746 Bytes
8828f20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# Running this script to split FAISS index collection to the second/different cluster.
from pymongo import MongoClient
from dotenv import load_dotenv
import os
# Load environment variables from .env
load_dotenv()
# Connection strings (update as needed)
mongo_uri = os.getenv("MONGO_URI") # QA cluster connection string
index_uri = os.getenv("INDEX_URI") # FAISS index cluster connection string
if not mongo_uri:
raise ValueError("MONGO_URI is missing!")
if not index_uri:
raise ValueError("INDEX_URI is missing!")
# Connect to the QA cluster (where FAISS data was accidentally stored)
qa_client = MongoClient(mongo_uri)
qa_db = qa_client["MedicalChatbotDB"]
# Connect to the FAISS index cluster
faiss_client = MongoClient(index_uri)
faiss_db = faiss_client["MedicalChatbotDB"] # Use the same database name if desired
# Define the GridFS collections to move.
# In GridFS, files are stored in two collections: "<bucket>.files" and "<bucket>.chunks".
source_files = qa_db["faiss_index_files.files"]
source_chunks = qa_db["faiss_index_files.chunks"]
dest_files = faiss_db["faiss_index_files.files"]
dest_chunks = faiss_db["faiss_index_files.chunks"]
print("Moving FAISS index GridFS files...")
# Copy documents from the source 'files' collection
for doc in source_files.find():
dest_files.insert_one(doc)
# Copy documents from the source 'chunks' collection
for doc in source_chunks.find():
dest_chunks.insert_one(doc)
print("✅ FAISS GridFS collections moved successfully.")
# Optionally, drop the old collections from the QA cluster to free up space:
qa_db.drop_collection("faiss_index_files.files")
qa_db.drop_collection("faiss_index_files.chunks")
print("Old FAISS GridFS collections dropped from the QA cluster.")
|