SCR_Course_ChatBot / scripts /setup_vectorstore.py
MaryamKarimi080's picture
Update scripts/setup_vectorstore.py
0ee80e8 verified
raw
history blame contribute delete
917 Bytes
import pickle
from pathlib import Path
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
def main(
    *,
    chunks_path: "Path | str | None" = None,
    db_dir: "Path | str | None" = None,
    batch_size: int = 100,
) -> None:
    """Embed the pickled chunks and add them to a persistent Chroma store.

    The chunks are unpickled from ``chunks_path`` and passed, ``batch_size``
    at a time, to ``Chroma.add_documents`` on a store persisted in ``db_dir``.
    Embeddings come from OpenAI's ``text-embedding-3-small`` model.

    Args:
        chunks_path: Pickle file holding the sequence of chunks. Defaults to
            ``<project root>/output/chunks.pkl``, where the project root is
            the parent of the directory containing this script.
        db_dir: Directory in which Chroma persists its data; created if it
            does not exist. Defaults to ``<project root>/db``.
        batch_size: Maximum number of chunks sent per ``add_documents`` call.

    Raises:
        ValueError: If ``batch_size`` is less than 1.
        FileNotFoundError: If the chunks file does not exist. Raised before
            the DB directory is created, so a failed run leaves nothing behind.
    """
    if batch_size < 1:
        raise ValueError(
            f"batch_size must be a positive integer, got {batch_size!r}"
        )

    base_dir = Path(__file__).resolve().parent.parent
    chunks_path = (
        Path(chunks_path) if chunks_path is not None
        else base_dir / "output" / "chunks.pkl"
    )
    db_dir = Path(db_dir) if db_dir is not None else base_dir / "db"

    # Open the input first: if it is missing we want a clear error and no
    # half-initialised DB folder on disk.
    try:
        chunks_file = open(chunks_path, "rb")
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Chunks file not found: {chunks_path}. "
            "Generate it before building the vector store."
        ) from err

    # SECURITY NOTE: pickle.load can execute arbitrary code from the file.
    # Only point this script at a chunks file produced by a trusted pipeline.
    with chunks_file:
        chunks = pickle.load(chunks_file)

    db_dir.mkdir(parents=True, exist_ok=True)  # create the db folder if missing

    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
    vectorstore = Chroma(
        persist_directory=str(db_dir),
        embedding_function=embedding,
    )

    # NOTE(review): no explicit ids are passed to add_documents, so re-running
    # against an existing DB presumably appends the same chunks again -- confirm
    # whether the store should be cleared (or ids supplied) before a rebuild.
    print(f"🧠 Embedding and adding {len(chunks)} chunks in batches...")
    batch_starts = range(0, len(chunks), batch_size)
    for batch_number, start in enumerate(batch_starts, start=1):
        vectorstore.add_documents(chunks[start:start + batch_size])
        print(f"✅ Added batch {batch_number}")
# Run the vector-store build only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()