"""Build a FAISS vector index from PDF documents and save it to disk.

Pipeline:
    1. Load every PDF matching the glob pattern (one document per page as
       returned by ``PyPDFLoader.load``).
    2. Split the loaded documents into overlapping text chunks.
    3. Embed the chunks with ``OpenAIEmbeddings``.
    4. Store the vectors in a FAISS index and persist it locally.

Environment variables are read from a ``.env`` file via ``load_dotenv``
before the embeddings client is created.
"""

import glob

from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Load environment variables from .env file
# NOTE(review): presumably this supplies the OpenAI API key used by
# OpenAIEmbeddings below -- confirm against the deployment's .env.
load_dotenv()

# 1. Load all files
# Sorted so that the order of documents (and therefore of vectors in the
# saved index) is reproducible; glob.glob returns matches in arbitrary order.
filepaths = sorted(glob.glob("ratelist_offers.pdf"))  # Adjust pattern if needed
if not filepaths:
    # Fail early with an actionable message instead of an opaque error
    # raised later from inside the vector store construction.
    raise FileNotFoundError(
        "No input files matched the pattern 'ratelist_offers.pdf'; "
        "nothing to index."
    )

all_documents = []
for path in filepaths:
    loader = PyPDFLoader(path)
    docs = loader.load()
    all_documents.extend(docs)

# 2. Chunk all documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(all_documents)
if not chunks:
    # E.g. PDFs with no extractable text. Building an index from zero
    # chunks cannot succeed, so stop before any embedding request is made.
    raise ValueError(
        "The loaded documents produced no text chunks; "
        "cannot build a FAISS index from empty input."
    )

# 3. Create embeddings
embeddings = OpenAIEmbeddings()

# 4. Store vectors in FAISS
faiss_index = FAISS.from_documents(chunks, embeddings)
faiss_index.save_local("faiss_index_store")