Spaces:
Sleeping
Sleeping
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import TextLoader | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.document_loaders import PyPDFLoader | |
import glob | |
from dotenv import load_dotenv | |
# Build a FAISS vector index from the rate-list PDF(s) and persist it to disk.
#
# Pipeline: load PDFs -> split into overlapping chunks -> embed with OpenAI
# -> store the vectors in FAISS -> save the index locally.

# Load environment variables from the .env file before any client is created.
# NOTE(review): presumably this is how OpenAIEmbeddings obtains OPENAI_API_KEY
# -- confirm against the deployment setup.
load_dotenv()

# Tunables for this indexing run, kept in one place.
PDF_PATTERN = "ratelist_offers.pdf"  # Adjust pattern if needed
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100
INDEX_DIR = "faiss_index_store"

# 1. Load all files
# glob.glob() returns matches in arbitrary order; sorting keeps the chunk
# order in the index reproducible when the pattern matches several files.
filepaths = sorted(glob.glob(PDF_PATTERN))
if not filepaths:
    # Fail early with a clear message instead of letting an empty document
    # list reach FAISS, where it surfaces as an opaque library error.
    raise SystemExit(f"No input files match {PDF_PATTERN!r}; nothing to index.")

all_documents = []
for path in filepaths:
    all_documents.extend(PyPDFLoader(path).load())

# 2. Chunk all documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = text_splitter.split_documents(all_documents)
if not chunks:
    # E.g. PDFs without extractable text: there is nothing to embed, so stop
    # before making any embedding request.
    raise SystemExit(
        f"No text chunks were produced from {len(filepaths)} file(s); "
        "nothing to index."
    )

# 3. Create embeddings
embeddings = OpenAIEmbeddings()

# 4. Store vectors in FAISS and write the index to INDEX_DIR
faiss_index = FAISS.from_documents(chunks, embeddings)
faiss_index.save_local(INDEX_DIR)