# File size: 931 Bytes
# cf1b384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from sentence_transformers import SentenceTransformer
import faiss
import os
import pickle

# Name of the sentence-embedding model used to vectorise every text chunk.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)

def load_documents(filenames=None):
    """Load the corpus files and split them into non-empty text chunks.

    Each file is read as UTF-8 and split on blank lines (two consecutive
    newlines). Every piece is stripped of surrounding whitespace, and pieces
    that are empty after stripping (caused by runs of blank lines or a
    trailing newline) are dropped so no empty string reaches the index.

    Args:
        filenames: Optional iterable of file paths to read, in order. When
            ``None``, the default corpus files are used.

    Returns:
        A list of chunk strings, ordered by file and then by position within
        the file. Files that do not exist are skipped.
    """
    if filenames is None:
        filenames = (
            "milindgatha.txt",
            "bhaktas.txt",
            "apologetics.txt",
            "poc_questions.txt",
            "satire_offerings.txt",
        )

    docs = []
    for filename in filenames:
        try:
            with open(filename, "r", encoding="utf-8") as f:
                text = f.read()
        except FileNotFoundError:
            # A missing corpus file is not an error; just move on.
            continue

        stripped_chunks = (chunk.strip() for chunk in text.split("\n\n"))
        # Keep only chunks that still have content after stripping.
        docs.extend(chunk for chunk in stripped_chunks if chunk)
    return docs

# Collect the text chunks that will be embedded and indexed.
texts = load_documents()
if not texts:
    # With no chunks there is nothing to embed or index; stop with a clear
    # message instead of failing later when the embedding dimension is read.
    raise SystemExit("No documents found; nothing to index.")

embeddings = model.encode(texts, convert_to_numpy=True)

# The index width is taken from the second axis of the embedding array.
# NOTE(review): IndexFlatIP ranks by inner product, which matches cosine
# similarity only if the embeddings are unit-length — confirm the model
# normalizes its output.
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(embeddings)

# Save FAISS index
faiss.write_index(index, "baba.index")

# Save texts list for retrieval
with open("texts.pkl", "wb") as f:
    pickle.dump(texts, f)

print("Index and texts saved.")