Spaces:
Build error
Build error
File size: 1,188 Bytes
6c1e91d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import sys
from langchain.vectorstores import FAISS
from pathlib import Path
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
import pickle
import faiss
def train(files_path):
    """Build a FAISS vector store from the text files under *files_path*.

    Every regular file matching ``**/*.*`` below ``files_path`` is read as
    UTF-8, split into chunks of roughly 1000 characters on newlines, embedded
    with ``OpenAIEmbeddings`` and indexed with FAISS. The raw index is written
    to ``after_training/training.index`` and the store (with its index
    detached) is pickled to ``after_training/faiss.pkl``.

    Args:
        files_path: Folder that is searched recursively for training files.

    Returns:
        The status string ``"训练完成"`` ("training complete") on success, or
        ``None`` when there is nothing to train on.
    """
    # "**/*.*" also matches directories whose name contains a dot; opening one
    # would raise, so keep regular files only. Sorting gives a stable order.
    training_files = sorted(
        path for path in Path(files_path).glob("**/*.*") if path.is_file()
    )
    if not training_files:
        print(
            f"The folder {files_path} should be populated with at least one .txt or .md file.",
            file=sys.stderr,
        )
        return None

    texts = []
    for training_file in training_files:
        with open(training_file, "r", encoding="utf-8") as f:
            print(f"Add {f.name} to dataset")
            texts.append(f.read())

    text_splitter = CharacterTextSplitter(chunk_size=1000, separator="\n", chunk_overlap=0)
    chunks = []
    for text in texts:
        chunks.extend(text_splitter.split_text(text))
    if not chunks:
        # Empty / whitespace-only files produce no chunks; there is nothing to
        # embed, so bail out the same way as for an empty folder.
        print(f"No text content found in the files under {files_path}.", file=sys.stderr)
        return None

    store = FAISS.from_texts(chunks, OpenAIEmbeddings())

    # Make sure the output folder exists before writing into it.
    output_dir = Path("after_training")
    output_dir.mkdir(parents=True, exist_ok=True)

    faiss.write_index(store.index, "after_training/training.index")
    # The index was saved separately above, so it is detached from the store
    # before the rest of the store is pickled.
    store.index = None
    with open("after_training/faiss.pkl", "wb") as f:
        pickle.dump(store, f)
    return "训练完成"
|