Spaces:
Build error
Build error
| import sys | |
| from langchain.vectorstores import FAISS | |
| from pathlib import Path | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.embeddings import OpenAIEmbeddings | |
| import pickle | |
| import faiss | |
def train(files_path):
    """Build a FAISS vector store from the text files under *files_path*.

    Every regular file matched by ``**/*.*`` below *files_path* is read as
    UTF-8, split with a ``CharacterTextSplitter`` (``chunk_size=1000``,
    newline separator, no overlap), embedded with ``OpenAIEmbeddings`` and
    indexed with FAISS.  The raw index is written to
    ``after_training/training.index`` and the remaining store object is
    pickled to ``after_training/faiss.pkl``.

    Args:
        files_path: Directory that is searched recursively for training files.

    Returns:
        The status string ``"训练完成"`` ("training complete") on success, or
        ``None`` (after printing a message to stderr) when there is nothing
        to train on.
    """
    # "**/*.*" also matches directories whose name contains a dot; opening
    # one would raise, so keep regular files only.  Sorting makes the order
    # in which files are indexed independent of the filesystem.
    training_files = sorted(
        path for path in Path(files_path).glob("**/*.*") if path.is_file()
    )
    if not training_files:
        print(
            f"The folder {files_path} should be populated with at least one "
            ".txt or .md file.",
            file=sys.stderr,
        )
        return None

    data = []
    for training_file in training_files:
        with open(training_file, "r", encoding="utf-8") as f:
            print(f"Add {f.name} to dataset")
            data.append(f.read())

    text_splitter = CharacterTextSplitter(
        chunk_size=1000, separator="\n", chunk_overlap=0
    )
    docs = []
    for text in data:
        docs.extend(text_splitter.split_text(text))

    # Files that are empty produce no chunks; there is nothing to embed then.
    if not docs:
        print(
            f"The files in {files_path} do not contain any text to train on.",
            file=sys.stderr,
        )
        return None

    # Create the output folder before embedding so a missing or unwritable
    # folder is reported before any embedding work is done.
    Path("after_training").mkdir(parents=True, exist_ok=True)

    store = FAISS.from_texts(docs, OpenAIEmbeddings())

    # The index is saved through faiss itself and detached from the store
    # before pickling; whoever loads faiss.pkl has to re-attach the index
    # read from training.index.
    faiss.write_index(store.index, "after_training/training.index")
    store.index = None
    with open("after_training/faiss.pkl", "wb") as f:
        pickle.dump(store, f)
    return "训练完成"