# Residue from the Hugging Face Spaces file view (not part of the script):
#   Space status: Runtime error
#   File size: 925 Bytes
#   Revision hashes shown in the page gutter: 8e70e09, fb75b53
from datasets import Dataset, load_from_disk
import faiss
import numpy as np
import torch
from transformers import RagTokenizer, RagSequenceForGeneration
def _masked_mean_pool(hidden_states, attention_mask):
    """Average token vectors over the positions where *attention_mask* is 1."""
    weights = attention_mask.unsqueeze(-1).to(hidden_states.dtype)
    # clamp guards against division by zero for a row with no real tokens.
    token_counts = weights.sum(dim=1).clamp(min=1)
    return (hidden_states * weights).sum(dim=1) / token_counts


def create_and_save_faiss_index(dataset_path, index_path, batch_size=16):
    """Embed the ``"text"`` column of a saved dataset and write a FAISS index.

    The dataset is loaded with ``load_from_disk``, each passage is tokenized
    and run through the encoder of ``facebook/rag-sequence-nq``, the token
    vectors are mean-pooled into one vector per passage, and the vectors are
    stored in a ``faiss.IndexFlatL2`` that is written to ``index_path``.

    Args:
        dataset_path: Directory of a dataset saved with ``save_to_disk``; it
            must contain a ``"text"`` column.
        index_path: Destination file for the serialized FAISS index.
        batch_size: Number of passages encoded per forward pass. Batching
            keeps peak memory bounded instead of padding and encoding the
            whole dataset at once.

    Raises:
        ValueError: If ``batch_size`` is not positive or the dataset has no
            passages to index.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    dataset = load_from_disk(dataset_path)
    passages = list(dataset["text"])
    if not passages:
        raise ValueError(f"dataset at {dataset_path!r} has no passages to index")

    tokenizer = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
    model = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq")
    # NOTE(review): RagTokenizer presumably tokenizes with its question-encoder
    # tokenizer by default, while get_encoder() presumably returns the
    # generator's encoder. Confirm that the two share a vocabulary and that
    # get_encoder() is available on RagSequenceForGeneration in the installed
    # transformers version.
    encoder = model.get_encoder()

    chunks = []
    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        for start in range(0, len(passages), batch_size):
            batch = tokenizer(
                passages[start:start + batch_size],
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Pass tensors by keyword; handing the whole BatchEncoding over
            # positionally would be treated as ``input_ids``.
            hidden_states = encoder(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
            ).last_hidden_state
            # Exclude padding from the average so a passage's vector does not
            # depend on the longest passage that shares its batch.
            pooled = _masked_mean_pool(hidden_states, batch["attention_mask"])
            chunks.append(pooled.cpu().numpy())

    # FAISS expects a C-contiguous float32 matrix of shape (n, d).
    passage_embeddings = np.ascontiguousarray(
        np.concatenate(chunks, axis=0), dtype=np.float32
    )

    index = faiss.IndexFlatL2(passage_embeddings.shape[1])
    index.add(passage_embeddings)
    # str() lets callers pass a pathlib.Path as well as a plain string.
    faiss.write_index(index, str(index_path))
if __name__ == "__main__":
    import argparse

    # Both paths are optional positionals; when omitted they fall back to the
    # placeholder values the script originally hard-coded.
    parser = argparse.ArgumentParser(
        description="Build a FAISS index over the 'text' column of a saved dataset."
    )
    parser.add_argument(
        "dataset_path",
        nargs="?",
        default="path/to/your/hf_dataset",
        help="directory of the dataset saved with save_to_disk",
    )
    parser.add_argument(
        "index_path",
        nargs="?",
        default="path/to/your/hf_index",
        help="file the FAISS index is written to",
    )
    args = parser.parse_args()

    dataset_path = args.dataset_path
    index_path = args.index_path
    create_and_save_faiss_index(dataset_path, index_path)