Spaces:
Running
Running
File size: 520 Bytes
696ae63 |
1 2 3 4 5 6 7 8 9 10 11 |
from sentence_transformers import SentenceTransformer
import faiss, json, glob, os, numpy as np
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
texts=[]; vecs=[]
for f in glob.glob("nyc_ads_dataset/*.json"):
cap=json.load(open(f))["caption"]
texts.append(cap); vecs.append(model.encode(cap,normalize_embeddings=True))
vecs=np.vstack(vecs).astype("float32")
index=faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)
faiss.write_index(index,"prompt.index"); json.dump(texts,open("prompt.txt","w")) |