Spaces:
Sleeping
Sleeping
File size: 1,166 Bytes
2ebf9ad |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import duckdb
from OLAP_Conn.OLAP_Connection import OLAP_Connection
from sentence_transformers import util
class DuckConn(OLAP_Connection):
    """DuckDB-backed connection that stores documents and ranks them by similarity.

    Documents live in a table named ``documents``. Ranking is done in Python:
    every row is fetched and scored against the query embedding with
    ``sentence_transformers.util.cos_sim``.
    """

    def __init__(self, path_duckdb="first_aid.duckdb"):
        """Open (or create) the DuckDB database file at *path_duckdb*."""
        super().__init__()
        self.path_duckdb = path_duckdb
        self.con = duckdb.connect(self.path_duckdb)

    def make_data_frame(self, data_, name):
        """Register *data_* under *name* and materialise it as ``documents``.

        The table is created with ``CREATE TABLE IF NOT EXISTS``, so when a
        ``documents`` table already exists this call leaves it unchanged.

        Args:
            data_: Object accepted by ``con.register`` (presumably a
                DataFrame-like object; confirm against callers).
            name: Name under which *data_* is registered in DuckDB.
        """
        self.con.register(name, data_)
        # Quote the identifier so names containing spaces, keywords or quote
        # characters cannot break (or inject into) the statement.
        quoted_name = '"' + str(name).replace('"', '""') + '"'
        self.con.execute(
            "CREATE TABLE IF NOT EXISTS documents AS SELECT * FROM " + quoted_name
        )
        self.con.commit()

    def get_relevant_docs(self, embedded_query, top_k=3, limit=100):
        """Return the *top_k* documents most similar to *embedded_query*.

        Args:
            embedded_query: Embedding of the query, in a form accepted by
                ``util.cos_sim``.
            top_k: Number of best-scoring documents to return.
            limit: Currently unused; kept so existing callers keep working.
                NOTE(review): presumably meant to cap the number of rows
                fetched -- confirm the intent before wiring it in.

        Returns:
            A list with the first column (the page content) of the
            highest-scoring rows, best match first.
        """
        # Retrieve every stored row. The unpacking below requires each row to
        # have exactly two columns: (page_content, embedding).
        rows = self.con.execute("SELECT * FROM documents;").fetchall()

        # Score each document. The cos_sim result is reduced to a plain float
        # so sorting does not depend on comparing tensor objects.
        scored_docs = [
            (page_content, float(util.cos_sim(embedded_query, embedding_doc)))
            for page_content, embedding_doc in rows
        ]

        # Highest score first; the sort is stable, so ties keep table order.
        scored_docs.sort(key=lambda pair: pair[1], reverse=True)

        # Keep only the text of the top_k results.
        return [page_content for page_content, _ in scored_docs[:top_k]]

    def close(self):
        """Commit pending work and close the connection.

        The connection is closed even when the commit raises, so the database
        handle is never leaked.
        """
        try:
            self.con.commit()
        finally:
            self.con.close()
|