# Spaces: Running
import duckdb | |
from OLAP_Conn.OLAP_Connection import OLAP_Connection | |
from sentence_transformers import util | |
class DuckConn(OLAP_Connection):
    """DuckDB-backed connection that stores documents and ranks them by similarity.

    Rows are kept in a table named ``documents``. Retrieval loads every row and
    scores it in Python with ``sentence_transformers.util.cos_sim``.
    """

    def __init__(self, path_duckdb="first_aid.duckdb"):
        """Connect to the DuckDB database located at *path_duckdb*."""
        super().__init__()
        self.path_duckdb = path_duckdb
        self.con = duckdb.connect(self.path_duckdb)

    def make_data_frame(self, data_, name):
        """Register *data_* as view *name* and seed the ``documents`` table from it.

        The table is only created when it does not exist yet, so calling this
        again with different data leaves an existing ``documents`` table as is.

        Args:
            data_: Object accepted by ``con.register`` (presumably a pandas
                DataFrame -- TODO confirm against callers).
            name: Name under which *data_* is registered on the connection.
        """
        self.con.register(name, data_)
        # Identifiers cannot be bound as query parameters, so quote the name
        # (doubling embedded quotes) instead of splicing it in raw. This keeps
        # names with spaces, dashes, dots or reserved words working.
        quoted_name = '"' + name.replace('"', '""') + '"'
        self.con.execute(
            "CREATE TABLE IF NOT EXISTS documents AS SELECT * FROM " + quoted_name
        )
        self.con.commit()

    def get_relevant_docs(self, embedded_query, top_k=3, limit=100):
        """Return the contents of the *top_k* rows most similar to the query.

        Args:
            embedded_query: Embedding of the query. Expected to be a single
                vector so that each similarity is one scalar value.
            top_k: Maximum number of documents to return.
            limit: Currently unused; kept so existing callers keep working.
                NOTE(review): probably meant as a SQL ``LIMIT`` -- applying it
                without an ``ORDER BY`` would rank an arbitrary subset of rows.

        Returns:
            The first column (page content) of the best-scoring rows, most
            similar first. Empty when the table holds no rows.
        """
        # Every row must unpack into exactly (page_content, embedding).
        rows = self.con.execute("SELECT * FROM documents;").fetchall()

        # cos_sim returns a tensor; turn it into a plain float so the ranking
        # works on ordinary numbers instead of tensor comparisons.
        scored_docs = [
            (page_content, float(util.cos_sim(embedded_query, embedding_doc)))
            for page_content, embedding_doc in rows
        ]

        # Highest similarity first; the sort is stable, so ties keep fetch order.
        scored_docs.sort(key=lambda pair: pair[1], reverse=True)
        return [page_content for page_content, _ in scored_docs[:top_k]]

    def close(self):
        """Commit any pending work and close the DuckDB connection."""
        self.con.commit()
        self.con.close()