import duckdb
from OLAP_Conn.OLAP_Connection import OLAP_Connection
from sentence_transformers import util


class DuckConn(OLAP_Connection):
    """DuckDB-backed document store with brute-force cosine-similarity search.

    Documents live in a table named ``documents``; retrieval scores every
    stored row against a query embedding and returns the best matches.
    """

    def __init__(self, path_duckdb: str = "first_aid.duckdb"):
        """Open (or create) the DuckDB database at *path_duckdb*."""
        super().__init__()
        self.path_duckdb = path_duckdb
        self.con = duckdb.connect(self.path_duckdb)

    def make_data_frame(self, data_, name: str):
        """Register *data_* under *name* and materialise it as ``documents``.

        The table is created only if it does not exist yet, so calling this
        again with different data leaves an existing ``documents`` table
        untouched.

        Args:
            data_: Object accepted by ``duckdb`` ``register`` (presumably a
                pandas DataFrame -- confirm against callers).
            name: Name under which *data_* is registered as a view.
        """
        self.con.register(name, data_)
        # Quote the identifier (doubling embedded quotes) so names containing
        # spaces, reserved words or quote characters cannot break or alter
        # the statement.
        quoted_name = '"' + str(name).replace('"', '""') + '"'
        self.con.execute(
            f"CREATE TABLE IF NOT EXISTS documents AS SELECT * FROM {quoted_name}"
        )
        self.con.commit()

    def get_relevant_docs(self, embedded_query, top_k: int = 3, limit: int = 100) -> list:
        """Return the contents of the *top_k* documents most similar to the query.

        Every row of ``documents`` is scored against *embedded_query* with
        ``sentence_transformers.util.cos_sim`` and the highest-scoring page
        contents are returned, best first.

        Args:
            embedded_query: Embedding of the query, in a form accepted by
                ``util.cos_sim``.
            top_k: Maximum number of documents to return. Values ``<= 0``
                yield an empty list.
            limit: Currently unused; kept for interface compatibility.
                NOTE(review): presumably meant to cap the rows fetched --
                confirm intent before wiring it into the query.

        Returns:
            A list with at most *top_k* page contents, ordered by descending
            similarity.
        """
        if top_k <= 0:
            # A negative top_k would otherwise slice off the *last* entries
            # and return nearly every document.
            return []

        # Retrieve all stored documents.
        # Assumes the table has exactly two columns, in the order
        # (page_content, embedding) -- the unpacking below relies on it.
        rows = self.con.execute("SELECT * FROM documents;").fetchall()

        # Calculate the similarity of each document to the query.
        scored_docs = [
            (page_content, util.cos_sim(embedded_query, embedding_doc))
            for page_content, embedding_doc in rows
        ]

        # Sort by descending score; the sort is stable, so ties keep their
        # fetch order.
        ranked = sorted(scored_docs, key=lambda pair: pair[1], reverse=True)

        # Return the top-k page contents.
        return [page_content for page_content, _ in ranked[:top_k]]

    def close(self):
        """Commit any pending work and close the database connection."""
        self.con.commit()
        self.con.close()