File size: 1,166 Bytes
2ebf9ad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import duckdb
from OLAP_Conn.OLAP_Connection import OLAP_Connection
from sentence_transformers import util

class DuckConn(OLAP_Connection):
    """DuckDB-backed connection that stores documents and ranks them by similarity.

    Rows of the ``documents`` table are read as two-column tuples; the first
    value is returned to callers and the second is compared against the query
    embedding (presumably ``page_content`` and its embedding vector -- confirm
    against the code that builds the table).
    """

    def __init__(self, path_duckdb="first_aid.duckdb"):
        """Open a DuckDB connection to the database at ``path_duckdb``.

        Args:
            path_duckdb: Path passed to ``duckdb.connect``.
        """
        super().__init__()
        self.path_duckdb = path_duckdb
        self.con = duckdb.connect(self.path_duckdb)

    def make_data_frame(self, data_, name):
        """Register ``data_`` under ``name`` and materialise it as ``documents``.

        The table is created with ``CREATE TABLE IF NOT EXISTS``, so when a
        ``documents`` table already exists it is left untouched and ``data_``
        is only registered, not copied.

        Args:
            data_: Object accepted by ``DuckDBPyConnection.register``.
            name: View name under which ``data_`` is registered.
        """
        self.con.register(name, data_)
        # Quote the view name as an identifier (doubling embedded quotes) so
        # that names containing spaces, punctuation, reserved words or SQL
        # fragments cannot break or alter the statement.
        quoted_name = '"' + name.replace('"', '""') + '"'
        self.con.execute(
            f"CREATE TABLE IF NOT EXISTS documents AS SELECT * FROM {quoted_name}"
        )
        self.con.commit()

    def get_relevant_docs(self, embedded_query, top_k=3, limit=100):
        """Return the ``top_k`` documents most similar to ``embedded_query``.

        Every row of ``documents`` is scored with cosine similarity against
        the query and the best-scoring rows are returned, highest first. Rows
        with equal scores keep their fetch order.

        Args:
            embedded_query: Embedding of the query; expected to be a single
                vector so that each comparison yields one similarity value.
            top_k: Number of documents to return.
            limit: Accepted for interface compatibility; it is currently not
                applied to the query.

        Returns:
            A list with the first column of the best-scoring rows.
        """
        rows = self.con.execute("SELECT * FROM documents;").fetchall()

        # Convert each similarity to a plain float once, so sorting compares
        # numbers instead of negating and comparing tensors on every step.
        scored_docs = [
            (page_content, float(util.cos_sim(embedded_query, embedding_doc)))
            for page_content, embedding_doc in rows
        ]

        # A stable descending sort keeps the tie order of the fetched rows.
        ranked = sorted(scored_docs, key=lambda item: item[1], reverse=True)
        return [page_content for page_content, _ in ranked[:top_k]]

    def close(self):
        """Commit pending work and close the connection.

        The connection is closed even when the commit raises; the exception
        still propagates to the caller.
        """
        try:
            self.con.commit()
        finally:
            self.con.close()