Spaces:
Runtime error
Runtime error
File size: 1,789 Bytes
75703a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# Build a retriever on Supabase:
#   1. Create the project, table, indexes, and functions.
#   2. Create a client with the project URL and key.
#   3. Insert the data together with its embeddings.
#
# Load metadata.jsonl
import json
import os
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client
from langchain.schema import Document
# Load the metadata.jsonl file: one JSON object per line.
# The file is decoded explicitly as UTF-8 so the result does not depend on
# the platform's default locale encoding, and blank lines (for example a
# trailing empty line at the end of the file) are skipped because
# json.loads rejects an empty document.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = list(jsonl_file)

json_QA = []
for json_str in json_list:
    if not json_str.strip():
        continue
    json_data = json.loads(json_str)
    json_QA.append(json_data)
### Build a vector database from the metadata.jsonl records.
# Reference: https://python.langchain.com/docs/integrations/vectorstores/supabase/

# Load settings (presumably from a local .env file) before reading them below.
load_dotenv()

# Embedding model shared by every document; it produces 768-dimensional vectors.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

# Supabase connection settings are taken from the environment.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)
# Wrap each question/answer pair from metadata.jsonl into one row for the
# vector table: the text, its metadata, and its embedding.
contents = [
    f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
    for sample in json_QA
]

# Embed every text with a single batched call instead of invoking the model
# once per record. The call is skipped entirely when there is nothing to embed.
vectors = embeddings.embed_documents(contents) if contents else []

docs = [
    {
        "content": content,
        # Original author's note (translated): the metadata must contain a
        # "source" key, otherwise an error is raised.
        "metadata": {
            "source": sample['task_id'],
        },
        "embedding": vector,
    }
    for sample, content, vector in zip(json_QA, contents, vectors)
]
# Target table: honour TABLE_NAME from the environment when it is set;
# otherwise fall back to "documents", the table name that was previously
# hard-coded in the insert call (TABLE_NAME used to be read but never used).
table_name = os.environ.get('TABLE_NAME') or "documents"

# Upload the documents to the vector database in a single insert request.
try:
    response = (
        supabase.table(table_name)
        .insert(docs)
        .execute()
    )
except Exception as exception:
    # Top-level script boundary: report the failure rather than crash with a
    # traceback.
    print("Error inserting data into Supabase:", exception)