# Build a retriever on Supabase:
#   1. create the project, table, indexes, and functions
#   2. create a client with the URL and key
#   3. insert the data together with its embedding

import json
import os

from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import Client, create_client
from langchain.schema import Document

# Load the metadata.jsonl file (one JSON object per line).
# JSON text is UTF-8, so decode explicitly instead of relying on the
# platform's default encoding.
with open('metadata.jsonl', 'r', encoding='utf-8') as jsonl_file:
    json_list = list(jsonl_file)

# Skip whitespace-only lines (e.g. a trailing blank line) so they do not
# make json.loads raise JSONDecodeError.
json_QA = [json.loads(json_str) for json_str in json_list if json_str.strip()]

### Build a vector database based on metadata.jsonl
# https://python.langchain.com/docs/integrations/vectorstores/supabase/
load_dotenv()
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")  # dim=768

supabase_url = os.environ.get("SUPABASE_URL")
supabase_key = os.environ.get("SUPABASE_SERVICE_KEY")
supabase: Client = create_client(supabase_url, supabase_key)

# Wrap the questions and answers from metadata.jsonl into a list of row dicts,
# each carrying its text, metadata, and embedding vector.
docs = []
for sample in json_QA:
    content = f"Question : {sample['Question']}\n\nFinal answer : {sample['Final answer']}"
    doc = {
        "content": content,
        "metadata": {
            # Original author's note: the metadata must use the "source" key,
            # otherwise an error is raised.
            "source": sample['task_id']
        },
        "embedding": embeddings.embed_query(content),
    }
    docs.append(doc)

# Target table comes from the environment. The value was previously read but
# ignored in favour of a hard-coded "documents"; that name is kept as the
# fallback when TABLE_NAME is unset or empty.
table_name = os.environ.get('TABLE_NAME')

# Upload the documents to the vector database.
try:
    response = (
        supabase.table(table_name or "documents")
        .insert(docs)
        .execute()
    )
except Exception as exception:
    # Best-effort upload: report the failure instead of aborting the script.
    print("Error inserting data into Supabase:", exception)