Spaces: Runtime error
| import gradio as gr | |
| import chromadb | |
| import pandas as pd | |
| import json | |
# Build the Chroma collection that the demo queries.
client = chromadb.Client()
# get_or_create_collection (rather than create_collection) keeps this setup
# re-runnable: create_collection raises when the name is already registered
# on the client, e.g. if this module is executed again in the same process.
collection = client.get_or_create_collection("bolivian-recipes")

# Parquet export of the dataset, addressed through an `hf://` URL.
df = pd.read_parquet(
    "hf://datasets/asoria/bolivian-recipes@~parquet/default/last/0000.parquet"
)

# Column whose text is embedded; every other column is stored as metadata.
text_column = "preparation"
# Row positions, stringified, serve as the record ids.
ids = [str(i) for i in range(len(df))]
documents = df[text_column].to_list()
# NOTE(review): Chroma restricts metadata values to simple scalar types --
# confirm the remaining columns contain no nulls or nested values.
metadatas = df.drop(columns=[text_column]).to_dict("records")

# Populate only an empty collection so a repeated run does not re-add the
# same ids.
if collection.count() == 0:
    collection.add(ids=ids, documents=documents, metadatas=metadatas)
# UI definition: a query box, a submit button and a results table.
with gr.Blocks() as demo:
    gr.Markdown(" ## Chroma demo using datasets server parquet files")
    gr.Markdown("Embedding parquet files from https://huggingface.co/datasets/asoria/bolivian-recipes ('preparation' column)")
    query = gr.Textbox(label="query", placeholder="anticucho")
    get_result_button = gr.Button("Submit")
    cached_responses_table = gr.DataFrame()

    def get_result(query: str) -> dict:
        """Query the collection and return an update for the results table.

        Args:
            query: Free-text search string taken from the textbox. Inside
                this function the name shadows the ``query`` Textbox
                component defined above.

        Returns:
            A dict mapping ``cached_responses_table`` to a ``gr.update``
            whose value is a DataFrame with the columns ``ids``,
            ``distances``, ``metadatas`` (each metadata dict serialized to
            a JSON string) and ``documents``.
        """
        result = collection.query(query_texts=[query], n_results=2)
        # A single query text is sent, so index 0 of every field holds the
        # matches for that query.
        table = pd.DataFrame(
            data={
                "ids": result["ids"][0],
                "distances": result["distances"][0],
                "metadatas": [json.dumps(data) for data in result["metadatas"][0]],
                "documents": result["documents"][0],
            }
        )
        return {cached_responses_table: gr.update(value=table)}

    # Run the search when the button is clicked and render it in the table.
    get_result_button.click(get_result, inputs=query, outputs=[cached_responses_table])
# Start the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()