chroma-demo / app.py
asoria's picture
asoria HF staff
Update app.py
9447ea8 verified
import gradio as gr
import chromadb
import pandas as pd
import json
# Build the vector store once at import time: create a Chroma collection and
# fill it with one record per row of the recipes parquet file.
client = chromadb.Client()
collection = client.create_collection("bolivian-recipes")

df = pd.read_parquet(
    "hf://datasets/asoria/bolivian-recipes@~parquet/default/last/0000.parquet"
)

# Column whose text is stored as the document body; every other column is
# attached to the record as metadata.
text_column = "preparation"

# Record ids are simply the row positions rendered as strings.
ids = list(map(str, range(len(df))))
documents = df[text_column].tolist()
metadatas = df.drop(columns=text_column).to_dict(orient="records")

collection.add(ids=ids, documents=documents, metadatas=metadatas)
# UI definition: a query textbox, a submit button and a table that shows the
# closest matches returned by the Chroma collection.
with gr.Blocks() as demo:
    gr.Markdown(" ## Chroma demo using datasets server parquet files")
    gr.Markdown("Embedding parquet files from https://huggingface.co/datasets/asoria/bolivian-recipes ('preparation' column)")
    query = gr.Textbox(label="query", placeholder="anticucho")
    get_result_button = gr.Button("Submit")
    cached_responses_table = gr.DataFrame()

    def get_result(query: str) -> dict:
        """Query the collection and build the update for the results table.

        Args:
            query: Free-text search string typed into the textbox.

        Returns:
            A mapping from the results table component to a ``gr.update``
            carrying a DataFrame with the columns ``ids``, ``distances``,
            ``metadatas`` (JSON-encoded strings) and ``documents``.
        """
        result = collection.query(query_texts=[query], n_results=2)

        # Exactly one query text is sent, so only the first entry of each
        # result field is relevant.
        ids = result["ids"][0]
        distances = result["distances"][0]
        documents = result["documents"][0]

        # Metadata records are dicts; serialise them so each fits in a single
        # table cell. ensure_ascii=False keeps accented characters readable
        # instead of turning them into \uXXXX escapes.
        metadatas = [
            json.dumps(data, ensure_ascii=False)
            for data in result["metadatas"][0]
        ]

        table = pd.DataFrame(
            data={
                "ids": ids,
                "distances": distances,
                "metadatas": metadatas,
                "documents": documents,
            }
        )
        return {
            cached_responses_table: gr.update(value=table),
        }

    get_result_button.click(get_result, inputs=query, outputs=[cached_responses_table])
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()