all_the_food_public

Sleeping

App Files Files Community

Mikeplockhart committed on Apr 21, 2024

Commit

a5c05cc

verified ·

1 Parent(s): 90191bd

Update app.py

Browse files

langchain added

Files changed (1) hide show

app.py +63 -84

app.py CHANGED Viewed

@@ -1,95 +1,74 @@
 import gradio as gr
-import jsonlines
-from sentence_transformers import CrossEncoder, SentenceTransformer
-import json
-from qdrant_client import QdrantClient
-print("Setup client")
-# chroma_client = chromadb.Client()
-# collection = chroma_client.create_collection(
-# name="food_collection",
-# metadata={"hnsw:space": "cosine"} # l2 is the default
-# )
-client = QdrantClient(":memory:")
-print("load data")
-with open("test_json.json", "r") as f:
-    payload = json.load(f)
-def embedding_function(items_to_embed: list[str]):
-    print("embedding")
-    sentence_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
-    embedded_items = sentence_model.encode(items_to_embed)
-    print(len(embedded_items))
-    print(type(embedded_items[0]))
-    print(type(embedded_items[0][0]))
-    embedded_list = [item.tolist() for item in embedded_items]
-    print(len(embedded_list))
-    print(type(embedded_list[0]))
-    print(type(embedded_list[0][0]))
-    return embedded_list
-print("upserting")
-print("printing item:")
-embedding = embedding_function([item["doc"] for item in payload])
-print(type(embedding))
-client.add(
-    collection_name="food",
-    documents=[item["doc"] for item in payload],
-    # embeddings=embedding,
-    metadata=[{"payload": item} for item in payload],
-    ids=[idx for idx, _ in enumerate(payload)],
 )
-def search_chroma(query: str):
-    results = client.query(
-        # query_embeddings=embedding_function([query]),
-        collection_name="food",
-        query_text=query,
-        limit=5,
-    )
-    # print(results[0])
-    # print(results[0].QueryResponse.metadata)
-    # instructions = ['\n'.join(item.metadata['payload']['instructions']) for item in results]
-    # text_only= [f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}" for item in results]
-    top_k = [item.document for item in results]
-    reranked = reranking_results(query, top_k)
-    ordered_results = []
-    for item in reranked:
-        for result in results:
-            if item["text"] == result.document:
-                ordered_results.append(result)
-    text_only = []
-    for item in ordered_results:
-        instructions = "- " + "<br>- ".join(item.metadata["payload"]["instructions"])
-        markdown_text = f"# Dish: {item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}\n\n### Score: {item.score}\n"
-        text_only.append(markdown_text)
-    return "\n".join(text_only)
-def reranking_results(query: str, top_k_results: list[str]):
-    # Load the model, here we use our base sized model
-    rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
-    reranked_results = rerank_model.rank(query, top_k_results, return_documents=True)
-    return reranked_results
-def run_query(query_string: str):
-    meal_string = search_chroma(query_string)
-    return meal_string
-with gr.Blocks() as meal_search:
     gr.Markdown("Start typing below and then click **Run** to see the output.")
-    with gr.Row():
-        inp = gr.Textbox(placeholder="What sort of meal are you after?")
-        out = gr.Markdown()
     btn = gr.Button("Run")
-    btn.click(fn=run_query, inputs=inp, outputs=out)
-meal_search.launch()

 import gradio as gr
+from langchain_community.document_loaders import JSONLoader
+from langchain_community.vectorstores import Qdrant
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from sentence_transformers.cross_encoder import CrossEncoder
+# loading data
+# Path of the JSON file handed to JSONLoader as file_path further down.
+json_path = "format_food.json"
+def metadata_func(record: dict, metadata: dict) -> dict:
+    """Copy the recipe fields of ``record`` into ``metadata`` and return it.
+
+    Passed to ``JSONLoader`` as its ``metadata_func``. The ``title``,
+    ``cuisine``, ``time`` and ``instructions`` keys are read with
+    ``dict.get``, so a key missing from ``record`` is stored as ``None``.
+    ``metadata`` is updated in place and the same object is returned.
+    """
+    for field in ("title", "cuisine", "time", "instructions"):
+        metadata[field] = record.get(field)
+    return metadata
+def reranking_results(query, top_k_results, rerank_model):
+    """Rerank retrieved documents against ``query`` using ``rerank_model``.
+
+    Each item of ``top_k_results`` is rendered as ``"<title>, <page_content>"``
+    (the title comes from ``item.metadata``). The rendered texts are passed to
+    ``rerank_model.rank`` with ``return_documents=True`` and that call's
+    result is returned unchanged. The model is supplied by the caller; it is
+    not loaded here.
+    """
+    candidate_texts = []
+    for doc in top_k_results:
+        candidate_texts.append(f"{doc.metadata['title']}, {doc.page_content}")
+    return rerank_model.rank(query, candidate_texts, return_documents=True)
+# NOTE(review): repeats the identical json_path assignment made just after the imports.
+json_path = "format_food.json"
+# Configure the loader: jq_schema selects every `.dishes[].dish` entry,
+# content_key names the `doc` field as the document text, and metadata_func
+# copies title/cuisine/time/instructions into each document's metadata.
+loader = JSONLoader(
+    file_path=json_path,
+    jq_schema='.dishes[].dish',
+    text_content=False,
+    content_key='doc',
+    metadata_func=metadata_func
 )
+# Runs at import time; `data` is what gets indexed into the vector store below.
+data = loader.load()
+# Models
+# Embedding model name passed to HuggingFaceEmbeddings below.
+model_name = "Snowflake/snowflake-arctic-embed-xs"
+# The reranker is commented out, so no CrossEncoder is created here.
+# rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
+# Embedding
+# Run the embedding model on CPU and ask for normalized embeddings.
+model_kwargs = {"device": "cpu"}
+encode_kwargs = {"normalize_embeddings": True}
+hf_embedding = HuggingFaceEmbeddings(
+    model_name=model_name, encode_kwargs=encode_kwargs, model_kwargs=model_kwargs
+)
+# Index the loaded documents with hf_embedding at import time; `qdrant` is the
+# module-level store that run_query searches.
+qdrant = Qdrant.from_documents(
+    data,
+    hf_embedding,
+    location=":memory:",  # Local mode with in-memory storage only
+    collection_name="my_documents",
+)
+def format_to_markdown(response_list):
+    """Render a sequence of strings as a markdown bullet list.
+
+    Args:
+        response_list: The strings to render, one bullet per item. May be
+            empty or ``None`` (``metadata_func`` stores ``None`` when a
+            record has no ``instructions`` key).
+
+    Returns:
+        The items joined as ``"- item"`` lines separated by newlines, or an
+        empty string when there is nothing to render.
+    """
+    if not response_list:
+        return ""
+    # Build new strings rather than rewriting response_list[0] in place, so
+    # the caller's list is left untouched.
+    return "\n".join(f"- {item}" for item in response_list)
+def run_query(query):
+    """Return markdown describing the dish that best matches ``query``.
+
+    Searches the module-level ``qdrant`` store and formats the first hit.
+
+    Args:
+        query: Free-text description of the meal the user is after.
+
+    Returns:
+        A ``(title_and_description, recipe)`` tuple of markdown strings, in
+        the order of the two output components wired to the Run button.
+    """
+    print("Running Query")
+    answer = qdrant.similarity_search(query=query, k=10)
+    if not answer:
+        # No hits: report that instead of raising IndexError on answer[0].
+        print("Returning query")
+        return "# Best Choice:\nNo matching dish found.", ""
+    # Ten hits are requested but only the first one is displayed.
+    best = answer[0]
+    title_and_description = f"# Best Choice:\nA {best.metadata['title']}: {best.page_content}"
+    instructions = format_to_markdown(best.metadata['instructions'])
+    recipe = f"# Cooking time:\n{best.metadata['time']}\n\n# Recipe:\n{instructions}"
+    print("Returning query")
+    return title_and_description, recipe
+# UI: one text input, two markdown outputs, and a button that runs the query.
+with gr.Blocks() as demo:
     gr.Markdown("Start typing below and then click **Run** to see the output.")
+    inp = gr.Textbox(placeholder="What sort of meal are you after?")
+    # Output order matches run_query's return tuple: title/description, then recipe.
+    title_output = gr.Markdown(label="Title and description")
+    instructions_output = gr.Markdown(label="Recipe")
     btn = gr.Button("Run")
+    btn.click(fn=run_query, inputs=inp, outputs=[title_output, instructions_output])
+# Starts the app when the module is run; executes at import time.
+demo.launch()