Mikeplockhart committed on
Commit
a5c05cc
1 Parent(s): 90191bd

Update app.py

Browse files

langchain added

Files changed (1) hide show
  1. app.py +63 -84
app.py CHANGED
@@ -1,95 +1,74 @@
1
  import gradio as gr
2
- import jsonlines
3
- from sentence_transformers import CrossEncoder, SentenceTransformer
4
- import json
5
- from qdrant_client import QdrantClient
6
-
7
- print("Setup client")
8
- # chroma_client = chromadb.Client()
9
- # collection = chroma_client.create_collection(
10
- # name="food_collection",
11
- # metadata={"hnsw:space": "cosine"} # l2 is the default
12
- # )
13
- client = QdrantClient(":memory:")
14
-
15
- print("load data")
16
- with open("test_json.json", "r") as f:
17
- payload = json.load(f)
18
-
19
-
20
- def embedding_function(items_to_embed: list[str]):
21
- print("embedding")
22
- sentence_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
23
- embedded_items = sentence_model.encode(items_to_embed)
24
- print(len(embedded_items))
25
- print(type(embedded_items[0]))
26
- print(type(embedded_items[0][0]))
27
- embedded_list = [item.tolist() for item in embedded_items]
28
- print(len(embedded_list))
29
- print(type(embedded_list[0]))
30
- print(type(embedded_list[0][0]))
31
- return embedded_list
32
 
33
 
34
- print("upserting")
35
- print("printing item:")
36
- embedding = embedding_function([item["doc"] for item in payload])
37
- print(type(embedding))
38
- client.add(
39
- collection_name="food",
40
- documents=[item["doc"] for item in payload],
41
- # embeddings=embedding,
42
- metadata=[{"payload": item} for item in payload],
43
- ids=[idx for idx, _ in enumerate(payload)],
44
  )
 
45
 
 
 
 
46
 
47
- def search_chroma(query: str):
48
- results = client.query(
49
- # query_embeddings=embedding_function([query]),
50
- collection_name="food",
51
- query_text=query,
52
- limit=5,
53
- )
54
- # print(results[0])
55
- # print(results[0].QueryResponse.metadata)
56
- # instructions = ['\n'.join(item.metadata['payload']['instructions']) for item in results]
57
- # text_only= [f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}" for item in results]
58
- top_k = [item.document for item in results]
59
- reranked = reranking_results(query, top_k)
60
-
61
- ordered_results = []
62
- for item in reranked:
63
- for result in results:
64
- if item["text"] == result.document:
65
- ordered_results.append(result)
66
-
67
- text_only = []
68
- for item in ordered_results:
69
- instructions = "- " + "<br>- ".join(item.metadata["payload"]["instructions"])
70
- markdown_text = f"# Dish: {item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}\n\n### Score: {item.score}\n"
71
- text_only.append(markdown_text)
72
- return "\n".join(text_only)
73
-
74
-
75
- def reranking_results(query: str, top_k_results: list[str]):
76
- # Load the model, here we use our base sized model
77
- rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
78
- reranked_results = rerank_model.rank(query, top_k_results, return_documents=True)
79
- return reranked_results
80
-
81
-
82
- def run_query(query_string: str):
83
- meal_string = search_chroma(query_string)
84
- return meal_string
85
 
 
 
 
 
 
 
86
 
87
- with gr.Blocks() as meal_search:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  gr.Markdown("Start typing below and then click **Run** to see the output.")
89
- with gr.Row():
90
- inp = gr.Textbox(placeholder="What sort of meal are you after?")
91
- out = gr.Markdown()
92
  btn = gr.Button("Run")
93
- btn.click(fn=run_query, inputs=inp, outputs=out)
94
 
95
- meal_search.launch()
 
1
  import gradio as gr
2
+ from langchain_community.document_loaders import JSONLoader
3
+ from langchain_community.vectorstores import Qdrant
4
+ from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from sentence_transformers.cross_encoder import CrossEncoder
6
+
7
+ # loading data
8
+ json_path = "format_food.json"
9
+
10
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy the recipe fields of one JSON record into its document metadata.

    Handed to ``JSONLoader`` as its ``metadata_func`` hook.

    Args:
        record: One raw dish record extracted from the JSON file.
        metadata: Metadata dict for the document; updated in place.

    Returns:
        The same ``metadata`` dict, now carrying ``title``, ``cuisine``,
        ``time`` and ``instructions``. A field missing from ``record`` is
        stored as ``None``.
    """
    for field in ("title", "cuisine", "time", "instructions"):
        metadata[field] = record.get(field)
    return metadata
16
+
17
def reranking_results(query, top_k_results, rerank_model):
    """Rerank retrieved documents against ``query`` with the given model.

    Each document is flattened to the text ``"<title>, <page_content>"``
    before being scored. The model is supplied by the caller; nothing is
    loaded here.

    Args:
        query: The user's search text.
        top_k_results: Retrieved documents exposing ``metadata['title']``
            and ``page_content``.
        rerank_model: Object providing ``rank(query, documents,
            return_documents=True)``.

    Returns:
        Whatever ``rerank_model.rank`` returns for the flattened documents.
    """
    candidates = [
        f"{doc.metadata['title']}, {doc.page_content}" for doc in top_k_results
    ]
    return rerank_model.rank(query, candidates, return_documents=True)
 
 
 
 
 
 
 
 
 
 
22
 
23
 
24
# --- Data -------------------------------------------------------------------
# Each `.dishes[].dish` entry becomes one document: its `doc` field is the
# page content, and metadata_func copies the remaining recipe fields into the
# document metadata.
json_path = "format_food.json"
loader = JSONLoader(
    file_path=json_path,
    jq_schema=".dishes[].dish",
    text_content=False,
    content_key="doc",
    metadata_func=metadata_func,
)
data = loader.load()

# --- Models -----------------------------------------------------------------
model_name = "Snowflake/snowflake-arctic-embed-xs"
# rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

# --- Embedding --------------------------------------------------------------
# Embeddings are computed on CPU and normalised by the encoder.
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
hf_embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
)

# --- Vector store -----------------------------------------------------------
# Local mode with in-memory storage only: the index is rebuilt on every start.
qdrant = Qdrant.from_documents(
    data,
    hf_embedding,
    location=":memory:",
    collection_name="my_documents",
)
51
 
52
def format_to_markdown(response_list):
    """Render a sequence of strings as a markdown bullet list.

    The input is never modified. The previous implementation prefixed
    ``response_list[0]`` in place, so formatting the same list twice
    produced ``"- - ..."``.

    Args:
        response_list: Items to render, one bullet per item. May be empty
            or ``None``.

    Returns:
        The items joined as ``"- a\\n- b"``, or an empty string when there
        is nothing to render.
    """
    if not response_list:
        return ""
    return "\n".join(f"- {item}" for item in response_list)
56
+
57
def run_query(query):
    """Look up the best-matching dish and format it for the UI.

    Runs a similarity search on the module-level ``qdrant`` store and
    renders the top hit as two markdown strings.

    Args:
        query: Free-text description of the meal the user is after.

    Returns:
        A ``(title_and_description, recipe)`` tuple of markdown strings.
        When the search returns no documents, the first element carries a
        short notice and the second is empty.
    """
    print("Running Query")
    # NOTE(review): ten hits are fetched but only the first is rendered; the
    # rest is presumably headroom for the disabled reranker -- confirm.
    answer = qdrant.similarity_search(query=query, k=10)
    if not answer:
        # Indexing answer[0] on an empty result would raise IndexError.
        print("Returning query")
        return "# Best Choice:\nNo matching dish found.", ""

    best = answer[0]
    title_and_description = (
        f"# Best Choice:\nA {best.metadata['title']}: {best.page_content}"
    )
    steps = best.metadata.get('instructions')
    # Pass a copy so the formatter cannot alter the stored metadata, and skip
    # it entirely when the record has no instructions.
    instructions = format_to_markdown(list(steps)) if steps else ""
    recipe = (
        f"# Cooking time:\n{best.metadata['time']}\n\n# Recipe:\n{instructions}"
    )
    print("Returning query")
    return title_and_description, recipe
65
+
66
# Gradio UI: one text box in, two markdown panels out, wired to run_query.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    inp = gr.Textbox(placeholder="What sort of meal are you after?")
    title_output = gr.Markdown(label="Title and description")
    instructions_output = gr.Markdown(label="Recipe")
    btn = gr.Button("Run")
    # run_query returns two strings, one for each markdown panel.
    btn.click(
        fn=run_query,
        inputs=inp,
        outputs=[title_output, instructions_output],
    )

demo.launch()