Mikeplockhart committed on
Commit
81d4c87
1 Parent(s): fa8a179

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -35
app.py CHANGED
@@ -5,25 +5,22 @@ import json
5
  from qdrant_client import QdrantClient
6
 
7
  print("Setup client")
8
- #chroma_client = chromadb.Client()
9
- #collection = chroma_client.create_collection(
10
- #name="food_collection",
11
- #metadata={"hnsw:space": "cosine"} # l2 is the default
12
- #)
13
  client = QdrantClient(":memory:")
14
 
15
  print("load data")
16
  with open("test_json.json", "r") as f:
17
  payload = json.load(f)
18
 
 
19
  def embedding_function(items_to_embed: list[str]):
20
  print("embedding")
21
- sentence_model = SentenceTransformer(
22
- "mixedbread-ai/mxbai-embed-large-v1"
23
- )
24
- embedded_items = sentence_model.encode(
25
- items_to_embed
26
- )
27
  print(len(embedded_items))
28
  print(type(embedded_items[0]))
29
  print(type(embedded_items[0][0]))
@@ -34,58 +31,65 @@ def embedding_function(items_to_embed: list[str]):
34
  return embedded_list
35
 
36
 
37
- print('upserting')
38
  print("printing item:")
39
- embedding = embedding_function([item['doc'] for item in payload])
40
  print(type(embedding))
41
  client.add(
42
  collection_name="food",
43
- documents=[item['doc'] for item in payload],
44
- #embeddings=embedding,
45
- metadata=[{'payload':item} for item in payload],
46
- ids=[idx for idx, _ in enumerate(payload)]
47
- )
 
48
 
49
- def search_chroma(query:str):
50
  results = client.query(
51
- #query_embeddings=embedding_function([query]),
52
  collection_name="food",
53
  query_text=query,
54
- limit=2
55
  )
56
- #print(results[0])
57
- #print(results[0].QueryResponse.metadata)
58
- #instructions = ['\n'.join(item.metadata['payload']['instructions']) for item in results]
59
- #text_only= [f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}" for item in results]
 
 
 
 
 
 
 
 
 
60
  text_only = []
61
- for item in results:
62
- instructions = "- "+'<br>- '.join(item.metadata['payload']['instructions'])
63
- markdown_text = f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}"
64
  text_only.append(markdown_text)
65
- print(text_only)
66
  return "\n".join(text_only)
67
 
 
68
  def reranking_results(query: str, top_k_results: list[str]):
69
  # Load the reranker model; here we use the xsmall-sized model
70
  rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
71
  reranked_results = rerank_model.rank(query, top_k_results, return_documents=True)
72
  return reranked_results
73
 
 
74
  def run_query(query_string: str):
75
  meal_string = search_chroma(query_string)
76
  return meal_string
77
 
 
78
  with gr.Blocks() as meal_search:
79
  gr.Markdown("Start typing below and then click **Run** to see the output.")
80
  with gr.Row():
81
  inp = gr.Textbox(placeholder="What sort of meal are you after?")
82
  out = gr.Markdown()
83
  btn = gr.Button("Run")
84
- btn.click(
85
- fn=run_query,
86
- inputs=inp,
87
- outputs=out
88
- )
89
 
90
  meal_search.launch()
91
-
 
5
  from qdrant_client import QdrantClient
6
 
7
  print("Setup client")
8
+ # chroma_client = chromadb.Client()
9
+ # collection = chroma_client.create_collection(
10
+ # name="food_collection",
11
+ # metadata={"hnsw:space": "cosine"} # l2 is the default
12
+ # )
13
  client = QdrantClient(":memory:")
14
 
15
  print("load data")
16
  with open("test_json.json", "r") as f:
17
  payload = json.load(f)
18
 
19
+
20
  def embedding_function(items_to_embed: list[str]):
21
  print("embedding")
22
+ sentence_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
23
+ embedded_items = sentence_model.encode(items_to_embed)
 
 
 
 
24
  print(len(embedded_items))
25
  print(type(embedded_items[0]))
26
  print(type(embedded_items[0][0]))
 
31
  return embedded_list
32
 
33
 
34
+ print("upserting")
35
  print("printing item:")
36
+ embedding = embedding_function([item["doc"] for item in payload])
37
  print(type(embedding))
38
  client.add(
39
  collection_name="food",
40
+ documents=[item["doc"] for item in payload],
41
+ # embeddings=embedding,
42
+ metadata=[{"payload": item} for item in payload],
43
+ ids=[idx for idx, _ in enumerate(payload)],
44
+ )
45
+
46
 
47
+ def search_chroma(query: str):
48
  results = client.query(
49
+ # query_embeddings=embedding_function([query]),
50
  collection_name="food",
51
  query_text=query,
52
+ limit=5,
53
  )
54
+ # print(results[0])
55
+ # print(results[0].QueryResponse.metadata)
56
+ # instructions = ['\n'.join(item.metadata['payload']['instructions']) for item in results]
57
+ # text_only= [f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}" for item in results]
58
+ top_k = [item.document for item in results]
59
+ reranked = reranking_results(query, top_k)
60
+
61
+ ordered_results = []
62
+ for item in reranked:
63
+ for result in results:
64
+ if item["text"] == result.document:
65
+ ordered_results.append(result)
66
+
67
  text_only = []
68
+ for item in ordered_results:
69
+ instructions = "- " + "<br>- ".join(item.metadata["payload"]["instructions"])
70
+ markdown_text = f"# Dish: {item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}\n\n### Score: {item.score}\n"
71
  text_only.append(markdown_text)
 
72
  return "\n".join(text_only)
73
 
74
+
75
  def reranking_results(query: str, top_k_results: list[str]):
76
  # Load the reranker model; here we use the xsmall-sized model
77
  rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
78
  reranked_results = rerank_model.rank(query, top_k_results, return_documents=True)
79
  return reranked_results
80
 
81
+
82
  def run_query(query_string: str):
83
  meal_string = search_chroma(query_string)
84
  return meal_string
85
 
86
+
87
  with gr.Blocks() as meal_search:
88
  gr.Markdown("Start typing below and then click **Run** to see the output.")
89
  with gr.Row():
90
  inp = gr.Textbox(placeholder="What sort of meal are you after?")
91
  out = gr.Markdown()
92
  btn = gr.Button("Run")
93
+ btn.click(fn=run_query, inputs=inp, outputs=out)
 
 
 
 
94
 
95
  meal_search.launch()