Spaces:
Sleeping
Sleeping
Mikeplockhart
committed on
Commit
•
a5c05cc
1
Parent(s):
90191bd
Update app.py
Browse files · langchain added
app.py
CHANGED
@@ -1,95 +1,74 @@
|
|
1 |
import gradio as gr
|
2 |
-
import
|
3 |
-
from
|
4 |
-
import
|
5 |
-
from
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
sentence_model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
|
23 |
-
embedded_items = sentence_model.encode(items_to_embed)
|
24 |
-
print(len(embedded_items))
|
25 |
-
print(type(embedded_items[0]))
|
26 |
-
print(type(embedded_items[0][0]))
|
27 |
-
embedded_list = [item.tolist() for item in embedded_items]
|
28 |
-
print(len(embedded_list))
|
29 |
-
print(type(embedded_list[0]))
|
30 |
-
print(type(embedded_list[0][0]))
|
31 |
-
return embedded_list
|
32 |
|
33 |
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
# embeddings=embedding,
|
42 |
-
metadata=[{"payload": item} for item in payload],
|
43 |
-
ids=[idx for idx, _ in enumerate(payload)],
|
44 |
)
|
|
|
45 |
|
|
|
|
|
|
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
)
|
54 |
-
# print(results[0])
|
55 |
-
# print(results[0].QueryResponse.metadata)
|
56 |
-
# instructions = ['\n'.join(item.metadata['payload']['instructions']) for item in results]
|
57 |
-
# text_only= [f"# Title:\n{item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}" for item in results]
|
58 |
-
top_k = [item.document for item in results]
|
59 |
-
reranked = reranking_results(query, top_k)
|
60 |
-
|
61 |
-
ordered_results = []
|
62 |
-
for item in reranked:
|
63 |
-
for result in results:
|
64 |
-
if item["text"] == result.document:
|
65 |
-
ordered_results.append(result)
|
66 |
-
|
67 |
-
text_only = []
|
68 |
-
for item in ordered_results:
|
69 |
-
instructions = "- " + "<br>- ".join(item.metadata["payload"]["instructions"])
|
70 |
-
markdown_text = f"# Dish: {item.metadata['payload']['title']}\n\n## Description:\n{item.metadata['payload']['doc']}\n\n ## Instructions:\n{instructions}\n\n### Score: {item.score}\n"
|
71 |
-
text_only.append(markdown_text)
|
72 |
-
return "\n".join(text_only)
|
73 |
-
|
74 |
-
|
75 |
-
# Lazily created cross-encoder, shared by every call to reranking_results.
_RERANK_MODEL = None


def reranking_results(query: str, top_k_results: list[str]):
    """Re-rank candidate texts against *query* with a cross-encoder.

    Args:
        query: The user's search text.
        top_k_results: Candidate document texts to be scored against the query.

    Returns:
        Whatever ``CrossEncoder.rank(..., return_documents=True)`` returns for
        the given query and candidates.
    """
    global _RERANK_MODEL
    if _RERANK_MODEL is None:
        # Build the (extra-small) reranker once instead of on every query;
        # constructing it per call repeats the model load for no benefit.
        _RERANK_MODEL = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")
    return _RERANK_MODEL.rank(query, top_k_results, return_documents=True)
|
80 |
-
|
81 |
-
|
82 |
-
def run_query(query_string: str):
    """Return the result of ``search_chroma`` for *query_string*."""
    return search_chroma(query_string)
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
gr.Markdown("Start typing below and then click **Run** to see the output.")
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
btn = gr.Button("Run")
|
93 |
-
btn.click(fn=run_query, inputs=inp, outputs=
|
94 |
|
95 |
-
|
|
|
1 |
import gradio as gr
|
2 |
+
from langchain_community.document_loaders import JSONLoader
|
3 |
+
from langchain_community.vectorstores import Qdrant
|
4 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
5 |
+
from sentence_transformers.cross_encoder import CrossEncoder
|
6 |
+
|
7 |
+
# Path to the JSON file the documents are loaded from
|
8 |
+
json_path = "format_food.json"
|
9 |
+
|
10 |
+
def metadata_func(record: dict, metadata: dict) -> dict:
    """Copy selected fields of *record* into *metadata* and return it.

    The "title", "cuisine", "time" and "instructions" entries are read with
    ``dict.get``, so a field missing from the record is stored as ``None``.
    *metadata* is updated in place and the same object is returned.
    """
    for field in ("title", "cuisine", "time", "instructions"):
        metadata[field] = record.get(field)
    return metadata
|
16 |
+
|
17 |
+
def reranking_results(query, top_k_results, rerank_model):
    """Re-rank retrieved documents against *query* using *rerank_model*.

    Each document is rendered as "<title>, <page_content>" (title taken from
    its metadata) and the rendered strings are passed to
    ``rerank_model.rank`` with ``return_documents=True``. The model is
    supplied by the caller; nothing is loaded here. The value returned by
    ``rank`` is handed back unchanged.
    """
    candidates = []
    for doc in top_k_results:
        candidates.append(f"{doc.metadata['title']}, {doc.page_content}")
    return rerank_model.rank(query, candidates, return_documents=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
|
24 |
+
# Build the document loader for the data file. json_path is already assigned
# near the top of the file, so it is not assigned a second time here.
# Each element selected by the jq expression becomes one document: its "doc"
# field is used as the content and metadata_func fills in the metadata.
# NOTE(review): text_content=False presumably relaxes JSONLoader's
# string-only content check - confirm against the LangChain docs.
loader = JSONLoader(
    file_path=json_path,
    jq_schema=".dishes[].dish",
    text_content=False,
    content_key="doc",
    metadata_func=metadata_func,
)
data = loader.load()
|
33 |
|
34 |
+
# Embedding checkpoint handed to HuggingFaceEmbeddings below.
model_name = "Snowflake/snowflake-arctic-embed-xs"
# NOTE: the cross-encoder reranker is currently disabled; to re-enable it:
# rerank_model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

# Embedder configuration: run on CPU and request normalized embeddings.
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf_embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
# Index the loaded documents with the embedder in a Qdrant collection.
# location=":memory:" selects local mode with in-memory storage only.
qdrant = Qdrant.from_documents(
    data,
    hf_embedding,
    collection_name="my_documents",
    location=":memory:",
)
|
51 |
|
52 |
+
def format_to_markdown(response_list):
    """Render a sequence of items as a Markdown bullet list.

    Args:
        response_list: Items to render, one bullet per item. An empty
            sequence or ``None`` is accepted; a single string is treated as
            one item rather than being split into characters.

    Returns:
        The items as "- item" lines joined with newlines, or an empty string
        when there is nothing to render.
    """
    if not response_list:
        return ""
    if isinstance(response_list, str):
        response_list = [response_list]
    # Build new strings instead of prefixing response_list[0] in place, so
    # the caller's list is left untouched.
    return "\n".join(f"- {item}" for item in response_list)
|
56 |
+
|
57 |
+
def run_query(query):
    """Look up the best-matching dish for *query* and render it as Markdown.

    Args:
        query: Free-text description of the meal the user is after.

    Returns:
        A ``(title_and_description, recipe)`` tuple of Markdown strings, one
        per output component wired to the Run button. When the search yields
        no documents, a short "no match" heading and an empty recipe are
        returned instead of raising.
    """
    print("Running Query")
    # Ten candidates are requested but only the top hit is rendered below.
    answer = qdrant.similarity_search(query=query, k=10)
    if not answer:
        # Indexing answer[0] on an empty result would raise IndexError.
        print("Returning query")
        return "# Best Choice:\nNo matching dish found.", ""

    best = answer[0]
    details = best.metadata
    title_and_description = (
        f"# Best Choice:\nA {details['title']}: {best.page_content}"
    )
    # metadata_func stores None when a record has no "instructions" field;
    # skip formatting in that case rather than failing inside the formatter.
    steps = details["instructions"]
    instructions = format_to_markdown(steps) if steps else ""
    recipe = f"# Cooking time:\n{details['time']}\n\n# Recipe:\n{instructions}"
    print("Returning query")
    return title_and_description, recipe
|
65 |
+
|
66 |
+
# Gradio UI: one text box in, two Markdown panels out, driven by the Run button.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    inp = gr.Textbox(placeholder="What sort of meal are you after?")
    title_output = gr.Markdown(label="Title and description")
    instructions_output = gr.Markdown(label="Recipe")
    btn = gr.Button("Run")
    # run_query returns two strings, one for each output component listed here.
    btn.click(run_query, inp, [title_output, instructions_output])

demo.launch()
|