# eshop-chat / app.py
# jitendra.kasaudhan
# Add product url instead of image
# 207907e
# raw
# history blame contribute delete
# No virus
# 6.68 kB
from langchain import PromptTemplate, OpenAI, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
import chainlit as cl
from chainlit import user_session
import pandas as pd
# Directory in which the Chroma vector store is persisted on disk.
persist_directory = "vector_db"

# Prompt text with a single ``{question}`` placeholder.
template = (
    "Question: {question}\n"
    "Answer: Let's think step by step."
)

# Module-level list of product records; it starts out empty.
# A sampled load from the JSON dataset is kept here for reference:
# PRODUCTS_DATA = pd.read_json('data/bestbuy-dataset-products.json').sample(n=3).to_dict(orient='records')
PRODUCTS_DATA = list()
@cl.on_chat_start
def main():
    """Build the retrieval QA chain for a new chat session.

    Loads the product records from ``data/bestbuy-dataset-products.json``,
    turns every record into a ``Document``, splits the documents, indexes
    them in a Chroma vector store persisted under ``persist_directory`` and
    stores a ``RetrievalQAWithSourcesChain`` (``gpt-3.5-turbo``,
    temperature 0) in the user session under the key ``"llm_chain"``.

    Each product record must provide the keys
    ``product_spec_in_natural_language``, ``sku``, ``name``, ``url`` and
    ``image``.

    Raises:
        ValueError: If the dataset contains no product records.
        KeyError: If a product record lacks one of the required keys.
    """
    embeddings = OpenAIEmbeddings(
        disallowed_special=(),
    )

    # Kept in a local variable: the module-level PRODUCTS_DATA is not
    # modified by this handler.
    products = pd.read_json(
        'data/bestbuy-dataset-products.json'
    ).to_dict(orient='records')
    if not products:
        raise ValueError(
            "No products found in data/bestbuy-dataset-products.json"
        )

    # Collect one Document per product so that *all* products end up in the
    # index, instead of rebinding a single-element list on every iteration.
    docs = [
        Document(
            page_content=item["product_spec_in_natural_language"],
            metadata={
                "source": item["sku"],
                "name": item["name"],
                "url": item['url'],
                "image": item["image"],
            },
        )
        for item in products
    ]

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
        length_function=len,
    )
    documents = text_splitter.split_documents(docs)

    # Index everything in a single call rather than once per product.
    # NOTE(review): this handler runs for every new chat, so the dataset is
    # re-embedded per session and presumably added again to the collection
    # persisted in `persist_directory` -- consider loading an existing store
    # instead of rebuilding it; confirm against the Chroma documentation.
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()

    # Create a chain that answers from the documents retrieved from Chroma.
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(
            model_name="gpt-3.5-turbo",
            temperature=0,
        ),
        chain_type="stuff",
        retriever=vectordb.as_retriever(),
        # Needed so the message handler can read res["source_documents"].
        return_source_documents=True,
    )

    # Store the chain in the user session
    cl.user_session.set("llm_chain", chain)
@cl.on_message
async def main(message: str):
    """Answer a user message and attach the URLs of the source products.

    Runs the chain stored in the user session under ``"llm_chain"`` on
    ``message``, then sends the chain's ``"answer"`` back as a chat message.
    Every distinct product among the returned source documents (identified
    by its ``"source"`` metadata value) is attached as an inline text
    element showing its URL. If the answer contains one of the
    "not found" phrases, no product elements are attached.

    Args:
        message: The text entered by the user.
    """
    # Retrieve the chain from the user session
    llm_chain = cl.user_session.get("llm_chain")  # type: RetrievalQAWithSourcesChain

    # Call the chain asynchronously
    res = await llm_chain.acall(message, callbacks=[cl.AsyncLangchainCallbackHandler()])

    print(res)
    # This varies from chain to chain, you should check which key to read.
    answer = res["answer"]

    # De-duplicate the retrieved documents by product id, keeping the
    # metadata of the first occurrence. Dicts preserve insertion order, so
    # products are listed in retrieval order.
    unique_sources = {}
    for source in res["source_documents"]:
        doc_id = source.metadata["source"]
        if doc_id not in unique_sources:
            unique_sources[doc_id] = {
                "url": source.metadata.get("url"),
                "name": source.metadata.get("name"),
            }

    not_found_indicators = ("not mentioned", "no mention", "not specified", "no information")
    lowered_answer = answer.lower()
    if any(indicator in lowered_answer for indicator in not_found_indicators):
        # If product not found, do not show any product urls
        source_elements = []
    else:
        # One element per unique product; previously the list was reassigned
        # on every iteration, so only the last product was ever shown.
        source_elements = [
            cl.Text(
                content=f"Product url: {values['url']}\n",
                name=values["name"],
                display="inline",
            )
            for values in unique_sources.values()
        ]

    await cl.Message(content=answer, elements=source_elements).send()