# Chainlit app: product Q&A over a Best Buy catalog using LangChain,
# OpenAI embeddings, and a Chroma vector store persisted to disk.
from langchain import PromptTemplate, OpenAI, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.docstore.document import Document
import chainlit as cl
import pandas as pd

persist_directory = "vector_db"

template = """Question: {question}

Answer: Let's think step by step."""

# Get processed data from a JSON file
# PRODUCTS_DATA = pd.read_json('data/bestbuy-dataset-products.json').sample(n=3).to_dict(orient='records')
PRODUCTS_DATA = []


@cl.on_chat_start
def start():
    """Build the vector store and the retrieval chain for this user session."""
    global PRODUCTS_DATA  # populate the module-level list, not a local copy

    # Instantiate the chain for that user session
    # prompt = PromptTemplate(template=template, input_variables=["question"])
    # llm_chain = LLMChain(prompt=prompt, llm=OpenAI(temperature=0), verbose=True)

    # Embeddings used to index the product summaries in Chroma
    embeddings = OpenAIEmbeddings(
        disallowed_special=(),
    )

    # Sample of the expected record shape (kept for reference):
    # products_data = [
    #     {"sku": 43900, "name": "Duracell - AAA Batteries (4-Pack)", "product_spec_in_natural_language": "Product with name: Duracell - AAA Batteries (4-Pack) belongs to multiple categories: Connected Home & Housewares, Housewares, Household Batteries.\n Description of the product is following:\n product desctiption: Compatible with select electronic devices; AAA size; DURALOCK Power Preserve technology; 4-pack.\n\n Manufacturer of the product is Duracell and price is 5.49.\n ", "url": "a.com"},
    #     {"sku": 48530, "name": "Duracell - AA 1.5V CopperTop Batteries (4-Pack)", "product_spec_in_natural_language": "Product with name: Duracell - AA 1.5V CopperTop Batteries (4-Pack) belongs to multiple categories: Connected Home & Housewares, Housewares, Household Batteries.\n Description of the product is following:\n product desctiption: Long-lasting energy; DURALOCK Power Preserve technology; for toys, clocks, radios, games, remotes, PDAs and more.\n\n Manufacturer of the product is Duracell and price is 5.49.\n ", "url": "b.com"},
    #     {"sku": 127687, "name": "Duracell - AA Batteries (8-Pack)", "product_spec_in_natural_language": "Product with name: Duracell - AA Batteries (8-Pack) belongs to multiple categories: Connected Home & Housewares, Housewares, Household Batteries.\n Description of the product is following:\n product desctiption: Compatible with select electronic devices; AA size; DURALOCK Power Preserve technology; 8-pack.\n\n Manufacturer of the product is Duracell and price is 7.49.\n ", "url": "c.com"},
    # ]
    PRODUCTS_DATA = pd.read_json('data/bestbuy-dataset-products.json').to_dict(orient='records')

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=20,
        length_function=len,
    )

    # Split each product summary into chunks, keeping the product identifiers
    # in the metadata so sources can be rendered later
    documents = []
    for item in PRODUCTS_DATA:
        product_summary_data = item["product_spec_in_natural_language"]
        docs = [
            Document(
                page_content=product_summary_data,
                metadata={
                    "source": item["sku"],
                    "name": item["name"],
                    "url": item["url"],
                    "image": item["image"],
                },
            )
        ]
        documents.extend(text_splitter.split_documents(docs))

    # Create the Chroma vector store once for all documents and persist it
    vectordb = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory,
    )
    vectordb.persist()

    # Create a chain that uses the Chroma vector store
    chain = RetrievalQAWithSourcesChain.from_chain_type(
        ChatOpenAI(
            model_name="gpt-3.5-turbo",
            temperature=0,
        ),
        chain_type="stuff",
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
    )

    # Store the chain in the user session
    cl.user_session.set("llm_chain", chain)


@cl.on_message
async def main(message: str):
    # Retrieve the chain from the user session
    llm_chain = cl.user_session.get("llm_chain")  # type: RetrievalQAWithSourcesChain

    # Call the chain asynchronously
    res = await llm_chain.acall(message, callbacks=[cl.AsyncLangchainCallbackHandler()])

    # Post-processing: collect one source entry per product
    print(res)
    answer = res["answer"]
    source_elements_dict = {}
    source_elements = []
    for source in res["source_documents"]:
        doc_id = source.metadata["source"]

        # Alternative: look the product up by its unique sku, so that less
        # metadata has to be stored in the vector store
        # product_df = pd.DataFrame(PRODUCTS_DATA)
        # product = product_df.where(product_df['sku'] == f"{doc_id}")
        # print('########', f"{doc_id}")
        # print(product)

        if doc_id not in source_elements_dict:
            source_elements_dict[doc_id] = {
                "url": source.metadata.get("url"),
                "name": source.metadata.get("name"),
                "image": source.metadata.get("image"),
            }

    # Build one text element per product (append, so every product is shown)
    for values in source_elements_dict.values():
        text_for_source = f"Product url: {values['url']}\n"
        # if values["image"] is not None:
        #     source_elements.append(cl.Image(name="Image", display="inline", url=values["image"], size="small"))
        source_elements.append(
            cl.Text(content=text_for_source, name=values["name"], display="inline")
        )

    # If the model could not find the product, do not show any product urls
    not_found_indicators = ["not mentioned", "no mention", "not specified", "no information"]
    if any(text in answer.lower() for text in not_found_indicators):
        source_elements = []

    # The answer key varies from chain to chain; check which key to read.
    await cl.Message(content=answer, elements=source_elements).send()