# importing necessary libraries
import os
import time

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from docx import Document
from docx.text.paragraph import Paragraph
from docx.table import Table
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

# load the environment variables into the Python script
load_dotenv()

# fetching the OPENAI_API_KEY environment variable
# (ChatOpenAI and OpenAIEmbeddings also read it from the environment)
openai_api_key = os.getenv("OPENAI_API_KEY")

# Initialize session states
if "vectorDB" not in st.session_state:
    st.session_state.vectorDB = None
if "messages" not in st.session_state:
    st.session_state.messages = []
if "bot_name" not in st.session_state:
    st.session_state.bot_name = ""
if "chain" not in st.session_state:
    st.session_state.chain = None


def process_paragraph(paragraph):
    """This function returns the text of a paragraph inside the DOCX file"""
    return paragraph.text


def process_table(table):
    """This function extracts the text from a table inside the DOCX file"""
    text = ""
    for row in table.rows:
        for cell in row.cells:
            # add a trailing space so the text of adjacent cells does not run together
            text += cell.text + " "
    return text


def read_docx(file):
    """This function extracts the text from the DOCX file"""
    doc = Document(file)
    text = []
    # iterate over paragraphs and tables in document order
    for element in doc.iter_inner_content():
        if isinstance(element, Paragraph):
            text.append(process_paragraph(element))
        elif isinstance(element, Table):
            text.append(process_table(element))
    return " ".join(text)


def read_text_file(text_file):
    """This function extracts the text from the TEXT file"""
    try:
        text = text_file.read().decode("utf-8")
        return text
    except Exception as e:
        st.error(f"Error while reading {text_file.name} file: **{e}**")
        return None


def get_pdf_text(pdf):
    """This function extracts the text from the PDF file"""
    try:
        text = []
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text.append(page.extract_text())
        return " ".join(text)
    except Exception as e:
        st.error(f"Error while reading {pdf.name} file: **{e}**")
        return None


def get_vectorstore(text_chunks):
    """This function creates a vector database and stores the embeddings of the text chunks in it"""
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore


def get_text_chunks(text: str):
    """This function splits the text into smaller chunks"""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=50,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_text(text)
    return chunks


def processing(files):
    """This function extracts the text from the uploaded files and returns a vector database built from it"""
    data = []
    for file in files:
        if file.name.endswith(".docx"):
            text = read_docx(file)
        elif file.name.endswith(".pdf"):
            text = get_pdf_text(file)
        else:
            text = read_text_file(file)
        # skip files that could not be read (the readers return None on error)
        if text:
            data.append(text)
    raw_text = " ".join(data)

    # dividing the raw text into smaller chunks
    text_chunks = get_text_chunks(raw_text)

    # creating and storing the chunks in the vector database
    vectorDB = get_vectorstore(text_chunks)
    return vectorDB


def get_response(query: str):
    """This function streams the answer to the user query word by word"""
    # getting the context from the database that is most similar to the user query
    query_context = st.session_state.vectorDB.similarity_search(query=query)

    # calling the chain to get the output from the LLM
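    # LLMChain.invoke returns a dict of the inputs plus the generated
    # completion under its default "text" output key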
    response = st.session_state.chain.invoke(
        {
            "human_input": query,
            "context": query_context[0].page_content,
            "name": st.session_state.bot_name,
        }
    )["text"]

    # iterate through each word in the response string after splitting it on whitespace
    for word in response.split():
        # yield the current word followed by a space, making this function a generator
        yield word + " "
        # pause for 0.05 seconds (50 milliseconds) to create a typing effect
        time.sleep(0.05)


def get_conversation_chain(vectorDB):
    """This function creates and returns an LLM chain"""
    # using the OpenAI chat model
    llm = ChatOpenAI(temperature=0.1, model="gpt-3.5-turbo-16k")

    # creating a template to pass into the LLM
    template = """You are a friendly customer support chatbot named {name} for the company, aiming to enhance the customer experience by providing tailored assistance and information.
Answer the question as thoroughly and to the point as possible, using only the context: {context}

If the answer is not in the provided context, just say "answer is not available in the context"; do not provide a wrong answer.

{chat_history}
Human: {human_input}
AI: """

    # creating a prompt that is used to format the input of the user
    prompt = PromptTemplate(
        template=template,
        input_variables=["chat_history", "human_input", "name", "context"],
    )

    # creating a memory that stores the last k=5 turns of the chat history between the chatbot and the user
    memory = ConversationBufferWindowMemory(
        memory_key="chat_history", input_key="human_input", k=5
    )
    chain = LLMChain(llm=llm, prompt=prompt, memory=memory, verbose=True)
    return chain


if __name__ == "__main__":
    # setting the config of the web page
    st.set_page_config(page_title="Personalized ChatBot", page_icon="🤖")
    st.header("Personalized Customer Support Chatbot 🤖", divider="rainbow")

    # taking the inputs (bot name and files) from the user
    with st.sidebar:
        st.caption("Please enter the **Bot Name** and upload **PDF**, **TXT**, or **DOCX** files!")
        bot_name = st.text_input(
            label="Bot Name", placeholder="Enter the bot name here....", key="bot_name"
        )
        files = st.file_uploader(
            label="Upload Files!",
            type=["pdf", "txt", "docx"],
            accept_multiple_files=True,
        )

    # moving forward only when both inputs are given by the user
    if files and bot_name:
        # the Process File button processes the uploaded files and saves the chunks in the vector database
        if st.button("Process File"):
            # if there is existing chat history, delete it
            if st.session_state.messages:
                st.session_state.messages = []
            with st.spinner("Processing....."):
                st.session_state["vectorDB"] = processing(files)
                st.session_state["chain"] = get_conversation_chain(
                    st.session_state["vectorDB"]
                )
                st.success("File Processed", icon="✅")

    # show the chatbot interface only once the vector database is ready to use
    if st.session_state.vectorDB:
        # Display chat messages from history on app rerun
        for message in st.session_state.messages:
            with st.chat_message(message["role"]):
                st.write(message["content"])

        # taking the input i.e. query from the user (walrus operator)
        if prompt := st.chat_input(f"Message {st.session_state.bot_name}"):
            # Add user message to chat history
            st.session_state.messages.append({"role": "user", "content": prompt})
            # Display user message in chat message container
            with st.chat_message("user"):
                st.write(prompt)
            # Display assistant response in chat message container
            with st.chat_message("assistant"):
                response = st.write_stream(get_response(prompt))
            # Add assistant response to chat history
            st.session_state.messages.append({"role": "assistant", "content": response})
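

# A minimal run sketch (assumptions: this script is saved as app.py and a .env
# file containing OPENAI_API_KEY=<your-key> sits next to it; the packages
# below are the PyPI distributions behind the imports above):
#
#   pip install streamlit python-dotenv PyPDF2 python-docx langchain \
#       langchain-openai langchain-community faiss-cpu
#   streamlit run app.py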