from langchain_community.document_loaders import WebBaseLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_pinecone import PineconeVectorStore
from gradio_client import Client
import gradio as gr
import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

load_dotenv()
HF_TOKEN = os.environ["HF_TOKEN"]
PINECONE_API_KEY = os.environ["PINECONE_API_KEY"]

# Initialise Pinecone and the embedding model
# index_name = "db"
pc = Pinecone(api_key=PINECONE_API_KEY)  # initialise a Pinecone instance with the API key

embedder = HuggingFaceInferenceAPIEmbeddings(  # initialise an embedding model
    api_key=HF_TOKEN,
    model_name="mixedbread-ai/mxbai-embed-large-v1",
)

index = "db"

# users = {
#     "aymen": "admin",
#     "amin": "root",
#     "nour": "admin",
#     # Add more users as needed
# }


# Function that loads data from a URL, a free-text description, and/or a PDF.
# Returns a list of text passages.
def load_data(url=None, description=None, pdf=None):
    data = []
    if url is not None:
        try:
            loader = WebBaseLoader(
                url, encoding="utf-8"
            )  # WebBaseLoader: loads and extracts the text content of a web page
            loaded = loader.load()
            for page in loaded:
                data.append(page.page_content)
        except Exception as e:
            print("An error occurred while loading data from the URL:", e)
    if description is not None:
        data.append(description)
    if pdf is not None:
        loader = PyPDFLoader(pdf)  # PyPDFLoader: loads and splits a PDF file into pages
        pages = loader.load_and_split()
        for page in pages:
            data.append(page.page_content)
    return data


# Function to split the loaded data into smaller segments, which makes
# analysis and indexing easier
def split_data(data):
    # data = "\n".join(data)
    # Create a RecursiveCharacterTextSplitter instance: splits the text into
    # chunks of the given size, with an overlap between chunks to avoid
    # losing context
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=60)
    # Split the text documents into smaller chunks
    texts = text_splitter.create_documents(data)
    return texts


# Create the Pinecone index if it does not exist yet. Note that all users
# share the single "db" index; their data is separated by namespace in embed().
def create_user_index(index_name):
    """Creates the Pinecone index if it does not already exist."""
    existing_indexes = [index.name for index in pc.list_indexes()]  # list the existing indexes
    if index_name in existing_indexes:
        # The index already exists, do not recreate it
        return
    # Create the index (the dimension must match the embedding model)
    pc.create_index(
        name=index_name,
        dimension=1024,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
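
# Minimal ingestion sketch, kept as a comment so the script's behaviour is
# unchanged. "demo-user" and the sample text are hypothetical; assumes valid
# HF_TOKEN and PINECONE_API_KEY:
# docs = split_data(["Our spring campaign targets first-time buyers ..."])
# embed(docs, "demo-user")  # upserts vectors into the "db" index under namespace "demo-user"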

# embed: creates embeddings for the split documents and stores them in a
# Pinecone vector store, under a namespace named after the user
def embed(split_docs, username):
    # Create or verify the index for the user
    create_user_index(index)
    # Build a Pinecone vector store from the split documents
    PineconeVectorStore.from_documents(
        # PineconeVectorStore.from_documents: creates and stores vectors
        # for the given documents
        documents=split_docs,
        index_name=index,
        embedding=embedder,
        namespace=username,
    )
    # Create a retriever
    # retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
    # return retriever


# Document retrieval
# Retrieve documents from the dataset
def retrieve(prompt, username):
    vectorstore = PineconeVectorStore.from_existing_index(
        # from_existing_index: initialises a vector store from an existing index
        index_name=index,
        embedding=embedder,
        namespace=username,
    )
    retriever = vectorstore.as_retriever(
        search_type="mmr", search_kwargs={"k": 2}
    )  # as_retriever: creates a retriever for querying the vector store
    retrieved_docs = retriever.invoke(prompt)
    return retrieved_docs


def format_prompt(prompt, retrieved_documents, tone, marketing_technique, social_media):
    prompt = f"""You are an assistant for digital marketing.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just ignore the context.
Question:
{prompt}
"""
    if tone != "Default":
        prompt += f"Tone: {tone}\n"
    if marketing_technique != "Default":
        prompt += f"Marketing technique: {marketing_technique}\n"
    if social_media != "Default":
        prompt += f"Social media platform: {social_media}\n"
    prompt += "Context:\n"
    for document in retrieved_documents:
        prompt += f"{document.page_content}\n"
    # prompt += """If you don't know the answer, just say "I do not know." Don't make up an answer."""
    return prompt


# What is the capital of France? based on the following context,
# Context:
# France is a country located in Western Europe. Its capital is Paris.
# Paris is not only the capital of France, but also the country's largest city.
# based on the following context
# If you don't know the answer, just say "I do not know." Don't make up an answer.


def clear_history(history):
    return []


# Function to query a Mistral LLM through the API of a Hugging Face Space
def ask_mistral(prompt):
    client = Client("hysts/mistral-7b")
    result = client.predict(
        message=prompt,
        max_new_tokens=1024,
        temperature=0.6,
        top_p=0.9,
        top_k=50,
        repetition_penalty=1.2,
        api_name="/chat",
    )
    return result


def inject_history(final_prompt, history):
    if len(history) > 0:
        final_prompt = final_prompt + "\n\nHistory:\n"
        for user, assistant in history:
            final_prompt = final_prompt + "USER : " + user + "\n"
            final_prompt = final_prompt + "ASSISTANT : " + assistant + "\n"
        return final_prompt
    else:
        return final_prompt


# what is my name based on the following context.
# context:
# retrieved documents:
# and the following history of the conversation:
# USER : my name is nour
# ASSISTANT : hi nour
# USER : what is my name ?
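
# End-to-end sketch of the retrieve -> format_prompt -> ask_mistral pipeline
# that user_retrieve_and_generate() below wires together ("demo-user" and the
# question are hypothetical; kept as a comment):
# docs = retrieve("What products do we sell?", "demo-user")
# final = format_prompt("What products do we sell?", docs, "funny", "AIDA", "instagram")
# print(ask_mistral(final))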

def upload_user_data(username, url=None, description=None, pdf_file=None):
    data = load_data(url, description, pdf_file)
    splitted_data = split_data(data)
    embed(splitted_data, username)
    message = f"Data has been uploaded successfully to {username}"
    return message


def user_retrieve_and_generate(
    username, tone, marketing_technique, prompt, history, social_media
):
    # Retrieve data from the vector store
    retrieved_documents = retrieve(prompt, username)
    # Format the prompt
    formatted_prompt = format_prompt(
        prompt, retrieved_documents, tone, marketing_technique, social_media
    )
    # Inject the conversation history
    # formatted_prompt = inject_history(formatted_prompt, history)
    # Ask Mistral
    result = ask_mistral(formatted_prompt)
    # history.append([prompt, result])
    new_history = history + [(prompt, result)]
    return new_history


# def custom_auth(username, password):
#     if username in users and users[username] == password:
#         return True
#     return False


upload_data = gr.Interface(
    fn=upload_user_data,
    inputs=[
        gr.Textbox(label="username"),
        gr.Textbox(label="URL"),
        gr.Textbox(label="Description"),
        gr.File(label="PDF", type="filepath", file_count="single"),
    ],
    outputs=gr.Textbox(label="Output"),
    title="Upload Data",
    description="Upload your data to extract text and answer questions.",
    api_name="upload",
)


def clear_prompt(prompt):
    return ""


with gr.Blocks() as user_interface:
    gr.Markdown(
        value="""User interface to retrieve and generate text based on uploaded data.""",
        label=None,
    )
    username = gr.Textbox(label="username")
    with gr.Accordion("Extra options ⚙️", open=False):
        tone = gr.Dropdown(
            ["Default", "neutral", "funny", "serious", "formal"],
            value="Default",
            label="tone of voice used in the replies",
        )
        marketing_technique = gr.Radio(
            [
                "Default",
                "Retargeting",
                "AIDA",
                "Promotion",
                "Testimonial",
                "FOMO",
                "Before and after",
                "Problem and solution",
            ],
            value="Default",
            label="marketing technique to be used in the replies",
        )
        social_media = gr.Radio(
            ["Default", "instagram", "facebook", "twitter"],
            value="Default",
            label="social media platform to be used in the replies",
        )
    chatbot = gr.Chatbot(height=450, label="Gradio ChatInterface", show_copy_button=True)
    prompt = gr.Textbox(label="prompt")
    with gr.Row():
        clear = gr.Button("🗑️Clear", variant="secondary")
        submit = gr.Button("✅Submit", variant="primary")
    submit.click(
        fn=user_retrieve_and_generate,
        inputs=[username, tone, marketing_technique, prompt, chatbot, social_media],
        outputs=[chatbot],
        api_name="generate",
    ).then(clear_prompt, inputs=prompt, outputs=prompt, show_api=False)
    prompt.submit(
        fn=user_retrieve_and_generate,
        inputs=[username, tone, marketing_technique, prompt, chatbot, social_media],
        outputs=[chatbot],
        api_name=False,
    ).then(clear_prompt, inputs=prompt, outputs=prompt, show_api=False)
    clear.click(fn=clear_history, inputs=chatbot, outputs=chatbot, show_api=False)

demo = gr.TabbedInterface(
    [upload_data, user_interface],
    ["upload", "generate"],
    theme="upsatwal/mlsc_tiet",
)

if __name__ == "__main__":
    demo.launch(
        debug=True
        # , auth=custom_auth, auth_message="Enter your username and password"
    )
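
# Sketch of calling the exposed API endpoints with gradio_client once the app
# is running (the URL, username, and input values are hypothetical):
# client = Client("http://127.0.0.1:7860/")
# client.predict("demo-user", "https://example.com", "", None, api_name="/upload")
# client.predict("demo-user", "Default", "Default", "Write a post", [], "Default", api_name="/generate")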