import os
from langchain_community.vectorstores import FAISS
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings, ChatNVIDIA
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage
from langchain_core.runnables.history import RunnableWithMessageHistory
import gradio as gr
import time


embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type=None)
db = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)
model = ChatNVIDIA(model="meta/llama3-70b-instruct")

retriever = db.as_retriever(search_kwargs={"k": 8})

retrieved_docs = retriever.invoke("Seafood restaurants in Phuket")
print(len(retrieved_docs))
for doc in retrieved_docs:
    print(doc.metadata)
    

def get_session_history(session_id):
    return MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=os.environ["MONGODB_URI"],
        database_name="tour_planner_db",
        collection_name="chat_histories",
    )
    

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
### [INST] Instruction: Answer the question based on your knowledge about places in Thailand. You are Roam Mate which is a chat bot to help users with their travel and recommending places according to their reference. Here is context to help:
Also provides your rationale for generating the places you are recommending.
Context:\n{context}\n
(Answer from retrieval if they are relevant to the question. Only cite sources that are used. Make your response conversational.)


### QUESTION:
{question} [/INST]
 """),
        MessagesPlaceholder(variable_name="history")
    ]
)

runnable = prompt | model

runnable_with_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

initial_msg = (
    "Hello! I am a chatbot to help with vacation."
    f"\nHow can I help you?"
)

def chat_gen(message, history, session_id, return_buffer=True):
    print(session_id)
    buffer = ""
    for token in runnable_with_history.stream(
        {"question": message, "context": db.as_retriever(search_type="similarity", search_kwargs={"k": 5})},
        config={"configurable": {"session_id": session_id}},
    ):
        buffer += token.content
        time.sleep(0.05)
        yield buffer

with gr.Blocks(fill_height=True) as demo:
    session_id = gr.Textbox("1", label="Session ID")
    chatbot = gr.Chatbot(value = [[None, initial_msg]], bubble_full_width=True, scale=1)
    gr.ChatInterface(chat_gen, chatbot=chatbot, additional_inputs=[session_id]).queue()

    
if __name__ == "__main__":
    demo.launch()