"""Roam Mate: a RAG travel chatbot for Thailand.

Wires an NVIDIA embedding model + prebuilt FAISS index (retrieval), a
Llama-3-70B chat model (generation), and MongoDB-backed per-session chat
history, served through a Gradio UI.

Requires the MONGODB_URI environment variable and a local "faiss_index"
directory produced by a prior indexing run.
"""

import os
import time

import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_mongodb.chat_message_histories import MongoDBChatMessageHistory
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings

# Embedding model used to vectorize queries against the prebuilt index.
embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type=None)

# FAISS persists via pickle, hence allow_dangerous_deserialization; this is
# acceptable only because the index file is produced locally, not untrusted.
db = FAISS.load_local("faiss_index", embedder, allow_dangerous_deserialization=True)

# Chat model used for answer generation.
model = ChatNVIDIA(model="meta/llama3-70b-instruct")

retriever = db.as_retriever(search_kwargs={"k": 8})

# Import-time smoke test: confirm the index loads and returns documents.
# NOTE(review): this runs on every import; consider removing in production.
retrieved_docs = retriever.invoke("Seafood restaurants in Phuket")
print(len(retrieved_docs))
for doc in retrieved_docs:
    print(doc.metadata)


def get_session_history(session_id):
    """Return the MongoDB-backed message history for *session_id*.

    A new MongoDBChatMessageHistory handle is created per call; the
    underlying collection persists messages across calls and restarts.
    Raises KeyError if MONGODB_URI is not set in the environment.
    """
    return MongoDBChatMessageHistory(
        session_id=session_id,
        connection_string=os.environ["MONGODB_URI"],
        database_name="tour_planner_db",
        collection_name="chat_histories",
    )


# Prompt: system instructions with {context}/{question} slots, followed by
# the running conversation history injected by RunnableWithMessageHistory.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", """
    ### [INST] Instruction: Answer the question based on your knowledge about places in Thailand. You are Roam Mate which is a chat bot to help users with their travel and recommending places according to their reference. Here is context to help:
    Also provides your rationale for generating the places you are recommending.
    Context:\n{context}\n
    (Answer from retrieval if they are relevant to the question. Only cite sources that are used. Make your response conversational.)
    ### QUESTION:
    {question} [/INST]
    """),
        MessagesPlaceholder(variable_name="history"),
    ]
)

runnable = prompt | model

# Wrap the chain so each invocation loads/saves history for its session id.
runnable_with_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="question",
    history_messages_key="history",
)

# Greeting shown in the chat window before the first user message.
# (Was an f-string with no placeholders; plain literal is equivalent.)
initial_msg = (
    "Hello! I am a chatbot to help with vacation."
    "\nHow can I help you?"
)
) def chat_gen(message, history, session_id, return_buffer=True): print(session_id) buffer = "" for token in runnable_with_history.stream( {"question": message, "context": db.as_retriever(search_type="similarity", search_kwargs={"k": 5})}, config={"configurable": {"session_id": session_id}}, ): buffer += token.content time.sleep(0.05) yield buffer with gr.Blocks(fill_height=True) as demo: session_id = gr.Textbox("1", label="Session ID") chatbot = gr.Chatbot(value = [[None, initial_msg]], bubble_full_width=True, scale=1) gr.ChatInterface(chat_gen, chatbot=chatbot, additional_inputs=[session_id]).queue() if __name__ == "__main__": demo.launch()