# chainlit run app.py -w
# The Chainlit Python streaming docs are here: https://docs.chainlit.io/concepts/streaming/python
# OpenAI Chat completion
from dotenv import load_dotenv

load_dotenv()

import os
import sys
import faiss
import openai
import chainlit as cl  # importing chainlit for our app
import llama_index
from llama_index.core import Settings
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core import set_global_handler
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
from llama_parse import LlamaParse

LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

"""
Alternative: prompt for the keys interactively instead of reading them from .env.
os.environ["LLAMA_CLOUD_API_KEY"] = getpass.getpass("LlamaParse API Key:")
os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
# os.environ["WANDB_API_KEY"] = getpass.getpass("WandB API Key: ")
"""

# Parsing the PDF file with LlamaParse.
parser = LlamaParse(
    result_type="markdown",
    verbose=True,
    language="en",
    num_workers=2,
)

nvidia_docs = parser.load_data(["./nvidia_2tables.pdf"])
# Note: nvidia_docs contains only one file here (it could contain more); nvidia_docs[0] is the PDF we loaded.
# print(nvidia_docs[0].text[:1000])

# Settings comes out of llama_index.core and is a major part of the v0.10 update!
Settings.llm = OpenAI(model="gpt-3.5-turbo")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

# Using MarkdownElementNodeParser to make sense of our Markdown objects so we can
# leverage the potentially structured information (e.g. tables) in the parsed documents.
node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo"), num_workers=8)
nodes = node_parser.get_nodes_from_documents(documents=[nvidia_docs[0]])

# Let's see what's in the metadata of the nodes:
# for nd in nodes:
#     print(nd.metadata)
#     for k, v in nd.metadata.items():
#         if k == "table_df":
#             print(nd)

# Now we extract our `base_nodes` and `objects` to create the `VectorStoreIndex`.
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)

# We could use the default VectorStoreIndex from llama_index.core,
# or we can use FAISS via llama-index-vector-stores-faiss.
# Trying FAISS here and setting its vector dimension
# (1536 matches the text-embedding-3-small embeddings).
faiss_dim = 1536
faiss_index = faiss.IndexFlatL2(faiss_dim)
# With the default overwrite=False the store appends new vectors;
# overwrite=True suppresses appending and replaces them instead.

# Creating the FaissVectorStore and its recursive_index_faiss.
llama_faiss_vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=llama_faiss_vector_store)
recursive_index_faiss = VectorStoreIndex(nodes=base_nodes + objects, storage_context=storage_context)

# Now we can build our Recursive Query Engine with reranking!
# We'll need to do a couple of steps:
# 1. Initialize our reranker using `FlagEmbeddingReranker` powered by `BAAI/bge-reranker-large`.
# 2. Set up our recursive query engine!
reranker = FlagEmbeddingReranker(
    top_n=5,
    model="BAAI/bge-reranker-large",
)


def build_recursive_query_engine(reranker):
    recursive_query_engine = recursive_index_faiss.as_query_engine(
        similarity_top_k=5,
        node_postprocessors=[reranker],
        verbose=True,
    )
    return recursive_query_engine


recursive_query_engine = build_recursive_query_engine(reranker)

# ChatOpenAI templates (module scope so both Chainlit handlers can see them).
system_template = """Use the following pieces of context to answer the user's question.
If you don't know the answer, say that you don't know; do not try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.
The "SOURCES" part should be a reference to the source inside the document from which you got your answer.
You are a helpful assistant who always speaks in a pleasant tone!
"""

user_template = """Think through your response step by step."""


@cl.on_chat_start
async def start_chat():
    print("A new chat session has started!")
    # user_query = "Who are the E-VP, Operations - and how old are they?"
    cl.user_session.set("recursive_query_engine", recursive_query_engine)


@cl.on_message  # marks the function that runs each time the chatbot receives a message from a user
async def main(message: cl.Message):
    # settings = cl.user_session.get("settings")  # unused: no "settings" key is ever stored
    user_query = message.content
    print("inside on_message - user_query: ", user_query)

    prompt = system_template + user_query + user_template

    recursive_query_engine = cl.user_session.get("recursive_query_engine")
    print("inside on_message - recursive_query_engine: ", recursive_query_engine)

    # Query engines are synchronous by default; aquery is the awaitable variant.
    response = await recursive_query_engine.aquery(prompt)
    print("inside on_message - response: ", response)

    str_resp = "{}".format(response)
    # response = await recursive_fn_call(recursive_query_engine, system_template, user_template, user_query=user_query)
    msg = cl.Message(content=str_resp)
    print("inside on_message - after msg: ", msg)
    await msg.send()
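

# Optional: token-by-token streaming, per the Chainlit streaming docs linked at the
# top of this file. A minimal sketch, not wired into the handlers above: the
# streaming_query_engine and stream_answer names are illustrative, and it assumes
# a second engine built with streaming=True over the same index and reranker.
streaming_query_engine = recursive_index_faiss.as_query_engine(
    similarity_top_k=5,
    node_postprocessors=[reranker],
    streaming=True,
)


async def stream_answer(prompt: str) -> cl.Message:
    """Run the blocking query in a worker thread and forward tokens to the Chainlit UI."""
    msg = cl.Message(content="")
    streaming_response = await cl.make_async(streaming_query_engine.query)(prompt)
    for token in streaming_response.response_gen:
        await msg.stream_token(token)
    await msg.send()
    return msg

# To try it, replace the aquery call in main() with: msg = await stream_answer(prompt)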