lillybak committed
Commit 7b580a0
1 Parent(s): b79351e

Back to the original app.py

Files changed (1):
  1. app.py +45 -36

app.py CHANGED
@@ -6,10 +6,15 @@ load_dotenv()
 
 import os
 import sys
+import getpass
+import nest_asyncio
+# import pandas as pd
 import faiss
 import openai
 
 import chainlit as cl  # importing chainlit for our app
+# https://docs.chainlit.io/api-reference/step-class#update-a-step
+# DEPRECATED: from chainlit.prompt import Prompt, PromptMessage  # importing prompt tools
 
 import llama_index
 from llama_index.core import Settings
@@ -23,6 +28,10 @@ from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.postprocessor.flag_embedding_reranker import FlagEmbeddingReranker
 from llama_parse import LlamaParse
 
+from openai import AsyncOpenAI  # importing openai for API usage
+
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+# GET KEYS
 LLAMA_CLOUD_API_KEY= os.getenv('LLAMA_CLOUD_API_KEY')
 OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
 
@@ -32,6 +41,8 @@ os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
 # os.environ["WANDB_API_KEY"] = getpass.getpass("WandB API Key: ")
 """
 
+nest_asyncio.apply()
+
 # PARSING the pdf file
 parser = LlamaParse(
     result_type="markdown",
@@ -42,7 +53,7 @@ parser = LlamaParse(
 
 nvidia_docs = parser.load_data(["./nvidia_2tables.pdf"])
 # Note: nvidia_docs contains only one file (it could contain more). nvidia_docs[0] is the pdf we loaded.
-# print(nvidia_docs[0].text[:1000])
+print(nvidia_docs[0].text[:1000])
 
 # Getting Settings out of llama_index.core which is a major part of their v0.10 update!
 Settings.llm = OpenAI(model="gpt-3.5-turbo")
@@ -54,11 +65,11 @@ node_parser = MarkdownElementNodeParser(llm=OpenAI(model="gpt-3.5-turbo"), num_w
 
 nodes = node_parser.get_nodes_from_documents(documents=[nvidia_docs[0]])
 # Let's see what's in the metadata of the nodes:
-# for nd in nodes:
-#     print(nd.metadata)
-#     for k,v in nd:
-#         if k=='table_df':
-#             print(nd)
+for nd in nodes:
+    print(nd.metadata)
+    for k,v in nd:
+        if k=='table_df':
+            print(nd)
 # Now we extract our `base_nodes` and `objects` to create the `VectorStoreIndex`.
 base_nodes, objects = node_parser.get_nodes_and_objects(nodes)
 
@@ -79,11 +90,24 @@ recursive_index_faiss = VectorStoreIndex(nodes=base_nodes+objects, storage_conte
 # We'll need to do a couple steps:
 # 1. Initialize our reranker using `FlagEmbeddingReranker` powered by the `BAAI/bge-reranker-large`.
 # 2. Set up our recursive query engine!
+
 reranker = FlagEmbeddingReranker(
-    top_n=1,
+    top_n=5,
     model="BAAI/bge-reranker-large",
 )
 
+recursive_query_engine = recursive_index_faiss.as_query_engine(
+    similarity_top_k=15,
+    node_postprocessors=[reranker],
+    verbose=True
+)
+
+"""
+# Create pandas dataframe to store query+generated response+added truth
+columns=["Query", "Response", "Truth"]
+gen_df = pd.DataFrame(columns=columns,dtype='str')
+"""
+
 # ChatOpenAI Templates
 system_template = """Use the following pieces of context to answer the user's question.
 If you don't know the answer, say that you don't know, do not try to make up an answer.
@@ -92,44 +116,29 @@ The "SOURCES" part should be a reference to the source inside the document from
 You are a helpful assistant who always speaks in a pleasant tone! """
 
 user_template = """ Think through your response step by step."""
-
+
 #user_query = "Who are the E-VP, Operations - and how old are they?"
-def resursive_fn(reranker):
-    recursive_query_engine = recursive_index_faiss.as_query_engine(
-        similarity_top_k=1,
-        node_postprocessors=[reranker],
-        verbose=True
-    )
 
-    return recursive_query_engine
+#response = recursive_query_engine.query(system_template + user_query + user_template)
+
+#str_resp ="{}".format(response)
 
-recursive_fn_val = resursive_fn(reranker)
 
-@cl.on_chat_start
-async def start_chat():
-    print("A new chat session has started!")
-    cl.user_session.set("recursive_query_engine", recursive_fn_val)
-
+def retriever_resp(prompt):
+    import time
+    response = "this is my response"
+    time.sleep(5)
+    return response
 
 @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
 async def main(message: cl.Message):
     settings = cl.user_session.get("settings")
-
-    user_query = message.content
-    print("inside on_message - user_query: ",user_query)
-    prompt=system_template + user_query + user_template
-
-    recursive_query_engine = cl.user_session.get("recursive_query_engine")
-    print("inside on_message - recursive_query_engine: ",recursive_query_engine)
-
-    response = await recursive_query_engine.query(prompt)
-
-    print("inside on_message - response: ",response)
 
+    user_query = message.content
+    # prompt = system_template+user_query+user_template
+    response = recursive_query_engine.query(system_template + user_query + user_template)
+    # response = retriever_resp(prompt)
+    # print("AAA",user_query)
     str_resp ="{}".format(response)
-
-    # response = await recursive_fn_call(recursive_query_engine, system_template, user_template, user_query=user_query)
     msg = cl.Message(content= str_resp)
-    print("inside on_message - after msg: ",msg)
-
     await msg.send()
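One behavioral note on the restored handler: `main` is an async Chainlit callback, but it calls the synchronous `recursive_query_engine.query(...)`, which blocks the event loop while FAISS retrieval, reranking, and generation run. Below is a minimal non-blocking sketch, not part of this commit, assuming the module-level `recursive_query_engine`, `system_template`, and `user_template` defined above; `aquery` is the async counterpart that LlamaIndex query engines expose.

```python
import asyncio

import chainlit as cl


@cl.on_message
async def main(message: cl.Message):
    # Same prompt construction as the committed handler.
    prompt = system_template + message.content + user_template

    # Native async entry point on LlamaIndex query engines.
    response = await recursive_query_engine.aquery(prompt)

    # Alternative: keep the sync .query() but run it on a worker thread.
    # response = await asyncio.to_thread(recursive_query_engine.query, prompt)

    await cl.Message(content=str(response)).send()
```

Either variant keeps the chat UI responsive during long queries; the rest of the commit can stay as-is.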