mahynski committed
Commit 0a65b22 · verified · 1 Parent(s): d2a0bfd

Update app.py

Files changed (1):
  1. app.py +187 -173

app.py CHANGED
@@ -59,191 +59,199 @@ class GeminiTokens:
 def main():
     with st.sidebar:
         st.title('Document Summarization and QA System')
-
-        # Select Provider
-        provider = st.selectbox(
-            label="Select LLM Provider",
-            options=['google', 'huggingface', 'mistralai', 'openai'],
-            index=3
-        )
-
-        # Select LLM
-        if provider == 'google':
-            llm_list = ['gemini-1.0-pro', 'gemini-1.5-flash', 'gemini-1.5-pro']
-        elif provider == 'huggingface':
-            llm_list = []
-        elif provider == 'mistralai':
-            llm_list = ["mistral-large-latest", "open-mistral-nemo-latest"]
-        elif provider == 'openai':
-            llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini']
-        else:
-            llm_list = []
-
-        if provider == 'huggingface':
-            llm_name = st.text_input(
-                "Enter LLM namespace/model-name",
-                value="HuggingFaceH4/zephyr-7b-alpha",
-            )
-
-            # Also give the user the option for different embedding models, too
-            embed_name = st.text_input(
-                label="Enter embedding namespace/model-name",
-                value="BAAI/bge-small-en-v1.5",
-            )
-        else:
-            llm_name = st.selectbox(
-                label="Select LLM Model",
-                options=llm_list,
-                index=0
-            )
-
-        # Temperature
-        temperature = st.slider(
-            "Temperature",
-            min_value=0.0,
-            max_value=1.0,
-            value=0.0,
-            step=0.05,
-        )
-
-        # Enter Parsing API Key
-        parse_key = st.text_input(
-            "Enter your LlamaParse API Key",
-            value=None
-        )
-
-        # Enter LLM API Key
-        llm_key = st.text_input(
-            "Enter your LLM provider API Key",
-            value=None,
-        )
-
-        # Create LLM
-        # Global tokenization needs to be consistent with LLM for token counting
-        # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
-        if llm_key is not None:
-            if provider == 'google':
-                from llama_index.llms.gemini import Gemini
-                from llama_index.embeddings.gemini import GeminiEmbedding
-                max_output_tokens = 8192 # https://firebase.google.com/docs/vertex-ai/gemini-models
-
-                os.environ['GOOGLE_API_KEY'] = str(llm_key)
-                Settings.llm = Gemini(
-                    model=f"models/{llm_name}",
-                    token=os.environ.get("GOOGLE_API_KEY"),
-                    temperature=temperature,
-                    max_tokens=max_output_tokens
-                )
-                Settings.tokenizer = GeminiTokens(llm_name)
-                Settings.num_output = max_output_tokens
-                Settings.embed_model = GeminiEmbedding(
-                    model_name="models/text-embedding-004", api_key=os.environ.get("GOOGLE_API_KEY") #, title="this is a document"
-                )
-                if llm_name == 'gemini-1.0-pro':
-                    total_token_limit = 32760
-                else:
-                    total_token_limit = 1e6
-                Settings.context_window = total_token_limit - max_output_tokens # Gemini counts total tokens
-            elif provider == 'huggingface':
-                if llm_name is not None and embed_name is not None:
-                    from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
-                    from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
-                    from transformers import AutoTokenizer
-
-                    max_output_tokens = 2048 # Just a generic value
-
-                    os.environ['HF_TOKEN'] = str(llm_key)
-                    Settings.llm = HuggingFaceInferenceAPI(
-                        model_name=llm_name,
-                        token=os.environ.get("HF_TOKEN"),
-                        temperature=temperature,
-                        max_tokens=max_output_tokens
-                    )
-                    Settings.tokenizer = AutoTokenizer.from_pretrained(
-                        llm_name,
-                        token=os.environ.get("HF_TOKEN"),
-                    )
-                    Settings.num_output = max_output_tokens
-                    Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
-                        model_name=embed_name
-                    )
-                    Settings.context_window = 4096 # Just a generic value
-            elif provider == 'mistralai':
-                from llama_index.llms.mistralai import MistralAI
-                from llama_index.embeddings.mistralai import MistralAIEmbedding
-                max_output_tokens = 8192 # Based on internet consensus since this is not well documented
-
-                os.environ['MISTRAL_API_KEY'] = str(llm_key)
-                Settings.llm = MistralAI(
-                    model=llm_name,
-                    temperature=temperature,
-                    max_tokens=max_output_tokens,
-                    random_seed=42,
-                    safe_mode=True
-                )
-                Settings.tokenizer = MistralTokens(llm_name)
-                Settings.num_output = max_output_tokens
-                Settings.embed_model = MistralAIEmbedding(
-                    model_name="mistral-embed",
-                    api_key=os.environ.get("MISTRAL_API_KEY")
-                )
-                Settings.context_window = 128000 # 128k for flagship models - doesn't seem to count input tokens
-            elif provider == 'openai':
-                from llama_index.llms.openai import OpenAI
-                from llama_index.embeddings.openai import OpenAIEmbedding
-
-                # https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4
-                if llm_name == 'gpt-3.5-turbo':
-                    max_output_tokens = 4096
-                    context_window = 16385
-                elif llm_name == 'gpt-4':
-                    max_output_tokens = 8192
-                    context_window = 8192
-                elif llm_name == 'gpt-4-turbo':
-                    max_output_tokens = 4096
-                    context_window = 128000
-                elif llm_name == 'gpt-4o':
-                    max_output_tokens = 4096
-                    context_window = 128000
-                elif llm_name == 'gpt-4o-mini':
-                    max_output_tokens = 16384
-                    context_window = 128000
-
-                os.environ["OPENAI_API_KEY"] = str(llm_key)
-                Settings.llm = OpenAI(
-                    model=llm_name,
-                    temperature=temperature,
-                    max_tokens=max_output_tokens
-                )
-                Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
-                Settings.num_output = max_output_tokens
-                Settings.embed_model = OpenAIEmbedding()
-                Settings.context_window = context_window
-            else:
-                raise NotImplementedError(f"{provider} is not supported yet")
-
-        uploaded_file = st.file_uploader(
-            "Choose a PDF file to upload",
-            type=['pdf'],
-            accept_multiple_files=False
-        )
-
-        parsed_document = None
-        if uploaded_file is not None:
-            # Parse the file
-            parser = LlamaParse(
-                api_key=parse_key, # Can also be set in your env as LLAMA_CLOUD_API_KEY
-                result_type="text" # "markdown" and "text" are available
-            )
-
-            # Create a temporary directory to save the file then load and parse it
-            temp_dir = tempfile.TemporaryDirectory()
-            temp_filename = os.path.join(temp_dir.name, uploaded_file.name)
-            with open(temp_filename, "wb") as f:
-                f.write(uploaded_file.getvalue())
-            parsed_document = parser.load_data(temp_filename)
-            temp_dir.cleanup()
-
+
+        with st.form(key="model_settings"):
+            # Select Provider
+            provider = st.selectbox(
+                label="Select LLM Provider",
+                options=['google', 'huggingface', 'mistralai', 'openai'],
+                index=3
+            )
+
+            # Select LLM
+            if provider == 'google':
+                llm_list = ['gemini-1.0-pro', 'gemini-1.5-flash', 'gemini-1.5-pro']
+            elif provider == 'huggingface':
+                llm_list = []
+            elif provider == 'mistralai':
+                llm_list = ["mistral-large-latest", "open-mistral-nemo-latest"]
+            elif provider == 'openai':
+                llm_list = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-turbo', 'gpt-4o', 'gpt-4o-mini']
+            else:
+                llm_list = []
+
+            if provider == 'huggingface':
+                llm_name = st.text_input(
+                    "Enter LLM namespace/model-name",
+                    value="HuggingFaceH4/zephyr-7b-alpha",
+                )
+
+                # Also give the user the option for different embedding models, too
+                embed_name = st.text_input(
+                    label="Enter embedding namespace/model-name",
+                    value="BAAI/bge-small-en-v1.5",
+                )
+            else:
+                llm_name = st.selectbox(
+                    label="Select LLM Model",
+                    options=llm_list,
+                    index=0
+                )
+
+            # Temperature
+            temperature = st.slider(
+                "Temperature",
+                min_value=0.0,
+                max_value=1.0,
+                value=0.0,
+                step=0.05,
+            )
+
+            similarity_top_k = st.number_input("Top k nodes to retrieve (similarity_top_k)", min_value=1, max_value=100, value=5, step=1)
+            similarity_cutoff = st.slider("Select node similarity cutoff", min_value=0.0, max_value=1.0, value=0.7)
+
+            # Enter Parsing API Key
+            parse_key = st.text_input(
+                "Enter your LlamaParse API Key",
+                value=None
+            )
+
+            # Enter LLM API Key
+            llm_key = st.text_input(
+                "Enter your LLM provider API Key",
+                value=None,
+            )
+
+            # Create LLM
+            # Global tokenization needs to be consistent with LLM for token counting
+            # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
+            if llm_key is not None:
+                if provider == 'google':
+                    from llama_index.llms.gemini import Gemini
+                    from llama_index.embeddings.gemini import GeminiEmbedding
+                    max_output_tokens = 8192 # https://firebase.google.com/docs/vertex-ai/gemini-models
+
+                    os.environ['GOOGLE_API_KEY'] = str(llm_key)
+                    Settings.llm = Gemini(
+                        model=f"models/{llm_name}",
+                        token=os.environ.get("GOOGLE_API_KEY"),
+                        temperature=temperature,
+                        max_tokens=max_output_tokens
+                    )
+                    Settings.tokenizer = GeminiTokens(llm_name)
+                    Settings.num_output = max_output_tokens
+                    Settings.embed_model = GeminiEmbedding(
+                        model_name="models/text-embedding-004", api_key=os.environ.get("GOOGLE_API_KEY") #, title="this is a document"
+                    )
+                    if llm_name == 'gemini-1.0-pro':
+                        total_token_limit = 32760
+                    else:
+                        total_token_limit = 1e6
+                    Settings.context_window = total_token_limit - max_output_tokens # Gemini counts total tokens
+                elif provider == 'huggingface':
+                    if llm_name is not None and embed_name is not None:
+                        from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
+                        from llama_index.embeddings.huggingface import HuggingFaceInferenceAPIEmbedding
+                        from transformers import AutoTokenizer
+
+                        max_output_tokens = 2048 # Just a generic value
+
+                        os.environ['HF_TOKEN'] = str(llm_key)
+                        Settings.llm = HuggingFaceInferenceAPI(
+                            model_name=llm_name,
+                            token=os.environ.get("HF_TOKEN"),
+                            temperature=temperature,
+                            max_tokens=max_output_tokens
+                        )
+                        Settings.tokenizer = AutoTokenizer.from_pretrained(
+                            llm_name,
+                            token=os.environ.get("HF_TOKEN"),
+                        )
+                        Settings.num_output = max_output_tokens
+                        Settings.embed_model = HuggingFaceInferenceAPIEmbedding(
+                            model_name=embed_name
+                        )
+                        Settings.context_window = 4096 # Just a generic value
+                elif provider == 'mistralai':
+                    from llama_index.llms.mistralai import MistralAI
+                    from llama_index.embeddings.mistralai import MistralAIEmbedding
+                    max_output_tokens = 8192 # Based on internet consensus since this is not well documented
+
+                    os.environ['MISTRAL_API_KEY'] = str(llm_key)
+                    Settings.llm = MistralAI(
+                        model=llm_name,
+                        temperature=temperature,
+                        max_tokens=max_output_tokens,
+                        random_seed=42,
+                        safe_mode=True
+                    )
+                    Settings.tokenizer = MistralTokens(llm_name)
+                    Settings.num_output = max_output_tokens
+                    Settings.embed_model = MistralAIEmbedding(
+                        model_name="mistral-embed",
+                        api_key=os.environ.get("MISTRAL_API_KEY")
+                    )
+                    Settings.context_window = 128000 # 128k for flagship models - doesn't seem to count input tokens
+                elif provider == 'openai':
+                    from llama_index.llms.openai import OpenAI
+                    from llama_index.embeddings.openai import OpenAIEmbedding
+
+                    # https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4
+                    if llm_name == 'gpt-3.5-turbo':
+                        max_output_tokens = 4096
+                        context_window = 16385
+                    elif llm_name == 'gpt-4':
+                        max_output_tokens = 8192
+                        context_window = 8192
+                    elif llm_name == 'gpt-4-turbo':
+                        max_output_tokens = 4096
+                        context_window = 128000
+                    elif llm_name == 'gpt-4o':
+                        max_output_tokens = 4096
+                        context_window = 128000
+                    elif llm_name == 'gpt-4o-mini':
+                        max_output_tokens = 16384
+                        context_window = 128000
+
+                    os.environ["OPENAI_API_KEY"] = str(llm_key)
+                    Settings.llm = OpenAI(
+                        model=llm_name,
+                        temperature=temperature,
+                        max_tokens=max_output_tokens
+                    )
+                    Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
+                    Settings.num_output = max_output_tokens
+                    Settings.embed_model = OpenAIEmbedding()
+                    Settings.context_window = context_window
+                else:
+                    raise NotImplementedError(f"{provider} is not supported yet")
+
+            uploaded_file = st.file_uploader(
+                "Choose a PDF file to upload",
+                type=['pdf'],
+                accept_multiple_files=False
+            )
+
+            parsed_document = None
+            if uploaded_file is not None:
+                # Parse the file
+                parser = LlamaParse(
+                    api_key=parse_key, # Can also be set in your env as LLAMA_CLOUD_API_KEY
+                    result_type="text" # "markdown" and "text" are available
+                )
+
+                # Create a temporary directory to save the file then load and parse it
+                temp_dir = tempfile.TemporaryDirectory()
+                temp_filename = os.path.join(temp_dir.name, uploaded_file.name)
+                with open(temp_filename, "wb") as f:
+                    f.write(uploaded_file.getvalue())
+                parsed_document = parser.load_data(temp_filename)
+                temp_dir.cleanup()
+
+            submit_button = st.form_submit_button(
+                "Construct RAG"
+            )
 
     col1, col2 = st.columns(2)
 
     with col2:
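Note on the st.form change above: widgets declared inside a Streamlit form do not trigger a rerun on every interaction; their values are delivered together only when the form's submit button is pressed, so editing an API key or switching models no longer re-runs the LLM setup and the LlamaParse call on every keystroke. A minimal sketch of the pattern, with illustrative widget names:

    import streamlit as st

    with st.form(key="model_settings"):
        # Interacting with these widgets does not rerun the script;
        # their values update only when the submit button is pressed.
        provider = st.selectbox("Select LLM Provider", ["google", "openai"])
        temperature = st.slider("Temperature", 0.0, 1.0, 0.0, 0.05)
        submitted = st.form_submit_button("Construct RAG")

    if submitted:
        st.write(f"Configuring {provider} at temperature {temperature:.2f}")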
@@ -281,9 +289,15 @@ def main():
 
         run = st.button("Answer", type="primary")
 
-        if parsed_document is not None and run:
+        if parsed_document is not None and run and submit_button:
             index = VectorStoreIndex.from_documents(parsed_document)
-            query_engine = index.as_query_engine()
+            query_engine = index.as_query_engine(
+                similarity_top_k=similarity_top_k,
+                similarity_cutoff=similarity_cutoff,
+                response_mode='compact',
+                # text_qa_template=text_qa_template,
+                # refine_template=refine_template,
+            )
             response = query_engine.query(prompt)
             st.write(response.response)
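Note on the new as_query_engine() call: similarity_top_k is a standard kwarg that is forwarded to the underlying retriever, but similarity_cutoff is not, to my knowledge, accepted there, and LlamaIndex tends to silently ignore unrecognized keyword arguments. The documented route for a score cutoff is a node postprocessor; a sketch of the equivalent, assuming llama-index-core 0.10+ import paths:

    from llama_index.core.postprocessor import SimilarityPostprocessor

    query_engine = index.as_query_engine(
        similarity_top_k=similarity_top_k,  # how many nodes the retriever returns
        response_mode='compact',            # pack retrieved chunks densely per LLM call
        node_postprocessors=[
            # discard retrieved nodes whose similarity score falls below the cutoff
            SimilarityPostprocessor(similarity_cutoff=similarity_cutoff),
        ],
    )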
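One further caveat on the gating condition: st.button and st.form_submit_button each return True only during the single rerun triggered by their own click, so `run and submit_button` cannot both be True in the same script run and the answer branch may never execute. A common workaround (a hypothetical restructuring, not part of this commit) is to cache the index in st.session_state when the form is submitted and query it when "Answer" is clicked:

    # Build the index once, on form submission
    if submit_button and parsed_document is not None:
        st.session_state['index'] = VectorStoreIndex.from_documents(parsed_document)

    # Query on a later rerun, when "Answer" is clicked
    if run and 'index' in st.session_state:
        query_engine = st.session_state['index'].as_query_engine(
            similarity_top_k=similarity_top_k,
            response_mode='compact',
        )
        st.write(query_engine.query(prompt).response)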