sourabhzanwar committed on
Commit
412a390
•
1 Parent(s): 7d397b0

added authentication

Browse files
Files changed (4) hide show
  1. app.py +138 -123
  2. generate_keys.py +15 -0
  3. hashed_password.pkl +0 -0
  4. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  from utils.check_pydantic_version import use_pydantic_v1
2
  use_pydantic_v1() #This function has to be run before importing haystack. as haystack requires pydantic v1 to run
3
 
 
4
  from operator import index
5
  import streamlit as st
6
  import logging
@@ -17,11 +18,15 @@ import haystack
17
 
18
  from datetime import datetime
19
 
20
- from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
 
 
 
 
 
 
 
21
 
22
- pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en","de"])
23
- docx_converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=["en","de"])
24
- txt_converter = TextConverter(remove_numeric_tables=True, valid_languages=["en","de"])
25
 
26
 
27
  # Whether the file upload should be enabled or not
@@ -101,142 +106,152 @@ try:
101
  )
102
  st.sidebar.image("ml_logo.png", use_column_width=True)
103
 
104
- # Sidebar for Task Selection
105
- st.sidebar.header('Options:')
106
 
107
- # OpenAI Key Input
108
- openai_key = st.sidebar.text_input("Enter OpenAI Key:", type="password")
109
 
110
- if openai_key:
111
- task_options = ['Extractive', 'Generative']
112
- else:
113
- task_options = ['Extractive']
114
 
115
- task_selection = st.sidebar.radio('Select the task:', task_options)
 
116
 
117
- # Check the task and initialize pipeline accordingly
118
- if task_selection == 'Extractive':
119
- pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
120
- elif task_selection == 'Generative' and openai_key: # Check for openai_key to ensure user has entered it
121
- pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
122
 
 
 
123
 
124
- set_initial_state()
 
 
 
125
 
126
- st.write('# ' + args.name)
127
-
128
-
129
- # File upload block
130
- if not DISABLE_FILE_UPLOAD:
131
- upload_container = st.sidebar.container()
132
- upload_container.write("## File Upload:")
133
- data_files = upload_files()
134
 
135
- # Button to update files in the documentStore
136
- upload_container.button('Upload Files', on_click=upload_document, args=())
 
 
 
137
 
138
- # Button to reset the documents in DocumentStore
139
- st.sidebar.button("Reset documents", on_click=reset_documents, args=())
140
 
141
- if "question" not in st.session_state:
142
- st.session_state.question = ""
143
- # Search bar
144
- question = st.text_input("Question", value=st.session_state.question, max_chars=100, on_change=reset_results, label_visibility="hidden")
145
 
146
- run_pressed = st.button("Run")
 
 
 
 
 
 
 
147
 
148
- run_query = (
149
- run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
150
- )
151
 
152
- # Get results for query
153
- if run_query and question:
154
- if task_selection == 'Extractive':
155
- reset_results()
156
- st.session_state.question = question
157
- with st.spinner("πŸ”Ž    Running your pipeline"):
158
- try:
159
- st.session_state.results_extractive = query(pipeline_extractive, question)
160
- st.session_state.task = task_selection
161
- except JSONDecodeError as je:
162
- st.error(
163
- "πŸ‘“    An error occurred reading the results. Is the document store working?"
164
- )
165
- except Exception as e:
166
- logging.exception(e)
167
- st.error("🐞    An error occurred during the request.")
168
 
169
- elif task_selection == 'Generative':
170
- reset_results()
171
- st.session_state.question = question
172
- with st.spinner("πŸ”Ž    Running your pipeline"):
173
- try:
174
- st.session_state.results_generative = query(pipeline_rag, question)
175
- st.session_state.task = task_selection
176
- except JSONDecodeError as je:
177
- st.error(
178
- "πŸ‘“    An error occurred reading the results. Is the document store working?"
179
- )
180
- except Exception as e:
181
- if "API key is invalid" in str(e):
182
- logging.exception(e)
183
- st.error("🐞    incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
184
- else:
 
 
 
 
 
 
 
 
 
185
  logging.exception(e)
186
  st.error("🐞    An error occurred during the request.")
187
- # Display results
188
- if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
189
 
190
- # Handle Extractive Answers
191
- if task_selection == 'Extractive':
192
- results = st.session_state.results_extractive
193
-
194
- st.subheader("Extracted Answers:")
195
-
196
- if 'answers' in results:
197
- answers = results['answers']
198
- treshold = 0.2
199
- higher_then_treshold = any(ans.score > treshold for ans in answers)
200
- if not higher_then_treshold:
201
- st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
202
- for count, answer in enumerate(answers):
203
- if answer.answer:
204
- text, context = answer.answer, answer.context
205
- start_idx = context.find(text)
206
- end_idx = start_idx + len(text)
207
- score = round(answer.score, 3)
208
- st.markdown(f"**Answer {count + 1}:**")
209
- st.markdown(
210
- context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
211
- unsafe_allow_html=True,
212
- )
213
- else:
214
- st.info(
215
- "πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
216
  )
217
-
218
- # Handle Generative Answers
219
- elif task_selection == 'Generative':
220
- results = st.session_state.results_generative
221
- st.subheader("Generated Answer:")
222
- if 'results' in results:
223
- st.markdown("**Answer:**")
224
- st.write(results['results'][0])
225
-
226
- # Handle Retrieved Documents
227
- if 'documents' in results:
228
- retrieved_documents = results['documents']
229
- st.subheader("Retriever Results:")
230
-
231
- data = []
232
- for i, document in enumerate(retrieved_documents):
233
- # Truncate the content
234
- truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
235
- data.append([i + 1, document.meta['name'], truncated_content])
236
-
237
- # Convert data to DataFrame and display using Streamlit
238
- df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
239
- st.table(df)
240
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  except SystemExit as e:
242
  os._exit(e.code)
 
1
  from utils.check_pydantic_version import use_pydantic_v1
2
  use_pydantic_v1() #This function has to be run before importing haystack. as haystack requires pydantic v1 to run
3
 
4
+
5
  from operator import index
6
  import streamlit as st
7
  import logging
 
18
 
19
  from datetime import datetime
20
 
21
+ import streamlit_authenticator as stauth
22
+ import pickle
23
+
24
+
25
+ names = ['admin']
26
+ usernames = ['admin']
27
+ with open('hashed_password.pkl','rb') as f:
28
+ hashed_passwords = pickle.load(f)
29
 
 
 
 
30
 
31
 
32
  # Whether the file upload should be enabled or not
 
106
  )
107
  st.sidebar.image("ml_logo.png", use_column_width=True)
108
 
109
+ authenticator = stauth.Authenticate(names, usernames, hashed_passwords, "document_search", "random_text", cookie_expiry_days=2)
 
110
 
111
+ name, authentication_status, username = authenticator.login("Login", "main")
 
112
 
113
+ if authentication_status == False:
114
+ st.error("Username/Password is incorrect")
 
 
115
 
116
+ if authentication_status == None:
117
+ st.warning("Please enter youe username and password")
118
 
119
+ if authentication_status:
120
+ # Sidebar for Task Selection
121
+ st.sidebar.header('Options:')
 
 
122
 
123
+ # OpenAI Key Input
124
+ openai_key = st.sidebar.text_input("Enter OpenAI Key:", type="password")
125
 
126
+ if openai_key:
127
+ task_options = ['Extractive', 'Generative']
128
+ else:
129
+ task_options = ['Extractive']
130
 
131
+ task_selection = st.sidebar.radio('Select the task:', task_options)
 
 
 
 
 
 
 
132
 
133
+ # Check the task and initialize pipeline accordingly
134
+ if task_selection == 'Extractive':
135
+ pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
136
+ elif task_selection == 'Generative' and openai_key: # Check for openai_key to ensure user has entered it
137
+ pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
138
 
 
 
139
 
140
+ set_initial_state()
 
 
 
141
 
142
+ st.write('# ' + args.name)
143
+
144
+
145
+ # File upload block
146
+ if not DISABLE_FILE_UPLOAD:
147
+ upload_container = st.sidebar.container()
148
+ upload_container.write("## File Upload:")
149
+ data_files = upload_files()
150
 
151
+ # Button to update files in the documentStore
152
+ upload_container.button('Upload Files', on_click=upload_document, args=())
 
153
 
154
+ # Button to reset the documents in DocumentStore
155
+ st.sidebar.button("Reset documents", on_click=reset_documents, args=())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
+ if "question" not in st.session_state:
158
+ st.session_state.question = ""
159
+ # Search bar
160
+ question = st.text_input("Question", value=st.session_state.question, max_chars=100, on_change=reset_results, label_visibility="hidden")
161
+
162
+ run_pressed = st.button("Run")
163
+
164
+ run_query = (
165
+ run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
166
+ )
167
+
168
+ # Get results for query
169
+ if run_query and question:
170
+ if task_selection == 'Extractive':
171
+ reset_results()
172
+ st.session_state.question = question
173
+ with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
174
+ try:
175
+ st.session_state.results_extractive = query(pipeline_extractive, question)
176
+ st.session_state.task = task_selection
177
+ except JSONDecodeError as je:
178
+ st.error(
179
+ "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
180
+ )
181
+ except Exception as e:
182
  logging.exception(e)
183
  st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
 
 
184
 
185
+ elif task_selection == 'Generative':
186
+ reset_results()
187
+ st.session_state.question = question
188
+ with st.spinner("πŸ”Ž &nbsp;&nbsp; Running your pipeline"):
189
+ try:
190
+ st.session_state.results_generative = query(pipeline_rag, question)
191
+ st.session_state.task = task_selection
192
+ except JSONDecodeError as je:
193
+ st.error(
194
+ "πŸ‘“ &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  )
196
+ except Exception as e:
197
+ if "API key is invalid" in str(e):
198
+ logging.exception(e)
199
+ st.error("🐞 &nbsp;&nbsp; incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
200
+ else:
201
+ logging.exception(e)
202
+ st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
203
+ # Display results
204
+ if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
205
+
206
+ # Handle Extractive Answers
207
+ if task_selection == 'Extractive':
208
+ results = st.session_state.results_extractive
209
+
210
+ st.subheader("Extracted Answers:")
211
+
212
+ if 'answers' in results:
213
+ answers = results['answers']
214
+ treshold = 0.2
215
+ higher_then_treshold = any(ans.score > treshold for ans in answers)
216
+ if not higher_then_treshold:
217
+ st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
218
+ for count, answer in enumerate(answers):
219
+ if answer.answer:
220
+ text, context = answer.answer, answer.context
221
+ start_idx = context.find(text)
222
+ end_idx = start_idx + len(text)
223
+ score = round(answer.score, 3)
224
+ st.markdown(f"**Answer {count + 1}:**")
225
+ st.markdown(
226
+ context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
227
+ unsafe_allow_html=True,
228
+ )
229
+ else:
230
+ st.info(
231
+ "πŸ€” &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
232
+ )
233
+
234
+ # Handle Generative Answers
235
+ elif task_selection == 'Generative':
236
+ results = st.session_state.results_generative
237
+ st.subheader("Generated Answer:")
238
+ if 'results' in results:
239
+ st.markdown("**Answer:**")
240
+ st.write(results['results'][0])
241
+
242
+ # Handle Retrieved Documents
243
+ if 'documents' in results:
244
+ retrieved_documents = results['documents']
245
+ st.subheader("Retriever Results:")
246
+
247
+ data = []
248
+ for i, document in enumerate(retrieved_documents):
249
+ # Truncate the content
250
+ truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
251
+ data.append([i + 1, document.meta['name'], truncated_content])
252
+
253
+ # Convert data to DataFrame and display using Streamlit
254
+ df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
255
+ st.table(df)
256
  except SystemExit as e:
257
  os._exit(e.code)
generate_keys.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import pickle
4
+ from pathlib import Path
5
+
6
+ import streamlit_authenticator as stauth
7
+
8
+ names = ['admin']
9
+ usernames = ['admin']
10
+ passwords = ['admin1']
11
+
12
+ hashed_passwords = stauth.Hasher((passwords)).generate()
13
+
14
+ with open('hashed_password.pkl','wb') as f:
15
+ pickle.dump(hashed_passwords, f)
hashed_password.pkl ADDED
Binary file (78 Bytes). View file
 
requirements.txt CHANGED
@@ -2,6 +2,7 @@ safetensors==0.3.3.post1
2
  farm-haystack[inference,weaviate,opensearch,file-conversion,pdf]==1.20.0
3
  milvus-haystack
4
  streamlit==1.23.0
 
5
  markdown
6
  st-annotated-text
7
  datasets
 
2
  farm-haystack[inference,weaviate,opensearch,file-conversion,pdf]==1.20.0
3
  milvus-haystack
4
  streamlit==1.23.0
5
+ streamlit-authenticator==0.1.5
6
  markdown
7
  st-annotated-text
8
  datasets