AI-Docwhiz

Sleeping

App Files Files Community

sourabhzanwar committed on Nov 24, 2023

Commit

412a390

•

1 Parent(s): 7d397b0

added authentication

Browse files

Files changed (4) hide show

app.py +138 -123
generate_keys.py +15 -0
hashed_password.pkl +0 -0
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from utils.check_pydantic_version import use_pydantic_v1
 use_pydantic_v1() #This function has to be run before importing haystack. as haystack requires pydantic v1 to run
 from operator import index
 import streamlit as st
 import logging
@@ -17,11 +18,15 @@ import haystack
 from datetime import datetime
-from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor
-pdf_converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en","de"])
-docx_converter = DocxToTextConverter(remove_numeric_tables=False, valid_languages=["en","de"])
-txt_converter = TextConverter(remove_numeric_tables=True, valid_languages=["en","de"])
 # Whether the file upload should be enabled or not
@@ -101,142 +106,152 @@ try:
     )
     st.sidebar.image("ml_logo.png", use_column_width=True)
-    # Sidebar for Task Selection
-    st.sidebar.header('Options:')
-    # OpenAI Key Input
-    openai_key = st.sidebar.text_input("Enter OpenAI Key:", type="password")
-    if openai_key:
-        task_options = ['Extractive', 'Generative']
-    else:
-        task_options = ['Extractive']
-    task_selection = st.sidebar.radio('Select the task:', task_options)
-    # Check the task and initialize pipeline accordingly
-    if task_selection == 'Extractive':
-        pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
-    elif task_selection == 'Generative' and openai_key:  # Check for openai_key to ensure user has entered it
-        pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
-    set_initial_state()
-    st.write('# ' + args.name)
-    # File upload block
-    if not DISABLE_FILE_UPLOAD:
-        upload_container = st.sidebar.container()
-        upload_container.write("## File Upload:")
-        data_files = upload_files()
-        # Button to update files in the documentStore
-        upload_container.button('Upload Files', on_click=upload_document, args=())
-    # Button to reset the documents in DocumentStore
-    st.sidebar.button("Reset documents", on_click=reset_documents, args=())
-    if "question" not in st.session_state:
-        st.session_state.question = ""
-    # Search bar
-    question = st.text_input("Question", value=st.session_state.question, max_chars=100, on_change=reset_results, label_visibility="hidden")
-    run_pressed = st.button("Run")
-    run_query = (
-        run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
-    )
-    # Get results for query
-    if run_query and question:
-        if task_selection == 'Extractive':
-            reset_results()
-            st.session_state.question = question
-            with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
-                try:
-                    st.session_state.results_extractive = query(pipeline_extractive, question)
-                    st.session_state.task = task_selection
-                except JSONDecodeError as je:
-                    st.error(
-                        "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
-                    )
-                except Exception as e:
-                    logging.exception(e)
-                    st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
-        elif task_selection == 'Generative':
-            reset_results()
-            st.session_state.question = question
-            with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
-                try:
-                    st.session_state.results_generative = query(pipeline_rag, question)
-                    st.session_state.task = task_selection
-                except JSONDecodeError as je:
-                    st.error(
-                        "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
-                    )
-                except Exception as e:
-                    if "API key is invalid" in str(e):
-                        logging.exception(e)
-                        st.error("🐞 &nbsp;&nbsp; incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
-                    else:
                         logging.exception(e)
                         st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
-    # Display results
-    if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
-        # Handle Extractive Answers
-        if task_selection == 'Extractive':
-            results = st.session_state.results_extractive
-            st.subheader("Extracted Answers:")
-            if 'answers' in results:
-                answers = results['answers']
-                treshold = 0.2
-                higher_then_treshold = any(ans.score > treshold for ans in answers)
-                if not higher_then_treshold:
-                    st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
-                for count, answer in enumerate(answers):
-                    if answer.answer:
-                        text, context = answer.answer, answer.context
-                        start_idx = context.find(text)
-                        end_idx = start_idx + len(text)
-                        score = round(answer.score, 3)
-                        st.markdown(f"**Answer {count + 1}:**")
-                        st.markdown(
-                            context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
-                            unsafe_allow_html=True,
-                        )
-                    else:
-                        st.info(
-                            "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
                         )
-        # Handle Generative Answers
-        elif task_selection == 'Generative':
-            results = st.session_state.results_generative
-            st.subheader("Generated Answer:")
-            if 'results' in results:
-                st.markdown("**Answer:**")
-                st.write(results['results'][0])
-        # Handle Retrieved Documents
-        if 'documents' in results:
-            retrieved_documents = results['documents']
-            st.subheader("Retriever Results:")
-            data = []
-            for i, document in enumerate(retrieved_documents):
-                # Truncate the content
-                truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
-                data.append([i + 1, document.meta['name'], truncated_content])
-            # Convert data to DataFrame and display using Streamlit
-            df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
-            st.table(df)
 except SystemExit as e:
     os._exit(e.code)

 from utils.check_pydantic_version import use_pydantic_v1
 use_pydantic_v1() #This function has to be run before importing haystack. as haystack requires pydantic v1 to run
 from operator import index
 import streamlit as st
 import logging
 from datetime import datetime
+import streamlit_authenticator as stauth
+import pickle
+names = ['admin']  # passed as the first argument to stauth.Authenticate below (presumably display names — confirm against the library docs)
+usernames = ['admin']  # passed as the second argument to stauth.Authenticate below (presumably login identifiers — confirm)
+with open('hashed_password.pkl','rb') as f:  # this file is written by generate_keys.py
+    hashed_passwords = pickle.load(f)  # NOTE(review): pickle.load can execute arbitrary code; the .pkl must only ever come from a trusted source
 # Whether the file upload should be enabled or not
     )
     st.sidebar.image("ml_logo.png", use_column_width=True)
+    authenticator = stauth.Authenticate(names, usernames, hashed_passwords, "document_search", "random_text", cookie_expiry_days=2)
+    name, authentication_status, username = authenticator.login("Login", "main")
+    if authentication_status == False:
+        st.error("Username/Password is incorrect")
+    if authentication_status == None:
+        st.warning("Please enter youe username and password")
+    if authentication_status:
+        # Sidebar for Task Selection
+        st.sidebar.header('Options:')
+        # OpenAI Key Input
+        openai_key = st.sidebar.text_input("Enter OpenAI Key:", type="password")
+        if openai_key:
+            task_options = ['Extractive', 'Generative']
+        else:
+            task_options = ['Extractive']
+        task_selection = st.sidebar.radio('Select the task:', task_options)
+        # Check the task and initialize pipeline accordingly
+        if task_selection == 'Extractive':
+            pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
+        elif task_selection == 'Generative' and openai_key:  # Check for openai_key to ensure user has entered it
+            pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader, openai_key=openai_key)
+        set_initial_state()
+        st.write('# ' + args.name)
+        # File upload block
+        if not DISABLE_FILE_UPLOAD:
+            upload_container = st.sidebar.container()
+            upload_container.write("## File Upload:")
+            data_files = upload_files()
+            # Button to update files in the documentStore
+            upload_container.button('Upload Files', on_click=upload_document, args=())
+        # Button to reset the documents in DocumentStore
+        st.sidebar.button("Reset documents", on_click=reset_documents, args=())
+        if "question" not in st.session_state:
+            st.session_state.question = ""
+        # Search bar
+        question = st.text_input("Question", value=st.session_state.question, max_chars=100, on_change=reset_results, label_visibility="hidden")
+        run_pressed = st.button("Run")
+        run_query = (
+            run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
+        )
+        # Get results for query
+        if run_query and question:
+            if task_selection == 'Extractive':
+                reset_results()
+                st.session_state.question = question
+                with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
+                    try:
+                        st.session_state.results_extractive = query(pipeline_extractive, question)
+                        st.session_state.task = task_selection
+                    except JSONDecodeError as je:
+                        st.error(
+                            "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
+                        )
+                    except Exception as e:
                         logging.exception(e)
                         st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
+            elif task_selection == 'Generative':
+                reset_results()
+                st.session_state.question = question
+                with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
+                    try:
+                        st.session_state.results_generative = query(pipeline_rag, question)
+                        st.session_state.task = task_selection
+                    except JSONDecodeError as je:
+                        st.error(
+                            "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
                         )
+                    except Exception as e:
+                        if "API key is invalid" in str(e):
+                            logging.exception(e)
+                            st.error("🐞 &nbsp;&nbsp; incorrect API key provided. You can find your API key at https://platform.openai.com/account/api-keys.")
+                        else:
+                            logging.exception(e)
+                            st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
+        # Display results
+        if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
+            # Handle Extractive Answers
+            if task_selection == 'Extractive':
+                results = st.session_state.results_extractive
+                st.subheader("Extracted Answers:")
+                if 'answers' in results:
+                    answers = results['answers']
+                    treshold = 0.2
+                    higher_then_treshold = any(ans.score > treshold for ans in answers)
+                    if not higher_then_treshold:
+                        st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
+                    for count, answer in enumerate(answers):
+                        if answer.answer:
+                            text, context = answer.answer, answer.context
+                            start_idx = context.find(text)
+                            end_idx = start_idx + len(text)
+                            score = round(answer.score, 3)
+                            st.markdown(f"**Answer {count + 1}:**")
+                            st.markdown(
+                                context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
+                                unsafe_allow_html=True,
+                            )
+                        else:
+                            st.info(
+                                "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
+                            )
+            # Handle Generative Answers
+            elif task_selection == 'Generative':
+                results = st.session_state.results_generative
+                st.subheader("Generated Answer:")
+                if 'results' in results:
+                    st.markdown("**Answer:**")
+                    st.write(results['results'][0])
+            # Handle Retrieved Documents
+            if 'documents' in results:
+                retrieved_documents = results['documents']
+                st.subheader("Retriever Results:")
+                data = []
+                for i, document in enumerate(retrieved_documents):
+                    # Truncate the content
+                    truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
+                    data.append([i + 1, document.meta['name'], truncated_content])
+                # Convert data to DataFrame and display using Streamlit
+                df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
+                st.table(df)
 except SystemExit as e:
     os._exit(e.code)

generate_keys.py ADDED Viewed

	@@ -0,0 +1,15 @@

+# -*- coding: utf-8 -*-
+import pickle
+from pathlib import Path
+import streamlit_authenticator as stauth
+names = ['admin']
+usernames = ['admin']
+passwords = ['admin1']
+hashed_passwords = stauth.Hasher((passwords)).generate()
+with open('hashed_password.pkl','wb') as f:
+    pickle.dump(hashed_passwords, f)

hashed_password.pkl ADDED Viewed

Binary file (78 Bytes). View file

requirements.txt CHANGED Viewed

@@ -2,6 +2,7 @@ safetensors==0.3.3.post1
 farm-haystack[inference,weaviate,opensearch,file-conversion,pdf]==1.20.0
 milvus-haystack
 streamlit==1.23.0
 markdown
 st-annotated-text
 datasets

 farm-haystack[inference,weaviate,opensearch,file-conversion,pdf]==1.20.0
 milvus-haystack
 streamlit==1.23.0
+streamlit-authenticator==0.1.5
 markdown
 st-annotated-text
 datasets