AI-Docwhiz

Sleeping

App Files Files Community

sourabhzanwar committed on Nov 23, 2023

Commit

203aa9d

•

1 Parent(s): 73b392d

Added reset documents functionality.

Browse files

Files changed (3) hide show

.streamlit/config.toml +1 -1
app.py +32 -19
utils/haystack.py +8 -1

.streamlit/config.toml CHANGED Viewed

@@ -3,4 +3,4 @@ primaryColor = "#E694FF"
 backgroundColor = "#FFFFFF"
 secondaryBackgroundColor = "#F0F0F0"
 textColor = "#262730"
-font = "sans-serif"

 backgroundColor = "#FFFFFF"
 secondaryBackgroundColor = "#F0F0F0"
 textColor = "#262730"
+font = "sans serif"

app.py CHANGED Viewed

@@ -46,8 +46,8 @@ import haystack
 DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
 # Define a function to handle file uploads
 def upload_files():
-    uploaded_files = st.sidebar.file_uploader(
-            "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
         )
     return uploaded_files
@@ -77,6 +77,26 @@ def process_file(data_file, preprocesor, document_store):
     except Exception as e:
         print(e)
 try:
     args = parser.parse_args()
     preprocesor = start_preprocessor_node()
@@ -94,6 +114,7 @@ try:
         }
     )
     st.sidebar.image("ml_logo.png", use_column_width=True)
     # Sidebar for Task Selection
     st.sidebar.header('Options:')
@@ -118,28 +139,20 @@ try:
     set_initial_state()
     st.write('# ' + args.name)
     # File upload block
     if not DISABLE_FILE_UPLOAD:
-        st.sidebar.write("## File Upload:")
         #data_files = st.sidebar.file_uploader(
         #    "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
         #)
         data_files = upload_files()
-        if data_files is not None:
-            for data_file in data_files:
-                # Upload file
-                if data_file:
-                    try:
-                        #raw_json = upload_doc(data_file)
-                        # Call the process_file function for each uploaded file
-                        if args.store == 'inmemory':
-                            processed_data = process_file(data_file, preprocesor, document_store)
-                        st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
-                    except Exception as e:
-                        st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
-                        st.sidebar.write("_This file could not be parsed, see the logs for more information._")
     if "question" not in st.session_state:
         st.session_state.question = ""
@@ -242,4 +255,4 @@ try:
             st.table(df)
 except SystemExit as e:
-    os._exit(e.code)

 DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
 # Define a function to handle file uploads
 def upload_files():
+    uploaded_files = upload_container.file_uploader(
+            "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="collapsed"
         )
     return uploaded_files
     except Exception as e:
         print(e)
+def reset_documents():
+    print('Reseting documents list')
+    document_store.delete_documents()
+def upload_document():
+    upload_status = 0
+    if data_files is not None:
+        for data_file in data_files:
+            # Upload file
+            if data_file:
+                try:
+                    #raw_json = upload_doc(data_file)
+                    # Call the process_file function for each uploaded file
+                    if args.store == 'inmemory':
+                        processed_data = process_file(data_file, preprocesor, document_store)
+                    upload_container.write(str(data_file.name) + " &nbsp;&nbsp; ✅ ")
+                except Exception as e:
+                    upload_container.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
+                    upload_container.write("_This file could not be parsed, see the logs for more information._")
 try:
     args = parser.parse_args()
     preprocesor = start_preprocessor_node()
         }
     )
     st.sidebar.image("ml_logo.png", use_column_width=True)
     # Sidebar for Task Selection
     st.sidebar.header('Options:')
     set_initial_state()
     st.write('# ' + args.name)
     # File upload block
     if not DISABLE_FILE_UPLOAD:
+        upload_container = st.sidebar.container()
+        upload_container.write("## File Upload:")
         #data_files = st.sidebar.file_uploader(
         #    "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
         #)
         data_files = upload_files()
+        upload_container.button('Upload Files', on_click=upload_document, args=())
+    st.sidebar.button("Reset documents", on_click=reset_documents, args=())
     if "question" not in st.session_state:
         st.session_state.question = ""
             st.table(df)
 except SystemExit as e:
+    os._exit(e.code)

utils/haystack.py CHANGED Viewed

@@ -5,10 +5,15 @@ from haystack import Pipeline
 from haystack.schema import Answer
 from haystack.document_stores import BaseDocumentStore
 from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
-from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor
 from milvus_haystack import MilvusDocumentStore
 #Use this file to set up your Haystack pipeline and querying
 @st.cache_resource(show_spinner=False)
 def start_preprocessor_node():
     print('initializing preprocessor node')
@@ -118,3 +123,5 @@ def initialize_pipeline(task, document_store, retriever, reader, openai_key = ""
         return start_haystack_extractive(document_store, retriever, reader)
     elif task == 'rag':
         return start_haystack_rag(document_store, retriever, openai_key)

 from haystack.schema import Answer
 from haystack.document_stores import BaseDocumentStore
 from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
+from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor, TextConverter, FileTypeClassifier, PDFToTextConverter
 from milvus_haystack import MilvusDocumentStore
 #Use this file to set up your Haystack pipeline and querying
+file_type_classifier = FileTypeClassifier()
+text_converter = TextConverter()
+pdf_converter = PDFToTextConverter()
 @st.cache_resource(show_spinner=False)
 def start_preprocessor_node():
     print('initializing preprocessor node')
         return start_haystack_extractive(document_store, retriever, reader)
     elif task == 'rag':
         return start_haystack_rag(document_store, retriever, openai_key)