sourabhzanwar committed on
Commit
203aa9d
1 Parent(s): 73b392d

Added reset documents functionality.

Browse files
Files changed (3) hide show
  1. .streamlit/config.toml +1 -1
  2. app.py +32 -19
  3. utils/haystack.py +8 -1
.streamlit/config.toml CHANGED
@@ -3,4 +3,4 @@ primaryColor = "#E694FF"
3
  backgroundColor = "#FFFFFF"
4
  secondaryBackgroundColor = "#F0F0F0"
5
  textColor = "#262730"
6
- font = "sans-serif"
 
3
  backgroundColor = "#FFFFFF"
4
  secondaryBackgroundColor = "#F0F0F0"
5
  textColor = "#262730"
6
+ font = "sans serif"
app.py CHANGED
@@ -46,8 +46,8 @@ import haystack
46
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
47
  # Define a function to handle file uploads
48
  def upload_files():
49
- uploaded_files = st.sidebar.file_uploader(
50
- "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
51
  )
52
  return uploaded_files
53
 
@@ -77,6 +77,26 @@ def process_file(data_file, preprocesor, document_store):
77
  except Exception as e:
78
  print(e)
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  try:
81
  args = parser.parse_args()
82
  preprocesor = start_preprocessor_node()
@@ -94,6 +114,7 @@ try:
94
  }
95
  )
96
  st.sidebar.image("ml_logo.png", use_column_width=True)
 
97
 
98
  # Sidebar for Task Selection
99
  st.sidebar.header('Options:')
@@ -118,28 +139,20 @@ try:
118
  set_initial_state()
119
 
120
  st.write('# ' + args.name)
121
-
122
-
123
  # File upload block
124
  if not DISABLE_FILE_UPLOAD:
125
- st.sidebar.write("## File Upload:")
 
126
  #data_files = st.sidebar.file_uploader(
127
  # "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
128
  #)
129
  data_files = upload_files()
130
- if data_files is not None:
131
- for data_file in data_files:
132
- # Upload file
133
- if data_file:
134
- try:
135
- #raw_json = upload_doc(data_file)
136
- # Call the process_file function for each uploaded file
137
- if args.store == 'inmemory':
138
- processed_data = process_file(data_file, preprocesor, document_store)
139
- st.sidebar.write(str(data_file.name) + "    ✅ ")
140
- except Exception as e:
141
- st.sidebar.write(str(data_file.name) + "    ❌ ")
142
- st.sidebar.write("_This file could not be parsed, see the logs for more information._")
143
 
144
  if "question" not in st.session_state:
145
  st.session_state.question = ""
@@ -242,4 +255,4 @@ try:
242
  st.table(df)
243
 
244
  except SystemExit as e:
245
- os._exit(e.code)
 
46
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
47
  # Define a function to handle file uploads
48
  def upload_files():
49
+ uploaded_files = upload_container.file_uploader(
50
+ "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="collapsed"
51
  )
52
  return uploaded_files
53
 
 
77
  except Exception as e:
78
  print(e)
79
 
80
+ def reset_documents():
81
+ print('Reseting documents list')
82
+ document_store.delete_documents()
83
+
84
+ def upload_document():
85
+ upload_status = 0
86
+ if data_files is not None:
87
+ for data_file in data_files:
88
+ # Upload file
89
+ if data_file:
90
+ try:
91
+ #raw_json = upload_doc(data_file)
92
+ # Call the process_file function for each uploaded file
93
+ if args.store == 'inmemory':
94
+ processed_data = process_file(data_file, preprocesor, document_store)
95
+ upload_container.write(str(data_file.name) + "    ✅ ")
96
+ except Exception as e:
97
+ upload_container.write(str(data_file.name) + "    ❌ ")
98
+ upload_container.write("_This file could not be parsed, see the logs for more information._")
99
+
100
  try:
101
  args = parser.parse_args()
102
  preprocesor = start_preprocessor_node()
 
114
  }
115
  )
116
  st.sidebar.image("ml_logo.png", use_column_width=True)
117
+
118
 
119
  # Sidebar for Task Selection
120
  st.sidebar.header('Options:')
 
139
  set_initial_state()
140
 
141
  st.write('# ' + args.name)
142
+
143
+
144
  # File upload block
145
  if not DISABLE_FILE_UPLOAD:
146
+ upload_container = st.sidebar.container()
147
+ upload_container.write("## File Upload:")
148
  #data_files = st.sidebar.file_uploader(
149
  # "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
150
  #)
151
  data_files = upload_files()
152
+
153
+ upload_container.button('Upload Files', on_click=upload_document, args=())
154
+
155
+ st.sidebar.button("Reset documents", on_click=reset_documents, args=())
 
 
 
 
 
 
 
 
 
156
 
157
  if "question" not in st.session_state:
158
  st.session_state.question = ""
 
255
  st.table(df)
256
 
257
  except SystemExit as e:
258
+ os._exit(e.code)
utils/haystack.py CHANGED
@@ -5,10 +5,15 @@ from haystack import Pipeline
5
  from haystack.schema import Answer
6
  from haystack.document_stores import BaseDocumentStore
7
  from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
8
- from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor
9
  from milvus_haystack import MilvusDocumentStore
10
  #Use this file to set up your Haystack pipeline and querying
11
 
 
 
 
 
 
12
  @st.cache_resource(show_spinner=False)
13
  def start_preprocessor_node():
14
  print('initializing preprocessor node')
@@ -118,3 +123,5 @@ def initialize_pipeline(task, document_store, retriever, reader, openai_key = ""
118
  return start_haystack_extractive(document_store, retriever, reader)
119
  elif task == 'rag':
120
  return start_haystack_rag(document_store, retriever, openai_key)
 
 
 
5
  from haystack.schema import Answer
6
  from haystack.document_stores import BaseDocumentStore
7
  from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
8
+ from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor, TextConverter, FileTypeClassifier, PDFToTextConverter
9
  from milvus_haystack import MilvusDocumentStore
10
  #Use this file to set up your Haystack pipeline and querying
11
 
12
+ file_type_classifier = FileTypeClassifier()
13
+
14
+ text_converter = TextConverter()
15
+ pdf_converter = PDFToTextConverter()
16
+
17
  @st.cache_resource(show_spinner=False)
18
  def start_preprocessor_node():
19
  print('initializing preprocessor node')
 
123
  return start_haystack_extractive(document_store, retriever, reader)
124
  elif task == 'rag':
125
  return start_haystack_rag(document_store, retriever, openai_key)
126
+
127
+