Spaces:
Sleeping
Sleeping
sourabhzanwar
committed on
Commit
•
203aa9d
1
Parent(s):
73b392d
Added reset documents functionality.
Browse files
- .streamlit/config.toml +1 -1
- app.py +32 -19
- utils/haystack.py +8 -1
.streamlit/config.toml
CHANGED
@@ -3,4 +3,4 @@ primaryColor = "#E694FF"
|
|
3 |
backgroundColor = "#FFFFFF"
|
4 |
secondaryBackgroundColor = "#F0F0F0"
|
5 |
textColor = "#262730"
|
6 |
-
font = "sans
|
|
|
3 |
backgroundColor = "#FFFFFF"
|
4 |
secondaryBackgroundColor = "#F0F0F0"
|
5 |
textColor = "#262730"
|
6 |
+
font = "sans serif"
|
app.py
CHANGED
@@ -46,8 +46,8 @@ import haystack
|
|
46 |
DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
|
47 |
# Define a function to handle file uploads
|
48 |
def upload_files():
|
49 |
-
uploaded_files =
|
50 |
-
"upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="
|
51 |
)
|
52 |
return uploaded_files
|
53 |
|
@@ -77,6 +77,26 @@ def process_file(data_file, preprocesor, document_store):
|
|
77 |
except Exception as e:
|
78 |
print(e)
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
try:
|
81 |
args = parser.parse_args()
|
82 |
preprocesor = start_preprocessor_node()
|
@@ -94,6 +114,7 @@ try:
|
|
94 |
}
|
95 |
)
|
96 |
st.sidebar.image("ml_logo.png", use_column_width=True)
|
|
|
97 |
|
98 |
# Sidebar for Task Selection
|
99 |
st.sidebar.header('Options:')
|
@@ -118,28 +139,20 @@ try:
|
|
118 |
set_initial_state()
|
119 |
|
120 |
st.write('# ' + args.name)
|
121 |
-
|
122 |
-
|
123 |
# File upload block
|
124 |
if not DISABLE_FILE_UPLOAD:
|
125 |
-
st.sidebar.
|
|
|
126 |
#data_files = st.sidebar.file_uploader(
|
127 |
# "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
|
128 |
#)
|
129 |
data_files = upload_files()
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
try:
|
135 |
-
#raw_json = upload_doc(data_file)
|
136 |
-
# Call the process_file function for each uploaded file
|
137 |
-
if args.store == 'inmemory':
|
138 |
-
processed_data = process_file(data_file, preprocesor, document_store)
|
139 |
-
st.sidebar.write(str(data_file.name) + " ✅ ")
|
140 |
-
except Exception as e:
|
141 |
-
st.sidebar.write(str(data_file.name) + " ❌ ")
|
142 |
-
st.sidebar.write("_This file could not be parsed, see the logs for more information._")
|
143 |
|
144 |
if "question" not in st.session_state:
|
145 |
st.session_state.question = ""
|
@@ -242,4 +255,4 @@ try:
|
|
242 |
st.table(df)
|
243 |
|
244 |
except SystemExit as e:
|
245 |
-
os._exit(e.code)
|
|
|
46 |
DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
|
47 |
# Define a function to handle file uploads
|
48 |
def upload_files():
|
49 |
+
uploaded_files = upload_container.file_uploader(
|
50 |
+
"upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="collapsed"
|
51 |
)
|
52 |
return uploaded_files
|
53 |
|
|
|
77 |
except Exception as e:
|
78 |
print(e)
|
79 |
|
80 |
+
def reset_documents():
|
81 |
+
print('Reseting documents list')
|
82 |
+
document_store.delete_documents()
|
83 |
+
|
84 |
+
def upload_document():
|
85 |
+
upload_status = 0
|
86 |
+
if data_files is not None:
|
87 |
+
for data_file in data_files:
|
88 |
+
# Upload file
|
89 |
+
if data_file:
|
90 |
+
try:
|
91 |
+
#raw_json = upload_doc(data_file)
|
92 |
+
# Call the process_file function for each uploaded file
|
93 |
+
if args.store == 'inmemory':
|
94 |
+
processed_data = process_file(data_file, preprocesor, document_store)
|
95 |
+
upload_container.write(str(data_file.name) + " ✅ ")
|
96 |
+
except Exception as e:
|
97 |
+
upload_container.write(str(data_file.name) + " ❌ ")
|
98 |
+
upload_container.write("_This file could not be parsed, see the logs for more information._")
|
99 |
+
|
100 |
try:
|
101 |
args = parser.parse_args()
|
102 |
preprocesor = start_preprocessor_node()
|
|
|
114 |
}
|
115 |
)
|
116 |
st.sidebar.image("ml_logo.png", use_column_width=True)
|
117 |
+
|
118 |
|
119 |
# Sidebar for Task Selection
|
120 |
st.sidebar.header('Options:')
|
|
|
139 |
set_initial_state()
|
140 |
|
141 |
st.write('# ' + args.name)
|
142 |
+
|
143 |
+
|
144 |
# File upload block
|
145 |
if not DISABLE_FILE_UPLOAD:
|
146 |
+
upload_container = st.sidebar.container()
|
147 |
+
upload_container.write("## File Upload:")
|
148 |
#data_files = st.sidebar.file_uploader(
|
149 |
# "upload", type=["pdf", "txt", "docx"], accept_multiple_files=True, label_visibility="hidden"
|
150 |
#)
|
151 |
data_files = upload_files()
|
152 |
+
|
153 |
+
upload_container.button('Upload Files', on_click=upload_document, args=())
|
154 |
+
|
155 |
+
st.sidebar.button("Reset documents", on_click=reset_documents, args=())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
if "question" not in st.session_state:
|
158 |
st.session_state.question = ""
|
|
|
255 |
st.table(df)
|
256 |
|
257 |
except SystemExit as e:
|
258 |
+
os._exit(e.code)
|
utils/haystack.py
CHANGED
@@ -5,10 +5,15 @@ from haystack import Pipeline
|
|
5 |
from haystack.schema import Answer
|
6 |
from haystack.document_stores import BaseDocumentStore
|
7 |
from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
|
8 |
-
from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor
|
9 |
from milvus_haystack import MilvusDocumentStore
|
10 |
#Use this file to set up your Haystack pipeline and querying
|
11 |
|
|
|
|
|
|
|
|
|
|
|
12 |
@st.cache_resource(show_spinner=False)
|
13 |
def start_preprocessor_node():
|
14 |
print('initializing preprocessor node')
|
@@ -118,3 +123,5 @@ def initialize_pipeline(task, document_store, retriever, reader, openai_key = ""
|
|
118 |
return start_haystack_extractive(document_store, retriever, reader)
|
119 |
elif task == 'rag':
|
120 |
return start_haystack_rag(document_store, retriever, openai_key)
|
|
|
|
|
|
5 |
from haystack.schema import Answer
|
6 |
from haystack.document_stores import BaseDocumentStore
|
7 |
from haystack.document_stores import InMemoryDocumentStore, OpenSearchDocumentStore, WeaviateDocumentStore
|
8 |
+
from haystack.nodes import EmbeddingRetriever, FARMReader, PromptNode, PreProcessor, TextConverter, FileTypeClassifier, PDFToTextConverter
|
9 |
from milvus_haystack import MilvusDocumentStore
|
10 |
#Use this file to set up your Haystack pipeline and querying
|
11 |
|
12 |
+
file_type_classifier = FileTypeClassifier()
|
13 |
+
|
14 |
+
text_converter = TextConverter()
|
15 |
+
pdf_converter = PDFToTextConverter()
|
16 |
+
|
17 |
@st.cache_resource(show_spinner=False)
|
18 |
def start_preprocessor_node():
|
19 |
print('initializing preprocessor node')
|
|
|
123 |
return start_haystack_extractive(document_store, retriever, reader)
|
124 |
elif task == 'rag':
|
125 |
return start_haystack_rag(document_store, retriever, openai_key)
|
126 |
+
|
127 |
+
|