Spaces:
Build error
Build error
cache haystack setup
Browse files
app.py
CHANGED
@@ -4,17 +4,20 @@ from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConver
|
|
4 |
from haystack.schema import Document
|
5 |
import logging
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
18 |
|
19 |
|
20 |
def pdf_to_document_store(pdf_files):
|
@@ -33,6 +36,7 @@ def summarize(files):
|
|
33 |
summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
34 |
st.write(summary)
|
35 |
|
|
|
36 |
|
37 |
uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
|
38 |
|
|
|
4 |
from haystack.schema import Document
|
5 |
import logging
|
6 |
|
7 |
+
@st.cache(
    hash_funcs={"builtins.SwigPyObject": lambda _: None},
    allow_output_mutation=True,
)
def start_haystack():
    """Create the Haystack components used by the app.

    The function is wrapped in ``st.cache`` so the components are built
    once and reused instead of being rebuilt each time the script runs.
    The ``hash_funcs`` entry supplies a constant hash for
    ``SwigPyObject`` values, and ``allow_output_mutation=True`` lets the
    returned objects be modified by callers (for example, writing
    documents into the store).

    Returns:
        A ``(document_store, summarizer, preprocessor)`` tuple.
    """
    store = InMemoryDocumentStore()

    # Cleaning and splitting options for uploaded text: chunks of about
    # 100 words, kept on sentence boundaries, with a 3-word overlap.
    preprocessor_options = {
        "clean_empty_lines": True,
        "clean_whitespace": True,
        "clean_header_footer": True,
        "split_by": "word",
        "split_length": 100,
        "split_respect_sentence_boundary": True,
        "split_overlap": 3,
    }
    chunker = PreProcessor(**preprocessor_options)

    summary_model = TransformersSummarizer(
        model_name_or_path="google/pegasus-xsum"
    )

    return store, summary_model, chunker
|
21 |
|
22 |
|
23 |
def pdf_to_document_store(pdf_files):
|
|
|
36 |
summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
37 |
st.write(summary)
|
38 |
|
39 |
+
document_store, summarizer, preprocessor = start_haystack()
|
40 |
|
41 |
uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
|
42 |
|