Spaces:
Build error
Build error
revert to only pdfs
Browse files
app.py
CHANGED
@@ -32,21 +32,9 @@ def pdf_to_document_store(pdf_file):
|
|
32 |
preprocessed_docs=preprocessor.process(doc)
|
33 |
document_store.write_documents(preprocessed_docs)
|
34 |
temp_file.close()
|
35 |
-
|
36 |
-
def crawl_url(url):
|
37 |
-
crawler = Crawler(output_dir="crawled_files", overwrite_existing_files=True, crawler_depth=1)
|
38 |
-
try:
|
39 |
-
docs = crawler.crawl(urls=[url])
|
40 |
-
preprocessed_docs = preprocessor.process(docs)
|
41 |
-
document_store.write_documents(preprocessed_docs)
|
42 |
-
except:
|
43 |
-
st.write('We were unable to crawl the contents of that URL, please try something else')
|
44 |
|
45 |
def summarize(content):
|
46 |
-
|
47 |
-
pdf_to_document_store(content)
|
48 |
-
elif st.session_state.url:
|
49 |
-
crawl_url(content)
|
50 |
summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
51 |
return summaries
|
52 |
|
@@ -55,8 +43,6 @@ def set_state_if_absent(key, value):
|
|
55 |
st.session_state[key] = value
|
56 |
|
57 |
set_state_if_absent("summaries", None)
|
58 |
-
set_state_if_absent("url", False)
|
59 |
-
set_state_if_absent("pdf", False)
|
60 |
|
61 |
document_store, summarizer, preprocessor = start_haystack()
|
62 |
|
@@ -69,24 +55,11 @@ This Summarization demo uses a [Haystack TransformerSummarizer node](https://hay
|
|
69 |
""", unsafe_allow_html=True)
|
70 |
|
71 |
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
|
72 |
-
url = st.text_input(label="enter a URL")
|
73 |
-
|
74 |
-
if (validators.url(url)) and (uploaded_file is None):
|
75 |
-
if st.button('Summarize contents of URL'):
|
76 |
-
with st.spinner("π Please wait while we produce a summary..."):
|
77 |
-
try:
|
78 |
-
st.session_state.pdf = False
|
79 |
-
st.session_state.url = True
|
80 |
-
st. session_state.summaries = summarize(url)
|
81 |
-
except Exception as e:
|
82 |
-
logging.exception(e)
|
83 |
|
84 |
-
if
|
85 |
if st.button('Summarize Document'):
|
86 |
with st.spinner("π Please wait while we produce a summary..."):
|
87 |
try:
|
88 |
-
st.session_state.pdf = True
|
89 |
-
st.session_state.url = False
|
90 |
st.session_state.summaries = summarize(uploaded_file)
|
91 |
except Exception as e:
|
92 |
logging.exception(e)
|
|
|
32 |
preprocessed_docs=preprocessor.process(doc)
|
33 |
document_store.write_documents(preprocessed_docs)
|
34 |
temp_file.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
def summarize(content):
|
37 |
+
pdf_to_document_store(content)
|
|
|
|
|
|
|
38 |
summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
|
39 |
return summaries
|
40 |
|
|
|
43 |
st.session_state[key] = value
|
44 |
|
45 |
set_state_if_absent("summaries", None)
|
|
|
|
|
46 |
|
47 |
document_store, summarizer, preprocessor = start_haystack()
|
48 |
|
|
|
55 |
""", unsafe_allow_html=True)
|
56 |
|
57 |
uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
if uploaded_file is not None :
|
60 |
if st.button('Summarize Document'):
|
61 |
with st.spinner("π Please wait while we produce a summary..."):
|
62 |
try:
|
|
|
|
|
63 |
st.session_state.summaries = summarize(uploaded_file)
|
64 |
except Exception as e:
|
65 |
logging.exception(e)
|