Spaces:
Build error
Build error
Document from dict
Browse files
app.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
import streamlit as st
|
2 |
from haystack.document_stores import InMemoryDocumentStore
|
3 |
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
|
|
|
4 |
import logging
|
5 |
|
6 |
document_store = InMemoryDocumentStore()
|
@@ -18,7 +19,7 @@ preprocessor = PreProcessor(
|
|
18 |
def pdf_to_document_store(pdf_files):
|
19 |
document_store.delete_documents()
|
20 |
converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
|
21 |
-
documents = [converter.convert(file_path=pdf.name, meta=None) for pdf in pdf_files]
|
22 |
preprocessed_docs = preprocessor.process(documents)
|
23 |
document_store.write_documents(preprocessed_docs)
|
24 |
return None
|
|
|
1 |
import streamlit as st
|
2 |
from haystack.document_stores import InMemoryDocumentStore
|
3 |
from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
|
4 |
+
from haystack.schema import Document
|
5 |
import logging
|
6 |
|
7 |
document_store = InMemoryDocumentStore()
|
|
|
19 |
def pdf_to_document_store(pdf_files):
    """Replace the document store's contents with the given PDF files.

    Clears the module-level ``document_store``, converts each PDF to text,
    wraps every conversion result in a ``Document``, runs the documents
    through the module-level ``preprocessor`` and writes the result back
    to the store.

    Args:
        pdf_files: Iterable of objects exposing a ``name`` attribute that
            is passed to the converter as the PDF's file path.

    Returns:
        None
    """
    document_store.delete_documents()
    converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
    # Build one Document per PDF. ``Document.from_dict`` has to be closed
    # before the ``for`` clause; otherwise it is called once with a
    # generator expression instead of once per conversion result.
    # NOTE(review): assumes ``converter.convert`` returns a single dict per
    # file -- confirm against the installed Haystack version.
    documents = [
        Document.from_dict(converter.convert(file_path=pdf.name, meta=None))
        for pdf in pdf_files
    ]
    preprocessed_docs = preprocessor.process(documents)
    document_store.write_documents(preprocessed_docs)
    return None
|