Spaces:
Runtime error
Runtime error
Ignoring book if missing.
Browse files
utils.py
CHANGED
@@ -183,12 +183,13 @@ def get_links(index_url, paths):
|
|
183 |
|
184 |
def get_document_data(book_file, book_url):
|
185 |
document_list = []
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
|
|
192 |
|
193 |
# print("document list" + str(len(document_list)))
|
194 |
return document_list
|
|
|
183 |
|
184 |
def get_document_data(book_file, book_url):
    """Build one ``Document`` per page of the PDF stored at *book_file*.

    Args:
        book_file: Path of the PDF file on disk.
        book_url: Value stored under the ``"source"`` metadata key of every
            returned document.

    Returns:
        A list of ``Document`` objects in page order. The list is empty when
        *book_file* is not an existing regular file.
    """
    # A missing book is ignored instead of being treated as an error.
    if not os.path.isfile(book_file):
        return []

    # Text extraction stays inside the ``with`` block so the reader still has
    # an open file handle while the pages are being processed.
    with open(book_file, 'rb') as f:
        pdf_reader = PdfReader(f)
        return [
            Document(
                page_content=page.extract_text(),
                # A fresh dict per page so documents never share metadata.
                metadata={"source": book_url},
            )
            for page in pdf_reader.pages
        ]
|