Spaces:
Runtime error
Runtime error
Update modules/index_func.py
Browse files - modules/index_func.py +5 -2
modules/index_func.py
CHANGED
@@ -36,9 +36,12 @@ def get_documents(file_src):
|
|
36 |
except:
|
37 |
pdftext = ""
|
38 |
with open(filepath, "rb") as pdfFileObj:
|
39 |
-
pdfReader = PyPDF2.PdfReader(pdfFileObj)
|
40 |
for page in tqdm(pdfReader.pages):
|
41 |
-
|
|
|
|
|
|
|
42 |
texts = [Document(page_content=pdftext,
|
43 |
metadata={"source": filepath})]
|
44 |
elif file_type == ".docx":
|
|
|
36 |
except:
|
37 |
pdftext = ""
|
38 |
with open(filepath, "rb") as pdfFileObj:
|
39 |
+
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
40 |
for page in tqdm(pdfReader.pages):
|
41 |
+
pdftmp = page.extract_text() #每頁的文字加起來
|
42 |
+
logging.info(f"pdftmp:{pdftmp}")
|
43 |
+
pdftext += pdftmp
|
44 |
+
|
45 |
texts = [Document(page_content=pdftext,
|
46 |
metadata={"source": filepath})]
|
47 |
elif file_type == ".docx":
|