Spaces:
Runtime error
Runtime error
Update modules/index_func.py
Browse files
- modules/index_func.py +3 -2
modules/index_func.py
CHANGED
@@ -38,8 +38,9 @@ def get_documents(file_src):
|
|
38 |
with open(filepath, "rb") as pdfFileObj:
|
39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
40 |
for page in tqdm(pdfReader.pages):
|
41 |
-
|
42 |
-
logging.info(f"
|
|
|
43 |
texts = [Document(page_content=pdftext,
|
44 |
metadata={"source": filepath})]
|
45 |
elif file_type == ".docx":
|
|
|
38 |
with open(filepath, "rb") as pdfFileObj:
|
39 |
pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
|
40 |
for page in tqdm(pdfReader.pages):
|
41 |
+
pdftmp = page.extract_text() #每頁的文字加起來
|
42 |
+
logging.info(f"pdftmp:{pdftmp}")
|
43 |
+
pdftext += pdftmp
|
44 |
texts = [Document(page_content=pdftext,
|
45 |
metadata={"source": filepath})]
|
46 |
elif file_type == ".docx":
|