Kate0816 committed on
Commit
338ab6f
·
1 Parent(s): 920e041

Update modules/index_func.py

Browse files
Files changed (1) hide show
  1. modules/index_func.py +3 -2
modules/index_func.py CHANGED
@@ -38,8 +38,9 @@ def get_documents(file_src):
38
  with open(filepath, "rb") as pdfFileObj:
39
  pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
40
  for page in tqdm(pdfReader.pages):
41
- pdftext += page.extract_text() #每頁的文字加起來
42
- logging.info(f"pdftext:{pdftext}")
 
43
  texts = [Document(page_content=pdftext,
44
  metadata={"source": filepath})]
45
  elif file_type == ".docx":
 
38
  with open(filepath, "rb") as pdfFileObj:
39
  pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
40
  for page in tqdm(pdfReader.pages):
41
+ pdftmp = page.extract_text() #每頁的文字加起來
42
+ logging.info(f"pdftmp:{pdftmp}")
43
+ pdftext += pdftmp
44
  texts = [Document(page_content=pdftext,
45
  metadata={"source": filepath})]
46
  elif file_type == ".docx":