Kate0816 committed on
Commit
920e041
·
1 Parent(s): bb875a8

Update modules/index_func.py

Browse files
Files changed (1) hide show
  1. modules/index_func.py +2 -4
modules/index_func.py CHANGED
@@ -38,10 +38,8 @@ def get_documents(file_src):
38
  with open(filepath, "rb") as pdfFileObj:
39
  pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
40
  for page in tqdm(pdfReader.pages):
41
- pdftmp = page.extract_text() #每頁的文字加起來
42
- logging.info(f"pdftmp:{pdftmp}")
43
- pdftext += pdftmp
44
-
45
  texts = [Document(page_content=pdftext,
46
  metadata={"source": filepath})]
47
  elif file_type == ".docx":
 
38
  with open(filepath, "rb") as pdfFileObj:
39
  pdfReader = PyPDF2.PdfReader(pdfFileObj) #pdfReader.pages 有多少頁
40
  for page in tqdm(pdfReader.pages):
41
+ pdftext += page.extract_text() #每頁的文字加起來
42
+ logging.info(f"pdftext:{pdftext}")
 
 
43
  texts = [Document(page_content=pdftext,
44
  metadata={"source": filepath})]
45
  elif file_type == ".docx":