YchKhan committed on
Commit
288dd42
1 Parent(s): bf3f371

Change chunk sizes: resplit_by_end_of_sentence arguments (1000, 100, 1500) -> (700, 100, 1000)

Browse files
Files changed (1) hide show
  1. split_files_to_excel.py +1 -1
split_files_to_excel.py CHANGED
@@ -645,7 +645,7 @@ def split_in_df(files, nb_pages):
645
  print(f"BASE FOLDERS LIST : {base_folders}, FILES LIST : {processed_files}")
646
  print("Finished processing zip files\nSplitting files into chunks...")
647
  documents = split_doc_in_chunks(processed_files, base_folders, nb_pages)
648
- re_docs = resplit_by_end_of_sentence(documents, 1000, 100, 1500)
649
  print("Finished splitting")
650
  df = pd.DataFrame()
651
  for re_doc in re_docs:
 
645
  print(f"BASE FOLDERS LIST : {base_folders}, FILES LIST : {processed_files}")
646
  print("Finished processing zip files\nSplitting files into chunks...")
647
  documents = split_doc_in_chunks(processed_files, base_folders, nb_pages)
648
+ re_docs = resplit_by_end_of_sentence(documents, 700, 100, 1000)
649
  print("Finished splitting")
650
  df = pd.DataFrame()
651
  for re_doc in re_docs: