Spaces:
Sleeping
Sleeping
change chunk sizes
Browse files
- split_files_to_excel.py +1 -1
split_files_to_excel.py
CHANGED
@@ -645,7 +645,7 @@ def split_in_df(files, nb_pages):
|
|
645 |
print(f"BASE FOLDERS LIST : {base_folders}, FILES LIST : {processed_files}")
|
646 |
print("Finished processing zip files\nSplitting files into chunks...")
|
647 |
documents = split_doc_in_chunks(processed_files, base_folders, nb_pages)
|
648 |
-
re_docs = resplit_by_end_of_sentence(documents,
|
649 |
print("Finished splitting")
|
650 |
df = pd.DataFrame()
|
651 |
for re_doc in re_docs:
|
|
|
645 |
print(f"BASE FOLDERS LIST : {base_folders}, FILES LIST : {processed_files}")
|
646 |
print("Finished processing zip files\nSplitting files into chunks...")
|
647 |
documents = split_doc_in_chunks(processed_files, base_folders, nb_pages)
|
648 |
+
re_docs = resplit_by_end_of_sentence(documents, 700, 100, 1000)
|
649 |
print("Finished splitting")
|
650 |
df = pd.DataFrame()
|
651 |
for re_doc in re_docs:
|