Spaces:
Sleeping
Sleeping
Update split_files_to_excel.py
Browse files
- split_files_to_excel.py +2 -2
split_files_to_excel.py
CHANGED
@@ -470,7 +470,7 @@ def split_doc_in_chunks(input_folder, base_folders):
|
|
470 |
print("Treatment of pdf file", path)
|
471 |
raw_chunks = split_pdf(path, input_folder)
|
472 |
for raw_chunk in raw_chunks:
|
473 |
-
|
474 |
raw_chunk.metadata["Base Folder"] = base_folders[i]
|
475 |
chunks = group_chunks_by_section(raw_chunks)
|
476 |
print(f"Document splitted in {len(chunks)} chunks")
|
@@ -585,7 +585,7 @@ def split_in_df(files):
|
|
585 |
else:
|
586 |
processed_files.append(file_path)
|
587 |
base_folders.append("")
|
588 |
-
|
589 |
print("Finished processing zip files\nSplitting files into chunks...")
|
590 |
documents = split_doc_in_chunks(processed_files, base_folders)
|
591 |
re_docs = resplit_by_end_of_sentence(documents, 1000, 100, 1500)
|
|
|
470 |
print("Treatment of pdf file", path)
|
471 |
raw_chunks = split_pdf(path, input_folder)
|
472 |
for raw_chunk in raw_chunks:
|
473 |
+
print(f"BASE zzzzz LIST : {base_folders} = i = {i}")
|
474 |
raw_chunk.metadata["Base Folder"] = base_folders[i]
|
475 |
chunks = group_chunks_by_section(raw_chunks)
|
476 |
print(f"Document splitted in {len(chunks)} chunks")
|
|
|
585 |
else:
|
586 |
processed_files.append(file_path)
|
587 |
base_folders.append("")
|
588 |
+
print(f"BASE FOLDERS LIST : {base_folders}")
|
589 |
print("Finished processing zip files\nSplitting files into chunks...")
|
590 |
documents = split_doc_in_chunks(processed_files, base_folders)
|
591 |
re_docs = resplit_by_end_of_sentence(documents, 1000, 100, 1500)
|