import glob
import os
import tarfile


def split_lome_files(lome_folder, output_folder):
    """Bundle LOME ``*.comm.*`` files into per-block tar archives.

    Each matched file is assigned a two-character block key taken from its
    document id, and is appended to ``{output_folder}/block_{key}.tar``.

    The document id is the second ``_``-separated token of the file's base
    name, considering only the part before the first ``.``
    (e.g. ``lome_12345.comm.json`` -> id ``12345`` -> key ``12``).

    Args:
        lome_folder: Directory whose immediate sub-directories contain the
            ``*.comm.*`` files.
        output_folder: Directory receiving the ``block_<key>.tar`` archives.
            It is created (including parents) if files were found and it
            does not exist yet.

    Raises:
        IndexError: If a matched file name has no ``_`` before its first
            ``.``, so no document id can be extracted.

    Note:
        Archives are opened in append mode: an existing archive is extended
        rather than overwritten, so re-running on the same input adds the
        same members again.
    """
    # NOTE(review): glob is called without ``recursive=True``, so ``**``
    # behaves like a single ``*`` and only files exactly one directory level
    # below ``lome_folder`` are matched. Kept as-is; confirm whether deeper
    # nesting should be included.
    files_by_block = {}
    # Sort so that archive member order does not depend on filesystem order.
    for file in sorted(glob.glob(f"{lome_folder}/**/*.comm.*")):
        doc_id = os.path.basename(file).split(".")[0].split("_")[1]
        doc_key = doc_id[:2]
        print(file, "->", doc_key)
        files_by_block.setdefault(doc_key, []).append(file)

    if not files_by_block:
        # Nothing matched: do not create an empty output directory.
        return

    # tarfile.open(..., "a") creates a missing archive but not its directory.
    os.makedirs(output_folder, exist_ok=True)

    # Open each archive once instead of once per input file.
    for doc_key, files in files_by_block.items():
        with tarfile.open(f"{output_folder}/block_{doc_key}.tar", "a") as tar_f:
            for file in files:
                tar_f.add(file)


if __name__ == "__main__":
    # Alternative datasets; enable the one that should be split.
    # split_lome_files("output/migration/lome/multilabel/lome_0shot/pavia/", "output/migration/lome/lome_0shot/multilabel_pavia_blocks")
    # split_lome_files("output/femicides/lome/lome_0shot/multilabel/rai/", "output/femicides/lome/lome_0shot/multilabel_rai_blocks")
    split_lome_files("output/femicides/lome/lome_0shot/multilabel/rai_ALL/", "output/femicides/lome/lome_0shot/multilabel_rai_ALL_blocks")
    # split_lome_files("output/femicides/lome/lome_0shot/multilabel/olv/", "output/femicides/lome/lome_0shot/multilabel_olv_blocks")
    # split_lome_files("output/crashes/lome/lome_0shot/multilabel/thecrashes/", "output/crashes/lome/lome_0shot/multilabel_thecrashes_blocks")