File size: 1,101 Bytes
b11ac48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import os
import glob
import tarfile


def split_lome_files(lome_folder, output_folder):
    """Pack ``*.comm.*`` files into tar archives keyed by a document-id prefix.

    For every matching file, the document id is taken from the file's base
    name: the text before the first ``.`` is split on ``_`` and the second
    piece is used. The first two characters of that id form the block key,
    and the file is appended to ``<output_folder>/block_<key>.tar``.

    Args:
        lome_folder: Folder to search. The pattern ``<lome_folder>/**/*.comm.*``
            is evaluated without ``recursive=True``, so ``**`` behaves like
            ``*`` and only files exactly one directory below ``lome_folder``
            are matched.
        output_folder: Folder receiving the ``block_<key>.tar`` archives. It
            is created if it does not exist yet.

    Raises:
        IndexError: If a matched file name has no ``_`` before its first
            ``.`` (no document id can be extracted). All file names are
            parsed before any archive is written, so nothing is written
            in that case.

    Note:
        Archives are opened in append mode; running the function again over
        the same input adds the same members a second time.
    """
    # Group the paths first so that each archive is opened only once instead
    # of being reopened for every single file.
    paths_by_key = {}
    for path in glob.glob(f"{lome_folder}/**/*.comm.*"):
        doc_id = os.path.basename(path).split(".")[0].split("_")[1]
        doc_key = doc_id[:2]

        print(path, "->", doc_key)
        paths_by_key.setdefault(doc_key, []).append(path)

    if not paths_by_key:
        return

    # tarfile's "a" mode creates a missing archive but not a missing parent
    # directory, so make sure the destination exists before writing.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    for doc_key, paths in paths_by_key.items():
        with tarfile.open(f"{output_folder}/block_{doc_key}.tar", "a") as tar_f:
            for path in paths:
                tar_f.add(path)


if __name__ == "__main__":
    # Exactly one dataset is split per run. Other (source -> blocks) pairs
    # that can be swapped in:
    #   output/migration/lome/multilabel/lome_0shot/pavia/
    #       -> output/migration/lome/lome_0shot/multilabel_pavia_blocks
    #   output/femicides/lome/lome_0shot/multilabel/rai/
    #       -> output/femicides/lome/lome_0shot/multilabel_rai_blocks
    #   output/femicides/lome/lome_0shot/multilabel/olv/
    #       -> output/femicides/lome/lome_0shot/multilabel_olv_blocks
    #   output/crashes/lome/lome_0shot/multilabel/thecrashes/
    #       -> output/crashes/lome/lome_0shot/multilabel_thecrashes_blocks
    source_dir = "output/femicides/lome/lome_0shot/multilabel/rai_ALL/"
    blocks_dir = "output/femicides/lome/lome_0shot/multilabel_rai_ALL_blocks"
    split_lome_files(source_dir, blocks_dir)