File size: 780 Bytes
5381499
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import sys

sys.path.append("..")

import os
import argparse
from torch.utils.data import random_split
from src.datamodule import VLSP2020TarDataset, VLSP2020Dataset


def prepare_tar_dataset(data_dir: str, dest_dir: str):
    dts = VLSP2020Dataset(data_dir)
    train_set, val_set = random_split(dts, [42_000, 14_427])

    VLSP2020TarDataset(os.path.join(dest_dir, "vlsp2020_train_set.tar")).convert(
        train_set
    )
    VLSP2020TarDataset(os.path.join(dest_dir, "vlsp2020_val_set.tar")).convert(val_set)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir", type=str, required=True)
    parser.add_argument("--dest_dir", type=str, required=True)
    args = parser.parse_args()

    prepare_tar_dataset(args.data_dir, args.dest_dir)