"""Build a Hugging Face dataset from two plain-text split files and save it to disk.

Usage:
    python SCRIPT PATH_PREFIX OUTPUT_PATH [--cache-dir DIR]

The input files are derived from PATH_PREFIX:
    PATH_PREFIX + "_train_text.txt"  -> "train" split
    PATH_PREFIX + "_val_text.txt"    -> "validation" split
"""

import argparse
from typing import Dict, List, Optional, Sequence

# Historical default cache location. It is kept as the default so existing
# invocations behave the same, but it can now be overridden because
# /dev/shm is not available on every platform.
DEFAULT_CACHE_DIR = "/dev/shm/.cache"


def build_parser() -> argparse.ArgumentParser:
    """Return the command-line parser for this script.

    Positional arguments:
        path_prefix: common prefix of the two input text files.
        output_path: directory the resulting dataset is saved to.

    Optional arguments:
        --cache-dir: directory passed to ``load_dataset`` as ``cache_dir``
            (defaults to ``DEFAULT_CACHE_DIR``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("path_prefix")
    parser.add_argument("output_path")
    parser.add_argument(
        "--cache-dir",
        default=DEFAULT_CACHE_DIR,
        help="cache directory handed to load_dataset (default: %(default)s)",
    )
    return parser


def data_files_for(path_prefix: str) -> Dict[str, List[str]]:
    """Map each split name to the list of text files derived from *path_prefix*.

    The prefix is concatenated verbatim (it is a file-name prefix, not a
    directory), so ``"data/tweets"`` yields ``"data/tweets_train_text.txt"``.
    """
    return {
        "train": [path_prefix + "_train_text.txt"],
        "validation": [path_prefix + "_val_text.txt"],
    }


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse *argv*, load the text splits, and save the dataset to disk.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # Imported here rather than at module top so that ``--help`` and usage
    # errors do not depend on the ``datasets`` package being importable.
    from datasets import load_dataset

    dataset = load_dataset(
        "text",
        data_files=data_files_for(args.path_prefix),
        cache_dir=args.cache_dir,
    )
    dataset.save_to_disk(args.output_path)


if __name__ == "__main__":
    main()