politics / create_dataset.py
jacobthebanana's picture
Saving weights and logs of step 143142
cc8e143
raw
history blame
496 Bytes
import argparse
from datasets import load_dataset
# Build a train/validation text dataset from two plain-text files that share
# a common path prefix, then persist the result with save_to_disk().
#
# Usage: create_dataset.py PATH_PREFIX OUTPUT_PATH [--cache_dir DIR]
parser = argparse.ArgumentParser(
    description=(
        "Load <path_prefix>_train_text.txt and <path_prefix>_val_text.txt "
        "as a text dataset and save it to output_path."
    ),
)
parser.add_argument(
    "path_prefix",
    help='Input path prefix; "_train_text.txt" and "_val_text.txt" are appended to it.',
)
parser.add_argument(
    "output_path",
    help="Destination passed to save_to_disk() for the loaded dataset.",
)
# The cache location used to be hard-coded. The default is unchanged so
# existing invocations behave identically; /dev/shm is typically a RAM-backed
# filesystem on Linux and may not exist on other platforms, hence the override.
parser.add_argument(
    "--cache_dir",
    default="/dev/shm/.cache",
    help="Cache directory handed to load_dataset() (default: %(default)s).",
)
args = parser.parse_args()

path_prefix: str = args.path_prefix
output_path: str = args.output_path
cache_dir: str = args.cache_dir

# Each split is read from a single file derived from the shared prefix.
dataset = load_dataset(
    "text",
    data_files={
        "train": [path_prefix + "_train_text.txt"],
        "validation": [path_prefix + "_val_text.txt"],
    },
    cache_dir=cache_dir,
)
dataset.save_to_disk(output_path)