# Prepare the BigNews dataset under /dev/shm.
#
# Active pipeline: run process_bignews.py on the train and val JSON files,
# then run create_dataset.py on the results.
# NOTE(review): the argument meanings below are inferred from the paths used
# here (input JSON, output prefix; input prefix, output location) -- confirm
# against process_bignews.py and create_dataset.py.

# Stop at the first failing step so create_dataset.py never runs on missing or
# partial output, and treat unset variables (e.g. SPLIT below) as errors.
set -eu

# Alternative (disabled): copy previously processed text files into /dev/shm
# instead of re-running process_bignews.py.
# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt

# Process each split; the second argument is presumably an output prefix
# (the disabled pv lines suggest a "<prefix>_text.txt" file -- TODO confirm).
python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train
python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val

# Build the dataset from the /dev/shm/news* outputs produced above.
python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened

# "Lite" variant (disabled): same flow with --ratio 0.05.
# SPLIT must be set by the caller before enabling the first line (presumably
# to "train" or "val", matching the file names used above).
# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened