File size: 774 Bytes
cc8e143 |
1 2 3 4 5 6 7 8 9 10 |
# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt
# Full BigNews pipeline:
#   1. run process_bignews.py on the train JSON, output prefix /dev/shm/news_train
#   2. run process_bignews.py on the val JSON, output prefix /dev/shm/news_val
#   3. run create_dataset.py on the /dev/shm/news prefix, writing
#      /dev/shm/bignews_flattened
# NOTE(review): step 3 presumably consumes the per-split outputs of steps 1-2
# (they share the /dev/shm/news prefix) — confirm against create_dataset.py.
# The steps are chained with && so a failing step skips everything after it and
# its non-zero status becomes the status of the whole chain, instead of
# create_dataset.py running against missing or stale inputs.
python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train &&
  python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val &&
  python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened
# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened