File size: 774 Bytes
cc8e143
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt
# Build the flattened BigNews dataset under /dev/shm:
#   1. process_bignews.py on the train JSON -> output prefix /dev/shm/news_train
#   2. process_bignews.py on the val JSON   -> output prefix /dev/shm/news_val
#   3. create_dataset.py with prefix /dev/shm/news -> /dev/shm/bignews_flattened
# The steps are chained with `&&` so a failed preprocessing run stops the
# pipeline (and sets the exit status) instead of letting create_dataset.py
# run anyway. NOTE(review): step 3 presumably reads the /dev/shm/news_* files
# written by steps 1-2 -- confirm against create_dataset.py.
python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train &&
  python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val &&
  python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened

# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened