# Run process_bignews.py on the bignews JSON file for the split named by $SPLIT.
# The second argument is a path under /dev/shm suffixed with the split name
# (presumably the output location -- confirm against process_bignews.py).
#
# Alternative invocation kept for reference; it targets
# ~/data/bignews/processed_lite instead and passes --ratio 0.05:
#   python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05

# Abort with a clear message when SPLIT is unset or empty, rather than handing
# the malformed path "bignews_.json" to the script.
: "${SPLIT:?SPLIT must be set to the name of the split to process}"

# The tilde stays outside the quotes so the shell still expands it; the rest of
# each path is quoted so a split name containing whitespace or glob characters
# remains a single argument.
python3 process_bignews.py \
  ~/data/bignews/"bignews_${SPLIT}.json" \
  "/dev/shm/news_${SPLIT}"