politics / create_dataset.sh
jacobthebanana's picture
Saving weights and logs of step 143142
cc8e143
raw
history blame contribute delete
774 Bytes
# Build the flattened BigNews dataset under /dev/shm.
#
# Usage: run from the directory that contains process_bignews.py and
# create_dataset.py (both are invoked by relative path).
#
# Stop at the first failing step (-e) and on any unset variable (-u), so that
# create_dataset.py never runs on missing or stale output left behind by a
# failed preprocessing step.
set -eu

# Alternative to the preprocessing step below (disabled): copy text files that
# were already processed on disk into /dev/shm, with pv showing progress.
# pv ~/data/bignews/processed/bignews_val_text.txt > /dev/shm/news_val_text.txt
# pv ~/data/bignews/processed/bignews_train_text.txt > /dev/shm/news_train_text.txt

# Preprocess the raw train/val JSON files. The second argument is an output
# path prefix (presumably expanded to files such as <prefix>_text.txt, judging
# by the pv lines above -- confirm against process_bignews.py).
python3 process_bignews.py ~/data/bignews/bignews_train.json /dev/shm/news_train
python3 process_bignews.py ~/data/bignews/bignews_val.json /dev/shm/news_val

# Build the dataset from the /dev/shm/news* inputs produced above.
python3 create_dataset.py /dev/shm/news /dev/shm/bignews_flattened

# "Lite" variant (disabled). Note that the first line needs SPLIT to be defined
# (e.g. train or val) before it is re-enabled; with `set -u` an unset SPLIT
# aborts the script instead of silently producing a bad path.
# NOTE(review): --ratio 0.05 presumably subsamples the input -- confirm.
# python3 process_bignews.py ~/data/bignews/bignews_$SPLIT.json ~/data/bignews/processed_lite/bignews_$SPLIT --ratio 0.05
# pv ~/data/bignews/processed_lite/bignews_val_text.txt > /dev/shm/news_lite_val_text.txt
# pv ~/data/bignews/processed_lite/bignews_train_text.txt > /dev/shm/news_lite_train_text.txt
# python3 create_dataset.py /dev/shm/news_lite /dev/shm/bignews_lite_flattened