# Launcher for src/create_dataset.py.
#
# Exports the locale and dataset settings, then invokes the Python script with
# the output directory, dataset name and dataset config name as options.
# NOTE(review): the path src/create_dataset.py is relative, so this presumably
# has to be run from the repository root — confirm.

# Use a UTF-8 locale for this shell and every child process.
export LC_ALL=C.UTF-8
export LANG=C.UTF-8

# Value passed to --output_dir.
export OUTPUT_DIR=/home/m3hrdadfi/data/

# Values passed to --dataset_name and --dataset_config_name.
export DATASET_NAME=oscar
export DATASET_CONFIG_NAME=unshuffled_deduplicated_fa

# Each backslash must be the very last character on its line, otherwise the
# options below are not passed to the python command.
python src/create_dataset.py \
  --output_dir="$OUTPUT_DIR" \
  --dataset_name="$DATASET_NAME" \
  --dataset_config_name="$DATASET_CONFIG_NAME"