JustinLin610's picture
first commit
ee21b96
raw
history blame
846 Bytes
#!/bin/bash
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Guard: WORKDIR_ROOT must be set and non-empty; it is the parent of the
# ML50 working directory used further down in this script.
# The expansion is quoted (with an empty default) so the test behaves the same
# for unset, empty, or whitespace-containing values. On failure the message
# goes to stderr and the script exits non-zero so callers can detect it.
if [[ -z "${WORKDIR_ROOT:-}" ]]; then
  echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." >&2
  exit 1
fi
# Guard: SPM_PATH must be set and non-empty.
# NOTE(review): SPM_PATH is not referenced again in this script; presumably it
# is read from the environment by one of the Python steps -- confirm.
# The expansion is quoted (with an empty default) so the test is well-formed
# for any value. On failure the message goes to stderr and the script exits
# non-zero instead of reporting success.
if [[ -z "${SPM_PATH:-}" ]]; then
  echo "Please install sentence piecence from https://github.com/google/sentencepiece and set SPM_PATH pointing to the installed spm_encode.py. Exitting..." >&2
  exit 1
fi
# ML50 preprocessing pipeline, run from the directory that holds the helper
# scripts (they are invoked via relative ./ paths):
#   1. dedup_all.py                   $ML50/raw   -> $ML50/dedup
#   2. remove_valid_test_in_train.py  $ML50/dedup -> $ML50/clean
#   3. binarize.py                    reads $ML50/clean
# All path expansions are quoted so a WORKDIR_ROOT containing spaces or glob
# characters is passed through as a single argument.
ML50="${WORKDIR_ROOT}/ML50"

# Step 2 writes to "clean", which was never created here before; create it up
# front. "cleaned_dedup" is still created so anything relying on that
# directory existing keeps working.
# NOTE(review): nothing in this script uses cleaned_dedup -- confirm whether it
# can be dropped.
mkdir -p -- "$ML50/dedup" "$ML50/cleaned_dedup" "$ML50/clean" || exit 1

# Each step consumes the previous step's output, so stop at the first failure
# rather than running later steps on missing or partial data.
python ./dedup_all.py --from-folder "$ML50/raw" --to-folder "$ML50/dedup" || exit 1
python ./remove_valid_test_in_train.py --from-folder "$ML50/dedup" --to-folder "$ML50/clean" || exit 1
python ./binarize.py --raw-folder "$ML50/clean"