JustinLin610's picture
first commit
ee21b96
raw
history blame
961 Bytes
#!/bin/bash
if [ $# -ne 5 ]; then
echo "usage: $0 [dataset=wmt14/full] [langpair=en-de] [databin] [bpecode] [model]"
exit
fi
DATASET=$1
LANGPAIR=$2
DATABIN=$3
BPECODE=$4
MODEL=$5
SRCLANG=$(echo $LANGPAIR | cut -d '-' -f 1)
TGTLANG=$(echo $LANGPAIR | cut -d '-' -f 2)
BPEROOT=examples/backtranslation/subword-nmt/subword_nmt
if [ ! -e $BPEROOT ]; then
BPEROOT=subword-nmt/subword_nmt
if [ ! -e $BPEROOT ]; then
echo 'Cloning Subword NMT repository (for BPE pre-processing)...'
git clone https://github.com/rsennrich/subword-nmt.git
fi
fi
sacrebleu -t $DATASET -l $LANGPAIR --echo src \
| sacremoses tokenize -a -l $SRCLANG -q \
| python $BPEROOT/apply_bpe.py -c $BPECODE \
| fairseq-interactive $DATABIN --path $MODEL \
-s $SRCLANG -t $TGTLANG \
--beam 5 --remove-bpe --buffer-size 1024 --max-tokens 8000 \
| grep ^H- | cut -f 3- \
| sacremoses detokenize -l $TGTLANG -q \
| sacrebleu -t $DATASET -l $LANGPAIR