Spaces:
Runtime error
Runtime error
File size: 2,380 Bytes
10b0761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# All third-party tools are installed beneath the directory the script is
# launched from; CWD remembers that starting directory.
CWD="$(pwd)"
INSTALL_PATH="${CWD}/tokenizers/thirdparty"
MOSES="${INSTALL_PATH}/mosesdecoder"
# Fetch the Moses decoder (used for its tokenization scripts) unless a
# checkout already exists at $MOSES, and pin it to a fixed revision.
if [ ! -d "$MOSES" ]; then
  echo 'Cloning Moses github repository (for tokenization scripts)...'
  # The pinned commit is there to deal with differences in handling ' vs ".
  # `git -C` runs the checkout inside the clone without changing this shell's
  # working directory, and `&&` guarantees the checkout is never attempted in
  # the caller's own repository when the clone itself fails.
  if ! { git clone https://github.com/moses-smt/mosesdecoder.git "$MOSES" &&
    git -C "$MOSES" checkout 03578921cc1a03402; }; then
    echo "Failed to set up Moses in $MOSES" >&2
    # $MOSES was not a directory when this step started, so any directory
    # present now is a partial/unpinned checkout from this run. Remove it so
    # the next run retries instead of silently accepting the wrong revision.
    if [ -d "$MOSES" ]; then
      rm -rf -- "${MOSES:?}"
    fi
  fi
fi
# Fetch the WMT16 scripts (Romanian tokenization) unless already present.
# Expansions are quoted so an install path containing spaces or glob
# characters is passed to `[` and git as a single argument.
WMT16_SCRIPTS=$INSTALL_PATH/wmt16-scripts
if [ ! -d "$WMT16_SCRIPTS" ]; then
  echo 'Cloning Romanian tokenization scripts'
  git clone https://github.com/rsennrich/wmt16-scripts.git "$WMT16_SCRIPTS"
fi
# Clone and build KyTea in place (install prefix = its own source directory)
# unless the kytea binary has already been installed.
KYTEA=$INSTALL_PATH/kytea
if [ ! -f "$KYTEA/bin/kytea" ]; then
  # The build runs in a subshell so the caller's working directory is left
  # untouched, and every step is checked so that a failed clone or `cd` can
  # never cause autoreconf/configure/make to run in the wrong directory.
  (
    # A source tree left behind by an earlier, incomplete build is reused;
    # cloning over a non-empty directory would fail.
    if [ ! -d "$KYTEA" ]; then
      git clone https://github.com/neubig/kytea.git "$KYTEA" || exit 1
    fi
    cd "$KYTEA" || exit 1
    autoreconf -i || exit 1
    ./configure --prefix="$(pwd)" || exit 1
    make || exit 1
    make install || exit 1
  ) || echo "Failed to build KyTea in $KYTEA" >&2
fi
# Download and build mecab-ko and the mecab-ko-dic dictionary under
# $INSTALL_PATH unless the mecab binary is already installed.
# MECAB is exported, so it is visible to child processes.
export MECAB=$INSTALL_PATH/mecab-0.996-ko-0.9.2
if [ ! -f "$MECAB/bin/mecab" ]; then
  # Run the whole build in a subshell and stop at the first failing step, so
  # a failed download or `cd` cannot make later commands run in the wrong
  # directory or against a half-extracted tree.
  (
    mkdir -p "$INSTALL_PATH" || exit 1
    cd "$INSTALL_PATH" || exit 1

    # --- mecab-ko itself; installed into its own source directory ---
    # -f makes curl fail on an HTTP error instead of saving the error page
    # under the tarball's name.
    curl -fLO https://bitbucket.org/eunjeon/mecab-ko/downloads/mecab-0.996-ko-0.9.2.tar.gz || exit 1
    tar zxfv mecab-0.996-ko-0.9.2.tar.gz || exit 1
    cd mecab-0.996-ko-0.9.2/ || exit 1
    ./configure --prefix="$(pwd)" || exit 1
    make || exit 1
    make install || exit 1
    cd .. || exit 1

    # --- Korean dictionary, built against the mecab-ko installed above ---
    curl -fLO https://bitbucket.org/eunjeon/mecab-ko-dic/downloads/mecab-ko-dic-2.1.1-20180720.tar.gz || exit 1
    tar zxfv mecab-ko-dic-2.1.1-20180720.tar.gz || exit 1
    cd mecab-ko-dic-2.1.1-20180720/ || exit 1
    # NOTE(review): an autogen.sh failure is only reported, not fatal, in case
    # the release tarball already carries a usable ./configure - confirm.
    ./autogen.sh || echo 'warning: autogen.sh failed; continuing with ./configure' >&2
    ./configure --prefix="$(pwd)" \
      --with-dicdir="$MECAB/lib/mecab/dic/mecab-ko-dic" \
      --with-mecab-config="$MECAB/bin/mecab-config" || exit 1
    make || exit 1
    # Write the dictionary location into mecab's rc file before installing
    # the dictionary.
    echo "dicdir=$MECAB/lib/mecab/dic/mecab-ko-dic" > "$MECAB/etc/mecabrc" || exit 1
    make install || exit 1
  ) || echo "Failed to build mecab-ko in $MECAB" >&2
  # The build ran in a subshell; explicitly return to the directory the
  # script was started from.
  cd "$CWD"
fi
# Fetch the indic_nlp_resources repository unless already present.
# Expansions are quoted so an install path containing spaces or glob
# characters is passed to `[` and git as a single argument.
INDIC_RESOURCES_PATH=$INSTALL_PATH/indic_nlp_resources
if [ ! -d "$INDIC_RESOURCES_PATH" ]; then
  echo 'Cloning indic_nlp_resources'
  git clone https://github.com/anoopkunchukuttan/indic_nlp_resources.git "$INDIC_RESOURCES_PATH"
fi
# Produce $INSTALL_PATH/seg_my.py from the myseg.py script shipped in the
# WAT2020 my-en archive, unless it has already been generated.
if [ ! -f "$INSTALL_PATH/seg_my.py" ]; then
  (
    mkdir -p "$INSTALL_PATH" || exit 1
    cd "$INSTALL_PATH" || exit 1
    # -O pins the output name so a re-run overwrites the archive instead of
    # saving a numbered copy next to a possibly truncated one.
    wget -O wat2020.my-en.zip http://lotus.kuee.kyoto-u.ac.jp/WAT/my-en-data/wat2020.my-en.zip || exit 1
    # -o overwrites files from an earlier extraction without prompting.
    unzip -o wat2020.my-en.zip || exit 1
    # switch to python3:
    #   1. prefix lines beginning with "sys.std" with ### (comments them out),
    #   2. turn "### sys" back into "sys" (presumably enabling alternatives
    #      that are commented out in the upstream file - verify),
    #   3. replace unichr with chr.
    # The result goes to a temporary name and is renamed only on success, so
    # a failed step can never leave an empty seg_my.py that would make later
    # runs skip this block.
    if sed -e 's/^sys.std/###sys.std/g' \
      -e 's/### sys/sys/g' \
      -e 's/unichr/chr/g' \
      wat2020.my-en/myseg.py > seg_my.py.tmp; then
      mv -- seg_my.py.tmp seg_my.py || exit 1
    else
      rm -f -- seg_my.py.tmp
      exit 1
    fi
  ) || echo "Failed to create $INSTALL_PATH/seg_my.py" >&2
  # The work ran in a subshell; explicitly return to the directory the
  # script was started from.
  cd "$CWD"
fi
# Python packages installed with pip, one per line.
pip install \
  pythainlp \
  sacrebleu \
  indic-nlp-library
|