Spaces:
Runtime error
Runtime error
File size: 844 Bytes
10b0761 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
#!/usr/bin/env python3
#
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import sys
import sacremoses
def main(args):
"""Tokenizes, preserving tabs"""
mt = sacremoses.MosesTokenizer(lang=args.lang)
def tok(s):
return mt.tokenize(s, return_str=True)
for line in sys.stdin:
parts = list(map(tok, line.split("\t")))
print(*parts, sep="\t", flush=True)
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--lang", "-l", default="en")
parser.add_argument("--penn", "-p", action="store_true")
parser.add_argument("--fields", "-f", help="fields to tokenize")
args = parser.parse_args()
main(args)
|