File size: 825 Bytes
ee21b96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#!/usr/bin/env python3 -u
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import fileinput

import sacremoses


def main():
    """Detokenize tokenized text and print the result, one line per input line.

    Input lines are read from the files named on the command line; when no
    files are given, ``fileinput`` falls back to standard input. Each line is
    stripped, split on single spaces, passed through the Moses detokenizer,
    and then cleaned up with a fixed sequence of string replacements before
    being printed to standard output.
    """
    arg_parser = argparse.ArgumentParser(description="")
    arg_parser.add_argument("files", nargs="*", help="input files")
    cli = arg_parser.parse_args()

    detokenizer = sacremoses.MosesDetokenizer()

    # Post-detokenization cleanups, applied strictly in this order:
    # drop " @" and "@ " entirely, then close up the spaces left around
    # "=" and around a spaced en dash.
    cleanups = (
        (" @", ""),
        ("@ ", ""),
        (" =", "="),
        ("= ", "="),
        (" – ", "–"),
    )

    # hook_compressed lets fileinput open compressed inputs (e.g. .gz, .bz2)
    # based on their file extension.
    stream = fileinput.input(cli.files, openhook=fileinput.hook_compressed)
    for raw_line in stream:
        tokens = raw_line.strip().split(" ")
        text = detokenizer.detokenize(tokens)
        for old, new in cleanups:
            text = text.replace(old, new)
        print(text)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()