Spaces:
Runtime error
Runtime error
#!/usr/bin/env python3 | |
# Copyright (c) Facebook, Inc. and its affiliates. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
# Use: echo {text} | python tokenize_indic.py {language} | |
import sys | |
from indicnlp.normalize.indic_normalize import IndicNormalizerFactory | |
from indicnlp.tokenize.indic_tokenize import trivial_tokenize | |
factory = IndicNormalizerFactory() | |
normalizer = factory.get_normalizer( | |
sys.argv[1], remove_nuktas=False, nasals_mode="do_nothing" | |
) | |
for line in sys.stdin: | |
normalized_line = normalizer.normalize(line.strip()) | |
tokenized_line = " ".join(trivial_tokenize(normalized_line, sys.argv[1])) | |
print(tokenized_line) | |