Spaces:
Runtime error
Runtime error
File size: 727 Bytes
ee21b96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Use: echo {text} | python tokenize_indic.py {language}
import sys
from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
from indicnlp.tokenize.indic_tokenize import trivial_tokenize
factory = IndicNormalizerFactory()
normalizer = factory.get_normalizer(
sys.argv[1], remove_nuktas=False, nasals_mode="do_nothing"
)
for line in sys.stdin:
normalized_line = normalizer.normalize(line.strip())
tokenized_line = " ".join(trivial_tokenize(normalized_line, sys.argv[1]))
print(tokenized_line)
|