from argparse import ArgumentParser
from typing import Iterator, List

from diaparser.parsers import Parser

from Tokenizer.src.tokenizer import split_into_sentences
# Command-line interface. Both options are mandatory: without them the script
# would pass None to Parser.load() / open() and fail with an unhelpful error,
# so argparse is asked to reject the invocation up front with a usage message.
parser = ArgumentParser(
    description='Parse a text file with a diaparser model and print the result.'
)
parser.add_argument(
    '--parser',
    required=True,
    help='model identifier or path handed to diaparser Parser.load()',
)
parser.add_argument(
    '--infile',
    required=True,
    help='UTF-8 text file to parse, one input unit per line',
)
args = parser.parse_args()

# Load the model once at start-up; the rest of the script reuses this object.
PARSER = Parser.load(args.parser)
def read_test_file(file: str) -> Iterator[List[str]]:
    """Yield one list of whitespace-separated tokens per line of *file*.

    Each line is passed to ``split_into_sentences``; the pieces it returns
    are joined with single spaces and then split on whitespace, so everything
    found on one input line ends up in a single flat token list. A line that
    produces no tokens yields an empty list, which keeps the output aligned
    one-to-one with the input lines.

    Args:
        file: Path of the text file to read; it is decoded as UTF-8.

    Yields:
        The tokens of one input line, in their original order.
    """
    with open(file, 'r', encoding='utf-8') as infile:
        for line in infile:
            # str.split() already returns a new list, so no extra
            # comprehension is needed to copy it.
            yield ' '.join(split_into_sentences(line)).split()
# Materialise the generator: predict() is given the complete list of
# token lists rather than a lazy iterator.
test_file = list(read_test_file(args.infile))

# NOTE(review): prob=True is forwarded to predict() unchanged; presumably it
# requests probabilities alongside the predictions -- confirm in diaparser docs.
dataset = PARSER.predict(test_file, prob=True)

# Print each predicted sentence using its own string representation.
for sentence in dataset.sentences:
    print(sentence)