"""Run a diaparser model over a text file and print the predicted sentences.

Usage:
    python <this script> --parser <model> --infile <text file>

Each line of the input file is passed through ``split_into_sentences`` and
flattened into a single whitespace-separated token list, so one input line
becomes one entry handed to the parser.
"""
from argparse import ArgumentParser

from diaparser.parsers import Parser

from Tokenizer.src.tokenizer import split_into_sentences

# Both options are used unconditionally below, so they are marked required:
# without that, a missing option would reach Parser.load()/open() as None and
# fail with an unrelated-looking error instead of a usage message.
parser = ArgumentParser(
    description='Parse a text file with a diaparser model and print the result.'
)
parser.add_argument(
    '--parser',
    required=True,
    help='model identifier or path passed to Parser.load()',
)
parser.add_argument(
    '--infile',
    required=True,
    help='UTF-8 text file to parse; each line becomes one token list',
)
args = parser.parse_args()

PARSER = Parser.load(args.parser)


def read_test_file(file):
    """Yield one list of tokens for every line of *file*.

    The line is handed to ``split_into_sentences``; the strings it returns
    are joined with single spaces and split on whitespace again, so all
    sentences found on a line end up in the same flat token list.

    Args:
        file: Path of the text file to read (opened as UTF-8).

    Yields:
        list[str]: The whitespace-separated tokens of one input line. A line
        that produces no tokens yields an empty list.
    """
    with open(file, 'r', encoding='utf-8') as infile:
        for line in infile:
            # str.split() already returns a fresh list; no extra copy needed.
            yield ' '.join(split_into_sentences(line)).split()


# The input is materialised so predict() receives a concrete list, not a
# one-shot generator.
test_file = list(read_test_file(args.infile))
dataset = PARSER.predict(test_file, prob=True)
for sentence in dataset.sentences:
    print(sentence)