# biaffine_parser / parse_file.py
# Repository page header (not part of the program): commit 695687f by
# danielschnell, "Added files from Clarin: http://hdl.handle.net/20.500.12537/302",
# file size 609 bytes.
from argparse import ArgumentParser
from diaparser.parsers import Parser
from Tokenizer.src.tokenizer import split_into_sentences
# Command-line interface. Both options are mandatory: without them the script
# would pass None on to Parser.load / open and fail with an obscure error, so
# argparse is asked to reject the call up front with a usage message instead.
parser = ArgumentParser(
    description='Parse a text file with a diaparser model and print each sentence.'
)
parser.add_argument(
    '--parser',
    required=True,
    help='model identifier or path, passed unchanged to Parser.load',
)
parser.add_argument(
    '--infile',
    required=True,
    help='path of the UTF-8 text file to parse',
)
args = parser.parse_args()

# Load the model once at start-up; it is reused for the whole input file.
PARSER = Parser.load(args.parser)
def read_test_file(file):
    """Yield one flat token list for every line of the text file ``file``.

    The file is read as UTF-8. Each line is handed to
    ``split_into_sentences``; the pieces it returns are joined with single
    spaces and then split on whitespace, so everything found on one input
    line ends up in the same token list.
    """
    with open(file, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            joined = ' '.join(split_into_sentences(raw_line))
            # str.split() already returns a fresh list of tokens.
            yield joined.split()
# Materialise every per-line token list up front so the parser receives the
# whole input in a single call.
test_file = [*read_test_file(args.infile)]

# prob=True is forwarded to predict as-is.
# NOTE(review): presumably requests probabilities in the output - confirm
# against the diaparser documentation.
dataset = PARSER.predict(test_file, prob=True)

# Print each parsed sentence using its own string representation.
for sentence in dataset.sentences:
    print(sentence)