Icelandic-lt
/

biaffine_parser

biaffine_parser / parse_file.py

Added files from Clarin: http://hdl.handle.net/20.500.12537/302

695687f 8 months ago

609 Bytes

	from argparse import ArgumentParser
	from diaparser.parsers import Parser
	from Tokenizer.src.tokenizer import split_into_sentences

	# Command-line interface. Both options are marked required so that a missing
	# one is reported by argparse itself, rather than a None value failing later
	# inside Parser.load() or open().
	parser = ArgumentParser(
	    description='Parse a text file with a trained parser model and print '
	                'every parsed sentence.'
	)
	parser.add_argument(
	    '--parser',
	    required=True,
	    help='model identifier handed unchanged to Parser.load()',
	)
	parser.add_argument(
	    '--infile',
	    required=True,
	    help='UTF-8 text file to parse; each line becomes one input sentence',
	)
	args = parser.parse_args()

	# The model is loaded once at start-up and reused for the whole input.
	PARSER = Parser.load(args.parser)


	def read_test_file(file):
	    """Yield one flat list of tokens for every line of the text file *file*.

	    Each line is passed to ``split_into_sentences``; the strings it returns
	    are joined with single spaces and then split on whitespace. A line
	    therefore always produces exactly one list, no matter how many
	    sentences the tokenizer found in it. A line for which the tokenizer
	    produces no text yields an empty list.

	    The file is read as UTF-8 and consumed lazily, one line at a time.
	    """
	    with open(file, 'r', encoding='utf-8') as handle:
	        for raw_line in handle:
	            # Merge all sentences of the line back into one string so the
	            # whitespace split below gives a single token sequence.
	            merged = ' '.join(split_into_sentences(raw_line))
	            yield merged.split()

	# Materialise the generator: the parser receives the complete list of
	# token lists in a single call.
	test_file = list(read_test_file(args.infile))

	# prob=True is forwarded to predict(); presumably it asks the parser to
	# attach probabilities to its output -- confirm against the diaparser docs.
	dataset = PARSER.predict(test_file, prob=True)

	# Write every parsed sentence to stdout using its own string form.
	for parsed_sentence in dataset.sentences:
	    print(parsed_sentence)