diff --git a/Bart-gen-arg b/Bart-gen-arg new file mode 160000 index 0000000000000000000000000000000000000000..26debc5ccc7a55a0186cc440e4eb824f7eb66b0c --- /dev/null +++ b/Bart-gen-arg @@ -0,0 +1 @@ +Subproject commit 26debc5ccc7a55a0186cc440e4eb824f7eb66b0c diff --git a/TEST/ttest2.py b/TEST/ttest2.py new file mode 100644 index 0000000000000000000000000000000000000000..3fd628f6f9b22a5cd1185e50fe2cf82b0f3965c0 --- /dev/null +++ b/TEST/ttest2.py @@ -0,0 +1,246 @@ +import json +import re +import spacy +from tqdm import tqdm + +from src.genie.utils import WhitespaceTokenizer + +#x = 0 +def find_head(arg_start, arg_end, doc): + # 设置一个临时变量 存储 论元短语的开始索引 cur_i = arg_start + cur_i = arg_start + # 进行遍历 + while doc[cur_i].head.i >= arg_start and doc[cur_i].head.i <= arg_end: + if doc[cur_i].head.i == cur_i: + # self is the head + break + else: + cur_i = doc[cur_i].head.i + + arg_head = cur_i + + return (arg_head, arg_head) + +def find_arg_span(arg, context_words, trigger_start, trigger_end, head_only=False, doc=None): + # 要定义一个match 作为匹配项 + match = None + # arg 是论元短语 是预测文件中predicted中生成的论元短语 arg_len目前的含义是获取生成论元短语的长度 + arg_len = len(arg) + # context_words 是文本 min_dis是最短距离 + min_dis = len(context_words) # minimum distance to trigger + #print(arg) + + #x = 0 + # i 代表文本中的单词索引 w 代表文本中的i索引对应的单词 + for i, w in enumerate(context_words): + # 如果文本单词列表中有一段单词 和 模型生成的单词是相等的 + if context_words[i:i + arg_len] == arg: + # 如果 这个论元单词的开始索引在触发词单词索引之前 + # global x += 1 + # print('aa') + if i < trigger_start: + # 那么距离就是 触发词单词的开始索引减去论元短语的开始索引再减去论元短语的长度 + dis = abs(trigger_start - i - arg_len) + else: + # 反之 + dis = abs(i - trigger_end) + if dis < min_dis: + # match是一个元组 + match = (i, i + arg_len - 1) + min_dis = dis + + #print(match) + if match and head_only: + assert (doc != None) + match = find_head(match[0], match[1], doc) + #print(x) + return match + +def get_event_type(ex): + evt_type = [] + for evt in ex['evt_triggers']: + for t in evt[2]: + evt_type.append(t[0]) + return evt_type + +def 
extract_args_from_template(ex, template, ontology_dict,): + # extract argument text + # 这个函数的返回值是一个字典 因此需要 template列表和ex中的predicted列表同时进行遍历放入字典中 + # 在这里定义两个列表 分别存放 定义存放模板的列表 TEMPLATE 和 相对应的生成 PREDICTED + # 传过来的参数中的template就是包含所有模板的列表 因此不需要再定义TEMPLATE 还是需要定义一个存放分词后的template + # 这里的template是相应事件类型下的模板包含多个 + # 原来处理的方式是一个数据和一个综合性模板 现在模板是分开的 为什么要把template传过来 这不是脱裤子放屁的操作? + # 下面这段操作是因为上次模板的定义是相同因此只需要去列表中的第一个模板就行 这次需要用循环进行遍历 + # print(ex) + t = [] + TEMPLATE = [] + for i in template: + t = i.strip().split() + TEMPLATE.append(t) + t = [] + # 到此为止 得到存放该ex即该数据类型下的所有模板的分词后的列表存储 下面获取对应的predicted同理 + PREDICTED = [] + p = [] + # 形参中插入的ex应该包含了该条数据(即该事件类型下)所有应该生成的论元对应的模板 + # 在程序中出现了不一样的情况 貌似只有一条模板数据 这个问题解决了 + # print(ex['predicted']) + for i in ex['predicted']: + p = i.strip().split() + PREDICTED.append(p) + p = [] + # print(TEMPLATE) + # print(PREDICTED) + # 这个字典变量定义了这个函数的返回值 应该是论元角色-论元短语的key-value映射 + predicted_args = {} + evt_type = get_event_type(ex)[0] + # print(evt_type) + # 不出意外的话 TEMPLATE和PREDICTED的长度应该是相等的 + length = len(TEMPLATE) + for i in range(length): + #if i < 4: + #continue + template_words = TEMPLATE[i] + predicted_words = PREDICTED[i] + t_ptr = 0 + p_ptr = 0 + print(template_words) + print(predicted_words) + while t_ptr < len(template_words) and p_ptr < len(predicted_words): + if re.match(r'<(arg\d+)>', template_words[t_ptr]): + # print('aa') + m = re.match(r'<(arg\d+)>', template_words[t_ptr]) + # 这一步的操作是从模板中得到 这样的词符 即arg_num 然后通过arg_num找到对应论元角色arg_name + arg_num = m.group(1) + # print(arg_num) + arg_name = ontology_dict[evt_type.replace('n/a', 'unspecified')][arg_num] + + if predicted_words[p_ptr] == '': + # missing argument + p_ptr +=1 + t_ptr +=1 + else: + arg_start = p_ptr + if t_ptr + 1 == len(template_words): + while (p_ptr < len(predicted_words)): + p_ptr += 1 + else: + while (p_ptr < len(predicted_words)) and (predicted_words[p_ptr] != template_words[t_ptr+1]): + p_ptr += 1 + arg_text = predicted_words[arg_start:p_ptr] + predicted_args[arg_name] = 
arg_text + t_ptr += 1 + # aligned + else: + t_ptr += 1 + p_ptr += 1 + + # print(predicted_args) + return predicted_args + +def pro(): + nlp = spacy.load('en_core_web_sm') + nlp.tokenizer = WhitespaceTokenizer(nlp.vocab) + ontology_dict = {} + with open('./aida_ontology_fj-5.csv', 'r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + evt_type = fields[0] + if evt_type in ontology_dict.keys(): + arguments = fields[2:] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + else: + ontology_dict[evt_type] = {} + arguments = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + + examples = {} + x = 0 + with open('./data/RAMS_1.0/data/test_head_coref.jsonlines', 'r') as f: + for line in f: + x += 1 + ex = json.loads(line.strip()) + ex['ref_evt_links'] = ex['gold_evt_links'] + ex['gold_evt_links'] = [] + examples[ex['doc_key']] = ex + + flag = {} + y = 0 + with open('./checkpoints/gen-RAMS-pred/predictions.jsonl', 'r') as f: + for line in f: + y += 1 + pred = json.loads(line.strip()) + # print(pred['predicted']) + if pred['doc_key'] in flag.keys(): + examples[pred['doc_key']]['predicted'].append(pred['predicted']) + examples[pred['doc_key']]['gold'].append(pred['gold']) + # 如果没有 说明这是新的事件类型 + else: + flag[pred['doc_key']] = True + examples[pred['doc_key']]['predicted'] = [] + examples[pred['doc_key']]['gold'] = [] + # 然后将此条数据存入 + examples[pred['doc_key']]['predicted'].append(pred['predicted']) + examples[pred['doc_key']]['gold'].append(pred['gold']) + # print(len(examples), x, y) 871 871 3614 + + 
for ex in tqdm(examples.values()): + if 'predicted' not in ex:# this is used for testing + continue + # print(ex) + # break + # print(ex) + # get template 获取事件类型 + # print('nw_RC00c8620ef5810429342a1c339e6c76c1b0b9add3f6010f04482fd832') + evt_type = get_event_type(ex)[0] + context_words = [w for sent in ex['sentences'] for w in sent] + # 这里的template是ontology_dict中 template 包含一个事件类型下的所有事件模板 + template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + # extract argument text + # 这里应该是提取预测文件中预测到的论元短语 ex是一条json数据 template是这条json数据对应下的模板 on是论元角色和的映射 + # 这里ex中的predicted和gold已经包括了该事件类型下的所有论元 用列表的形式进行存储 且顺序是一一对应的 + # 这里返回的predicted_args是一个字典: + # ex = {'predicted': [' A man attacked target using something at place in order to take something', ' Attacker attacked EgyptAir plane using something at place in order to take something', ' Attacker attacked target using a suicide belt at place in order to take something', ' Attacker attacked target using something at Flight 181 place in order to take something', ' Attacker attacked target using something at place in order to take EgyptAir Flight 181']} + # template = ontology_dict['conflict.attack.stealrobhijack']['template'] + # print(ex) + predicted_args = extract_args_from_template(ex, template, ontology_dict) + # print(predicted_args) + # break + trigger_start = ex['evt_triggers'][0][0] + trigger_end = ex['evt_triggers'][0][1] + # 上面返回的predicted_args是一个字典 暂时认为是论元角色和具体论元短语的映射 + # 还没有发现doc的作用 + doc = None + # 通过test_rams.sh文件的设置 可以发现args.head_only的值为true + head_only = True + if head_only: + # # 从原始文本中取出标记 + doc = nlp(' '.join(context_words)) + for argname in predicted_args: + # 通过find_arg_span函数找出 + arg_span = find_arg_span(predicted_args[argname], context_words, + trigger_start, trigger_end, head_only=True, doc=doc) + # print() + #print(arg_span) +pro() +#print(x) + +# dict = {'A': 1, 'B': 2, 'C': 3} +# +# for x in dict: +# print(x) +# if '1' in dict.keys(): +# print('aaaaaaaa') + diff --git a/TEST/ytest.py 
b/TEST/ytest.py new file mode 100644 index 0000000000000000000000000000000000000000..8d5f78cc1e34e50c6773449dba871ad1c782933b --- /dev/null +++ b/TEST/ytest.py @@ -0,0 +1,265 @@ +import re +from copy import deepcopy +import transformers +from transformers import BartTokenizer +import jsonlines +import json + + +# dict = {"rel_triggers": [], "gold_rel_links": [], "doc_key": "nw_RC00c8620ef5810429342a1c339e6c76c1b0b9add3f6010f04482fd832", "ent_spans": [[27, 27, [["evt043arg01communicator", 1.0]]], [48, 48, [["evt043arg03place", 1.0]]], [32, 36, [["evt043arg02recipient", 1.0]]]], "language_id": "eng", "source_url": "http://bbc.co.uk/sport/athletics/36295481", "evt_triggers": [[31, 31, [["contact.prevarication.broadcast", 1.0]]]], "split": "test", "sentences": [["We", "are", "ashamed", "of", "them", ".", "\""], ["However", ",", "Mutko", "stopped", "short", "of", "admitting", "the", "doping", "scandal", "was", "state", "sponsored", "."], ["\"", "We", "are", "very", "sorry", "that", "athletes", "who", "tried", "to", "deceive", "us", ",", "and", "the", "world", ",", "were", "not", "caught", "sooner", "."], ["We", "are", "very", "sorry", "because", "Russia", "is", "committed", "to", "upholding", "the", "highest", "standards", "in", "sport", "and", "is", "opposed", "to", "anything", "that", "threatens", "the", "Olympic", "values", ",", "\"", "he", "said", "."], ["English", "former", "heptathlete", "and", "Athens", "2004", "bronze", "medallist", "Kelly", "Sotherton", "was", "unhappy", "with", "Mutko", "'s", "plea", "for", "Russia", "'s", "ban", "to", "be", "lifted", "for", "Rio"]], "gold_evt_links": [[[31, 31], [27, 27], "evt043arg01communicator"], [[31, 31], [32, 32], "evt043arg02recipient"], [[31, 31], [48, 48], "evt043arg03place"]], "clusters": [[[0, 0], [22, 22], [32, 32], [43, 43], [48, 48], [90, 91]], [[9, 9], [70, 70], [86, 87]]], "corefs": [[[0, 0], [22, 22], [32, 32], [43, 43], [48, 48], [90, 90]], [[9, 9], [70, 70], [86, 86]]]} +# +# template = "what is the " +# +# 
context_words = [w for sent in dict['sentences'] for w in sent] +# +# argtext = context_words[27] +# +# print(argtext) +# +# template = re.sub('', argtext, template) +# +# print(template) + +# for lidx, triple in enumerate(dict['gold_evt_links']): + # # 触发词 论元 论元 + # # 例子: "gold_evt_links": + # # [[[40, 40], [33, 33], "evt089arg01victim"], + # # [[40, 40], [28, 28], "evt089arg02place"]] + # trigger_span, argument_span, arg_name = triple + # # 第几个论元 + # arg_num = ontology_dict[evt_type.replace('n/a', 'unspecified')][arg_name] + # # 具体论元内容 短语 + # arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 1]) + # # 通过正则表达式的方式将模板中的每个 替换为具体的论元内容 + # # 按照顺序将列表中的依次替换为 + # template[lidx] = re.sub('<{}>'.format(arg_num), arg_text, template[lidx]) +# # "ent_spans": [[27, 27, [["evt043arg01communicator", 1.0]]], +# # [48, 48, [["evt043arg03place", 1.0]]], +# # [32, 36, [["evt043arg02recipient", 1.0]]]] +# context_words = [w for sent in dict['sentences'] for w in sent] +# +# print(context_words[32]) +# print(context_words[33]) +# print(context_words[34]) +# print(context_words[35]) +# print(context_words[36]) + +def get_event_type(ex): + evt_type = [] + for evt in ex['evt_triggers']: + for t in evt[2]: + evt_type.append(t[0]) + return evt_type + +def create_gold_gen(ex, ontology_dict, mark_trigger=True): + # 设置三个总列表、存放输入模板、输出模板 + # 设置三个总列表、存放输入模板、输出模板 + INPUT = [] + OUTPUT = [] + CONTEXT = [] + evt_type = get_event_type(ex)[0] + context_words = [w for sent in ex['sentences'] for w in sent] + # print(context_words[48]) + input_template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + i = len(input_template) + input_list = [] + for x in range(i): + str = re.sub(r'', '', input_template[x]) + input_list.append(str) + # 其中input_list种存放的是 原始数据中 全部替换为 之后的模板 下一步应该进行分词 + # temp = [] + for x in range(i): + space_tokenized_template = input_list[x].split(' ') + INPUT.append(space_tokenized_template) + space_tokenized_template = [] + # 其中temp中存放的都是分词后的模板 
下一步对temp中的所有元素进行tokenize + tokenized_input_template = [] + tokenizer = BartTokenizer.from_pretrained('facebook/bart-large') + temp = [] + for x in range(len(INPUT)): + for w in INPUT[x]: + tokenized_input_template.extend(tokenizer.tokenize(w, add_prefix_space=True)) + + # print(tokenized_input_template) + + temp.append(tokenized_input_template) + tokenized_input_template = [] + print(temp) + break + template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + + # if ex['doc_key'] == 'nw_RC04992035300b2ec94d8692646a28dc8b5f210c94842d20834c5342df': + # print('bbb') + # [' was injured by injurer with medical issue at place', + # 'Victim was injured by with medical issue at place', + # 'Victim was injured by injurer with medical issue at place', + # 'Victim was injured by injurer with medical issue at place'] + # print(template) + for lidx, triple in enumerate(ex['gold_evt_links']): + # 触发词 论元 论元 + # 例子: "gold_evt_links": + # [[50, 50], [48, 48], 'evt092arg01victim'] + # [[50, 50], [7, 7], 'evt092arg03place'] + + trigger_span, argument_span, arg_name = triple + if ex['doc_key'] == 'nw_RC013c8e78b7e8a4fb22193483877058f712dfd8b75b7a06d950de0b8f': + print(len(ex['gold_evt_links'])) + # 第几个论元 + arg_num = ontology_dict[evt_type.replace('n/a', 'unspecified')][arg_name] + # 具体论元内容 短语 + arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 1]) + # if ex['doc_key'] == 'nw_RC04992035300b2ec94d8692646a28dc8b5f210c94842d20834c5342df': + # print(arg_num) + # print(arg_text) + # print(arg_text) + # 通过正则表达式的方式将模板中的每个 替换为具体的论元内容 + # 搜索templat中的arg_num 找到对应的序列 + # INDEX = 0 + for index in range(len(template)): + if arg_num in template[index]: + break + else: + continue + # INDEX += 1 + template[index] = re.sub('<{}>'.format(arg_num), arg_text, template[index]) + if ex['doc_key'] == 'nw_RC013c8e78b7e8a4fb22193483877058f712dfd8b75b7a06d950de0b8f': + print('aaa') + print(template) + trigger = ex['evt_triggers'][0] + + # 将context放入CONTEXT中 + for w in 
range(i): + CONTEXT.append(context_words) + output_template = [] + # 此时的template中已经全部替换为论元短语 这部是将 替换为 + for i in range(len(template)): + output_template.append(re.sub(r'', '', template[i])) + # spaceout_tokenized_template = [] + for i in range(len(output_template)): + OUTPUT.append(output_template[i].split(' ')) + + # tokenized_out_template = [] + # for i in range(len(spaceout_tokenized_template)): + # for w in spaceout_tokenized_template[i]: + # tokenized_out_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) + # OUTPUT.append(tokenized_out_template) + # tokenized_out_template = [] + + return INPUT, OUTPUT, CONTEXT + + +def load_ontology(): + ontology_dict = {} + with open('aida_ontology_fj-5.csv', 'r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + evt_type = fields[0] + if evt_type in ontology_dict.keys(): + # 得到该事件类型下的所有论元类型 + args = fields[2:] + # 将该事件类型对应的模板中的论元模板 填充到onto_logy字典中 + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + # 事件类型下添加字典一项 arg1的值为arg + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + # 即扫描到的事件类型在 evt_type_dict.keys() 还未存在过 + else: + # 建立该事件类型的key + ontology_dict[evt_type] = {} + args = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + # 事件类型下添加字典一项 arg1的值为arg + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + + return ontology_dict + +def prepare_data(): + + ontology_dict = load_ontology() + # ('train', './data/RAMS_1.0/data/train.jsonlines'), + # ('test', './data/RAMS_1.0/data/test_head_coref.jsonlines') + for split, f in [('val', './data/RAMS_1.0/data/dev.jsonlines'),('train', './data/RAMS_1.0/data/train.jsonlines'), + ('test', 
'./data/RAMS_1.0/data/test_head_coref.jsonlines')]: + # , open('head_templates_preprocessed_data/{}.jsonl'.format(split), 'w') as writer + with open(f, 'r') as reader: + + # print(ontology_dict['contact.prevarication.broadcast']) + for lidx, line in enumerate(reader): + ex = json.loads(line.strip()) + # print(lidx) + # print(ex) + event_type = get_event_type(ex)[0] + if ex['doc_key'] == 'nw_RC013c8e78b7e8a4fb22193483877058f712dfd8b75b7a06d950de0b8f': + # {'rel_triggers': [], 'gold_rel_links': [], + # 'doc_key': 'nw_RC04992035300b2ec94d8692646a28dc8b5f210c94842d20834c5342df', + # 'ent_spans': [[48, 48, [['evt092arg01victim', 1.0]]], [7, 7, [['evt092arg03place', 1.0]]]], + # 'language_id': 'eng', + # 'source_url': 'http://news.sky.com/story/attack-in-nice-truck-ploughes-into-crowd-10502068', + # 'evt_triggers': [[50, 50, [['life.injure.n/a', 1.0]]]], 'split': 'test', + # 'sentences': [[':', ':', 'History', 'Of', 'Deadly', 'Attacks', 'In', 'France'], + # ['One', ',', 'Laurence', 'Olding', ',', 'was', 'with', 'his', 'fiance', 'and', + # 'jumped', 'over', 'a', 'promenade', 'wall', 'onto', 'the', 'concrete', 'below', + # 'to', 'avoid', 'the', 'truck', '.'], + # ['Emerging', 'from', 'hospital', 'in', 'bandages', 'he', 'said', ':', '"', 'There', + # 'was', 'debris', 'in', 'the', 'streets', ',', 'people', 'lying', 'injured', 'or', + # 'dead', 'in', 'the', 'road', '.', '"'], + # ['Video', ':', 'Hollande', 'On', "'", 'Horror', "'", 'Of', 'Attack'], + # ['Two', 'Americans', '-', 'Sean', 'Copeland', 'and', 'his', '11-year', '-', 'old', + # 'son', 'Brodie', 'from', 'Texas', '-', 'have', 'been', 'confirmed', 'among', 'the', + # 'dead', ',', 'a', 'US', 'official', 'said', '.']], + # 'gold_evt_links': [[[50, 50], [48, 48], 'evt092arg01victim'], + # [[50, 50], [7, 7], 'evt092arg03place']], + # 'clusters': [[[10, 11], [15, 15], [37, 37]], [[70, 71], [73, 73]]], + # 'corefs': [[[11, 11], [15, 15], [37, 37]], [[71, 71], [73, 73]]]} + print(ex) + # {'template': [' was injured by injurer 
with medical issue at place', + # 'Victim was injured by with medical issue at place', + # 'Victim was injured by injurer with medical issue at place', + # 'Victim was injured by injurer with medical issue at place'], + # 'arg1': 'evt092arg01victim', 'evt092arg01victim': 'arg1', 'arg2': 'evt092arg02injurer', + # 'evt092arg02injurer': 'arg2', 'arg3': 'evt092arg03place', 'evt092arg03place': 'arg3', + # 'arg4': 'evt092arg04medicalissue', 'evt092arg04medicalissue': 'arg4'} + print(ontology_dict[event_type.replace('n/a','unspecified')]) + input_template, output_template, context = create_gold_gen(ex, ontology_dict, + True) + ontology_dict = load_ontology() + if ex['doc_key'] == 'nw_RC013c8e78b7e8a4fb22193483877058f712dfd8b75b7a06d950de0b8f': + # [['', 'was', 'injured', 'by', 'injurer', 'with', 'medical', 'issue', 'at', 'place'], + # ['Victim', 'was', 'injured', 'by', '', 'with', 'medical', 'issue', 'at', 'place'], + # ['Victim', 'was', 'injured', 'by', 'injurer', 'with', 'medical', 'issue', 'at', '', 'place'], + # ['Victim', 'was', 'injured', 'by', 'injurer', 'with', '', 'medical', 'issue', 'at', 'place']] + print(input_template) + # [['people', 'was', 'injured', 'by', 'injurer', 'with', 'medical', 'issue', 'at', 'place'], + # ['Victim', 'was', 'injured', 'by', '', 'with', 'medical', 'issue', 'at', 'place'], + # ['Victim', 'was', 'injured', 'by', 'injurer', 'with', 'medical', 'issue', 'at', '', 'place'], + # ['Victim', 'was', 'injured', 'by', 'injurer', 'with', '', 'medical', 'issue', 'at', 'place']] + print(output_template) + # print(input_template) + # 4 4 4 + # print(len(input_template)) + # print(len(output_template)) + # print(len(context)) +[[':', ':', 'History', 'Of', 'Deadly', 'Attacks', 'In', 'France'], + ['One', ',', 'Laurence', 'Olding', ',', 'was', 'with', 'his', 'fiance', 'and', 'jumped', 'over', 'a', 'promenade', 'wall', 'onto', 'the', 'concrete', 'below', 'to', 'avoid', 'the', 'truck', '.'], + ['Emerging', 'from', 'hospital', 'in', 'bandages', 'he', 
'said', ':', '"', 'There', 'was', 'debris', 'in', 'the', 'streets', ',', 'people', 'lying', 'injured', 'or', 'dead', 'in', 'the', 'road', '.', '"'], + ['Video', ':', 'Hollande', 'On', "'", 'Horror', "'", 'Of', 'Attack'], + ['Two', 'Americans', '-', 'Sean', 'Copeland', 'and', 'his', '11-year', '-', 'old', 'son', 'Brodie', 'from', 'Texas', '-', 'have', 'been', 'confirmed', 'among', 'the', 'dead', ',', 'a', 'US', 'official', 'said', '.']] + +[':', ':', 'History', 'Of', 'Deadly', 'Attacks', 'In', 'France', + + 'One', ',', 'Laurence', 'Olding', ',', 'was', 'with', 'his', 'fiance', 'and', 'jumped', 'over', 'a', 'promenade', 'wall', 'onto', 'the', 'concrete', 'below', 'to', 'avoid', 'the', 'truck', '.', + 'Emerging', 'from', 'hospital', 'in', 'bandages', 'he', 'said', ':', '"', 'There', 'was', 'debris', 'in', 'the', 'streets', ',', 'people', 'lying', 'injured', 'or', 'dead', 'in', 'the', 'road', '.', '"', + 'Video', ':', 'Hollande', 'On', "'", 'Horror', "'", 'Of', 'Attack', + 'Two', 'Americans', '-', 'Sean', 'Copeland', 'and', 'his', '11-year', '-', 'old', 'son', 'Brodie', 'from', 'Texas', '-', 'have', 'been', 'confirmed', 'among', 'the', 'dead', ',', 'a', 'US', 'official', 'said', '.'] + +prepare_data() diff --git a/aida_ontology_fj-5.csv b/aida_ontology_fj-5.csv new file mode 100644 index 0000000000000000000000000000000000000000..26bdba6165eea83e8b07fabad8fc7dc9532e3ea7 --- /dev/null +++ b/aida_ontology_fj-5.csv @@ -0,0 +1,593 @@ +event_type,template,arg1,arg2,arg3,arg4,arg5 +artifactexistence.artifactfailure.mechanicalfailure, mechanical artifact failed due to instrument at place,evt152arg01mechanicalartifact,,,, +artifactexistence.artifactfailure.mechanicalfailure,Mechanical artifact failed due to instrument at place,,evt152arg02instrument,,, +artifactexistence.artifactfailure.mechanicalfailure,Mechanical artifact failed due to instrument at place,,,evt152arg03place,, +artifactexistence.damagedestroy.unspecified, damaged or destroyed something using instrument in 
place,evt001arg01damagerdestroyer,,,, +artifactexistence.damagedestroy.unspecified,Damager damaged or destroyed using instrument in place,,evt001arg02artifact,,, +artifactexistence.damagedestroy.unspecified,Damager damaged or destroyed something using instrument in place,,,evt001arg03instrument,, +artifactexistence.damagedestroy.unspecified,Damager damaged or destroyed something using instrument in place,,,,evt001arg04place, +artifactexistence.damagedestroy.damage, damaged something using instrument in place,evt002arg01damager,,,, +artifactexistence.damagedestroy.damage,Damager damaged using instrument in place,,evt002arg02artifact,,, +artifactexistence.damagedestroy.damage,Damager damaged something using instrument in place,,,evt002arg03instrument,, +artifactexistence.damagedestroy.damage,Damager damaged something using instrument in place,,,,evt002arg04place, +artifactexistence.damagedestroy.destroy, destroyed something using instrument in place,evt003arg01destroyer,,,, +artifactexistence.damagedestroy.destroy,Damager destroyed using instrument in place,,evt003arg02artifact,,, +artifactexistence.damagedestroy.destroy,Damager destroyed something using instrument in place,,,evt003arg03instrument,, +artifactexistence.damagedestroy.destroy,Damager destroyed something using instrument in place,,,,evt003arg04place, +artifactexistence.shortage.shortage, experienced a shortage of something supply at place,evt149arg01experiencer ,,,, +artifactexistence.shortage.shortage,Experiencer experienced a shortage of supply at place,,evt149arg02supply ,,, +artifactexistence.shortage.shortage,Experiencer experienced a shortage of something supply at place,,,evt149arg03place ,, +conflict.attack.unspecified, attacked target using something at place,evt004arg01attacker,,,, +conflict.attack.unspecified,Attacker attacked using something at place,,evt004arg02target,,, +conflict.attack.unspecified,Attacker attacked target using at place,,,evt004arg03instrument,, 
+conflict.attack.unspecified,Attacker attacked target using something at place,,,,evt004arg04place, +conflict.attack.airstrikemissilestrike, attacked target using something at place,evt005arg01attacker,,,, +conflict.attack.airstrikemissilestrike,Attacker attacked using something at place,,evt005arg02target,,, +conflict.attack.airstrikemissilestrike,Attacker attacked target using at place,,,evt005arg03instrument,, +conflict.attack.airstrikemissilestrike,Attacker attacked target using something at place,,,,evt005arg04place, +conflict.attack.biologicalchemicalpoisonattack, attacked target using something at place,evt006arg01attacker,,,, +conflict.attack.biologicalchemicalpoisonattack,Attacker attacked using something at place,,evt006arg02target,,, +conflict.attack.biologicalchemicalpoisonattack,Attacker attacked target using at place,,,evt006arg03instrument,, +conflict.attack.biologicalchemicalpoisonattack,Attacker attacked target using something at place,,,,evt006arg04place, +conflict.attack.bombing, attacked target using something at place,evt007arg01attacker,,,, +conflict.attack.bombing,Attacker attacked using something at place,,evt007arg02target,,, +conflict.attack.bombing,Attacker attacked target using at place,,,evt007arg03instrument,, +conflict.attack.bombing,Attacker attacked target using something at place,,,,evt007arg04place, +conflict.attack.firearmattack, attacked target using something at place,evt008arg01attacker,,,, +conflict.attack.firearmattack,Attacker attacked using something at place,,evt008arg02target,,, +conflict.attack.firearmattack,Attacker attacked target using at place,,,evt008arg03instrument,, +conflict.attack.firearmattack,Attacker attacked target using something at place,,,,evt008arg04place, +conflict.attack.hanging, attacked target using something at place,evt009arg01attacker,,,, +conflict.attack.hanging,Attacker attacked using something at place,,evt009arg02target,,, +conflict.attack.hanging,Attacker attacked target using at 
place,,,evt009arg03instrument,, +conflict.attack.hanging,Attacker attacked target using something at place,,,,evt009arg04place, +conflict.attack.invade, attacked target using something at place,evt010arg01attacker,,,, +conflict.attack.invade,Attacker attacked using something at place,,evt010arg02target,,, +conflict.attack.invade,Attacker attacked target using at place,,,evt010arg03instrument,, +conflict.attack.invade,Attacker attacked target using something at place,,,,evt010arg04place, +conflict.attack.selfdirectedbattle, attacked target using something at place,evt011arg01attacker,,,, +conflict.attack.selfdirectedbattle,Attacker attacked using something at place,,evt011arg02target,,, +conflict.attack.selfdirectedbattle,Attacker attacked target using at place,,,evt011arg03instrument,, +conflict.attack.selfdirectedbattle,Attacker attacked target using something at place,,,,evt011arg04place, +conflict.attack.setfire, attacked target using something at place,evt012arg01attacker,,,, +conflict.attack.setfire,Attacker attacked using something at place,,evt012arg02target,,, +conflict.attack.setfire,Attacker attacked target using at place,,,evt012arg03instrument,, +conflict.attack.setfire,Attacker attacked target using something at place,,,,evt012arg04place, +conflict.attack.stabbing, attacked target using something at place,evt013arg01attacker,,,, +conflict.attack.stabbing,Attacker attacked using something at place,,evt013arg02target,,, +conflict.attack.stabbing,Attacker attacked target using at place,,,evt013arg03instrument,, +conflict.attack.stabbing,Attacker attacked target using something at place,,,,evt013arg04place, +conflict.attack.stealrobhijack, attacked target using something at place in order to take something,evt014arg01attacker,,,, +conflict.attack.stealrobhijack,Attacker attacked using something at place in order to take something,,evt014arg02target,,, +conflict.attack.stealrobhijack,Attacker attacked target using at place in order to take 
something,,,evt014arg03instrument,, +conflict.attack.stealrobhijack,Attacker attacked target using something at place in order to take something,,,,evt014arg04place, +conflict.attack.stealrobhijack,Attacker attacked target using something at place in order to take ,,,,,evt014arg05artifact +conflict.attack.strangling, attacked target using something at place,evt015arg01attacker,,,, +conflict.attack.strangling,Attacker attacked using something at place,,evt015arg02target,,, +conflict.attack.strangling,Attacker attacked target using at place,,,evt015arg03instrument,, +conflict.attack.strangling,Attacker attacked target using something at place,,,,evt015arg04place, +conflict.coup.coup, was deposed by somebody at place,evt151arg01deposedentity ,,,, +conflict.coup.coup,Somebody was deposed by at place,,evt151arg02deposingentity ,,, +conflict.coup.coup,Somebody was deposed by somebody at place,,,evt151arg03place ,, +conflict.demonstrate.unspecified, was in a demonstration at place,evt016arg01demonstrator,,,, +conflict.demonstrate.unspecified,Demonstrator was in a demonstration at place,,evt016arg02place,,, +conflict.demonstrate.marchprotestpoliticalgathering, was in a demonstration or protest at place,evt017arg01demonstrator,,,, +conflict.demonstrate.marchprotestpoliticalgathering,Demonstrator was in a demonstration or protest at place,,evt017arg02place,,, +conflict.yield.unspecified, yielded to recipient at place,evt018arg01yielder,,,, +conflict.yield.unspecified,Yielder yielded to at place,,evt018arg02recipient,,, +conflict.yield.unspecified,Yielder yielded to recipient at place,,,evt018arg03place,, +conflict.yield.retreat, retreated from original place to target place,evt019arg01retreater,,,, +conflict.yield.retreat,Retreater retreated from place to target place,,evt019arg02origin,,, +conflict.yield.retreat,Retreater retreated from original place to place,,,evt019arg03destination,, +conflict.yield.surrender, surrendered to recipient at place,evt020arg01surrenderer,,,, 
+conflict.yield.surrender,Surrenderer surrendered to at place,,evt020arg02recipient,,, +conflict.yield.surrender,Surrenderer surrendered to recipient at place,,,evt020arg03place,, +contact.collaborate.unspecified, communicated with participant at place,evt021arg01participant,,,, +contact.collaborate.unspecified,Participant communicated with at place,,evt021arg02participant,,, +contact.collaborate.unspecified,Participant communicated with participant at place,,,evt021arg03place,, +contact.collaborate.correspondence, communicated remotely with participant at place,evt022arg01participant,,,, +contact.collaborate.correspondence,Participant communicated remotely with at place,,evt022arg02participant,,, +contact.collaborate.correspondence,Participant communicated remotely with participant at place,,,evt022arg03place,, +contact.collaborate.meet, met face-to-face with participant at place,evt023arg01participant,,,, +contact.collaborate.meet,Participant met face-to-face with at place,,evt023arg02participant,,, +contact.collaborate.meet,Participant met face-to-face with participant at place,,,evt023arg03place,, +contact.commandorder.unspecified, communicated with recipient about topic at place,evt024arg01communicator,,,, +contact.commandorder.unspecified,Communicator communicated with about topic at place,,evt024arg02recipient,,, +contact.commandorder.unspecified,Communicator communicated with recipient about topic at place,,,evt024arg03place,, +contact.commandorder.unspecified,Communicator communicated with recipient about topic at place,,,,evt024arg04topic, +contact.commandorder.broadcast, communicated to recipient about topic at place (one-way communication),evt025arg01communicator,,,, +contact.commandorder.broadcast,Communicator communicated to about topic at place (one-way communication),,evt025arg02recipient,,, +contact.commandorder.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,evt025arg03place,, 
+contact.commandorder.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,,evt025arg04topic, +contact.commandorder.correspondence, communicated remotely with recipient about topic at place,evt026arg01communicator,,,, +contact.commandorder.correspondence,Communicator communicated remotely with about topic at place,,evt026arg02recipient,,, +contact.commandorder.correspondence,Communicator communicated remotely with recipient about topic at place,,,evt026arg03place,, +contact.commandorder.correspondence,Communicator communicated remotely with recipient about topic at place,,,,evt026arg04topic, +contact.commandorder.meet, met face-to-face with recipient about topic at place,evt027arg01communicator,,,, +contact.commandorder.meet,Communicator met face-to-face with about topic at place,,evt027arg02recipient,,, +contact.commandorder.meet,Communicator met face-to-face with recipient about topic at place,,,evt027arg03place,, +contact.commandorder.meet,Communicator met face-to-face with recipient about topic at place,,,,evt027arg04topic, +contact.commitmentpromiseexpressintent.unspecified, communicated with recipient about topic at place,evt028arg01communicator,,,, +contact.commitmentpromiseexpressintent.unspecified,Communicator communicated with about topic at place,,evt028arg02recipient,,, +contact.commitmentpromiseexpressintent.unspecified,Communicator communicated with recipient about topic at place,,,evt028arg03place,, +contact.commitmentpromiseexpressintent.unspecified,Communicator communicated with recipient about topic at place,,,,evt028arg04topic, +contact.commitmentpromiseexpressintent.broadcast, communicated to recipient about topic at place (one-way communication),evt029arg01communicator,,,, +contact.commitmentpromiseexpressintent.broadcast,Communicator communicated to about topic at place (one-way communication),,evt029arg02recipient,,, +contact.commitmentpromiseexpressintent.broadcast,Communicator communicated to 
recipient about topic at place (one-way communication),,,evt029arg03place,, +contact.commitmentpromiseexpressintent.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,,evt029arg04topic, +contact.commitmentpromiseexpressintent.correspondence, communicated remotely with recipient about topic at place,evt030arg01communicator,,,, +contact.commitmentpromiseexpressintent.correspondence,Communicator communicated remotely with about topic at place,,evt030arg02recipient,,, +contact.commitmentpromiseexpressintent.correspondence,Communicator communicated remotely with recipient about topic at place,,,evt030arg03place,, +contact.commitmentpromiseexpressintent.correspondence,Communicator communicated remotely with recipient about topic at place,,,,evt030arg04topic, +contact.commitmentpromiseexpressintent.meet, met face-to-face with recipient about topic at place,evt031arg01communicator,,,, +contact.commitmentpromiseexpressintent.meet,Communicator met face-to-face with about topic at place,,evt031arg02recipient,,, +contact.commitmentpromiseexpressintent.meet,Communicator met face-to-face with recipient about topic at place,,,evt031arg03place,, +contact.commitmentpromiseexpressintent.meet,Communicator met face-to-face with recipient about topic at place,,,,evt031arg04topic, +contact.discussion.unspecified, communicated with participant at place,evt032arg01participant,,,, +contact.discussion.unspecified,Participant communicated with at place,,evt032arg02participant,,, +contact.discussion.unspecified,Participant communicated with participant at place,,,evt032arg03place,, +contact.discussion.correspondence, communicated remotely with participant at place,evt033arg01participant,,,, +contact.discussion.correspondence,Participant communicated remotely with at place,,evt033arg02participant,,, +contact.discussion.correspondence,Participant communicated remotely with participant at place,,,evt033arg03place,, +contact.discussion.meet, met 
face-to-face with participant at place,evt034arg01participant,,,, +contact.discussion.meet,Participant met face-to-face with at place,,evt034arg02participant,,, +contact.discussion.meet,Participant met face-to-face with participant at place,,,evt034arg03place,, +contact.funeralvigil.unspecified, communicated with participant during a funeral or vigil for something at place,evt035arg01participant,,,, +contact.funeralvigil.unspecified,Participant communicated with during a funeral or vigil for something at place,,evt035arg02participant,,, +contact.funeralvigil.unspecified,Participant communicated with participant during a funeral or vigil for at place,,,evt035arg03deceased,, +contact.funeralvigil.unspecified,Participant communicated with participant during a funeral or vigil for something at place,,,,evt035arg04place, +contact.funeralvigil.meet, met face-to-face with participant during a funeral or vigil for something at place,evt036arg01participant,,,, +contact.funeralvigil.meet,Participant met face-to-face with during a funeral or vigil for something at place,,evt036arg02participant,,, +contact.funeralvigil.meet,Participant met face-to-face with participant during a funeral or vigil for at place,,,evt036arg03deceased,, +contact.funeralvigil.meet,Participant met face-to-face with participant during a funeral or vigil for something at place,,,,evt036arg04place, +contact.mediastatement.unspecified, communicated with participant at place,evt037arg01communicator,,,, +contact.mediastatement.unspecified,Participant communicated with at place,,evt037arg02recipient,,, +contact.mediastatement.unspecified,Participant communicated with participant at place,,,evt037arg03place,, +contact.mediastatement.broadcast, communicated to recipient at place (one-way communication),evt038arg01communicator,,,, +contact.mediastatement.broadcast,Communicator communicated to at place (one-way communication),,evt038arg02recipient,,, +contact.mediastatement.broadcast,Communicator communicated to 
recipient at place (one-way communication),,,evt038arg03place,, +contact.negotiate.unspecified, communicated with recipient about topic at place,evt039arg01participant,,,, +contact.negotiate.unspecified,Communicator communicated with about topic at place,,evt039arg02participant,,, +contact.negotiate.unspecified,Communicator communicated with recipient about topic at place,,,evt039arg03place,, +contact.negotiate.unspecified,Communicator communicated with recipient about topic at place,,,,evt039arg04topic, +contact.negotiate.correspondence, communicated remotely with participant about topic at place,evt040arg01participant,,,, +contact.negotiate.correspondence,Participant communicated remotely with about topic at place,,evt040arg02participant,,, +contact.negotiate.correspondence,Participant communicated remotely with participant about topic at place,,,evt040arg03place,, +contact.negotiate.correspondence,Participant communicated remotely with participant about topic at place,,,,evt040arg04topic, +contact.negotiate.meet, met face-to-face with recipient about topic at place,evt041arg01participant,,,, +contact.negotiate.meet,Communicator met face-to-face with about topic at place,,evt041arg02participant,,, +contact.negotiate.meet,Communicator met face-to-face with recipient about topic at place,,,evt041arg03place,, +contact.negotiate.meet,Communicator met face-to-face with recipient about topic at place,,,,evt041arg04topic, +contact.prevarication.unspecified, communicated with recipient about topic at place,evt042arg01communicator,,,, +contact.prevarication.unspecified,Communicator communicated with about topic at place,,evt042arg02recipient,,, +contact.prevarication.unspecified,Communicator communicated with recipient about topic at place,,,evt042arg03place,, +contact.prevarication.unspecified,Communicator communicated with recipient about topic at place,,,,evt042arg04topic, +contact.prevarication.broadcast, communicated to recipient about topic at place (one-way 
communication),evt043arg01communicator,,,, +contact.prevarication.broadcast,Communicator communicated to about topic at place (one-way communication),,evt043arg02recipient,,, +contact.prevarication.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,evt043arg03place,, +contact.prevarication.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,,evt043arg04topic, +contact.prevarication.correspondence, communicated remotely with recipient about topic at place,evt044arg01communicator,,,, +contact.prevarication.correspondence,Communicator communicated remotely with about topic at place,,evt044arg02recipient,,, +contact.prevarication.correspondence,Communicator communicated remotely with recipient about topic at place,,,evt044arg03place,, +contact.prevarication.correspondence,Communicator communicated remotely with recipient about topic at place,,,,evt044arg04topic, +contact.prevarication.meet, met face-to-face with recipient about topic at place,evt045arg01communicator,,,, +contact.prevarication.meet,Communicator met face-to-face with about topic at place,,evt045arg02recipient,,, +contact.prevarication.meet,Communicator met face-to-face with recipient about topic at place,,,evt045arg03place,, +contact.prevarication.meet,Communicator met face-to-face with recipient about topic at place,,,,evt045arg04topic, +contact.publicstatementinperson.unspecified, communicated with participant at place,evt046arg01communicator,,,, +contact.publicstatementinperson.unspecified,Participant communicated with at place,,evt046arg02recipient,,, +contact.publicstatementinperson.unspecified,Participant communicated with participant at place,,,evt046arg03place,, +contact.publicstatementinperson.broadcast, communicated to recipient at place (one-way communication),evt047arg01communicator,,,, +contact.publicstatementinperson.broadcast,Communicator communicated to at place (one-way 
communication),,evt047arg02recipient,,, +contact.publicstatementinperson.broadcast,Communicator communicated to recipient at place (one-way communication),,,evt047arg03place,, +contact.requestadvise.unspecified, communicated with recipient about topic at place,evt048arg01communicator,,,, +contact.requestadvise.unspecified,Communicator communicated with about topic at place,,evt048arg02recipient,,, +contact.requestadvise.unspecified,Communicator communicated with recipient about topic at place,,,evt048arg03place,, +contact.requestadvise.unspecified,Communicator communicated with recipient about topic at place,,,,evt048arg04topic, +contact.requestadvise.broadcast, communicated to recipient about topic at place (one-way communication),evt049arg01communicator,,,, +contact.requestadvise.broadcast,Communicator communicated to about topic at place (one-way communication),,evt049arg02recipient,,, +contact.requestadvise.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,evt049arg03place,, +contact.requestadvise.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,,evt049arg04topic, +contact.requestadvise.correspondence, communicated remotely with recipient about topic at place,evt050arg01communicator,,,, +contact.requestadvise.correspondence,Communicator communicated remotely with about topic at place,,evt050arg02recipient,,, +contact.requestadvise.correspondence,Communicator communicated remotely with recipient about topic at place,,,evt050arg03place,, +contact.requestadvise.correspondence,Communicator communicated remotely with recipient about topic at place,,,,evt050arg04topic, +contact.requestadvise.meet, met face-to-face with recipient about topic at place,evt051arg01communicator,,,, +contact.requestadvise.meet,Communicator met face-to-face with about topic at place,,evt051arg02recipient,,, +contact.requestadvise.meet,Communicator met face-to-face with recipient about topic at 
place,,,evt051arg03place,, +contact.requestadvise.meet,Communicator met face-to-face with recipient about topic at place,,,,evt051arg04topic, +contact.threatencoerce.unspecified, communicated with recipient about topic at place,evt052arg01communicator,,,, +contact.threatencoerce.unspecified,Communicator communicated with about topic at place,,evt052arg02recipient,,, +contact.threatencoerce.unspecified,Communicator communicated with recipient about topic at place,,,evt052arg03place,, +contact.threatencoerce.unspecified,Communicator communicated with recipient about topic at place,,,,evt052arg04topic, +contact.threatencoerce.broadcast, communicated to recipient about topic at place (one-way communication),evt053arg01communicator,,,, +contact.threatencoerce.broadcast,Communicator communicated to about topic at place (one-way communication),,evt053arg02recipient,,, +contact.threatencoerce.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,evt053arg03place,, +contact.threatencoerce.broadcast,Communicator communicated to recipient about topic at place (one-way communication),,,,evt053arg04topic, +contact.threatencoerce.correspondence, communicated remotely with recipient about topic at place,evt054arg01communicator,,,, +contact.threatencoerce.correspondence,Communicator communicated remotely with about topic at place,,evt054arg02recipient,,, +contact.threatencoerce.correspondence,Communicator communicated remotely with recipient about topic at place,,,evt054arg03place,, +contact.threatencoerce.correspondence,Communicator communicated remotely with recipient about topic at place,,,,evt054arg04topic, +contact.threatencoerce.meet, met face-to-face with recipient about topic at place,evt055arg01communicator,,,, +contact.threatencoerce.meet,Communicator met face-to-face with about topic at place,,evt055arg02recipient,,, +contact.threatencoerce.meet,Communicator met face-to-face with recipient about topic at 
place,,,evt055arg03place,, +contact.threatencoerce.meet,Communicator met face-to-face with recipient about topic at place,,,,evt055arg04topic, +disaster.accidentcrash.accidentcrash, person in vehicle crashed into something at place,evt057arg01driverpassenger,,,, +disaster.accidentcrash.accidentcrash,Person in vehicle crashed into something at place,,evt057arg02vehicle,,, +disaster.accidentcrash.accidentcrash,Person in vehicle crashed into at place,,,evt057arg03crashobject,, +disaster.accidentcrash.accidentcrash,Person in vehicle crashed into something at place,,,,evt057arg04place, +disaster.diseaseoutbreak.diseaseoutbreak, disease broke out among victims or population at place,evt148arg01disease ,,,, +disaster.diseaseoutbreak.diseaseoutbreak,Disease broke out among victims or population at place,,evt148arg02victim ,,, +disaster.diseaseoutbreak.diseaseoutbreak,Disease broke out among victims or population at place,,,evt148arg03place ,, +disaster.fireexplosion.fireexplosion, caught fire or exploded from instrument at place,evt059arg01fireexplosionobject,,,, +disaster.fireexplosion.fireexplosion,Something caught fire or exploded from instrument at place,,evt059arg02instrument,,, +disaster.fireexplosion.fireexplosion,Something caught fire or exploded from instrument at place,,,evt059arg03place,, +genericcrime.genericcrime.genericcrime, committed a crime against victim at place,evt154arg01perpetrator ,,,, +genericcrime.genericcrime.genericcrime,Perpetrator committed a crime against at place,,evt154arg02victim ,,, +genericcrime.genericcrime.genericcrime,Perpetrator committed a crime against victim at place,,,evt154arg03place ,, +government.agreements.unspecified, and participant signed an agreement in place,evt060arg01participant,,,, +government.agreements.unspecified,Participant and signed an agreement in place,,evt060arg02participant,,, +government.agreements.unspecified,Participant and participant signed an agreement in place,,,evt060arg03place,, 
+government.agreements.acceptagreementcontractceasefire, and participant signed an agreement in place,evt061arg01participant,,,, +government.agreements.acceptagreementcontractceasefire,Participant and signed an agreement in place,,evt061arg02participant,,, +government.agreements.acceptagreementcontractceasefire,Participant and participant signed an agreement in place,,,evt061arg03place,, +government.agreements.rejectnullifyagreementcontractceasefire, rejected or nullified an agreement with participant in place,evt062arg01rejecternullifier,,,, +government.agreements.rejectnullifyagreementcontractceasefire,Rejecter rejected or nullified an agreement with in place,,evt062arg02otherparticipant,,, +government.agreements.rejectnullifyagreementcontractceasefire,Rejecter rejected or nullified an agreement with participant in place,,,evt062arg03place,, +government.agreements.violateagreement, violated an agreement with participant in place,evt063arg01violator,,,, +government.agreements.violateagreement,Violator violated an agreement with in place,,evt063arg02otherparticipant,,, +government.agreements.violateagreement,Violator violated an agreement with participant in place,,,evt063arg03place,, +government.convene.convene, convened somebody at place,evt145arg01convener ,,,, +government.convene.convene,Convener convened at place,,evt145arg02convenedthing ,,, +government.convene.convene,Convener convened somebody at place,,,evt145arg03place ,, +government.formation.unspecified, was formed by founder in place,evt064arg01gpe,,,, +government.formation.unspecified,Somebody was formed by in place,,evt064arg02founder,,, +government.formation.unspecified,Somebody was formed by founder in place,,,evt064arg03place,, +government.formation.mergegpe, merged with participant at place,evt065arg01participant,,,, +government.formation.mergegpe,Participant merged with at place,,evt065arg02participant,,, +government.formation.mergegpe,Participant merged with participant at 
place,,,evt065arg03place,, +government.formation.startgpe, was started by founder in place,evt066arg01gpe,,,, +government.formation.startgpe,Something was started by in place,,evt066arg02founder,,, +government.formation.startgpe,Something was started by founder in place,,,evt066arg03place,, +government.legislate.legislate, legislature enacted law in place,evt068arg01governmentbody,,,, +government.legislate.legislate,Government legislature enacted law in place,,evt068arg02law,,, +government.legislate.legislate,Government legislature enacted law in place,,,evt068arg03place,, +government.spy.spy, spied on something to the benefit of beneficiary in place,evt070arg01spy,,,, +government.spy.spy,Spy spied on to the benefit of beneficiary in place,,evt070arg02observedentity,,, +government.spy.spy,Spy spied on something to the benefit of in place,,,evt070arg03beneficiary,, +government.spy.spy,Spy spied on something to the benefit of beneficiary in place,,,,evt070arg04place, +government.vote.unspecified, voted for candidate on ballot with results in place,evt071arg01voter,,,, +government.vote.unspecified,Voter voted for on ballot with results in place,,evt071arg02candidate,,, +government.vote.unspecified,Voter voted for candidate on ballot with results in place,,,evt071arg03ballot,, +government.vote.unspecified,Voter voted for candidate on ballot with results in place,,,,evt071arg04result, +government.vote.unspecified,Voter voted for candidate on ballot with results in place,,,,,evt071arg05place +government.vote.castvote, voted for candidate on ballot with results in place,evt072arg01voter,,,, +government.vote.castvote,Voter voted for on ballot with results in place,,evt072arg02candidate,,, +government.vote.castvote,Voter voted for candidate on ballot with results in place,,,evt072arg03ballot,, +government.vote.castvote,Voter voted for candidate on ballot with results in place,,,,evt072arg04result, +government.vote.castvote,Voter voted for candidate on ballot with results in 
place,,,,,evt072arg05place +government.vote.violationspreventvote, prevented voter from voting for candidate on ballot in place,evt073arg01preventer,,,, +government.vote.violationspreventvote,Preventer prevented from voting for candidate on ballot in place,,evt073arg02voter,,, +government.vote.violationspreventvote,Preventer prevented voter from voting for on ballot in place,,,evt073arg03candidate,, +government.vote.violationspreventvote,Preventer prevented voter from voting for candidate on ballot in place,,,,evt073arg04ballot, +government.vote.violationspreventvote,Preventer prevented voter from voting for candidate on ballot in place,,,,,evt073arg05place +inspection.sensoryobserve.unspecified, observed something in place,evt074arg01observer,,,, +inspection.sensoryobserve.unspecified,Observer observed in place,,evt074arg02observedentity,,, +inspection.sensoryobserve.unspecified,Observer observed something in place,,,evt074arg03place,, +inspection.sensoryobserve.inspectpeopleorganization, inspected something in place,evt075arg01inspector,,,, +inspection.sensoryobserve.inspectpeopleorganization,Inspector inspected in place,,evt075arg02inspectedentity,,, +inspection.sensoryobserve.inspectpeopleorganization,Inspector inspected something in place,,,evt075arg03place,, +inspection.sensoryobserve.monitorelection, monitored something taking part in an election in place,evt076arg01monitor,,,, +inspection.sensoryobserve.monitorelection,Monitor monitored taking part in an election in place,,evt076arg02monitoredentity,,, +inspection.sensoryobserve.monitorelection,Monitor monitored something taking part in an election in place,,,evt076arg03place,, +inspection.sensoryobserve.physicalinvestigateinspect, inspected something in place,evt077arg01inspector,,,, +inspection.sensoryobserve.physicalinvestigateinspect,Inspector inspected in place,,evt077arg02inspectedentity,,, +inspection.sensoryobserve.physicalinvestigateinspect,Inspector inspected something in 
place,,,evt077arg03place,, +inspection.targetaimat.targetaimat, physically targeted target with instrument at place,evt153arg01targeter ,,,, +inspection.targetaimat.targetaimat,Targeter physically targeted with instrument at place,,evt153arg02target ,,, +inspection.targetaimat.targetaimat,Targeter physically targeted target with instrument at place,,,evt153arg03instrument ,, +inspection.targetaimat.targetaimat,Targeter physically targeted target with instrument at place,,,,evt153arg04place , +justice.arrestjaildetain.arrestjaildetain, arrested or jailed detainee for crime at place,evt079arg01jailer,,,, +justice.arrestjaildetain.arrestjaildetain,Jailer arrested or jailed for crime at place,,evt079arg02detainee,,, +justice.arrestjaildetain.arrestjaildetain,Jailer arrested or jailed detainee for crime at place,,,evt079arg03crime,, +justice.arrestjaildetain.arrestjaildetain,Jailer arrested or jailed detainee for crime at place,,,,evt079arg04place, +justice.initiatejudicialprocess.unspecified, initiated judicial process pertaining to defendant before court or judge for crime in place,evt080arg01prosecutor,,,, +justice.initiatejudicialprocess.unspecified,Prosecutor initiated judicial process pertaining to before court or judge for crime in place,,evt080arg02defendant,,, +justice.initiatejudicialprocess.unspecified,Prosecutor initiated judicial process pertaining to defendant before court or judge for crime in place,,,evt080arg03judgecourt,, +justice.initiatejudicialprocess.unspecified,Prosecutor initiated judicial process pertaining to defendant before court or judge for crime in place,,,,evt080arg04crime, +justice.initiatejudicialprocess.unspecified,Prosecutor initiated judicial process pertaining to defendant before court or judge for crime in place,,,,,evt080arg05place +justice.initiatejudicialprocess.chargeindict, charged or indicted defendant before court or judge for crime in place,evt081arg01prosecutor,,,, +justice.initiatejudicialprocess.chargeindict,Prosecutor 
charged or indicted before court or judge for crime in place,,evt081arg02defendant,,, +justice.initiatejudicialprocess.chargeindict,Prosecutor charged or indicted defendant before court or judge for crime in place,,,evt081arg03judgecourt,, +justice.initiatejudicialprocess.chargeindict,Prosecutor charged or indicted defendant before court or judge for crime in place,,,,evt081arg04crime, +justice.initiatejudicialprocess.chargeindict,Prosecutor charged or indicted defendant before court or judge for crime in place,,,,,evt081arg05place +justice.initiatejudicialprocess.trialhearing, tried defendant before court or judge for crime in place,evt082arg01prosecutor,,,, +justice.initiatejudicialprocess.trialhearing,Prosecutor tried before court or judge for crime in place,,evt082arg02defendant,,, +justice.initiatejudicialprocess.trialhearing,Prosecutor tried defendant before court or judge for crime in place,,,evt082arg03judgecourt,, +justice.initiatejudicialprocess.trialhearing,Prosecutor tried defendant before court or judge for crime in place,,,,evt082arg04crime, +justice.initiatejudicialprocess.trialhearing,Prosecutor tried defendant before court or judge for crime in place,,,,,evt082arg05place +justice.investigate.unspecified, investigated defendant in place,evt083arg01investigator,,,, +justice.investigate.unspecified,Investigator investigated in place,,evt083arg02defendant,,, +justice.investigate.unspecified,Investigator investigated defendant in place,,,evt083arg03place,, +justice.investigate.investigatecrime, investigated defendant for crime in place,evt084arg01investigator,,,, +justice.investigate.investigatecrime,Investigator investigated for crime in place,,evt084arg02defendant,,, +justice.investigate.investigatecrime,Investigator investigated defendant for crime in place,,,evt084arg03crime,, +justice.investigate.investigatecrime,Investigator investigated defendant for crime in place,,,,evt084arg04place, +justice.judicialconsequences.unspecified, court or judge 
decided consequences of crime committed by defendant in place,evt085arg01judgecourt,,,, +justice.judicialconsequences.unspecified,Court or judge decided consequences of crime committed by in place,,evt085arg02defendant,,, +justice.judicialconsequences.unspecified,Court or judge decided consequences of crime committed by defendant in place,,,evt085arg03crime,, +justice.judicialconsequences.unspecified,Court or judge decided consequences of crime committed by defendant in place,,,,evt085arg04place, +justice.judicialconsequences.convict, court or judge convicted defendant of crime in place,evt086arg01judgecourt,,,, +justice.judicialconsequences.convict,Court or judge convicted of crime in place,,evt086arg02defendant,,, +justice.judicialconsequences.convict,Court or judge convicted defendant of crime in place,,,evt086arg03crime,, +justice.judicialconsequences.convict,Court or judge convicted defendant of crime in place,,,,evt086arg04place, +justice.judicialconsequences.execute, executed defendant for crime in place,evt087arg01executioner,,,, +justice.judicialconsequences.execute,Executioner executed for crime in place,,evt087arg02defendant,,, +justice.judicialconsequences.execute,Executioner executed defendant for crime in place,,,evt087arg03crime,, +justice.judicialconsequences.execute,Executioner executed defendant for crime in place,,,,evt087arg04place, +justice.judicialconsequences.extradite, extradited defendant for crime from place to place,evt088arg01extraditer,,,, +justice.judicialconsequences.extradite,Extraditer extradited for crime from place to place,,evt088arg02defendant,,, +justice.judicialconsequences.extradite,Extraditer extradited defendant for crime from place to place,,,evt088arg03crime,, +justice.judicialconsequences.extradite,Extraditer extradited defendant for crime from place to place,,,,evt088arg04origin, +justice.judicialconsequences.extradite,Extraditer extradited defendant for crime from place to place,,,,,evt088arg05destination 
+life.die.unspecified, died at place from medical issue killed by killer,evt089arg01victim,,,, +life.die.unspecified,Victim died at place from medical issue killed by killer,,evt089arg02place,,, +life.die.unspecified,Victim died at place from medical issue killed by killer,,,evt089arg03killer,, +life.die.unspecified,Victim died at place from medical issue killed by killer,,,,evt089arg04medicalissue, +life.die.deathcausedbyviolentevents, killed victim using instrument or medical issue at place,evt090arg01killer,,,, +life.die.deathcausedbyviolentevents,Killer killed using instrument or medical issue at place,,evt090arg02victim,,, +life.die.deathcausedbyviolentevents,Killer killed victim using instrument or medical issue at place,,,evt090arg03instrument,, +life.die.deathcausedbyviolentevents,Killer killed victim using instrument or medical issue at place,,,,evt090arg04place, +life.die.deathcausedbyviolentevents,Killer killed victim using instrument or medical issue at place,,,,,evt090arg05medicalissue +life.die.nonviolentdeath, died at place from medical issue killed by killer,evt091arg01victim,,,, +life.die.nonviolentdeath,Victim died at place from medical issue killed by killer,,evt091arg02place,,, +life.die.nonviolentdeath,Victim died at place from medical issue killed by killer,,,evt091arg03killer,, +life.die.nonviolentdeath,Victim died at place from medical issue killed by killer,,,,evt091arg04medicalissue, +life.injure.unspecified, was injured by injurer with medical issue at place,evt092arg01victim,,,, +life.injure.unspecified,Victim was injured by with medical issue at place,,evt092arg02injurer,,, +life.injure.unspecified,Victim was injured by injurer with medical issue at place,,,evt092arg03place,, +life.injure.unspecified,Victim was injured by injurer with medical issue at place,,,,evt092arg04medicalissue, +life.injure.illnessdegradationhungerthirst, has extreme hunger or thirst from medical issue imposed by injurer at place,evt093arg01victim,,,, 
+life.injure.illnessdegradationhungerthirst,Victim has extreme hunger or thirst from medical issue imposed by injurer at place,,evt093arg02place,,, +life.injure.illnessdegradationhungerthirst,Victim has extreme hunger or thirst from medical issue imposed by injurer at place,,,evt093arg03injurer,, +life.injure.illnessdegradationhungerthirst,Victim has extreme hunger or thirst from medical issue imposed by injurer at place,,,,evt093arg04medicalissue, +life.injure.illnessdegradationphysical, person has some physical degradation from medical issue imposed by injurer at place,evt094arg01victim,,,, +life.injure.illnessdegradationphysical,Victim person has some physical degradation from medical issue imposed by injurer at place,,evt094arg02place,,, +life.injure.illnessdegradationphysical,Victim person has some physical degradation from medical issue imposed by injurer at place,,,evt094arg03injurer,, +life.injure.illnessdegradationphysical,Victim person has some physical degradation from medical issue imposed by injurer at place,,,,evt094arg04medicalissue, +life.injure.illnessdegredationsickness, has sickness or illness at place deliberately infected by injurer ,evt150arg01victim ,,,, +life.injure.illnessdegredationsickness,Victim has sickness or illness at place deliberately infected by ,,evt150arg02injurer ,,, +life.injure.illnessdegredationsickness,Victim has sickness or illness at place deliberately infected by injurer ,,,evt150arg03disease ,, +life.injure.illnessdegredationsickness,Victim has sickness or illness at place deliberately infected by injurer ,,,,evt150arg04place , +life.injure.injurycausedbyviolentevents, injured victim using instrument or medical issue at place,evt095arg01injurer,,,, +life.injure.injurycausedbyviolentevents,Injurer injured using instrument or medical issue at place,,evt095arg02victim,,, +life.injure.injurycausedbyviolentevents,Injurer injured victim using instrument or medical issue at place,,,evt095arg03instrument,, 
+life.injure.injurycausedbyviolentevents,Injurer injured victim using instrument or medical issue at place,,,,evt095arg04place, +life.injure.injurycausedbyviolentevents,Injurer injured victim using instrument or medical issue at place,,,,,evt095arg05medicalissue +manufacture.artifact.unspecified, manufactured or created or produced artifact using instrument at place,evt096arg01manufacturer,,,, +manufacture.artifact.unspecified,Manufacturer manufactured or created or produced using instrument at place,,evt096arg02artifact,,, +manufacture.artifact.unspecified,Manufacturer manufactured or created or produced artifact using at place,,,evt096arg03instrument,, +manufacture.artifact.unspecified,Manufacturer manufactured or created or produced artifact using instrument at place,,,,evt096arg04place, +manufacture.artifact.build, manufactured or created or produced artifact using instrument at place,evt097arg01manufacturer,,,, +manufacture.artifact.build,Manufacturer manufactured or created or produced using instrument at place,,evt097arg02artifact,,, +manufacture.artifact.build,Manufacturer manufactured or created or produced artifact using at place,,,evt097arg03instrument,, +manufacture.artifact.build,Manufacturer manufactured or created or produced artifact using instrument at place,,,,evt097arg04place, +manufacture.artifact.createintellectualproperty, manufactured or created or produced artifact using instrument at place,evt098arg01manufacturer,,,, +manufacture.artifact.createintellectualproperty,Manufacturer manufactured or created or produced using instrument at place,,evt098arg02artifact,,, +manufacture.artifact.createintellectualproperty,Manufacturer manufactured or created or produced artifact using at place,,,evt098arg03instrument,, +manufacture.artifact.createintellectualproperty,Manufacturer manufactured or created or produced artifact using instrument at place,,,,evt098arg04place, +manufacture.artifact.createmanufacture, manufactured or created or produced 
artifact using instrument at place,evt099arg01manufacturer,,,, +manufacture.artifact.createmanufacture,Manufacturer manufactured or created or produced using instrument at place,,evt099arg02artifact,,, +manufacture.artifact.createmanufacture,Manufacturer manufactured or created or produced artifact using at place,,,evt099arg03instrument,, +manufacture.artifact.createmanufacture,Manufacturer manufactured or created or produced artifact using instrument at place,,,,evt099arg04place, +medical.intervention.intervention, treater treated patient for medical issue with means at place,evt147arg01treater ,,,, +medical.intervention.intervention,Treater treated patient for medical issue with means at place,,evt147arg02patient ,,, +medical.intervention.intervention,Treater treated patient for medical issue with means at place,,,evt147arg03medicalissue ,, +medical.intervention.intervention,Treater treated patient for medical issue with means at place,,,,evt147arg04instrument , +medical.intervention.intervention,Treater treated patient for medical issue with means at place,,,,,evt147arg05place +movement.transportartifact.unspecified, transported artifact in vehicle from place to place,evt100arg01transporter,,,, +movement.transportartifact.unspecified,Transporter transported in vehicle from place to place,,evt100arg02artifact,,, +movement.transportartifact.unspecified,Transporter transported artifact in from place to place,,,evt100arg03vehicle,, +movement.transportartifact.unspecified,Transporter transported artifact in vehicle from place to place,,,,evt100arg04origin, +movement.transportartifact.unspecified,Transporter transported artifact in vehicle from place to place,,,,,evt100arg05destination +movement.transportartifact.bringcarryunload, transported artifact in vehicle from place to place,evt101arg01transporter,,,, +movement.transportartifact.bringcarryunload,Transporter transported in vehicle from place to place,,evt101arg02artifact,,, 
+movement.transportartifact.bringcarryunload,Transporter transported artifact in from place to place,,,evt101arg03vehicle,, +movement.transportartifact.bringcarryunload,Transporter transported artifact in vehicle from place to place,,,,evt101arg04origin, +movement.transportartifact.bringcarryunload,Transporter transported artifact in vehicle from place to place,,,,,evt101arg05destination +movement.transportartifact.disperseseparate, transported artifact in vehicle from place to place,evt102arg01transporter,,,, +movement.transportartifact.disperseseparate,Transporter transported in vehicle from place to place,,evt102arg02artifact,,, +movement.transportartifact.disperseseparate,Transporter transported artifact in from place to place,,,evt102arg03vehicle,, +movement.transportartifact.disperseseparate,Transporter transported artifact in vehicle from place to place,,,,evt102arg04origin, +movement.transportartifact.disperseseparate,Transporter transported artifact in vehicle from place to place,,,,,evt102arg05destination +movement.transportartifact.fall, fell from place to place,evt103arg01artifact,,,, +movement.transportartifact.fall,Artifact fell from place to place,,evt103arg02origin,,, +movement.transportartifact.fall,Artifact fell from place to place,,,evt103arg03destination,, +movement.transportartifact.grantentry, grants artifact entry to place from place,evt104arg01transporter,,,, +movement.transportartifact.grantentry,Transporter grants entry to place from place,,evt104arg02artifact,,, +movement.transportartifact.grantentry,Transporter grants artifact entry to place from place,,,evt104arg03origin,, +movement.transportartifact.grantentry,Transporter grants artifact entry to place from place,,,,evt104arg04destination, +movement.transportartifact.hide, concealed artifact in place transported in vehicle from place,evt105arg01transporter,,,, +movement.transportartifact.hide,Transporter concealed in place transported in vehicle from place,,evt105arg02artifact,,, 
+movement.transportartifact.hide,Transporter concealed artifact in place transported in vehicle from place,,,evt105arg03hidingplace,, +movement.transportartifact.hide,Transporter concealed artifact in place transported in vehicle from place,,,,evt105arg04vehicle, +movement.transportartifact.hide,Transporter concealed artifact in place transported in vehicle from place,,,,,evt105arg05origin +movement.transportartifact.lossofcontrol, lost control of moving something at place,evt146arg01controller ,,,, +movement.transportartifact.lossofcontrol,Controller lost control of moving at place,,evt146arg02controlledthing ,,, +movement.transportartifact.lossofcontrol,Controller lost control of moving something at place,,,evt146arg03place ,, +movement.transportartifact.nonviolentthrowlaunch, transported artifact in vehicle from place to place,evt106arg01transporter,,,, +movement.transportartifact.nonviolentthrowlaunch,Transporter transported in vehicle from place to place,,evt106arg02artifact,,, +movement.transportartifact.nonviolentthrowlaunch,Transporter transported artifact in from place to place,,,evt106arg03vehicle,, +movement.transportartifact.nonviolentthrowlaunch,Transporter transported artifact in vehicle from place to place,,,,evt106arg04origin, +movement.transportartifact.nonviolentthrowlaunch,Transporter transported artifact in vehicle from place to place,,,,,evt106arg05destination +movement.transportartifact.prevententry, prevents transporter from transporting artifact from place to place,evt107arg01preventer,,,, +movement.transportartifact.prevententry,Preventer prevents from transporting artifact from place to place,,evt107arg02transporter,,, +movement.transportartifact.prevententry,Preventer prevents transporter from transporting from place to place,,,evt107arg03artifact,, +movement.transportartifact.prevententry,Preventer prevents transporter from transporting artifact from place to place,,,,evt107arg04origin, +movement.transportartifact.prevententry,Preventer 
prevents transporter from transporting artifact from place to place,,,,,evt107arg05destination +movement.transportartifact.preventexit, prevents transporter from transporting artifact from place to place,evt108arg01preventer,,,, +movement.transportartifact.preventexit,Preventer prevents from transporting artifact from place to place,,evt108arg02transporter,,, +movement.transportartifact.preventexit,Preventer prevents transporter from transporting from place to place,,,evt108arg03artifact,, +movement.transportartifact.preventexit,Preventer prevents transporter from transporting artifact from place to place,,,,evt108arg04origin, +movement.transportartifact.preventexit,Preventer prevents transporter from transporting artifact from place to place,,,,,evt108arg05destination +movement.transportartifact.receiveimport, transported artifact in vehicle from place to place,evt109arg01transporter,,,, +movement.transportartifact.receiveimport,Transporter transported in vehicle from place to place,,evt109arg02artifact,,, +movement.transportartifact.receiveimport,Transporter transported artifact in from place to place,,,evt109arg03vehicle,, +movement.transportartifact.receiveimport,Transporter transported artifact in vehicle from place to place,,,,evt109arg04origin, +movement.transportartifact.receiveimport,Transporter transported artifact in vehicle from place to place,,,,,evt109arg05destination +movement.transportartifact.sendsupplyexport, transported artifact in vehicle from place to place,evt110arg01transporter,,,, +movement.transportartifact.sendsupplyexport,Transporter transported in vehicle from place to place,,evt110arg02artifact,,, +movement.transportartifact.sendsupplyexport,Transporter transported artifact in from place to place,,,evt110arg03vehicle,, +movement.transportartifact.sendsupplyexport,Transporter transported artifact in vehicle from place to place,,,,evt110arg04origin, +movement.transportartifact.sendsupplyexport,Transporter transported artifact in vehicle 
from place to place,,,,,evt110arg05destination +movement.transportartifact.smuggleextract, transported artifact in vehicle from place to place,evt111arg01transporter,,,, +movement.transportartifact.smuggleextract,Transporter transported in vehicle from place to place,,evt111arg02artifact,,, +movement.transportartifact.smuggleextract,Transporter transported artifact in from place to place,,,evt111arg03vehicle,, +movement.transportartifact.smuggleextract,Transporter transported artifact in vehicle from place to place,,,,evt111arg04origin, +movement.transportartifact.smuggleextract,Transporter transported artifact in vehicle from place to place,,,,,evt111arg05destination +movement.transportperson.unspecified, transported artifact in vehicle from place to place,evt112arg01transporter,,,, +movement.transportperson.unspecified,Transporter transported in vehicle from place to place,,evt112arg02passenger,,, +movement.transportperson.unspecified,Transporter transported artifact in from place to place,,,evt112arg03vehicle,, +movement.transportperson.unspecified,Transporter transported artifact in vehicle from place to place,,,,evt112arg04origin, +movement.transportperson.unspecified,Transporter transported artifact in vehicle from place to place,,,,,evt112arg05destination +movement.transportperson.bringcarryunload, transported artifact in vehicle from place to place,evt113arg01transporter,,,, +movement.transportperson.bringcarryunload,Transporter transported in vehicle from place to place,,evt113arg02passenger,,, +movement.transportperson.bringcarryunload,Transporter transported artifact in from place to place,,,evt113arg03vehicle,, +movement.transportperson.bringcarryunload,Transporter transported artifact in vehicle from place to place,,,,evt113arg04origin, +movement.transportperson.bringcarryunload,Transporter transported artifact in vehicle from place to place,,,,,evt113arg05destination +movement.transportperson.disperseseparate, transported artifact in vehicle from 
place to place,evt114arg01transporter,,,, +movement.transportperson.disperseseparate,Transporter transported in vehicle from place to place,,evt114arg02passenger,,, +movement.transportperson.disperseseparate,Transporter transported artifact in from place to place,,,evt114arg03vehicle,, +movement.transportperson.disperseseparate,Transporter transported artifact in vehicle from place to place,,,,evt114arg04origin, +movement.transportperson.disperseseparate,Transporter transported artifact in vehicle from place to place,,,,,evt114arg05destination +movement.transportperson.evacuationrescue, transported artifact in vehicle from place to place,evt115arg01transporter,,,, +movement.transportperson.evacuationrescue,Transporter transported in vehicle from place to place,,evt115arg02passenger,,, +movement.transportperson.evacuationrescue,Transporter transported artifact in from place to place,,,evt115arg03vehicle,, +movement.transportperson.evacuationrescue,Transporter transported artifact in vehicle from place to place,,,,evt115arg04origin, +movement.transportperson.evacuationrescue,Transporter transported artifact in vehicle from place to place,,,,,evt115arg05destination +movement.transportperson.fall, fell from place to place,evt116arg01passenger,,,, +movement.transportperson.fall,Artifact fell from place to place,,evt116arg02origin,,, +movement.transportperson.fall,Artifact fell from place to place,,,evt116arg03destination,, +movement.transportperson.grantentryasylum, grants entry to transporter transporting passenger from place to place,evt117arg01granter,,,, +movement.transportperson.grantentryasylum,Granter grants entry to transporting passenger from place to place,,evt117arg02transporter,,, +movement.transportperson.grantentryasylum,Granter grants entry to transporter transporting from place to place,,,evt117arg03passenger,, +movement.transportperson.grantentryasylum,Granter grants entry to transporter transporting passenger from place to place,,,,evt117arg04origin, 
+movement.transportperson.grantentryasylum,Granter grants entry to transporter transporting passenger from place to place,,,,,evt117arg05destination +movement.transportperson.hide, concealed artifact in place transported in vehicle from place,evt118arg01transporter,,,, +movement.transportperson.hide,Transporter concealed in place transported in vehicle from place,,evt118arg02passenger,,, +movement.transportperson.hide,Transporter concealed artifact in place transported in vehicle from place,,,evt118arg03hidingplace,, +movement.transportperson.hide,Transporter concealed artifact in place transported in vehicle from place,,,,evt118arg04vehicle, +movement.transportperson.hide,Transporter concealed artifact in place transported in vehicle from place,,,,,evt118arg05origin +movement.transportperson.prevententry, prevents transporter from transporting artifact from place to place,evt119arg01preventer,,,, +movement.transportperson.prevententry,Preventer prevents from transporting artifact from place to place,,evt119arg02transporter,,, +movement.transportperson.prevententry,Preventer prevents transporter from transporting from place to place,,,evt119arg03passenger,, +movement.transportperson.prevententry,Preventer prevents transporter from transporting artifact from place to place,,,,evt119arg04origin, +movement.transportperson.prevententry,Preventer prevents transporter from transporting artifact from place to place,,,,,evt119arg05destination +movement.transportperson.preventexit, prevents transporter from transporting artifact from place to place,evt120arg01preventer,,,, +movement.transportperson.preventexit,Preventer prevents from transporting artifact from place to place,,evt120arg02transporter,,, +movement.transportperson.preventexit,Preventer prevents transporter from transporting from place to place,,,evt120arg03passenger,, +movement.transportperson.preventexit,Preventer prevents transporter from transporting artifact from place to place,,,,evt120arg04origin, 
+movement.transportperson.preventexit,Preventer prevents transporter from transporting artifact from place to place,,,,,evt120arg05destination +movement.transportperson.selfmotion, moved in vehicle from place to place,evt121arg01transporter,,,, +movement.transportperson.selfmotion,Transporter moved in from place to place,,evt121arg02vehicle,,, +movement.transportperson.selfmotion,Transporter moved in vehicle from place to place,,,evt121arg03origin,, +movement.transportperson.selfmotion,Transporter moved in vehicle from place to place,,,,evt121arg04destination, +movement.transportperson.smuggleextract, transported passenger in vehicle from place to place,evt122arg01transporter,,,, +movement.transportperson.smuggleextract,Transporter transported in vehicle from place to place,,evt122arg02passenger,,, +movement.transportperson.smuggleextract,Transporter transported passenger in from place to place,,,evt122arg03vehicle,, +movement.transportperson.smuggleextract,Transporter transported passenger in vehicle from place to place,,,,evt122arg04origin, +movement.transportperson.smuggleextract,Transporter transported passenger in vehicle from place to place,,,,,evt122arg05destination +personnel.elect.unspecified, elected candidate in place,evt123arg01voter,,,, +personnel.elect.unspecified,Voter elected in place,,evt123arg02candidate,,, +personnel.elect.unspecified,Voter elected candidate in place,,,evt123arg03place,, +personnel.elect.winelection, elected candidate in place,evt124arg01voter,,,, +personnel.elect.winelection,Voter elected in place,,evt124arg02candidate,,, +personnel.elect.winelection,Voter elected candidate in place,,,evt124arg03place,, +personnel.endposition.unspecified, stopped working at place of employment in place,evt125arg01employee,,,, +personnel.endposition.unspecified,Employee stopped working at in place,,evt125arg02placeofemployment,,, +personnel.endposition.unspecified,Employee stopped working at place of employment in place,,,evt125arg03place,, 
+personnel.endposition.firinglayoff, stopped working at place of employment in place,evt126arg01employee,,,, +personnel.endposition.firinglayoff,Employee stopped working at in place,,evt126arg02placeofemployment,,, +personnel.endposition.firinglayoff,Employee stopped working at place of employment in place,,,evt126arg03place,, +personnel.endposition.quitretire, stopped working at place of employment in place,evt127arg01employee,,,, +personnel.endposition.quitretire,Employee stopped working at in place,,evt127arg02placeofemployment,,, +personnel.endposition.quitretire,Employee stopped working at place of employment in place,,,evt127arg03place,, +personnel.startposition.unspecified, started working at place of employment in place,evt128arg01employee,,,, +personnel.startposition.unspecified,Employee started working at in place,,evt128arg02placeofemployment,,, +personnel.startposition.unspecified,Employee started working at place of employment in place,,,evt128arg03place,, +personnel.startposition.hiring, started working at place of employment in place,evt129arg01employee,,,, +personnel.startposition.hiring,Employee started working at in place,,evt129arg02placeofemployment,,, +personnel.startposition.hiring,Employee started working at place of employment in place,,,evt129arg03place,, +transaction.transaction.unspecified,A transaction occurred between and participant for the benefit of beneficiary at place,evt130arg01participant,,,, +transaction.transaction.unspecified,A transaction occurred between participant and for the benefit of beneficiary at place,,evt130arg02participant,,, +transaction.transaction.unspecified,A transaction occurred between participant and participant for the benefit of at place,,,evt130arg03beneficiary,, +transaction.transaction.unspecified,A transaction occurred between participant and participant for the benefit of beneficiary at place,,,,evt130arg04place, +transaction.transaction.embargosanction, prevented giver from giving artifact money to 
recipient at place,evt131arg01preventer,,,, +transaction.transaction.embargosanction,Preventer prevented from giving artifact money to recipient at place,,evt131arg02giver,,, +transaction.transaction.embargosanction,Preventer prevented giver from giving artifact money to at place,,,evt131arg03recipient,, +transaction.transaction.embargosanction,Preventer prevented giver from giving to recipient at place,,,,evt131arg04artifactmoney, +transaction.transaction.embargosanction,Preventer prevented giver from giving artifact money to recipient at place,,,,,evt131arg05place +transaction.transaction.giftgrantprovideaid, gave something to recipient for the benefit of beneficiary at place,evt132arg01giver,,,, +transaction.transaction.giftgrantprovideaid,Giver gave something to for the benefit of beneficiary at place,,evt132arg02recipient,,, +transaction.transaction.giftgrantprovideaid,Giver gave something to recipient for the benefit of at place,,,evt132arg03beneficiary,, +transaction.transaction.giftgrantprovideaid,Giver gave something to recipient for the benefit of beneficiary at place,,,,evt132arg04place, +transaction.transfermoney.unspecified, gave money to recipient for the benefit of beneficiary at place,evt133arg01giver,,,, +transaction.transfermoney.unspecified,Giver gave money to for the benefit of beneficiary at place,,evt133arg02recipient,,, +transaction.transfermoney.unspecified,Giver gave money to recipient for the benefit of at place,,,evt133arg03beneficiary,, +transaction.transfermoney.unspecified,Giver gave money to recipient for the benefit of beneficiary at place,,,,evt133arg04money, +transaction.transfermoney.unspecified,Giver gave money to recipient for the benefit of beneficiary at place,,,,,evt133arg05place +transaction.transfermoney.borrowlend, gave money to recipient for the benefit of beneficiary at place,evt134arg01giver,,,, +transaction.transfermoney.borrowlend,Giver gave money to for the benefit of beneficiary at place,,evt134arg02recipient,,, 
+transaction.transfermoney.borrowlend,Giver gave money to recipient for the benefit of at place,,,evt134arg03beneficiary,, +transaction.transfermoney.borrowlend,Giver gave money to recipient for the benefit of beneficiary at place,,,,evt134arg04money, +transaction.transfermoney.borrowlend,Giver gave money to recipient for the benefit of beneficiary at place,,,,,evt134arg05place +transaction.transfermoney.embargosanction, prevented giver from giving artifact money to recipient at place,evt135arg01preventer,,,, +transaction.transfermoney.embargosanction,Preventer prevented from giving artifact money to recipient at place,,evt135arg02giver,,, +transaction.transfermoney.embargosanction,Preventer prevented giver from giving artifact money to at place,,,evt135arg03recipient,, +transaction.transfermoney.embargosanction,Preventer prevented giver from giving to recipient at place,,,,evt135arg04money, +transaction.transfermoney.embargosanction,Preventer prevented giver from giving artifact money to recipient at place,,,,,evt135arg05place +transaction.transfermoney.giftgrantprovideaid, gave money to recipient for the benefit of beneficiary at place,evt136arg01giver,,,, +transaction.transfermoney.giftgrantprovideaid,Giver gave money to for the benefit of beneficiary at place,,evt136arg02recipient,,, +transaction.transfermoney.giftgrantprovideaid,Giver gave money to recipient for the benefit of at place,,,evt136arg03beneficiary,, +transaction.transfermoney.giftgrantprovideaid,Giver gave money to recipient for the benefit of beneficiary at place,,,,evt136arg04money, +transaction.transfermoney.giftgrantprovideaid,Giver gave money to recipient for the benefit of beneficiary at place,,,,,evt136arg05place +transaction.transfermoney.payforservice, gave money to recipient for the benefit of beneficiary at place,evt137arg01giver,,,, +transaction.transfermoney.payforservice,Giver gave money to for the benefit of beneficiary at place,,evt137arg02recipient,,, 
+transaction.transfermoney.payforservice,Giver gave money to recipient for the benefit of at place,,,evt137arg03beneficiary,, +transaction.transfermoney.payforservice,Giver gave money to recipient for the benefit of beneficiary at place,,,,evt137arg04money, +transaction.transfermoney.payforservice,Giver gave money to recipient for the benefit of beneficiary at place,,,,,evt137arg05place +transaction.transfermoney.purchase, gave money to recipient for the benefit of beneficiary at place,evt138arg01giver,,,, +transaction.transfermoney.purchase,Giver gave money to for the benefit of beneficiary at place,,evt138arg02recipient,,, +transaction.transfermoney.purchase,Giver gave money to recipient for the benefit of at place,,,evt138arg03beneficiary,, +transaction.transfermoney.purchase,Giver gave money to recipient for the benefit of beneficiary at place,,,,evt138arg04money, +transaction.transfermoney.purchase,Giver gave money to recipient for the benefit of beneficiary at place,,,,,evt138arg05place +transaction.transferownership.unspecified, gave artifact to recipient for the benefit of beneficiary at place,evt139arg01giver,,,, +transaction.transferownership.unspecified,Giver gave artifact to for the benefit of beneficiary at place,,evt139arg02recipient,,, +transaction.transferownership.unspecified,Giver gave artifact to recipient for the benefit of at place,,,evt139arg03beneficiary,, +transaction.transferownership.unspecified,Giver gave to recipient for the benefit of beneficiary at place,,,,evt139arg04artifact, +transaction.transferownership.unspecified,Giver gave artifact to recipient for the benefit of beneficiary at place,,,,,evt139arg05place +transaction.transferownership.borrowlend, gave artifact to recipient for the benefit of beneficiary at place,evt140arg01giver,,,, +transaction.transferownership.borrowlend,Giver gave artifact to for the benefit of beneficiary at place,,evt140arg02recipient,,, +transaction.transferownership.borrowlend,Giver gave artifact to 
recipient for the benefit of at place,,,evt140arg03beneficiary,, +transaction.transferownership.borrowlend,Giver gave to recipient for the benefit of beneficiary at place,,,,evt140arg04artifact, +transaction.transferownership.borrowlend,Giver gave artifact to recipient for the benefit of beneficiary at place,,,,,evt140arg05place +transaction.transferownership.embargosanction, prevented giver from giving artifact money to recipient at place,evt141arg01preventer,,,, +transaction.transferownership.embargosanction,Preventer prevented from giving artifact money to recipient at place,,evt141arg02giver,,, +transaction.transferownership.embargosanction,Preventer prevented giver from giving artifact money to at place,,,evt141arg03recipient,, +transaction.transferownership.embargosanction,Preventer prevented giver from giving to recipient at place,,,,evt141arg04artifact, +transaction.transferownership.embargosanction,Preventer prevented giver from giving artifact money to recipient at place,,,,,evt141arg05place +transaction.transferownership.giftgrantprovideaid, gave artifact to recipient for the benefit of beneficiary at place,evt142arg01giver,,,, +transaction.transferownership.giftgrantprovideaid,Giver gave artifact to for the benefit of beneficiary at place,,evt142arg02recipient,,, +transaction.transferownership.giftgrantprovideaid,Giver gave artifact to recipient for the benefit of at place,,,evt142arg03beneficiary,, +transaction.transferownership.giftgrantprovideaid,Giver gave to recipient for the benefit of beneficiary at place,,,,evt142arg04artifact, +transaction.transferownership.giftgrantprovideaid,Giver gave artifact to recipient for the benefit of beneficiary at place,,,,,evt142arg05place +transaction.transferownership.purchase, gave artifact to recipient for the benefit of beneficiary at place,evt143arg01giver,,,, +transaction.transferownership.purchase,Giver gave artifact to for the benefit of beneficiary at place,,evt143arg02recipient,,, 
+transaction.transferownership.purchase,Giver gave artifact to recipient for the benefit of at place,,,evt143arg03beneficiary,, +transaction.transferownership.purchase,Giver gave to recipient for the benefit of beneficiary at place,,,,evt143arg04artifact, +transaction.transferownership.purchase,Giver gave artifact to recipient for the benefit of beneficiary at place,,,,,evt143arg05place +transaction.transaction.transfercontrol, transferred control of something to recipient for the benefit of beneficiary in place,evt144arg01giver,,,, +transaction.transaction.transfercontrol,Giver transferred control of something to for the benefit of beneficiary in place,,evt144arg02recipient,,, +transaction.transaction.transfercontrol,Giver transferred control of something to recipient for the benefit of in place,,,evt144arg03beneficiary,, +transaction.transaction.transfercontrol,Giver transferred control of to recipient for the benefit of beneficiary in place,,,,evt144arg04territoryorfacility, +transaction.transaction.transfercontrol,Giver transferred control of something to recipient for the benefit of beneficiary in place,,,,,evt144arg05place diff --git a/aida_ontology_new.csv b/aida_ontology_new.csv deleted file mode 100644 index 144b7c6be4d84a0528c10774fd8b9dbcfcb8f617..0000000000000000000000000000000000000000 --- a/aida_ontology_new.csv +++ /dev/null @@ -1,150 +0,0 @@ -event_type,template,arg1,arg2,arg3,arg4,arg5 -artifactexistence.artifactfailure.mechanicalfailure,what is the in ,evt152arg01mechanicalartifact ,evt152arg02instrument ,evt152arg03place ,, -artifactexistence.damagedestroy.unspecified,what is the in ,evt001arg01damagerdestroyer,evt001arg02artifact,evt001arg03instrument,evt001arg04place, -artifactexistence.damagedestroy.damage,what is the in ,evt002arg01damager,evt002arg02artifact,evt002arg03instrument,evt002arg04place, -artifactexistence.damagedestroy.destroy,what is the in ,evt003arg01destroyer,evt003arg02artifact,evt003arg03instrument,evt003arg04place, 
-artifactexistence.shortage.shortage,what is the in ,evt149arg01experiencer ,evt149arg02supply ,evt149arg03place ,, -conflict.attack.unspecified,what is the in ,evt004arg01attacker,evt004arg02target,evt004arg03instrument,evt004arg04place, -conflict.attack.airstrikemissilestrike,what is the in ,evt005arg01attacker,evt005arg02target,evt005arg03instrument,evt005arg04place, -conflict.attack.biologicalchemicalpoisonattack,what is the in ,evt006arg01attacker,evt006arg02target,evt006arg03instrument,evt006arg04place, -conflict.attack.bombing,what is the in ,evt007arg01attacker,evt007arg02target,evt007arg03instrument,evt007arg04place, -conflict.attack.firearmattack,what is the in ,evt008arg01attacker,evt008arg02target,evt008arg03instrument,evt008arg04place, -conflict.attack.hanging,what is the in ,evt009arg01attacker,evt009arg02target,evt009arg03instrument,evt009arg04place, -conflict.attack.invade,what is the in ,evt010arg01attacker,evt010arg02target,evt010arg03instrument,evt010arg04place, -conflict.attack.selfdirectedbattle,what is the in ,evt011arg01attacker,evt011arg02target,evt011arg03instrument,evt011arg04place, -conflict.attack.setfire,what is the in ,evt012arg01attacker,evt012arg02target,evt012arg03instrument,evt012arg04place, -conflict.attack.stabbing,what is the in ,evt013arg01attacker,evt013arg02target,evt013arg03instrument,evt013arg04place, -conflict.attack.stealrobhijack,what is the in ,evt014arg01attacker,evt014arg02target,evt014arg03instrument,evt014arg04place,evt014arg05artifact -conflict.attack.strangling,what is the in ,evt015arg01attacker,evt015arg02target,evt015arg03instrument,evt015arg04place, -conflict.coup.coup,what is the in ,evt151arg01deposedentity ,evt151arg02deposingentity ,evt151arg03place ,, -conflict.demonstrate.unspecified,what is the in ,evt016arg01demonstrator,evt016arg02place,,, -conflict.demonstrate.marchprotestpoliticalgathering,what is the in ,evt017arg01demonstrator,evt017arg02place,,, -conflict.yield.unspecified,what is the in 
,evt018arg01yielder,evt018arg02recipient,evt018arg03place,, -conflict.yield.retreat,what is the in ,evt019arg01retreater,evt019arg02origin,evt019arg03destination,, -conflict.yield.surrender,what is the in ,evt020arg01surrenderer,evt020arg02recipient,evt020arg03place,, -contact.collaborate.unspecified,what is the in ,evt021arg01participant,evt021arg02participant,evt021arg03place,, -contact.collaborate.correspondence,what is the in ,evt022arg01participant,evt022arg02participant,evt022arg03place,, -contact.collaborate.meet,what is the in ,evt023arg01participant,evt023arg02participant,evt023arg03place,, -contact.commandorder.unspecified,what is the in ,evt024arg01communicator,evt024arg02recipient,evt024arg03place,evt024arg04topic, -contact.commandorder.broadcast,what is the in ,evt025arg01communicator,evt025arg02recipient,evt025arg03place,evt025arg04topic, -contact.commandorder.correspondence,what is the in ,evt026arg01communicator,evt026arg02recipient,evt026arg03place,evt026arg04topic, -contact.commandorder.meet,what is the in ,evt027arg01communicator,evt027arg02recipient,evt027arg03place,evt027arg04topic, -contact.commitmentpromiseexpressintent.unspecified,what is the in ,evt028arg01communicator,evt028arg02recipient,evt028arg03place,evt028arg04topic, -contact.commitmentpromiseexpressintent.broadcast,what is the in ,evt029arg01communicator,evt029arg02recipient,evt029arg03place,evt029arg04topic, -contact.commitmentpromiseexpressintent.correspondence,what is the in ,evt030arg01communicator,evt030arg02recipient,evt030arg03place,evt030arg04topic, -contact.commitmentpromiseexpressintent.meet,what is the in ,evt031arg01communicator,evt031arg02recipient,evt031arg03place,evt031arg04topic, -contact.discussion.unspecified,what is the in ,evt032arg01participant,evt032arg02participant,evt032arg03place,, -contact.discussion.correspondence,what is the in ,evt033arg01participant,evt033arg02participant,evt033arg03place,, -contact.discussion.meet,what is the in 
,evt034arg01participant,evt034arg02participant,evt034arg03place,, -contact.funeralvigil.unspecified,what is the in ,evt035arg01participant,evt035arg02participant,evt035arg03deceased,evt035arg04place, -contact.funeralvigil.meet,what is the in ,evt036arg01participant,evt036arg02participant,evt036arg03deceased,evt036arg04place, -contact.mediastatement.unspecified,what is the in ,evt037arg01communicator,evt037arg02recipient,evt037arg03place,, -contact.mediastatement.broadcast,what is the in ,evt038arg01communicator,evt038arg02recipient,evt038arg03place,, -contact.negotiate.unspecified,what is the in ,evt039arg01participant,evt039arg02participant,evt039arg03place,evt039arg04topic, -contact.negotiate.correspondence,what is the in ,evt040arg01participant,evt040arg02participant,evt040arg03place,evt040arg04topic, -contact.negotiate.meet,what is the in ,evt041arg01participant,evt041arg02participant,evt041arg03place,evt041arg04topic, -contact.prevarication.unspecified,what is the in ,evt042arg01communicator,evt042arg02recipient,evt042arg03place,evt042arg04topic, -contact.prevarication.broadcast,what is the in ,evt043arg01communicator,evt043arg02recipient,evt043arg03place,evt043arg04topic, -contact.prevarication.correspondence,what is the in ,evt044arg01communicator,evt044arg02recipient,evt044arg03place,evt044arg04topic, -contact.prevarication.meet,what is the in ,evt045arg01communicator,evt045arg02recipient,evt045arg03place,evt045arg04topic, -contact.publicstatementinperson.unspecified,what is the in ,evt046arg01communicator,evt046arg02recipient,evt046arg03place,, -contact.publicstatementinperson.broadcast,what is the in ,evt047arg01communicator,evt047arg02recipient,evt047arg03place,, -contact.requestadvise.unspecified,what is the in ,evt048arg01communicator,evt048arg02recipient,evt048arg03place,evt048arg04topic, -contact.requestadvise.broadcast,what is the in ,evt049arg01communicator,evt049arg02recipient,evt049arg03place,evt049arg04topic, 
-contact.requestadvise.correspondence,what is the in ,evt050arg01communicator,evt050arg02recipient,evt050arg03place,evt050arg04topic, -contact.requestadvise.meet,what is the in ,evt051arg01communicator,evt051arg02recipient,evt051arg03place,evt051arg04topic, -contact.threatencoerce.unspecified,what is the in ,evt052arg01communicator,evt052arg02recipient,evt052arg03place,evt052arg04topic, -contact.threatencoerce.broadcast,what is the in ,evt053arg01communicator,evt053arg02recipient,evt053arg03place,evt053arg04topic, -contact.threatencoerce.correspondence,what is the in ,evt054arg01communicator,evt054arg02recipient,evt054arg03place,evt054arg04topic, -contact.threatencoerce.meet,what is the in ,evt055arg01communicator,evt055arg02recipient,evt055arg03place,evt055arg04topic, -disaster.accidentcrash.accidentcrash,what is the in ,evt057arg01driverpassenger,evt057arg02vehicle,evt057arg03crashobject,evt057arg04place, -disaster.diseaseoutbreak.diseaseoutbreak,what is the in ,evt148arg01disease ,evt148arg02victim ,evt148arg03place ,, -disaster.fireexplosion.fireexplosion,what is the in ,evt059arg01fireexplosionobject,evt059arg02instrument,evt059arg03place,, -genericcrime.genericcrime.genericcrime,what is the in ,evt154arg01perpetrator ,evt154arg02victim ,evt154arg03place ,, -government.agreements.unspecified,what is the in ,evt060arg01participant,evt060arg02participant,evt060arg03place,, -government.agreements.acceptagreementcontractceasefire,what is the in ,evt061arg01participant,evt061arg02participant,evt061arg03place,, -government.agreements.rejectnullifyagreementcontractceasefire,what is the in ,evt062arg01rejecternullifier,evt062arg02otherparticipant,evt062arg03place,, -government.agreements.violateagreement,what is the in ,evt063arg01violator,evt063arg02otherparticipant,evt063arg03place,, -government.convene.convene,what is the in ,evt145arg01convener ,evt145arg02convenedthing ,evt145arg03place ,, -government.formation.unspecified,what is the in 
,evt064arg01gpe,evt064arg02founder,evt064arg03place,, -government.formation.mergegpe,what is the in ,evt065arg01participant,evt065arg02participant,evt065arg03place,, -government.formation.startgpe,what is the in ,evt066arg01gpe,evt066arg02founder,evt066arg03place,, -government.legislate.legislate,what is the in ,evt068arg01governmentbody,evt068arg02law,evt068arg03place,, -government.spy.spy,what is the in ,evt070arg01spy,evt070arg02observedentity,evt070arg03beneficiary,evt070arg04place, -government.vote.unspecified,what is the in ,evt071arg01voter,evt071arg02candidate,evt071arg03ballot,evt071arg04result,evt071arg05place -government.vote.castvote,what is the in ,evt072arg01voter,evt072arg02candidate,evt072arg03ballot,evt072arg04result,evt072arg05place -government.vote.violationspreventvote,what is the in ,evt073arg01preventer,evt073arg02voter,evt073arg03candidate,evt073arg04ballot,evt073arg05place -inspection.sensoryobserve.unspecified,what is the in ,evt074arg01observer,evt074arg02observedentity,evt074arg03place,, -inspection.sensoryobserve.inspectpeopleorganization,what is the in ,evt075arg01inspector,evt075arg02inspectedentity,evt075arg03place,, -inspection.sensoryobserve.monitorelection,what is the in ,evt076arg01monitor,evt076arg02monitoredentity,evt076arg03place,, -inspection.sensoryobserve.physicalinvestigateinspect,what is the in ,evt077arg01inspector,evt077arg02inspectedentity,evt077arg03place,, -inspection.targetaimat.targetaimat,what is the in ,evt153arg01targeter ,evt153arg02target ,evt153arg03instrument ,evt153arg04place , -justice.arrestjaildetain.arrestjaildetain,what is the in ,evt079arg01jailer,evt079arg02detainee,evt079arg03crime,evt079arg04place, -justice.initiatejudicialprocess.unspecified,what is the in ,evt080arg01prosecutor,evt080arg02defendant,evt080arg03judgecourt,evt080arg04crime,evt080arg05place -justice.initiatejudicialprocess.chargeindict,what is the in 
,evt081arg01prosecutor,evt081arg02defendant,evt081arg03judgecourt,evt081arg04crime,evt081arg05place -justice.initiatejudicialprocess.trialhearing,what is the in ,evt082arg01prosecutor,evt082arg02defendant,evt082arg03judgecourt,evt082arg04crime,evt082arg05place -justice.investigate.unspecified,what is the in ,evt083arg01investigator,evt083arg02defendant,evt083arg03place,, -justice.investigate.investigatecrime,what is the in ,evt084arg01investigator,evt084arg02defendant,evt084arg03crime,evt084arg04place, -justice.judicialconsequences.unspecified,what is the in ,evt085arg01judgecourt,evt085arg02defendant,evt085arg03crime,evt085arg04place, -justice.judicialconsequences.convict,what is the in ,evt086arg01judgecourt,evt086arg02defendant,evt086arg03crime,evt086arg04place, -justice.judicialconsequences.execute,what is the in ,evt087arg01executioner,evt087arg02defendant,evt087arg03crime,evt087arg04place, -justice.judicialconsequences.extradite,what is the in ,evt088arg01extraditer,evt088arg02defendant,evt088arg03crime,evt088arg04origin,evt088arg05destination -life.die.unspecified,what is the in ,evt089arg01victim,evt089arg02place,evt089arg03killer,evt089arg04medicalissue, -life.die.deathcausedbyviolentevents,what is the in ,evt090arg01killer,evt090arg02victim,evt090arg03instrument,evt090arg04place,evt090arg05medicalissue -life.die.nonviolentdeath,what is the in ,evt091arg01victim,evt091arg02place,evt091arg03killer,evt091arg04medicalissue, -life.injure.unspecified,what is the in ,evt092arg01victim,evt092arg02injurer,evt092arg03place,evt092arg04medicalissue, -life.injure.illnessdegradationhungerthirst,what is the in ,evt093arg01victim,evt093arg02place,evt093arg03injurer,evt093arg04medicalissue, -life.injure.illnessdegradationphysical,what is the in ,evt094arg01victim,evt094arg02place,evt094arg03injurer,evt094arg04medicalissue, -life.injure.illnessdegredationsickness,what is the in ,evt150arg01victim ,evt150arg02injurer ,evt150arg03disease ,evt150arg04place , 
-life.injure.injurycausedbyviolentevents,what is the in ,evt095arg01injurer,evt095arg02victim,evt095arg03instrument,evt095arg04place,evt095arg05medicalissue -manufacture.artifact.unspecified,what is the in ,evt096arg01manufacturer,evt096arg02artifact,evt096arg03instrument,evt096arg04place, -manufacture.artifact.build,what is the in ,evt097arg01manufacturer,evt097arg02artifact,evt097arg03instrument,evt097arg04place, -manufacture.artifact.createintellectualproperty,what is the in ,evt098arg01manufacturer,evt098arg02artifact,evt098arg03instrument,evt098arg04place, -manufacture.artifact.createmanufacture,what is the in ,evt099arg01manufacturer,evt099arg02artifact,evt099arg03instrument,evt099arg04place, -medical.intervention.intervention,what is the in ,evt147arg01treater ,evt147arg02patient ,evt147arg03medicalissue ,evt147arg04instrument ,evt147arg05place -movement.transportartifact.unspecified,what is the in ,evt100arg01transporter,evt100arg02artifact,evt100arg03vehicle,evt100arg04origin,evt100arg05destination -movement.transportartifact.bringcarryunload,what is the in ,evt101arg01transporter,evt101arg02artifact,evt101arg03vehicle,evt101arg04origin,evt101arg05destination -movement.transportartifact.disperseseparate,what is the in ,evt102arg01transporter,evt102arg02artifact,evt102arg03vehicle,evt102arg04origin,evt102arg05destination -movement.transportartifact.fall,what is the in ,evt103arg01artifact,evt103arg02origin,evt103arg03destination,, -movement.transportartifact.grantentry,what is the in ,evt104arg01transporter,evt104arg02artifact,evt104arg03origin,evt104arg04destination, -movement.transportartifact.hide,what is the in ,evt105arg01transporter,evt105arg02artifact,evt105arg03hidingplace,evt105arg04vehicle,evt105arg05origin -movement.transportartifact.lossofcontrol,what is the in ,evt146arg01controller ,evt146arg02controlledthing ,evt146arg03place ,, -movement.transportartifact.nonviolentthrowlaunch,what is the in 
,evt106arg01transporter,evt106arg02artifact,evt106arg03vehicle,evt106arg04origin,evt106arg05destination -movement.transportartifact.prevententry,what is the in ,evt107arg01preventer,evt107arg02transporter,evt107arg03artifact,evt107arg04origin,evt107arg05destination -movement.transportartifact.preventexit,what is the in ,evt108arg01preventer,evt108arg02transporter,evt108arg03artifact,evt108arg04origin,evt108arg05destination -movement.transportartifact.receiveimport,what is the in ,evt109arg01transporter,evt109arg02artifact,evt109arg03vehicle,evt109arg04origin,evt109arg05destination -movement.transportartifact.sendsupplyexport,what is the in ,evt110arg01transporter,evt110arg02artifact,evt110arg03vehicle,evt110arg04origin,evt110arg05destination -movement.transportartifact.smuggleextract,what is the in ,evt111arg01transporter,evt111arg02artifact,evt111arg03vehicle,evt111arg04origin,evt111arg05destination -movement.transportperson.unspecified,what is the in ,evt112arg01transporter,evt112arg02passenger,evt112arg03vehicle,evt112arg04origin,evt112arg05destination -movement.transportperson.bringcarryunload,what is the in ,evt113arg01transporter,evt113arg02passenger,evt113arg03vehicle,evt113arg04origin,evt113arg05destination -movement.transportperson.disperseseparate,what is the in ,evt114arg01transporter,evt114arg02passenger,evt114arg03vehicle,evt114arg04origin,evt114arg05destination -movement.transportperson.evacuationrescue,what is the in ,evt115arg01transporter,evt115arg02passenger,evt115arg03vehicle,evt115arg04origin,evt115arg05destination -movement.transportperson.fall,what is the in ,evt116arg01passenger,evt116arg02origin,evt116arg03destination,, -movement.transportperson.grantentryasylum,what is the in ,evt117arg01granter,evt117arg02transporter,evt117arg03passenger,evt117arg04origin,evt117arg05destination -movement.transportperson.hide,what is the in ,evt118arg01transporter,evt118arg02passenger,evt118arg03hidingplace,evt118arg04vehicle,evt118arg05origin 
-movement.transportperson.prevententry,what is the in ,evt119arg01preventer,evt119arg02transporter,evt119arg03passenger,evt119arg04origin,evt119arg05destination -movement.transportperson.preventexit,what is the in ,evt120arg01preventer,evt120arg02transporter,evt120arg03passenger,evt120arg04origin,evt120arg05destination -movement.transportperson.selfmotion,what is the in ,evt121arg01transporter,evt121arg02vehicle,evt121arg03origin,evt121arg04destination, -movement.transportperson.smuggleextract,what is the in ,evt122arg01transporter,evt122arg02passenger,evt122arg03vehicle,evt122arg04origin,evt122arg05destination -personnel.elect.unspecified,what is the in ,evt123arg01voter,evt123arg02candidate,evt123arg03place,, -personnel.elect.winelection,what is the in ,evt124arg01voter,evt124arg02candidate,evt124arg03place,, -personnel.endposition.unspecified,what is the in ,evt125arg01employee,evt125arg02placeofemployment,evt125arg03place,, -personnel.endposition.firinglayoff,what is the in ,evt126arg01employee,evt126arg02placeofemployment,evt126arg03place,, -personnel.endposition.quitretire,what is the in ,evt127arg01employee,evt127arg02placeofemployment,evt127arg03place,, -personnel.startposition.unspecified,what is the in ,evt128arg01employee,evt128arg02placeofemployment,evt128arg03place,, -personnel.startposition.hiring,what is the in ,evt129arg01employee,evt129arg02placeofemployment,evt129arg03place,, -transaction.transaction.unspecified,what is the in ,evt130arg01participant,evt130arg02participant,evt130arg03beneficiary,evt130arg04place, -transaction.transaction.embargosanction,what is the in ,evt131arg01preventer,evt131arg02giver,evt131arg03recipient,evt131arg04artifactmoney,evt131arg05place -transaction.transaction.giftgrantprovideaid,what is the in ,evt132arg01giver,evt132arg02recipient,evt132arg03beneficiary,evt132arg04place, -transaction.transfermoney.unspecified,what is the in 
,evt133arg01giver,evt133arg02recipient,evt133arg03beneficiary,evt133arg04money,evt133arg05place -transaction.transfermoney.borrowlend,what is the in ,evt134arg01giver,evt134arg02recipient,evt134arg03beneficiary,evt134arg04money,evt134arg05place -transaction.transfermoney.embargosanction,what is the in ,evt135arg01preventer,evt135arg02giver,evt135arg03recipient,evt135arg04money,evt135arg05place -transaction.transfermoney.giftgrantprovideaid,what is the in ,evt136arg01giver,evt136arg02recipient,evt136arg03beneficiary,evt136arg04money,evt136arg05place -transaction.transfermoney.payforservice,what is the in ,evt137arg01giver,evt137arg02recipient,evt137arg03beneficiary,evt137arg04money,evt137arg05place -transaction.transfermoney.purchase,what is the in ,evt138arg01giver,evt138arg02recipient,evt138arg03beneficiary,evt138arg04money,evt138arg05place -transaction.transferownership.unspecified,what is the in ,evt139arg01giver,evt139arg02recipient,evt139arg03beneficiary,evt139arg04artifact,evt139arg05place -transaction.transferownership.borrowlend,what is the in ,evt140arg01giver,evt140arg02recipient,evt140arg03beneficiary,evt140arg04artifact,evt140arg05place -transaction.transferownership.embargosanction,what is the in ,evt141arg01preventer,evt141arg02giver,evt141arg03recipient,evt141arg04artifact,evt141arg05place -transaction.transferownership.giftgrantprovideaid,what is the in ,evt142arg01giver,evt142arg02recipient,evt142arg03beneficiary,evt142arg04artifact,evt142arg05place -transaction.transferownership.purchase,what is the in ,evt143arg01giver,evt143arg02recipient,evt143arg03beneficiary,evt143arg04artifact,evt143arg05place -transaction.transaction.transfercontrol,what is the in ,evt144arg01giver,evt144arg02recipient,evt144arg03beneficiary,evt144arg04territoryorfacility,evt144arg05place diff --git a/git_token.txt b/git_token.txt new file mode 100644 index 0000000000000000000000000000000000000000..c3739d9e791657b541802442418e47209df233cf --- /dev/null +++ b/git_token.txt 
@@ -0,0 +1 @@ +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 diff --git a/head_templates_preprocessed_data/test.jsonl b/head_templates_preprocessed_data/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..14516c18fcb48b8a4c640a870be119da2b27afc2 --- /dev/null +++ b/head_templates_preprocessed_data/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fc0897dedcf9a21f9c6ff714252dcd6a848e4232b25b53fa8abe01531861ec6 +size 3066918 diff --git a/head_templates_preprocessed_data/train.jsonl b/head_templates_preprocessed_data/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e13c0a34e5ed6d155ecc897e182e22d4bf1ba70f --- /dev/null +++ b/head_templates_preprocessed_data/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25889f02191e5795956b8b90296fb3321ae2791dccde73c5357b3b8222a55f25 +size 25867551 diff --git a/head_templates_preprocessed_data/val.jsonl b/head_templates_preprocessed_data/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..eeec05f97260e585296b96fedec17804e57a3e35 --- /dev/null +++ b/head_templates_preprocessed_data/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf4088666e0b8bcf0c0dfa7a73e37888e7b59b372d2aa1e0ce639968f527cce +size 3253644 diff --git a/head_templates_preprocessed_data1/test.jsonl b/head_templates_preprocessed_data1/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9c7d5d699e7ad29732644ade8ffe60044218fc7 --- /dev/null +++ b/head_templates_preprocessed_data1/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d87f29c4a507a480909aa143eb08f2899190ca79f45802085bed2abe6d50ad +size 12594026 diff --git a/head_templates_preprocessed_data1/train.jsonl b/head_templates_preprocessed_data1/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d8425fe6810ca499a349732ae39187604f3b955 --- 
/dev/null +++ b/head_templates_preprocessed_data1/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d2580d63a3bcc3860b2ba831f37cddbec554245911efeb74c0a5a2283181c6 +size 106010729 diff --git a/head_templates_preprocessed_data1/val.jsonl b/head_templates_preprocessed_data1/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..64adfc70324a3bca61768bd0237379a456d05add --- /dev/null +++ b/head_templates_preprocessed_data1/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7645dbb2d4ea9a8b87312017c0b0c2b29c04b6a25ae49c44b76a47d495cd238 +size 13401304 diff --git a/head_templates_preprocessed_data_new/temp/test.jsonl b/head_templates_preprocessed_data_new/temp/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..98fa9e1272b28def83cc98be884b32db5c56f896 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ad788635d60421739fd18f4bd5d78b4c90480aee2cafbe874cd19e0db5ce74d +size 15027755 diff --git a/head_templates_preprocessed_data_new/temp/train.jsonl b/head_templates_preprocessed_data_new/temp/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5aefc15f5ffbf30c47e016c0137bf604ccffe7c2 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:353fbe88531d1ed9d0643aa41143ec24eb49e4a837c456252c6e1a8926019fa2 +size 9445758 diff --git a/head_templates_preprocessed_data_new/temp/val.jsonl b/head_templates_preprocessed_data_new/temp/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..dc9dad530c1ec8bff1cd8f13d47299391f560966 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90eb683a2f263cd036d938a150a78a6f4b90d97d3138d65ea699cff70f0e6c45 
+size 15911790 diff --git a/head_templates_preprocessed_data_new/temp1/test.jsonl b/head_templates_preprocessed_data_new/temp1/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e479f5e5a0b963b829c15bbb37a5aaf6b0979ca2 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp1/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b2bedfaca006752cfa871997d29fcc41101f6db44e55dd522893453a6204d63 +size 15027727 diff --git a/head_templates_preprocessed_data_new/temp1/train.jsonl b/head_templates_preprocessed_data_new/temp1/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..87b60496bcb5d6ce26753849e15e729e5addc2e3 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp1/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb16a36aaf8b636f67ab18efbe5e4c1e1afbecc3ad38ee7412a27cb75386090b +size 126534242 diff --git a/head_templates_preprocessed_data_new/temp1/val.jsonl b/head_templates_preprocessed_data_new/temp1/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..5caefc722d91f3079cd2244a8402a5a5503b7984 --- /dev/null +++ b/head_templates_preprocessed_data_new/temp1/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143c303d5c7048be41d40d021817af2ade7ccd2d91385bc895d099f00e7019ed +size 15911353 diff --git a/head_templates_preprocessed_data_new/test.jsonl b/head_templates_preprocessed_data_new/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..af654b50a625770f0f1ed0902ef2be76c069dc1f --- /dev/null +++ b/head_templates_preprocessed_data_new/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025353048caa8160e84a9abf9f075e783681cde39b86db7600c6de7a30de8eb9 +size 12555891 diff --git a/head_templates_preprocessed_data_new/train.jsonl b/head_templates_preprocessed_data_new/train.jsonl new file mode 100644 index 
0000000000000000000000000000000000000000..36355a90938a6fc3e02ad26e7cf74b5312092f06 --- /dev/null +++ b/head_templates_preprocessed_data_new/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6de08bf114d936e7b56c72b19959d2845ee49030dd0bac3200c00fe5e26693 +size 105639380 diff --git a/head_templates_preprocessed_data_new/val.jsonl b/head_templates_preprocessed_data_new/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..657363c247c381d1b3c61e3aeec292e8239a81cd --- /dev/null +++ b/head_templates_preprocessed_data_new/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4efc8da757337673393c6f9956e1d4096d6472aa95cb0b82b7034d9a0b101f2 +size 13355352 diff --git a/head_templates_preprocessed_data_test/test.jsonl b/head_templates_preprocessed_data_test/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e9c7d5d699e7ad29732644ade8ffe60044218fc7 --- /dev/null +++ b/head_templates_preprocessed_data_test/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d87f29c4a507a480909aa143eb08f2899190ca79f45802085bed2abe6d50ad +size 12594026 diff --git a/head_templates_preprocessed_data_test/train.jsonl b/head_templates_preprocessed_data_test/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d8425fe6810ca499a349732ae39187604f3b955 --- /dev/null +++ b/head_templates_preprocessed_data_test/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d2580d63a3bcc3860b2ba831f37cddbec554245911efeb74c0a5a2283181c6 +size 106010729 diff --git a/head_templates_preprocessed_data_test/val.jsonl b/head_templates_preprocessed_data_test/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..64adfc70324a3bca61768bd0237379a456d05add --- /dev/null +++ b/head_templates_preprocessed_data_test/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e7645dbb2d4ea9a8b87312017c0b0c2b29c04b6a25ae49c44b76a47d495cd238 +size 13401304 diff --git a/head_what_preprocessed_data/test.jsonl b/head_what_preprocessed_data/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a448039e7cc9b19ac060d99ef0cc2f83424f2c65 --- /dev/null +++ b/head_what_preprocessed_data/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fbbdf09172f43a2baa638a802158509a3ee8408f745488bea0edfd330ee5883 +size 12517258 diff --git a/head_what_preprocessed_data/train.jsonl b/head_what_preprocessed_data/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..39ec481b2db5c85be7af91298c98fc3bfd7fa2e3 --- /dev/null +++ b/head_what_preprocessed_data/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efdc975af0cccee3658c564154129919e12d546ef0638439e10959930ed48a8 +size 105370045 diff --git a/head_what_preprocessed_data/val.jsonl b/head_what_preprocessed_data/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..e94d549d6524210fe91efb5d4cc6a0befe69db38 --- /dev/null +++ b/head_what_preprocessed_data/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62b955d1868f0e7cafa614129d48599f707cc332b648da7cd8942d875e0d1e33 +size 13318850 diff --git a/span_preprocessed_data/test.jsonl b/span_preprocessed_data/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..53dc7d99e697c774add9dd34367d550bb73912e6 --- /dev/null +++ b/span_preprocessed_data/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c8c3a8c868525a8d22a8f93ee71eeaccc4561d10ccabb48c5a8819d8ce2f42 +size 3040920 diff --git a/span_preprocessed_data/train.jsonl b/span_preprocessed_data/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c65172edcd11c6d2be4fc976911e4d85aa01a574 --- /dev/null +++ b/span_preprocessed_data/train.jsonl @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:d59722f2d8c512844a2995fdebd15b687581fde11b714ee49e4dc19922fd9f8f +size 25648444 diff --git a/span_preprocessed_data/val.jsonl b/span_preprocessed_data/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d70a557e273e6d4c95cf5ba57ebd0b39fe09ca0 --- /dev/null +++ b/span_preprocessed_data/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38da46ab63668fb37a8592573e0562e48245c6d81335c78d40dc957ea161ec79 +size 3226018 diff --git a/span_templates_preprocessed_data1/test.jsonl b/span_templates_preprocessed_data1/test.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..24df24a2ab5dff317f4735a799e9785fe301b1c5 --- /dev/null +++ b/span_templates_preprocessed_data1/test.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b6ba5486138269e29be526d6a42a5eab956851f2b06bc638dcbe6342c8278fd +size 12599344 diff --git a/span_templates_preprocessed_data1/train.jsonl b/span_templates_preprocessed_data1/train.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..6d8425fe6810ca499a349732ae39187604f3b955 --- /dev/null +++ b/span_templates_preprocessed_data1/train.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d2580d63a3bcc3860b2ba831f37cddbec554245911efeb74c0a5a2283181c6 +size 106010729 diff --git a/span_templates_preprocessed_data1/val.jsonl b/span_templates_preprocessed_data1/val.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..64adfc70324a3bca61768bd0237379a456d05add --- /dev/null +++ b/span_templates_preprocessed_data1/val.jsonl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7645dbb2d4ea9a8b87312017c0b0c2b29c04b6a25ae49c44b76a47d495cd238 +size 13401304 diff --git a/span_what_preprocessed_data/test.jsonl b/span_what_preprocessed_data/test.jsonl new file mode 100644 index 
"""Convert generated template fillers into RAMS-style argument span predictions.

The script reads the gold test file and the generator's predictions, aligns
every generated sentence with its ontology template to recover the words that
fill each argument role, locates those words in the document, and writes one
JSON object per document.  In the output, the predicted links are stored under
``gold_evt_links`` while the original gold links are kept under
``ref_evt_links``.
"""
import argparse
import json
import os
import re
from copy import deepcopy

# Token the generator leaves in place of a role it could not fill.  The
# previous revision compared against an empty string (the angle-bracket token
# appears to have been stripped), which made the missing-argument branch
# unreachable because ``str.split()`` never yields ''.
MISSING_ARG = '<arg>'

# Matches a template slot such as ``<arg1>`` and captures ``arg1``.
ARG_SLOT_RE = re.compile(r'<(arg\d+)>')

# The ontology file this script used to read unconditionally; it is now the
# default of ``--ontology-file`` so that runs without the flag are unchanged.
DEFAULT_ONTOLOGY_FILE = 'aida_ontology_fj-w-2.csv'


def get_event_type(ex):
    """Return the first element of every type entry of every trigger in ``ex``.

    Each item of ``ex['evt_triggers']`` is indexed as
    ``[start, end, [[type, ...], ...]]``.
    """
    return [t[0] for evt in ex['evt_triggers'] for t in evt[2]]


def extract_args_from_template(ex, template, ontology_dict,):
    """Recover the generated filler words for each argument role.

    Args:
        ex: example dict; ``ex['predicted']`` is a list of generated sentences
            and ``ex['evt_triggers']`` supplies the event type.
        template: list of template strings for the event type, paired
            position by position with ``ex['predicted']``.
        ontology_dict: mapping ``event type -> {'argN': role name, ...}``.

    Returns:
        dict mapping role name to the list of words generated for that role.
        Roles whose slot was left as ``<arg>`` are omitted.

    Templates and predictions are consumed in lockstep; surplus entries on
    either side are ignored instead of raising ``IndexError``.
    """
    predicted_args = {}
    evt_type = get_event_type(ex)[0]
    # The ontology spells the wildcard subtype 'unspecified', the data 'n/a'.
    role_key = evt_type.replace('n/a', 'unspecified')

    for template_str, predicted_str in zip(template, ex['predicted']):
        template_words = template_str.strip().split()
        predicted_words = predicted_str.strip().split()
        t_ptr = 0
        p_ptr = 0
        while t_ptr < len(template_words) and p_ptr < len(predicted_words):
            slot = ARG_SLOT_RE.match(template_words[t_ptr])
            if slot is None:
                # Literal template word: both sides are assumed aligned.
                t_ptr += 1
                p_ptr += 1
                continue

            arg_name = ontology_dict[role_key][slot.group(1)]
            if predicted_words[p_ptr] == MISSING_ARG:
                # The generator left the slot unfilled: skip it on both sides.
                t_ptr += 1
                p_ptr += 1
                continue

            arg_start = p_ptr
            if t_ptr + 1 == len(template_words):
                # Slot is the last template word: the filler runs to the end.
                p_ptr = len(predicted_words)
            else:
                # The filler ends where the next template word reappears.
                stop_word = template_words[t_ptr + 1]
                while p_ptr < len(predicted_words) and predicted_words[p_ptr] != stop_word:
                    p_ptr += 1
            predicted_args[arg_name] = predicted_words[arg_start:p_ptr]
            t_ptr += 1

    return predicted_args


def check_coref(ex, arg_span, gold_spans):
    """Swap ``arg_span`` for a coreferent gold span when one exists.

    For every cluster in ``ex['corefs']`` that contains ``arg_span``, the
    first gold span that is also in that cluster is returned.  If no cluster
    yields a gold span, ``arg_span`` is returned unchanged.
    """
    for cluster in ex['corefs']:
        if arg_span not in cluster:
            continue
        for span in gold_spans:
            if span in cluster:
                return span
    return arg_span


def parse_ontology(lines):
    """Build the ontology mapping from the lines of the ontology CSV.

    The first line is treated as a header.  Every following line is
    ``event_type,template,role1,role2,...``; an event type may occur on
    several lines, contributing one template per line.  Parsing stops at the
    first line with fewer than two fields.

    Returns:
        dict ``event type -> {'template': [str, ...], 'argN': role,
        role: 'argN'}``.  Role names are stripped of surrounding whitespace so
        that a stray space in the CSV does not end up in the emitted role.
    """
    ontology_dict = {}
    for lidx, line in enumerate(lines):
        if lidx == 0:  # header
            continue
        fields = line.strip().split(',')
        if len(fields) < 2:
            break
        evt_type, template_str = fields[0], fields[1]
        entry = ontology_dict.setdefault(evt_type, {'template': []})
        entry['template'].append(template_str)
        for i, role in enumerate(fields[2:], start=1):
            role = role.strip()
            if role:
                slot = 'arg{}'.format(i)
                entry[slot] = role
                entry[role] = slot
    return ontology_dict


def load_ontology(path):
    """Read the ontology CSV at ``path`` and return ``parse_ontology`` of it."""
    with open(path, 'r', encoding='utf-8') as f:
        return parse_ontology(f)


def load_examples(test_file):
    """Load the gold examples keyed by ``doc_key``.

    The gold links are moved to ``ref_evt_links`` and ``gold_evt_links`` is
    reset to an empty list so that predictions can be written into it.
    """
    examples = {}
    with open(test_file, 'r', encoding='utf-8') as f:
        for line in f:
            ex = json.loads(line.strip())
            ex['ref_evt_links'] = deepcopy(ex['gold_evt_links'])
            ex['gold_evt_links'] = []
            examples[ex['doc_key']] = ex
    return examples


def attach_predictions(examples, gen_file):
    """Collect every generated sentence of a document on its example.

    The prediction file holds one JSON object per generated template; all
    objects sharing a ``doc_key`` are gathered, in file order, into the lists
    ``examples[doc_key]['predicted']`` and ``examples[doc_key]['gold']``.
    """
    seen = set()
    with open(gen_file, 'r', encoding='utf-8') as f:
        for line in f:
            pred = json.loads(line.strip())
            ex = examples[pred['doc_key']]
            if pred['doc_key'] not in seen:
                # First sentence of this document: start fresh lists.
                seen.add(pred['doc_key'])
                ex['predicted'] = []
                ex['gold'] = []
            ex['predicted'].append(pred['predicted'])
            ex['gold'].append(pred['gold'])


def convert_example(ex, ontology_dict, span_finder, nlp=None, head_only=False, coref=False):
    """Fill ``ex['gold_evt_links']`` with the predicted argument spans.

    Args:
        ex: example carrying list-valued ``predicted`` and ``gold`` entries.
        ontology_dict: mapping produced by ``parse_ontology``.
        span_finder: callable with the signature of ``utils.find_arg_span``.
        nlp: callable applied to the space-joined context words to produce the
            ``doc`` handed to ``span_finder``; only used when ``head_only``.
        head_only: forwarded to ``span_finder``.
        coref: when set together with ``head_only``, a predicted span is
            replaced by a coreferent gold span if one exists.

    Returns:
        ``ex`` itself, mutated in place: ``predicted`` and ``gold`` are
        collapsed to single strings and the predicted links are appended.

    Raises:
        ValueError: if coreference matching is requested but ``ex`` has no
            ``corefs`` entry.
    """
    evt_type = get_event_type(ex)[0]
    context_words = [w for sent in ex['sentences'] for w in sent]
    template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template']
    # Must run before 'predicted' is collapsed into one string below.
    predicted_args = extract_args_from_template(ex, template, ontology_dict)

    # The output keeps the generated sentences concatenated without separator.
    ex['predicted'] = ''.join(ex['predicted'])
    ex['gold'] = ''.join(ex['gold'])

    trigger_start = ex['evt_triggers'][0][0]
    trigger_end = ex['evt_triggers'][0][1]
    doc = nlp(' '.join(context_words)) if head_only else None

    for argname, arg_words in predicted_args.items():
        arg_span = span_finder(arg_words, context_words, trigger_start, trigger_end,
                               head_only=head_only, doc=doc)
        if not arg_span:
            # The generated words do not occur in the document.
            continue
        if head_only and coref:
            # Consider coreferential mentions as matching.
            if 'corefs' not in ex:
                raise ValueError(
                    "example {!r} has no 'corefs' entry; --coref needs a test file "
                    "with coreference clusters".format(ex.get('doc_key')))
            gold_spans = [a[1] for a in ex['ref_evt_links'] if a[2] == argname]
            arg_span = check_coref(ex, list(arg_span), gold_spans)
        ex['gold_evt_links'].append([[trigger_start, trigger_end], list(arg_span), argname])
    return ex


def main():
    """Command-line entry point."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gen-file', type=str,
                        default='checkpoints/gen-new-tokenization-pred/sample_predictions.jsonl')
    parser.add_argument('--test-file', type=str, default='data/RAMS_1.0/data/test_head.jsonlines')
    parser.add_argument('--output-file', type=str, default='test_output.jsonl')
    # Previously parsed but ignored; the default is the file that was hard-coded.
    parser.add_argument('--ontology-file', type=str, default=DEFAULT_ONTOLOGY_FILE)
    parser.add_argument('--head-only', action='store_true', default=False)
    parser.add_argument('--coref', action='store_true', default=False)
    args = parser.parse_args()

    # Third-party and project-local modules are imported here so that the
    # helpers above can be imported and tested without them installed.
    from tqdm import tqdm
    from utils import WhitespaceTokenizer, find_arg_span

    print("convert_gen_to_output5.py")

    nlp = None
    if args.head_only:
        # The parser is only needed to find the head word of a span.
        import spacy
        nlp = spacy.load('en_core_web_sm')
        nlp.tokenizer = WhitespaceTokenizer(nlp.vocab)

    ontology_dict = load_ontology(args.ontology_file)

    print(args.gen_file)
    examples = load_examples(args.test_file)
    attach_predictions(examples, args.gen_file)

    with open(args.output_file, 'w', encoding='utf-8') as writer:
        for ex in tqdm(examples.values()):
            if 'predicted' not in ex:  # no generation for this document
                continue
            convert_example(ex, ontology_dict, find_arg_span, nlp=nlp,
                            head_only=args.head_only, coref=args.coref)
            writer.write(json.dumps(ex) + '\n')


if __name__ == '__main__':
    main()
create_gold_gen(self, ex, ontology_dict, mark_trigger=True): + # 设置三个总列表、存放输入模板、输出模板 + INPUT = [] + OUTPUT = [] + CONTEXT = [] + evt_type = self.get_event_type(ex)[0] + + context_words = [w for sent in ex['sentences'] for w in sent] + input_template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + i = len(input_template) + input_list = [] + for x in range(i): + str = re.sub(r'', '', input_template[x]) + input_list.append(str) + # 其中input_list种存放的是 原始数据中 全部替换为 之后的模板 下一步应该进行分词 + temp = [] + for x in range(i): + space_tokenized_template = input_list[x].split(' ') + temp.append(space_tokenized_template) + space_tokenized_template = [] + # 其中temp中存放的都是分词后的模板 下一步对temp中的所有元素进行tokenize + tokenized_input_template = [] + for x in range(len(temp)): + for w in temp[x]: + tokenized_input_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) + INPUT.append(tokenized_input_template) + tokenized_input_template = [] + template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + for lidx, triple in enumerate(ex['gold_evt_links']): + # 触发词 论元 论元 + # 例子: "gold_evt_links": + # [[[40, 40], [33, 33], "evt089arg01victim"], + # [[40, 40], [28, 28], "evt089arg02place"]] + trigger_span, argument_span, arg_name = triple + # 第几个论元 + arg_num = ontology_dict[evt_type.replace('n/a', 'unspecified')][arg_name] + # 具体论元内容 短语 + arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 1]) + # 通过正则表达式的方式将模板中的每个 替换为具体的论元内容 + for index in range(len(template)): + if arg_num in template[index]: + break + else: + continue + + + template[index] = re.sub('<{}>'.format(arg_num), arg_text, template[index]) + + + trigger = ex['evt_triggers'][0] + if mark_trigger: + trigger_span_start = trigger[0] + trigger_span_end = trigger[1] + 2 # one for inclusion, one for extra start marker + # 触发词之前的单词 + prefix = self.tokenizer.tokenize(' '.join(context_words[:trigger[0]]), add_prefix_space=True) + # 触发词短语 + tgt = self.tokenizer.tokenize(' 
'.join(context_words[trigger[0]: trigger[1] + 1]), + add_prefix_space=True) + # 触发词之后的单词 + suffix = self.tokenizer.tokenize(' '.join(context_words[trigger[1] + 1:]), add_prefix_space=True) + context = prefix + [' ', ] + tgt + [' ', ] + suffix + else: + context = self.tokenizer.tokenize(' '.join(context_words), add_prefix_space=True) + # 将context放入CONTEXT中 + for w in range(i): + CONTEXT.append(context) + output_template = [] + # 此时的template中已经全部替换为论元短语 这部是将 替换为 + for i in range(len(template)): + output_template.append(re.sub(r'', '', template[i])) + spaceout_tokenized_template = [] + for i in range(len(output_template)): + spaceout_tokenized_template.append(output_template[i].split(' ')) + tokenized_out_template = [] + for i in range(len(spaceout_tokenized_template)): + for w in spaceout_tokenized_template[i]: + tokenized_out_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) + OUTPUT.append(tokenized_out_template) + tokenized_out_template = [] + + return INPUT, OUTPUT, CONTEXT + + def load_ontology(self): + ontology_dict = {} + with open('aida_ontology_fj-5.csv', 'r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + evt_type = fields[0] + if evt_type in ontology_dict.keys(): + args = fields[2:] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + else: + ontology_dict[evt_type] = {} + args = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + + return ontology_dict + + + def prepare_data(self): + if not os.path.exists('span_templates_preprocessed_data1'): + 
os.makedirs('span_templates_preprocessed_data1') + + ontology_dict = self.load_ontology() + + # print(ontology_dict['contact.prevarication.broadcast']) + + for split, f in [('train', self.hparams.train_file), ('val', self.hparams.val_file), + ('test', self.hparams.test_file)]: + with open(f, 'r') as reader, open('span_templates_preprocessed_data1/{}.jsonl'.format(split), 'w') as writer: + for lidx, line in enumerate(reader): + ex = json.loads(line.strip()) + input_template, output_template, context = self.create_gold_gen(ex, ontology_dict, + self.hparams.mark_trigger) + ontology_dict = self.load_ontology() + length = len(input_template) + # print(input_template) + # print(output_template) + # print(context) + for i in range(length): + input_tokens = self.tokenizer.encode_plus(input_template[i], context[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_LENGTH, + truncation='only_second', + padding='max_length') + # target_tokens + tgt_tokens = self.tokenizer.encode_plus(output_template[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_TGT_LENGTH, + truncation=True, + padding='max_length') + # input_ids 单词在词典中的编码 + # tgt_tokens 指定对哪些词进行self_attention操作 + processed_ex = { + # 'idx': lidx, + 'doc_key': ex['doc_key'], + 'input_token_ids': input_tokens['input_ids'], + 'input_attn_mask': input_tokens['attention_mask'], + 'tgt_token_ids': tgt_tokens['input_ids'], + 'tgt_attn_mask': tgt_tokens['attention_mask'], + } + #print(processed_ex) + writer.write(json.dumps(processed_ex) + "\n") + + def train_dataloader(self): + dataset = IEDataset('span_templates_preprocessed_data1/train.jsonl') + + dataloader = DataLoader(dataset, + pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.train_batch_size, + shuffle=True) + return dataloader + + def val_dataloader(self): + dataset = IEDataset('span_templates_preprocessed_data1/val.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + 
collate_fn=my_collate, + batch_size=self.hparams.eval_batch_size, shuffle=False) + return dataloader + + def test_dataloader(self): + dataset = IEDataset('span_templates_preprocessed_data1/test.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.eval_batch_size, shuffle=False) + + return dataloader + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--train-file', type=str, default='data/RAMS_1.0/data/train.jsonlines') + parser.add_argument('--val-file', type=str, default='data/RAMS_1.0/data/dev.jsonlines') + parser.add_argument('--test-file', type=str, default='data/RAMS_1.0/data/test.jsonlines') + parser.add_argument('--train_batch_size', type=int, default=2) + parser.add_argument('--eval_batch_size', type=int, default=4) + parser.add_argument('--mark-trigger', action='store_true', default=True) + args = parser.parse_args() + + print("data_module1.pyaaaaaaaaaaaaaaa") + dm = RAMSDataModule(args=args) + dm.prepare_data() + + # training dataloader + dataloader = dm.train_dataloader() + + for idx, batch in enumerate(dataloader): + print(batch) + break + + # val dataloader diff --git a/src/genie/data_module_w.py b/src/genie/data_module_w.py new file mode 100644 index 0000000000000000000000000000000000000000..e0b8c41edef7d3095f4c3b723e8f4a99bf5ee7d3 --- /dev/null +++ b/src/genie/data_module_w.py @@ -0,0 +1,255 @@ +import os +import json +import jsonlines +import re +import random +from collections import defaultdict +import argparse + +import transformers +from transformers import BartTokenizer +import torch +from torch.utils.data import DataLoader +import pytorch_lightning as pl + +from .data import IEDataset, my_collate + +MAX_LENGTH = 424 +MAX_TGT_LENGTH = 72 +DOC_STRIDE = 256 + +print("data_module-w.py") + +class RAMSDataModule(pl.LightningDataModule): + def __init__(self, args): + super().__init__() + self.hparams = args + self.tokenizer = 
BartTokenizer.from_pretrained('facebook/bart-large') + self.tokenizer.add_tokens([' ', ' ']) + + def get_event_type(self, ex): + evt_type = [] + for evt in ex['evt_triggers']: + for t in evt[2]: + evt_type.append(t[0]) + return evt_type + + # 此函数找出对应的trigger的索引 + def get_trigger_index(self, ex): + return ex['evt_triggers'][0][0] + + def create_gold_gen(self, ex, ontology_dict, mark_trigger=True): + # 设置三个总列表、存放输入模板、输出模板 + INPUT = [] + OUTPUT = [] + CONTEXT = [] + evt_type = self.get_event_type(ex)[0] + + context_words = [w for sent in ex['sentences'] for w in sent] + input_template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + trigger_index = self.get_trigger_index(ex) + # 找到对应的trigger + trigger = context_words[trigger_index] + i = len(input_template) + input_list = [] + for x in range(i): + str = re.sub('', trigger, input_template[x]) + str = re.sub('', trigger, str) + input_list.append(str) + # 其中input_list种存放的是 原始数据中 全部替换为 之后的模板 下一步应该进行分词 + temp = [] + for x in range(i): + space_tokenized_template = input_list[x].split(' ') + temp.append(space_tokenized_template) + space_tokenized_template = [] + # 其中temp中存放的都是分词后的模板 下一步对temp中的所有元素进行tokenize + tokenized_input_template = [] + for x in range(len(temp)): + for w in temp[x]: + tokenized_input_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) + INPUT.append(tokenized_input_template) + tokenized_input_template = [] + template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + for y in range(len(template)): + template[y] = re.sub('', trigger, template[y]) + for lidx, triple in enumerate(ex['gold_evt_links']): + # 触发词 论元 论元 + # 例子: "gold_evt_links": + # [[[40, 40], [33, 33], "evt089arg01victim"], + # [[40, 40], [28, 28], "evt089arg02place"]] + trigger_span, argument_span, arg_name = triple + # 第几个论元 + arg_num = ontology_dict[evt_type.replace('n/a', 'unspecified')][arg_name] + # 具体论元内容 短语 + arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 
1]) + # 通过正则表达式的方式将模板中的每个 替换为具体的论元内容 + for index in range(len(template)): + if arg_num in template[index]: + break + else: + continue + + + template[index] = re.sub('<{}>'.format(arg_num), arg_text, template[index]) + + + trigger = ex['evt_triggers'][0] + if mark_trigger: + trigger_span_start = trigger[0] + trigger_span_end = trigger[1] + 2 # one for inclusion, one for extra start marker + # 触发词之前的单词 + prefix = self.tokenizer.tokenize(' '.join(context_words[:trigger[0]]), add_prefix_space=True) + # 触发词短语 + tgt = self.tokenizer.tokenize(' '.join(context_words[trigger[0]: trigger[1] + 1]), + add_prefix_space=True) + # 触发词之后的单词 + suffix = self.tokenizer.tokenize(' '.join(context_words[trigger[1] + 1:]), add_prefix_space=True) + context = prefix + [' ', ] + tgt + [' ', ] + suffix + else: + context = self.tokenizer.tokenize(' '.join(context_words), add_prefix_space=True) + # 将context放入CONTEXT中 + for w in range(i): + CONTEXT.append(context) + output_template = [] + # 此时的template中已经全部替换为论元短语 这部是将 替换为 + for i in range(len(template)): + output_template.append(re.sub(r'', '', template[i])) + spaceout_tokenized_template = [] + for i in range(len(output_template)): + spaceout_tokenized_template.append(output_template[i].split(' ')) + tokenized_out_template = [] + for i in range(len(spaceout_tokenized_template)): + for w in spaceout_tokenized_template[i]: + tokenized_out_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) + OUTPUT.append(tokenized_out_template) + tokenized_out_template = [] + + return INPUT, OUTPUT, CONTEXT + + def load_ontology(self): + ontology_dict = {} + with open('aida_ontology_fj-w-2.csv', 'r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + evt_type = fields[0] + if evt_type in ontology_dict.keys(): + args = fields[2:] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + 
ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + else: + ontology_dict[evt_type] = {} + args = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(args): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + + return ontology_dict + + + def prepare_data(self): + if not os.path.exists('head_what_preprocessed_data'): + os.makedirs('head_what_preprocessed_data') + + ontology_dict = self.load_ontology() + + # print(ontology_dict['contact.prevarication.broadcast']) + + for split, f in [('train', self.hparams.train_file), ('val', self.hparams.val_file), + ('test', self.hparams.test_file)]: + with open(f, 'r') as reader, open('head_what_preprocessed_data/{}.jsonl'.format(split), 'w') as writer: + for lidx, line in enumerate(reader): + ex = json.loads(line.strip()) + input_template, output_template, context = self.create_gold_gen(ex, ontology_dict, + self.hparams.mark_trigger) + ontology_dict = self.load_ontology() + length = len(input_template) + # print(input_template) + # print(output_template) + # print(context) + for i in range(length): + input_tokens = self.tokenizer.encode_plus(input_template[i], context[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_LENGTH, + truncation='only_second', + padding='max_length') + # target_tokens + tgt_tokens = self.tokenizer.encode_plus(output_template[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_TGT_LENGTH, + truncation=True, + padding='max_length') + # input_ids 单词在词典中的编码 + # tgt_tokens 指定对哪些词进行self_attention操作 + processed_ex = { + # 'idx': lidx, + 'doc_key': ex['doc_key'], + 'input_token_ids': input_tokens['input_ids'], + 'input_attn_mask': input_tokens['attention_mask'], + 'tgt_token_ids': tgt_tokens['input_ids'], + 'tgt_attn_mask': tgt_tokens['attention_mask'], 
+ } + #print(processed_ex) + writer.write(json.dumps(processed_ex) + "\n") + + def train_dataloader(self): + dataset = IEDataset('head_what_preprocessed_data/train.jsonl') + + dataloader = DataLoader(dataset, + pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.train_batch_size, + shuffle=True) + return dataloader + + def val_dataloader(self): + dataset = IEDataset('head_what_preprocessed_data/val.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.eval_batch_size, shuffle=False) + return dataloader + + def test_dataloader(self): + dataset = IEDataset('head_what_preprocessed_data/test.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.eval_batch_size, shuffle=False) + + return dataloader + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--train-file', type=str, default='data/RAMS_1.0/data/train.jsonlines') + parser.add_argument('--val-file', type=str, default='data/RAMS_1.0/data/dev.jsonlines') + parser.add_argument('--test-file', type=str, default='data/RAMS_1.0/data/test_head.jsonlines') + parser.add_argument('--train_batch_size', type=int, default=2) + parser.add_argument('--eval_batch_size', type=int, default=4) + parser.add_argument('--mark-trigger', action='store_true', default=True) + args = parser.parse_args() + + print("data_module1.pyaaaaaaaaaaaaaaa") + dm = RAMSDataModule(args=args) + dm.prepare_data() + + # training dataloader + dataloader = dm.train_dataloader() + + for idx, batch in enumerate(dataloader): + print(batch) + break + + # val dataloader diff --git a/src/genie/question/convert_gen_to_output2.py b/src/genie/question/convert_gen_to_output2.py new file mode 100644 index 0000000000000000000000000000000000000000..52b45d05210b4cf708cb171071d8aa07202beebe --- /dev/null +++ b/src/genie/question/convert_gen_to_output2.py @@ 
-0,0 +1,172 @@ +import os +import json +import argparse +import re +from copy import deepcopy +from tqdm import tqdm + +from utils import find_head, WhitespaceTokenizer, find_arg_span +import spacy +print("convert_gen_to_output2.py") +def extract_args_from_template(ex, template, ontology_dict,): + # extract argument text + template_words = template[0].strip().split() + predicted_words = ex['predicted'].strip().split() + predicted_args = {} + t_ptr= 0 + p_ptr= 0 + evt_type = get_event_type(ex)[0] + + while t_ptr < len(template_words) and p_ptr < len(predicted_words): + if re.match(r'<(arg\d+)>', template_words[t_ptr]): + m = re.match(r'<(arg\d+)>', template_words[t_ptr]) + arg_num = m.group(1) + arg_name = ontology_dict[evt_type.replace('n/a','unspecified')][arg_num] + + if predicted_words[p_ptr] == '': + # missing argument + p_ptr +=1 + t_ptr +=1 + else: + arg_start = p_ptr + while (p_ptr < len(predicted_words)) and (predicted_words[p_ptr] != template_words[t_ptr+1]): + p_ptr+=1 + arg_text = predicted_words[arg_start:p_ptr] + predicted_args[arg_name] = arg_text + t_ptr+=1 + # aligned + else: + t_ptr+=1 + p_ptr+=1 + + return predicted_args + + + + + + +def get_event_type(ex): + evt_type = [] + for evt in ex['evt_triggers']: + for t in evt[2]: + evt_type.append( t[0]) + return evt_type + +def check_coref(ex, arg_span, gold_spans): + for clus in ex['corefs']: + if arg_span in clus: + matched_gold_spans = [span for span in gold_spans if span in clus] + if len(matched_gold_spans) > 0: + return matched_gold_spans[0] + return arg_span + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--gen-file',type=str, default='checkpoints/gen-new-tokenization-pred/sample_predictions.jsonl') + parser.add_argument('--test-file', type=str,default='data/RAMS_1.0/data/test_head.jsonlines') + parser.add_argument('--output-file',type=str, default='test_output.jsonl') + parser.add_argument('--ontology-file',type=str, default='aida_ontology_new.csv') + 
parser.add_argument('--head-only',action='store_true',default=False) + parser.add_argument('--coref', action='store_true', default=False) + args = parser.parse_args() + + nlp = spacy.load('en_core_web_sm') + nlp.tokenizer = WhitespaceTokenizer(nlp.vocab) + # read ontology 读取事件本体 模板文件中的内容 + ontology_dict ={} + with open('aida_ontology_new.csv','r') as f: + for lidx, line in enumerate(f): + # 跳过第一行表头字段 + if lidx == 0:# header + continue + fields = line.strip().split(',') + # 说明该事件类型下不存在待抽取的论元 + if len(fields) < 2: + break + # 事件类型是第一个 + evt_type = fields[0] + # 从第三个元素往后都是待抽取论语及其论元角色 + arguments = fields[2:] + # 获取该事件类型下带带抽取的论元数量 + args_len = 0 + for i, arg in enumerate(arguments): + if arg != '': + args_len = args_len + 1 + # 将事件本体字典中添加事件类型的key,该key下对应的value为模板 + # 利用args_len将template中的子模板数量进行循环增加, 将后续的子模板通过字符串拼接的方式进行增加 + # 最终的模板样式变为 what is the in what is the in + # 先利用一个临时的字符串变量来存储模板 ----------> temp_template + temp_template = [] + for i in range(len(arguments)): + temp_template.append(" what is the in ".format(i + 1)) + print(temp_template) + # 在事件本体字典中建立key-value 以事件类型为关键字 + ontology_dict[evt_type] = { + 'template': temp_template + } + + for i, arg in enumerate(arguments): + if arg !='': + ontology_dict[evt_type]['arg{}'.format(i+1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i+1) + + + examples = {} + print(args.gen_file) + # data/RAMS_1.0/data/test_head_coref.jsonlines + with open(args.test_file, 'r') as f: + for line in f: + ex = json.loads(line.strip()) + ex['ref_evt_links'] = deepcopy(ex['gold_evt_links']) + ex['gold_evt_links'] = [] + examples[ex['doc_key']] =ex + + # checkpoints/gen-RAMS-pred/predictions.jsonl + with open(args.gen_file,'r') as f: + for line in f: + pred = json.loads(line.strip()) + # print(pred) + examples[pred['doc_key']]['predicted'] = pred['predicted'] + examples[pred['doc_key']]['gold'] = pred['gold'] + + # checkpoints/gen-RAMS-pred/out_put.jsonl + writer = open(args.output_file, 'w') + for ex in tqdm(examples.values()): + 
if 'predicted' not in ex:# this is used for testing + continue + # get template + evt_type = get_event_type(ex)[0] + context_words = [w for sent in ex['sentences'] for w in sent ] + template = ontology_dict[evt_type.replace('n/a','unspecified')]['template'] + # extract argument text + + predicted_args = extract_args_from_template(ex,template, ontology_dict) + # get trigger + # extract argument span + trigger_start = ex['evt_triggers'][0][0] + trigger_end = ex['evt_triggers'][0][1] + doc = None + if args.head_only: + doc = nlp(' '.join(context_words)) + + for argname in predicted_args: + arg_span = find_arg_span(predicted_args[argname], context_words, + trigger_start, trigger_end, head_only=args.head_only, doc=doc) + if arg_span:# if None means hullucination + + if args.head_only and args.coref: + # consider coreferential mentions as matching + assert('corefs' in ex) + gold_spans = [a[1] for a in ex['ref_evt_links'] if a[2]==argname] + arg_span = check_coref(ex, list(arg_span), gold_spans) + + ex['gold_evt_links'].append([[trigger_start, trigger_end], list(arg_span), argname]) + + writer.write(json.dumps(ex)+'\n') + + writer.close() + + + diff --git a/src/genie/question/convert_gen_to_output4.py b/src/genie/question/convert_gen_to_output4.py new file mode 100644 index 0000000000000000000000000000000000000000..c6d60a47d17f52cafd336b128698c93a5347dc19 --- /dev/null +++ b/src/genie/question/convert_gen_to_output4.py @@ -0,0 +1,172 @@ +import os +import json +import argparse +import re +from copy import deepcopy +from tqdm import tqdm + +from utils import find_head, WhitespaceTokenizer, find_arg_span +import spacy +print("convert_gen_to_output2.py") +def extract_args_from_template(ex, template, ontology_dict,): + # extract argument text + template_words = template[0].strip().split() + predicted_words = ex['predicted'].strip().split() + predicted_args = {} + t_ptr= 0 + p_ptr= 0 + evt_type = get_event_type(ex)[0] + + while t_ptr < len(template_words) and p_ptr < 
len(predicted_words): + if re.match(r'<(arg\d+)>', template_words[t_ptr]): + m = re.match(r'<(arg\d+)>', template_words[t_ptr]) + arg_num = m.group(1) + arg_name = ontology_dict[evt_type.replace('n/a','unspecified')][arg_num] + + if predicted_words[p_ptr] == '': + # missing argument + p_ptr +=1 + t_ptr +=1 + else: + arg_start = p_ptr + while (p_ptr < len(predicted_words)) and (predicted_words[p_ptr] != template_words[t_ptr+1]): + p_ptr+=1 + arg_text = predicted_words[arg_start:p_ptr] + predicted_args[arg_name] = arg_text + t_ptr+=1 + # aligned + else: + t_ptr+=1 + p_ptr+=1 + + return predicted_args + + + + + + +def get_event_type(ex): + evt_type = [] + for evt in ex['evt_triggers']: + for t in evt[2]: + evt_type.append( t[0]) + return evt_type + +def check_coref(ex, arg_span, gold_spans): + for clus in ex['corefs']: + if arg_span in clus: + matched_gold_spans = [span for span in gold_spans if span in clus] + if len(matched_gold_spans) > 0: + return matched_gold_spans[0] + return arg_span + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--gen-file',type=str, default='checkpoints/gen-new-tokenization-pred/sample_predictions.jsonl') + parser.add_argument('--test-file', type=str,default='data/RAMS_1.0/data/test_head.jsonlines') + parser.add_argument('--output-file',type=str, default='test_output.jsonl') + parser.add_argument('--ontology-file',type=str, default='aida_ontology_new.csv') + parser.add_argument('--head-only',action='store_true',default=False) + parser.add_argument('--coref', action='store_true', default=False) + args = parser.parse_args() + + nlp = spacy.load('en_core_web_sm') + nlp.tokenizer = WhitespaceTokenizer(nlp.vocab) + # read ontology 读取事件本体 模板文件中的内容 + ontology_dict ={} + with open('aida_ontology_new.csv','r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + # 获取事件类型 + evt_type = fields[0] + # 如果该事件类型已经存在过 + if 
evt_type in ontology_dict.keys(): + # 得到该事件类型下的所有论元类型 + arguments = fields[2:] + # 将该事件类型对应的模板中的论元模板 填充到onto_logy字典中 + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + # 事件类型下添加字典一项 arg1的值为arg + # x += 1 + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + # 即扫描到的事件类型在 evt_type_dict.keys() 还未存在过 + else: + # 建立该事件类型的key + ontology_dict[evt_type] = {} + arguments = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + # 事件类型下添加字典一项 arg1的值为arg + # x += 1 + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + + + examples = {} + #print(args) + print(args.gen_file) + # data/RAMS_1.0/data/test_head_coref.jsonlines + with open(args.test_file, 'r') as f: + for line in f: + ex = json.loads(line.strip()) + ex['ref_evt_links'] = deepcopy(ex['gold_evt_links']) + ex['gold_evt_links'] = [] + examples[ex['doc_key']] =ex + + # checkpoints/gen-RAMS-pred/predictions.jsonl + with open(args.gen_file,'r') as f: + for line in f: + pred = json.loads(line.strip()) + # print(pred) + examples[pred['doc_key']]['predicted'] = pred['predicted'] + examples[pred['doc_key']]['gold'] = pred['gold'] + + # checkpoints/gen-RAMS-pred/out_put.jsonl + writer = open(args.output_file, 'w') + for ex in tqdm(examples.values()): + if 'predicted' not in ex:# this is used for testing + continue + # get template + evt_type = get_event_type(ex)[0] + context_words = [w for sent in ex['sentences'] for w in sent ] + template = ontology_dict[evt_type.replace('n/a','unspecified')]['template'] + # extract argument text + + predicted_args = extract_args_from_template(ex,template, ontology_dict) + # get trigger + # extract argument span + trigger_start = ex['evt_triggers'][0][0] + trigger_end = ex['evt_triggers'][0][1] + doc = None + if 
args.head_only: + doc = nlp(' '.join(context_words)) + + for argname in predicted_args: + arg_span = find_arg_span(predicted_args[argname], context_words, + trigger_start, trigger_end, head_only=args.head_only, doc=doc) + if arg_span:# if None means hullucination + + if args.head_only and args.coref: + # consider coreferential mentions as matching + assert('corefs' in ex) + gold_spans = [a[1] for a in ex['ref_evt_links'] if a[2]==argname] + arg_span = check_coref(ex, list(arg_span), gold_spans) + + ex['gold_evt_links'].append([[trigger_start, trigger_end], list(arg_span), argname]) + + writer.write(json.dumps(ex)+'\n') + + writer.close() + + + diff --git a/src/genie/data_module.py b/src/genie/question/data_module.py similarity index 94% rename from src/genie/data_module.py rename to src/genie/question/data_module.py index ac7c4598829afc6f069d34e4fbf6b9b2fb2f101a..fc6a6a6991f305f60c17513361ad812872069565 100644 --- a/src/genie/data_module.py +++ b/src/genie/question/data_module.py @@ -101,18 +101,18 @@ class RAMSDataModule(pl.LightningDataModule): x = 1 while(x > 0): - print(ontology_dict) + #print(ontology_dict) x = x - 1 return ontology_dict def prepare_data(self): - if not os.path.exists('preprocessed_data'): - os.makedirs('preprocessed_data') + if not os.path.exists('span_preprocessed_data'): + os.makedirs('span_preprocessed_data') ontology_dict = self.load_ontology() for split,f in [('train',self.hparams.train_file), ('val',self.hparams.val_file), ('test',self.hparams.test_file)]: - with open(f,'r') as reader, open('preprocessed_data/{}.jsonl'.format(split), 'w') as writer: + with open(f,'r') as reader, open('span_preprocessed_data/{}.jsonl'.format(split), 'w') as writer: for lidx, line in enumerate(reader): ex = json.loads(line.strip()) input_template, output_template, context= self.create_gold_gen(ex, ontology_dict, self.hparams.mark_trigger) @@ -145,7 +145,7 @@ class RAMSDataModule(pl.LightningDataModule): def train_dataloader(self): - dataset = 
IEDataset('preprocessed_data/train.jsonl') + dataset = IEDataset('span_preprocessed_data/train.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, @@ -156,7 +156,7 @@ class RAMSDataModule(pl.LightningDataModule): def val_dataloader(self): - dataset = IEDataset('preprocessed_data/val.jsonl') + dataset = IEDataset('span_preprocessed_data/val.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, collate_fn=my_collate, @@ -164,7 +164,7 @@ class RAMSDataModule(pl.LightningDataModule): return dataloader def test_dataloader(self): - dataset = IEDataset('preprocessed_data/test.jsonl') + dataset = IEDataset('span_preprocessed_data/test.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, collate_fn=my_collate, diff --git a/src/genie/data_module1.py b/src/genie/question/data_module1.py similarity index 95% rename from src/genie/data_module1.py rename to src/genie/question/data_module1.py index f8707640f52a163d7bf804ceb7a6b852f8625311..481cc19f996009cc309adab70210aa38bbe3b4c0 100644 --- a/src/genie/data_module1.py +++ b/src/genie/question/data_module1.py @@ -96,6 +96,7 @@ class RAMSDataModule(pl.LightningDataModule): # 输出模板中的等都替换为统一的 output_template = re.sub(r'', '', template) space_tokenized_template = output_template.split(' ') + print(output_template) tokenized_template = [] for w in space_tokenized_template: tokenized_template.extend(self.tokenizer.tokenize(w, add_prefix_space=True)) @@ -142,15 +143,15 @@ class RAMSDataModule(pl.LightningDataModule): return ontology_dict def prepare_data(self): - #if not os.path.exists('preprocessed_data'): - #os.makedirs('preprocessed_data') + if not os.path.exists('head_templates_preprocessed_data'): + os.makedirs('head_templates_preprocessed_data') ontology_dict = self.load_ontology() #print("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa") for split, f in [('train', self.hparams.train_file), ('val', self.hparams.val_file), ('test', self.hparams.test_file)]: 
- with open(f, 'r') as reader, open('preprocessed_data/{}.jsonl'.format(split), 'w') as writer: + with open(f, 'r') as reader, open('head_templates_preprocessed_data/{}.jsonl'.format(split), 'w') as writer: for lidx, line in enumerate(reader): # 读取jsonlines中的每一行 ex = json.loads(line.strip()) @@ -186,7 +187,7 @@ class RAMSDataModule(pl.LightningDataModule): writer.write(json.dumps(processed_ex) + "\n") def train_dataloader(self): - dataset = IEDataset('preprocessed_data/train.jsonl') + dataset = IEDataset('head_templates_preprocessed_data/train.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, @@ -196,7 +197,7 @@ class RAMSDataModule(pl.LightningDataModule): return dataloader def val_dataloader(self): - dataset = IEDataset('preprocessed_data/val.jsonl') + dataset = IEDataset('head_templates_preprocessed_data/val.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, collate_fn=my_collate, @@ -204,7 +205,7 @@ class RAMSDataModule(pl.LightningDataModule): return dataloader def test_dataloader(self): - dataset = IEDataset('preprocessed_data/test.jsonl') + dataset = IEDataset('head_templates_preprocessed_data/test.jsonl') dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, collate_fn=my_collate, diff --git a/src/genie/question/data_module2.py b/src/genie/question/data_module2.py new file mode 100644 index 0000000000000000000000000000000000000000..b74c11bb9b71cd52c44bfdf027f888aba3b44802 --- /dev/null +++ b/src/genie/question/data_module2.py @@ -0,0 +1,282 @@ +import os +import json +import jsonlines +import re +import random +from collections import defaultdict +import argparse + +import transformers +from transformers import BartTokenizer +import torch +from torch.utils.data import DataLoader +import pytorch_lightning as pl + +from .data import IEDataset, my_collate + +MAX_LENGTH = 424 +MAX_TGT_LENGTH = 72 +DOC_STRIDE = 256 + +print("data_module2.py") +class RAMSDataModule(pl.LightningDataModule): + def 
def create_gold_gen(self, ex, ontology_dict, mark_trigger=True):
    '''Build one (input, output, context) token triple per role template.

    Assumes that each document (``ex``) contains exactly one event.

    Args:
        ex: One parsed JSON line. Reads ``sentences`` (list of token lists),
            ``evt_triggers`` (first entry gives the trigger start/end token
            indices) and ``gold_evt_links`` (``[trigger_span, argument_span,
            role_name]`` triples with inclusive token indices).
        ontology_dict: Mapping event type -> dict holding ``'template'`` (a
            list of template strings, one per role) plus role-name -> ``argN``
            entries. It is only read, never modified.
        mark_trigger: When true, the trigger phrase in the context is wrapped
            in trigger marker tokens.

    Returns:
        ``(INPUT, OUTPUT, CONTEXT)``: three equally long lists with one entry
        per template. ``INPUT[k]`` is the tokenized first template with every
        numbered slot replaced by the generic slot, ``OUTPUT[k]`` is template
        ``k`` with its gold argument filled in (unfilled slots become the
        generic slot) and ``CONTEXT[k]`` is the tokenized passage.
    '''
    # NOTE(review): the angle-bracket literals below were lost in the copy of
    # this file that was reviewed and have been reconstructed. '<argN>' follows
    # from the '<{}>'.format(arg_num) substitution used for gold links; the
    # trigger placeholder, generic slot and trigger marker spellings must be
    # confirmed against the tokens registered in __init__ and the ontology CSV.
    trigger_slot = '<trg>'
    generic_slot = '<arg>'
    trigger_marker = ' <tgr>'
    numbered_slot = re.compile(r'<arg\d+>')

    def tokenize_words(text):
        # Tokenize word by word so every word receives the prefix-space treatment.
        pieces = []
        for word in text.split():
            pieces.extend(self.tokenizer.tokenize(word, add_prefix_space=True))
        return pieces

    evt_type = self.get_event_type(ex)[0]
    roles = ontology_dict[evt_type.replace('n/a', 'unspecified')]
    context_words = [w for sent in ex['sentences'] for w in sent]
    trigger = ex['evt_triggers'][0]
    trigger_word = context_words[trigger[0]]

    # Work on a per-example copy. The template list is shared by every example
    # of this event type; editing it in place would leak this example's trigger
    # and arguments into all later examples.
    templates = [t.replace(trigger_slot, trigger_word) for t in roles['template']]
    if not templates:
        return [], [], []

    # Every role shares the same input question (first template, generic slot).
    shared_input = tokenize_words(numbered_slot.sub(generic_slot, templates[0]))
    INPUT = [list(shared_input) for _ in templates]

    # Fill each gold argument into whichever template carries its numbered
    # slot. The position of a link inside gold_evt_links says nothing about
    # which template it belongs to. str.replace is used so that backslashes in
    # the argument text are copied literally.
    for _trigger_span, argument_span, arg_name in ex['gold_evt_links']:
        slot = '<{}>'.format(roles[arg_name])
        arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 1])
        templates = [t.replace(slot, arg_text) for t in templates]

    if mark_trigger:
        prefix = self.tokenizer.tokenize(' '.join(context_words[:trigger[0]]), add_prefix_space=True)
        tgt = self.tokenizer.tokenize(' '.join(context_words[trigger[0]: trigger[1] + 1]), add_prefix_space=True)
        suffix = self.tokenizer.tokenize(' '.join(context_words[trigger[1] + 1:]), add_prefix_space=True)
        context = prefix + [trigger_marker] + tgt + [trigger_marker] + suffix
    else:
        context = self.tokenizer.tokenize(' '.join(context_words), add_prefix_space=True)
    CONTEXT = [list(context) for _ in templates]

    # Slots that received no gold argument collapse to the generic slot.
    OUTPUT = [tokenize_words(numbered_slot.sub(generic_slot, t)) for t in templates]

    return INPUT, OUTPUT, CONTEXT
enumerate(reader): + # 读取jsonlines中的每一行 + ex = json.loads(line.strip()) + # 输入模板 应该输出的模板 文本 + # 在输入到函数进行处理之后 应该进行一个arg对应一个输入模板、一个输出模板以及一个文本 + # 可以选择以列表的形式进行返回 + input_template, output_template, context = self.create_gold_gen(ex, ontology_dict, + self.hparams.mark_trigger) + + # 返回所有的编码信息 + # 返回的是三个列表 INPUT OUTPUT CONTEXT 这三个列表的长度相等 举个例子 列表长度为3 + length = len(input_template) + #print(output_template) + for i in range(length): + input_tokens = self.tokenizer.encode_plus(input_template[i], context[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_LENGTH, + truncation='only_second', + padding='max_length') + # target_tokens + tgt_tokens = self.tokenizer.encode_plus(output_template[i], + add_special_tokens=True, + add_prefix_space=True, + max_length=MAX_TGT_LENGTH, + truncation=True, + padding='max_length') + # input_ids 单词在词典中的编码 + # tgt_tokens 指定对哪些词进行self_attention操作 + processed_ex = { + # 'idx': lidx, + 'doc_key': ex['doc_key'], + 'input_token_ids': input_tokens['input_ids'], + 'input_attn_mask': input_tokens['attention_mask'], + 'tgt_token_ids': tgt_tokens['input_ids'], + 'tgt_attn_mask': tgt_tokens['attention_mask'], + } + #print(processed_ex) + writer.write(json.dumps(processed_ex) + "\n") + + def train_dataloader(self): + dataset = IEDataset('head_templates_preprocessed_data_new/train.jsonl') + + dataloader = DataLoader(dataset, + pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.train_batch_size, + shuffle=True) + return dataloader + + def val_dataloader(self): + dataset = IEDataset('head_templates_preprocessed_data_new/val.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + collate_fn=my_collate, + batch_size=self.hparams.eval_batch_size, shuffle=False) + return dataloader + + def test_dataloader(self): + dataset = IEDataset('head_templates_preprocessed_data_new/test.jsonl') + + dataloader = DataLoader(dataset, pin_memory=True, num_workers=2, + collate_fn=my_collate, + 
def create_gold_gen(self, ex, ontology_dict, mark_trigger=True):
    '''Build one (input, output, context) token triple per role template.

    Assumes that each document (``ex``) contains exactly one event.

    Args:
        ex: One parsed JSON line. Reads ``sentences`` (list of token lists),
            ``evt_triggers`` (first entry gives the trigger start/end token
            indices) and ``gold_evt_links`` (``[trigger_span, argument_span,
            role_name]`` triples with inclusive token indices).
        ontology_dict: Mapping event type -> dict holding ``'template'`` (the
            list of per-role template strings) plus role-name -> ``argN``
            entries. It is only read, never modified.
        mark_trigger: When true, the trigger phrase in the context is wrapped
            in trigger marker tokens.

    Returns:
        ``(INPUT, OUTPUT, CONTEXT)``: three equally long lists with one entry
        per template. ``INPUT[k]`` is the tokenized first template with every
        numbered slot made generic, ``OUTPUT[k]`` is the tokenized template
        ``k`` with its gold argument filled in, ``CONTEXT[k]`` is the
        tokenized passage.
    '''
    # NOTE(review): the angle-bracket literals below were lost in the copy of
    # this file that was reviewed and have been reconstructed. '<argN>' follows
    # from the '<{}>'.format(arg_num) substitution used for gold links; the
    # trigger placeholder, generic slot and trigger marker spellings must be
    # confirmed against the tokens registered in __init__ and the ontology CSV.
    trigger_slot = '<trg>'
    generic_slot = '<arg>'
    trigger_marker = ' <tgr>'
    numbered_slot = re.compile(r'<arg\d+>')

    def tokenize_words(text):
        # Tokenize word by word so every word receives the prefix-space treatment.
        pieces = []
        for word in text.split():
            pieces.extend(self.tokenizer.tokenize(word, add_prefix_space=True))
        return pieces

    evt_type = self.get_event_type(ex)[0]
    roles = ontology_dict[evt_type.replace('n/a', 'unspecified')]
    context_words = [w for sent in ex['sentences'] for w in sent]
    trigger = ex['evt_triggers'][0]
    trigger_word = context_words[trigger[0]]

    # Per-example copy: the ontology's template list is shared across examples,
    # so substituting into it directly would corrupt every later example of
    # the same event type.
    templates = [t.replace(trigger_slot, trigger_word) for t in roles['template']]
    if not templates:
        return [], [], []

    # Every role shares the same input question (first template, generic slot).
    shared_input = tokenize_words(numbered_slot.sub(generic_slot, templates[0]))
    INPUT = [list(shared_input) for _ in templates]

    # Route each gold argument to the template that actually contains its
    # numbered slot instead of trusting the link's list position. str.replace
    # copies the argument text literally (no backslash interpretation).
    for _trigger_span, argument_span, arg_name in ex['gold_evt_links']:
        slot = '<{}>'.format(roles[arg_name])
        arg_text = ' '.join(context_words[argument_span[0]:argument_span[1] + 1])
        templates = [t.replace(slot, arg_text) for t in templates]

    if mark_trigger:
        prefix = self.tokenizer.tokenize(' '.join(context_words[:trigger[0]]), add_prefix_space=True)
        tgt = self.tokenizer.tokenize(' '.join(context_words[trigger[0]: trigger[1] + 1]), add_prefix_space=True)
        suffix = self.tokenizer.tokenize(' '.join(context_words[trigger[1] + 1:]), add_prefix_space=True)
        context = prefix + [trigger_marker] + tgt + [trigger_marker] + suffix
    else:
        context = self.tokenizer.tokenize(' '.join(context_words), add_prefix_space=True)
    CONTEXT = [list(context) for _ in templates]

    # The target is the FILLED template. The previous code appended the
    # tokenized input template here, so the model was trained to echo the
    # question instead of producing the argument.
    OUTPUT = [tokenize_words(numbered_slot.sub(generic_slot, t)) for t in templates]

    return INPUT, OUTPUT, CONTEXT
def prepare_data(self):
    '''Tokenize every split and cache it as JSON lines on disk.

    Reads the train/val/test files named in ``self.hparams`` and expands each
    document, via ``create_gold_gen``, into one example per returned
    (input template, output template, context) triple. Each example is written
    to ``head_templates_preprocessed_data_new/<split>.jsonl`` with the keys
    ``doc_key``, ``input_token_ids``, ``input_attn_mask``, ``tgt_token_ids``
    and ``tgt_attn_mask``.
    '''
    out_dir = 'head_templates_preprocessed_data_new'
    # Opening a file for writing does not create its parent directory, so make
    # sure it exists; a fresh checkout would otherwise fail on the first split.
    os.makedirs(out_dir, exist_ok=True)

    ontology_dict = self.load_ontology()

    splits = (
        ('train', self.hparams.train_file),
        ('val', self.hparams.val_file),
        ('test', self.hparams.test_file),
    )
    for split, path in splits:
        with open(path, 'r') as reader, open('{}/{}.jsonl'.format(out_dir, split), 'w') as writer:
            for line in reader:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    continue
                ex = json.loads(line)
                # Three parallel lists with one entry per role template.
                input_templates, output_templates, contexts = self.create_gold_gen(
                    ex, ontology_dict, self.hparams.mark_trigger)

                for input_template, output_template, context in zip(
                        input_templates, output_templates, contexts):
                    # Encoder input: template + passage; only the passage may
                    # be truncated so the question is always kept intact.
                    input_tokens = self.tokenizer.encode_plus(input_template, context,
                                                              add_special_tokens=True,
                                                              add_prefix_space=True,
                                                              max_length=MAX_LENGTH,
                                                              truncation='only_second',
                                                              padding='max_length')
                    # Decoder target: the filled template.
                    tgt_tokens = self.tokenizer.encode_plus(output_template,
                                                            add_special_tokens=True,
                                                            add_prefix_space=True,
                                                            max_length=MAX_TGT_LENGTH,
                                                            truncation=True,
                                                            padding='max_length')
                    processed_ex = {
                        'doc_key': ex['doc_key'],
                        'input_token_ids': input_tokens['input_ids'],
                        'input_attn_mask': input_tokens['attention_mask'],
                        'tgt_token_ids': tgt_tokens['input_ids'],
                        'tgt_attn_mask': tgt_tokens['attention_mask'],
                    }
                    writer.write(json.dumps(processed_ex) + "\n")
def extract_args_from_template(ex, template, ontology_dict,):
    '''Align each generated sentence with its template and read off the arguments.

    Args:
        ex: Example dict. ``ex['predicted']`` is a list of generated strings,
            the k-th one answering the k-th template; ``ex['evt_triggers']``
            supplies the event type.
        template: List of template strings for the example's event type, each
            containing numbered slots such as ``<arg1>``.
        ontology_dict: Mapping event type -> dict with ``argN`` -> role name.

    Returns:
        Dict mapping role name -> list of predicted argument words. Roles
        whose slot was left unfilled, or filled with nothing, are absent.
    '''
    # NOTE(review): the literal compared against "missing argument" output was
    # lost in the reviewed copy of this file (it read ''); since the words come
    # from str.split() it cannot have been empty. Reconstructed as the generic
    # slot token; confirm against the tokens the model was trained with.
    missing_token = '<arg>'
    slot_pattern = re.compile(r'<(arg\d+)>')

    roles = ontology_dict[get_event_type(ex)[0].replace('n/a', 'unspecified')]
    predicted_args = {}

    # zip() stops at the shorter list, so a document with fewer predictions
    # than templates no longer raises IndexError.
    for raw_template, raw_prediction in zip(template, ex['predicted']):
        template_words = raw_template.strip().split()
        predicted_words = raw_prediction.strip().split()
        t_ptr = 0
        p_ptr = 0
        while t_ptr < len(template_words) and p_ptr < len(predicted_words):
            m = slot_pattern.match(template_words[t_ptr])
            if m is None:
                # Ordinary template word: assume the prediction is aligned.
                t_ptr += 1
                p_ptr += 1
                continue

            arg_name = roles[m.group(1)]
            if predicted_words[p_ptr] == missing_token:
                # The model declined to fill this slot.
                p_ptr += 1
                t_ptr += 1
                continue

            arg_start = p_ptr
            if t_ptr + 1 == len(template_words):
                # Slot is the last template word: the argument runs to the end.
                p_ptr = len(predicted_words)
            else:
                # Otherwise it runs until the word that follows the slot.
                stop_word = template_words[t_ptr + 1]
                while p_ptr < len(predicted_words) and predicted_words[p_ptr] != stop_word:
                    p_ptr += 1
            arg_text = predicted_words[arg_start:p_ptr]
            if arg_text:
                # An empty span means the slot was skipped; recording [] would
                # make the later span search match at every position.
                predicted_args[arg_name] = arg_text
            t_ptr += 1

    return predicted_args


def get_event_type(ex):
    '''Return the first element of every type entry of every trigger in ``ex['evt_triggers']``.'''
    return [t[0] for evt in ex['evt_triggers'] for t in evt[2]]


def check_coref(ex, arg_span, gold_spans):
    '''Map ``arg_span`` onto a coreferent gold span when one exists.

    Scans ``ex['corefs']`` for clusters containing ``arg_span``; the first
    such cluster that also contains a gold span yields that gold span.
    Otherwise ``arg_span`` is returned unchanged.
    '''
    for clus in ex['corefs']:
        if arg_span not in clus:
            continue
        for span in gold_spans:
            if span in clus:
                return span
    return arg_span
parser.add_argument('--test-file', type=str,default='data/RAMS_1.0/data/test.jsonlines') + parser.add_argument('--output-file',type=str, default='test_output.jsonl') + parser.add_argument('--ontology-file',type=str, default='aida_ontology_new.csv') + parser.add_argument('--head-only',action='store_true',default=False) + parser.add_argument('--coref', action='store_true', default=False) + args = parser.parse_args() + + # 加载词典 + nlp = spacy.load('en_core_web_sm') + nlp.tokenizer = WhitespaceTokenizer(nlp.vocab) + # read ontology 读取事件本体 模板文件中的内容 + ontology_dict = {} + with open('aida_ontology_fj-5.csv', 'r') as f: + for lidx, line in enumerate(f): + if lidx == 0: # header + continue + fields = line.strip().split(',') + if len(fields) < 2: + break + evt_type = fields[0] + if evt_type in ontology_dict.keys(): + arguments = fields[2:] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + else: + ontology_dict[evt_type] = {} + arguments = fields[2:] + ontology_dict[evt_type]['template'] = [] + ontology_dict[evt_type]['template'].append(fields[1]) + for i, arg in enumerate(arguments): + if arg != '': + ontology_dict[evt_type]['arg{}'.format(i + 1)] = arg + ontology_dict[evt_type][arg] = 'arg{}'.format(i + 1) + examples = {} + print(args.gen_file) + # data/RAMS_1.0/data/test_head_coref.jsonlines + key = [] + with open(args.test_file, 'r') as f: + for line in f: + ex = json.loads(line.strip()) + #if ex['gold_evt_links'] == []: + #key.append(ex['doc_key']) + #continue + ex['ref_evt_links'] = deepcopy(ex['gold_evt_links']) + ex['gold_evt_links'] = [] + examples[ex['doc_key']] = ex + + # checkpoints/gen-RAMS-pred/predictions.jsonl + flag = {} + with open(args.gen_file,'r') as f: + for line in f: + pred = json.loads(line.strip()) + # print(pred) + # 因为最后生成 应该是 多个相同的事件类型在并列 这个操作好像把已经填入的predicte覆盖掉了 + # 在这里的循环中 
应该继续向下扫描 采取和ontology中相同的处理方式 用列表的方式存储将pred中的内容存放到examples中的数据中 + # pred 是对预测文件中的预测结果句用空格进行分隔单词后的结果 + # pred中的内容主要包括 doc_key predicted gold + # 如果扫描到的预测json数据事件类型在examples中存在 那么就将predicted存入列表 + # if pred['doc_key'] not in key: + if pred['doc_key'] in flag.keys(): + #print(examples[pred['doc_key']]['predicted']) + examples[pred['doc_key']]['predicted'].append(pred['predicted']) + examples[pred['doc_key']]['gold'].append(pred['gold']) + # 如果没有 说明这是新的事件类型 + else: + flag[pred['doc_key']] = True + examples[pred['doc_key']]['predicted'] = [] + examples[pred['doc_key']]['gold'] = [] + # 然后将此条数据存入 + examples[pred['doc_key']]['predicted'].append(pred['predicted']) + examples[pred['doc_key']]['gold'].append(pred['gold']) + + # checkpoints/gen-RAMS-pred/out_put.jsonl + writer = open(args.output_file, 'w') + for ex in tqdm(examples.values()): + if 'predicted' not in ex:# this is used for testing + continue + # get template 获取事件类型 + evt_type = get_event_type(ex)[0] + context_words = [w for sent in ex['sentences'] for w in sent] + # 这里的template是ontology_dict中 template 包含一个事件类型下的所有事件模板 + template = ontology_dict[evt_type.replace('n/a', 'unspecified')]['template'] + # extract argument text + # 这里应该是提取预测文件中预测到的论元短语 ex是一条json数据 template是这条json数据对应下的模板 on是论元角色和的映射 + # 这里ex中的predicted和gold已经包括了该事件类型下的所有论元 用列表的形式进行存储 且顺序是一一对应的 + # 这里返回的predicted_args是一个字典: + predicted_args = extract_args_from_template(ex, template, ontology_dict) + # get trigger + # extract argument span 找出触发词在文段中的索引 + str_p = '' + str_g = '' + for i in range(len(ex['predicted'])): + str_p += ex['predicted'][i] + str_g += ex['gold'][i] + + ex['predicted'] = str_p + ex['gold'] = str_g + trigger_start = ex['evt_triggers'][0][0] + trigger_end = ex['evt_triggers'][0][1] + # 上面返回的predicted_args是一个字典 暂时认为是论元角色和具体论元短语的映射 + # 还没有发现doc的作用 + doc = None + # 通过test_rams.sh文件的设置 可以发现args.head_only的值为true + # print('aa', args.head_only, args.coref) + if args.head_only: + # 从原始文本中取出标记 + doc = nlp(' '.join(context_words)) + # 
其中arg_name是论元角色类型 + for argname in predicted_args: + # 通过find_arg_span函数找出 + arg_span = find_arg_span(predicted_args[argname], context_words, + trigger_start, trigger_end, head_only=args.head_only, doc=doc) + #print(arg_span) + if arg_span:# if None means hullucination + + if args.head_only and args.coref: + # consider coreferential mentions as matching + assert('corefs' in ex) + print('aaa') + gold_spans = [a[1] for a in ex['ref_evt_links'] if a[2]==argname] + arg_span = check_coref(ex, list(arg_span), gold_spans) + + ex['gold_evt_links'].append([[trigger_start, trigger_end], list(arg_span), argname]) + + writer.write(json.dumps(ex)+'\n') + + writer.close() + + + diff --git a/test_rams.sh b/test_rams.sh index a843bb9f6db34bf9c027299b4964aa0e2c0ed5d5..aa04ff42cfd2c8f205654d36f04e195c3532cb99 100755 --- a/test_rams.sh +++ b/test_rams.sh @@ -5,7 +5,7 @@ CKPT_NAME=gen-RAMS MODEL=gen python train.py --model=$MODEL --ckpt_name=$CKPT_NAME-pred \ - --load_ckpt=checkpoints/$CKPT_NAME/epoch=2-v0.ckpt \ + --load_ckpt=checkpoints/$CKPT_NAME-what-head/epoch=2-v0.ckpt \ --dataset=RAMS \ --eval_only \ --train_file=data/RAMS_1.0/data/train.jsonlines \ @@ -19,15 +19,15 @@ python train.py --model=$MODEL --ckpt_name=$CKPT_NAME-pred \ #span eval -#python src/genie/convert_gen_to_output.py --gen-file=checkpoints/$CKPT_NAME-pred/predictions.jsonl \ +#python src/genie/convert_gen_to_output5.py --gen-file=checkpoints/$CKPT_NAME-pred/predictions.jsonl \ #--output-file=checkpoints/$CKPT_NAME-pred/span_output.jsonl #python data/RAMS_1.0/scorer/scorer.py -g=data/RAMS_1.0/data/test.jsonlines -p=checkpoints/$CKPT_NAME-pred/span_output.jsonl \ #--reuse_gold_format --do_all > checkpoints/$CKPT_NAME-pred/span_metrics.txt # head eval -python src/genie/convert_gen_to_output1.py --gen-file=checkpoints/$CKPT_NAME-pred/predictions.jsonl \ ---output-file=checkpoints/$CKPT_NAME-pred/output.jsonl --head-only +python src/genie/convert_gen_to_output5.py 
--gen-file=checkpoints/$CKPT_NAME-pred/predictions.jsonl \ +--output-file=checkpoints/$CKPT_NAME-pred/output.jsonl --head-only python data/RAMS_1.0/scorer/scorer.py -g=data/RAMS_1.0/data/test_head.jsonlines -p=checkpoints/$CKPT_NAME-pred/output.jsonl \ --reuse_gold_format --do_all > checkpoints/$CKPT_NAME-pred/head_metrics.txt diff --git a/train.py b/train.py index dc8d00c033e20c00ab7310db1b286795dd037907..0d57072a28f27af4a51a6630568d8b1cb1791f37 100644 --- a/train.py +++ b/train.py @@ -14,7 +14,7 @@ from pytorch_lightning.utilities.seed import seed_everything -from src.genie.data_module1 import RAMSDataModule +from src.genie.data_module_w import RAMSDataModule from src.genie.ACE_data_module import ACEDataModule from src.genie.KAIROS_data_module import KAIROSDataModule from src.genie.model import GenIEModel diff --git a/train_rams.sh b/train_rams.sh index 64fba8a8a437adc6de879d6ee4a6275ebf77a9d2..a7fa4c9fcf78b682ef23911642f5689372c6a6b0 100755 --- a/train_rams.sh +++ b/train_rams.sh @@ -2,7 +2,7 @@ set -e set -x -python train.py --model=gen --ckpt_name='gen-RAMS-head' \ +python train.py --model=gen --ckpt_name='gen-RAMS-what-head' \ --dataset=RAMS \ --train_file=data/RAMS_1.0/data/train.jsonlines \ --val_file=data/RAMS_1.0/data/dev.jsonlines \