# docred / docred.py
# Last commit: "debugged" (46417fa), author avatar: bowdbeg; file size 8.94 kB
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DocRED metric: precision, recall, F1 and Ign F1 for document-level relation extraction."""
import os
import datasets
import evaluate
# BibTeX entry for the DocRED dataset paper.
_CITATION = """\
@inproceedings{yao2019DocRED,
  title={{DocRED}: A Large-Scale Document-Level Relation Extraction Dataset},
  author={Yao, Yuan and Ye, Deming and Li, Peng and Han, Xu and Lin, Yankai and Liu, Zhenghao and Liu, Zhiyuan and Huang, Lixin and Zhou, Jie and Sun, Maosong},
  booktitle={Proceedings of ACL 2019},
  year={2019}
}
"""

# Short description shown on the module page.
_DESCRIPTION = """\
Relation-extraction metric for DocRED-style document-level data. Predicted relation facts
(document title, head entity index, tail entity index, relation id) are compared with the
reference facts to obtain precision, recall and F1. An additional "Ign F1" discounts correct
predictions whose head/tail entity names already occur with the same relation in the
training data supplied through `train_data`.
"""

# Description of the arguments and return values of `compute`.
_KWARGS_DESCRIPTION = """
Scores predicted document-level relations against reference relations.
Args:
    predictions: list of documents. Each document is a dict with the keys
        `title` (str), `sents` (list of token lists), `vertexSet` (list of entities,
        each entity being its mentions with `name`, `sent_id`, `pos` and `type`)
        and `labels` (dict of parallel lists `head`, `tail`, `relation_id`,
        `relation_text` and `evidence`).
    references: list of documents in the same format as `predictions`.
        Predicted and reference relations are matched by document `title`.
    train_data: optional iterable of annotated training documents with
        `vertexSet` and `labels` entries. It is only used for `ign_f1`;
        when omitted, `ign_f1` equals `f1`.
Returns:
    A dict containing at least the following keys:
    f1: harmonic mean of `precision` and `recall`,
    precision: share of predicted relations that appear in the references,
    recall: share of reference relations that were predicted,
    ign_f1: F1 whose precision ignores correct predictions already seen in `train_data`,
Examples:
    >>> metric = evaluate.load("path/to/docred.py")
    >>> doc = {
    ...     "title": "Example",
    ...     "sents": [["Alice", "knows", "Bob", "."]],
    ...     "vertexSet": [
    ...         [{"name": "Alice", "sent_id": 0, "pos": [0, 1], "type": "PER"}],
    ...         [{"name": "Bob", "sent_id": 0, "pos": [2, 3], "type": "PER"}],
    ...     ],
    ...     "labels": {
    ...         "head": [0],
    ...         "tail": [1],
    ...         "relation_id": ["P1"],
    ...         "relation_text": ["knows"],
    ...         "evidence": [[0]],
    ...     },
    ... }
    >>> results = metric.compute(predictions=[doc], references=[doc])
    >>> print(round(results["f1"], 4))
    1.0
"""

# Template placeholder: nothing in the visible code reads this URL. Kept so the
# module-level name stays importable.
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class docred(evaluate.Metric):
    """Relation-extraction metric for DocRED-style documents.

    Predicted relation facts ``(title, head, tail, relation_id)`` are compared
    with the reference facts to produce precision, recall and F1. The metric
    also reports an "Ign F1" that discounts correct predictions whose entity
    names already occur with the same relation in ``train_data``, and
    precision/recall/F1 over the supporting-evidence sentence ids.
    """

    # Schema of one document, shared by predictions and references: a title,
    # tokenized sentences, entities (each a sequence of mentions) and relation
    # labels stored column-wise as parallel sequences.
    dataset_feat = {
        "title": datasets.Value("string"),
        "sents": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
        "vertexSet": datasets.Sequence(
            datasets.Sequence(
                {
                    "name": datasets.Value("string"),
                    "sent_id": datasets.Value("int32"),
                    "pos": datasets.Sequence(datasets.Value("int32"), length=2),
                    "type": datasets.Value("string"),
                }
            )
        ),
        "labels": {
            "head": datasets.Sequence(datasets.Value("int32")),
            "tail": datasets.Sequence(datasets.Value("int32")),
            "relation_id": datasets.Sequence(datasets.Value("string")),
            "relation_text": datasets.Sequence(datasets.Value("string")),
            "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
        },
    }

    # Added to denominators so that empty inputs score 0 instead of raising
    # ZeroDivisionError.
    eps = 1e-12

    def _info(self):
        """Return the module metadata and the expected input features."""
        return evaluate.MetricInfo(
            module_type="metric",
            # Description shown on the module page.
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Predictions and references share the same document schema.
            features=datasets.Features({"predictions": self.dataset_feat, "references": self.dataset_feat}),
            homepage="https://github.com/thunlp/DocRED",
            codebase_urls=["https://github.com/thunlp/DocRED"],
            reference_urls=["https://aclanthology.org/P19-1074/"],
        )

    def _download_and_prepare(self, dl_manager):
        """No external resources are needed, so this is a no-op."""

    @staticmethod
    def _mention_names(entity):
        """Return the mention names of one ``vertexSet`` entity.

        Two layouts are accepted: a list of mention dicts (each with a
        ``"name"`` key) and a single dict of parallel lists (``"name"`` maps
        to the list of names).
        NOTE(review): the second layout is presumably what the feature
        encoding produces for a ``Sequence`` of dicts -- confirm against the
        ``datasets`` documentation.
        """
        if isinstance(entity, dict):
            return entity["name"]
        return [mention["name"] for mention in entity]

    @staticmethod
    def _f1(precision, recall):
        """Return the harmonic mean of ``precision`` and ``recall`` (0.0 if both are 0)."""
        if precision + recall == 0:
            return 0.0
        return 2.0 * precision * recall / (precision + recall)

    def _generate_fact(self, dataset):
        """Collect the ``(head_name, tail_name, relation_id)`` facts of ``dataset``.

        Every labelled relation contributes one fact per pair of head-mention
        name and tail-mention name.

        Args:
            dataset: iterable of documents with ``vertexSet`` and ``labels``
                entries, or ``None``.

        Returns:
            A set of ``(head_name, tail_name, relation_id)`` tuples; empty
            when ``dataset`` is ``None``.
        """
        if dataset is None:
            return set()
        facts = set()
        for doc in dataset:
            entities = doc["vertexSet"]
            for label in self._convert_labels_to_list(doc["labels"]):
                relation = label["relation_id"]
                head_names = self._mention_names(entities[label["head"]])
                tail_names = self._mention_names(entities[label["tail"]])
                facts.update((head, tail, relation) for head in head_names for tail in tail_names)
        return facts

    def _collect_predicted_evidence(self, data):
        """Map each distinct relation fact in ``data`` to its evidence set.

        Keys are ``(title, head, tail, relation_id)``. When the same fact is
        listed more than once, the evidence of its first occurrence is kept.
        A missing or ``None`` evidence entry counts as empty.
        """
        evidence_by_fact = {}
        for doc in data:
            title = doc["title"]
            for label in self._convert_labels_to_list(doc["labels"]):
                fact = (title, label["head"], label["tail"], label["relation_id"])
                if fact not in evidence_by_fact:
                    evidence_by_fact[fact] = set(label.get("evidence") or ())
        return evidence_by_fact

    def _convert_to_relation_set(self, data):
        """Return the set of ``(title, head, tail, relation_id)`` facts in ``data``."""
        return set(self._collect_predicted_evidence(data))

    def _convert_labels_to_list(self, labels):
        """Turn column-wise labels into a list of per-relation dicts.

        Args:
            labels: dict mapping each field name to a list with one entry per
                relation, e.g. ``{"head": [0], "tail": [1], ...}``.

        Returns:
            One dict per relation holding that relation's value for every
            field. The count is taken from the first field. An empty or
            ``None`` ``labels`` yields an empty list.
        """
        if not labels:
            return []
        keys = list(labels)
        count = len(labels[keys[0]])
        return [{key: labels[key][i] for key in keys} for i in range(count)]

    def _compute(self, predictions, references, train_data=None):
        """Score predicted relations against the references.

        Args:
            predictions: list of predicted documents.
            references: list of reference documents; relations are matched
                to predictions by document ``title``.
            train_data: optional annotated training documents. Correct
                predictions whose head/tail mention names occur with the same
                relation in this data are discounted in ``ign_f1``.

        Returns:
            A dict with ``f1``, ``precision``, ``recall`` and ``ign_f1`` for
            relation facts, plus ``evi_f1``, ``evi_precision`` and
            ``evi_recall`` for the evidence sentence ids.
        """
        train_facts = self._generate_fact(train_data)

        # Reference facts and their evidence, plus the entities of each
        # reference document, looked up by title.
        gold_evidence = {}
        total_gold_evidence = 0
        entities_by_title = {}
        for doc in references:
            title = doc["title"]
            entities_by_title[title] = doc["vertexSet"]
            for label in self._convert_labels_to_list(doc["labels"]):
                evidence = label.get("evidence") or ()
                fact = (title, label["head"], label["tail"], label["relation_id"])
                gold_evidence[fact] = set(evidence)
                total_gold_evidence += len(evidence)
        total_gold = len(gold_evidence)

        predicted_evidence = self._collect_predicted_evidence(predictions)
        total_predicted = len(predicted_evidence)

        correct = 0
        correct_in_train = 0
        correct_evidence = 0
        total_predicted_evidence = 0
        for fact, evidence in predicted_evidence.items():
            title, head, tail, relation = fact
            # Predictions for documents without a reference still count in
            # the precision denominator, but can never be correct.
            if title not in entities_by_title:
                continue
            total_predicted_evidence += len(evidence)
            if fact not in gold_evidence:
                continue
            correct += 1
            correct_evidence += len(gold_evidence[fact] & evidence)
            if not train_facts:
                continue
            entities = entities_by_title[title]
            head_names = self._mention_names(entities[head])
            tail_names = self._mention_names(entities[tail])
            if any((h, t, relation) in train_facts for h in head_names for t in tail_names):
                correct_in_train += 1

        precision = correct / (total_predicted + self.eps)
        recall = correct / (total_gold + self.eps)
        f1 = self._f1(precision, recall)

        evi_precision = correct_evidence / total_predicted_evidence if total_predicted_evidence > 0 else 0.0
        evi_recall = correct_evidence / (total_gold_evidence + self.eps)
        evi_f1 = self._f1(evi_precision, evi_recall)

        # Ign precision removes facts already seen in the training data from
        # both the numerator and the denominator; recall is unchanged.
        ign_precision = (correct - correct_in_train) / (total_predicted - correct_in_train + self.eps)
        ign_f1 = self._f1(ign_precision, recall)

        return {
            "f1": f1,
            "precision": precision,
            "recall": recall,
            "ign_f1": ign_f1,
            "evi_f1": evi_f1,
            "evi_precision": evi_precision,
            "evi_recall": evi_recall,
        }