|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Evaluation module `docred`: relation-extraction precision, recall, F1 and train-fact-ignoring F1."""
|
|
|
import os |
|
|
|
import datasets |
|
import evaluate |
|
|
|
|
|
_CITATION = """\ |
|
@InProceedings{huggingface:module, |
|
title = {A great new module}, |
|
authors={huggingface, Inc.}, |
|
year={2020} |
|
} |
|
""" |
|
|
|
|
|
_DESCRIPTION = """\ |
|
This new module is designed to solve this great ML task and is crafted with a lot of care. |
|
""" |
|
|
|
|
|
|
|
_KWARGS_DESCRIPTION = """ |
|
Calculates how good are predictions given some references, using certain scores |
|
Args: |
|
predictions: list of predictions to score. Each predictions |
|
should be a string with tokens separated by spaces. |
|
references: list of reference for each prediction. Each |
|
reference should be a string with tokens separated by spaces. |
|
Returns: |
|
accuracy: description of the first score, |
|
another_score: description of the second score, |
|
Examples: |
|
Examples should be written in doctest format, and should illustrate how |
|
to use the function. |
|
|
|
>>> my_new_module = evaluate.load("my_new_module") |
|
>>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1]) |
|
>>> print(results) |
|
{'accuracy': 1.0} |
|
""" |
|
|
|
|
|
# NOTE(review): placeholder URL for an external resource. Nothing in the visible
# part of this file reads it (`_download_and_prepare` is a no-op) — confirm it
# is unused elsewhere before removing it.
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class docred(evaluate.Metric):
    """Relation-extraction metric over document-level predictions.

    A relation is identified by ``(title, head, tail, relation_id)``. A
    predicted relation is correct when the same tuple is labelled in the
    references. The metric returns precision, recall and F1 over these tuples
    and ``ign_f1``, whose precision discounts correct predictions that already
    appear as facts in the optional ``train_data``.

    Evidence annotations are part of the input schema but are not scored.
    """

    # Schema shared by the `predictions` and `references` inputs.
    dataset_feat = {
        "title": datasets.Value("string"),
        "sents": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
        "vertexSet": datasets.Sequence(
            datasets.Sequence(
                {
                    "name": datasets.Value("string"),
                    "sent_id": datasets.Value("int32"),
                    "pos": datasets.Sequence(datasets.Value("int32"), length=2),
                    "type": datasets.Value("string"),
                }
            )
        ),
        "labels": {
            "head": datasets.Sequence(datasets.Value("int32")),
            "tail": datasets.Sequence(datasets.Value("int32")),
            "relation_id": datasets.Sequence(datasets.Value("string")),
            "relation_text": datasets.Sequence(datasets.Value("string")),
            "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
        },
    }

    # Added to every denominator so that empty inputs do not divide by zero.
    eps = 1e-12

    def _info(self):
        """Return the module metadata and the expected input features."""
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Predictions and references are both full documents with the same schema.
            features=datasets.Features({"predictions": self.dataset_feat, "references": self.dataset_feat}),
            # TODO(review): the three URLs below are still the template placeholders.
            homepage="http://module.homepage",
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""
        # Nothing to download: every input is supplied by the caller.
        pass

    @staticmethod
    def _entity_names(entity):
        """Return the mention names of one ``vertexSet`` entry.

        Two layouts are accepted: a mapping of columns
        (``{"name": [...], ...}``) and a list of mention dicts
        (``[{"name": ...}, ...]``).
        """
        if isinstance(entity, dict):
            names = entity["name"]
            # A bare string would otherwise be iterated character by character.
            return [names] if isinstance(names, str) else list(names)
        return [mention["name"] for mention in entity]

    @staticmethod
    def _f1(precision, recall):
        """Return the harmonic mean of the two values, or 0.0 when both are zero."""
        total = precision + recall
        if total == 0:
            return 0.0
        return 2.0 * precision * recall / total

    def _generate_fact(self, dataset):
        """Collect the ``(head name, tail name, relation_id)`` facts of ``dataset``.

        Args:
            dataset: iterable of documents with ``vertexSet`` and ``labels``
                entries, or None.

        Returns:
            A set with one triple per pair of head/tail mention names of every
            labelled relation; empty when ``dataset`` is None.
        """
        facts = set()
        if dataset is None:
            return facts
        for doc in dataset:
            vertex_set = doc["vertexSet"]
            for label in self._convert_labels_to_list(doc["labels"]):
                relation = label["relation_id"]
                head_names = self._entity_names(vertex_set[label["head"]])
                tail_names = self._entity_names(vertex_set[label["tail"]])
                facts.update((head, tail, relation) for head in head_names for tail in tail_names)
        return facts

    def _convert_to_relation_set(self, data):
        """Return the set of ``(title, head, tail, relation_id)`` tuples labelled in ``data``."""
        return {
            (doc["title"], label["head"], label["tail"], label["relation_id"])
            for doc in data
            for label in self._convert_labels_to_list(doc["labels"])
        }

    def _convert_labels_to_list(self, labels):
        """Turn a dict of parallel lists into a list of per-relation dicts.

        Args:
            labels: mapping such as ``{"head": [...], "tail": [...], ...}``.
                None or an empty mapping yields an empty list.

        Returns:
            One dict per relation, carrying every key of ``labels``. The number
            of relations is the length of the first column.
        """
        if not labels:
            return []
        keys = list(labels)
        row_count = len(labels[keys[0]])
        return [{key: labels[key][row] for key in keys} for row in range(row_count)]

    def _compute(self, predictions, references, train_data=None):
        """Returns the scores

        Args:
            predictions: documents holding the predicted relations.
            references: documents holding the gold relations.
            train_data: optional training documents; their facts are discounted
                in ``ign_f1``.

        Returns:
            A dict with the keys ``f1``, ``precision``, ``recall`` and ``ign_f1``.
        """
        known_facts = self._generate_fact(train_data)

        # Gold relations use the same tuple layout as the predictions so that
        # correctness is a plain set-membership test.
        gold_relations = set()
        vertex_sets_by_title = {}
        for doc in references:
            title = doc["title"]
            vertex_sets_by_title[title] = doc["vertexSet"]
            for label in self._convert_labels_to_list(doc["labels"]):
                gold_relations.add((title, label["head"], label["tail"], label["relation_id"]))

        predicted_relations = self._convert_to_relation_set(predictions)

        correct = 0
        correct_in_train = 0
        for relation_key in predicted_relations:
            if relation_key not in gold_relations:
                continue
            correct += 1
            if not known_facts:
                # Without training facts nothing can be discounted.
                continue
            title, head_idx, tail_idx, relation = relation_key
            vertex_set = vertex_sets_by_title[title]
            head_names = self._entity_names(vertex_set[head_idx])
            tail_names = self._entity_names(vertex_set[tail_idx])
            if any((head, tail, relation) in known_facts for head in head_names for tail in tail_names):
                correct_in_train += 1

        predicted_count = len(predicted_relations)
        precision = correct / (predicted_count + self.eps)
        recall = correct / (len(gold_relations) + self.eps)

        # Correct predictions already seen in the training data are removed from
        # both the numerator and the denominator of the precision.
        ign_precision = (correct - correct_in_train) / (predicted_count - correct_in_train + self.eps)

        return {
            "f1": self._f1(precision, recall),
            "precision": precision,
            "recall": recall,
            "ign_f1": self._f1(ign_precision, recall),
        }
|
|