# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""DocRED-style evaluation for document-level relation extraction."""

import datasets

import evaluate


_CITATION = """\
@inproceedings{yao-etal-2019-docred,
    title = {{D}oc{RED}: A Large-Scale Document-Level Relation Extraction Dataset},
    author = {Yao, Yuan and Ye, Deming and Li, Peng and Han, Xu and Lin, Yankai and Liu, Zhenghao and Liu, Zhiyuan and Huang, Lixin and Zhou, Jie and Sun, Maosong},
    booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
    year = {2019}
}
"""

_DESCRIPTION = """\
DocRED-style evaluation for document-level relation extraction. Given predicted and gold
relation triples (head entity, tail entity, relation) per document, it computes micro
precision, recall and F1, as well as Ign F1, which discounts relation facts that already
appear in the annotated training data.
"""

_KWARGS_DESCRIPTION = """
Computes relation extraction scores for DocRED-style predictions against references.
Args:
    predictions: list of documents in the DocRED format (see the feature schema below),
        each with a "title", a "vertexSet" of entity mentions, and a "labels" dict holding
        parallel lists of "head", "tail", "relation_id" and "evidence" entries.
    references: list of gold documents in the same format.
    train_data: optional iterable of annotated training documents; relation facts that
        occur in it are discounted when computing Ign F1.
Returns:
    f1: micro F1 over predicted relation triples,
    precision: micro precision over predicted relation triples,
    recall: micro recall over predicted relation triples,
    ign_f1: F1 after discarding predicted facts already present in train_data.
Examples:
    >>> docred_metric = evaluate.load("docred")  # doctest: +SKIP
    >>> results = docred_metric.compute(predictions=preds, references=refs)  # doctest: +SKIP
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class docred(evaluate.Metric):
    """DocRED relation extraction metric: micro F1, precision, recall and Ign F1."""

    # Feature schema shared by predictions and references; it mirrors the DocRED data format.
    dataset_feat = {
        "title": datasets.Value("string"),
        # "sents": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
        "vertexSet": datasets.Sequence(
            datasets.Sequence(
                {
                    "name": datasets.Value("string"),
                    "sent_id": datasets.Value("int32"),
                    "pos": datasets.Sequence(datasets.Value("int32"), length=2),
                    "type": datasets.Value("string"),
                }
            )
        ),
        "labels": {
            "head": datasets.Sequence(datasets.Value("int32")),
            "tail": datasets.Sequence(datasets.Value("int32")),
            "relation_id": datasets.Sequence(datasets.Value("string")),
            "evidence": datasets.Sequence(datasets.Sequence(datasets.Value("int32"))),
        },
    }
    # Small constant that keeps the precision/recall denominators non-zero.
    eps = 1e-12

    def _info(self):
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
module_type="metric", description=_DESCRIPTION, citation=_CITATION, inputs_description=_KWARGS_DESCRIPTION, # This defines the format of each prediction and reference features=datasets.Features({"predictions": self.dataset_feat, "references": self.dataset_feat}), # Homepage of the module for documentation homepage="http://module.homepage", # Additional links to the codebase or references codebase_urls=["http://github.com/path/to/codebase/of/new_module"], reference_urls=["http://path.to.reference.url/new_module"], ) def _download_and_prepare(self, dl_manager): """Optional: download external resources useful to compute the scores""" # TODO: Download external resources if needed pass def _generate_fact(self, dataset): if dataset is None: return set() facts = set() for data in dataset: vertexSet = data["vertexSet"] labels = self._convert_labels_to_list(data["labels"]) for label in labels: rel = label["relation_id"] for n1 in vertexSet[label["head"]]: for n2 in vertexSet[label["tail"]]: facts.add((n1["name"], n2["name"], rel)) return facts def _convert_to_relation_set(self, data): relation_set = set() for d in data: labels = d["labels"] labels = self._convert_labels_to_list(labels) for label in labels: relation_set.add((d["title"], label["head"], label["tail"], label["relation_id"])) return relation_set def _convert_labels_to_list(self, labels): keys = list(labels.keys()) labels = [{key: labels[key][i] for key in keys} for i in range(len(labels[keys[0]]))] return labels def _compute(self, predictions, references, train_data=None): """Returns the scores""" fact_in_train_annotated = self._generate_fact(train_data) std = {} tot_evidences = 0 ref_titleset = set([]) title2vectexSet = {} for x in references: title = x["title"] ref_titleset.add(title) vertexSet = x["vertexSet"] title2vectexSet[title] = vertexSet labels = self._convert_labels_to_list(x["labels"]) for label in labels: r = label["relation_id"] h_idx = label["head"] t_idx = label["tail"] std[(title, r, h_idx, t_idx)] = set(label["evidence"]) tot_evidences += len(label["evidence"]) tot_relations = len(std) pred_rel = self._convert_to_relation_set(predictions) submission_answer = sorted(pred_rel, key=lambda x: (x[0], x[1], x[2], x[3])) correct_re = 0 correct_evidence = 0 pred_evi = 0 correct_in_train_annotated = 0 titleset2 = set([]) for x in submission_answer: title, h_idx, t_idx, r = x titleset2.add(title) if title not in title2vectexSet: continue vertexSet = title2vectexSet[title] if "evidence" in x: evi = set(x["evidence"]) else: evi = set([]) pred_evi += len(evi) if (title, r, h_idx, t_idx) in std: correct_re += 1 stdevi = std[(title, r, h_idx, t_idx)] correct_evidence += len(stdevi & evi) in_train_annotated = in_train_distant = False for n1 in vertexSet[h_idx]["name"]: for n2 in vertexSet[t_idx]["name"]: if (n1, n2, r) in fact_in_train_annotated: in_train_annotated = True if in_train_annotated: correct_in_train_annotated += 1 # if in_train_distant: # correct_in_train_distant += 1 re_p = 1.0 * correct_re / (len(submission_answer) + self.eps) re_r = 1.0 * correct_re / (tot_relations + self.eps) if re_p + re_r == 0: re_f1 = 0 else: re_f1 = 2.0 * re_p * re_r / (re_p + re_r) evi_p = 1.0 * correct_evidence / pred_evi if pred_evi > 0 else 0 evi_r = 1.0 * correct_evidence / (tot_evidences + self.eps) if evi_p + evi_r == 0: evi_f1 = 0 else: evi_f1 = 2.0 * evi_p * evi_r / (evi_p + evi_r) re_p_ignore_train_annotated = ( 1.0 * (correct_re - correct_in_train_annotated) / (len(submission_answer) - correct_in_train_annotated + self.eps) ) # 
        # re_p_ignore_train = (
        #     1.0 * (correct_re - correct_in_train_distant)
        #     / (len(submission_answer) - correct_in_train_distant + self.eps)
        # )

        if re_p_ignore_train_annotated + re_r == 0:
            re_f1_ignore_train_annotated = 0
        else:
            re_f1_ignore_train_annotated = (
                2.0 * re_p_ignore_train_annotated * re_r / (re_p_ignore_train_annotated + re_r)
            )

        # if re_p_ignore_train + re_r == 0:
        #     re_f1_ignore_train = 0
        # else:
        #     re_f1_ignore_train = 2.0 * re_p_ignore_train * re_r / (re_p_ignore_train + re_r)

        # return re_f1, evi_f1, re_f1_ignore_train_annotated, re_f1_ignore_train, re_p, re_r
        return {"f1": re_f1, "precision": re_p, "recall": re_r, "ign_f1": re_f1_ignore_train_annotated}
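

# A minimal, self-contained sketch of the scoring arithmetic implemented in _compute,
# using hypothetical toy (title, head, tail, relation_id) tuples rather than full DocRED
# documents. It does not go through evaluate.load() or the feature schema above; it only
# illustrates how micro precision/recall/F1 and Ign F1 are derived from predicted and gold
# relation sets.
if __name__ == "__main__":
    eps = 1e-12

    # Hypothetical gold and predicted relation tuples for two documents.
    gold = {("Doc A", 0, 1, "P17"), ("Doc A", 2, 0, "P131"), ("Doc B", 0, 3, "P27")}
    pred = {("Doc A", 0, 1, "P17"), ("Doc A", 1, 2, "P17"), ("Doc B", 0, 3, "P27")}
    # Suppose exactly one correct prediction repeats a fact from the annotated training data.
    correct_in_train = 1

    correct = len(pred & gold)
    precision = correct / (len(pred) + eps)
    recall = correct / (len(gold) + eps)
    f1 = 0 if precision + recall == 0 else 2 * precision * recall / (precision + recall)

    # Ign F1 removes correct predictions whose facts already appear in the training data
    # from both the numerator and the denominator of the precision.
    ign_precision = (correct - correct_in_train) / (len(pred) - correct_in_train + eps)
    ign_f1 = 0 if ign_precision + recall == 0 else 2 * ign_precision * recall / (ign_precision + recall)

    print({"f1": round(f1, 4), "precision": round(precision, 4), "recall": round(recall, 4), "ign_f1": round(ign_f1, 4)})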