SummerTime/evaluation/rouge_metric.py
from summ_eval.rouge_metric import RougeMetric
from evaluation.summeval_metric import SummEvalMetric
from typing import List, Dict


class Rouge(SummEvalMetric):
    metric_name = "rouge"
    range = (0, 1)
    higher_is_better = True
    requires_heavy_compute = False

    def __init__(self):
        # Wrap the summ_eval ROUGE implementation behind the SummEvalMetric interface.
        se_metric = RougeMetric()
        super(Rouge, self).__init__(se_metric)

    def evaluate(
        self,
        inputs: List[str],
        targets: List[str],
        keys: List[str] = ["rouge_1_f_score", "rouge_2_f_score", "rouge_l_f_score"],
    ) -> Dict[str, float]:
        # summ_eval returns a nested dict; keep only the requested ROUGE F-scores.
        score_dict = self.se_metric.evaluate_batch(inputs, targets)
        return {key: score_dict["rouge"][key] for key in keys}
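

# Example usage (a minimal sketch, assuming the summ_eval ROUGE backend and its
# data files are installed and configured as SummerTime expects; the sample
# strings below are illustrative only):
#
#     rouge = Rouge()
#     scores = rouge.evaluate(
#         inputs=["the cat sat on the mat"],          # model-generated summaries
#         targets=["a cat was sitting on the mat"],   # reference summaries
#     )
#     # scores maps each requested key to a float, e.g.
#     # {"rouge_1_f_score": ..., "rouge_2_f_score": ..., "rouge_l_f_score": ...}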