Spaces:
Paused
Paused
import re | |
import string | |
from lighteval.tasks.lighteval_task import LightevalTaskConfig | |
from lighteval.metrics import Metrics, MetricCategory | |
from lighteval.metrics.utils import CorpusLevelMetric, MetricUseCase | |
from aenum import extend_enum | |
import numpy as np | |
from lighteval.tasks.requests import Doc | |
from Levenshtein import distance | |
import collections | |
from lighteval.utils import as_list | |
import sacrebleu | |
from ..envs import OWNER | |
def trans_prompt_fn(line, task_name: str = None):
    """Build the evaluation ``Doc`` for one dataset row.

    The stripped ``line["prompt"]`` becomes the query. The stripped first
    element of ``line["response"]`` becomes the only choice and is marked as
    the gold answer (index 0). No instruction text is attached.
    """
    query_text = line["prompt"].strip()
    gold_text = line["response"][0].strip()
    return Doc(
        task_name=task_name,
        query=query_text,
        choices=[gold_text],
        gold_index=[0],
        instruction="",
    )
def translation_eval_fn(golds: list[str], predictions: list[str], formatted_doc: "Doc | None" = None):
    """Score one prediction against its references with sentence-level BLEU.

    Args:
        golds: Reference strings for the sample.
        predictions: Model outputs for the sample; must hold exactly one item.
        formatted_doc: Not used by this function.

    Returns:
        The sacrebleu sentence BLEU score divided by 100, as a float.

    Raises:
        ValueError: If ``predictions`` does not contain exactly one item.
    """
    # Reject the empty case as well as the multi-item case: an empty list
    # would otherwise surface as an opaque IndexError on predictions[0].
    if len(predictions) != 1:
        raise ValueError("Predictions should have one item")
    bleu = sacrebleu.sentence_bleu(hypothesis=predictions[0], references=golds)
    return float(bleu.score / 100)
# Per-sample sentence BLEU (translation_eval_fn), aggregated over the corpus
# with np.mean; larger values are better.
sentence_bleu = CorpusLevelMetric(
    metric="sentence_bleu",
    higher_is_better=True,
    category=MetricCategory.GENERATIVE,
    use_case=MetricUseCase.TRANSLATION,
    sample_level_fn=translation_eval_fn,
    corpus_level_fn=np.mean,
)

# Add the metric to the Metrics enum under the name "sentence_bleu".
extend_enum(Metrics, "sentence_bleu", sentence_bleu)
# Translation task with a single "default" subset and two splits, "en2he" and
# "he2en" (presumably English->Hebrew and Hebrew->English, going by the names).
# Both splits are evaluated with the "sentence_bleu" metric.
translation_task = LightevalTaskConfig(
    name="he-en-trans-bleu",
    suite=["custom"],
    # Referenced by name: must be defined in this file or imported from
    # src/lighteval/tasks/tasks_prompt_formatting.py.
    prompt_function="trans_prompt_fn",
    hf_repo=f"{OWNER}/tests",
    hf_subset="default",
    hf_avail_splits=["en2he", "he2en"],
    evaluation_splits=["en2he", "he2en"],
    metric=["sentence_bleu"],
    generation_size=256,
    stop_sequence=["\n"],
)