import evaluate
from evaluate.evaluation_suite import SubTask


class Suite(evaluate.EvaluationSuite):
    """Evaluation suite with a single GLUE/MNLI text-classification subtask.

    The subtask is scored with the ``accuracy`` metric on the ``validation``
    split, feeding ``premise``/``hypothesis`` as the input pair and ``label``
    as the reference column.
    """

    def __init__(self, name: str) -> None:
        """Set up the metric, the preprocessor and the list of subtasks.

        Args:
            name: Suite name, forwarded unchanged to ``EvaluationSuite``.
        """
        super().__init__(name)

        # Loaded once and handed to the subtask as a metric object.
        self.metric = evaluate.load("accuracy")

        # Lower-cases the "text" field of an example.
        # NOTE(review): this is not passed to the subtask below as
        # `data_preprocessor`, and the subtask reads "premise"/"hypothesis"
        # rather than "text" -- confirm whether it is still needed.
        self.preprocessor = lambda x: {"text": x["text"].lower()}

        # Each entry must be a SubTask itself. The previous version wrapped
        # the SubTask in a set literal (`{SubTask(...)}`), so the list held a
        # set rather than a task object.
        self.suite = [
            SubTask(
                task_type="text-classification",
                data="glue",
                subset="mnli",
                split="validation",
                args_for_task={
                    "metric": self.metric,
                    "input_column": "premise",
                    "second_input_column": "hypothesis",
                    "label_column": "label",
                    # Maps label names to the integer ids used in the
                    # "label" column.
                    # NOTE(review): keys presumably must match the labels
                    # emitted by the evaluated model -- verify per model.
                    "label_mapping": {
                        "ENTAILMENT": 0,
                        "NEUTRAL": 1,
                        "CONTRADICTION": 2,
                    },
                },
            ),
        ]