# llm_harness_mistral_arc/llm_harness_mistral_arc.py
import evaluate
import datasets
import lm_eval

# TODO: Add BibTeX citation
_CITATION = """
"""

_DESCRIPTION = """
Evaluates a Hugging Face causal language model with the lm-evaluation-harness
(`lm_eval`) in a zero-shot setting and reports accuracy (`acc`) and
length-normalised accuracy (`acc_norm`) for the requested tasks, e.g. the ARC
benchmarks.
"""

_KWARGS_DESCRIPTION = """
Args:
    pretrained (`str`): name or path of the Hugging Face model to evaluate.
    tasks (`list` of `str`): lm-evaluation-harness task names, e.g.
        ["arc_easy", "arc_challenge"].
Returns:
    `dict`: one entry per evaluated task, each with its `acc` and `acc_norm`
    scores.
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class llm_harness_mistral_arc(evaluate.Metric):

    def _info(self):
        # TODO: Specifies the evaluate.EvaluationModuleInfo object
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=[
                datasets.Features(
                    {
                        "pretrained": datasets.Value("string", id="sequence"),
                        "tasks": datasets.Sequence(datasets.Value("string", id="sequence"), id="references"),
                    }
                )
            ],
            # Homepage of the module for documentation
            homepage="http://module.homepage",
            # Additional links to the codebase or references
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )

    def _compute(self, pretrained, tasks):
        # `evaluate` hands each feature over as a batched column, so unwrap the
        # single model name and the task list before calling lm-eval.
        pretrained = pretrained[0] if isinstance(pretrained, (list, tuple)) else pretrained
        if tasks and isinstance(tasks[0], (list, tuple)):
            tasks = list(tasks[0])
        outputs = lm_eval.simple_evaluate(
            model="hf",
            model_args={"pretrained": pretrained},
            tasks=tasks,
            num_fewshot=0,
        )
        # Keep only zero-shot accuracy and length-normalised accuracy per task.
        results = {}
        for task, scores in outputs["results"].items():
            results[task] = {
                "acc": scores["acc,none"],
                "acc_norm": scores["acc_norm,none"],
            }
        return results
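

# ---------------------------------------------------------------------------
# Illustrative usage sketch, not part of the original module. It assumes
# lm-eval >= 0.4 is installed, that `evaluate.load` can resolve this module by
# the Hub id "alvations/llm_harness_mistral_arc", and that the Mistral
# checkpoint below is only an example model name.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    harness = evaluate.load("alvations/llm_harness_mistral_arc")
    scores = harness.compute(
        pretrained=["mistralai/Mistral-7B-v0.1"],   # one model per run
        tasks=[["arc_easy", "arc_challenge"]],      # lm-eval task names
    )
    print(scores)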