Spaces:
Running
Running
import weave | |
from medrag_multi_modal.assistant.schema import MedQAResponse | |
from medrag_multi_modal.metrics.base import BaseAccuracyMetric | |
class MMLUOptionAccuracy(BaseAccuracyMetric):
    """Accuracy metric for multiple-choice (MMLU-style) questions.

    The metric selects the reference option with ``options[answer]`` and
    checks whether it equals the answer text carried by the model output
    (``output.response.answer``).

    Methods:
    --------
    score(output: MedQAResponse, options: list[str], answer: int) -> dict:
        Returns ``{"correct": bool}`` for a single question.
    """

    def score(
        self, output: MedQAResponse, options: list[str], answer: int
    ) -> dict[str, bool]:
        """Score one response against the reference option.

        Args:
            output: Model response; only ``output.response.answer`` is read.
            options: Candidate answer texts for the question.
            answer: Position of the correct entry in ``options``. This
                parameter was previously annotated ``str`` even though a
                list cannot be indexed by a string, so a string holding an
                integer (e.g. ``"2"``) is converted to an index instead of
                failing.

        Returns:
            ``{"correct": True}`` when the selected option equals
            ``output.response.answer``, otherwise ``{"correct": False}``.

        Raises:
            IndexError: If ``answer`` is outside the range of ``options``.
            TypeError: If ``answer`` is a non-numeric string or any other
                value that cannot index ``options``.
        """
        # Only touch the case that could never have worked before: a string
        # used to index a list/tuple. Anything else is passed through as-is.
        if isinstance(answer, str) and isinstance(options, (list, tuple)):
            try:
                answer = int(answer)
            except ValueError:
                # Not an integer literal: leave it untouched so the lookup
                # below raises the same TypeError it always did.
                pass

        expected = options[answer]
        return {"correct": expected == output.response.answer}