geekyrakshit's picture
add: files
39b7b6a verified
raw
history blame
981 Bytes
import weave
from medrag_multi_modal.assistant.schema import MedQAResponse
from medrag_multi_modal.metrics.base import BaseAccuracyMetric
class MMLUOptionAccuracy(BaseAccuracyMetric):
"""
MMLUOptionAccuracy is a metric class that inherits from `BaseAccuracyMetric`.
This class is designed to evaluate the accuracy of a multiple-choice question
response by comparing the provided answer with the correct answer from the
given options. It uses the MedQAResponse schema to extract the response
and checks if it matches the correct answer.
Methods:
--------
score(output: MedQAResponse, options: list[str], answer: str) -> dict:
Compares the provided answer with the correct answer and returns a
dictionary indicating whether the answer is correct.
"""
@weave.op()
def score(self, output: MedQAResponse, options: list[str], answer: str):
return {"correct": options[answer] == output.response.answer}