gardarjuto committed on
Commit
f1e046c
1 Parent(s): 8446c23

Return per-question scores instead of a single summary score

Browse files
Files changed (1) hide show
  1. score.py +5 -4
score.py CHANGED
@@ -62,7 +62,7 @@ client = OpenAI(
62
  )
63
 
64
 
65
- def calculate_gpt4o_score(queries, user_answers, correct_answers):
66
  """
67
  Calculate the score for the Icelandic Wiki QA benchmark.
68
  """
@@ -87,6 +87,7 @@ def calculate_gpt4o_score(queries, user_answers, correct_answers):
87
  score = score_filter.search(chat).group(1).lower()
88
  scores.append(score)
89
 
90
- return sum(
91
- 1 if score == "excellent" else 0.5 if score == "fair" else 0 for score in scores
92
- ) / len(scores)
 
 
62
  )
63
 
64
 
65
+ def calculate_gpt4o_scores(queries, user_answers, correct_answers):
66
  """
67
  Calculate the score for the Icelandic Wiki QA benchmark.
68
  """
 
87
  score = score_filter.search(chat).group(1).lower()
88
  scores.append(score)
89
 
90
+ scores_numeric = [
91
+ 1.0 if score == "excellent" else 0.5 if score == "fair" else 0.0 for score in scores
92
+ ]
93
+ return scores_numeric