Ori committed on
Commit
8d8c195
1 Parent(s): be6cd9b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -112,6 +112,7 @@ def add_new_eval(
112
 
113
  with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
114
  with open(file_path, 'r') as f:
 
115
  for ix, line in enumerate(f):
116
  try:
117
  task = json.loads(line)
@@ -141,12 +142,17 @@ def add_new_eval(
141
  )
142
 
143
  all_scores.append({"score": score, "has_ans": has_ans, "model_answer": answer, 'id': task_id})
144
-
145
  scores += score
146
  num_questions += 1
147
  difficulty_scores[difficulty] += score
148
  difficulty_counts[difficulty] += 1
149
 
 
 
 
 
 
150
  accuracy_easy = difficulty_scores["Easy"] / difficulty_counts["Easy"] if difficulty_counts["Easy"] > 0 else 0
151
  accuracy_medium = difficulty_scores["Medium"] / difficulty_counts["Medium"] if difficulty_counts["Medium"] > 0 else 0
152
  accuracy_hard = difficulty_scores["Hard"] / difficulty_counts["Hard"] if difficulty_counts["Hard"] > 0 else 0
 
112
 
113
  with open(f"scored/{organization}_{model_name}.jsonl", "w") as scored_file:
114
  with open(file_path, 'r') as f:
115
+ submitted_ids = set()
116
  for ix, line in enumerate(f):
117
  try:
118
  task = json.loads(line)
 
142
  )
143
 
144
  all_scores.append({"score": score, "has_ans": has_ans, "model_answer": answer, 'id': task_id})
145
+ submitted_ids.add(task["id"])
146
  scores += score
147
  num_questions += 1
148
  difficulty_scores[difficulty] += score
149
  difficulty_counts[difficulty] += 1
150
 
151
+ # Check if all gold answer IDs are present in the submission
152
+ missing_ids = set(gold_answers["test"].keys()) - submitted_ids
153
+ if missing_ids:
154
+ return format_error(f"Submission is missing the following IDs: {', '.join(missing_ids)}")
155
+
156
  accuracy_easy = difficulty_scores["Easy"] / difficulty_counts["Easy"] if difficulty_counts["Easy"] > 0 else 0
157
  accuracy_medium = difficulty_scores["Medium"] / difficulty_counts["Medium"] if difficulty_counts["Medium"] > 0 else 0
158
  accuracy_hard = difficulty_scores["Hard"] / difficulty_counts["Hard"] if difficulty_counts["Hard"] > 0 else 0