aaditya committed on
Commit
c65fc48
1 Parent(s): 20d5de3

Update src/backend/run_eval_suite.py

Browse files
Files changed (1) hide show
  1. src/backend/run_eval_suite.py +3 -5
src/backend/run_eval_suite.py CHANGED
@@ -15,16 +15,14 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
15
  print(
16
  "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
17
  )
18
-
19
- task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)
20
 
21
  print(f"Selected Tasks: {task_names}")
22
-
23
  results = evaluator.simple_evaluate(
24
  model="hf-causal-experimental", # "hf-causal"
25
  model_args=eval_request.get_model_args(),
26
  tasks=task_names,
27
- num_fewshot=num_fewshot,
28
  batch_size=batch_size,
29
  device=device,
30
  no_cache=no_cache,
@@ -54,4 +52,4 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
54
  repo_type="dataset",
55
  )
56
 
57
- return results
 
15
  print(
16
  "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
17
  )
18
+ task_names = ["medmcqa", "medqa_4options", "mmlu_anatomy", "mmlu_clinical_knowledge", "mmlu_college_biology", "mmlu_college_medicine", "mmlu_medical_genetics", "mmlu_professional_medicine", "pubmedqa"]
 
19
 
20
  print(f"Selected Tasks: {task_names}")
 
21
  results = evaluator.simple_evaluate(
22
  model="hf-causal-experimental", # "hf-causal"
23
  model_args=eval_request.get_model_args(),
24
  tasks=task_names,
25
+ # num_fewshot=num_fewshot,
26
  batch_size=batch_size,
27
  device=device,
28
  no_cache=no_cache,
 
52
  repo_type="dataset",
53
  )
54
 
55
+ return results