pminervini committed on
Commit
8e3d8c1
1 Parent(s): 3be882c
src/backend/run_eval_suite.py CHANGED
@@ -19,6 +19,8 @@ def run_evaluation(eval_request: EvalRequest, task_names, num_fewshot, batch_siz
19
  # include_task_folder("src/backend/tasks/")
20
  # initialize_tasks('INFO')
21
 
 
 
22
  task_manager = TaskManager(include_path="./src/backend/tasks/")
23
  # task_manager.initialize_tasks('INFO')
24
 
 
19
  # include_task_folder("src/backend/tasks/")
20
  # initialize_tasks('INFO')
21
 
22
+ print(f"Allocating task manager for: {task_names}")
23
+
24
  task_manager = TaskManager(include_path="./src/backend/tasks/")
25
  # task_manager.initialize_tasks('INFO')
26
 
src/backend/tasks/faithdial/faithdial.yaml CHANGED
@@ -1,4 +1,3 @@
1
- group: faithdial
2
  task: faithdial_hallu
3
  dataset_path: McGill-NLP/FaithDial
4
  training_split: train
 
 
1
  task: faithdial_hallu
2
  dataset_path: McGill-NLP/FaithDial
3
  training_split: train
src/backend/tasks/faithdial/faithdial_v2.yaml CHANGED
@@ -1,4 +1,3 @@
1
- group: faithdial
2
  task: faithdial_hallu_v2
3
  dataset_path: McGill-NLP/FaithDial
4
  training_split: train
 
 
1
  task: faithdial_hallu_v2
2
  dataset_path: McGill-NLP/FaithDial
3
  training_split: train
src/backend/tasks/fever/fever10.yaml CHANGED
@@ -1,4 +1,3 @@
1
- group: fever
2
  task: fever10
3
  dataset_path: fever
4
  dataset_name: v1.0
 
 
1
  task: fever10
2
  dataset_path: fever
3
  dataset_name: v1.0
src/backend/tasks/fever/fever11.yaml CHANGED
@@ -1,4 +1,3 @@
1
- group: fever
2
  task: fever11
3
  dataset_path: pminervini/hl-fever
4
  dataset_name: v1.0
 
 
1
  task: fever11
2
  dataset_path: pminervini/hl-fever
3
  dataset_name: v1.0
src/backend/tasks/halueval/halueval_dialogue.yaml CHANGED
@@ -1,5 +1,3 @@
1
- group:
2
- - halueval
3
  task: halueval_dialogue
4
  dataset_path: pminervini/HaluEval
5
  dataset_name: dialogue_samples
 
 
 
1
  task: halueval_dialogue
2
  dataset_path: pminervini/HaluEval
3
  dataset_name: dialogue_samples
src/backend/tasks/halueval/halueval_qa.yaml CHANGED
@@ -1,5 +1,3 @@
1
- group:
2
- - halueval
3
  task: halueval_qa
4
  dataset_path: pminervini/HaluEval
5
  dataset_name: qa_samples
 
 
 
1
  task: halueval_qa
2
  dataset_path: pminervini/HaluEval
3
  dataset_name: qa_samples
src/backend/tasks/halueval/halueval_summarization.yaml CHANGED
@@ -1,5 +1,3 @@
1
- group:
2
- - halueval
3
  task: halueval_summarization
4
  dataset_path: pminervini/HaluEval
5
  dataset_name: summarization_samples
 
 
 
1
  task: halueval_summarization
2
  dataset_path: pminervini/HaluEval
3
  dataset_name: summarization_samples
src/backend/tasks/truefalse/truefalse.yaml CHANGED
@@ -1,4 +1,3 @@
1
- group: truefalse
2
  task: truefalse_cieacf
3
  dataset_path: pminervini/true-false
4
  dataset_name: default
 
 
1
  task: truefalse_cieacf
2
  dataset_path: pminervini/true-false
3
  dataset_name: default
src/backend/tasks/xsum/xsum.yaml DELETED
@@ -1,2 +0,0 @@
1
- task:
2
- - xsum