Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add selfcheck for halueval-cli test
Browse files
- cli/halueval-cli.py +3 -1
cli/halueval-cli.py
CHANGED
@@ -8,6 +8,7 @@ from src.backend.manage_requests import EvalRequest
|
|
8 |
from src.backend.run_eval_suite import run_evaluation
|
9 |
|
10 |
from src.backend.tasks.xsum.task import XSum
|
|
|
11 |
|
12 |
from lm_eval.tasks import initialize_tasks, include_task_folder
|
13 |
from lm_eval import tasks, evaluator, utils
|
@@ -32,8 +33,9 @@ def main():
|
|
32 |
|
33 |
# my_task = Task("memo-trap", "acc", "memo-trap", 0)
|
34 |
my_task = Task("xsum", "rougeLsum", "XSum", 2)
|
|
|
35 |
|
36 |
-
TASKS_HARNESS = [my_task]
|
37 |
# task_names = ['triviaqa']
|
38 |
# TASKS_HARNESS = [task.value for task in Tasks]
|
39 |
|
|
|
8 |
from src.backend.run_eval_suite import run_evaluation
|
9 |
|
10 |
from src.backend.tasks.xsum.task import XSum
|
11 |
+
from src.backend.tasks.selfcheckgpt.task import SelfCheckGpt
|
12 |
|
13 |
from lm_eval.tasks import initialize_tasks, include_task_folder
|
14 |
from lm_eval import tasks, evaluator, utils
|
|
|
33 |
|
34 |
# my_task = Task("memo-trap", "acc", "memo-trap", 0)
|
35 |
my_task = Task("xsum", "rougeLsum", "XSum", 2)
|
36 |
+
my_task_selfcheckgpt = Task("selfcheckgpt", "max-selfcheckgpt", "selfcheckgpt", 0)
|
37 |
|
38 |
+
TASKS_HARNESS = [my_task, my_task_selfcheckgpt]
|
39 |
# task_names = ['triviaqa']
|
40 |
# TASKS_HARNESS = [task.value for task in Tasks]
|
41 |
|