Spaces:
Runtime error
Runtime error
File size: 1,517 Bytes
894c4b4 6c79b12 894c4b4 90dff75 bcdca08 73d1e6e bcdca08 73d1e6e bcdca08 73d1e6e bcdca08 73d1e6e 894c4b4 6c79b12 894c4b4 b1a5839 894c4b4 7e68bad 894c4b4 f9d415e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
import os
import torch
from dataclasses import dataclass
from enum import Enum
from src.envs import CACHE_PATH
@dataclass
class Task:
    """Description of one evaluation task shown on the leaderboard.

    Instances are created positionally as
    ``Task(benchmark, metric, col_name, num_fewshot)``.
    """

    # Task key used in the results JSON file.
    benchmark: str
    # Metric key used in the results JSON file.
    metric: str
    # Name to display in the leaderboard.
    col_name: str
    # Number of few-shot examples for the task (0 is used for zero-shot tasks).
    num_fewshot: int
class Tasks(Enum):
    """Evaluation tasks, one enum member per task.

    Every member's value is a ``Task`` whose fields are: the task key in the
    JSON file, the metric key in the JSON file, the name to display in the
    leaderboard, and the number of few-shot examples.

    Entries that are commented out are not members of the enum; the gaps in
    the ``taskN`` numbering come from those entries.
    """

    # task0 = Task("anli_r1", "acc", "ANLI")
    # task1 = Task("logiqa", "acc_norm", "LogiQA")

    # Open-domain QA: 64 shots, as in the ATLAS paper.
    task0 = Task(benchmark="nq_open", metric="em", col_name="NQ Open", num_fewshot=64)
    task1 = Task(benchmark="triviaqa", metric="em", col_name="TriviaQA", num_fewshot=64)

    # TruthfulQA is intended as a zero-shot benchmark [5, 47].
    # https://owainevans.github.io/pdfs/truthfulQA_lin_evans.pdf
    # task2 = Task("truthfulqa_gen", "rougeL_acc", "TruthfulQA Gen", 0)
    task3 = Task(benchmark="truthfulqa_mc1", metric="acc", col_name="TruthfulQA MC1", num_fewshot=0)
    task4 = Task(benchmark="truthfulqa_mc2", metric="acc", col_name="TruthfulQA MC2", num_fewshot=0)

    task5 = Task(benchmark="halueval_qa", metric="acc", col_name="HaluEval QA", num_fewshot=0)
    # task6 = Task("halueval_dialogue", "acc", "HaluEval Dialogue", 0)
    # task7 = Task("halueval_summarization", "acc", "HaluEval Summarization", 0)

    # task6 = Task("xsum", "rougeL_acc", "XSum", 8)

    task8 = Task(benchmark="memo-trap", metric="acc", col_name="memo-trap", num_fewshot=0)
# NUM_FEWSHOT = 64 # Change with your few shot
# Backend directories, both located directly under CACHE_PATH.
EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

# Use the GPU when torch reports CUDA as available, otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Only set to a value while testing; it needs to be None otherwise.
# NOTE(review): the code that consumes LIMIT is not visible in this file.
LIMIT = None
|