Spaces:

hallucinations-leaderboard
/

leaderboard

Running on CPU Upgrade

pminervini committed on Dec 7, 2023

Commit

b1a5839

•

1 Parent(s): 4a18d8a

update

Files changed (2) hide show

src/backend/envs.py CHANGED Viewed

@@ -25,9 +25,9 @@ class Tasks(Enum):
     task2 = Task("truthfulqa_mc1", "mc1", "TruthfulQA MC1", 0)
     task3 = Task("truthfulqa_mc2", "mc2", "TruthfulQA MC2", 0)  # TruthfulQA is intended as a zero-shot benchmark [5, 47]. https://owainevans.github.io/pdfs/truthfulQA_lin_evans.pdf
 # NUM_FEWSHOT = 64  # Change with your few shot
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

     task2 = Task("truthfulqa_mc1", "mc1", "TruthfulQA MC1", 0)
     task3 = Task("truthfulqa_mc2", "mc2", "TruthfulQA MC2", 0)  # TruthfulQA is intended as a zero-shot benchmark [5, 47]. https://owainevans.github.io/pdfs/truthfulQA_lin_evans.pdf
 # NUM_FEWSHOT = 64  # Change with your few shot
 EVAL_REQUESTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-queue-bk")
 EVAL_RESULTS_PATH_BACKEND = os.path.join(CACHE_PATH, "eval-results-bk")

src/leaderboard/read_evals.py CHANGED Viewed

@@ -201,9 +201,6 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
-        # if '125m' in root:
-        #     breakpoint()
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))

         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
         for file in files:
             model_result_filepaths.append(os.path.join(root, file))