File size: 1,952 Bytes
64ae580 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
{
"results": {
"arc_challenge": {
"acc,none": 0.5614334470989761,
"acc_stderr,none": 0.014500682618212865,
"acc_norm,none": 0.613481228668942,
"acc_norm_stderr,none": 0.014230084761910473,
"alias": "arc_challenge"
}
},
"configs": {
"arc_challenge": {
"task": "arc_challenge",
"group": [
"ai2_arc"
],
"dataset_path": "/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/eval/ai2_arc",
"dataset_name": "ARC-Challenge",
"training_split": "train",
"validation_split": "validation",
"test_split": "test",
"doc_to_text": "Question: {{question}}\nAnswer:",
"doc_to_target": "{{choices.label.index(answerKey)}}",
"doc_to_choice": "{{choices.text}}",
"description": "",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"num_fewshot": 25,
"metric_list": [
{
"metric": "acc",
"aggregation": "mean",
"higher_is_better": true
},
{
"metric": "acc_norm",
"aggregation": "mean",
"higher_is_better": true
}
],
"output_type": "multiple_choice",
"repeats": 1,
"should_decontaminate": true,
"doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
"metadata": {
"version": 1.0
}
}
},
"versions": {
"arc_challenge": 1.0
},
"n-shot": {
"arc_challenge": 25
},
"config": {
"model": "vllm",
"model_args": "pretrained=/lustre07/scratch/gagan30/arocr/meta-llama/self_rewarding_models/Oasis,tensor_parallel_size=1,dtype=auto,gpu_memory_utilization=0.9,data_parallel_size=1,max_model_len=4096",
"batch_size": "auto:128",
"batch_sizes": [],
"device": "cuda",
"use_cache": "/lustre07/scratch/gagan30/arocr/cache/",
"limit": null,
"bootstrap_iters": 100000,
"gen_kwargs": null
},
"git_hash": null
} |