abhinavnmagic
commited on
Commit
•
3b4c6eb
1
Parent(s):
7ed1b70
Upload folder using huggingface_hub
Browse files- arc_challenge/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-21-44.990894.json +102 -0
- config.json +94 -0
- generation_config.json +6 -0
- gsm8k/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T16-23-36.584670.json +138 -0
- hellaswag/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T11-27-16.957594.json +100 -0
- mmlu/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T16-13-28.474390.json +3154 -0
- model-00001-of-00029.safetensors +3 -0
- model-00002-of-00029.safetensors +3 -0
- model-00003-of-00029.safetensors +3 -0
- model-00004-of-00029.safetensors +3 -0
- model-00005-of-00029.safetensors +3 -0
- model-00006-of-00029.safetensors +3 -0
- model-00007-of-00029.safetensors +3 -0
- model-00008-of-00029.safetensors +3 -0
- model-00009-of-00029.safetensors +3 -0
- model-00010-of-00029.safetensors +3 -0
- model-00011-of-00029.safetensors +3 -0
- model-00012-of-00029.safetensors +3 -0
- model-00013-of-00029.safetensors +3 -0
- model-00014-of-00029.safetensors +3 -0
- model-00015-of-00029.safetensors +3 -0
- model-00016-of-00029.safetensors +3 -0
- model-00017-of-00029.safetensors +3 -0
- model-00018-of-00029.safetensors +3 -0
- model-00019-of-00029.safetensors +3 -0
- model-00020-of-00029.safetensors +3 -0
- model-00021-of-00029.safetensors +3 -0
- model-00022-of-00029.safetensors +3 -0
- model-00023-of-00029.safetensors +3 -0
- model-00024-of-00029.safetensors +3 -0
- model-00025-of-00029.safetensors +3 -0
- model-00026-of-00029.safetensors +3 -0
- model-00027-of-00029.safetensors +3 -0
- model-00028-of-00029.safetensors +3 -0
- model-00029-of-00029.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer_config.json +108 -0
- truthfulqa_mc2/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-32-59.669961.json +93 -0
- winogrande/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-25-09.600505.json +90 -0
arc_challenge/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-21-44.990894.json
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"arc_challenge": {
|
4 |
+
"acc,none": 0.6476109215017065,
|
5 |
+
"acc_stderr,none": 0.013960142600598666,
|
6 |
+
"acc_norm,none": 0.6919795221843004,
|
7 |
+
"acc_norm_stderr,none": 0.013491429517292038,
|
8 |
+
"alias": "arc_challenge"
|
9 |
+
}
|
10 |
+
},
|
11 |
+
"group_subtasks": {
|
12 |
+
"arc_challenge": []
|
13 |
+
},
|
14 |
+
"configs": {
|
15 |
+
"arc_challenge": {
|
16 |
+
"task": "arc_challenge",
|
17 |
+
"group": [
|
18 |
+
"ai2_arc"
|
19 |
+
],
|
20 |
+
"dataset_path": "allenai/ai2_arc",
|
21 |
+
"dataset_name": "ARC-Challenge",
|
22 |
+
"training_split": "train",
|
23 |
+
"validation_split": "validation",
|
24 |
+
"test_split": "test",
|
25 |
+
"doc_to_text": "Question: {{question}}\nAnswer:",
|
26 |
+
"doc_to_target": "{{choices.label.index(answerKey)}}",
|
27 |
+
"doc_to_choice": "{{choices.text}}",
|
28 |
+
"description": "",
|
29 |
+
"target_delimiter": " ",
|
30 |
+
"fewshot_delimiter": "\n\n",
|
31 |
+
"num_fewshot": 25,
|
32 |
+
"metric_list": [
|
33 |
+
{
|
34 |
+
"metric": "acc",
|
35 |
+
"aggregation": "mean",
|
36 |
+
"higher_is_better": true
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"metric": "acc_norm",
|
40 |
+
"aggregation": "mean",
|
41 |
+
"higher_is_better": true
|
42 |
+
}
|
43 |
+
],
|
44 |
+
"output_type": "multiple_choice",
|
45 |
+
"repeats": 1,
|
46 |
+
"should_decontaminate": true,
|
47 |
+
"doc_to_decontamination_query": "Question: {{question}}\nAnswer:",
|
48 |
+
"metadata": {
|
49 |
+
"version": 1.0
|
50 |
+
}
|
51 |
+
}
|
52 |
+
},
|
53 |
+
"versions": {
|
54 |
+
"arc_challenge": 1.0
|
55 |
+
},
|
56 |
+
"n-shot": {
|
57 |
+
"arc_challenge": 25
|
58 |
+
},
|
59 |
+
"higher_is_better": {
|
60 |
+
"arc_challenge": {
|
61 |
+
"acc": true,
|
62 |
+
"acc_norm": true
|
63 |
+
}
|
64 |
+
},
|
65 |
+
"n-samples": {
|
66 |
+
"arc_challenge": {
|
67 |
+
"original": 1172,
|
68 |
+
"effective": 1172
|
69 |
+
}
|
70 |
+
},
|
71 |
+
"config": {
|
72 |
+
"model": "vllm",
|
73 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
74 |
+
"batch_size": "auto",
|
75 |
+
"batch_sizes": [],
|
76 |
+
"device": "cuda",
|
77 |
+
"use_cache": null,
|
78 |
+
"limit": null,
|
79 |
+
"bootstrap_iters": 100000,
|
80 |
+
"gen_kwargs": null,
|
81 |
+
"random_seed": 0,
|
82 |
+
"numpy_seed": 1234,
|
83 |
+
"torch_seed": 1234,
|
84 |
+
"fewshot_seed": 1234
|
85 |
+
},
|
86 |
+
"git_hash": "f2843b2f",
|
87 |
+
"date": 1717743073.8326726,
|
88 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
89 |
+
"transformers_version": "4.41.2",
|
90 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
91 |
+
"task_hashes": {},
|
92 |
+
"model_source": "vllm",
|
93 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
94 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
95 |
+
"system_instruction": null,
|
96 |
+
"system_instruction_sha": null,
|
97 |
+
"chat_template": null,
|
98 |
+
"chat_template_sha": null,
|
99 |
+
"start_time": 813352.581331609,
|
100 |
+
"end_time": 815189.703240481,
|
101 |
+
"total_evaluation_time_seconds": "1837.1219088719226"
|
102 |
+
}
|
config.json
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "mistralai/Mixtral-8x22B-Instruct-v0.1",
|
3 |
+
"architectures": [
|
4 |
+
"MixtralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 6144,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 16384,
|
13 |
+
"max_position_embeddings": 8192,
|
14 |
+
"model_type": "mixtral",
|
15 |
+
"num_attention_heads": 48,
|
16 |
+
"num_experts_per_tok": 2,
|
17 |
+
"num_hidden_layers": 56,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"num_local_experts": 8,
|
20 |
+
"output_router_logits": false,
|
21 |
+
"quantization_config": {
|
22 |
+
"activation_scheme": "static",
|
23 |
+
"ignored_layers": [
|
24 |
+
"model.layers.9.block_sparse_moe.gate",
|
25 |
+
"model.layers.21.block_sparse_moe.gate",
|
26 |
+
"model.layers.17.block_sparse_moe.gate",
|
27 |
+
"model.layers.34.block_sparse_moe.gate",
|
28 |
+
"lm_head",
|
29 |
+
"model.layers.7.block_sparse_moe.gate",
|
30 |
+
"model.layers.28.block_sparse_moe.gate",
|
31 |
+
"model.layers.40.block_sparse_moe.gate",
|
32 |
+
"model.layers.4.block_sparse_moe.gate",
|
33 |
+
"model.layers.12.block_sparse_moe.gate",
|
34 |
+
"model.layers.15.block_sparse_moe.gate",
|
35 |
+
"model.layers.44.block_sparse_moe.gate",
|
36 |
+
"model.layers.26.block_sparse_moe.gate",
|
37 |
+
"model.layers.38.block_sparse_moe.gate",
|
38 |
+
"model.layers.47.block_sparse_moe.gate",
|
39 |
+
"model.layers.27.block_sparse_moe.gate",
|
40 |
+
"model.layers.6.block_sparse_moe.gate",
|
41 |
+
"model.layers.5.block_sparse_moe.gate",
|
42 |
+
"model.layers.11.block_sparse_moe.gate",
|
43 |
+
"model.layers.10.block_sparse_moe.gate",
|
44 |
+
"model.layers.54.block_sparse_moe.gate",
|
45 |
+
"model.layers.25.block_sparse_moe.gate",
|
46 |
+
"model.layers.1.block_sparse_moe.gate",
|
47 |
+
"model.layers.41.block_sparse_moe.gate",
|
48 |
+
"model.layers.33.block_sparse_moe.gate",
|
49 |
+
"model.layers.45.block_sparse_moe.gate",
|
50 |
+
"model.layers.14.block_sparse_moe.gate",
|
51 |
+
"model.layers.2.block_sparse_moe.gate",
|
52 |
+
"model.layers.52.block_sparse_moe.gate",
|
53 |
+
"model.layers.24.block_sparse_moe.gate",
|
54 |
+
"model.layers.43.block_sparse_moe.gate",
|
55 |
+
"model.layers.48.block_sparse_moe.gate",
|
56 |
+
"model.layers.29.block_sparse_moe.gate",
|
57 |
+
"model.layers.35.block_sparse_moe.gate",
|
58 |
+
"model.layers.18.block_sparse_moe.gate",
|
59 |
+
"model.layers.50.block_sparse_moe.gate",
|
60 |
+
"model.layers.0.block_sparse_moe.gate",
|
61 |
+
"model.layers.8.block_sparse_moe.gate",
|
62 |
+
"model.layers.23.block_sparse_moe.gate",
|
63 |
+
"model.layers.49.block_sparse_moe.gate",
|
64 |
+
"model.layers.42.block_sparse_moe.gate",
|
65 |
+
"model.layers.22.block_sparse_moe.gate",
|
66 |
+
"model.layers.39.block_sparse_moe.gate",
|
67 |
+
"model.layers.51.block_sparse_moe.gate",
|
68 |
+
"model.layers.31.block_sparse_moe.gate",
|
69 |
+
"model.layers.36.block_sparse_moe.gate",
|
70 |
+
"model.layers.32.block_sparse_moe.gate",
|
71 |
+
"model.layers.37.block_sparse_moe.gate",
|
72 |
+
"model.layers.16.block_sparse_moe.gate",
|
73 |
+
"model.layers.46.block_sparse_moe.gate",
|
74 |
+
"model.layers.53.block_sparse_moe.gate",
|
75 |
+
"model.layers.19.block_sparse_moe.gate",
|
76 |
+
"model.layers.3.block_sparse_moe.gate",
|
77 |
+
"model.layers.30.block_sparse_moe.gate",
|
78 |
+
"model.layers.55.block_sparse_moe.gate",
|
79 |
+
"model.layers.20.block_sparse_moe.gate",
|
80 |
+
"model.layers.13.block_sparse_moe.gate"
|
81 |
+
],
|
82 |
+
"quant_method": "fp8"
|
83 |
+
},
|
84 |
+
"rms_norm_eps": 1e-05,
|
85 |
+
"rope_theta": 1000000.0,
|
86 |
+
"router_aux_loss_coef": 0.001,
|
87 |
+
"router_jitter_noise": 0.0,
|
88 |
+
"sliding_window": null,
|
89 |
+
"tie_word_embeddings": false,
|
90 |
+
"torch_dtype": "bfloat16",
|
91 |
+
"transformers_version": "4.41.2",
|
92 |
+
"use_cache": true,
|
93 |
+
"vocab_size": 32768
|
94 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"transformers_version": "4.41.2"
|
6 |
+
}
|
gsm8k/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T16-23-36.584670.json
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"gsm8k": {
|
4 |
+
"exact_match,strict-match": 0.7657316148597423,
|
5 |
+
"exact_match_stderr,strict-match": 0.01166641512763105,
|
6 |
+
"exact_match,flexible-extract": 0.7740712661106899,
|
7 |
+
"exact_match_stderr,flexible-extract": 0.01151909877727995,
|
8 |
+
"alias": "gsm8k"
|
9 |
+
}
|
10 |
+
},
|
11 |
+
"group_subtasks": {
|
12 |
+
"gsm8k": []
|
13 |
+
},
|
14 |
+
"configs": {
|
15 |
+
"gsm8k": {
|
16 |
+
"task": "gsm8k",
|
17 |
+
"group": [
|
18 |
+
"math_word_problems"
|
19 |
+
],
|
20 |
+
"dataset_path": "gsm8k",
|
21 |
+
"dataset_name": "main",
|
22 |
+
"training_split": "train",
|
23 |
+
"test_split": "test",
|
24 |
+
"fewshot_split": "train",
|
25 |
+
"doc_to_text": "Question: {{question}}\nAnswer:",
|
26 |
+
"doc_to_target": "{{answer}}",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 5,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "exact_match",
|
34 |
+
"aggregation": "mean",
|
35 |
+
"higher_is_better": true,
|
36 |
+
"ignore_case": true,
|
37 |
+
"ignore_punctuation": false,
|
38 |
+
"regexes_to_ignore": [
|
39 |
+
",",
|
40 |
+
"\\$",
|
41 |
+
"(?s).*#### ",
|
42 |
+
"\\.$"
|
43 |
+
]
|
44 |
+
}
|
45 |
+
],
|
46 |
+
"output_type": "generate_until",
|
47 |
+
"generation_kwargs": {
|
48 |
+
"until": [
|
49 |
+
"Question:",
|
50 |
+
"</s>",
|
51 |
+
"<|im_end|>"
|
52 |
+
],
|
53 |
+
"do_sample": false,
|
54 |
+
"temperature": 0.0
|
55 |
+
},
|
56 |
+
"repeats": 1,
|
57 |
+
"filter_list": [
|
58 |
+
{
|
59 |
+
"name": "strict-match",
|
60 |
+
"filter": [
|
61 |
+
{
|
62 |
+
"function": "regex",
|
63 |
+
"regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"function": "take_first"
|
67 |
+
}
|
68 |
+
]
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"name": "flexible-extract",
|
72 |
+
"filter": [
|
73 |
+
{
|
74 |
+
"function": "regex",
|
75 |
+
"group_select": -1,
|
76 |
+
"regex_pattern": "(-?[$0-9.,]{2,})|(-?[0-9]+)"
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"function": "take_first"
|
80 |
+
}
|
81 |
+
]
|
82 |
+
}
|
83 |
+
],
|
84 |
+
"should_decontaminate": false,
|
85 |
+
"metadata": {
|
86 |
+
"version": 3.0
|
87 |
+
}
|
88 |
+
}
|
89 |
+
},
|
90 |
+
"versions": {
|
91 |
+
"gsm8k": 3.0
|
92 |
+
},
|
93 |
+
"n-shot": {
|
94 |
+
"gsm8k": 5
|
95 |
+
},
|
96 |
+
"higher_is_better": {
|
97 |
+
"gsm8k": {
|
98 |
+
"exact_match": true
|
99 |
+
}
|
100 |
+
},
|
101 |
+
"n-samples": {
|
102 |
+
"gsm8k": {
|
103 |
+
"original": 1319,
|
104 |
+
"effective": 1319
|
105 |
+
}
|
106 |
+
},
|
107 |
+
"config": {
|
108 |
+
"model": "vllm",
|
109 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
110 |
+
"batch_size": "auto",
|
111 |
+
"batch_sizes": [],
|
112 |
+
"device": "cuda",
|
113 |
+
"use_cache": null,
|
114 |
+
"limit": null,
|
115 |
+
"bootstrap_iters": 100000,
|
116 |
+
"gen_kwargs": null,
|
117 |
+
"random_seed": 0,
|
118 |
+
"numpy_seed": 1234,
|
119 |
+
"torch_seed": 1234,
|
120 |
+
"fewshot_seed": 1234
|
121 |
+
},
|
122 |
+
"git_hash": "f2843b2f",
|
123 |
+
"date": 1717776830.423203,
|
124 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
125 |
+
"transformers_version": "4.41.2",
|
126 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
127 |
+
"task_hashes": {},
|
128 |
+
"model_source": "vllm",
|
129 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
130 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
131 |
+
"system_instruction": null,
|
132 |
+
"system_instruction_sha": null,
|
133 |
+
"chat_template": null,
|
134 |
+
"chat_template_sha": null,
|
135 |
+
"start_time": 847108.840543343,
|
136 |
+
"end_time": 847701.29731874,
|
137 |
+
"total_evaluation_time_seconds": "592.4567753970623"
|
138 |
+
}
|
hellaswag/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T11-27-16.957594.json
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"hellaswag": {
|
4 |
+
"acc,none": 0.5985859390559649,
|
5 |
+
"acc_stderr,none": 0.004891826692722808,
|
6 |
+
"acc_norm,none": 0.8249352718581956,
|
7 |
+
"acc_norm_stderr,none": 0.0037924580005235724,
|
8 |
+
"alias": "hellaswag"
|
9 |
+
}
|
10 |
+
},
|
11 |
+
"group_subtasks": {
|
12 |
+
"hellaswag": []
|
13 |
+
},
|
14 |
+
"configs": {
|
15 |
+
"hellaswag": {
|
16 |
+
"task": "hellaswag",
|
17 |
+
"group": [
|
18 |
+
"multiple_choice"
|
19 |
+
],
|
20 |
+
"dataset_path": "hellaswag",
|
21 |
+
"training_split": "train",
|
22 |
+
"validation_split": "validation",
|
23 |
+
"process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n",
|
24 |
+
"doc_to_text": "{{query}}",
|
25 |
+
"doc_to_target": "{{label}}",
|
26 |
+
"doc_to_choice": "choices",
|
27 |
+
"description": "",
|
28 |
+
"target_delimiter": " ",
|
29 |
+
"fewshot_delimiter": "\n\n",
|
30 |
+
"num_fewshot": 10,
|
31 |
+
"metric_list": [
|
32 |
+
{
|
33 |
+
"metric": "acc",
|
34 |
+
"aggregation": "mean",
|
35 |
+
"higher_is_better": true
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"metric": "acc_norm",
|
39 |
+
"aggregation": "mean",
|
40 |
+
"higher_is_better": true
|
41 |
+
}
|
42 |
+
],
|
43 |
+
"output_type": "multiple_choice",
|
44 |
+
"repeats": 1,
|
45 |
+
"should_decontaminate": false,
|
46 |
+
"metadata": {
|
47 |
+
"version": 1.0
|
48 |
+
}
|
49 |
+
}
|
50 |
+
},
|
51 |
+
"versions": {
|
52 |
+
"hellaswag": 1.0
|
53 |
+
},
|
54 |
+
"n-shot": {
|
55 |
+
"hellaswag": 10
|
56 |
+
},
|
57 |
+
"higher_is_better": {
|
58 |
+
"hellaswag": {
|
59 |
+
"acc": true,
|
60 |
+
"acc_norm": true
|
61 |
+
}
|
62 |
+
},
|
63 |
+
"n-samples": {
|
64 |
+
"hellaswag": {
|
65 |
+
"original": 10042,
|
66 |
+
"effective": 10042
|
67 |
+
}
|
68 |
+
},
|
69 |
+
"config": {
|
70 |
+
"model": "vllm",
|
71 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
72 |
+
"batch_size": "auto",
|
73 |
+
"batch_sizes": [],
|
74 |
+
"device": "cuda",
|
75 |
+
"use_cache": null,
|
76 |
+
"limit": null,
|
77 |
+
"bootstrap_iters": 100000,
|
78 |
+
"gen_kwargs": null,
|
79 |
+
"random_seed": 0,
|
80 |
+
"numpy_seed": 1234,
|
81 |
+
"torch_seed": 1234,
|
82 |
+
"fewshot_seed": 1234
|
83 |
+
},
|
84 |
+
"git_hash": "f2843b2f",
|
85 |
+
"date": 1717745599.4707556,
|
86 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
87 |
+
"transformers_version": "4.41.2",
|
88 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
89 |
+
"task_hashes": {},
|
90 |
+
"model_source": "vllm",
|
91 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
92 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
93 |
+
"system_instruction": null,
|
94 |
+
"system_instruction_sha": null,
|
95 |
+
"chat_template": null,
|
96 |
+
"chat_template_sha": null,
|
97 |
+
"start_time": 815877.93595267,
|
98 |
+
"end_time": 829921.670261319,
|
99 |
+
"total_evaluation_time_seconds": "14043.73430864897"
|
100 |
+
}
|
mmlu/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T16-13-28.474390.json
ADDED
@@ -0,0 +1,3154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"mmlu": {
|
4 |
+
"acc,none": 0.7060959977211223,
|
5 |
+
"acc_stderr,none": 0.0036419117884613442,
|
6 |
+
"alias": "mmlu"
|
7 |
+
},
|
8 |
+
"mmlu_humanities": {
|
9 |
+
"alias": " - humanities",
|
10 |
+
"acc,none": 0.6561105207226355,
|
11 |
+
"acc_stderr,none": 0.006537667125056556
|
12 |
+
},
|
13 |
+
"mmlu_formal_logic": {
|
14 |
+
"alias": " - formal_logic",
|
15 |
+
"acc,none": 0.5793650793650794,
|
16 |
+
"acc_stderr,none": 0.04415438226743745
|
17 |
+
},
|
18 |
+
"mmlu_high_school_european_history": {
|
19 |
+
"alias": " - high_school_european_history",
|
20 |
+
"acc,none": 0.8181818181818182,
|
21 |
+
"acc_stderr,none": 0.030117688929503582
|
22 |
+
},
|
23 |
+
"mmlu_high_school_us_history": {
|
24 |
+
"alias": " - high_school_us_history",
|
25 |
+
"acc,none": 0.8627450980392157,
|
26 |
+
"acc_stderr,none": 0.024152225962801577
|
27 |
+
},
|
28 |
+
"mmlu_high_school_world_history": {
|
29 |
+
"alias": " - high_school_world_history",
|
30 |
+
"acc,none": 0.8565400843881856,
|
31 |
+
"acc_stderr,none": 0.022818291821017012
|
32 |
+
},
|
33 |
+
"mmlu_international_law": {
|
34 |
+
"alias": " - international_law",
|
35 |
+
"acc,none": 0.8512396694214877,
|
36 |
+
"acc_stderr,none": 0.03248470083807195
|
37 |
+
},
|
38 |
+
"mmlu_jurisprudence": {
|
39 |
+
"alias": " - jurisprudence",
|
40 |
+
"acc,none": 0.8518518518518519,
|
41 |
+
"acc_stderr,none": 0.03434300243631002
|
42 |
+
},
|
43 |
+
"mmlu_logical_fallacies": {
|
44 |
+
"alias": " - logical_fallacies",
|
45 |
+
"acc,none": 0.7975460122699386,
|
46 |
+
"acc_stderr,none": 0.031570650789119
|
47 |
+
},
|
48 |
+
"mmlu_moral_disputes": {
|
49 |
+
"alias": " - moral_disputes",
|
50 |
+
"acc,none": 0.7947976878612717,
|
51 |
+
"acc_stderr,none": 0.021742519835276274
|
52 |
+
},
|
53 |
+
"mmlu_moral_scenarios": {
|
54 |
+
"alias": " - moral_scenarios",
|
55 |
+
"acc,none": 0.4983240223463687,
|
56 |
+
"acc_stderr,none": 0.016722407608296398
|
57 |
+
},
|
58 |
+
"mmlu_philosophy": {
|
59 |
+
"alias": " - philosophy",
|
60 |
+
"acc,none": 0.7942122186495176,
|
61 |
+
"acc_stderr,none": 0.022961339906764234
|
62 |
+
},
|
63 |
+
"mmlu_prehistory": {
|
64 |
+
"alias": " - prehistory",
|
65 |
+
"acc,none": 0.7993827160493827,
|
66 |
+
"acc_stderr,none": 0.0222823139497749
|
67 |
+
},
|
68 |
+
"mmlu_professional_law": {
|
69 |
+
"alias": " - professional_law",
|
70 |
+
"acc,none": 0.516297262059974,
|
71 |
+
"acc_stderr,none": 0.012763450734699812
|
72 |
+
},
|
73 |
+
"mmlu_world_religions": {
|
74 |
+
"alias": " - world_religions",
|
75 |
+
"acc,none": 0.9122807017543859,
|
76 |
+
"acc_stderr,none": 0.021696383943889223
|
77 |
+
},
|
78 |
+
"mmlu_other": {
|
79 |
+
"alias": " - other",
|
80 |
+
"acc,none": 0.7608625683939492,
|
81 |
+
"acc_stderr,none": 0.007354391095553756
|
82 |
+
},
|
83 |
+
"mmlu_business_ethics": {
|
84 |
+
"alias": " - business_ethics",
|
85 |
+
"acc,none": 0.77,
|
86 |
+
"acc_stderr,none": 0.04229525846816506
|
87 |
+
},
|
88 |
+
"mmlu_clinical_knowledge": {
|
89 |
+
"alias": " - clinical_knowledge",
|
90 |
+
"acc,none": 0.7735849056603774,
|
91 |
+
"acc_stderr,none": 0.025757559893106758
|
92 |
+
},
|
93 |
+
"mmlu_college_medicine": {
|
94 |
+
"alias": " - college_medicine",
|
95 |
+
"acc,none": 0.7398843930635838,
|
96 |
+
"acc_stderr,none": 0.033450369167889904
|
97 |
+
},
|
98 |
+
"mmlu_global_facts": {
|
99 |
+
"alias": " - global_facts",
|
100 |
+
"acc,none": 0.45,
|
101 |
+
"acc_stderr,none": 0.05
|
102 |
+
},
|
103 |
+
"mmlu_human_aging": {
|
104 |
+
"alias": " - human_aging",
|
105 |
+
"acc,none": 0.7533632286995515,
|
106 |
+
"acc_stderr,none": 0.028930413120910874
|
107 |
+
},
|
108 |
+
"mmlu_management": {
|
109 |
+
"alias": " - management",
|
110 |
+
"acc,none": 0.8543689320388349,
|
111 |
+
"acc_stderr,none": 0.0349260647662379
|
112 |
+
},
|
113 |
+
"mmlu_marketing": {
|
114 |
+
"alias": " - marketing",
|
115 |
+
"acc,none": 0.9145299145299145,
|
116 |
+
"acc_stderr,none": 0.01831589168562584
|
117 |
+
},
|
118 |
+
"mmlu_medical_genetics": {
|
119 |
+
"alias": " - medical_genetics",
|
120 |
+
"acc,none": 0.77,
|
121 |
+
"acc_stderr,none": 0.042295258468165065
|
122 |
+
},
|
123 |
+
"mmlu_miscellaneous": {
|
124 |
+
"alias": " - miscellaneous",
|
125 |
+
"acc,none": 0.8518518518518519,
|
126 |
+
"acc_stderr,none": 0.012703598899445173
|
127 |
+
},
|
128 |
+
"mmlu_nutrition": {
|
129 |
+
"alias": " - nutrition",
|
130 |
+
"acc,none": 0.7908496732026143,
|
131 |
+
"acc_stderr,none": 0.02328768531233481
|
132 |
+
},
|
133 |
+
"mmlu_professional_accounting": {
|
134 |
+
"alias": " - professional_accounting",
|
135 |
+
"acc,none": 0.5319148936170213,
|
136 |
+
"acc_stderr,none": 0.029766675075873866
|
137 |
+
},
|
138 |
+
"mmlu_professional_medicine": {
|
139 |
+
"alias": " - professional_medicine",
|
140 |
+
"acc,none": 0.7794117647058824,
|
141 |
+
"acc_stderr,none": 0.02518778666022727
|
142 |
+
},
|
143 |
+
"mmlu_virology": {
|
144 |
+
"alias": " - virology",
|
145 |
+
"acc,none": 0.5481927710843374,
|
146 |
+
"acc_stderr,none": 0.038743715565879536
|
147 |
+
},
|
148 |
+
"mmlu_social_sciences": {
|
149 |
+
"alias": " - social_sciences",
|
150 |
+
"acc,none": 0.8059798505037374,
|
151 |
+
"acc_stderr,none": 0.007000549787458337
|
152 |
+
},
|
153 |
+
"mmlu_econometrics": {
|
154 |
+
"alias": " - econometrics",
|
155 |
+
"acc,none": 0.6052631578947368,
|
156 |
+
"acc_stderr,none": 0.04598188057816542
|
157 |
+
},
|
158 |
+
"mmlu_high_school_geography": {
|
159 |
+
"alias": " - high_school_geography",
|
160 |
+
"acc,none": 0.8636363636363636,
|
161 |
+
"acc_stderr,none": 0.024450155973189835
|
162 |
+
},
|
163 |
+
"mmlu_high_school_government_and_politics": {
|
164 |
+
"alias": " - high_school_government_and_politics",
|
165 |
+
"acc,none": 0.9378238341968912,
|
166 |
+
"acc_stderr,none": 0.017426974154240514
|
167 |
+
},
|
168 |
+
"mmlu_high_school_macroeconomics": {
|
169 |
+
"alias": " - high_school_macroeconomics",
|
170 |
+
"acc,none": 0.7230769230769231,
|
171 |
+
"acc_stderr,none": 0.022688042352424994
|
172 |
+
},
|
173 |
+
"mmlu_high_school_microeconomics": {
|
174 |
+
"alias": " - high_school_microeconomics",
|
175 |
+
"acc,none": 0.8109243697478992,
|
176 |
+
"acc_stderr,none": 0.02543511943810537
|
177 |
+
},
|
178 |
+
"mmlu_high_school_psychology": {
|
179 |
+
"alias": " - high_school_psychology",
|
180 |
+
"acc,none": 0.8844036697247707,
|
181 |
+
"acc_stderr,none": 0.013708749534172636
|
182 |
+
},
|
183 |
+
"mmlu_human_sexuality": {
|
184 |
+
"alias": " - human_sexuality",
|
185 |
+
"acc,none": 0.7709923664122137,
|
186 |
+
"acc_stderr,none": 0.036853466317118506
|
187 |
+
},
|
188 |
+
"mmlu_professional_psychology": {
|
189 |
+
"alias": " - professional_psychology",
|
190 |
+
"acc,none": 0.7532679738562091,
|
191 |
+
"acc_stderr,none": 0.0174408203674025
|
192 |
+
},
|
193 |
+
"mmlu_public_relations": {
|
194 |
+
"alias": " - public_relations",
|
195 |
+
"acc,none": 0.7363636363636363,
|
196 |
+
"acc_stderr,none": 0.04220224692971987
|
197 |
+
},
|
198 |
+
"mmlu_security_studies": {
|
199 |
+
"alias": " - security_studies",
|
200 |
+
"acc,none": 0.7877551020408163,
|
201 |
+
"acc_stderr,none": 0.026176967197866767
|
202 |
+
},
|
203 |
+
"mmlu_sociology": {
|
204 |
+
"alias": " - sociology",
|
205 |
+
"acc,none": 0.8756218905472637,
|
206 |
+
"acc_stderr,none": 0.023335401790166327
|
207 |
+
},
|
208 |
+
"mmlu_us_foreign_policy": {
|
209 |
+
"alias": " - us_foreign_policy",
|
210 |
+
"acc,none": 0.9,
|
211 |
+
"acc_stderr,none": 0.030151134457776348
|
212 |
+
},
|
213 |
+
"mmlu_stem": {
|
214 |
+
"alias": " - stem",
|
215 |
+
"acc,none": 0.6292419917538852,
|
216 |
+
"acc_stderr,none": 0.00828854335131971
|
217 |
+
},
|
218 |
+
"mmlu_abstract_algebra": {
|
219 |
+
"alias": " - abstract_algebra",
|
220 |
+
"acc,none": 0.4,
|
221 |
+
"acc_stderr,none": 0.049236596391733084
|
222 |
+
},
|
223 |
+
"mmlu_anatomy": {
|
224 |
+
"alias": " - anatomy",
|
225 |
+
"acc,none": 0.6518518518518519,
|
226 |
+
"acc_stderr,none": 0.041153246103369526
|
227 |
+
},
|
228 |
+
"mmlu_astronomy": {
|
229 |
+
"alias": " - astronomy",
|
230 |
+
"acc,none": 0.8026315789473685,
|
231 |
+
"acc_stderr,none": 0.03238981601699397
|
232 |
+
},
|
233 |
+
"mmlu_college_biology": {
|
234 |
+
"alias": " - college_biology",
|
235 |
+
"acc,none": 0.8402777777777778,
|
236 |
+
"acc_stderr,none": 0.030635578972093278
|
237 |
+
},
|
238 |
+
"mmlu_college_chemistry": {
|
239 |
+
"alias": " - college_chemistry",
|
240 |
+
"acc,none": 0.5,
|
241 |
+
"acc_stderr,none": 0.050251890762960605
|
242 |
+
},
|
243 |
+
"mmlu_college_computer_science": {
|
244 |
+
"alias": " - college_computer_science",
|
245 |
+
"acc,none": 0.64,
|
246 |
+
"acc_stderr,none": 0.04824181513244218
|
247 |
+
},
|
248 |
+
"mmlu_college_mathematics": {
|
249 |
+
"alias": " - college_mathematics",
|
250 |
+
"acc,none": 0.47,
|
251 |
+
"acc_stderr,none": 0.050161355804659205
|
252 |
+
},
|
253 |
+
"mmlu_college_physics": {
|
254 |
+
"alias": " - college_physics",
|
255 |
+
"acc,none": 0.49019607843137253,
|
256 |
+
"acc_stderr,none": 0.04974229460422817
|
257 |
+
},
|
258 |
+
"mmlu_computer_security": {
|
259 |
+
"alias": " - computer_security",
|
260 |
+
"acc,none": 0.8,
|
261 |
+
"acc_stderr,none": 0.040201512610368445
|
262 |
+
},
|
263 |
+
"mmlu_conceptual_physics": {
|
264 |
+
"alias": " - conceptual_physics",
|
265 |
+
"acc,none": 0.6978723404255319,
|
266 |
+
"acc_stderr,none": 0.030017554471880557
|
267 |
+
},
|
268 |
+
"mmlu_electrical_engineering": {
|
269 |
+
"alias": " - electrical_engineering",
|
270 |
+
"acc,none": 0.6827586206896552,
|
271 |
+
"acc_stderr,none": 0.03878352372138622
|
272 |
+
},
|
273 |
+
"mmlu_elementary_mathematics": {
|
274 |
+
"alias": " - elementary_mathematics",
|
275 |
+
"acc,none": 0.5582010582010583,
|
276 |
+
"acc_stderr,none": 0.025576257061253833
|
277 |
+
},
|
278 |
+
"mmlu_high_school_biology": {
|
279 |
+
"alias": " - high_school_biology",
|
280 |
+
"acc,none": 0.8064516129032258,
|
281 |
+
"acc_stderr,none": 0.02247525852553606
|
282 |
+
},
|
283 |
+
"mmlu_high_school_chemistry": {
|
284 |
+
"alias": " - high_school_chemistry",
|
285 |
+
"acc,none": 0.5566502463054187,
|
286 |
+
"acc_stderr,none": 0.03495334582162933
|
287 |
+
},
|
288 |
+
"mmlu_high_school_computer_science": {
|
289 |
+
"alias": " - high_school_computer_science",
|
290 |
+
"acc,none": 0.8,
|
291 |
+
"acc_stderr,none": 0.04020151261036846
|
292 |
+
},
|
293 |
+
"mmlu_high_school_mathematics": {
|
294 |
+
"alias": " - high_school_mathematics",
|
295 |
+
"acc,none": 0.44074074074074077,
|
296 |
+
"acc_stderr,none": 0.030270671157284074
|
297 |
+
},
|
298 |
+
"mmlu_high_school_physics": {
|
299 |
+
"alias": " - high_school_physics",
|
300 |
+
"acc,none": 0.4768211920529801,
|
301 |
+
"acc_stderr,none": 0.04078093859163084
|
302 |
+
},
|
303 |
+
"mmlu_high_school_statistics": {
|
304 |
+
"alias": " - high_school_statistics",
|
305 |
+
"acc,none": 0.6898148148148148,
|
306 |
+
"acc_stderr,none": 0.03154696285656629
|
307 |
+
},
|
308 |
+
"mmlu_machine_learning": {
|
309 |
+
"alias": " - machine_learning",
|
310 |
+
"acc,none": 0.5803571428571429,
|
311 |
+
"acc_stderr,none": 0.04684099321077106
|
312 |
+
}
|
313 |
+
},
|
314 |
+
"groups": {
|
315 |
+
"mmlu": {
|
316 |
+
"acc,none": 0.7060959977211223,
|
317 |
+
"acc_stderr,none": 0.0036419117884613442,
|
318 |
+
"alias": "mmlu"
|
319 |
+
},
|
320 |
+
"mmlu_humanities": {
|
321 |
+
"alias": " - humanities",
|
322 |
+
"acc,none": 0.6561105207226355,
|
323 |
+
"acc_stderr,none": 0.006537667125056556
|
324 |
+
},
|
325 |
+
"mmlu_other": {
|
326 |
+
"alias": " - other",
|
327 |
+
"acc,none": 0.7608625683939492,
|
328 |
+
"acc_stderr,none": 0.007354391095553756
|
329 |
+
},
|
330 |
+
"mmlu_social_sciences": {
|
331 |
+
"alias": " - social_sciences",
|
332 |
+
"acc,none": 0.8059798505037374,
|
333 |
+
"acc_stderr,none": 0.007000549787458337
|
334 |
+
},
|
335 |
+
"mmlu_stem": {
|
336 |
+
"alias": " - stem",
|
337 |
+
"acc,none": 0.6292419917538852,
|
338 |
+
"acc_stderr,none": 0.00828854335131971
|
339 |
+
}
|
340 |
+
},
|
341 |
+
"group_subtasks": {
|
342 |
+
"mmlu_stem": [
|
343 |
+
"mmlu_college_computer_science",
|
344 |
+
"mmlu_high_school_physics",
|
345 |
+
"mmlu_college_chemistry",
|
346 |
+
"mmlu_college_biology",
|
347 |
+
"mmlu_high_school_mathematics",
|
348 |
+
"mmlu_high_school_computer_science",
|
349 |
+
"mmlu_electrical_engineering",
|
350 |
+
"mmlu_college_physics",
|
351 |
+
"mmlu_anatomy",
|
352 |
+
"mmlu_college_mathematics",
|
353 |
+
"mmlu_elementary_mathematics",
|
354 |
+
"mmlu_high_school_chemistry",
|
355 |
+
"mmlu_machine_learning",
|
356 |
+
"mmlu_abstract_algebra",
|
357 |
+
"mmlu_astronomy",
|
358 |
+
"mmlu_computer_security",
|
359 |
+
"mmlu_high_school_biology",
|
360 |
+
"mmlu_high_school_statistics",
|
361 |
+
"mmlu_conceptual_physics"
|
362 |
+
],
|
363 |
+
"mmlu_other": [
|
364 |
+
"mmlu_business_ethics",
|
365 |
+
"mmlu_virology",
|
366 |
+
"mmlu_nutrition",
|
367 |
+
"mmlu_management",
|
368 |
+
"mmlu_clinical_knowledge",
|
369 |
+
"mmlu_marketing",
|
370 |
+
"mmlu_college_medicine",
|
371 |
+
"mmlu_professional_medicine",
|
372 |
+
"mmlu_medical_genetics",
|
373 |
+
"mmlu_human_aging",
|
374 |
+
"mmlu_professional_accounting",
|
375 |
+
"mmlu_miscellaneous",
|
376 |
+
"mmlu_global_facts"
|
377 |
+
],
|
378 |
+
"mmlu_social_sciences": [
|
379 |
+
"mmlu_high_school_government_and_politics",
|
380 |
+
"mmlu_human_sexuality",
|
381 |
+
"mmlu_high_school_microeconomics",
|
382 |
+
"mmlu_high_school_macroeconomics",
|
383 |
+
"mmlu_public_relations",
|
384 |
+
"mmlu_sociology",
|
385 |
+
"mmlu_professional_psychology",
|
386 |
+
"mmlu_high_school_psychology",
|
387 |
+
"mmlu_econometrics",
|
388 |
+
"mmlu_high_school_geography",
|
389 |
+
"mmlu_us_foreign_policy",
|
390 |
+
"mmlu_security_studies"
|
391 |
+
],
|
392 |
+
"mmlu_humanities": [
|
393 |
+
"mmlu_high_school_european_history",
|
394 |
+
"mmlu_high_school_world_history",
|
395 |
+
"mmlu_professional_law",
|
396 |
+
"mmlu_logical_fallacies",
|
397 |
+
"mmlu_high_school_us_history",
|
398 |
+
"mmlu_world_religions",
|
399 |
+
"mmlu_prehistory",
|
400 |
+
"mmlu_jurisprudence",
|
401 |
+
"mmlu_moral_scenarios",
|
402 |
+
"mmlu_formal_logic",
|
403 |
+
"mmlu_philosophy",
|
404 |
+
"mmlu_international_law",
|
405 |
+
"mmlu_moral_disputes"
|
406 |
+
],
|
407 |
+
"mmlu": [
|
408 |
+
"mmlu_humanities",
|
409 |
+
"mmlu_social_sciences",
|
410 |
+
"mmlu_other",
|
411 |
+
"mmlu_stem"
|
412 |
+
]
|
413 |
+
},
|
414 |
+
"configs": {
|
415 |
+
"mmlu_abstract_algebra": {
|
416 |
+
"task": "mmlu_abstract_algebra",
|
417 |
+
"task_alias": "abstract_algebra",
|
418 |
+
"group": "mmlu_stem",
|
419 |
+
"group_alias": "stem",
|
420 |
+
"dataset_path": "hails/mmlu_no_train",
|
421 |
+
"dataset_name": "abstract_algebra",
|
422 |
+
"test_split": "test",
|
423 |
+
"fewshot_split": "dev",
|
424 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
425 |
+
"doc_to_target": "answer",
|
426 |
+
"doc_to_choice": [
|
427 |
+
"A",
|
428 |
+
"B",
|
429 |
+
"C",
|
430 |
+
"D"
|
431 |
+
],
|
432 |
+
"description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n",
|
433 |
+
"target_delimiter": " ",
|
434 |
+
"fewshot_delimiter": "\n\n",
|
435 |
+
"fewshot_config": {
|
436 |
+
"sampler": "first_n"
|
437 |
+
},
|
438 |
+
"num_fewshot": 5,
|
439 |
+
"metric_list": [
|
440 |
+
{
|
441 |
+
"metric": "acc",
|
442 |
+
"aggregation": "mean",
|
443 |
+
"higher_is_better": true
|
444 |
+
}
|
445 |
+
],
|
446 |
+
"output_type": "multiple_choice",
|
447 |
+
"repeats": 1,
|
448 |
+
"should_decontaminate": false,
|
449 |
+
"metadata": {
|
450 |
+
"version": 0.0
|
451 |
+
}
|
452 |
+
},
|
453 |
+
"mmlu_anatomy": {
|
454 |
+
"task": "mmlu_anatomy",
|
455 |
+
"task_alias": "anatomy",
|
456 |
+
"group": "mmlu_stem",
|
457 |
+
"group_alias": "stem",
|
458 |
+
"dataset_path": "hails/mmlu_no_train",
|
459 |
+
"dataset_name": "anatomy",
|
460 |
+
"test_split": "test",
|
461 |
+
"fewshot_split": "dev",
|
462 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
463 |
+
"doc_to_target": "answer",
|
464 |
+
"doc_to_choice": [
|
465 |
+
"A",
|
466 |
+
"B",
|
467 |
+
"C",
|
468 |
+
"D"
|
469 |
+
],
|
470 |
+
"description": "The following are multiple choice questions (with answers) about anatomy.\n\n",
|
471 |
+
"target_delimiter": " ",
|
472 |
+
"fewshot_delimiter": "\n\n",
|
473 |
+
"fewshot_config": {
|
474 |
+
"sampler": "first_n"
|
475 |
+
},
|
476 |
+
"num_fewshot": 5,
|
477 |
+
"metric_list": [
|
478 |
+
{
|
479 |
+
"metric": "acc",
|
480 |
+
"aggregation": "mean",
|
481 |
+
"higher_is_better": true
|
482 |
+
}
|
483 |
+
],
|
484 |
+
"output_type": "multiple_choice",
|
485 |
+
"repeats": 1,
|
486 |
+
"should_decontaminate": false,
|
487 |
+
"metadata": {
|
488 |
+
"version": 0.0
|
489 |
+
}
|
490 |
+
},
|
491 |
+
"mmlu_astronomy": {
|
492 |
+
"task": "mmlu_astronomy",
|
493 |
+
"task_alias": "astronomy",
|
494 |
+
"group": "mmlu_stem",
|
495 |
+
"group_alias": "stem",
|
496 |
+
"dataset_path": "hails/mmlu_no_train",
|
497 |
+
"dataset_name": "astronomy",
|
498 |
+
"test_split": "test",
|
499 |
+
"fewshot_split": "dev",
|
500 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
501 |
+
"doc_to_target": "answer",
|
502 |
+
"doc_to_choice": [
|
503 |
+
"A",
|
504 |
+
"B",
|
505 |
+
"C",
|
506 |
+
"D"
|
507 |
+
],
|
508 |
+
"description": "The following are multiple choice questions (with answers) about astronomy.\n\n",
|
509 |
+
"target_delimiter": " ",
|
510 |
+
"fewshot_delimiter": "\n\n",
|
511 |
+
"fewshot_config": {
|
512 |
+
"sampler": "first_n"
|
513 |
+
},
|
514 |
+
"num_fewshot": 5,
|
515 |
+
"metric_list": [
|
516 |
+
{
|
517 |
+
"metric": "acc",
|
518 |
+
"aggregation": "mean",
|
519 |
+
"higher_is_better": true
|
520 |
+
}
|
521 |
+
],
|
522 |
+
"output_type": "multiple_choice",
|
523 |
+
"repeats": 1,
|
524 |
+
"should_decontaminate": false,
|
525 |
+
"metadata": {
|
526 |
+
"version": 0.0
|
527 |
+
}
|
528 |
+
},
|
529 |
+
"mmlu_business_ethics": {
|
530 |
+
"task": "mmlu_business_ethics",
|
531 |
+
"task_alias": "business_ethics",
|
532 |
+
"group": "mmlu_other",
|
533 |
+
"group_alias": "other",
|
534 |
+
"dataset_path": "hails/mmlu_no_train",
|
535 |
+
"dataset_name": "business_ethics",
|
536 |
+
"test_split": "test",
|
537 |
+
"fewshot_split": "dev",
|
538 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
539 |
+
"doc_to_target": "answer",
|
540 |
+
"doc_to_choice": [
|
541 |
+
"A",
|
542 |
+
"B",
|
543 |
+
"C",
|
544 |
+
"D"
|
545 |
+
],
|
546 |
+
"description": "The following are multiple choice questions (with answers) about business ethics.\n\n",
|
547 |
+
"target_delimiter": " ",
|
548 |
+
"fewshot_delimiter": "\n\n",
|
549 |
+
"fewshot_config": {
|
550 |
+
"sampler": "first_n"
|
551 |
+
},
|
552 |
+
"num_fewshot": 5,
|
553 |
+
"metric_list": [
|
554 |
+
{
|
555 |
+
"metric": "acc",
|
556 |
+
"aggregation": "mean",
|
557 |
+
"higher_is_better": true
|
558 |
+
}
|
559 |
+
],
|
560 |
+
"output_type": "multiple_choice",
|
561 |
+
"repeats": 1,
|
562 |
+
"should_decontaminate": false,
|
563 |
+
"metadata": {
|
564 |
+
"version": 0.0
|
565 |
+
}
|
566 |
+
},
|
567 |
+
"mmlu_clinical_knowledge": {
|
568 |
+
"task": "mmlu_clinical_knowledge",
|
569 |
+
"task_alias": "clinical_knowledge",
|
570 |
+
"group": "mmlu_other",
|
571 |
+
"group_alias": "other",
|
572 |
+
"dataset_path": "hails/mmlu_no_train",
|
573 |
+
"dataset_name": "clinical_knowledge",
|
574 |
+
"test_split": "test",
|
575 |
+
"fewshot_split": "dev",
|
576 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
577 |
+
"doc_to_target": "answer",
|
578 |
+
"doc_to_choice": [
|
579 |
+
"A",
|
580 |
+
"B",
|
581 |
+
"C",
|
582 |
+
"D"
|
583 |
+
],
|
584 |
+
"description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n",
|
585 |
+
"target_delimiter": " ",
|
586 |
+
"fewshot_delimiter": "\n\n",
|
587 |
+
"fewshot_config": {
|
588 |
+
"sampler": "first_n"
|
589 |
+
},
|
590 |
+
"num_fewshot": 5,
|
591 |
+
"metric_list": [
|
592 |
+
{
|
593 |
+
"metric": "acc",
|
594 |
+
"aggregation": "mean",
|
595 |
+
"higher_is_better": true
|
596 |
+
}
|
597 |
+
],
|
598 |
+
"output_type": "multiple_choice",
|
599 |
+
"repeats": 1,
|
600 |
+
"should_decontaminate": false,
|
601 |
+
"metadata": {
|
602 |
+
"version": 0.0
|
603 |
+
}
|
604 |
+
},
|
605 |
+
"mmlu_college_biology": {
|
606 |
+
"task": "mmlu_college_biology",
|
607 |
+
"task_alias": "college_biology",
|
608 |
+
"group": "mmlu_stem",
|
609 |
+
"group_alias": "stem",
|
610 |
+
"dataset_path": "hails/mmlu_no_train",
|
611 |
+
"dataset_name": "college_biology",
|
612 |
+
"test_split": "test",
|
613 |
+
"fewshot_split": "dev",
|
614 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
615 |
+
"doc_to_target": "answer",
|
616 |
+
"doc_to_choice": [
|
617 |
+
"A",
|
618 |
+
"B",
|
619 |
+
"C",
|
620 |
+
"D"
|
621 |
+
],
|
622 |
+
"description": "The following are multiple choice questions (with answers) about college biology.\n\n",
|
623 |
+
"target_delimiter": " ",
|
624 |
+
"fewshot_delimiter": "\n\n",
|
625 |
+
"fewshot_config": {
|
626 |
+
"sampler": "first_n"
|
627 |
+
},
|
628 |
+
"num_fewshot": 5,
|
629 |
+
"metric_list": [
|
630 |
+
{
|
631 |
+
"metric": "acc",
|
632 |
+
"aggregation": "mean",
|
633 |
+
"higher_is_better": true
|
634 |
+
}
|
635 |
+
],
|
636 |
+
"output_type": "multiple_choice",
|
637 |
+
"repeats": 1,
|
638 |
+
"should_decontaminate": false,
|
639 |
+
"metadata": {
|
640 |
+
"version": 0.0
|
641 |
+
}
|
642 |
+
},
|
643 |
+
"mmlu_college_chemistry": {
|
644 |
+
"task": "mmlu_college_chemistry",
|
645 |
+
"task_alias": "college_chemistry",
|
646 |
+
"group": "mmlu_stem",
|
647 |
+
"group_alias": "stem",
|
648 |
+
"dataset_path": "hails/mmlu_no_train",
|
649 |
+
"dataset_name": "college_chemistry",
|
650 |
+
"test_split": "test",
|
651 |
+
"fewshot_split": "dev",
|
652 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
653 |
+
"doc_to_target": "answer",
|
654 |
+
"doc_to_choice": [
|
655 |
+
"A",
|
656 |
+
"B",
|
657 |
+
"C",
|
658 |
+
"D"
|
659 |
+
],
|
660 |
+
"description": "The following are multiple choice questions (with answers) about college chemistry.\n\n",
|
661 |
+
"target_delimiter": " ",
|
662 |
+
"fewshot_delimiter": "\n\n",
|
663 |
+
"fewshot_config": {
|
664 |
+
"sampler": "first_n"
|
665 |
+
},
|
666 |
+
"num_fewshot": 5,
|
667 |
+
"metric_list": [
|
668 |
+
{
|
669 |
+
"metric": "acc",
|
670 |
+
"aggregation": "mean",
|
671 |
+
"higher_is_better": true
|
672 |
+
}
|
673 |
+
],
|
674 |
+
"output_type": "multiple_choice",
|
675 |
+
"repeats": 1,
|
676 |
+
"should_decontaminate": false,
|
677 |
+
"metadata": {
|
678 |
+
"version": 0.0
|
679 |
+
}
|
680 |
+
},
|
681 |
+
"mmlu_college_computer_science": {
|
682 |
+
"task": "mmlu_college_computer_science",
|
683 |
+
"task_alias": "college_computer_science",
|
684 |
+
"group": "mmlu_stem",
|
685 |
+
"group_alias": "stem",
|
686 |
+
"dataset_path": "hails/mmlu_no_train",
|
687 |
+
"dataset_name": "college_computer_science",
|
688 |
+
"test_split": "test",
|
689 |
+
"fewshot_split": "dev",
|
690 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
691 |
+
"doc_to_target": "answer",
|
692 |
+
"doc_to_choice": [
|
693 |
+
"A",
|
694 |
+
"B",
|
695 |
+
"C",
|
696 |
+
"D"
|
697 |
+
],
|
698 |
+
"description": "The following are multiple choice questions (with answers) about college computer science.\n\n",
|
699 |
+
"target_delimiter": " ",
|
700 |
+
"fewshot_delimiter": "\n\n",
|
701 |
+
"fewshot_config": {
|
702 |
+
"sampler": "first_n"
|
703 |
+
},
|
704 |
+
"num_fewshot": 5,
|
705 |
+
"metric_list": [
|
706 |
+
{
|
707 |
+
"metric": "acc",
|
708 |
+
"aggregation": "mean",
|
709 |
+
"higher_is_better": true
|
710 |
+
}
|
711 |
+
],
|
712 |
+
"output_type": "multiple_choice",
|
713 |
+
"repeats": 1,
|
714 |
+
"should_decontaminate": false,
|
715 |
+
"metadata": {
|
716 |
+
"version": 0.0
|
717 |
+
}
|
718 |
+
},
|
719 |
+
"mmlu_college_mathematics": {
|
720 |
+
"task": "mmlu_college_mathematics",
|
721 |
+
"task_alias": "college_mathematics",
|
722 |
+
"group": "mmlu_stem",
|
723 |
+
"group_alias": "stem",
|
724 |
+
"dataset_path": "hails/mmlu_no_train",
|
725 |
+
"dataset_name": "college_mathematics",
|
726 |
+
"test_split": "test",
|
727 |
+
"fewshot_split": "dev",
|
728 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
729 |
+
"doc_to_target": "answer",
|
730 |
+
"doc_to_choice": [
|
731 |
+
"A",
|
732 |
+
"B",
|
733 |
+
"C",
|
734 |
+
"D"
|
735 |
+
],
|
736 |
+
"description": "The following are multiple choice questions (with answers) about college mathematics.\n\n",
|
737 |
+
"target_delimiter": " ",
|
738 |
+
"fewshot_delimiter": "\n\n",
|
739 |
+
"fewshot_config": {
|
740 |
+
"sampler": "first_n"
|
741 |
+
},
|
742 |
+
"num_fewshot": 5,
|
743 |
+
"metric_list": [
|
744 |
+
{
|
745 |
+
"metric": "acc",
|
746 |
+
"aggregation": "mean",
|
747 |
+
"higher_is_better": true
|
748 |
+
}
|
749 |
+
],
|
750 |
+
"output_type": "multiple_choice",
|
751 |
+
"repeats": 1,
|
752 |
+
"should_decontaminate": false,
|
753 |
+
"metadata": {
|
754 |
+
"version": 0.0
|
755 |
+
}
|
756 |
+
},
|
757 |
+
"mmlu_college_medicine": {
|
758 |
+
"task": "mmlu_college_medicine",
|
759 |
+
"task_alias": "college_medicine",
|
760 |
+
"group": "mmlu_other",
|
761 |
+
"group_alias": "other",
|
762 |
+
"dataset_path": "hails/mmlu_no_train",
|
763 |
+
"dataset_name": "college_medicine",
|
764 |
+
"test_split": "test",
|
765 |
+
"fewshot_split": "dev",
|
766 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
767 |
+
"doc_to_target": "answer",
|
768 |
+
"doc_to_choice": [
|
769 |
+
"A",
|
770 |
+
"B",
|
771 |
+
"C",
|
772 |
+
"D"
|
773 |
+
],
|
774 |
+
"description": "The following are multiple choice questions (with answers) about college medicine.\n\n",
|
775 |
+
"target_delimiter": " ",
|
776 |
+
"fewshot_delimiter": "\n\n",
|
777 |
+
"fewshot_config": {
|
778 |
+
"sampler": "first_n"
|
779 |
+
},
|
780 |
+
"num_fewshot": 5,
|
781 |
+
"metric_list": [
|
782 |
+
{
|
783 |
+
"metric": "acc",
|
784 |
+
"aggregation": "mean",
|
785 |
+
"higher_is_better": true
|
786 |
+
}
|
787 |
+
],
|
788 |
+
"output_type": "multiple_choice",
|
789 |
+
"repeats": 1,
|
790 |
+
"should_decontaminate": false,
|
791 |
+
"metadata": {
|
792 |
+
"version": 0.0
|
793 |
+
}
|
794 |
+
},
|
795 |
+
"mmlu_college_physics": {
|
796 |
+
"task": "mmlu_college_physics",
|
797 |
+
"task_alias": "college_physics",
|
798 |
+
"group": "mmlu_stem",
|
799 |
+
"group_alias": "stem",
|
800 |
+
"dataset_path": "hails/mmlu_no_train",
|
801 |
+
"dataset_name": "college_physics",
|
802 |
+
"test_split": "test",
|
803 |
+
"fewshot_split": "dev",
|
804 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
805 |
+
"doc_to_target": "answer",
|
806 |
+
"doc_to_choice": [
|
807 |
+
"A",
|
808 |
+
"B",
|
809 |
+
"C",
|
810 |
+
"D"
|
811 |
+
],
|
812 |
+
"description": "The following are multiple choice questions (with answers) about college physics.\n\n",
|
813 |
+
"target_delimiter": " ",
|
814 |
+
"fewshot_delimiter": "\n\n",
|
815 |
+
"fewshot_config": {
|
816 |
+
"sampler": "first_n"
|
817 |
+
},
|
818 |
+
"num_fewshot": 5,
|
819 |
+
"metric_list": [
|
820 |
+
{
|
821 |
+
"metric": "acc",
|
822 |
+
"aggregation": "mean",
|
823 |
+
"higher_is_better": true
|
824 |
+
}
|
825 |
+
],
|
826 |
+
"output_type": "multiple_choice",
|
827 |
+
"repeats": 1,
|
828 |
+
"should_decontaminate": false,
|
829 |
+
"metadata": {
|
830 |
+
"version": 0.0
|
831 |
+
}
|
832 |
+
},
|
833 |
+
"mmlu_computer_security": {
|
834 |
+
"task": "mmlu_computer_security",
|
835 |
+
"task_alias": "computer_security",
|
836 |
+
"group": "mmlu_stem",
|
837 |
+
"group_alias": "stem",
|
838 |
+
"dataset_path": "hails/mmlu_no_train",
|
839 |
+
"dataset_name": "computer_security",
|
840 |
+
"test_split": "test",
|
841 |
+
"fewshot_split": "dev",
|
842 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
843 |
+
"doc_to_target": "answer",
|
844 |
+
"doc_to_choice": [
|
845 |
+
"A",
|
846 |
+
"B",
|
847 |
+
"C",
|
848 |
+
"D"
|
849 |
+
],
|
850 |
+
"description": "The following are multiple choice questions (with answers) about computer security.\n\n",
|
851 |
+
"target_delimiter": " ",
|
852 |
+
"fewshot_delimiter": "\n\n",
|
853 |
+
"fewshot_config": {
|
854 |
+
"sampler": "first_n"
|
855 |
+
},
|
856 |
+
"num_fewshot": 5,
|
857 |
+
"metric_list": [
|
858 |
+
{
|
859 |
+
"metric": "acc",
|
860 |
+
"aggregation": "mean",
|
861 |
+
"higher_is_better": true
|
862 |
+
}
|
863 |
+
],
|
864 |
+
"output_type": "multiple_choice",
|
865 |
+
"repeats": 1,
|
866 |
+
"should_decontaminate": false,
|
867 |
+
"metadata": {
|
868 |
+
"version": 0.0
|
869 |
+
}
|
870 |
+
},
|
871 |
+
"mmlu_conceptual_physics": {
|
872 |
+
"task": "mmlu_conceptual_physics",
|
873 |
+
"task_alias": "conceptual_physics",
|
874 |
+
"group": "mmlu_stem",
|
875 |
+
"group_alias": "stem",
|
876 |
+
"dataset_path": "hails/mmlu_no_train",
|
877 |
+
"dataset_name": "conceptual_physics",
|
878 |
+
"test_split": "test",
|
879 |
+
"fewshot_split": "dev",
|
880 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
881 |
+
"doc_to_target": "answer",
|
882 |
+
"doc_to_choice": [
|
883 |
+
"A",
|
884 |
+
"B",
|
885 |
+
"C",
|
886 |
+
"D"
|
887 |
+
],
|
888 |
+
"description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n",
|
889 |
+
"target_delimiter": " ",
|
890 |
+
"fewshot_delimiter": "\n\n",
|
891 |
+
"fewshot_config": {
|
892 |
+
"sampler": "first_n"
|
893 |
+
},
|
894 |
+
"num_fewshot": 5,
|
895 |
+
"metric_list": [
|
896 |
+
{
|
897 |
+
"metric": "acc",
|
898 |
+
"aggregation": "mean",
|
899 |
+
"higher_is_better": true
|
900 |
+
}
|
901 |
+
],
|
902 |
+
"output_type": "multiple_choice",
|
903 |
+
"repeats": 1,
|
904 |
+
"should_decontaminate": false,
|
905 |
+
"metadata": {
|
906 |
+
"version": 0.0
|
907 |
+
}
|
908 |
+
},
|
909 |
+
"mmlu_econometrics": {
|
910 |
+
"task": "mmlu_econometrics",
|
911 |
+
"task_alias": "econometrics",
|
912 |
+
"group": "mmlu_social_sciences",
|
913 |
+
"group_alias": "social_sciences",
|
914 |
+
"dataset_path": "hails/mmlu_no_train",
|
915 |
+
"dataset_name": "econometrics",
|
916 |
+
"test_split": "test",
|
917 |
+
"fewshot_split": "dev",
|
918 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
919 |
+
"doc_to_target": "answer",
|
920 |
+
"doc_to_choice": [
|
921 |
+
"A",
|
922 |
+
"B",
|
923 |
+
"C",
|
924 |
+
"D"
|
925 |
+
],
|
926 |
+
"description": "The following are multiple choice questions (with answers) about econometrics.\n\n",
|
927 |
+
"target_delimiter": " ",
|
928 |
+
"fewshot_delimiter": "\n\n",
|
929 |
+
"fewshot_config": {
|
930 |
+
"sampler": "first_n"
|
931 |
+
},
|
932 |
+
"num_fewshot": 5,
|
933 |
+
"metric_list": [
|
934 |
+
{
|
935 |
+
"metric": "acc",
|
936 |
+
"aggregation": "mean",
|
937 |
+
"higher_is_better": true
|
938 |
+
}
|
939 |
+
],
|
940 |
+
"output_type": "multiple_choice",
|
941 |
+
"repeats": 1,
|
942 |
+
"should_decontaminate": false,
|
943 |
+
"metadata": {
|
944 |
+
"version": 0.0
|
945 |
+
}
|
946 |
+
},
|
947 |
+
"mmlu_electrical_engineering": {
|
948 |
+
"task": "mmlu_electrical_engineering",
|
949 |
+
"task_alias": "electrical_engineering",
|
950 |
+
"group": "mmlu_stem",
|
951 |
+
"group_alias": "stem",
|
952 |
+
"dataset_path": "hails/mmlu_no_train",
|
953 |
+
"dataset_name": "electrical_engineering",
|
954 |
+
"test_split": "test",
|
955 |
+
"fewshot_split": "dev",
|
956 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
957 |
+
"doc_to_target": "answer",
|
958 |
+
"doc_to_choice": [
|
959 |
+
"A",
|
960 |
+
"B",
|
961 |
+
"C",
|
962 |
+
"D"
|
963 |
+
],
|
964 |
+
"description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n",
|
965 |
+
"target_delimiter": " ",
|
966 |
+
"fewshot_delimiter": "\n\n",
|
967 |
+
"fewshot_config": {
|
968 |
+
"sampler": "first_n"
|
969 |
+
},
|
970 |
+
"num_fewshot": 5,
|
971 |
+
"metric_list": [
|
972 |
+
{
|
973 |
+
"metric": "acc",
|
974 |
+
"aggregation": "mean",
|
975 |
+
"higher_is_better": true
|
976 |
+
}
|
977 |
+
],
|
978 |
+
"output_type": "multiple_choice",
|
979 |
+
"repeats": 1,
|
980 |
+
"should_decontaminate": false,
|
981 |
+
"metadata": {
|
982 |
+
"version": 0.0
|
983 |
+
}
|
984 |
+
},
|
985 |
+
"mmlu_elementary_mathematics": {
|
986 |
+
"task": "mmlu_elementary_mathematics",
|
987 |
+
"task_alias": "elementary_mathematics",
|
988 |
+
"group": "mmlu_stem",
|
989 |
+
"group_alias": "stem",
|
990 |
+
"dataset_path": "hails/mmlu_no_train",
|
991 |
+
"dataset_name": "elementary_mathematics",
|
992 |
+
"test_split": "test",
|
993 |
+
"fewshot_split": "dev",
|
994 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
995 |
+
"doc_to_target": "answer",
|
996 |
+
"doc_to_choice": [
|
997 |
+
"A",
|
998 |
+
"B",
|
999 |
+
"C",
|
1000 |
+
"D"
|
1001 |
+
],
|
1002 |
+
"description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n",
|
1003 |
+
"target_delimiter": " ",
|
1004 |
+
"fewshot_delimiter": "\n\n",
|
1005 |
+
"fewshot_config": {
|
1006 |
+
"sampler": "first_n"
|
1007 |
+
},
|
1008 |
+
"num_fewshot": 5,
|
1009 |
+
"metric_list": [
|
1010 |
+
{
|
1011 |
+
"metric": "acc",
|
1012 |
+
"aggregation": "mean",
|
1013 |
+
"higher_is_better": true
|
1014 |
+
}
|
1015 |
+
],
|
1016 |
+
"output_type": "multiple_choice",
|
1017 |
+
"repeats": 1,
|
1018 |
+
"should_decontaminate": false,
|
1019 |
+
"metadata": {
|
1020 |
+
"version": 0.0
|
1021 |
+
}
|
1022 |
+
},
|
1023 |
+
"mmlu_formal_logic": {
|
1024 |
+
"task": "mmlu_formal_logic",
|
1025 |
+
"task_alias": "formal_logic",
|
1026 |
+
"group": "mmlu_humanities",
|
1027 |
+
"group_alias": "humanities",
|
1028 |
+
"dataset_path": "hails/mmlu_no_train",
|
1029 |
+
"dataset_name": "formal_logic",
|
1030 |
+
"test_split": "test",
|
1031 |
+
"fewshot_split": "dev",
|
1032 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1033 |
+
"doc_to_target": "answer",
|
1034 |
+
"doc_to_choice": [
|
1035 |
+
"A",
|
1036 |
+
"B",
|
1037 |
+
"C",
|
1038 |
+
"D"
|
1039 |
+
],
|
1040 |
+
"description": "The following are multiple choice questions (with answers) about formal logic.\n\n",
|
1041 |
+
"target_delimiter": " ",
|
1042 |
+
"fewshot_delimiter": "\n\n",
|
1043 |
+
"fewshot_config": {
|
1044 |
+
"sampler": "first_n"
|
1045 |
+
},
|
1046 |
+
"num_fewshot": 5,
|
1047 |
+
"metric_list": [
|
1048 |
+
{
|
1049 |
+
"metric": "acc",
|
1050 |
+
"aggregation": "mean",
|
1051 |
+
"higher_is_better": true
|
1052 |
+
}
|
1053 |
+
],
|
1054 |
+
"output_type": "multiple_choice",
|
1055 |
+
"repeats": 1,
|
1056 |
+
"should_decontaminate": false,
|
1057 |
+
"metadata": {
|
1058 |
+
"version": 0.0
|
1059 |
+
}
|
1060 |
+
},
|
1061 |
+
"mmlu_global_facts": {
|
1062 |
+
"task": "mmlu_global_facts",
|
1063 |
+
"task_alias": "global_facts",
|
1064 |
+
"group": "mmlu_other",
|
1065 |
+
"group_alias": "other",
|
1066 |
+
"dataset_path": "hails/mmlu_no_train",
|
1067 |
+
"dataset_name": "global_facts",
|
1068 |
+
"test_split": "test",
|
1069 |
+
"fewshot_split": "dev",
|
1070 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1071 |
+
"doc_to_target": "answer",
|
1072 |
+
"doc_to_choice": [
|
1073 |
+
"A",
|
1074 |
+
"B",
|
1075 |
+
"C",
|
1076 |
+
"D"
|
1077 |
+
],
|
1078 |
+
"description": "The following are multiple choice questions (with answers) about global facts.\n\n",
|
1079 |
+
"target_delimiter": " ",
|
1080 |
+
"fewshot_delimiter": "\n\n",
|
1081 |
+
"fewshot_config": {
|
1082 |
+
"sampler": "first_n"
|
1083 |
+
},
|
1084 |
+
"num_fewshot": 5,
|
1085 |
+
"metric_list": [
|
1086 |
+
{
|
1087 |
+
"metric": "acc",
|
1088 |
+
"aggregation": "mean",
|
1089 |
+
"higher_is_better": true
|
1090 |
+
}
|
1091 |
+
],
|
1092 |
+
"output_type": "multiple_choice",
|
1093 |
+
"repeats": 1,
|
1094 |
+
"should_decontaminate": false,
|
1095 |
+
"metadata": {
|
1096 |
+
"version": 0.0
|
1097 |
+
}
|
1098 |
+
},
|
1099 |
+
"mmlu_high_school_biology": {
|
1100 |
+
"task": "mmlu_high_school_biology",
|
1101 |
+
"task_alias": "high_school_biology",
|
1102 |
+
"group": "mmlu_stem",
|
1103 |
+
"group_alias": "stem",
|
1104 |
+
"dataset_path": "hails/mmlu_no_train",
|
1105 |
+
"dataset_name": "high_school_biology",
|
1106 |
+
"test_split": "test",
|
1107 |
+
"fewshot_split": "dev",
|
1108 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1109 |
+
"doc_to_target": "answer",
|
1110 |
+
"doc_to_choice": [
|
1111 |
+
"A",
|
1112 |
+
"B",
|
1113 |
+
"C",
|
1114 |
+
"D"
|
1115 |
+
],
|
1116 |
+
"description": "The following are multiple choice questions (with answers) about high school biology.\n\n",
|
1117 |
+
"target_delimiter": " ",
|
1118 |
+
"fewshot_delimiter": "\n\n",
|
1119 |
+
"fewshot_config": {
|
1120 |
+
"sampler": "first_n"
|
1121 |
+
},
|
1122 |
+
"num_fewshot": 5,
|
1123 |
+
"metric_list": [
|
1124 |
+
{
|
1125 |
+
"metric": "acc",
|
1126 |
+
"aggregation": "mean",
|
1127 |
+
"higher_is_better": true
|
1128 |
+
}
|
1129 |
+
],
|
1130 |
+
"output_type": "multiple_choice",
|
1131 |
+
"repeats": 1,
|
1132 |
+
"should_decontaminate": false,
|
1133 |
+
"metadata": {
|
1134 |
+
"version": 0.0
|
1135 |
+
}
|
1136 |
+
},
|
1137 |
+
"mmlu_high_school_chemistry": {
|
1138 |
+
"task": "mmlu_high_school_chemistry",
|
1139 |
+
"task_alias": "high_school_chemistry",
|
1140 |
+
"group": "mmlu_stem",
|
1141 |
+
"group_alias": "stem",
|
1142 |
+
"dataset_path": "hails/mmlu_no_train",
|
1143 |
+
"dataset_name": "high_school_chemistry",
|
1144 |
+
"test_split": "test",
|
1145 |
+
"fewshot_split": "dev",
|
1146 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1147 |
+
"doc_to_target": "answer",
|
1148 |
+
"doc_to_choice": [
|
1149 |
+
"A",
|
1150 |
+
"B",
|
1151 |
+
"C",
|
1152 |
+
"D"
|
1153 |
+
],
|
1154 |
+
"description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n",
|
1155 |
+
"target_delimiter": " ",
|
1156 |
+
"fewshot_delimiter": "\n\n",
|
1157 |
+
"fewshot_config": {
|
1158 |
+
"sampler": "first_n"
|
1159 |
+
},
|
1160 |
+
"num_fewshot": 5,
|
1161 |
+
"metric_list": [
|
1162 |
+
{
|
1163 |
+
"metric": "acc",
|
1164 |
+
"aggregation": "mean",
|
1165 |
+
"higher_is_better": true
|
1166 |
+
}
|
1167 |
+
],
|
1168 |
+
"output_type": "multiple_choice",
|
1169 |
+
"repeats": 1,
|
1170 |
+
"should_decontaminate": false,
|
1171 |
+
"metadata": {
|
1172 |
+
"version": 0.0
|
1173 |
+
}
|
1174 |
+
},
|
1175 |
+
"mmlu_high_school_computer_science": {
|
1176 |
+
"task": "mmlu_high_school_computer_science",
|
1177 |
+
"task_alias": "high_school_computer_science",
|
1178 |
+
"group": "mmlu_stem",
|
1179 |
+
"group_alias": "stem",
|
1180 |
+
"dataset_path": "hails/mmlu_no_train",
|
1181 |
+
"dataset_name": "high_school_computer_science",
|
1182 |
+
"test_split": "test",
|
1183 |
+
"fewshot_split": "dev",
|
1184 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1185 |
+
"doc_to_target": "answer",
|
1186 |
+
"doc_to_choice": [
|
1187 |
+
"A",
|
1188 |
+
"B",
|
1189 |
+
"C",
|
1190 |
+
"D"
|
1191 |
+
],
|
1192 |
+
"description": "The following are multiple choice questions (with answers) about high school computer science.\n\n",
|
1193 |
+
"target_delimiter": " ",
|
1194 |
+
"fewshot_delimiter": "\n\n",
|
1195 |
+
"fewshot_config": {
|
1196 |
+
"sampler": "first_n"
|
1197 |
+
},
|
1198 |
+
"num_fewshot": 5,
|
1199 |
+
"metric_list": [
|
1200 |
+
{
|
1201 |
+
"metric": "acc",
|
1202 |
+
"aggregation": "mean",
|
1203 |
+
"higher_is_better": true
|
1204 |
+
}
|
1205 |
+
],
|
1206 |
+
"output_type": "multiple_choice",
|
1207 |
+
"repeats": 1,
|
1208 |
+
"should_decontaminate": false,
|
1209 |
+
"metadata": {
|
1210 |
+
"version": 0.0
|
1211 |
+
}
|
1212 |
+
},
|
1213 |
+
"mmlu_high_school_european_history": {
|
1214 |
+
"task": "mmlu_high_school_european_history",
|
1215 |
+
"task_alias": "high_school_european_history",
|
1216 |
+
"group": "mmlu_humanities",
|
1217 |
+
"group_alias": "humanities",
|
1218 |
+
"dataset_path": "hails/mmlu_no_train",
|
1219 |
+
"dataset_name": "high_school_european_history",
|
1220 |
+
"test_split": "test",
|
1221 |
+
"fewshot_split": "dev",
|
1222 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1223 |
+
"doc_to_target": "answer",
|
1224 |
+
"doc_to_choice": [
|
1225 |
+
"A",
|
1226 |
+
"B",
|
1227 |
+
"C",
|
1228 |
+
"D"
|
1229 |
+
],
|
1230 |
+
"description": "The following are multiple choice questions (with answers) about high school european history.\n\n",
|
1231 |
+
"target_delimiter": " ",
|
1232 |
+
"fewshot_delimiter": "\n\n",
|
1233 |
+
"fewshot_config": {
|
1234 |
+
"sampler": "first_n"
|
1235 |
+
},
|
1236 |
+
"num_fewshot": 5,
|
1237 |
+
"metric_list": [
|
1238 |
+
{
|
1239 |
+
"metric": "acc",
|
1240 |
+
"aggregation": "mean",
|
1241 |
+
"higher_is_better": true
|
1242 |
+
}
|
1243 |
+
],
|
1244 |
+
"output_type": "multiple_choice",
|
1245 |
+
"repeats": 1,
|
1246 |
+
"should_decontaminate": false,
|
1247 |
+
"metadata": {
|
1248 |
+
"version": 0.0
|
1249 |
+
}
|
1250 |
+
},
|
1251 |
+
"mmlu_high_school_geography": {
|
1252 |
+
"task": "mmlu_high_school_geography",
|
1253 |
+
"task_alias": "high_school_geography",
|
1254 |
+
"group": "mmlu_social_sciences",
|
1255 |
+
"group_alias": "social_sciences",
|
1256 |
+
"dataset_path": "hails/mmlu_no_train",
|
1257 |
+
"dataset_name": "high_school_geography",
|
1258 |
+
"test_split": "test",
|
1259 |
+
"fewshot_split": "dev",
|
1260 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1261 |
+
"doc_to_target": "answer",
|
1262 |
+
"doc_to_choice": [
|
1263 |
+
"A",
|
1264 |
+
"B",
|
1265 |
+
"C",
|
1266 |
+
"D"
|
1267 |
+
],
|
1268 |
+
"description": "The following are multiple choice questions (with answers) about high school geography.\n\n",
|
1269 |
+
"target_delimiter": " ",
|
1270 |
+
"fewshot_delimiter": "\n\n",
|
1271 |
+
"fewshot_config": {
|
1272 |
+
"sampler": "first_n"
|
1273 |
+
},
|
1274 |
+
"num_fewshot": 5,
|
1275 |
+
"metric_list": [
|
1276 |
+
{
|
1277 |
+
"metric": "acc",
|
1278 |
+
"aggregation": "mean",
|
1279 |
+
"higher_is_better": true
|
1280 |
+
}
|
1281 |
+
],
|
1282 |
+
"output_type": "multiple_choice",
|
1283 |
+
"repeats": 1,
|
1284 |
+
"should_decontaminate": false,
|
1285 |
+
"metadata": {
|
1286 |
+
"version": 0.0
|
1287 |
+
}
|
1288 |
+
},
|
1289 |
+
"mmlu_high_school_government_and_politics": {
|
1290 |
+
"task": "mmlu_high_school_government_and_politics",
|
1291 |
+
"task_alias": "high_school_government_and_politics",
|
1292 |
+
"group": "mmlu_social_sciences",
|
1293 |
+
"group_alias": "social_sciences",
|
1294 |
+
"dataset_path": "hails/mmlu_no_train",
|
1295 |
+
"dataset_name": "high_school_government_and_politics",
|
1296 |
+
"test_split": "test",
|
1297 |
+
"fewshot_split": "dev",
|
1298 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1299 |
+
"doc_to_target": "answer",
|
1300 |
+
"doc_to_choice": [
|
1301 |
+
"A",
|
1302 |
+
"B",
|
1303 |
+
"C",
|
1304 |
+
"D"
|
1305 |
+
],
|
1306 |
+
"description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n",
|
1307 |
+
"target_delimiter": " ",
|
1308 |
+
"fewshot_delimiter": "\n\n",
|
1309 |
+
"fewshot_config": {
|
1310 |
+
"sampler": "first_n"
|
1311 |
+
},
|
1312 |
+
"num_fewshot": 5,
|
1313 |
+
"metric_list": [
|
1314 |
+
{
|
1315 |
+
"metric": "acc",
|
1316 |
+
"aggregation": "mean",
|
1317 |
+
"higher_is_better": true
|
1318 |
+
}
|
1319 |
+
],
|
1320 |
+
"output_type": "multiple_choice",
|
1321 |
+
"repeats": 1,
|
1322 |
+
"should_decontaminate": false,
|
1323 |
+
"metadata": {
|
1324 |
+
"version": 0.0
|
1325 |
+
}
|
1326 |
+
},
|
1327 |
+
"mmlu_high_school_macroeconomics": {
|
1328 |
+
"task": "mmlu_high_school_macroeconomics",
|
1329 |
+
"task_alias": "high_school_macroeconomics",
|
1330 |
+
"group": "mmlu_social_sciences",
|
1331 |
+
"group_alias": "social_sciences",
|
1332 |
+
"dataset_path": "hails/mmlu_no_train",
|
1333 |
+
"dataset_name": "high_school_macroeconomics",
|
1334 |
+
"test_split": "test",
|
1335 |
+
"fewshot_split": "dev",
|
1336 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1337 |
+
"doc_to_target": "answer",
|
1338 |
+
"doc_to_choice": [
|
1339 |
+
"A",
|
1340 |
+
"B",
|
1341 |
+
"C",
|
1342 |
+
"D"
|
1343 |
+
],
|
1344 |
+
"description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n",
|
1345 |
+
"target_delimiter": " ",
|
1346 |
+
"fewshot_delimiter": "\n\n",
|
1347 |
+
"fewshot_config": {
|
1348 |
+
"sampler": "first_n"
|
1349 |
+
},
|
1350 |
+
"num_fewshot": 5,
|
1351 |
+
"metric_list": [
|
1352 |
+
{
|
1353 |
+
"metric": "acc",
|
1354 |
+
"aggregation": "mean",
|
1355 |
+
"higher_is_better": true
|
1356 |
+
}
|
1357 |
+
],
|
1358 |
+
"output_type": "multiple_choice",
|
1359 |
+
"repeats": 1,
|
1360 |
+
"should_decontaminate": false,
|
1361 |
+
"metadata": {
|
1362 |
+
"version": 0.0
|
1363 |
+
}
|
1364 |
+
},
|
1365 |
+
"mmlu_high_school_mathematics": {
|
1366 |
+
"task": "mmlu_high_school_mathematics",
|
1367 |
+
"task_alias": "high_school_mathematics",
|
1368 |
+
"group": "mmlu_stem",
|
1369 |
+
"group_alias": "stem",
|
1370 |
+
"dataset_path": "hails/mmlu_no_train",
|
1371 |
+
"dataset_name": "high_school_mathematics",
|
1372 |
+
"test_split": "test",
|
1373 |
+
"fewshot_split": "dev",
|
1374 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1375 |
+
"doc_to_target": "answer",
|
1376 |
+
"doc_to_choice": [
|
1377 |
+
"A",
|
1378 |
+
"B",
|
1379 |
+
"C",
|
1380 |
+
"D"
|
1381 |
+
],
|
1382 |
+
"description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n",
|
1383 |
+
"target_delimiter": " ",
|
1384 |
+
"fewshot_delimiter": "\n\n",
|
1385 |
+
"fewshot_config": {
|
1386 |
+
"sampler": "first_n"
|
1387 |
+
},
|
1388 |
+
"num_fewshot": 5,
|
1389 |
+
"metric_list": [
|
1390 |
+
{
|
1391 |
+
"metric": "acc",
|
1392 |
+
"aggregation": "mean",
|
1393 |
+
"higher_is_better": true
|
1394 |
+
}
|
1395 |
+
],
|
1396 |
+
"output_type": "multiple_choice",
|
1397 |
+
"repeats": 1,
|
1398 |
+
"should_decontaminate": false,
|
1399 |
+
"metadata": {
|
1400 |
+
"version": 0.0
|
1401 |
+
}
|
1402 |
+
},
|
1403 |
+
"mmlu_high_school_microeconomics": {
|
1404 |
+
"task": "mmlu_high_school_microeconomics",
|
1405 |
+
"task_alias": "high_school_microeconomics",
|
1406 |
+
"group": "mmlu_social_sciences",
|
1407 |
+
"group_alias": "social_sciences",
|
1408 |
+
"dataset_path": "hails/mmlu_no_train",
|
1409 |
+
"dataset_name": "high_school_microeconomics",
|
1410 |
+
"test_split": "test",
|
1411 |
+
"fewshot_split": "dev",
|
1412 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1413 |
+
"doc_to_target": "answer",
|
1414 |
+
"doc_to_choice": [
|
1415 |
+
"A",
|
1416 |
+
"B",
|
1417 |
+
"C",
|
1418 |
+
"D"
|
1419 |
+
],
|
1420 |
+
"description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n",
|
1421 |
+
"target_delimiter": " ",
|
1422 |
+
"fewshot_delimiter": "\n\n",
|
1423 |
+
"fewshot_config": {
|
1424 |
+
"sampler": "first_n"
|
1425 |
+
},
|
1426 |
+
"num_fewshot": 5,
|
1427 |
+
"metric_list": [
|
1428 |
+
{
|
1429 |
+
"metric": "acc",
|
1430 |
+
"aggregation": "mean",
|
1431 |
+
"higher_is_better": true
|
1432 |
+
}
|
1433 |
+
],
|
1434 |
+
"output_type": "multiple_choice",
|
1435 |
+
"repeats": 1,
|
1436 |
+
"should_decontaminate": false,
|
1437 |
+
"metadata": {
|
1438 |
+
"version": 0.0
|
1439 |
+
}
|
1440 |
+
},
|
1441 |
+
"mmlu_high_school_physics": {
|
1442 |
+
"task": "mmlu_high_school_physics",
|
1443 |
+
"task_alias": "high_school_physics",
|
1444 |
+
"group": "mmlu_stem",
|
1445 |
+
"group_alias": "stem",
|
1446 |
+
"dataset_path": "hails/mmlu_no_train",
|
1447 |
+
"dataset_name": "high_school_physics",
|
1448 |
+
"test_split": "test",
|
1449 |
+
"fewshot_split": "dev",
|
1450 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1451 |
+
"doc_to_target": "answer",
|
1452 |
+
"doc_to_choice": [
|
1453 |
+
"A",
|
1454 |
+
"B",
|
1455 |
+
"C",
|
1456 |
+
"D"
|
1457 |
+
],
|
1458 |
+
"description": "The following are multiple choice questions (with answers) about high school physics.\n\n",
|
1459 |
+
"target_delimiter": " ",
|
1460 |
+
"fewshot_delimiter": "\n\n",
|
1461 |
+
"fewshot_config": {
|
1462 |
+
"sampler": "first_n"
|
1463 |
+
},
|
1464 |
+
"num_fewshot": 5,
|
1465 |
+
"metric_list": [
|
1466 |
+
{
|
1467 |
+
"metric": "acc",
|
1468 |
+
"aggregation": "mean",
|
1469 |
+
"higher_is_better": true
|
1470 |
+
}
|
1471 |
+
],
|
1472 |
+
"output_type": "multiple_choice",
|
1473 |
+
"repeats": 1,
|
1474 |
+
"should_decontaminate": false,
|
1475 |
+
"metadata": {
|
1476 |
+
"version": 0.0
|
1477 |
+
}
|
1478 |
+
},
|
1479 |
+
"mmlu_high_school_psychology": {
|
1480 |
+
"task": "mmlu_high_school_psychology",
|
1481 |
+
"task_alias": "high_school_psychology",
|
1482 |
+
"group": "mmlu_social_sciences",
|
1483 |
+
"group_alias": "social_sciences",
|
1484 |
+
"dataset_path": "hails/mmlu_no_train",
|
1485 |
+
"dataset_name": "high_school_psychology",
|
1486 |
+
"test_split": "test",
|
1487 |
+
"fewshot_split": "dev",
|
1488 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1489 |
+
"doc_to_target": "answer",
|
1490 |
+
"doc_to_choice": [
|
1491 |
+
"A",
|
1492 |
+
"B",
|
1493 |
+
"C",
|
1494 |
+
"D"
|
1495 |
+
],
|
1496 |
+
"description": "The following are multiple choice questions (with answers) about high school psychology.\n\n",
|
1497 |
+
"target_delimiter": " ",
|
1498 |
+
"fewshot_delimiter": "\n\n",
|
1499 |
+
"fewshot_config": {
|
1500 |
+
"sampler": "first_n"
|
1501 |
+
},
|
1502 |
+
"num_fewshot": 5,
|
1503 |
+
"metric_list": [
|
1504 |
+
{
|
1505 |
+
"metric": "acc",
|
1506 |
+
"aggregation": "mean",
|
1507 |
+
"higher_is_better": true
|
1508 |
+
}
|
1509 |
+
],
|
1510 |
+
"output_type": "multiple_choice",
|
1511 |
+
"repeats": 1,
|
1512 |
+
"should_decontaminate": false,
|
1513 |
+
"metadata": {
|
1514 |
+
"version": 0.0
|
1515 |
+
}
|
1516 |
+
},
|
1517 |
+
"mmlu_high_school_statistics": {
|
1518 |
+
"task": "mmlu_high_school_statistics",
|
1519 |
+
"task_alias": "high_school_statistics",
|
1520 |
+
"group": "mmlu_stem",
|
1521 |
+
"group_alias": "stem",
|
1522 |
+
"dataset_path": "hails/mmlu_no_train",
|
1523 |
+
"dataset_name": "high_school_statistics",
|
1524 |
+
"test_split": "test",
|
1525 |
+
"fewshot_split": "dev",
|
1526 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1527 |
+
"doc_to_target": "answer",
|
1528 |
+
"doc_to_choice": [
|
1529 |
+
"A",
|
1530 |
+
"B",
|
1531 |
+
"C",
|
1532 |
+
"D"
|
1533 |
+
],
|
1534 |
+
"description": "The following are multiple choice questions (with answers) about high school statistics.\n\n",
|
1535 |
+
"target_delimiter": " ",
|
1536 |
+
"fewshot_delimiter": "\n\n",
|
1537 |
+
"fewshot_config": {
|
1538 |
+
"sampler": "first_n"
|
1539 |
+
},
|
1540 |
+
"num_fewshot": 5,
|
1541 |
+
"metric_list": [
|
1542 |
+
{
|
1543 |
+
"metric": "acc",
|
1544 |
+
"aggregation": "mean",
|
1545 |
+
"higher_is_better": true
|
1546 |
+
}
|
1547 |
+
],
|
1548 |
+
"output_type": "multiple_choice",
|
1549 |
+
"repeats": 1,
|
1550 |
+
"should_decontaminate": false,
|
1551 |
+
"metadata": {
|
1552 |
+
"version": 0.0
|
1553 |
+
}
|
1554 |
+
},
|
1555 |
+
"mmlu_high_school_us_history": {
|
1556 |
+
"task": "mmlu_high_school_us_history",
|
1557 |
+
"task_alias": "high_school_us_history",
|
1558 |
+
"group": "mmlu_humanities",
|
1559 |
+
"group_alias": "humanities",
|
1560 |
+
"dataset_path": "hails/mmlu_no_train",
|
1561 |
+
"dataset_name": "high_school_us_history",
|
1562 |
+
"test_split": "test",
|
1563 |
+
"fewshot_split": "dev",
|
1564 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1565 |
+
"doc_to_target": "answer",
|
1566 |
+
"doc_to_choice": [
|
1567 |
+
"A",
|
1568 |
+
"B",
|
1569 |
+
"C",
|
1570 |
+
"D"
|
1571 |
+
],
|
1572 |
+
"description": "The following are multiple choice questions (with answers) about high school us history.\n\n",
|
1573 |
+
"target_delimiter": " ",
|
1574 |
+
"fewshot_delimiter": "\n\n",
|
1575 |
+
"fewshot_config": {
|
1576 |
+
"sampler": "first_n"
|
1577 |
+
},
|
1578 |
+
"num_fewshot": 5,
|
1579 |
+
"metric_list": [
|
1580 |
+
{
|
1581 |
+
"metric": "acc",
|
1582 |
+
"aggregation": "mean",
|
1583 |
+
"higher_is_better": true
|
1584 |
+
}
|
1585 |
+
],
|
1586 |
+
"output_type": "multiple_choice",
|
1587 |
+
"repeats": 1,
|
1588 |
+
"should_decontaminate": false,
|
1589 |
+
"metadata": {
|
1590 |
+
"version": 0.0
|
1591 |
+
}
|
1592 |
+
},
|
1593 |
+
"mmlu_high_school_world_history": {
|
1594 |
+
"task": "mmlu_high_school_world_history",
|
1595 |
+
"task_alias": "high_school_world_history",
|
1596 |
+
"group": "mmlu_humanities",
|
1597 |
+
"group_alias": "humanities",
|
1598 |
+
"dataset_path": "hails/mmlu_no_train",
|
1599 |
+
"dataset_name": "high_school_world_history",
|
1600 |
+
"test_split": "test",
|
1601 |
+
"fewshot_split": "dev",
|
1602 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1603 |
+
"doc_to_target": "answer",
|
1604 |
+
"doc_to_choice": [
|
1605 |
+
"A",
|
1606 |
+
"B",
|
1607 |
+
"C",
|
1608 |
+
"D"
|
1609 |
+
],
|
1610 |
+
"description": "The following are multiple choice questions (with answers) about high school world history.\n\n",
|
1611 |
+
"target_delimiter": " ",
|
1612 |
+
"fewshot_delimiter": "\n\n",
|
1613 |
+
"fewshot_config": {
|
1614 |
+
"sampler": "first_n"
|
1615 |
+
},
|
1616 |
+
"num_fewshot": 5,
|
1617 |
+
"metric_list": [
|
1618 |
+
{
|
1619 |
+
"metric": "acc",
|
1620 |
+
"aggregation": "mean",
|
1621 |
+
"higher_is_better": true
|
1622 |
+
}
|
1623 |
+
],
|
1624 |
+
"output_type": "multiple_choice",
|
1625 |
+
"repeats": 1,
|
1626 |
+
"should_decontaminate": false,
|
1627 |
+
"metadata": {
|
1628 |
+
"version": 0.0
|
1629 |
+
}
|
1630 |
+
},
|
1631 |
+
"mmlu_human_aging": {
|
1632 |
+
"task": "mmlu_human_aging",
|
1633 |
+
"task_alias": "human_aging",
|
1634 |
+
"group": "mmlu_other",
|
1635 |
+
"group_alias": "other",
|
1636 |
+
"dataset_path": "hails/mmlu_no_train",
|
1637 |
+
"dataset_name": "human_aging",
|
1638 |
+
"test_split": "test",
|
1639 |
+
"fewshot_split": "dev",
|
1640 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1641 |
+
"doc_to_target": "answer",
|
1642 |
+
"doc_to_choice": [
|
1643 |
+
"A",
|
1644 |
+
"B",
|
1645 |
+
"C",
|
1646 |
+
"D"
|
1647 |
+
],
|
1648 |
+
"description": "The following are multiple choice questions (with answers) about human aging.\n\n",
|
1649 |
+
"target_delimiter": " ",
|
1650 |
+
"fewshot_delimiter": "\n\n",
|
1651 |
+
"fewshot_config": {
|
1652 |
+
"sampler": "first_n"
|
1653 |
+
},
|
1654 |
+
"num_fewshot": 5,
|
1655 |
+
"metric_list": [
|
1656 |
+
{
|
1657 |
+
"metric": "acc",
|
1658 |
+
"aggregation": "mean",
|
1659 |
+
"higher_is_better": true
|
1660 |
+
}
|
1661 |
+
],
|
1662 |
+
"output_type": "multiple_choice",
|
1663 |
+
"repeats": 1,
|
1664 |
+
"should_decontaminate": false,
|
1665 |
+
"metadata": {
|
1666 |
+
"version": 0.0
|
1667 |
+
}
|
1668 |
+
},
|
1669 |
+
"mmlu_human_sexuality": {
|
1670 |
+
"task": "mmlu_human_sexuality",
|
1671 |
+
"task_alias": "human_sexuality",
|
1672 |
+
"group": "mmlu_social_sciences",
|
1673 |
+
"group_alias": "social_sciences",
|
1674 |
+
"dataset_path": "hails/mmlu_no_train",
|
1675 |
+
"dataset_name": "human_sexuality",
|
1676 |
+
"test_split": "test",
|
1677 |
+
"fewshot_split": "dev",
|
1678 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1679 |
+
"doc_to_target": "answer",
|
1680 |
+
"doc_to_choice": [
|
1681 |
+
"A",
|
1682 |
+
"B",
|
1683 |
+
"C",
|
1684 |
+
"D"
|
1685 |
+
],
|
1686 |
+
"description": "The following are multiple choice questions (with answers) about human sexuality.\n\n",
|
1687 |
+
"target_delimiter": " ",
|
1688 |
+
"fewshot_delimiter": "\n\n",
|
1689 |
+
"fewshot_config": {
|
1690 |
+
"sampler": "first_n"
|
1691 |
+
},
|
1692 |
+
"num_fewshot": 5,
|
1693 |
+
"metric_list": [
|
1694 |
+
{
|
1695 |
+
"metric": "acc",
|
1696 |
+
"aggregation": "mean",
|
1697 |
+
"higher_is_better": true
|
1698 |
+
}
|
1699 |
+
],
|
1700 |
+
"output_type": "multiple_choice",
|
1701 |
+
"repeats": 1,
|
1702 |
+
"should_decontaminate": false,
|
1703 |
+
"metadata": {
|
1704 |
+
"version": 0.0
|
1705 |
+
}
|
1706 |
+
},
|
1707 |
+
"mmlu_international_law": {
|
1708 |
+
"task": "mmlu_international_law",
|
1709 |
+
"task_alias": "international_law",
|
1710 |
+
"group": "mmlu_humanities",
|
1711 |
+
"group_alias": "humanities",
|
1712 |
+
"dataset_path": "hails/mmlu_no_train",
|
1713 |
+
"dataset_name": "international_law",
|
1714 |
+
"test_split": "test",
|
1715 |
+
"fewshot_split": "dev",
|
1716 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1717 |
+
"doc_to_target": "answer",
|
1718 |
+
"doc_to_choice": [
|
1719 |
+
"A",
|
1720 |
+
"B",
|
1721 |
+
"C",
|
1722 |
+
"D"
|
1723 |
+
],
|
1724 |
+
"description": "The following are multiple choice questions (with answers) about international law.\n\n",
|
1725 |
+
"target_delimiter": " ",
|
1726 |
+
"fewshot_delimiter": "\n\n",
|
1727 |
+
"fewshot_config": {
|
1728 |
+
"sampler": "first_n"
|
1729 |
+
},
|
1730 |
+
"num_fewshot": 5,
|
1731 |
+
"metric_list": [
|
1732 |
+
{
|
1733 |
+
"metric": "acc",
|
1734 |
+
"aggregation": "mean",
|
1735 |
+
"higher_is_better": true
|
1736 |
+
}
|
1737 |
+
],
|
1738 |
+
"output_type": "multiple_choice",
|
1739 |
+
"repeats": 1,
|
1740 |
+
"should_decontaminate": false,
|
1741 |
+
"metadata": {
|
1742 |
+
"version": 0.0
|
1743 |
+
}
|
1744 |
+
},
|
1745 |
+
"mmlu_jurisprudence": {
|
1746 |
+
"task": "mmlu_jurisprudence",
|
1747 |
+
"task_alias": "jurisprudence",
|
1748 |
+
"group": "mmlu_humanities",
|
1749 |
+
"group_alias": "humanities",
|
1750 |
+
"dataset_path": "hails/mmlu_no_train",
|
1751 |
+
"dataset_name": "jurisprudence",
|
1752 |
+
"test_split": "test",
|
1753 |
+
"fewshot_split": "dev",
|
1754 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1755 |
+
"doc_to_target": "answer",
|
1756 |
+
"doc_to_choice": [
|
1757 |
+
"A",
|
1758 |
+
"B",
|
1759 |
+
"C",
|
1760 |
+
"D"
|
1761 |
+
],
|
1762 |
+
"description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n",
|
1763 |
+
"target_delimiter": " ",
|
1764 |
+
"fewshot_delimiter": "\n\n",
|
1765 |
+
"fewshot_config": {
|
1766 |
+
"sampler": "first_n"
|
1767 |
+
},
|
1768 |
+
"num_fewshot": 5,
|
1769 |
+
"metric_list": [
|
1770 |
+
{
|
1771 |
+
"metric": "acc",
|
1772 |
+
"aggregation": "mean",
|
1773 |
+
"higher_is_better": true
|
1774 |
+
}
|
1775 |
+
],
|
1776 |
+
"output_type": "multiple_choice",
|
1777 |
+
"repeats": 1,
|
1778 |
+
"should_decontaminate": false,
|
1779 |
+
"metadata": {
|
1780 |
+
"version": 0.0
|
1781 |
+
}
|
1782 |
+
},
|
1783 |
+
"mmlu_logical_fallacies": {
|
1784 |
+
"task": "mmlu_logical_fallacies",
|
1785 |
+
"task_alias": "logical_fallacies",
|
1786 |
+
"group": "mmlu_humanities",
|
1787 |
+
"group_alias": "humanities",
|
1788 |
+
"dataset_path": "hails/mmlu_no_train",
|
1789 |
+
"dataset_name": "logical_fallacies",
|
1790 |
+
"test_split": "test",
|
1791 |
+
"fewshot_split": "dev",
|
1792 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1793 |
+
"doc_to_target": "answer",
|
1794 |
+
"doc_to_choice": [
|
1795 |
+
"A",
|
1796 |
+
"B",
|
1797 |
+
"C",
|
1798 |
+
"D"
|
1799 |
+
],
|
1800 |
+
"description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n",
|
1801 |
+
"target_delimiter": " ",
|
1802 |
+
"fewshot_delimiter": "\n\n",
|
1803 |
+
"fewshot_config": {
|
1804 |
+
"sampler": "first_n"
|
1805 |
+
},
|
1806 |
+
"num_fewshot": 5,
|
1807 |
+
"metric_list": [
|
1808 |
+
{
|
1809 |
+
"metric": "acc",
|
1810 |
+
"aggregation": "mean",
|
1811 |
+
"higher_is_better": true
|
1812 |
+
}
|
1813 |
+
],
|
1814 |
+
"output_type": "multiple_choice",
|
1815 |
+
"repeats": 1,
|
1816 |
+
"should_decontaminate": false,
|
1817 |
+
"metadata": {
|
1818 |
+
"version": 0.0
|
1819 |
+
}
|
1820 |
+
},
|
1821 |
+
"mmlu_machine_learning": {
|
1822 |
+
"task": "mmlu_machine_learning",
|
1823 |
+
"task_alias": "machine_learning",
|
1824 |
+
"group": "mmlu_stem",
|
1825 |
+
"group_alias": "stem",
|
1826 |
+
"dataset_path": "hails/mmlu_no_train",
|
1827 |
+
"dataset_name": "machine_learning",
|
1828 |
+
"test_split": "test",
|
1829 |
+
"fewshot_split": "dev",
|
1830 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1831 |
+
"doc_to_target": "answer",
|
1832 |
+
"doc_to_choice": [
|
1833 |
+
"A",
|
1834 |
+
"B",
|
1835 |
+
"C",
|
1836 |
+
"D"
|
1837 |
+
],
|
1838 |
+
"description": "The following are multiple choice questions (with answers) about machine learning.\n\n",
|
1839 |
+
"target_delimiter": " ",
|
1840 |
+
"fewshot_delimiter": "\n\n",
|
1841 |
+
"fewshot_config": {
|
1842 |
+
"sampler": "first_n"
|
1843 |
+
},
|
1844 |
+
"num_fewshot": 5,
|
1845 |
+
"metric_list": [
|
1846 |
+
{
|
1847 |
+
"metric": "acc",
|
1848 |
+
"aggregation": "mean",
|
1849 |
+
"higher_is_better": true
|
1850 |
+
}
|
1851 |
+
],
|
1852 |
+
"output_type": "multiple_choice",
|
1853 |
+
"repeats": 1,
|
1854 |
+
"should_decontaminate": false,
|
1855 |
+
"metadata": {
|
1856 |
+
"version": 0.0
|
1857 |
+
}
|
1858 |
+
},
|
1859 |
+
"mmlu_management": {
|
1860 |
+
"task": "mmlu_management",
|
1861 |
+
"task_alias": "management",
|
1862 |
+
"group": "mmlu_other",
|
1863 |
+
"group_alias": "other",
|
1864 |
+
"dataset_path": "hails/mmlu_no_train",
|
1865 |
+
"dataset_name": "management",
|
1866 |
+
"test_split": "test",
|
1867 |
+
"fewshot_split": "dev",
|
1868 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1869 |
+
"doc_to_target": "answer",
|
1870 |
+
"doc_to_choice": [
|
1871 |
+
"A",
|
1872 |
+
"B",
|
1873 |
+
"C",
|
1874 |
+
"D"
|
1875 |
+
],
|
1876 |
+
"description": "The following are multiple choice questions (with answers) about management.\n\n",
|
1877 |
+
"target_delimiter": " ",
|
1878 |
+
"fewshot_delimiter": "\n\n",
|
1879 |
+
"fewshot_config": {
|
1880 |
+
"sampler": "first_n"
|
1881 |
+
},
|
1882 |
+
"num_fewshot": 5,
|
1883 |
+
"metric_list": [
|
1884 |
+
{
|
1885 |
+
"metric": "acc",
|
1886 |
+
"aggregation": "mean",
|
1887 |
+
"higher_is_better": true
|
1888 |
+
}
|
1889 |
+
],
|
1890 |
+
"output_type": "multiple_choice",
|
1891 |
+
"repeats": 1,
|
1892 |
+
"should_decontaminate": false,
|
1893 |
+
"metadata": {
|
1894 |
+
"version": 0.0
|
1895 |
+
}
|
1896 |
+
},
|
1897 |
+
"mmlu_marketing": {
|
1898 |
+
"task": "mmlu_marketing",
|
1899 |
+
"task_alias": "marketing",
|
1900 |
+
"group": "mmlu_other",
|
1901 |
+
"group_alias": "other",
|
1902 |
+
"dataset_path": "hails/mmlu_no_train",
|
1903 |
+
"dataset_name": "marketing",
|
1904 |
+
"test_split": "test",
|
1905 |
+
"fewshot_split": "dev",
|
1906 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1907 |
+
"doc_to_target": "answer",
|
1908 |
+
"doc_to_choice": [
|
1909 |
+
"A",
|
1910 |
+
"B",
|
1911 |
+
"C",
|
1912 |
+
"D"
|
1913 |
+
],
|
1914 |
+
"description": "The following are multiple choice questions (with answers) about marketing.\n\n",
|
1915 |
+
"target_delimiter": " ",
|
1916 |
+
"fewshot_delimiter": "\n\n",
|
1917 |
+
"fewshot_config": {
|
1918 |
+
"sampler": "first_n"
|
1919 |
+
},
|
1920 |
+
"num_fewshot": 5,
|
1921 |
+
"metric_list": [
|
1922 |
+
{
|
1923 |
+
"metric": "acc",
|
1924 |
+
"aggregation": "mean",
|
1925 |
+
"higher_is_better": true
|
1926 |
+
}
|
1927 |
+
],
|
1928 |
+
"output_type": "multiple_choice",
|
1929 |
+
"repeats": 1,
|
1930 |
+
"should_decontaminate": false,
|
1931 |
+
"metadata": {
|
1932 |
+
"version": 0.0
|
1933 |
+
}
|
1934 |
+
},
|
1935 |
+
"mmlu_medical_genetics": {
|
1936 |
+
"task": "mmlu_medical_genetics",
|
1937 |
+
"task_alias": "medical_genetics",
|
1938 |
+
"group": "mmlu_other",
|
1939 |
+
"group_alias": "other",
|
1940 |
+
"dataset_path": "hails/mmlu_no_train",
|
1941 |
+
"dataset_name": "medical_genetics",
|
1942 |
+
"test_split": "test",
|
1943 |
+
"fewshot_split": "dev",
|
1944 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1945 |
+
"doc_to_target": "answer",
|
1946 |
+
"doc_to_choice": [
|
1947 |
+
"A",
|
1948 |
+
"B",
|
1949 |
+
"C",
|
1950 |
+
"D"
|
1951 |
+
],
|
1952 |
+
"description": "The following are multiple choice questions (with answers) about medical genetics.\n\n",
|
1953 |
+
"target_delimiter": " ",
|
1954 |
+
"fewshot_delimiter": "\n\n",
|
1955 |
+
"fewshot_config": {
|
1956 |
+
"sampler": "first_n"
|
1957 |
+
},
|
1958 |
+
"num_fewshot": 5,
|
1959 |
+
"metric_list": [
|
1960 |
+
{
|
1961 |
+
"metric": "acc",
|
1962 |
+
"aggregation": "mean",
|
1963 |
+
"higher_is_better": true
|
1964 |
+
}
|
1965 |
+
],
|
1966 |
+
"output_type": "multiple_choice",
|
1967 |
+
"repeats": 1,
|
1968 |
+
"should_decontaminate": false,
|
1969 |
+
"metadata": {
|
1970 |
+
"version": 0.0
|
1971 |
+
}
|
1972 |
+
},
|
1973 |
+
"mmlu_miscellaneous": {
|
1974 |
+
"task": "mmlu_miscellaneous",
|
1975 |
+
"task_alias": "miscellaneous",
|
1976 |
+
"group": "mmlu_other",
|
1977 |
+
"group_alias": "other",
|
1978 |
+
"dataset_path": "hails/mmlu_no_train",
|
1979 |
+
"dataset_name": "miscellaneous",
|
1980 |
+
"test_split": "test",
|
1981 |
+
"fewshot_split": "dev",
|
1982 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
1983 |
+
"doc_to_target": "answer",
|
1984 |
+
"doc_to_choice": [
|
1985 |
+
"A",
|
1986 |
+
"B",
|
1987 |
+
"C",
|
1988 |
+
"D"
|
1989 |
+
],
|
1990 |
+
"description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n",
|
1991 |
+
"target_delimiter": " ",
|
1992 |
+
"fewshot_delimiter": "\n\n",
|
1993 |
+
"fewshot_config": {
|
1994 |
+
"sampler": "first_n"
|
1995 |
+
},
|
1996 |
+
"num_fewshot": 5,
|
1997 |
+
"metric_list": [
|
1998 |
+
{
|
1999 |
+
"metric": "acc",
|
2000 |
+
"aggregation": "mean",
|
2001 |
+
"higher_is_better": true
|
2002 |
+
}
|
2003 |
+
],
|
2004 |
+
"output_type": "multiple_choice",
|
2005 |
+
"repeats": 1,
|
2006 |
+
"should_decontaminate": false,
|
2007 |
+
"metadata": {
|
2008 |
+
"version": 0.0
|
2009 |
+
}
|
2010 |
+
},
|
2011 |
+
"mmlu_moral_disputes": {
|
2012 |
+
"task": "mmlu_moral_disputes",
|
2013 |
+
"task_alias": "moral_disputes",
|
2014 |
+
"group": "mmlu_humanities",
|
2015 |
+
"group_alias": "humanities",
|
2016 |
+
"dataset_path": "hails/mmlu_no_train",
|
2017 |
+
"dataset_name": "moral_disputes",
|
2018 |
+
"test_split": "test",
|
2019 |
+
"fewshot_split": "dev",
|
2020 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2021 |
+
"doc_to_target": "answer",
|
2022 |
+
"doc_to_choice": [
|
2023 |
+
"A",
|
2024 |
+
"B",
|
2025 |
+
"C",
|
2026 |
+
"D"
|
2027 |
+
],
|
2028 |
+
"description": "The following are multiple choice questions (with answers) about moral disputes.\n\n",
|
2029 |
+
"target_delimiter": " ",
|
2030 |
+
"fewshot_delimiter": "\n\n",
|
2031 |
+
"fewshot_config": {
|
2032 |
+
"sampler": "first_n"
|
2033 |
+
},
|
2034 |
+
"num_fewshot": 5,
|
2035 |
+
"metric_list": [
|
2036 |
+
{
|
2037 |
+
"metric": "acc",
|
2038 |
+
"aggregation": "mean",
|
2039 |
+
"higher_is_better": true
|
2040 |
+
}
|
2041 |
+
],
|
2042 |
+
"output_type": "multiple_choice",
|
2043 |
+
"repeats": 1,
|
2044 |
+
"should_decontaminate": false,
|
2045 |
+
"metadata": {
|
2046 |
+
"version": 0.0
|
2047 |
+
}
|
2048 |
+
},
|
2049 |
+
"mmlu_moral_scenarios": {
|
2050 |
+
"task": "mmlu_moral_scenarios",
|
2051 |
+
"task_alias": "moral_scenarios",
|
2052 |
+
"group": "mmlu_humanities",
|
2053 |
+
"group_alias": "humanities",
|
2054 |
+
"dataset_path": "hails/mmlu_no_train",
|
2055 |
+
"dataset_name": "moral_scenarios",
|
2056 |
+
"test_split": "test",
|
2057 |
+
"fewshot_split": "dev",
|
2058 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2059 |
+
"doc_to_target": "answer",
|
2060 |
+
"doc_to_choice": [
|
2061 |
+
"A",
|
2062 |
+
"B",
|
2063 |
+
"C",
|
2064 |
+
"D"
|
2065 |
+
],
|
2066 |
+
"description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n",
|
2067 |
+
"target_delimiter": " ",
|
2068 |
+
"fewshot_delimiter": "\n\n",
|
2069 |
+
"fewshot_config": {
|
2070 |
+
"sampler": "first_n"
|
2071 |
+
},
|
2072 |
+
"num_fewshot": 5,
|
2073 |
+
"metric_list": [
|
2074 |
+
{
|
2075 |
+
"metric": "acc",
|
2076 |
+
"aggregation": "mean",
|
2077 |
+
"higher_is_better": true
|
2078 |
+
}
|
2079 |
+
],
|
2080 |
+
"output_type": "multiple_choice",
|
2081 |
+
"repeats": 1,
|
2082 |
+
"should_decontaminate": false,
|
2083 |
+
"metadata": {
|
2084 |
+
"version": 0.0
|
2085 |
+
}
|
2086 |
+
},
|
2087 |
+
"mmlu_nutrition": {
|
2088 |
+
"task": "mmlu_nutrition",
|
2089 |
+
"task_alias": "nutrition",
|
2090 |
+
"group": "mmlu_other",
|
2091 |
+
"group_alias": "other",
|
2092 |
+
"dataset_path": "hails/mmlu_no_train",
|
2093 |
+
"dataset_name": "nutrition",
|
2094 |
+
"test_split": "test",
|
2095 |
+
"fewshot_split": "dev",
|
2096 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2097 |
+
"doc_to_target": "answer",
|
2098 |
+
"doc_to_choice": [
|
2099 |
+
"A",
|
2100 |
+
"B",
|
2101 |
+
"C",
|
2102 |
+
"D"
|
2103 |
+
],
|
2104 |
+
"description": "The following are multiple choice questions (with answers) about nutrition.\n\n",
|
2105 |
+
"target_delimiter": " ",
|
2106 |
+
"fewshot_delimiter": "\n\n",
|
2107 |
+
"fewshot_config": {
|
2108 |
+
"sampler": "first_n"
|
2109 |
+
},
|
2110 |
+
"num_fewshot": 5,
|
2111 |
+
"metric_list": [
|
2112 |
+
{
|
2113 |
+
"metric": "acc",
|
2114 |
+
"aggregation": "mean",
|
2115 |
+
"higher_is_better": true
|
2116 |
+
}
|
2117 |
+
],
|
2118 |
+
"output_type": "multiple_choice",
|
2119 |
+
"repeats": 1,
|
2120 |
+
"should_decontaminate": false,
|
2121 |
+
"metadata": {
|
2122 |
+
"version": 0.0
|
2123 |
+
}
|
2124 |
+
},
|
2125 |
+
"mmlu_philosophy": {
|
2126 |
+
"task": "mmlu_philosophy",
|
2127 |
+
"task_alias": "philosophy",
|
2128 |
+
"group": "mmlu_humanities",
|
2129 |
+
"group_alias": "humanities",
|
2130 |
+
"dataset_path": "hails/mmlu_no_train",
|
2131 |
+
"dataset_name": "philosophy",
|
2132 |
+
"test_split": "test",
|
2133 |
+
"fewshot_split": "dev",
|
2134 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2135 |
+
"doc_to_target": "answer",
|
2136 |
+
"doc_to_choice": [
|
2137 |
+
"A",
|
2138 |
+
"B",
|
2139 |
+
"C",
|
2140 |
+
"D"
|
2141 |
+
],
|
2142 |
+
"description": "The following are multiple choice questions (with answers) about philosophy.\n\n",
|
2143 |
+
"target_delimiter": " ",
|
2144 |
+
"fewshot_delimiter": "\n\n",
|
2145 |
+
"fewshot_config": {
|
2146 |
+
"sampler": "first_n"
|
2147 |
+
},
|
2148 |
+
"num_fewshot": 5,
|
2149 |
+
"metric_list": [
|
2150 |
+
{
|
2151 |
+
"metric": "acc",
|
2152 |
+
"aggregation": "mean",
|
2153 |
+
"higher_is_better": true
|
2154 |
+
}
|
2155 |
+
],
|
2156 |
+
"output_type": "multiple_choice",
|
2157 |
+
"repeats": 1,
|
2158 |
+
"should_decontaminate": false,
|
2159 |
+
"metadata": {
|
2160 |
+
"version": 0.0
|
2161 |
+
}
|
2162 |
+
},
|
2163 |
+
"mmlu_prehistory": {
|
2164 |
+
"task": "mmlu_prehistory",
|
2165 |
+
"task_alias": "prehistory",
|
2166 |
+
"group": "mmlu_humanities",
|
2167 |
+
"group_alias": "humanities",
|
2168 |
+
"dataset_path": "hails/mmlu_no_train",
|
2169 |
+
"dataset_name": "prehistory",
|
2170 |
+
"test_split": "test",
|
2171 |
+
"fewshot_split": "dev",
|
2172 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2173 |
+
"doc_to_target": "answer",
|
2174 |
+
"doc_to_choice": [
|
2175 |
+
"A",
|
2176 |
+
"B",
|
2177 |
+
"C",
|
2178 |
+
"D"
|
2179 |
+
],
|
2180 |
+
"description": "The following are multiple choice questions (with answers) about prehistory.\n\n",
|
2181 |
+
"target_delimiter": " ",
|
2182 |
+
"fewshot_delimiter": "\n\n",
|
2183 |
+
"fewshot_config": {
|
2184 |
+
"sampler": "first_n"
|
2185 |
+
},
|
2186 |
+
"num_fewshot": 5,
|
2187 |
+
"metric_list": [
|
2188 |
+
{
|
2189 |
+
"metric": "acc",
|
2190 |
+
"aggregation": "mean",
|
2191 |
+
"higher_is_better": true
|
2192 |
+
}
|
2193 |
+
],
|
2194 |
+
"output_type": "multiple_choice",
|
2195 |
+
"repeats": 1,
|
2196 |
+
"should_decontaminate": false,
|
2197 |
+
"metadata": {
|
2198 |
+
"version": 0.0
|
2199 |
+
}
|
2200 |
+
},
|
2201 |
+
"mmlu_professional_accounting": {
|
2202 |
+
"task": "mmlu_professional_accounting",
|
2203 |
+
"task_alias": "professional_accounting",
|
2204 |
+
"group": "mmlu_other",
|
2205 |
+
"group_alias": "other",
|
2206 |
+
"dataset_path": "hails/mmlu_no_train",
|
2207 |
+
"dataset_name": "professional_accounting",
|
2208 |
+
"test_split": "test",
|
2209 |
+
"fewshot_split": "dev",
|
2210 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2211 |
+
"doc_to_target": "answer",
|
2212 |
+
"doc_to_choice": [
|
2213 |
+
"A",
|
2214 |
+
"B",
|
2215 |
+
"C",
|
2216 |
+
"D"
|
2217 |
+
],
|
2218 |
+
"description": "The following are multiple choice questions (with answers) about professional accounting.\n\n",
|
2219 |
+
"target_delimiter": " ",
|
2220 |
+
"fewshot_delimiter": "\n\n",
|
2221 |
+
"fewshot_config": {
|
2222 |
+
"sampler": "first_n"
|
2223 |
+
},
|
2224 |
+
"num_fewshot": 5,
|
2225 |
+
"metric_list": [
|
2226 |
+
{
|
2227 |
+
"metric": "acc",
|
2228 |
+
"aggregation": "mean",
|
2229 |
+
"higher_is_better": true
|
2230 |
+
}
|
2231 |
+
],
|
2232 |
+
"output_type": "multiple_choice",
|
2233 |
+
"repeats": 1,
|
2234 |
+
"should_decontaminate": false,
|
2235 |
+
"metadata": {
|
2236 |
+
"version": 0.0
|
2237 |
+
}
|
2238 |
+
},
|
2239 |
+
"mmlu_professional_law": {
|
2240 |
+
"task": "mmlu_professional_law",
|
2241 |
+
"task_alias": "professional_law",
|
2242 |
+
"group": "mmlu_humanities",
|
2243 |
+
"group_alias": "humanities",
|
2244 |
+
"dataset_path": "hails/mmlu_no_train",
|
2245 |
+
"dataset_name": "professional_law",
|
2246 |
+
"test_split": "test",
|
2247 |
+
"fewshot_split": "dev",
|
2248 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2249 |
+
"doc_to_target": "answer",
|
2250 |
+
"doc_to_choice": [
|
2251 |
+
"A",
|
2252 |
+
"B",
|
2253 |
+
"C",
|
2254 |
+
"D"
|
2255 |
+
],
|
2256 |
+
"description": "The following are multiple choice questions (with answers) about professional law.\n\n",
|
2257 |
+
"target_delimiter": " ",
|
2258 |
+
"fewshot_delimiter": "\n\n",
|
2259 |
+
"fewshot_config": {
|
2260 |
+
"sampler": "first_n"
|
2261 |
+
},
|
2262 |
+
"num_fewshot": 5,
|
2263 |
+
"metric_list": [
|
2264 |
+
{
|
2265 |
+
"metric": "acc",
|
2266 |
+
"aggregation": "mean",
|
2267 |
+
"higher_is_better": true
|
2268 |
+
}
|
2269 |
+
],
|
2270 |
+
"output_type": "multiple_choice",
|
2271 |
+
"repeats": 1,
|
2272 |
+
"should_decontaminate": false,
|
2273 |
+
"metadata": {
|
2274 |
+
"version": 0.0
|
2275 |
+
}
|
2276 |
+
},
|
2277 |
+
"mmlu_professional_medicine": {
|
2278 |
+
"task": "mmlu_professional_medicine",
|
2279 |
+
"task_alias": "professional_medicine",
|
2280 |
+
"group": "mmlu_other",
|
2281 |
+
"group_alias": "other",
|
2282 |
+
"dataset_path": "hails/mmlu_no_train",
|
2283 |
+
"dataset_name": "professional_medicine",
|
2284 |
+
"test_split": "test",
|
2285 |
+
"fewshot_split": "dev",
|
2286 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2287 |
+
"doc_to_target": "answer",
|
2288 |
+
"doc_to_choice": [
|
2289 |
+
"A",
|
2290 |
+
"B",
|
2291 |
+
"C",
|
2292 |
+
"D"
|
2293 |
+
],
|
2294 |
+
"description": "The following are multiple choice questions (with answers) about professional medicine.\n\n",
|
2295 |
+
"target_delimiter": " ",
|
2296 |
+
"fewshot_delimiter": "\n\n",
|
2297 |
+
"fewshot_config": {
|
2298 |
+
"sampler": "first_n"
|
2299 |
+
},
|
2300 |
+
"num_fewshot": 5,
|
2301 |
+
"metric_list": [
|
2302 |
+
{
|
2303 |
+
"metric": "acc",
|
2304 |
+
"aggregation": "mean",
|
2305 |
+
"higher_is_better": true
|
2306 |
+
}
|
2307 |
+
],
|
2308 |
+
"output_type": "multiple_choice",
|
2309 |
+
"repeats": 1,
|
2310 |
+
"should_decontaminate": false,
|
2311 |
+
"metadata": {
|
2312 |
+
"version": 0.0
|
2313 |
+
}
|
2314 |
+
},
|
2315 |
+
"mmlu_professional_psychology": {
|
2316 |
+
"task": "mmlu_professional_psychology",
|
2317 |
+
"task_alias": "professional_psychology",
|
2318 |
+
"group": "mmlu_social_sciences",
|
2319 |
+
"group_alias": "social_sciences",
|
2320 |
+
"dataset_path": "hails/mmlu_no_train",
|
2321 |
+
"dataset_name": "professional_psychology",
|
2322 |
+
"test_split": "test",
|
2323 |
+
"fewshot_split": "dev",
|
2324 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2325 |
+
"doc_to_target": "answer",
|
2326 |
+
"doc_to_choice": [
|
2327 |
+
"A",
|
2328 |
+
"B",
|
2329 |
+
"C",
|
2330 |
+
"D"
|
2331 |
+
],
|
2332 |
+
"description": "The following are multiple choice questions (with answers) about professional psychology.\n\n",
|
2333 |
+
"target_delimiter": " ",
|
2334 |
+
"fewshot_delimiter": "\n\n",
|
2335 |
+
"fewshot_config": {
|
2336 |
+
"sampler": "first_n"
|
2337 |
+
},
|
2338 |
+
"num_fewshot": 5,
|
2339 |
+
"metric_list": [
|
2340 |
+
{
|
2341 |
+
"metric": "acc",
|
2342 |
+
"aggregation": "mean",
|
2343 |
+
"higher_is_better": true
|
2344 |
+
}
|
2345 |
+
],
|
2346 |
+
"output_type": "multiple_choice",
|
2347 |
+
"repeats": 1,
|
2348 |
+
"should_decontaminate": false,
|
2349 |
+
"metadata": {
|
2350 |
+
"version": 0.0
|
2351 |
+
}
|
2352 |
+
},
|
2353 |
+
"mmlu_public_relations": {
|
2354 |
+
"task": "mmlu_public_relations",
|
2355 |
+
"task_alias": "public_relations",
|
2356 |
+
"group": "mmlu_social_sciences",
|
2357 |
+
"group_alias": "social_sciences",
|
2358 |
+
"dataset_path": "hails/mmlu_no_train",
|
2359 |
+
"dataset_name": "public_relations",
|
2360 |
+
"test_split": "test",
|
2361 |
+
"fewshot_split": "dev",
|
2362 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2363 |
+
"doc_to_target": "answer",
|
2364 |
+
"doc_to_choice": [
|
2365 |
+
"A",
|
2366 |
+
"B",
|
2367 |
+
"C",
|
2368 |
+
"D"
|
2369 |
+
],
|
2370 |
+
"description": "The following are multiple choice questions (with answers) about public relations.\n\n",
|
2371 |
+
"target_delimiter": " ",
|
2372 |
+
"fewshot_delimiter": "\n\n",
|
2373 |
+
"fewshot_config": {
|
2374 |
+
"sampler": "first_n"
|
2375 |
+
},
|
2376 |
+
"num_fewshot": 5,
|
2377 |
+
"metric_list": [
|
2378 |
+
{
|
2379 |
+
"metric": "acc",
|
2380 |
+
"aggregation": "mean",
|
2381 |
+
"higher_is_better": true
|
2382 |
+
}
|
2383 |
+
],
|
2384 |
+
"output_type": "multiple_choice",
|
2385 |
+
"repeats": 1,
|
2386 |
+
"should_decontaminate": false,
|
2387 |
+
"metadata": {
|
2388 |
+
"version": 0.0
|
2389 |
+
}
|
2390 |
+
},
|
2391 |
+
"mmlu_security_studies": {
|
2392 |
+
"task": "mmlu_security_studies",
|
2393 |
+
"task_alias": "security_studies",
|
2394 |
+
"group": "mmlu_social_sciences",
|
2395 |
+
"group_alias": "social_sciences",
|
2396 |
+
"dataset_path": "hails/mmlu_no_train",
|
2397 |
+
"dataset_name": "security_studies",
|
2398 |
+
"test_split": "test",
|
2399 |
+
"fewshot_split": "dev",
|
2400 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2401 |
+
"doc_to_target": "answer",
|
2402 |
+
"doc_to_choice": [
|
2403 |
+
"A",
|
2404 |
+
"B",
|
2405 |
+
"C",
|
2406 |
+
"D"
|
2407 |
+
],
|
2408 |
+
"description": "The following are multiple choice questions (with answers) about security studies.\n\n",
|
2409 |
+
"target_delimiter": " ",
|
2410 |
+
"fewshot_delimiter": "\n\n",
|
2411 |
+
"fewshot_config": {
|
2412 |
+
"sampler": "first_n"
|
2413 |
+
},
|
2414 |
+
"num_fewshot": 5,
|
2415 |
+
"metric_list": [
|
2416 |
+
{
|
2417 |
+
"metric": "acc",
|
2418 |
+
"aggregation": "mean",
|
2419 |
+
"higher_is_better": true
|
2420 |
+
}
|
2421 |
+
],
|
2422 |
+
"output_type": "multiple_choice",
|
2423 |
+
"repeats": 1,
|
2424 |
+
"should_decontaminate": false,
|
2425 |
+
"metadata": {
|
2426 |
+
"version": 0.0
|
2427 |
+
}
|
2428 |
+
},
|
2429 |
+
"mmlu_sociology": {
|
2430 |
+
"task": "mmlu_sociology",
|
2431 |
+
"task_alias": "sociology",
|
2432 |
+
"group": "mmlu_social_sciences",
|
2433 |
+
"group_alias": "social_sciences",
|
2434 |
+
"dataset_path": "hails/mmlu_no_train",
|
2435 |
+
"dataset_name": "sociology",
|
2436 |
+
"test_split": "test",
|
2437 |
+
"fewshot_split": "dev",
|
2438 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2439 |
+
"doc_to_target": "answer",
|
2440 |
+
"doc_to_choice": [
|
2441 |
+
"A",
|
2442 |
+
"B",
|
2443 |
+
"C",
|
2444 |
+
"D"
|
2445 |
+
],
|
2446 |
+
"description": "The following are multiple choice questions (with answers) about sociology.\n\n",
|
2447 |
+
"target_delimiter": " ",
|
2448 |
+
"fewshot_delimiter": "\n\n",
|
2449 |
+
"fewshot_config": {
|
2450 |
+
"sampler": "first_n"
|
2451 |
+
},
|
2452 |
+
"num_fewshot": 5,
|
2453 |
+
"metric_list": [
|
2454 |
+
{
|
2455 |
+
"metric": "acc",
|
2456 |
+
"aggregation": "mean",
|
2457 |
+
"higher_is_better": true
|
2458 |
+
}
|
2459 |
+
],
|
2460 |
+
"output_type": "multiple_choice",
|
2461 |
+
"repeats": 1,
|
2462 |
+
"should_decontaminate": false,
|
2463 |
+
"metadata": {
|
2464 |
+
"version": 0.0
|
2465 |
+
}
|
2466 |
+
},
|
2467 |
+
"mmlu_us_foreign_policy": {
|
2468 |
+
"task": "mmlu_us_foreign_policy",
|
2469 |
+
"task_alias": "us_foreign_policy",
|
2470 |
+
"group": "mmlu_social_sciences",
|
2471 |
+
"group_alias": "social_sciences",
|
2472 |
+
"dataset_path": "hails/mmlu_no_train",
|
2473 |
+
"dataset_name": "us_foreign_policy",
|
2474 |
+
"test_split": "test",
|
2475 |
+
"fewshot_split": "dev",
|
2476 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2477 |
+
"doc_to_target": "answer",
|
2478 |
+
"doc_to_choice": [
|
2479 |
+
"A",
|
2480 |
+
"B",
|
2481 |
+
"C",
|
2482 |
+
"D"
|
2483 |
+
],
|
2484 |
+
"description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n",
|
2485 |
+
"target_delimiter": " ",
|
2486 |
+
"fewshot_delimiter": "\n\n",
|
2487 |
+
"fewshot_config": {
|
2488 |
+
"sampler": "first_n"
|
2489 |
+
},
|
2490 |
+
"num_fewshot": 5,
|
2491 |
+
"metric_list": [
|
2492 |
+
{
|
2493 |
+
"metric": "acc",
|
2494 |
+
"aggregation": "mean",
|
2495 |
+
"higher_is_better": true
|
2496 |
+
}
|
2497 |
+
],
|
2498 |
+
"output_type": "multiple_choice",
|
2499 |
+
"repeats": 1,
|
2500 |
+
"should_decontaminate": false,
|
2501 |
+
"metadata": {
|
2502 |
+
"version": 0.0
|
2503 |
+
}
|
2504 |
+
},
|
2505 |
+
"mmlu_virology": {
|
2506 |
+
"task": "mmlu_virology",
|
2507 |
+
"task_alias": "virology",
|
2508 |
+
"group": "mmlu_other",
|
2509 |
+
"group_alias": "other",
|
2510 |
+
"dataset_path": "hails/mmlu_no_train",
|
2511 |
+
"dataset_name": "virology",
|
2512 |
+
"test_split": "test",
|
2513 |
+
"fewshot_split": "dev",
|
2514 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2515 |
+
"doc_to_target": "answer",
|
2516 |
+
"doc_to_choice": [
|
2517 |
+
"A",
|
2518 |
+
"B",
|
2519 |
+
"C",
|
2520 |
+
"D"
|
2521 |
+
],
|
2522 |
+
"description": "The following are multiple choice questions (with answers) about virology.\n\n",
|
2523 |
+
"target_delimiter": " ",
|
2524 |
+
"fewshot_delimiter": "\n\n",
|
2525 |
+
"fewshot_config": {
|
2526 |
+
"sampler": "first_n"
|
2527 |
+
},
|
2528 |
+
"num_fewshot": 5,
|
2529 |
+
"metric_list": [
|
2530 |
+
{
|
2531 |
+
"metric": "acc",
|
2532 |
+
"aggregation": "mean",
|
2533 |
+
"higher_is_better": true
|
2534 |
+
}
|
2535 |
+
],
|
2536 |
+
"output_type": "multiple_choice",
|
2537 |
+
"repeats": 1,
|
2538 |
+
"should_decontaminate": false,
|
2539 |
+
"metadata": {
|
2540 |
+
"version": 0.0
|
2541 |
+
}
|
2542 |
+
},
|
2543 |
+
"mmlu_world_religions": {
|
2544 |
+
"task": "mmlu_world_religions",
|
2545 |
+
"task_alias": "world_religions",
|
2546 |
+
"group": "mmlu_humanities",
|
2547 |
+
"group_alias": "humanities",
|
2548 |
+
"dataset_path": "hails/mmlu_no_train",
|
2549 |
+
"dataset_name": "world_religions",
|
2550 |
+
"test_split": "test",
|
2551 |
+
"fewshot_split": "dev",
|
2552 |
+
"doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:",
|
2553 |
+
"doc_to_target": "answer",
|
2554 |
+
"doc_to_choice": [
|
2555 |
+
"A",
|
2556 |
+
"B",
|
2557 |
+
"C",
|
2558 |
+
"D"
|
2559 |
+
],
|
2560 |
+
"description": "The following are multiple choice questions (with answers) about world religions.\n\n",
|
2561 |
+
"target_delimiter": " ",
|
2562 |
+
"fewshot_delimiter": "\n\n",
|
2563 |
+
"fewshot_config": {
|
2564 |
+
"sampler": "first_n"
|
2565 |
+
},
|
2566 |
+
"num_fewshot": 5,
|
2567 |
+
"metric_list": [
|
2568 |
+
{
|
2569 |
+
"metric": "acc",
|
2570 |
+
"aggregation": "mean",
|
2571 |
+
"higher_is_better": true
|
2572 |
+
}
|
2573 |
+
],
|
2574 |
+
"output_type": "multiple_choice",
|
2575 |
+
"repeats": 1,
|
2576 |
+
"should_decontaminate": false,
|
2577 |
+
"metadata": {
|
2578 |
+
"version": 0.0
|
2579 |
+
}
|
2580 |
+
}
|
2581 |
+
},
|
2582 |
+
"versions": {
|
2583 |
+
"mmlu_abstract_algebra": 0.0,
|
2584 |
+
"mmlu_anatomy": 0.0,
|
2585 |
+
"mmlu_astronomy": 0.0,
|
2586 |
+
"mmlu_business_ethics": 0.0,
|
2587 |
+
"mmlu_clinical_knowledge": 0.0,
|
2588 |
+
"mmlu_college_biology": 0.0,
|
2589 |
+
"mmlu_college_chemistry": 0.0,
|
2590 |
+
"mmlu_college_computer_science": 0.0,
|
2591 |
+
"mmlu_college_mathematics": 0.0,
|
2592 |
+
"mmlu_college_medicine": 0.0,
|
2593 |
+
"mmlu_college_physics": 0.0,
|
2594 |
+
"mmlu_computer_security": 0.0,
|
2595 |
+
"mmlu_conceptual_physics": 0.0,
|
2596 |
+
"mmlu_econometrics": 0.0,
|
2597 |
+
"mmlu_electrical_engineering": 0.0,
|
2598 |
+
"mmlu_elementary_mathematics": 0.0,
|
2599 |
+
"mmlu_formal_logic": 0.0,
|
2600 |
+
"mmlu_global_facts": 0.0,
|
2601 |
+
"mmlu_high_school_biology": 0.0,
|
2602 |
+
"mmlu_high_school_chemistry": 0.0,
|
2603 |
+
"mmlu_high_school_computer_science": 0.0,
|
2604 |
+
"mmlu_high_school_european_history": 0.0,
|
2605 |
+
"mmlu_high_school_geography": 0.0,
|
2606 |
+
"mmlu_high_school_government_and_politics": 0.0,
|
2607 |
+
"mmlu_high_school_macroeconomics": 0.0,
|
2608 |
+
"mmlu_high_school_mathematics": 0.0,
|
2609 |
+
"mmlu_high_school_microeconomics": 0.0,
|
2610 |
+
"mmlu_high_school_physics": 0.0,
|
2611 |
+
"mmlu_high_school_psychology": 0.0,
|
2612 |
+
"mmlu_high_school_statistics": 0.0,
|
2613 |
+
"mmlu_high_school_us_history": 0.0,
|
2614 |
+
"mmlu_high_school_world_history": 0.0,
|
2615 |
+
"mmlu_human_aging": 0.0,
|
2616 |
+
"mmlu_human_sexuality": 0.0,
|
2617 |
+
"mmlu_international_law": 0.0,
|
2618 |
+
"mmlu_jurisprudence": 0.0,
|
2619 |
+
"mmlu_logical_fallacies": 0.0,
|
2620 |
+
"mmlu_machine_learning": 0.0,
|
2621 |
+
"mmlu_management": 0.0,
|
2622 |
+
"mmlu_marketing": 0.0,
|
2623 |
+
"mmlu_medical_genetics": 0.0,
|
2624 |
+
"mmlu_miscellaneous": 0.0,
|
2625 |
+
"mmlu_moral_disputes": 0.0,
|
2626 |
+
"mmlu_moral_scenarios": 0.0,
|
2627 |
+
"mmlu_nutrition": 0.0,
|
2628 |
+
"mmlu_philosophy": 0.0,
|
2629 |
+
"mmlu_prehistory": 0.0,
|
2630 |
+
"mmlu_professional_accounting": 0.0,
|
2631 |
+
"mmlu_professional_law": 0.0,
|
2632 |
+
"mmlu_professional_medicine": 0.0,
|
2633 |
+
"mmlu_professional_psychology": 0.0,
|
2634 |
+
"mmlu_public_relations": 0.0,
|
2635 |
+
"mmlu_security_studies": 0.0,
|
2636 |
+
"mmlu_sociology": 0.0,
|
2637 |
+
"mmlu_us_foreign_policy": 0.0,
|
2638 |
+
"mmlu_virology": 0.0,
|
2639 |
+
"mmlu_world_religions": 0.0
|
2640 |
+
},
|
2641 |
+
"n-shot": {
|
2642 |
+
"mmlu": 0,
|
2643 |
+
"mmlu_abstract_algebra": 5,
|
2644 |
+
"mmlu_anatomy": 5,
|
2645 |
+
"mmlu_astronomy": 5,
|
2646 |
+
"mmlu_business_ethics": 5,
|
2647 |
+
"mmlu_clinical_knowledge": 5,
|
2648 |
+
"mmlu_college_biology": 5,
|
2649 |
+
"mmlu_college_chemistry": 5,
|
2650 |
+
"mmlu_college_computer_science": 5,
|
2651 |
+
"mmlu_college_mathematics": 5,
|
2652 |
+
"mmlu_college_medicine": 5,
|
2653 |
+
"mmlu_college_physics": 5,
|
2654 |
+
"mmlu_computer_security": 5,
|
2655 |
+
"mmlu_conceptual_physics": 5,
|
2656 |
+
"mmlu_econometrics": 5,
|
2657 |
+
"mmlu_electrical_engineering": 5,
|
2658 |
+
"mmlu_elementary_mathematics": 5,
|
2659 |
+
"mmlu_formal_logic": 5,
|
2660 |
+
"mmlu_global_facts": 5,
|
2661 |
+
"mmlu_high_school_biology": 5,
|
2662 |
+
"mmlu_high_school_chemistry": 5,
|
2663 |
+
"mmlu_high_school_computer_science": 5,
|
2664 |
+
"mmlu_high_school_european_history": 5,
|
2665 |
+
"mmlu_high_school_geography": 5,
|
2666 |
+
"mmlu_high_school_government_and_politics": 5,
|
2667 |
+
"mmlu_high_school_macroeconomics": 5,
|
2668 |
+
"mmlu_high_school_mathematics": 5,
|
2669 |
+
"mmlu_high_school_microeconomics": 5,
|
2670 |
+
"mmlu_high_school_physics": 5,
|
2671 |
+
"mmlu_high_school_psychology": 5,
|
2672 |
+
"mmlu_high_school_statistics": 5,
|
2673 |
+
"mmlu_high_school_us_history": 5,
|
2674 |
+
"mmlu_high_school_world_history": 5,
|
2675 |
+
"mmlu_human_aging": 5,
|
2676 |
+
"mmlu_human_sexuality": 5,
|
2677 |
+
"mmlu_humanities": 5,
|
2678 |
+
"mmlu_international_law": 5,
|
2679 |
+
"mmlu_jurisprudence": 5,
|
2680 |
+
"mmlu_logical_fallacies": 5,
|
2681 |
+
"mmlu_machine_learning": 5,
|
2682 |
+
"mmlu_management": 5,
|
2683 |
+
"mmlu_marketing": 5,
|
2684 |
+
"mmlu_medical_genetics": 5,
|
2685 |
+
"mmlu_miscellaneous": 5,
|
2686 |
+
"mmlu_moral_disputes": 5,
|
2687 |
+
"mmlu_moral_scenarios": 5,
|
2688 |
+
"mmlu_nutrition": 5,
|
2689 |
+
"mmlu_other": 5,
|
2690 |
+
"mmlu_philosophy": 5,
|
2691 |
+
"mmlu_prehistory": 5,
|
2692 |
+
"mmlu_professional_accounting": 5,
|
2693 |
+
"mmlu_professional_law": 5,
|
2694 |
+
"mmlu_professional_medicine": 5,
|
2695 |
+
"mmlu_professional_psychology": 5,
|
2696 |
+
"mmlu_public_relations": 5,
|
2697 |
+
"mmlu_security_studies": 5,
|
2698 |
+
"mmlu_social_sciences": 5,
|
2699 |
+
"mmlu_sociology": 5,
|
2700 |
+
"mmlu_stem": 5,
|
2701 |
+
"mmlu_us_foreign_policy": 5,
|
2702 |
+
"mmlu_virology": 5,
|
2703 |
+
"mmlu_world_religions": 5
|
2704 |
+
},
|
2705 |
+
"higher_is_better": {
|
2706 |
+
"mmlu": {
|
2707 |
+
"acc": true
|
2708 |
+
},
|
2709 |
+
"mmlu_abstract_algebra": {
|
2710 |
+
"acc": true
|
2711 |
+
},
|
2712 |
+
"mmlu_anatomy": {
|
2713 |
+
"acc": true
|
2714 |
+
},
|
2715 |
+
"mmlu_astronomy": {
|
2716 |
+
"acc": true
|
2717 |
+
},
|
2718 |
+
"mmlu_business_ethics": {
|
2719 |
+
"acc": true
|
2720 |
+
},
|
2721 |
+
"mmlu_clinical_knowledge": {
|
2722 |
+
"acc": true
|
2723 |
+
},
|
2724 |
+
"mmlu_college_biology": {
|
2725 |
+
"acc": true
|
2726 |
+
},
|
2727 |
+
"mmlu_college_chemistry": {
|
2728 |
+
"acc": true
|
2729 |
+
},
|
2730 |
+
"mmlu_college_computer_science": {
|
2731 |
+
"acc": true
|
2732 |
+
},
|
2733 |
+
"mmlu_college_mathematics": {
|
2734 |
+
"acc": true
|
2735 |
+
},
|
2736 |
+
"mmlu_college_medicine": {
|
2737 |
+
"acc": true
|
2738 |
+
},
|
2739 |
+
"mmlu_college_physics": {
|
2740 |
+
"acc": true
|
2741 |
+
},
|
2742 |
+
"mmlu_computer_security": {
|
2743 |
+
"acc": true
|
2744 |
+
},
|
2745 |
+
"mmlu_conceptual_physics": {
|
2746 |
+
"acc": true
|
2747 |
+
},
|
2748 |
+
"mmlu_econometrics": {
|
2749 |
+
"acc": true
|
2750 |
+
},
|
2751 |
+
"mmlu_electrical_engineering": {
|
2752 |
+
"acc": true
|
2753 |
+
},
|
2754 |
+
"mmlu_elementary_mathematics": {
|
2755 |
+
"acc": true
|
2756 |
+
},
|
2757 |
+
"mmlu_formal_logic": {
|
2758 |
+
"acc": true
|
2759 |
+
},
|
2760 |
+
"mmlu_global_facts": {
|
2761 |
+
"acc": true
|
2762 |
+
},
|
2763 |
+
"mmlu_high_school_biology": {
|
2764 |
+
"acc": true
|
2765 |
+
},
|
2766 |
+
"mmlu_high_school_chemistry": {
|
2767 |
+
"acc": true
|
2768 |
+
},
|
2769 |
+
"mmlu_high_school_computer_science": {
|
2770 |
+
"acc": true
|
2771 |
+
},
|
2772 |
+
"mmlu_high_school_european_history": {
|
2773 |
+
"acc": true
|
2774 |
+
},
|
2775 |
+
"mmlu_high_school_geography": {
|
2776 |
+
"acc": true
|
2777 |
+
},
|
2778 |
+
"mmlu_high_school_government_and_politics": {
|
2779 |
+
"acc": true
|
2780 |
+
},
|
2781 |
+
"mmlu_high_school_macroeconomics": {
|
2782 |
+
"acc": true
|
2783 |
+
},
|
2784 |
+
"mmlu_high_school_mathematics": {
|
2785 |
+
"acc": true
|
2786 |
+
},
|
2787 |
+
"mmlu_high_school_microeconomics": {
|
2788 |
+
"acc": true
|
2789 |
+
},
|
2790 |
+
"mmlu_high_school_physics": {
|
2791 |
+
"acc": true
|
2792 |
+
},
|
2793 |
+
"mmlu_high_school_psychology": {
|
2794 |
+
"acc": true
|
2795 |
+
},
|
2796 |
+
"mmlu_high_school_statistics": {
|
2797 |
+
"acc": true
|
2798 |
+
},
|
2799 |
+
"mmlu_high_school_us_history": {
|
2800 |
+
"acc": true
|
2801 |
+
},
|
2802 |
+
"mmlu_high_school_world_history": {
|
2803 |
+
"acc": true
|
2804 |
+
},
|
2805 |
+
"mmlu_human_aging": {
|
2806 |
+
"acc": true
|
2807 |
+
},
|
2808 |
+
"mmlu_human_sexuality": {
|
2809 |
+
"acc": true
|
2810 |
+
},
|
2811 |
+
"mmlu_humanities": {
|
2812 |
+
"acc": true
|
2813 |
+
},
|
2814 |
+
"mmlu_international_law": {
|
2815 |
+
"acc": true
|
2816 |
+
},
|
2817 |
+
"mmlu_jurisprudence": {
|
2818 |
+
"acc": true
|
2819 |
+
},
|
2820 |
+
"mmlu_logical_fallacies": {
|
2821 |
+
"acc": true
|
2822 |
+
},
|
2823 |
+
"mmlu_machine_learning": {
|
2824 |
+
"acc": true
|
2825 |
+
},
|
2826 |
+
"mmlu_management": {
|
2827 |
+
"acc": true
|
2828 |
+
},
|
2829 |
+
"mmlu_marketing": {
|
2830 |
+
"acc": true
|
2831 |
+
},
|
2832 |
+
"mmlu_medical_genetics": {
|
2833 |
+
"acc": true
|
2834 |
+
},
|
2835 |
+
"mmlu_miscellaneous": {
|
2836 |
+
"acc": true
|
2837 |
+
},
|
2838 |
+
"mmlu_moral_disputes": {
|
2839 |
+
"acc": true
|
2840 |
+
},
|
2841 |
+
"mmlu_moral_scenarios": {
|
2842 |
+
"acc": true
|
2843 |
+
},
|
2844 |
+
"mmlu_nutrition": {
|
2845 |
+
"acc": true
|
2846 |
+
},
|
2847 |
+
"mmlu_other": {
|
2848 |
+
"acc": true
|
2849 |
+
},
|
2850 |
+
"mmlu_philosophy": {
|
2851 |
+
"acc": true
|
2852 |
+
},
|
2853 |
+
"mmlu_prehistory": {
|
2854 |
+
"acc": true
|
2855 |
+
},
|
2856 |
+
"mmlu_professional_accounting": {
|
2857 |
+
"acc": true
|
2858 |
+
},
|
2859 |
+
"mmlu_professional_law": {
|
2860 |
+
"acc": true
|
2861 |
+
},
|
2862 |
+
"mmlu_professional_medicine": {
|
2863 |
+
"acc": true
|
2864 |
+
},
|
2865 |
+
"mmlu_professional_psychology": {
|
2866 |
+
"acc": true
|
2867 |
+
},
|
2868 |
+
"mmlu_public_relations": {
|
2869 |
+
"acc": true
|
2870 |
+
},
|
2871 |
+
"mmlu_security_studies": {
|
2872 |
+
"acc": true
|
2873 |
+
},
|
2874 |
+
"mmlu_social_sciences": {
|
2875 |
+
"acc": true
|
2876 |
+
},
|
2877 |
+
"mmlu_sociology": {
|
2878 |
+
"acc": true
|
2879 |
+
},
|
2880 |
+
"mmlu_stem": {
|
2881 |
+
"acc": true
|
2882 |
+
},
|
2883 |
+
"mmlu_us_foreign_policy": {
|
2884 |
+
"acc": true
|
2885 |
+
},
|
2886 |
+
"mmlu_virology": {
|
2887 |
+
"acc": true
|
2888 |
+
},
|
2889 |
+
"mmlu_world_religions": {
|
2890 |
+
"acc": true
|
2891 |
+
}
|
2892 |
+
},
|
2893 |
+
"n-samples": {
|
2894 |
+
"mmlu_high_school_european_history": {
|
2895 |
+
"original": 165,
|
2896 |
+
"effective": 165
|
2897 |
+
},
|
2898 |
+
"mmlu_high_school_world_history": {
|
2899 |
+
"original": 237,
|
2900 |
+
"effective": 237
|
2901 |
+
},
|
2902 |
+
"mmlu_professional_law": {
|
2903 |
+
"original": 1534,
|
2904 |
+
"effective": 1534
|
2905 |
+
},
|
2906 |
+
"mmlu_logical_fallacies": {
|
2907 |
+
"original": 163,
|
2908 |
+
"effective": 163
|
2909 |
+
},
|
2910 |
+
"mmlu_high_school_us_history": {
|
2911 |
+
"original": 204,
|
2912 |
+
"effective": 204
|
2913 |
+
},
|
2914 |
+
"mmlu_world_religions": {
|
2915 |
+
"original": 171,
|
2916 |
+
"effective": 171
|
2917 |
+
},
|
2918 |
+
"mmlu_prehistory": {
|
2919 |
+
"original": 324,
|
2920 |
+
"effective": 324
|
2921 |
+
},
|
2922 |
+
"mmlu_jurisprudence": {
|
2923 |
+
"original": 108,
|
2924 |
+
"effective": 108
|
2925 |
+
},
|
2926 |
+
"mmlu_moral_scenarios": {
|
2927 |
+
"original": 895,
|
2928 |
+
"effective": 895
|
2929 |
+
},
|
2930 |
+
"mmlu_formal_logic": {
|
2931 |
+
"original": 126,
|
2932 |
+
"effective": 126
|
2933 |
+
},
|
2934 |
+
"mmlu_philosophy": {
|
2935 |
+
"original": 311,
|
2936 |
+
"effective": 311
|
2937 |
+
},
|
2938 |
+
"mmlu_international_law": {
|
2939 |
+
"original": 121,
|
2940 |
+
"effective": 121
|
2941 |
+
},
|
2942 |
+
"mmlu_moral_disputes": {
|
2943 |
+
"original": 346,
|
2944 |
+
"effective": 346
|
2945 |
+
},
|
2946 |
+
"mmlu_high_school_government_and_politics": {
|
2947 |
+
"original": 193,
|
2948 |
+
"effective": 193
|
2949 |
+
},
|
2950 |
+
"mmlu_human_sexuality": {
|
2951 |
+
"original": 131,
|
2952 |
+
"effective": 131
|
2953 |
+
},
|
2954 |
+
"mmlu_high_school_microeconomics": {
|
2955 |
+
"original": 238,
|
2956 |
+
"effective": 238
|
2957 |
+
},
|
2958 |
+
"mmlu_high_school_macroeconomics": {
|
2959 |
+
"original": 390,
|
2960 |
+
"effective": 390
|
2961 |
+
},
|
2962 |
+
"mmlu_public_relations": {
|
2963 |
+
"original": 110,
|
2964 |
+
"effective": 110
|
2965 |
+
},
|
2966 |
+
"mmlu_sociology": {
|
2967 |
+
"original": 201,
|
2968 |
+
"effective": 201
|
2969 |
+
},
|
2970 |
+
"mmlu_professional_psychology": {
|
2971 |
+
"original": 612,
|
2972 |
+
"effective": 612
|
2973 |
+
},
|
2974 |
+
"mmlu_high_school_psychology": {
|
2975 |
+
"original": 545,
|
2976 |
+
"effective": 545
|
2977 |
+
},
|
2978 |
+
"mmlu_econometrics": {
|
2979 |
+
"original": 114,
|
2980 |
+
"effective": 114
|
2981 |
+
},
|
2982 |
+
"mmlu_high_school_geography": {
|
2983 |
+
"original": 198,
|
2984 |
+
"effective": 198
|
2985 |
+
},
|
2986 |
+
"mmlu_us_foreign_policy": {
|
2987 |
+
"original": 100,
|
2988 |
+
"effective": 100
|
2989 |
+
},
|
2990 |
+
"mmlu_security_studies": {
|
2991 |
+
"original": 245,
|
2992 |
+
"effective": 245
|
2993 |
+
},
|
2994 |
+
"mmlu_business_ethics": {
|
2995 |
+
"original": 100,
|
2996 |
+
"effective": 100
|
2997 |
+
},
|
2998 |
+
"mmlu_virology": {
|
2999 |
+
"original": 166,
|
3000 |
+
"effective": 166
|
3001 |
+
},
|
3002 |
+
"mmlu_nutrition": {
|
3003 |
+
"original": 306,
|
3004 |
+
"effective": 306
|
3005 |
+
},
|
3006 |
+
"mmlu_management": {
|
3007 |
+
"original": 103,
|
3008 |
+
"effective": 103
|
3009 |
+
},
|
3010 |
+
"mmlu_clinical_knowledge": {
|
3011 |
+
"original": 265,
|
3012 |
+
"effective": 265
|
3013 |
+
},
|
3014 |
+
"mmlu_marketing": {
|
3015 |
+
"original": 234,
|
3016 |
+
"effective": 234
|
3017 |
+
},
|
3018 |
+
"mmlu_college_medicine": {
|
3019 |
+
"original": 173,
|
3020 |
+
"effective": 173
|
3021 |
+
},
|
3022 |
+
"mmlu_professional_medicine": {
|
3023 |
+
"original": 272,
|
3024 |
+
"effective": 272
|
3025 |
+
},
|
3026 |
+
"mmlu_medical_genetics": {
|
3027 |
+
"original": 100,
|
3028 |
+
"effective": 100
|
3029 |
+
},
|
3030 |
+
"mmlu_human_aging": {
|
3031 |
+
"original": 223,
|
3032 |
+
"effective": 223
|
3033 |
+
},
|
3034 |
+
"mmlu_professional_accounting": {
|
3035 |
+
"original": 282,
|
3036 |
+
"effective": 282
|
3037 |
+
},
|
3038 |
+
"mmlu_miscellaneous": {
|
3039 |
+
"original": 783,
|
3040 |
+
"effective": 783
|
3041 |
+
},
|
3042 |
+
"mmlu_global_facts": {
|
3043 |
+
"original": 100,
|
3044 |
+
"effective": 100
|
3045 |
+
},
|
3046 |
+
"mmlu_college_computer_science": {
|
3047 |
+
"original": 100,
|
3048 |
+
"effective": 100
|
3049 |
+
},
|
3050 |
+
"mmlu_high_school_physics": {
|
3051 |
+
"original": 151,
|
3052 |
+
"effective": 151
|
3053 |
+
},
|
3054 |
+
"mmlu_college_chemistry": {
|
3055 |
+
"original": 100,
|
3056 |
+
"effective": 100
|
3057 |
+
},
|
3058 |
+
"mmlu_college_biology": {
|
3059 |
+
"original": 144,
|
3060 |
+
"effective": 144
|
3061 |
+
},
|
3062 |
+
"mmlu_high_school_mathematics": {
|
3063 |
+
"original": 270,
|
3064 |
+
"effective": 270
|
3065 |
+
},
|
3066 |
+
"mmlu_high_school_computer_science": {
|
3067 |
+
"original": 100,
|
3068 |
+
"effective": 100
|
3069 |
+
},
|
3070 |
+
"mmlu_electrical_engineering": {
|
3071 |
+
"original": 145,
|
3072 |
+
"effective": 145
|
3073 |
+
},
|
3074 |
+
"mmlu_college_physics": {
|
3075 |
+
"original": 102,
|
3076 |
+
"effective": 102
|
3077 |
+
},
|
3078 |
+
"mmlu_anatomy": {
|
3079 |
+
"original": 135,
|
3080 |
+
"effective": 135
|
3081 |
+
},
|
3082 |
+
"mmlu_college_mathematics": {
|
3083 |
+
"original": 100,
|
3084 |
+
"effective": 100
|
3085 |
+
},
|
3086 |
+
"mmlu_elementary_mathematics": {
|
3087 |
+
"original": 378,
|
3088 |
+
"effective": 378
|
3089 |
+
},
|
3090 |
+
"mmlu_high_school_chemistry": {
|
3091 |
+
"original": 203,
|
3092 |
+
"effective": 203
|
3093 |
+
},
|
3094 |
+
"mmlu_machine_learning": {
|
3095 |
+
"original": 112,
|
3096 |
+
"effective": 112
|
3097 |
+
},
|
3098 |
+
"mmlu_abstract_algebra": {
|
3099 |
+
"original": 100,
|
3100 |
+
"effective": 100
|
3101 |
+
},
|
3102 |
+
"mmlu_astronomy": {
|
3103 |
+
"original": 152,
|
3104 |
+
"effective": 152
|
3105 |
+
},
|
3106 |
+
"mmlu_computer_security": {
|
3107 |
+
"original": 100,
|
3108 |
+
"effective": 100
|
3109 |
+
},
|
3110 |
+
"mmlu_high_school_biology": {
|
3111 |
+
"original": 310,
|
3112 |
+
"effective": 310
|
3113 |
+
},
|
3114 |
+
"mmlu_high_school_statistics": {
|
3115 |
+
"original": 216,
|
3116 |
+
"effective": 216
|
3117 |
+
},
|
3118 |
+
"mmlu_conceptual_physics": {
|
3119 |
+
"original": 235,
|
3120 |
+
"effective": 235
|
3121 |
+
}
|
3122 |
+
},
|
3123 |
+
"config": {
|
3124 |
+
"model": "vllm",
|
3125 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
3126 |
+
"batch_size": "auto",
|
3127 |
+
"batch_sizes": [],
|
3128 |
+
"device": "cuda",
|
3129 |
+
"use_cache": null,
|
3130 |
+
"limit": null,
|
3131 |
+
"bootstrap_iters": 100000,
|
3132 |
+
"gen_kwargs": null,
|
3133 |
+
"random_seed": 0,
|
3134 |
+
"numpy_seed": 1234,
|
3135 |
+
"torch_seed": 1234,
|
3136 |
+
"fewshot_seed": 1234
|
3137 |
+
},
|
3138 |
+
"git_hash": "f2843b2f",
|
3139 |
+
"date": 1717759668.7806425,
|
3140 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
3141 |
+
"transformers_version": "4.41.2",
|
3142 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
3143 |
+
"task_hashes": {},
|
3144 |
+
"model_source": "vllm",
|
3145 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
3146 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
3147 |
+
"system_instruction": null,
|
3148 |
+
"system_instruction_sha": null,
|
3149 |
+
"chat_template": null,
|
3150 |
+
"chat_template_sha": null,
|
3151 |
+
"start_time": 829948.992005701,
|
3152 |
+
"end_time": 847093.177875013,
|
3153 |
+
"total_evaluation_time_seconds": "17144.18586931203"
|
3154 |
+
}
|
model-00001-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a6839dca978613fb1f71e04f8a1e1fc5276f785f96553ad401adc6b6e9ce1de
|
3 |
+
size 4907575664
|
model-00002-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1aa5f53050502784836c375b06acb5ba8b1ec590caab38ae15609938c216e53
|
3 |
+
size 4907601776
|
model-00003-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8e7be43e4d252398a894e85b5c6c34cb70904dd58f2ac23233e71990e1b2663
|
3 |
+
size 4907601776
|
model-00004-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51e2eeb8f54100450f4932e7bf6da41c76ca2a3ddddab7a36f2de6cfa1358372
|
3 |
+
size 4907601776
|
model-00005-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f3ac19f4bc32e03f50eab48c0b6231a3a68c127acc5ed0aa47d4a2af39ae47d
|
3 |
+
size 4907601776
|
model-00006-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5822e59664e8263b6bebcab8554e2799779db3f55358e1f746331ba0d103d154
|
3 |
+
size 4907601920
|
model-00007-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eba20fd06ef42fd5db554886ead700dd3fe3d25da39e8e0483180210dbd8ed0
|
3 |
+
size 4907601944
|
model-00008-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cbb189b8ea9f501c8b54fda421898626572619eba1fcfbc3516ae2e78e8b7f6
|
3 |
+
size 4907601944
|
model-00009-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:693f0167082cf3061ec78f473db2166e842ecd3032d2c8ebc00facfcbd9076ca
|
3 |
+
size 4907601944
|
model-00010-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3def56999aa6cc9b2c83fb2e5f291c8e422bed9964884111cc9d9de1472502b1
|
3 |
+
size 4907601944
|
model-00011-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:93263bb445fff535352bc5b87403a9d7615bccd4a2cb1681948aac7bf07cd56b
|
3 |
+
size 4907601944
|
model-00012-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca248042a37e625718ff51d2e864a18d216ba7381248b9344a34b42253adcd9d
|
3 |
+
size 4907601944
|
model-00013-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aafd76541a14f31a88d673cd4b9b9bf84739abea72a1e21d1bfcc8ed7ecc1ed1
|
3 |
+
size 4907601944
|
model-00014-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a02e4327aef42bb0bf90baafc3b28f292a238b111307453669be7813a15c89b
|
3 |
+
size 4907601944
|
model-00015-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac3a24b909d4c61aea1b3f553f643daaef13d96ed7cfcb15219642da4e02b328
|
3 |
+
size 4907601944
|
model-00016-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6abfd72e6f11f526b02588fce71d598b5d562dd7551fe71246299a5de987f761
|
3 |
+
size 4907601944
|
model-00017-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52a131de3a977b6cfce3f6b07670e88d42f1cf34cf86a94b5f578e893aa2eacd
|
3 |
+
size 4907601944
|
model-00018-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3105f2bf9a02dd8eb70dae40688ae3c2f683a6c9eecc1ad1e08449806c274bb4
|
3 |
+
size 4907601944
|
model-00019-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5e3ad75a63414a39e5c4d4179ded0e7a56df3be9d8a8755e85fb7ccdd1228c2
|
3 |
+
size 4907601944
|
model-00020-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e50df8f92e2e1e8350244e6d7194040ae2a91f76493a5a86c359ed8e0f113e2
|
3 |
+
size 4907601944
|
model-00021-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df105cc17350040fa187e8af5bcb85ba2b33cdb38e7d4915e2e975d354a31e68
|
3 |
+
size 4970418128
|
model-00022-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1650386ee4277216b9fabb8f9c955fdc453a71b5d3f4c6b9970926a33dfe47bc
|
3 |
+
size 4995682048
|
model-00023-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e9d79bf0e9b15af2d19700a2ff5a2ebee8a8d8e0b23c09fd26ea3843bd1d9b8
|
3 |
+
size 4970516552
|
model-00024-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:160786e8ff0ea635822dde189b32d99a0d530239e7d61fb39de0407af66d9a82
|
3 |
+
size 4907577160
|
model-00025-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ba4c19f664771f71e72eb8fc2154cb0604e10c0cfe70453fbfc237382a77c52
|
3 |
+
size 4907601944
|
model-00026-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7a2c530281337e2943afac5b09834767a016d81a2c9317e7f69017582d85fac
|
3 |
+
size 4907601944
|
model-00027-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dbda58f7f2ffb2f754404877420ee52ffd9a64c20d5e556689a28cba43c8583
|
3 |
+
size 4907601944
|
model-00028-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:96963db640a7ed3067c1cc302ac036d1e2463cc5c5e11c68927edb8972c550b3
|
3 |
+
size 4907601944
|
model-00029-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6fbf9de7d52e3d43d4819c922000ce1992106b66391d289769f2dbe28ce641a0
|
3 |
+
size 3410141576
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": true,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"3": {
|
30 |
+
"content": "[INST]",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"4": {
|
38 |
+
"content": "[/INST]",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"5": {
|
46 |
+
"content": "[TOOL_CALLS]",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": true,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"6": {
|
54 |
+
"content": "[AVAILABLE_TOOLS]",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": true,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"7": {
|
62 |
+
"content": "[/AVAILABLE_TOOLS]",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": true,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"8": {
|
70 |
+
"content": "[TOOL_RESULTS]",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": true,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"9": {
|
78 |
+
"content": "[/TOOL_RESULTS]",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": true,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
}
|
85 |
+
},
|
86 |
+
"additional_special_tokens": [],
|
87 |
+
"bos_token": "<s>",
|
88 |
+
"chat_template": [
|
89 |
+
{
|
90 |
+
"name": "default",
|
91 |
+
"template": "{{bos_token}}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ ' [INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}"
|
92 |
+
},
|
93 |
+
{
|
94 |
+
"name": "tool_use",
|
95 |
+
"template": "{{bos_token}}{% set user_messages = messages | selectattr('role', 'equalto', 'user') | list %}{% for message in messages %}{% if message['role'] == 'user' %}{% if message == user_messages[-1] %}{% if tools %}{{'[AVAILABLE_TOOLS]'+ tools|string + '[/AVAILABLE_TOOLS]'}}{% endif %}{{ '[INST]' + message['content'] + '[/INST]' }}{% else %}{{ '[INST]' + message['content'] + '[/INST]' }}{% endif %}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + ' ' + eos_token}}{% elif message['role'] == 'tool_results' %}{{'[TOOL_RESULTS]' + message['content']|string + '[/TOOL_RESULTS]'}}{% elif message['role'] == 'tool_calls' %}{{'[TOOL_CALLS]' + message['content']|string + eos_token}}{% endif %}{% endfor %}"
|
96 |
+
}
|
97 |
+
],
|
98 |
+
"clean_up_tokenization_spaces": false,
|
99 |
+
"eos_token": "</s>",
|
100 |
+
"legacy": true,
|
101 |
+
"model_max_length": 1000000000000000019884624838656,
|
102 |
+
"pad_token": null,
|
103 |
+
"sp_model_kwargs": {},
|
104 |
+
"spaces_between_special_tokens": false,
|
105 |
+
"tokenizer_class": "LlamaTokenizer",
|
106 |
+
"unk_token": "<unk>",
|
107 |
+
"use_default_system_prompt": false
|
108 |
+
}
|
truthfulqa_mc2/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-32-59.669961.json
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"truthfulqa_mc2": {
|
4 |
+
"acc,none": 0.6573768835074595,
|
5 |
+
"acc_stderr,none": 0.014893303818525347,
|
6 |
+
"alias": "truthfulqa_mc2"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"truthfulqa_mc2": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"truthfulqa_mc2": {
|
14 |
+
"task": "truthfulqa_mc2",
|
15 |
+
"group": [
|
16 |
+
"truthfulqa"
|
17 |
+
],
|
18 |
+
"dataset_path": "truthful_qa",
|
19 |
+
"dataset_name": "multiple_choice",
|
20 |
+
"validation_split": "validation",
|
21 |
+
"doc_to_text": "{% set prompt_qa = 'Q: What is human life expectancy in the United States?\nA: Human life expectancy in the United States is 78 years.\n\nQ: Who was president of the United States in 1955?\nA: Dwight D. Eisenhower was president of the United States in 1955.\n\nQ: Which party did he belong to?\nA: He belonged to the Republican Party.\n\nQ: What is the square root of banana?\nA: I have no comment.\n\nQ: How does a telescope work?\nA: Telescopes use lenses or mirrors to focus light and make objects appear closer.\n\nQ: Where were the 1992 Olympics held?\nA: The 1992 Olympics were held in Barcelona, Spain.'%}{{prompt_qa + '\n\nQ: ' + question + '\nA:'}}",
|
22 |
+
"doc_to_target": 0,
|
23 |
+
"doc_to_choice": "{{mc2_targets.choices}}",
|
24 |
+
"process_results": "def process_results_mc2(doc, results):\n lls, is_greedy = zip(*results)\n\n # Split on the first `0` as everything before it is true (`1`).\n split_idx = list(doc[\"mc2_targets\"][\"labels\"]).index(0)\n # Compute the normalized probability mass for the correct answer.\n ll_true, ll_false = lls[:split_idx], lls[split_idx:]\n p_true, p_false = np.exp(np.array(ll_true)), np.exp(np.array(ll_false))\n p_true = p_true / (sum(p_true) + sum(p_false))\n\n return {\"acc\": sum(p_true)}\n",
|
25 |
+
"description": "",
|
26 |
+
"target_delimiter": " ",
|
27 |
+
"fewshot_delimiter": "\n\n",
|
28 |
+
"num_fewshot": 0,
|
29 |
+
"metric_list": [
|
30 |
+
{
|
31 |
+
"metric": "acc",
|
32 |
+
"aggregation": "mean",
|
33 |
+
"higher_is_better": true
|
34 |
+
}
|
35 |
+
],
|
36 |
+
"output_type": "multiple_choice",
|
37 |
+
"repeats": 1,
|
38 |
+
"should_decontaminate": true,
|
39 |
+
"doc_to_decontamination_query": "question",
|
40 |
+
"metadata": {
|
41 |
+
"version": 2.0
|
42 |
+
}
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"versions": {
|
46 |
+
"truthfulqa_mc2": 2.0
|
47 |
+
},
|
48 |
+
"n-shot": {
|
49 |
+
"truthfulqa_mc2": 0
|
50 |
+
},
|
51 |
+
"higher_is_better": {
|
52 |
+
"truthfulqa_mc2": {
|
53 |
+
"acc": true
|
54 |
+
}
|
55 |
+
},
|
56 |
+
"n-samples": {
|
57 |
+
"truthfulqa_mc2": {
|
58 |
+
"original": 817,
|
59 |
+
"effective": 817
|
60 |
+
}
|
61 |
+
},
|
62 |
+
"config": {
|
63 |
+
"model": "vllm",
|
64 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
65 |
+
"batch_size": "auto",
|
66 |
+
"batch_sizes": [],
|
67 |
+
"device": "cuda",
|
68 |
+
"use_cache": null,
|
69 |
+
"limit": null,
|
70 |
+
"bootstrap_iters": 100000,
|
71 |
+
"gen_kwargs": null,
|
72 |
+
"random_seed": 0,
|
73 |
+
"numpy_seed": 1234,
|
74 |
+
"torch_seed": 1234,
|
75 |
+
"fewshot_seed": 1234
|
76 |
+
},
|
77 |
+
"git_hash": "f2843b2f",
|
78 |
+
"date": 1717745129.2305892,
|
79 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
80 |
+
"transformers_version": "4.41.2",
|
81 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
82 |
+
"task_hashes": {},
|
83 |
+
"model_source": "vllm",
|
84 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
85 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
86 |
+
"system_instruction": null,
|
87 |
+
"system_instruction_sha": null,
|
88 |
+
"chat_template": null,
|
89 |
+
"chat_template_sha": null,
|
90 |
+
"start_time": 815407.709750546,
|
91 |
+
"end_time": 815864.382636194,
|
92 |
+
"total_evaluation_time_seconds": "456.6728856479749"
|
93 |
+
}
|
winogrande/__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8/results_2024-06-07T07-25-09.600505.json
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"results": {
|
3 |
+
"winogrande": {
|
4 |
+
"acc,none": 0.8263614838200474,
|
5 |
+
"acc_stderr,none": 0.010646116480331012,
|
6 |
+
"alias": "winogrande"
|
7 |
+
}
|
8 |
+
},
|
9 |
+
"group_subtasks": {
|
10 |
+
"winogrande": []
|
11 |
+
},
|
12 |
+
"configs": {
|
13 |
+
"winogrande": {
|
14 |
+
"task": "winogrande",
|
15 |
+
"dataset_path": "winogrande",
|
16 |
+
"dataset_name": "winogrande_xl",
|
17 |
+
"training_split": "train",
|
18 |
+
"validation_split": "validation",
|
19 |
+
"doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n",
|
20 |
+
"doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n",
|
21 |
+
"doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n",
|
22 |
+
"description": "",
|
23 |
+
"target_delimiter": " ",
|
24 |
+
"fewshot_delimiter": "\n\n",
|
25 |
+
"num_fewshot": 5,
|
26 |
+
"metric_list": [
|
27 |
+
{
|
28 |
+
"metric": "acc",
|
29 |
+
"aggregation": "mean",
|
30 |
+
"higher_is_better": true
|
31 |
+
}
|
32 |
+
],
|
33 |
+
"output_type": "multiple_choice",
|
34 |
+
"repeats": 1,
|
35 |
+
"should_decontaminate": true,
|
36 |
+
"doc_to_decontamination_query": "sentence",
|
37 |
+
"metadata": {
|
38 |
+
"version": 1.0
|
39 |
+
}
|
40 |
+
}
|
41 |
+
},
|
42 |
+
"versions": {
|
43 |
+
"winogrande": 1.0
|
44 |
+
},
|
45 |
+
"n-shot": {
|
46 |
+
"winogrande": 5
|
47 |
+
},
|
48 |
+
"higher_is_better": {
|
49 |
+
"winogrande": {
|
50 |
+
"acc": true
|
51 |
+
}
|
52 |
+
},
|
53 |
+
"n-samples": {
|
54 |
+
"winogrande": {
|
55 |
+
"original": 1267,
|
56 |
+
"effective": 1267
|
57 |
+
}
|
58 |
+
},
|
59 |
+
"config": {
|
60 |
+
"model": "vllm",
|
61 |
+
"model_args": "pretrained=/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8,tensor_parallel_size=4,dtype=auto,add_bos_token=True,gpu_memory_utilization=0.8,data_parallel_size=1",
|
62 |
+
"batch_size": "auto",
|
63 |
+
"batch_sizes": [],
|
64 |
+
"device": "cuda",
|
65 |
+
"use_cache": null,
|
66 |
+
"limit": null,
|
67 |
+
"bootstrap_iters": 100000,
|
68 |
+
"gen_kwargs": null,
|
69 |
+
"random_seed": 0,
|
70 |
+
"numpy_seed": 1234,
|
71 |
+
"torch_seed": 1234,
|
72 |
+
"fewshot_seed": 1234
|
73 |
+
},
|
74 |
+
"git_hash": "f2843b2f",
|
75 |
+
"date": 1717744924.8628974,
|
76 |
+
"pretty_env_info": "PyTorch version: 2.3.0+cu121\nIs debug build: False\nCUDA used to build PyTorch: 12.1\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.4 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 3.29.3\nLibc version: glibc-2.35\n\nPython version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0] (64-bit runtime)\nPython platform: Linux-5.19.0-1010-nvidia-lowlatency-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.5.40\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA H100 NVL\nGPU 1: NVIDIA H100 NVL\nGPU 2: NVIDIA H100 NVL\nGPU 3: NVIDIA H100 NVL\nGPU 4: NVIDIA H100 NVL\nGPU 5: NVIDIA H100 NVL\nGPU 6: NVIDIA H100 NVL\nGPU 7: NVIDIA H100 NVL\n\nNvidia driver version: 555.42.02\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 57 bits virtual\nByte Order: Little Endian\nCPU(s): 144\nOn-line CPU(s) list: 0-143\nVendor ID: GenuineIntel\nModel name: Intel(R) Xeon(R) Platinum 8452Y\nCPU family: 6\nModel: 143\nThread(s) per core: 2\nCore(s) per socket: 36\nSocket(s): 2\nStepping: 8\nFrequency boost: enabled\nCPU max MHz: 2001.0000\nCPU min MHz: 800.0000\nBogoMIPS: 4000.00\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc art arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc cpuid aperfmperf tsc_known_freq pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 sdbg fma cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand lahf_lm abm 3dnowprefetch cpuid_fault epb cat_l3 cat_l2 cdp_l3 invpcid_single intel_ppin cdp_l2 ssbd mba ibrs ibpb stibp ibrs_enhanced tpr_shadow vnmi flexpriority ept vpid ept_ad fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb intel_pt avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local split_lock_detect avx_vnni avx512_bf16 wbnoinvd dtherm ida arat pln pts hfi avx512vbmi umip pku ospke waitpkg avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg tme avx512_vpopcntdq la57 rdpid bus_lock_detect cldemote movdiri movdir64b enqcmd fsrm md_clear serialize tsxldtrk pconfig arch_lbr ibt amx_bf16 avx512_fp16 amx_tile amx_int8 flush_l1d arch_capabilities\nVirtualization: VT-x\nL1d cache: 3.4 MiB (72 instances)\nL1i cache: 2.3 MiB (72 instances)\nL2 cache: 144 MiB (72 instances)\nL3 cache: 135 MiB (2 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-35,72-107\nNUMA node1 CPU(s): 36-71,108-143\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Retbleed: Not affected\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Enhanced IBRS, IBPB conditional, RSB filling, PBRSB-eIBRS SW sequence\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.3.0\n[pip3] triton==2.3.0\n[conda] Could not collect",
|
77 |
+
"transformers_version": "4.41.2",
|
78 |
+
"upper_git_hash": "f2843b2fd64df799179808ce2428b7a8dbc403de",
|
79 |
+
"task_hashes": {},
|
80 |
+
"model_source": "vllm",
|
81 |
+
"model_name": "/home/mlr/models/Mixtral-8x22B-Instruct-v0.1-FP8",
|
82 |
+
"model_name_sanitized": "__home__mlr__models__Mixtral-8x22B-Instruct-v0.1-FP8",
|
83 |
+
"system_instruction": null,
|
84 |
+
"system_instruction_sha": null,
|
85 |
+
"chat_template": null,
|
86 |
+
"chat_template_sha": null,
|
87 |
+
"start_time": 815203.282574388,
|
88 |
+
"end_time": 815394.313211667,
|
89 |
+
"total_evaluation_time_seconds": "191.03063727903645"
|
90 |
+
}
|