TnT commited on
Commit
d241326
·
1 Parent(s): 640b1c8

Upload results_Mistral-7B-v0.1_mmlu.json

Browse files
Files changed (1) hide show
  1. results_Mistral-7B-v0.1_mmlu.json +1 -0
results_Mistral-7B-v0.1_mmlu.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"results": {"hendrycksTest-high_school_microeconomics": {"acc": 0.7058823529411765, "acc_stderr": 0.029597329730978096, "acc_norm": 0.7058823529411765, "acc_norm_stderr": 0.029597329730978096}, "hendrycksTest-high_school_statistics": {"acc": 0.5462962962962963, "acc_stderr": 0.033953227263757976, "acc_norm": 0.5462962962962963, "acc_norm_stderr": 0.033953227263757976}, "hendrycksTest-high_school_computer_science": {"acc": 0.6, "acc_stderr": 0.04923659639173309, "acc_norm": 0.6, "acc_norm_stderr": 0.04923659639173309}, "hendrycksTest-high_school_physics": {"acc": 0.3708609271523179, "acc_stderr": 0.03943966699183629, "acc_norm": 0.3708609271523179, "acc_norm_stderr": 0.03943966699183629}, "hendrycksTest-moral_scenarios": {"acc": 0.43798882681564244, "acc_stderr": 0.016593394227564846, "acc_norm": 0.43798882681564244, "acc_norm_stderr": 0.016593394227564846}, "hendrycksTest-high_school_european_history": {"acc": 0.6303030303030303, "acc_stderr": 0.03769430314512567, "acc_norm": 0.6303030303030303, "acc_norm_stderr": 0.03769430314512567}, "hendrycksTest-moral_disputes": {"acc": 0.6994219653179191, "acc_stderr": 0.024685316867257803, "acc_norm": 0.6994219653179191, "acc_norm_stderr": 0.024685316867257803}, "hendrycksTest-high_school_macroeconomics": {"acc": 0.6461538461538462, "acc_stderr": 0.02424378399406216, "acc_norm": 0.6461538461538462, "acc_norm_stderr": 0.02424378399406216}, "hendrycksTest-high_school_mathematics": {"acc": 0.37037037037037035, "acc_stderr": 0.02944316932303154, "acc_norm": 0.37037037037037035, "acc_norm_stderr": 0.02944316932303154}, "hendrycksTest-high_school_chemistry": {"acc": 0.5270935960591133, "acc_stderr": 0.03512819077876106, "acc_norm": 0.5270935960591133, "acc_norm_stderr": 0.03512819077876106}, "hendrycksTest-high_school_government_and_politics": {"acc": 0.8860103626943006, "acc_stderr": 0.022935144053919443, "acc_norm": 0.8860103626943006, "acc_norm_stderr": 0.022935144053919443}, "hendrycksTest-high_school_world_history": {"acc": 0.6540084388185654, "acc_stderr": 0.030964810588786716, "acc_norm": 0.6540084388185654, "acc_norm_stderr": 0.030964810588786716}, "hendrycksTest-high_school_us_history": {"acc": 0.7303921568627451, "acc_stderr": 0.031145570659486782, "acc_norm": 0.7303921568627451, "acc_norm_stderr": 0.031145570659486782}, "ethics_deontology": {"acc": 0.5080645161290323, "acc_stderr": 0.00833804164110466, "em": 0.010011123470522803}, "hendrycksTest-high_school_geography": {"acc": 0.8181818181818182, "acc_stderr": 0.027479603010538797, "acc_norm": 0.8181818181818182, "acc_norm_stderr": 0.027479603010538797}, "hendrycksTest-high_school_biology": {"acc": 0.7838709677419354, "acc_stderr": 0.023415293433568525, "acc_norm": 0.7838709677419354, "acc_norm_stderr": 0.023415293433568525}, "hendrycksTest-high_school_psychology": {"acc": 0.8385321100917431, "acc_stderr": 0.015776239256163224, "acc_norm": 0.8385321100917431, "acc_norm_stderr": 0.015776239256163224}}, "versions": {"hendrycksTest-high_school_microeconomics": 1, "hendrycksTest-high_school_statistics": 1, "hendrycksTest-high_school_computer_science": 1, "hendrycksTest-high_school_physics": 1, "hendrycksTest-moral_scenarios": 1, "hendrycksTest-high_school_european_history": 1, "hendrycksTest-moral_disputes": 1, "hendrycksTest-high_school_macroeconomics": 1, "hendrycksTest-high_school_mathematics": 1, "hendrycksTest-high_school_chemistry": 1, "hendrycksTest-high_school_government_and_politics": 1, "hendrycksTest-high_school_world_history": 1, "hendrycksTest-high_school_us_history": 1, "ethics_deontology": 0, "hendrycksTest-high_school_geography": 1, "hendrycksTest-high_school_biology": 1, "hendrycksTest-high_school_psychology": 1}, "config": {"model": "Mistral-7B-v0.1", "num_fewshot": 0, "batch_size": 8, "device": "cuda:0", "no_cache": true, "limit": null, "bootstrap_iters": 2, "description_dict": null}}