benchbench / cache /aggregate_scoress_cache_e798cd9f99d4c09af5b81734eecc6592.csv
Yotam-Perlitz
update cache
23926f6
raw
history blame
421 Bytes
model,score
gpt_4o_2024_05_13,1.0
qwen2_72b_instruct,0.8437710437710438
gemma_2_27b_it,0.8225108225108225
llama3_70b_instruct,0.7288840788840788
qwen1.5_110b_chat,0.6958874458874459
command_r_plus,0.5340909090909091
qwen1.5_72b_chat,0.510909090909091
llama3_8b_instruct,0.2897065897065897
mixtral_8x7b_instruct_v0.1,0.27759740259740256
command_r,0.1515151515151515
qwen1.5_7b_chat,0.06897546897546898
llama_2_7b_chat,0.0