leaderboard / plots /clustermap_qa.json
pminervini's picture
update
c0db8b3
raw
history blame
3 kB
{"columns":["TheBloke\/Llama-2-13B-chat-GPTQ","TheBloke\/Llama-2-7B-Chat-GPTQ","TheBloke\/Wizard-Vicuna-13B-Uncensored-GPTQ","teknium\/OpenHermes-2-Mistral-7B","teknium\/OpenHermes-2.5-Mistral-7B","mistralai\/Mistral-7B-Instruct-v0.2","mistralai\/Mistral-7B-Instruct-v0.1","bigscience\/bloom-7b1","bigscience\/bloom-560m","berkeley-nest\/Starling-LM-7B-alpha","EleutherAI\/gpt-neo-125m","EleutherAI\/gpt-neo-2.7B","EleutherAI\/gpt-j-6b","EleutherAI\/gpt-neo-1.3B","Gryphe\/MythoMax-L2-13b","Open-Orca\/Mistral-7B-OpenOrca","pankajmathur\/orca_mini_3b","KoboldAI\/OPT-13B-Erebus","ehartford\/dolphin-2.1-mistral-7b","togethercomputer\/LLaMA-2-7B-32K","togethercomputer\/GPT-JT-6B-v1","togethercomputer\/Llama-2-7B-32K-Instruct","HuggingFaceH4\/zephyr-7b-alpha","HuggingFaceH4\/zephyr-7b-beta","tiiuae\/falcon-7b-instruct","tiiuae\/falcon-7b","ai-forever\/mGPT","NousResearch\/Yarn-Mistral-7b-128k","NousResearch\/Nous-Hermes-Llama2-13b","DiscoResearch\/mixtral-7b-8expert","meta-llama\/Llama-2-7b-chat-hf","meta-llama\/Llama-2-7b-hf","meta-llama\/Llama-2-13b-chat-hf","meta-llama\/Llama-2-13b-hf","upstage\/SOLAR-10.7B-Instruct-v1.0"],"index":["TruthfulQA MC1, Accuracy","TriviaQA, EM","NQ, EM","TruthfulQA MC2, Accuracy"],"data":[[0.2656058752,0.2900856793,0.358629131,0.3390452876,0.3598531212,0.5250917993,0.3916768666,0.2239902081,0.2447980416,0.3157894737,0.2582619339,0.2386780906,0.2019583843,0.2313341493,0.364749082,0.3525091799,0.2802937576,0.205630355,0.3904528764,0.2558139535,0.2264381885,0.3096695226,0.4063647491,0.3867809058,0.2876376989,0.2239902081,0.2325581395,0.2741738066,0.3476132191,0.247246022,0.3023255814,0.2521419829,0.2802937576,0.2594859241,null],[0.0947949175,0.0817543469,0.0961324119,0.6805060187,null,0.6389322336,0.5224030317,0.2633749443,0.0570664289,0.6594962104,0.0155483727,0.2011257245,0.3931676326,0.1415514935,0.0974141774,0.6583816317,0.3750557289,0.3940592956,0.6674654481,0.0815871601,0.4172982613,0.0927329469,0.6591618368,0.6443379403,0.3915514935,0.5866584931,0.1061078912,0.7003455194,0.0984730272,0.0,0.0883303611,0.0921756576,0.0978600089,0.0948506465,0.4198060633],[0.0263157895,0.0263157895,0.0232686981,0.0335180055,0.0349030471,0.0313019391,0.0249307479,0.0329639889,0.0102493075,0.0343490305,0.0049861496,0.0542936288,0.0916897507,0.0368421053,0.028531856,0.0293628809,0.0265927978,0.1307479224,0.0293628809,0.0238227147,0.1091412742,0.0271468144,0.0304709141,0.0315789474,0.135734072,0.2207756233,0.0232686981,null,0.0254847645,0.0,0.0252077562,0.0268698061,0.0268698061,0.0274238227,0.1991689751],[0.4167499124,0.4410061226,0.5164091712,0.509255685,0.530496424,0.6681645582,0.5592109222,0.3889466583,0.4243149954,0.4731016618,0.4557936883,0.3986263303,0.3595710074,0.3961377938,0.515367679,0.5225657507,0.423037498,0.352115369,0.558968896,0.3841009802,0.3706818154,0.4572778615,0.5602234073,0.5511952533,0.4407667557,0.3426523695,0.3962402525,0.4225232911,0.5026754146,0.4947679694,0.4531160226,0.389651281,0.4394613382,0.3689257684,null]]}