LLMArena committed on
Commit
1aaa52c
·
verified ·
1 Parent(s): 3fd0dd9

Upload 2 files

Browse files
elo_results_20241211.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:403f492dbe8149e1a6956320389121608c9976cd2c2506b96eca21aa2c27d7b4
3
+ size 1760108
leaderboard_table_20241211.csv ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rating,variance,rating_q975,rating_q025,num_battles,final_ranking,key,Model,License,Organization,Knowledge cutoff date,Link,MT-bench (score),MMLU
2
+ 1066.4652564735854,162.20642914676966,1090.3737475195344,1041.62858746108,552,2,gpt-4o-2024-11-20,gpt-4o-2024-11-20,Proprietary,OpenAI,11-2024,https://openai.com/api/,70.0,50.0
3
+ 1039.4876281892873,136.2530783959714,1061.5620401970853,1014.9224393115712,726,4,Google: Gemini Pro 1.5,Google: Gemini Pro 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
4
+ 891.6540594761141,133.89903363999983,913.6571848927705,868.0911223678636,855,39,gpt-4o-2024-05-13,gpt-4o-2024-05-13,Proprietary,OpenAI,05-2024,https://openai.com/api/,70.0,50.0
5
+ 1009.4925549677336,290.3793418865619,1044.3612025141724,974.6778017490105,324,4,Cohere: Command R+ (08-2024),Cohere: Command R+ (08-2024),Open Source,Cohere,-,https://docs.cohere.com/v2/docs/command-r-plus,-,-
6
+ 1014.6798015543044,273.38116993139346,1047.5809058881168,981.7202717689902,361,4,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/Vikhr-Nemo-12B-Instruct-R-21-09-24,-,-
7
+ 889.4543976131046,175.74070960123206,915.4321472314347,863.5281890070374,585,39,claude-3-5-sonnet-20241022,claude-3-5-sonnet-20241022,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
8
+ 1052.203591486529,187.0315640157163,1079.5057211201429,1024.5070712701593,473,2,Qwen2.5 72B Instruct,Qwen2.5 72B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-72B-Instruct,-,-
9
+ 878.1354116123769,130.7743515049299,900.2595469051313,856.6612747470441,835,42,T-lite-instruct-0.1,T-lite-instruct-0.1,Open Source,t-bank-ai,In training,https://huggingface.co/AnatoliiPotapov/T-lite-instruct-0.1,-,-
10
+ 1015.3367648037724,178.66876393090928,1041.7033474087555,990.4221687989087,516,4,claude-3-5-sonnet-20240620,claude-3-5-sonnet-20240620,Proprietary,Anthropic,06-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
11
+ 917.3646279739613,116.75570464501229,937.3773693394644,894.967349316358,927,36,GigaChat-Max-preview 4.0.26.20,GigaChat-Max-preview 4.0.26.20,Proprietary,Sber,In training,https://www.sber-bank.by/new/gigachat-29102024,-,-
12
+ 1009.388957628975,297.96995105106515,1045.4552765062992,978.1646423923137,295,4,Llama 3.3 70B Instruct,Llama 3.3 70B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
13
+ 1010.4753599747639,425.6948709407261,1050.5863013909704,971.16460828865,203,4,-,YandexGPT Experimental Trio,-,-,-,-,-,-
14
+ 892.2359468400748,178.70300983824845,916.6291031181632,865.4734222610763,574,39,Google: Gemini Flash 1.5,Google: Gemini Flash 1.5,Proprietary,Google,-,https://gemini.google.com/,-,-
15
+ 1045.913351666114,159.78304370936868,1070.136161665994,1020.5191627963811,594,2,gpt-4-turbo-2024-04-09,gpt-4-turbo-2024-04-09,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
16
+ 1117.109446756341,184.2167643629795,1143.8837555472046,1091.731079615234,593,1,Gemma 2 27B,Gemma 2 27B,Proprietary,Google,-,https://blog.google/technology/developers/google-gemma-2/,-,-
17
+ 1024.5927597958773,108.63513201118707,1045.2212699653837,1004.4594239193036,1077,4,Llama 3.1 405B Instruct Turbo,Llama 3.1 405B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
18
+ 994.801346452818,98.57525545910447,1014.2778936341865,975.0247597793742,1086,14,YandexGPT Experimental,YandexGPT Experimental,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
19
+ 1034.4697617204065,119.3835390987837,1055.7725912178216,1013.9261231494952,766,4,LLaMA-3 Chat (70B),LLaMA-3 Chat (70B),Proprietary,Meta,12-2023,https://llama.meta.com/llama3/,-,-
20
+ 1017.9264289317942,135.58032241361744,1040.7087985752341,994.8288675138318,720,5,saiga_llama3_70b,saiga_llama3_70b,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_70b_sft_m1_d5_abliterated_awq_4bit,-,-
21
+ 935.597262757723,128.19843568090937,957.8049685227182,912.8175881211779,739,33,Llama 3.1 70B Instruct Turbo,Llama 3.1 70B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
22
+ 915.9458810241074,204.7816321281494,943.2825421278895,887.4930847307357,518,36,GigaChat-Plus 4.0.26.15,GigaChat-Plus 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
23
+ 1051.32482857087,708.0351976590512,1101.6166914086114,999.6094943754206,138,1,GigaChat 4.0.26.20,GigaChat 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
24
+ 938.9055595660295,87.89076147944694,956.8881660194003,920.1120037190248,1092,33,saiga_llama3_8b_v7,saiga_llama3_8b_v7,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
25
+ 1065.585654889274,177.05766511725776,1089.7230928787446,1039.2397286744638,541,2,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,Open Source,RefalMachine,-,https://huggingface.co/RefalMachine/ruadapt_llama3_instruct_lep_saiga_kto_ablitirated,-,-
26
+ 963.9399958025759,657.9243043051607,1014.9347923474577,912.1757828680171,161,13,GigaChat-Pro 4.0.26.20,GigaChat-Pro 4.0.26.20,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
27
+ 1011.9006992104486,199.37144389937447,1038.960075600483,984.1092901784089,513,7,GigaChat 4.0.26.15,GigaChat 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
28
+ 1059.78452431081,114.16566332067245,1079.778991981747,1038.5760018604697,863,2,GigaChat-Pro 4.0.26.15,GigaChat-Pro 4.0.26.15,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
29
+ 1066.3229955092913,195.7211874830416,1094.193440250466,1039.341461749015,455,1,gpt-4o-mini-2024-07-18,gpt-4o-mini-2024-07-18,Proprietary,OpenAI,07-2024,https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence/,70.0,50.0
30
+ 990.1968809193805,139.89212536071358,1013.2008876109159,968.2061875402611,677,16,claude-3-haiku-20240307,claude-3-haiku-20240307,Proprietary,Anthropic,03-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-family,-,-
31
+ 953.4225640983079,85.34803539761137,972.3518221590283,936.3393750990774,1211,30,claude-3-5-haiku-20241022,claude-3-5-haiku-20241022,Proprietary,Anthropic,10-2024,https://docs.anthropic.com/en/docs/intro-to-claude#claude-3-5-family,-,-
32
+ 947.2578526203331,90.35555598521394,967.1069413032229,929.9170565779622,1207,32,LLaMA-3 Chat (8B),LLaMA-3 Chat (8B),Proprietary,Meta,03-2023,https://llama.meta.com/llama3/,-,-
33
+ 970.6813931516238,95.87332538494664,990.0154777646837,951.4986188119813,976,24,Vikhrmodels/it-5.2-fp16-cp,Vikhrmodels/it-5.2-fp16-cp,Open Source,Vikhrmodels,In training,https://huggingface.co/Vikhrmodels/it-5.2-fp16-cp,-,-
34
+ 1030.9567751819188,81.71769016381201,1048.2317138218114,1013.2673464554689,1110,4,saiga_llama3_8b_v6,saiga_llama3_8b_v6,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_llama3_8b,-,-
35
+ 1046.9508241596855,405.8415075848698,1088.1351202972037,1008.0826100807584,233,2,YandexGPT 4 Pro,YandexGPT 4 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-4,45.2,35.2
36
+ 1002.2769344066596,794.5832788289706,1055.7129029527605,947.5630194246367,138,4,saiga_phi3_medium,saiga_phi3_medium,Open Source,Ilya Gusev,In training,https://huggingface.co/IlyaGusev/saiga_phi3_medium_sft_m1_d2_kto_m5_d7,-,-
37
+ 1053.0729326264425,96.51321071167163,1072.2150953189737,1033.4122966559096,1107,2,Qwen2.5 Coder 32B Instruct,Qwen2.5 Coder 32B Instruct,Open Source,Qwen,-,https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct,-,-
38
+ 1066.0789385536307,386.5161922980958,1104.1181868190963,1027.8600993370494,233,1,YandexGPT 3 Lite,YandexGPT 3 Lite,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,45.2,35.2
39
+ 1003.1450060083076,94.97263801203302,1021.7798019607957,984.0571971554197,1067,12,YandexGPT 3 Pro,YandexGPT 3 Pro,Proprietary,Yandex,In training,https://ya.ru/ai/gpt-3,65.2,45.2
40
+ 838.3668922829805,90.3553247186183,857.8103537277818,819.6103716562735,1258,47,Qwen 2 Instruct (72B),Qwen 2 Instruct (72B),Open Source,Qwen,12-2023,https://huggingface.co/Qwen/Qwen2-72B-Instruct,-,-
41
+ 928.3253799198692,80.40239202605129,944.816964098719,909.7549471263087,1226,35,Llama 3.1 8B Instruct Turbo,Llama 3.1 8B Instruct Turbo,Proprietary,Meta,-,https://ai.meta.com/blog/meta-llama-3-1/,-,-
42
+ 1039.9796814004058,76.26062485599546,1057.2070427844683,1022.994642558331,1191,4,gpt-4-0613,gpt-4-0613,Proprietary,OpenAI,04-2023,https://openai.com/api/,70.0,50.0
43
+ 1085.8493977129394,84.3476465762785,1104.568414967872,1067.0238864044056,1268,1,GigaChat-Pro 2.2.25.3,GigaChat-Pro 2.2.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
44
+ 1119.6577231381139,854.9524219887568,1184.5198579752407,1068.1671320883102,129,1,Llama 3.2 11B Instruct,Llama 3.2 11B Instruct,Open Source,Meta,-,https://www.llama.com/,-,-
45
+ 1007.7807399229864,81.96789135663971,1025.09471862186,990.1975012810778,993,10,GigaChat-Pro 4.0.26.8,GigaChat-Pro 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
46
+ 1018.4978484657778,128.8332371524829,1039.9549758924052,995.7520366981141,794,5,GigaChat 3.1.25.3,GigaChat 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
47
+ 986.4442894898277,165.37845504312205,1011.8325301187166,961.5598542318053,595,16,GigaChat 4.0.26.8,GigaChat 4.0.26.8,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
48
+ 1012.7997004028763,91.80371595606908,1030.9333335571528,994.4812695981076,1043,9,GigaChat-Plus 3.1.25.3,GigaChat-Plus 3.1.25.3,Proprietary,Sber,In training,https://developers.sber.ru/portal/products/gigachat,-,-
49
+ 967.7633899788693,134.9935796457256,990.4977542751445,944.7494057443963,748,22,gpt-3.5-turbo-0125,gpt-3.5-turbo-0125,Proprietary,OpenAI,09-2021,https://openai.com/api/,65.2,45.2