OpsEval / data_v2 /network_zh_mc_gen.csv
Junetheriver's picture
update leaderboard 2025-02-24
0257d7c
raw
history blame
2.86 kB
name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con
AquilaChat2-34B,34.66,34.66,47.74,47.74,44.48,44.48,,
Baichuan-13B-Chat,15.2,16.0,43.9,49.7,34.3,36.1,51.3,55.6
Baichuan2-13B-Chat,35.6,35.9,30.5,30.5,34.6,35.6,30.2,32.0
ChatGLM2-6B,33.8,33.7,42.1,42.2,36.0,36.0,39.5,39.5
ChatGLM3-6B,41.39414802,41.39414802,49.22547332,49.22547332,38.81239243,38.81239243,42.85714286,42.85714286
Chinese-Alpaca-2-13B,33.1,33.1,44.2,44.2,44.0,44.0,42.7,42.7
Chinese-LLaMA-2-13B,22.5,22.5,38.8,38.8,41.8,41.8,32.2,32.2
DevOps-Model-14B-Chat,47.59,46.57,52.52,56.01,62.07,60.08,50.59,55.79
ERNIE-Bot-4.0,67.54,67.54,71.96,71.96,72.0,72.0,78.0,78.0
GLM3-turbo,59.63855422,59.63855422,,,,,,
GLM4,67.383821,67.383821,,,,,,
GPT-3.5-turbo,58.4,58.6,64.8,67.6,59.2,59.7,65.2,67.4
GPT-4,,,,,,,86.0,86.0
Hunyuan-13B,60.0,60.0,70.0,70.0,,,,
InternLM-7B,41.7,41.7,38.4,38.4,42.6,42.6,41.3,41.3
InternLM2-Chat-20B,57.48709122,57.48709122,57.14285714,57.14285714,59.1222031,59.1222031,50.77452668,50.77452668
InternLM2-Chat-7B,54.30292599,54.30292599,59.81067126,59.81067126,58.51979346,58.51979346,51.63511188,51.63511188
LLaMA-2-13B,29.7,31.6,51.6,57.0,39.6,38.9,48.0,50.6
LLaMA-2-70B-Chat,38.55,38.55,57.49,57.49,49.09,49.09,48.57,48.57
LLaMA-2-7B,29.8,30.2,50.1,55.6,38.6,40.8,45.6,50.4
Mistral-7B,1.9,1.9,45.61,45.61,15.0,15.0,35.97,35.97
Qwen-14B-Chat,48.35,48.81,55.35,57.4,58.53,56.12,52.12,54.99
Qwen-72B-Chat,65.77,65.86,68.13,68.3,69.4,69.4,69.99,70.08
Qwen-7B-Chat,29.6,29.9,50.6,53.5,50.4,46.9,46.9,47.7
Yi-34B-Chat,61.61,62.56,68.11,69.75,65.73,65.37,69.88,71.21
JIUTIAN-75B-net,66.38023630504833,66.38023630504833,74.75832438238453,74.75832438238453,67.88399570354457,67.88399570354457,74.43609022556392,74.43609022556392
Claude-3-Opus,62.329525111479995,62.329525111479995,,,,,,
Deepseek-R1-Distill-Llama-8B,27.380138658334147,27.380138658334147,41.48846141327344,41.48846141327344,32.6123750935781,32.6123750935781,33.60348924258699,33.60348924258699
Deepseek-R1-Distill-Qwen-1.5B,24.732285258601046,24.732285258601046,24.071542492595125,24.071542492595125,20.06151742993848,20.06151742993848,25.47277284119389,25.47277284119389
Deepseek-R1-Distill-Qwen-7B,30.83520489535527,30.83520489535527,31.99882823942974,31.99882823942974,26.802395599388078,26.802395599388078,33.14454968590307,33.14454968590307
Gemma-2B,29.69019,29.69019,39.15663,39.15663,29.77625,29.77625,38.64028,38.64028
Gemma-7B,31.58348,31.58348,47.59036,47.59036,34.68158,34.68158,48.88124,48.88124
Meta-Llama-3-8B-Instruct,35.904696806952444,35.904696806952444,38.94801939914722,38.94801939914722,41.717931191615406,41.717931191615406,31.059792337987826,31.059792337987826
Qwen1.5-14B-Base,45.18072,45.18072,59.1222,59.1222,61.10155,61.10155,52.4957,52.4957
Qwen1.5-14B-Chat,54.04475,53.87263,62.56454,63.85542,58.77797,58.0895,63.42513,65.57659