name,zero_naive,zero_self_con,zero_cot,zero_cot_self_con,few_naive,few_self_con,few_cot,few_cot_self_con Baichuan2-13B-Chat,37.5,40.0,47.5,52.5,37.5,37.5,42.5,45.0 ChatGLM3-6B,35.0,35.0,50.0,50.0,47.5,47.5,45.0,45.0 DevOps-Model-14B-Chat,35.0,27.5,37.5,52.5,50.0,50.0,55.0,62.5 ERNIE-Bot-4.0,57.5,57.5,60.0,60.0,52.5,52.5,57.5,57.5 GPT-3.5-turbo,50.0,47.5,55.0,55.0,40.0,40.0,50.0,55.0 GPT-4,57.5,57.5,57.5,57.5,52.5,52.5,62.5,62.5 InternLM2-Chat-20B,47.5,47.5,,,47.5,47.5,, InternLM2-Chat-7B,60.0,60.0,57.5,57.5,55.0,55.0,62.5,62.5 LLaMA-2-13B,42.5,42.5,50.0,50.0,50.0,50.0,42.5,42.5 LLaMA-2-70B-Chat,0.0,0.0,57.5,57.5,25.0,25.0,45.0,45.0 LLaMA-2-7B,32.5,32.5,45.0,45.0,45.0,45.0,45.0,45.0 Mistral-7B,0.0,0.0,37.5,37.5,20.0,20.0,50.0,50.0 Qwen-14B-Chat,47.5,45.0,50.0,47.5,50.0,47.5,55.0,57.5 Qwen-72B-Chat,50.0,50.0,47.5,47.5,45.0,45.0,60.0,60.0 Yi-34B-Chat,55.0,55.0,60.0,67.5,50.0,50.0,52.5,55.0 Claude-3-Opus,72.85714285714286,72.85714285714286,,,,,, Deepseek-R1-Distill-Llama-8B,51.42857142857143,51.42857142857143,48.57142857142857,48.57142857142857,50.0,50.0,41.42857142857143,41.42857142857143 Deepseek-R1-Distill-Qwen-1.5B,42.857142857142854,42.857142857142854,45.71428571428571,45.71428571428571,34.285714285714285,34.285714285714285,34.285714285714285,34.285714285714285 Deepseek-R1-Distill-Qwen-14B,50.0,50.0,,,51.42857142857143,51.42857142857143,, Deepseek-R1-Distill-Qwen-32B,77.14285714285714,77.14285714285714,,,64.28571428571428,64.28571428571428,, Deepseek-R1-Distill-Qwen-7B,35.71428571428571,35.71428571428571,41.42857142857143,41.42857142857143,50.0,50.0,22.857142857142858,22.857142857142858 Gemma-2B,37.5,37.5,40.0,40.0,32.5,32.5,40.0,40.0 Gemma-7B,32.5,32.5,62.5,62.5,40.0,40.0,50.0,50.0 Meta-Llama-3-8B-Instruct,52.85714285714286,52.85714285714286,47.14285714285714,47.14285714285714,52.85714285714286,52.85714285714286,30.0,30.0 Qwen1.5-14B-Base,47.5,47.5,50.0,50.0,47.5,47.5,45.0,45.0 Qwen1.5-14B-Chat,45.0,47.5,60.0,72.5,52.5,55.0,60.0,60.0