|
Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,CFI,SRMR,RMSEA |
|
phi-3-mini-128k-instruct,0.34490740740740744,0.39387631706052895,0.039299993295009855,0.281800547806919,0.7509527777777777,0.25489166666666674,0.22045000000000003 |
|
phi-3-medium-128k-instruct,0.34317129629629634,0.4026069526718651,0.09692037989916814,0.2651981204439735,0.6727694444444445,0.2984500000000001,0.2759472222222221 |
|
Mistral-7B-Instruct-v0.1,0.1996527777777778,0.30674462188144647,0.027216280472015988,0.2829498135031582,0.500288888888889,0.45314444444444446,0.4191027777777777 |
|
Mistral-7B-Instruct-v0.2,0.3755787037037038,0.4028886762146369,0.14417876497818388,0.265188983528973,0.5787944444444445,0.35010277777777776,0.3171083333333333 |
|
Mistral-7B-Instruct-v0.3,0.2708333333333333,0.34429493368035685,0.07960539866974455,0.2742399030139009,0.5231444444444444,0.4214972222222223,0.3914694444444443 |
|
Mixtral-8x7B-Instruct-v0.1,0.4496527777777778,0.47204265176392696,0.21473356319081474,0.2624402608740656,0.6766166666666665,0.25611666666666666,0.24065277777777772 |
|
Mixtral-8x22B-Instruct-v0.1,0.26620370370370366,0.3496962191659786,0.1414001940345544,0.2548838005881672,0.45902777777777776,0.4849916666666666,0.4871833333333333 |
|
command_r_plus,0.5815972222222222,0.5698450422762357,0.3429686514651868,0.23811982320641845,0.7772111111111112,0.17755277777777778,0.17465277777777777 |
|
llama_3_8b_instruct,0.48900462962962954,0.5066363890459272,0.24527785038654715,0.245806400289881,0.7348277777777779,0.20952222222222228,0.20751944444444437 |
|
llama_3_70b_instruct,0.7291666666666666,0.7270613281502669,0.607020698814379,0.18525883672204868,0.8298166666666668,0.10965277777777771,0.14649722222222217 |
|
llama_3.1_8b_instruct,0.5434027777777778,0.5599895255443657,0.4295080949846363,0.22060228669473025,0.6379333333333334,0.3225500000000001,0.3328972222222223 |
|
llama_3.1_70b_instruct,0.7847222222222222,0.7630277652278956,0.691365862744007,0.1709718847084183,0.8203805555555554,0.14023055555555552,0.17041944444444446 |
|
llama_3.1_405b_instruct_4bit,0.6886574074074073,0.6993503239272297,0.7232098126552619,0.1702199925365422,0.6062611111111111,0.3538527777777777,0.38022500000000004 |
|
Qwen2-7B-Instruct,0.43287037037037035,0.46812644016430927,0.25108519506513916,0.25776537005719313,0.6248583333333334,0.32358611111111113,0.3028361111111111 |
|
Qwen2-72B-Instruct,0.5810185185185186,0.64867678910782,0.6465993243020925,0.20297742879025626,0.5559722222222221,0.3575638888888889,0.39241388888888884 |
|
Qwen2.5-0.5B-Instruct,0.28877314814814814,0.3796838812739187,0.002970456550606876,0.2928913315666324,0.7497416666666666,0.24648888888888887,0.18477222222222223 |
|
Qwen2.5-7B-Instruct,0.6186342592592592,0.5896473181421169,0.333554494486959,0.2505866550331236,0.8311222222222222,0.10302222222222213,0.09455277777777782 |
|
Qwen2.5-32B-Instruct,0.7442129629629629,0.731635015756055,0.6724190751477237,0.1806656189868978,0.7584111111111111,0.19748055555555544,0.21686111111111106 |
|
Qwen2.5-72B-Instruct,0.7991898148148148,0.754401345305127,0.6974116787371809,0.16176650806326276,0.7859583333333332,0.177875,0.2007527777777779 |
|
gpt-3.5-turbo-0125,0.21643518518518517,0.328243163867074,0.08240359836763214,0.28728574920060357,0.4998916666666666,0.47583055555555553,0.4404444444444445 |
|
gpt-4o-0513,0.7025462962962963,0.6713251724661671,0.5122163952167618,0.19201420113771173,0.7998694444444445,0.14606111111111109,0.1400583333333334 |
|
gpt-4o-mini-2024-07-18,0.3628472222222222,0.40825697940501954,0.13575309046266867,0.2707065266105181,0.6141777777777777,0.32648055555555555,0.29394722222222214 |
|
Mistral-Large-Instruct-2407,0.8217592592592592,0.7808285247091349,0.7644582301049158,0.16944638941325085,0.7604888888888888,0.18767499999999993,0.21457222222222228 |
|
Mistral-Small-Instruct-2409,0.7083333333333335,0.7319149695591499,0.6416815833333804,0.1894343546381,0.7891722222222222,0.1387222222222222,0.17242222222222225 |
|
dummy,0.14872685185185186,0.2784036220050126,-0.009004148398032956,0.2928877637010999,0.5076361111111111,0.4973388888888889,0.4541638888888889 |
|
|