Spaces:
Runtime error
Runtime error
Update small.json
Browse files- small.json +11 -11
small.json
CHANGED
@@ -5,7 +5,7 @@
|
|
5 |
"MMLU": 36.97,
|
6 |
"ARC":60.94,
|
7 |
"WinoGrande": 46.88,
|
8 |
-
"
|
9 |
"CommonsenseQA": 49.15,
|
10 |
"Race": 37.81,
|
11 |
"MedMCQA": 22.61,
|
@@ -17,7 +17,7 @@
|
|
17 |
"MMLU": 9.99,
|
18 |
"ARC":15.84 ,
|
19 |
"WinoGrande": 40.96,
|
20 |
-
"
|
21 |
"CommonsenseQA": 31.13,
|
22 |
"Race": 34.91,
|
23 |
"MedMCQA": 4.7,
|
@@ -29,7 +29,7 @@
|
|
29 |
"MMLU": 17.52,
|
30 |
"ARC":23.93,
|
31 |
"WinoGrande": 16.10,
|
32 |
-
"
|
33 |
"CommonsenseQA": 27.46,
|
34 |
"Race": 14.32,
|
35 |
"MedMCQA": 4.57,
|
@@ -42,7 +42,7 @@
|
|
42 |
"MMLU": 9.22,
|
43 |
"ARC":14.95,
|
44 |
"WinoGrande": 14.76,
|
45 |
-
"
|
46 |
"CommonsenseQA": 9.01,
|
47 |
"Race": 16.19,
|
48 |
"MedMCQA": 1.68,
|
@@ -54,7 +54,7 @@
|
|
54 |
"MMLU": 9.21,
|
55 |
"ARC":13.5,
|
56 |
"WinoGrande": 16.97,
|
57 |
-
"
|
58 |
"CommonsenseQA": 11.41,
|
59 |
"Race": 14.35,
|
60 |
"MedMCQA": 1.86,
|
@@ -66,7 +66,7 @@
|
|
66 |
"MMLU": 8.54,
|
67 |
"ARC":13.18,
|
68 |
"WinoGrande": 6.16,
|
69 |
-
"
|
70 |
"CommonsenseQA": 13.10,
|
71 |
"Race": 13.61,
|
72 |
"MedMCQA": 2.07,
|
@@ -78,7 +78,7 @@
|
|
78 |
"MMLU": 9.66,
|
79 |
"ARC":14.69,
|
80 |
"WinoGrande": 11.52,
|
81 |
-
"
|
82 |
"CommonsenseQA": 9.01,
|
83 |
"Race": 12.76,
|
84 |
"MedMCQA": 3.19,
|
@@ -90,7 +90,7 @@
|
|
90 |
"MMLU": 8.94,
|
91 |
"ARC":13.31,
|
92 |
"WinoGrande": 12.23,
|
93 |
-
"
|
94 |
"CommonsenseQA": 6.06,
|
95 |
"Race": 16.7,
|
96 |
"MedMCQA": 2.07,
|
@@ -102,7 +102,7 @@
|
|
102 |
"MMLU": 7.40,
|
103 |
"ARC":11.83,
|
104 |
"WinoGrande": 12.47,
|
105 |
-
"
|
106 |
"CommonsenseQA": 7.61,
|
107 |
"Race": 13.61,
|
108 |
"MedMCQA": 1.25,
|
@@ -114,7 +114,7 @@
|
|
114 |
"MMLU": 6.94,
|
115 |
"ARC": 6.69,
|
116 |
"WinoGrande": 10.81,
|
117 |
-
"
|
118 |
"CommonsenseQA": 6.34,
|
119 |
"Race": 13.75,
|
120 |
"MedMCQA": 2.63,
|
@@ -126,7 +126,7 @@
|
|
126 |
"MMLU": 5.37,
|
127 |
"ARC":4.43,
|
128 |
"WinoGrande": 9.31,
|
129 |
-
"
|
130 |
"CommonsenseQA": 6.2,
|
131 |
"Race": 6.9,
|
132 |
"MedMCQA": 1.04,
|
|
|
5 |
"MMLU": 36.97,
|
6 |
"ARC":60.94,
|
7 |
"WinoGrande": 46.88,
|
8 |
+
"PIQA": 32.04,
|
9 |
"CommonsenseQA": 49.15,
|
10 |
"Race": 37.81,
|
11 |
"MedMCQA": 22.61,
|
|
|
17 |
"MMLU": 9.99,
|
18 |
"ARC":15.84 ,
|
19 |
"WinoGrande": 40.96,
|
20 |
+
"PIQA": 15.52,
|
21 |
"CommonsenseQA": 31.13,
|
22 |
"Race": 34.91,
|
23 |
"MedMCQA": 4.7,
|
|
|
29 |
"MMLU": 17.52,
|
30 |
"ARC":23.93,
|
31 |
"WinoGrande": 16.10,
|
32 |
+
"PIQA": 15.09,
|
33 |
"CommonsenseQA": 27.46,
|
34 |
"Race": 14.32,
|
35 |
"MedMCQA": 4.57,
|
|
|
42 |
"MMLU": 9.22,
|
43 |
"ARC":14.95,
|
44 |
"WinoGrande": 14.76,
|
45 |
+
"PIQA": 5.32,
|
46 |
"CommonsenseQA": 9.01,
|
47 |
"Race": 16.19,
|
48 |
"MedMCQA": 1.68,
|
|
|
54 |
"MMLU": 9.21,
|
55 |
"ARC":13.5,
|
56 |
"WinoGrande": 16.97,
|
57 |
+
"PIQA": 0.86,
|
58 |
"CommonsenseQA": 11.41,
|
59 |
"Race": 14.35,
|
60 |
"MedMCQA": 1.86,
|
|
|
66 |
"MMLU": 8.54,
|
67 |
"ARC":13.18,
|
68 |
"WinoGrande": 6.16,
|
69 |
+
"PIQA": 8.05,
|
70 |
"CommonsenseQA": 13.10,
|
71 |
"Race": 13.61,
|
72 |
"MedMCQA": 2.07,
|
|
|
78 |
"MMLU": 9.66,
|
79 |
"ARC":14.69,
|
80 |
"WinoGrande": 11.52,
|
81 |
+
"PIQA": 4.17,
|
82 |
"CommonsenseQA": 9.01,
|
83 |
"Race": 12.76,
|
84 |
"MedMCQA": 3.19,
|
|
|
90 |
"MMLU": 8.94,
|
91 |
"ARC":13.31,
|
92 |
"WinoGrande": 12.23,
|
93 |
+
"PIQA": 3.59,
|
94 |
"CommonsenseQA": 6.06,
|
95 |
"Race": 16.7,
|
96 |
"MedMCQA": 2.07,
|
|
|
102 |
"MMLU": 7.40,
|
103 |
"ARC":11.83,
|
104 |
"WinoGrande": 12.47,
|
105 |
+
"PIQA": 4.48,
|
106 |
"CommonsenseQA": 7.61,
|
107 |
"Race": 13.61,
|
108 |
"MedMCQA": 1.25,
|
|
|
114 |
"MMLU": 6.94,
|
115 |
"ARC": 6.69,
|
116 |
"WinoGrande": 10.81,
|
117 |
+
"PIQA": 4.31,
|
118 |
"CommonsenseQA": 6.34,
|
119 |
"Race": 13.75,
|
120 |
"MedMCQA": 2.63,
|
|
|
126 |
"MMLU": 5.37,
|
127 |
"ARC":4.43,
|
128 |
"WinoGrande": 9.31,
|
129 |
+
"PIQA": 2.16,
|
130 |
"CommonsenseQA": 6.2,
|
131 |
"Race": 6.9,
|
132 |
"MedMCQA": 1.04,
|