Update README.md
README.md CHANGED
@@ -74,28 +74,16 @@ CodeLlama-13B-Python: 42.89
 
 CodeLlama-13B: 35.07
 
-## lm-evaluation-harness
-
-```json
-{'ARC (acc_norm)': ,
- 'HellaSwag (acc_norm)': ,
- 'MMLU (acc)': ,
- 'TruthfulQA (mc2)': ,
- 'Winoground (acc)': ,
- 'GSM8K (acc)': ,
- 'DROP (f1)': ,
- 'Open LLM Score': }
-```
 
 [Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 
 | Metric | Value |
 | --- | --- |
-| ARC | |
-| HellaSwag | |
-| MMLU | |
-| TruthfulQA | |
-| Winoground | |
-| GSM8K | |
-| Average | |
+| ARC | 58.79 |
+| HellaSwag | 81.89 |
+| MMLU | 61.27 |
+| TruthfulQA | 49.85 |
+| Winogrande | 78.22 |
+| GSM8K | 56.33 |
+| Average | 64.39 |
 
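For reference, the new "Average" row is the plain (unweighted) mean of the six leaderboard metrics above. A quick sketch to verify the arithmetic:

```python
# Unweighted mean of the six Open LLM Leaderboard metrics from the table.
scores = {
    "ARC": 58.79,
    "HellaSwag": 81.89,
    "MMLU": 61.27,
    "TruthfulQA": 49.85,
    "Winogrande": 78.22,
    "GSM8K": 56.33,
}
average = sum(scores.values()) / len(scores)
print(f"Average: {average:.2f}")  # Average: 64.39
```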
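The removed placeholder referred to lm-evaluation-harness, the framework behind the Open LLM Leaderboard. Below is a minimal, hypothetical sketch of scoring a single leaderboard task with the harness's Python API; the model id is a placeholder, and argument names and result keys differ between harness releases, so treat this as an outline rather than the exact invocation used to produce these numbers:

```python
# Hypothetical sketch: evaluate one leaderboard task (25-shot ARC) with
# lm-evaluation-harness. Exact APIs vary by version; this follows the
# v0.4-style entry point.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                           # HuggingFace transformers backend
    model_args="pretrained=<model-id>",   # placeholder model identifier
    tasks=["arc_challenge"],              # the leaderboard's ARC task
    num_fewshot=25,                       # leaderboard uses 25-shot for ARC
)
print(results["results"]["arc_challenge"])  # per-task metrics (acc, acc_norm)
```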