Update README.md
Browse files
README.md
CHANGED
@@ -73,7 +73,7 @@ print(response)
|
|
73 |
|
74 |
## Benchmarks
|
75 |
|
76 |
-
Nous Benchmark:
|
77 |
|
78 |
| Model |AGIEval|GPT4All|TruthfulQA|Bigbench|Average|
|
79 |
|---------------------------------------------------|------:|------:|---------:|-------:|------:|
|
@@ -159,11 +159,12 @@ Average: 48.54%
|
|
159 |
|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|44.00|± | 2.87|
|
160 |
|
161 |
Average: 41.43%
|
162 |
-
|
163 |
-
Average score: 51.25%
|
164 |
```
|
165 |
|
166 |
-
|
|
|
|
|
|
|
167 |
|
168 |
| Model |ARC |HellaSwag|MMLU |TruthfulQA|Winogrande|GSM8K|Average|
|
169 |
|---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
|
@@ -477,6 +478,6 @@ Average: 77.35%
|
|
477 |
| | |alias |gsm8k| | |
|
478 |
|
479 |
Average: 67.48%
|
|
|
480 |
|
481 |
-
Average score
|
482 |
-
```
|
|
|
73 |
|
74 |
## Benchmarks
|
75 |
|
76 |
+
### Nous Benchmark:
|
77 |
|
78 |
| Model |AGIEval|GPT4All|TruthfulQA|Bigbench|Average|
|
79 |
|---------------------------------------------------|------:|------:|---------:|-------:|------:|
|
|
|
159 |
|bigbench_tracking_shuffled_objects_three_objects| 0|multiple_choice_grade|44.00|± | 2.87|
|
160 |
|
161 |
Average: 41.43%
|
|
|
|
|
162 |
```
|
163 |
|
164 |
+
**Average score**: 51.25%
|
165 |
+
|
166 |
+
|
167 |
+
### OpenLLM Benchmark:
|
168 |
|
169 |
| Model |ARC |HellaSwag|MMLU |TruthfulQA|Winogrande|GSM8K|Average|
|
170 |
|---------------------------------------------------|---:|--------:|----:|---------:|---------:|----:|------:|
|
|
|
478 |
| | |alias |gsm8k| | |
|
479 |
|
480 |
Average: 67.48%
|
481 |
+
```
|
482 |
|
483 |
+
**Average score**: 67.48%
|
|