Omartificial-Intelligence-Space committed
Commit 863e0ef
Parent(s): 3706ee4
update about
Files changed: src/about.py (+37 -1)
src/about.py
CHANGED
@@ -1,14 +1,15 @@
 from enum import Enum
 from dataclasses import dataclass

+# Define TaskInfo and Tasks as before
 @dataclass
 class TaskInfo:
     benchmark: str
     col_name: str
     metric: str

+# Replace these with actual subjects from your dataset
 class Tasks(Enum):
-    # Replace these with actual subjects from your dataset
     History = TaskInfo(benchmark='History', col_name='History', metric='accuracy')
     Mathematics = TaskInfo(benchmark='Mathematics', col_name='Mathematics', metric='accuracy')
     Science = TaskInfo(benchmark='Science', col_name='Science', metric='accuracy')
@@ -19,3 +20,38 @@ class Tasks(Enum):
     Chemistry = TaskInfo(benchmark='Chemistry', col_name='Chemistry', metric='accuracy')
     Biology = TaskInfo(benchmark='Biology', col_name='Biology', metric='accuracy')
     ComputerScience = TaskInfo(benchmark='Computer Science', col_name='Computer Science', metric='accuracy')
+
+# Now include the variables expected by app.py
+
+TITLE = """
+<h1 align="center">🌐 LLM Leaderboard for MMMLU Evaluation 🌐</h1>
+"""
+
+INTRODUCTION_TEXT = """
+Welcome to the LLM Leaderboard for the MMMLU dataset evaluation. This leaderboard displays the performance of various language models on the MMMLU dataset across different subjects.
+"""
+
+LLM_BENCHMARKS_TEXT = """
+## About the MMMLU Benchmark
+
+The Massive Multitask Multilingual Language Understanding (MMMLU) benchmark is designed to evaluate models on a wide range of subjects.
+
+## How to Interpret the Leaderboard
+
+- **Model**: The name of the model evaluated.
+- **Average ⬆️**: The average accuracy across all subjects.
+- **Subject Columns**: The accuracy (%) for each individual subject.
+
+## How to Submit Your Model
+
+Go to the **Submit here!** tab and provide your model details to have it evaluated and appear on the leaderboard.
+"""
+
+EVALUATION_QUEUE_TEXT = """
+Below are the lists of models that have been evaluated, are currently being evaluated, or are pending evaluation.
+"""
+
+CITATION_BUTTON_LABEL = "Citation"
+CITATION_BUTTON_TEXT = """
+If you use this leaderboard or the MMMLU dataset in your research, please cite:
+
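The Tasks enum is the single source of truth for the leaderboard's subject columns: each member's col_name becomes a column, and the "Average ⬆️" column described above is the mean accuracy over those subjects. A minimal sketch of how a results table could be assembled from the enum; the model names and accuracy numbers below are placeholders, not values from the actual evaluation pipeline:

import pandas as pd

from src.about import Tasks  # the enum defined in this commit

subject_cols = [task.value.col_name for task in Tasks]

# Placeholder accuracies for two hypothetical models; the real numbers would
# come from the evaluation results, one score per subject in the enum.
raw_results = {
    "model-a": {col: 60.0 + 2.0 * i for i, col in enumerate(subject_cols)},
    "model-b": {col: 55.0 + 3.0 * i for i, col in enumerate(subject_cols)},
}

rows = []
for model, scores in raw_results.items():
    row = {"Model": model, **scores}
    # "Average ⬆️" is simply the mean accuracy over all subject columns.
    row["Average ⬆️"] = sum(scores[c] for c in subject_cols) / len(subject_cols)
    rows.append(row)

leaderboard_df = pd.DataFrame(rows, columns=["Model", "Average ⬆️", *subject_cols])
print(leaderboard_df.sort_values("Average ⬆️", ascending=False))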
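The comment "Now include the variables expected by app.py" points at how these strings are meant to be consumed: app.py imports them to build the interface. Below is a hedged sketch of such an app.py, assuming a Gradio Blocks layout in the style of the Hugging Face leaderboard template; the tab labels, placeholder DataFrame, and overall structure are illustrative assumptions, not the repository's actual app.py.

import gradio as gr
import pandas as pd

from src.about import (
    TITLE,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    EVALUATION_QUEUE_TEXT,
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
)

# Placeholder leaderboard table; the real app would load evaluation results.
results_df = pd.DataFrame(
    [{"Model": "model-a", "Average ⬆️": 61.3, "History": 71.2, "Mathematics": 55.4}]
)

with gr.Blocks() as demo:
    gr.HTML(TITLE)                  # page title banner
    gr.Markdown(INTRODUCTION_TEXT)  # short welcome text

    with gr.Tabs():
        with gr.TabItem("🏅 Leaderboard"):
            gr.Dataframe(value=results_df)
        with gr.TabItem("📝 About"):
            gr.Markdown(LLM_BENCHMARKS_TEXT)
        with gr.TabItem("🚀 Submit here!"):
            gr.Markdown(EVALUATION_QUEUE_TEXT)

    with gr.Accordion(CITATION_BUTTON_LABEL, open=False):
        gr.Textbox(value=CITATION_BUTTON_TEXT, label=CITATION_BUTTON_LABEL, lines=6)

demo.launch()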