xianchaowu committed on
Commit
215c0a2
1 Parent(s): fd30dcd

add mmlu eval and mmlu test

Files changed (1): README.md +126 -61
README.md CHANGED
@@ -8,7 +8,7 @@ license: llama2

0. uses the updated [Meta's LLaMA-2 models](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf).
1. supports [4-bit QLoRA](https://arxiv.org/abs/2305.14314), with large savings in GPU memory and inference time;
- 2. better MMLU evaluation results: from llama2-7b's 45.3% to our 47.95% (+2.65%).
+ 2. better MMLU evaluation results: from llama2-7b's 45.3% to our 46.69% (+1.39%) on the MMLU test split and 46.04% (+0.74%) on the MMLU eval split. ['0ede8dd71e923db6258295621d817ca8714516d4']

### Introduction
Determine the rank of each LoRA layer from the singular values of the pretrained weight matrices.
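The feature list and the Introduction line above are the technical core of this change: the adapter is trained with 4-bit QLoRA, and the rank of each LoRA layer is chosen from the singular values of the corresponding pretrained weight matrix. The two sketches below are illustrative only, not code from this repository. The first shows one plausible singular-value criterion (a 0.90 energy threshold, which is an assumption, not this repo's documented setting):

```python
import torch

def lora_rank_from_singular_values(weight: torch.Tensor, energy: float = 0.90) -> int:
    """Smallest rank whose top singular values keep `energy` of the total spectrum.
    The 0.90 threshold is an assumption for illustration, not this repo's setting."""
    s = torch.linalg.svdvals(weight.float())           # singular values, descending
    cumulative = torch.cumsum(s, dim=0) / s.sum()      # normalized cumulative spectrum
    return int(torch.searchsorted(cumulative, torch.tensor(energy)).item()) + 1

# Toy call on a random matrix shaped like a 7B attention projection; real pretrained
# weights have much faster-decaying spectra and therefore give much smaller ranks.
print(lora_rank_from_singular_values(torch.randn(4096, 4096)))
```

The second is a standard 4-bit (NF4) loading path with bitsandbytes and PEFT; `adapter_id` is a hypothetical placeholder, and `model.print_trainable_parameters()` is the call referenced in the second hunk below:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-chat-hf"
adapter_id = "path/to/this-lora-adapter"   # hypothetical placeholder for this repo's adapter id

# QLoRA-style 4-bit NF4 quantization keeps the frozen base weights small in GPU memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)   # attach the LoRA adapter
model.print_trainable_parameters()
```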
@@ -85,67 +85,132 @@ model.print_trainable_parameters()

## MMLU result:

- ```json
- {"mmlu_loss": 1.4239519843083437,
- "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
- "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
- "mmlu_eval_accuracy_marketing": 0.8,
- "mmlu_eval_accuracy_jurisprudence": 0.36363636363636365,
- "mmlu_eval_accuracy_sociology": 0.7272727272727273,
- "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
- "mmlu_eval_accuracy_philosophy": 0.4411764705882353,
- "mmlu_eval_accuracy_high_school_european_history": 0.5,
- "mmlu_eval_accuracy_public_relations": 0.5833333333333334,
- "mmlu_eval_accuracy_high_school_microeconomics": 0.4230769230769231,
- "mmlu_eval_accuracy_global_facts": 0.5,
- "mmlu_eval_accuracy_high_school_government_and_politics": 0.5238095238095238,
- "mmlu_eval_accuracy_anatomy": 0.5,
- "mmlu_eval_accuracy_moral_disputes": 0.4473684210526316,
- "mmlu_eval_accuracy_machine_learning": 0.36363636363636365,
- "mmlu_eval_accuracy_professional_law": 0.3,
- "mmlu_eval_accuracy_management": 0.6363636363636364,
- "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
- "mmlu_eval_accuracy_prehistory": 0.5428571428571428,
- "mmlu_eval_accuracy_high_school_biology": 0.46875,
- "mmlu_eval_accuracy_nutrition": 0.6060606060606061,
- "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
- "mmlu_eval_accuracy_computer_security": 0.18181818181818182,
- "mmlu_eval_accuracy_international_law": 0.9230769230769231,
- "mmlu_eval_accuracy_high_school_mathematics": 0.20689655172413793,
- "mmlu_eval_accuracy_miscellaneous": 0.627906976744186,
- "mmlu_eval_accuracy_high_school_macroeconomics": 0.5348837209302325,
- "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
- "mmlu_eval_accuracy_conceptual_physics": 0.38461538461538464,
- "mmlu_eval_accuracy_elementary_mathematics": 0.24390243902439024,
- "mmlu_eval_accuracy_high_school_geography": 0.7272727272727273,
- "mmlu_eval_accuracy_medical_genetics": 0.8181818181818182,
- "mmlu_eval_accuracy_world_religions": 0.7368421052631579,
- "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
- "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
- "mmlu_eval_accuracy_security_studies": 0.5555555555555556,
- "mmlu_eval_accuracy_high_school_us_history": 0.7727272727272727,
- "mmlu_eval_accuracy_high_school_chemistry": 0.2727272727272727,
- "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
- "mmlu_eval_accuracy_electrical_engineering": 0.25,
- "mmlu_eval_accuracy_professional_accounting": 0.3548387096774194,
- "mmlu_eval_accuracy_college_biology": 0.375,
- "mmlu_eval_accuracy_professional_medicine": 0.3870967741935484,
- "mmlu_eval_accuracy_moral_scenarios": 0.31,
- "mmlu_eval_accuracy_business_ethics": 0.5454545454545454,
- "mmlu_eval_accuracy_astronomy": 0.375,
- "mmlu_eval_accuracy_high_school_world_history": 0.5769230769230769,
- "mmlu_eval_accuracy_high_school_statistics": 0.391304347826087,
- "mmlu_eval_accuracy_us_foreign_policy": 0.7272727272727273,
- "mmlu_eval_accuracy_professional_psychology": 0.463768115942029,
- "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
- "mmlu_eval_accuracy_college_medicine": 0.3181818181818182,
- "mmlu_eval_accuracy_virology": 0.3888888888888889,
- "mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
- "mmlu_eval_accuracy_college_chemistry": 0.375,
- "mmlu_eval_accuracy_logical_fallacies": 0.6666666666666666,
- "mmlu_eval_accuracy_high_school_psychology": 0.7166666666666667,
- "mmlu_eval_accuracy": 0.47949665158112187,
- "epoch": 2.71}
- ```
+ ### MMLU eval result:
+
+ ```json
+ {"mmlu_loss": 1.6819591112653856,
+ "mmlu_eval_accuracy_econometrics": 0.16666666666666666,
+ "mmlu_eval_accuracy_marketing": 0.68,
+ "mmlu_eval_accuracy_formal_logic": 0.21428571428571427,
+ "mmlu_eval_accuracy_high_school_macroeconomics": 0.46511627906976744,
+ "mmlu_eval_accuracy_high_school_government_and_politics": 0.5238095238095238,
+ "mmlu_eval_accuracy_college_physics": 0.45454545454545453,
+ "mmlu_eval_accuracy_medical_genetics": 0.8181818181818182,
+ "mmlu_eval_accuracy_international_law": 0.9230769230769231,
+ "mmlu_eval_accuracy_management": 0.6363636363636364,
+ "mmlu_eval_accuracy_philosophy": 0.35294117647058826,
+ "mmlu_eval_accuracy_high_school_geography": 0.6363636363636364,
+ "mmlu_eval_accuracy_professional_accounting": 0.2903225806451613,
+ "mmlu_eval_accuracy_jurisprudence": 0.45454545454545453,
+ "mmlu_eval_accuracy_miscellaneous": 0.627906976744186,
+ "mmlu_eval_accuracy_human_aging": 0.6956521739130435,
+ "mmlu_eval_accuracy_prehistory": 0.4857142857142857,
+ "mmlu_eval_accuracy_electrical_engineering": 0.3125,
+ "mmlu_eval_accuracy_high_school_biology": 0.375,
+ "mmlu_eval_accuracy_high_school_statistics": 0.34782608695652173,
+ "mmlu_eval_accuracy_moral_scenarios": 0.28,
+ "mmlu_eval_accuracy_sociology": 0.6363636363636364,
+ "mmlu_eval_accuracy_world_religions": 0.6842105263157895,
+ "mmlu_eval_accuracy_abstract_algebra": 0.36363636363636365,
+ "mmlu_eval_accuracy_college_mathematics": 0.36363636363636365,
+ "mmlu_eval_accuracy_conceptual_physics": 0.3076923076923077,
+ "mmlu_eval_accuracy_moral_disputes": 0.47368421052631576,
+ "mmlu_eval_accuracy_college_medicine": 0.36363636363636365,
+ "mmlu_eval_accuracy_professional_psychology": 0.391304347826087,
+ "mmlu_eval_accuracy_elementary_mathematics": 0.24390243902439024,
+ "mmlu_eval_accuracy_logical_fallacies": 0.6666666666666666,
+ "mmlu_eval_accuracy_high_school_physics": 0.35294117647058826,
+ "mmlu_eval_accuracy_college_chemistry": 0.375,
+ "mmlu_eval_accuracy_college_biology": 0.375,
+ "mmlu_eval_accuracy_anatomy": 0.5714285714285714,
+ "mmlu_eval_accuracy_machine_learning": 0.18181818181818182,
+ "mmlu_eval_accuracy_nutrition": 0.6363636363636364,
+ "mmlu_eval_accuracy_virology": 0.3333333333333333,
+ "mmlu_eval_accuracy_high_school_chemistry": 0.4090909090909091,
+ "mmlu_eval_accuracy_high_school_european_history": 0.4444444444444444,
+ "mmlu_eval_accuracy_clinical_knowledge": 0.4482758620689655,
+ "mmlu_eval_accuracy_high_school_psychology": 0.7333333333333333,
+ "mmlu_eval_accuracy_college_computer_science": 0.36363636363636365,
+ "mmlu_eval_accuracy_business_ethics": 0.45454545454545453,
+ "mmlu_eval_accuracy_professional_medicine": 0.3548387096774194,
+ "mmlu_eval_accuracy_professional_law": 0.27647058823529413,
+ "mmlu_eval_accuracy_us_foreign_policy": 0.6363636363636364,
+ "mmlu_eval_accuracy_high_school_us_history": 0.8181818181818182,
+ "mmlu_eval_accuracy_human_sexuality": 0.4166666666666667,
+ "mmlu_eval_accuracy_high_school_microeconomics": 0.38461538461538464,
+ "mmlu_eval_accuracy_high_school_computer_science": 0.5555555555555556,
+ "mmlu_eval_accuracy_security_studies": 0.5185185185185185,
+ "mmlu_eval_accuracy_astronomy": 0.4375,
+ "mmlu_eval_accuracy_public_relations": 0.5,
+ "mmlu_eval_accuracy_high_school_mathematics": 0.1724137931034483,
+ "mmlu_eval_accuracy_computer_security": 0.18181818181818182,
+ "mmlu_eval_accuracy_global_facts": 0.5,
+ "mmlu_eval_accuracy_high_school_world_history": 0.5769230769230769,
+ "mmlu_eval_accuracy": 0.46043208436613065}
+ ```
+
+ ### MMLU test result:
+
+ ```json
+ {"mmlu_loss": 1.6388045419503643,
+ "mmlu_test_accuracy_high_school_chemistry": 0.4039408866995074,
+ "mmlu_test_accuracy_abstract_algebra": 0.28,
+ "mmlu_test_accuracy_high_school_government_and_politics": 0.689119170984456,
+ "mmlu_test_accuracy_human_sexuality": 0.5419847328244275,
+ "mmlu_test_accuracy_nutrition": 0.5130718954248366,
+ "mmlu_test_accuracy_world_religions": 0.6666666666666666,
+ "mmlu_test_accuracy_high_school_physics": 0.33112582781456956,
+ "mmlu_test_accuracy_professional_accounting": 0.32978723404255317,
+ "mmlu_test_accuracy_business_ethics": 0.44,
+ "mmlu_test_accuracy_econometrics": 0.32456140350877194,
+ "mmlu_test_accuracy_machine_learning": 0.35714285714285715,
+ "mmlu_test_accuracy_moral_scenarios": 0.22569832402234638,
+ "mmlu_test_accuracy_jurisprudence": 0.5925925925925926,
+ "mmlu_test_accuracy_professional_law": 0.3239895697522816,
+ "mmlu_test_accuracy_medical_genetics": 0.48,
+ "mmlu_test_accuracy_college_chemistry": 0.36,
+ "mmlu_test_accuracy_high_school_geography": 0.5606060606060606,
+ "mmlu_test_accuracy_prehistory": 0.5185185185185185,
+ "mmlu_test_accuracy_high_school_world_history": 0.5864978902953587,
+ "mmlu_test_accuracy_professional_psychology": 0.4297385620915033,
+ "mmlu_test_accuracy_public_relations": 0.5272727272727272,
+ "mmlu_test_accuracy_high_school_psychology": 0.6256880733944954,
+ "mmlu_test_accuracy_high_school_biology": 0.5225806451612903,
+ "mmlu_test_accuracy_computer_security": 0.52,
+ "mmlu_test_accuracy_conceptual_physics": 0.3829787234042553,
+ "mmlu_test_accuracy_elementary_mathematics": 0.30423280423280424,
+ "mmlu_test_accuracy_high_school_computer_science": 0.42,
+ "mmlu_test_accuracy_marketing": 0.6495726495726496,
+ "mmlu_test_accuracy_college_mathematics": 0.33,
+ "mmlu_test_accuracy_college_biology": 0.5138888888888888,
+ "mmlu_test_accuracy_us_foreign_policy": 0.73,
+ "mmlu_test_accuracy_security_studies": 0.4775510204081633,
+ "mmlu_test_accuracy_high_school_european_history": 0.5393939393939394,
+ "mmlu_test_accuracy_international_law": 0.6363636363636364,
+ "mmlu_test_accuracy_moral_disputes": 0.5028901734104047,
+ "mmlu_test_accuracy_sociology": 0.5920398009950248,
+ "mmlu_test_accuracy_astronomy": 0.4868421052631579,
+ "mmlu_test_accuracy_professional_medicine": 0.40808823529411764,
+ "mmlu_test_accuracy_formal_logic": 0.23809523809523808,
+ "mmlu_test_accuracy_high_school_microeconomics": 0.46218487394957986,
+ "mmlu_test_accuracy_high_school_macroeconomics": 0.4307692307692308,
+ "mmlu_test_accuracy_management": 0.6601941747572816,
+ "mmlu_test_accuracy_logical_fallacies": 0.5276073619631901,
+ "mmlu_test_accuracy_high_school_mathematics": 0.25555555555555554,
+ "mmlu_test_accuracy_college_physics": 0.2647058823529412,
+ "mmlu_test_accuracy_philosophy": 0.5434083601286174,
+ "mmlu_test_accuracy_global_facts": 0.36,
+ "mmlu_test_accuracy_human_aging": 0.5695067264573991,
+ "mmlu_test_accuracy_virology": 0.4457831325301205,
+ "mmlu_test_accuracy_high_school_us_history": 0.6323529411764706,
+ "mmlu_test_accuracy_miscellaneous": 0.6615581098339719,
+ "mmlu_test_accuracy_college_computer_science": 0.37,
+ "mmlu_test_accuracy_high_school_statistics": 0.30092592592592593,
+ "mmlu_test_accuracy_electrical_engineering": 0.4689655172413793,
+ "mmlu_test_accuracy_college_medicine": 0.37572254335260113,
+ "mmlu_test_accuracy_clinical_knowledge": 0.49433962264150944,
+ "mmlu_test_accuracy_anatomy": 0.42962962962962964,
+ "mmlu_test_accuracy": 0.4669426393404124}
+ ```
 
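A note on reading the blocks above: each suffixed key is the accuracy for one MMLU subject, and the bare `mmlu_eval_accuracy` / `mmlu_test_accuracy` keys are the headline aggregates quoted in the feature list. The sketch below is a small, assumption-laden check, not this repository's evaluation code: it treats the aggregate as the unweighted mean of the per-subject scores (a common convention in QLoRA-style MMLU scripts; an example-weighted mean would differ slightly), and `mmlu_eval_result.json` is a hypothetical file holding the eval block above with the leading `+` diff markers stripped.

```python
import json

# Hypothetical input: the "MMLU eval result" JSON above, saved with '+' markers removed.
with open("mmlu_eval_result.json") as f:
    results = json.load(f)

# Per-subject accuracies carry a subject suffix; "mmlu_loss" and the bare
# "mmlu_eval_accuracy" aggregate are excluded from the average.
subject_scores = [
    value for key, value in results.items()
    if key.startswith("mmlu_eval_accuracy_")
]

# If the reported aggregate is the unweighted mean over the 57 subjects (an
# assumption), this recomputed value should match results["mmlu_eval_accuracy"].
recomputed = sum(subject_scores) / len(subject_scores)
print(len(subject_scores), round(recomputed, 6), results["mmlu_eval_accuracy"])
```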
  ## License and intended use