yentinglin
committed on
Commit
•
7708da0
1
Parent(s):
b6d5518
Update README.md
Browse files
README.md
CHANGED
@@ -186,4 +186,78 @@ Taiwan LLM v2 is conducted in collaboration with [Ubitus K.K.](http://ubitus.net
|
|
186 |
|leaderboard:mmlu:world_religions:5 | 0|acc |0.7661|± |0.0325|
|
187 |
|leaderboard:truthfulqa:mc:0 | 0|truthfulqa_mc1|0.2840|± |0.0158|
|
188 |
| | |truthfulqa_mc2|0.4423|± |0.0146|
|
189 |
-
|leaderboard:winogrande:5 | 0|acc |0.7593|± |0.0120|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
|leaderboard:mmlu:world_religions:5 | 0|acc |0.7661|± |0.0325|
|
187 |
|leaderboard:truthfulqa:mc:0 | 0|truthfulqa_mc1|0.2840|± |0.0158|
|
188 |
| | |truthfulqa_mc2|0.4423|± |0.0146|
|
189 |
+
|leaderboard:winogrande:5 | 0|acc |0.7593|± |0.0120|
|
190 |
+
|
191 |
+
|
192 |
+
## TC-Eval
|
193 |
+
### DRCD TODO
|
194 |
+
community|tc-eval-v2:drcd|0|0
|
195 |
+
### Table TODO
|
196 |
+
community|tc-eval-v2:penguin_table|0|0
|
197 |
+
### MMLU
|
198 |
+
community|tc-eval-v2:tmmluplus-accounting|5|0
|
199 |
+
community|tc-eval-v2:tmmluplus-administrative_law|5|0
|
200 |
+
community|tc-eval-v2:tmmluplus-advance_chemistry|5|0
|
201 |
+
community|tc-eval-v2:tmmluplus-agriculture|5|0
|
202 |
+
community|tc-eval-v2:tmmluplus-anti_money_laundering|5|0
|
203 |
+
community|tc-eval-v2:tmmluplus-auditing|5|0
|
204 |
+
community|tc-eval-v2:tmmluplus-basic_medical_science|5|0
|
205 |
+
community|tc-eval-v2:tmmluplus-business_management|5|0
|
206 |
+
community|tc-eval-v2:tmmluplus-chinese_language_and_literature|5|0
|
207 |
+
community|tc-eval-v2:tmmluplus-clinical_psychology|5|0
|
208 |
+
community|tc-eval-v2:tmmluplus-computer_science|5|0
|
209 |
+
community|tc-eval-v2:tmmluplus-culinary_skills|5|0
|
210 |
+
community|tc-eval-v2:tmmluplus-dentistry|5|0
|
211 |
+
community|tc-eval-v2:tmmluplus-economics|5|0
|
212 |
+
community|tc-eval-v2:tmmluplus-education|5|0
|
213 |
+
community|tc-eval-v2:tmmluplus-education_(profession_level)|5|0
|
214 |
+
community|tc-eval-v2:tmmluplus-educational_psychology|5|0
|
215 |
+
community|tc-eval-v2:tmmluplus-engineering_math|5|0
|
216 |
+
community|tc-eval-v2:tmmluplus-finance_banking|5|0
|
217 |
+
community|tc-eval-v2:tmmluplus-financial_analysis|5|0
|
218 |
+
community|tc-eval-v2:tmmluplus-fire_science|5|0
|
219 |
+
community|tc-eval-v2:tmmluplus-general_principles_of_law|5|0
|
220 |
+
community|tc-eval-v2:tmmluplus-geography_of_taiwan|5|0
|
221 |
+
community|tc-eval-v2:tmmluplus-human_behavior|5|0
|
222 |
+
community|tc-eval-v2:tmmluplus-insurance_studies|5|0
|
223 |
+
community|tc-eval-v2:tmmluplus-introduction_to_law|5|0
|
224 |
+
community|tc-eval-v2:tmmluplus-jce_humanities|5|0
|
225 |
+
community|tc-eval-v2:tmmluplus-junior_chemistry|5|0
|
226 |
+
community|tc-eval-v2:tmmluplus-junior_chinese_exam|5|0
|
227 |
+
community|tc-eval-v2:tmmluplus-junior_math_exam|5|0
|
228 |
+
community|tc-eval-v2:tmmluplus-junior_science_exam|5|0
|
229 |
+
community|tc-eval-v2:tmmluplus-junior_social_studies|5|0
|
230 |
+
community|tc-eval-v2:tmmluplus-logic_reasoning|5|0
|
231 |
+
community|tc-eval-v2:tmmluplus-macroeconomics|5|0
|
232 |
+
community|tc-eval-v2:tmmluplus-management_accounting|5|0
|
233 |
+
community|tc-eval-v2:tmmluplus-marketing_management|5|0
|
234 |
+
community|tc-eval-v2:tmmluplus-mechanical|5|0
|
235 |
+
community|tc-eval-v2:tmmluplus-music|5|0
|
236 |
+
community|tc-eval-v2:tmmluplus-national_protection|5|0
|
237 |
+
community|tc-eval-v2:tmmluplus-nautical_science|5|0
|
238 |
+
community|tc-eval-v2:tmmluplus-occupational_therapy_for_psychological_disorders|5|0
|
239 |
+
community|tc-eval-v2:tmmluplus-official_document_management|5|0
|
240 |
+
community|tc-eval-v2:tmmluplus-optometry|5|0
|
241 |
+
community|tc-eval-v2:tmmluplus-organic_chemistry|5|0
|
242 |
+
community|tc-eval-v2:tmmluplus-pharmacology|5|0
|
243 |
+
community|tc-eval-v2:tmmluplus-pharmacy|5|0
|
244 |
+
community|tc-eval-v2:tmmluplus-physical_education|5|0
|
245 |
+
community|tc-eval-v2:tmmluplus-physics|5|0
|
246 |
+
community|tc-eval-v2:tmmluplus-politic_science|5|0
|
247 |
+
community|tc-eval-v2:tmmluplus-real_estate|5|0
|
248 |
+
community|tc-eval-v2:tmmluplus-secondary_physics|5|0
|
249 |
+
community|tc-eval-v2:tmmluplus-statistics_and_machine_learning|5|0
|
250 |
+
community|tc-eval-v2:tmmluplus-taiwanese_hokkien|5|0
|
251 |
+
community|tc-eval-v2:tmmluplus-taxation|5|0
|
252 |
+
community|tc-eval-v2:tmmluplus-technical|5|0
|
253 |
+
community|tc-eval-v2:tmmluplus-three_principles_of_people|5|0
|
254 |
+
community|tc-eval-v2:tmmluplus-trade|5|0
|
255 |
+
community|tc-eval-v2:tmmluplus-traditional_chinese_medicine_clinical_medicine|5|0
|
256 |
+
community|tc-eval-v2:tmmluplus-trust_practice|5|0
|
257 |
+
community|tc-eval-v2:tmmluplus-ttqav2|5|0
|
258 |
+
community|tc-eval-v2:tmmluplus-tve_chinese_language|5|0
|
259 |
+
community|tc-eval-v2:tmmluplus-tve_design|5|0
|
260 |
+
community|tc-eval-v2:tmmluplus-tve_mathematics|5|0
|
261 |
+
community|tc-eval-v2:tmmluplus-tve_natural_sciences|5|0
|
262 |
+
community|tc-eval-v2:tmmluplus-veterinary_pathology|5|0
|
263 |
+
community|tc-eval-v2:tmmluplus-veterinary_pharmacology|5|0
|