|
- 0 babi_nli/counting |
|
- 1 babi_nli/indefinite-knowledge |
|
- 2 babi_nli/simple-negation |
|
- 3 babi_nli/three-arg-relations |
|
- 4 babi_nli/basic-induction |
|
- 5 babi_nli/time-reasoning |
|
- 6 babi_nli/compound-coreference |
|
- 7 babi_nli/path-finding |
|
- 8 babi_nli/positional-reasoning |
|
- 9 babi_nli/conjunction |
|
- 10 babi_nli/size-reasoning |
|
- 11 babi_nli/yes-no-questions |
|
- 12 babi_nli/basic-coreference |
|
- 13 babi_nli/two-supporting-facts |
|
- 14 babi_nli/lists-sets |
|
- 15 babi_nli/two-arg-relations |
|
- 16 babi_nli/three-supporting-facts |
|
- 17 babi_nli/basic-deduction |
|
- 18 babi_nli/single-supporting-fact |
|
- 19 anli/a1 |
|
- 20 anli/a2 |
|
- 21 anli/a3 |
|
- 22 sick/label |
|
- 23 sick/relatedness |
|
- 24 sick/entailment_AB |
|
- 25 sick/entailment_BA |
|
- 26 snli |
|
- 27 scitail/snli_format |
|
- 28 hans |
|
- 29 WANLI |
|
- 30 recast/recast_kg_relations |
|
- 31 recast/recast_puns |
|
- 32 recast/recast_factuality |
|
- 33 recast/recast_megaveridicality |
|
- 34 recast/recast_verbcorner |
|
- 35 recast/recast_verbnet |
|
- 36 recast/recast_ner |
|
- 37 recast/recast_sentiment |
|
- 38 probability_words_nli/usnli |
|
- 39 probability_words_nli/reasoning_1hop |
|
- 40 probability_words_nli/reasoning_2hop |
|
- 41 nan-nli/joey234--nan-nli |
|
- 42 nli_fever |
|
- 43 breaking_nli |
|
- 44 conj_nli |
|
- 45 fracas |
|
- 46 dialogue_nli |
|
- 47 mpe |
|
- 48 dnc |
|
- 49 gpt3_nli |
|
- 50 recast_white/fnplus |
|
- 51 recast_white/sprl |
|
- 52 recast_white/dpr |
|
- 53 joci |
|
- 54 contrast_nli |
|
- 55 robust_nli/IS_CS |
|
- 56 robust_nli/LI_LI |
|
- 57 robust_nli/ST_WO |
|
- 58 robust_nli/PI_SP |
|
- 59 robust_nli/PI_CD |
|
- 60 robust_nli/ST_SE |
|
- 61 robust_nli/ST_NE |
|
- 62 robust_nli/ST_LM |
|
- 63 robust_nli_is_sd |
|
- 64 robust_nli_li_ts |
|
- 65 gen_debiased_nli/snli_seq_z |
|
- 66 gen_debiased_nli/snli_z_aug |
|
- 67 gen_debiased_nli/snli_par_z |
|
- 68 gen_debiased_nli/mnli_par_z |
|
- 69 gen_debiased_nli/mnli_z_aug |
|
- 70 gen_debiased_nli/mnli_seq_z |
|
- 71 add_one_rte |
|
- 72 imppres/presupposition_cleft_uniqueness/presupposition |
|
- 73 imppres/presupposition_possessed_definites_uniqueness/presupposition |
|
- 74 imppres/presupposition_possessed_definites_existence/presupposition |
|
- 75 imppres/presupposition_only_presupposition/presupposition |
|
- 76 imppres/presupposition_all_n_presupposition/presupposition |
|
- 77 imppres/presupposition_both_presupposition/presupposition |
|
- 78 imppres/presupposition_change_of_state/presupposition |
|
- 79 imppres/presupposition_cleft_existence/presupposition |
|
- 80 imppres/presupposition_question_presupposition/presupposition |
|
- 81 imppres/implicature_modals/prag |
|
- 82 imppres/implicature_numerals_10_100/prag |
|
- 83 imppres/implicature_numerals_2_3/prag |
|
- 84 imppres/implicature_gradable_adjective/prag |
|
- 85 imppres/implicature_quantifiers/prag |
|
- 86 imppres/implicature_gradable_verb/prag |
|
- 87 imppres/implicature_connectives/prag |
|
- 88 imppres/implicature_gradable_adjective/log |
|
- 89 imppres/implicature_gradable_verb/log |
|
- 90 imppres/implicature_numerals_2_3/log |
|
- 91 imppres/implicature_numerals_10_100/log |
|
- 92 imppres/implicature_modals/log |
|
- 93 imppres/implicature_quantifiers/log |
|
- 94 imppres/implicature_connectives/log |
|
- 95 glue_diagnostics/diagnostics |
|
- 96 hlgd |
|
- 97 paws/labeled_final |
|
- 98 paws/labeled_swap |
|
- 99 quora |
|
- 100 medical_questions_pairs |
|
- 101 conll2003/pos_tags |
|
- 102 conll2003/chunk_tags |
|
- 103 conll2003/ner_tags |
|
- 104 hh-rlhf |
|
- 105 model-written-evals |
|
- 106 truthful_qa/multiple_choice |
|
- 107 fig-qa |
|
- 108 bigbench/fantasy_reasoning |
|
- 109 bigbench/nonsense_words_grammar |
|
- 110 bigbench/analytic_entailment |
|
- 111 bigbench/logic_grid_puzzle |
|
- 112 bigbench/geometric_shapes |
|
- 113 bigbench/key_value_maps |
|
- 114 bigbench/analogical_similarity |
|
- 115 bigbench/metaphor_understanding |
|
- 116 bigbench/metaphor_boolean |
|
- 117 bigbench/ruin_names |
|
- 118 bigbench/cs_algorithms |
|
- 119 bigbench/physical_intuition |
|
- 120 bigbench/mnist_ascii |
|
- 121 bigbench/moral_permissibility |
|
- 122 bigbench/emoji_movie |
|
- 123 bigbench/snarks |
|
- 124 bigbench/timedial |
|
- 125 bigbench/dark_humor_detection |
|
- 126 bigbench/gre_reading_comprehension |
|
- 127 bigbench/empirical_judgments |
|
- 128 bigbench/causal_judgment |
|
- 129 bigbench/fact_checker |
|
- 130 bigbench/logical_fallacy_detection |
|
- 131 bigbench/identify_math_theorems |
|
- 132 bigbench/dyck_languages |
|
- 133 bigbench/winowhy |
|
- 134 bigbench/logical_sequence |
|
- 135 bigbench/strategyqa |
|
- 136 bigbench/unit_interpretation |
|
- 137 bigbench/authorship_verification |
|
- 138 bigbench/undo_permutation |
|
- 139 bigbench/epistemic_reasoning |
|
- 140 bigbench/human_organs_senses |
|
- 141 bigbench/misconceptions |
|
- 142 bigbench/international_phonetic_alphabet_nli |
|
- 143 bigbench/identify_odd_metaphor |
|
- 144 bigbench/mathematical_induction |
|
- 145 bigbench/odd_one_out |
|
- 146 bigbench/reasoning_about_colored_objects |
|
- 147 bigbench/strange_stories |
|
- 148 bigbench/evaluating_information_essentiality |
|
- 149 bigbench/figure_of_speech_detection |
|
- 150 bigbench/english_proverbs |
|
- 151 bigbench/general_knowledge |
|
- 152 bigbench/tracking_shuffled_objects |
|
- 153 bigbench/physics |
|
- 154 bigbench/anachronisms |
|
- 155 bigbench/simple_ethical_questions |
|
- 156 bigbench/logical_args |
|
- 157 bigbench/suicide_risk |
|
- 158 bigbench/sentence_ambiguity |
|
- 159 bigbench/temporal_sequences |
|
- 160 bigbench/penguins_in_a_table |
|
- 161 bigbench/sports_understanding |
|
- 162 bigbench/hyperbaton |
|
- 163 bigbench/code_line_description |
|
- 164 bigbench/question_selection |
|
- 165 bigbench/disambiguation_qa |
|
- 166 bigbench/date_understanding |
|
- 167 bigbench/play_dialog_same_or_different |
|
- 168 bigbench/salient_translation_error_detection |
|
- 169 bigbench/irony_identification |
|
- 170 bigbench/emojis_emotion_prediction |
|
- 171 bigbench/hindu_knowledge |
|
- 172 bigbench/conceptual_combinations |
|
- 173 bigbench/implicatures |
|
- 174 bigbench/movie_dialog_same_or_different |
|
- 175 bigbench/social_support |
|
- 176 bigbench/presuppositions_as_nli |
|
- 177 bigbench/vitaminc_fact_verification |
|
- 178 bigbench/hhh_alignment |
|
- 179 bigbench/implicit_relations |
|
- 180 bigbench/bbq_lite_json |
|
- 181 bigbench/phrase_relatedness |
|
- 182 bigbench/logical_deduction |
|
- 183 bigbench/discourse_marker_prediction |
|
- 184 bigbench/movie_recommendation |
|
- 185 bigbench/real_or_fake_text |
|
- 186 bigbench/formal_fallacies_syllogisms_negation |
|
- 187 bigbench/crass_ai |
|
- 188 blimp/inchoative |
|
- 189 blimp/principle_A_c_command |
|
- 190 blimp/matrix_question_npi_licensor_present |
|
- 191 blimp/wh_questions_subject_gap_long_distance |
|
- 192 blimp/sentential_subject_island |
|
- 193 blimp/existential_there_quantifiers_2 |
|
- 194 blimp/sentential_negation_npi_scope |
|
- 195 blimp/complex_NP_island |
|
- 196 blimp/principle_A_reconstruction |
|
- 197 blimp/animate_subject_passive |
|
- 198 blimp/tough_vs_raising_1 |
|
- 199 blimp/wh_vs_that_with_gap |
|
- 200 blimp/principle_A_domain_2 |
|
- 201 blimp/npi_present_1 |
|
- 202 blimp/wh_vs_that_with_gap_long_distance |
|
- 203 blimp/superlative_quantifiers_1 |
|
- 204 blimp/npi_present_2 |
|
- 205 blimp/wh_questions_object_gap |
|
- 206 blimp/coordinate_structure_constraint_complex_left_branch |
|
- 207 blimp/coordinate_structure_constraint_object_extraction |
|
- 208 blimp/left_branch_island_echo_question |
|
- 209 blimp/drop_argument |
|
- 210 cos_e/v1.0 |
|
- 211 cosmos_qa |
|
- 212 dream |
|
- 213 openbookqa |
|
- 214 qasc |
|
- 215 quartz |
|
- 216 quail |
|
- 217 head_qa/en |
|
- 218 sciq |
|
- 219 social_i_qa |
|
- 220 wiki_hop |
|
- 221 wiqa |
|
- 222 piqa |
|
- 223 hellaswag |
|
- 224 super_glue/copa |
|
- 225 art |
|
- 226 hendrycks_test/moral_disputes |
|
- 227 hendrycks_test/moral_scenarios |
|
- 228 hendrycks_test/nutrition |
|
- 229 hendrycks_test/philosophy |
|
- 230 hendrycks_test/prehistory |
|
- 231 hendrycks_test/professional_accounting |
|
- 232 hendrycks_test/professional_law |
|
- 233 hendrycks_test/world_religions |
|
- 234 hendrycks_test/professional_psychology |
|
- 235 hendrycks_test/public_relations |
|
- 236 hendrycks_test/security_studies |
|
- 237 hendrycks_test/sociology |
|
- 238 hendrycks_test/us_foreign_policy |
|
- 239 hendrycks_test/virology |
|
- 240 hendrycks_test/miscellaneous |
|
- 241 hendrycks_test/professional_medicine |
|
- 242 hendrycks_test/medical_genetics |
|
- 243 hendrycks_test/college_mathematics |
|
- 244 hendrycks_test/management |
|
- 245 hendrycks_test/high_school_computer_science |
|
- 246 hendrycks_test/astronomy |
|
- 247 hendrycks_test/high_school_chemistry |
|
- 248 hendrycks_test/high_school_biology |
|
- 249 hendrycks_test/global_facts |
|
- 250 hendrycks_test/formal_logic |
|
- 251 hendrycks_test/elementary_mathematics |
|
- 252 hendrycks_test/high_school_european_history |
|
- 253 hendrycks_test/electrical_engineering |
|
- 254 hendrycks_test/conceptual_physics |
|
- 255 hendrycks_test/computer_security |
|
- 256 hendrycks_test/college_physics |
|
- 257 hendrycks_test/college_medicine |
|
- 258 hendrycks_test/college_computer_science |
|
- 259 hendrycks_test/college_chemistry |
|
- 260 hendrycks_test/college_biology |
|
- 261 hendrycks_test/econometrics |
|
- 262 hendrycks_test/clinical_knowledge |
|
- 263 hendrycks_test/anatomy |
|
- 264 hendrycks_test/marketing |
|
- 265 hendrycks_test/machine_learning |
|
- 266 hendrycks_test/logical_fallacies |
|
- 267 hendrycks_test/jurisprudence |
|
- 268 hendrycks_test/international_law |
|
- 269 hendrycks_test/human_sexuality |
|
- 270 hendrycks_test/human_aging |
|
- 271 hendrycks_test/high_school_world_history |
|
- 272 hendrycks_test/abstract_algebra |
|
- 273 hendrycks_test/high_school_us_history |
|
- 274 hendrycks_test/high_school_psychology |
|
- 275 hendrycks_test/high_school_physics |
|
- 276 hendrycks_test/high_school_microeconomics |
|
- 277 hendrycks_test/high_school_mathematics |
|
- 278 hendrycks_test/high_school_macroeconomics |
|
- 279 hendrycks_test/high_school_government_and_politics |
|
- 280 hendrycks_test/high_school_geography |
|
- 281 hendrycks_test/high_school_statistics |
|
- 282 hendrycks_test/business_ethics |
|
- 283 winogrande/winogrande_xl |
|
- 284 codah/codah |
|
- 285 ai2_arc/ARC-Challenge/challenge |
|
- 286 ai2_arc/ARC-Easy/challenge |
|
- 287 definite_pronoun_resolution |
|
- 288 swag |
|
- 289 math_qa |
|
- 290 utilitarianism |
|
- 291 TuringBench |
|
- 292 trec |
|
- 293 vitaminc/tals--vitaminc |
|
- 294 hope_edi/english |
|
- 295 rumoureval_2019/RumourEval2019 |
|
- 296 ethos/binary |
|
- 297 ethos/multilabel |
|
- 298 glue/cola |
|
- 299 glue/sst2 |
|
- 300 glue/mrpc |
|
- 301 glue/qqp |
|
- 302 glue/stsb |
|
- 303 glue/mnli |
|
- 304 glue/qnli |
|
- 305 glue/rte |
|
- 306 glue/wnli |
|
- 307 super_glue/boolq |
|
- 308 super_glue/cb |
|
- 309 super_glue/multirc |
|
- 310 super_glue/wic |
|
- 311 super_glue/axg |
|
- 312 tweet_eval/stance_feminist |
|
- 313 tweet_eval/stance_atheism |
|
- 314 tweet_eval/stance_hillary |
|
- 315 tweet_eval/stance_abortion |
|
- 316 tweet_eval/sentiment |
|
- 317 tweet_eval/offensive |
|
- 318 tweet_eval/stance_climate |
|
- 319 tweet_eval/irony |
|
- 320 tweet_eval/emotion |
|
- 321 tweet_eval/emoji |
|
- 322 tweet_eval/hate |
|
- 323 discovery/discovery |
|
- 324 pragmeval/switchboard |
|
- 325 pragmeval/squinky-informativeness |
|
- 326 pragmeval/emobank-arousal |
|
- 327 pragmeval/emobank-dominance |
|
- 328 pragmeval/emobank-valence |
|
- 329 pragmeval/mrda |
|
- 330 pragmeval/verifiability |
|
- 331 pragmeval/squinky-implicature |
|
- 332 pragmeval/squinky-formality |
|
- 333 pragmeval/gum |
|
- 334 pragmeval/emergent |
|
- 335 pragmeval/persuasiveness-premisetype |
|
- 336 pragmeval/pdtb |
|
- 337 pragmeval/persuasiveness-eloquence |
|
- 338 pragmeval/persuasiveness-specificity |
|
- 339 pragmeval/persuasiveness-strength |
|
- 340 pragmeval/sarcasm |
|
- 341 pragmeval/stac |
|
- 342 pragmeval/persuasiveness-claimtype |
|
- 343 pragmeval/persuasiveness-relevance |
|
- 344 lex_glue/eurlex |
|
- 345 lex_glue/scotus |
|
- 346 lex_glue/ledgar |
|
- 347 lex_glue/unfair_tos |
|
- 348 lex_glue/case_hold |
|
- 349 imdb |
|
- 350 rotten_tomatoes |
|
- 351 ag_news |
|
- 352 yelp_review_full/yelp_review_full |
|
- 353 financial_phrasebank/sentences_allagree |
|
- 354 poem_sentiment |
|
- 355 dbpedia_14/dbpedia_14 |
|
- 356 amazon_polarity/amazon_polarity |
|
- 357 app_reviews |
|
- 358 hate_speech18 |
|
- 359 sms_spam |
|
- 360 humicroedit/subtask-1 |
|
- 361 humicroedit/subtask-2 |
|
- 362 snips_built_in_intents |
|
- 363 banking77 |
|
- 364 hate_speech_offensive |
|
- 365 hyperpartisan_news_detection/byarticle |
|
- 366 hyperpartisan_news_detection/bypublisher |
|
- 367 go_emotions/simplified |
|
- 368 scicite |
|
- 369 liar |
|
- 370 lexical_relation_classification/ROOT09 |
|
- 371 lexical_relation_classification/EVALution |
|
- 372 lexical_relation_classification/CogALexV |
|
- 373 lexical_relation_classification/BLESS |
|
- 374 lexical_relation_classification/K&H+N |
|
- 375 linguisticprobing/coordination_inversion |
|
- 376 linguisticprobing/odd_man_out |
|
- 377 linguisticprobing/word_content |
|
- 378 linguisticprobing/obj_number |
|
- 379 linguisticprobing/past_present |
|
- 380 linguisticprobing/tree_depth |
|
- 381 linguisticprobing/sentence_length |
|
- 382 linguisticprobing/top_constituents |
|
- 383 linguisticprobing/bigram_shift |
|
- 384 linguisticprobing/subj_number |
|
- 385 crowdflower/sentiment_nuclear_power |
|
- 386 crowdflower/tweet_global_warming |
|
- 387 crowdflower/airline-sentiment |
|
- 388 crowdflower/economic-news |
|
- 389 crowdflower/political-media-audience |
|
- 390 crowdflower/political-media-bias |
|
- 391 crowdflower/political-media-message |
|
- 392 crowdflower/text_emotion |
|
- 393 crowdflower/corporate-messaging |
|
- 394 ethics/commonsense |
|
- 395 ethics/deontology |
|
- 396 ethics/justice |
|
- 397 ethics/virtue |
|
- 398 emo/emo2019 |
|
- 399 google_wellformed_query |
|
- 400 tweets_hate_speech_detection |
|
- 401 adv_glue/adv_sst2 |
|
- 402 adv_glue/adv_qqp |
|
- 403 adv_glue/adv_mnli |
|
- 404 adv_glue/adv_mnli_mismatched |
|
- 405 adv_glue/adv_qnli |
|
- 406 adv_glue/adv_rte |
|
- 407 has_part |
|
- 408 wnut_17/wnut_17 |
|
- 409 ncbi_disease/ncbi_disease |
|
- 410 acronym_identification |
|
- 411 jnlpba/jnlpba |
|
- 412 species_800/species_800 |
|
- 413 ontonotes_english/SpeedOfMagic--ontonotes_english |
|
- 414 blog_authorship_corpus/gender |
|
- 415 blog_authorship_corpus/age |
|
- 416 blog_authorship_corpus/horoscope |
|
- 417 blog_authorship_corpus/job |
|
- 418 open_question_type |
|
- 419 health_fact |
|
- 420 commonsense_qa |
|
- 421 mc_taco |
|
- 422 ade_corpus_v2/Ade_corpus_v2_classification |
|
- 423 discosense |
|
- 424 circa |
|
- 425 code_x_glue_cc_defect_detection |
|
- 426 code_x_glue_cc_clone_detection_big_clone_bench |
|
- 427 code_x_glue_cc_code_refinement/medium |
|
- 428 EffectiveFeedbackStudentWriting |
|
- 429 promptSentiment |
|
- 430 promptNLI |
|
- 431 promptSpoke |
|
- 432 promptProficiency |
|
- 433 promptGrammar |
|
- 434 promptCoherence |
|
- 435 phrase_similarity |
|
- 436 scientific-exaggeration-detection |
|
- 437 quarel |
|
- 438 fever-evidence-related/mwong--fever-related |
|
- 439 numer_sense |
|
- 440 dynasent/dynabench.dynasent.r1.all/r1 |
|
- 441 dynasent/dynabench.dynasent.r2.all/r2 |
|
- 442 Sarcasm_News_Headline |
|
- 443 sem_eval_2010_task_8 |