|
{ |
|
"_name_or_path": "mt-428-30000", |
|
"architectures": [ |
|
"DebertaV2ForSequenceClassification" |
|
], |
|
"attention_probs_dropout_prob": 0.1, |
|
"classifiers_size": [ |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
1, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
1, |
|
3, |
|
3, |
|
2, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
6, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
2, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
47, |
|
23, |
|
9, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
1, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
20, |
|
50, |
|
3, |
|
3, |
|
4, |
|
2, |
|
8, |
|
20, |
|
4, |
|
2, |
|
2, |
|
3, |
|
2, |
|
3, |
|
3, |
|
3, |
|
3, |
|
3, |
|
174, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
41, |
|
51, |
|
2, |
|
8, |
|
2, |
|
16, |
|
18, |
|
2, |
|
2, |
|
17, |
|
2, |
|
3, |
|
2, |
|
3, |
|
12, |
|
42, |
|
3, |
|
7, |
|
11, |
|
7, |
|
4, |
|
3, |
|
100, |
|
13, |
|
100, |
|
8, |
|
1, |
|
20, |
|
2, |
|
2, |
|
4, |
|
5, |
|
3, |
|
4, |
|
14, |
|
2, |
|
6, |
|
4, |
|
2, |
|
1, |
|
3, |
|
10, |
|
77, |
|
3, |
|
10, |
|
4, |
|
2, |
|
7, |
|
6, |
|
28, |
|
3, |
|
6, |
|
7, |
|
6, |
|
5, |
|
3, |
|
4, |
|
2, |
|
2, |
|
6, |
|
2, |
|
7, |
|
2, |
|
2, |
|
2, |
|
20, |
|
4, |
|
2, |
|
3, |
|
4, |
|
3, |
|
2, |
|
2, |
|
9, |
|
13, |
|
2, |
|
2, |
|
2, |
|
2, |
|
4, |
|
1, |
|
2, |
|
1, |
|
13, |
|
3, |
|
5, |
|
11, |
|
37, |
|
2, |
|
49, |
|
12, |
|
40, |
|
10, |
|
4, |
|
1, |
|
2, |
|
2, |
|
1, |
|
5, |
|
3, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
3, |
|
2, |
|
2, |
|
12, |
|
3, |
|
3, |
|
2, |
|
19, |
|
3, |
|
1, |
|
1, |
|
2, |
|
2, |
|
2, |
|
2, |
|
2, |
|
1, |
|
2, |
|
2, |
|
1, |
|
1, |
|
2, |
|
3, |
|
2, |
|
1, |
|
4, |
|
4, |
|
1, |
|
1, |
|
1, |
|
2, |
|
3, |
|
2, |
|
3, |
|
1, |
|
1, |
|
2, |
|
1, |
|
3, |
|
2, |
|
2, |
|
2, |
|
2, |
|
3, |
|
2, |
|
2, |
|
2, |
|
1, |
|
3, |
|
2, |
|
2, |
|
1, |
|
1, |
|
1, |
|
1, |
|
2, |
|
1, |
|
1, |
|
1, |
|
1, |
|
4, |
|
1, |
|
2, |
|
1, |
|
1 |
|
], |
|
"hidden_act": "gelu", |
|
"hidden_dropout_prob": 0.1, |
|
"hidden_size": 1024, |
|
"id2label": { |
|
"0": "entailment", |
|
"1": "neutral", |
|
"2": "contradiction" |
|
}, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 4096, |
|
"label2id": { |
|
"contradiction": 2, |
|
"entailment": 0, |
|
"neutral": 1 |
|
}, |
|
"layer_norm_eps": 1e-07, |
|
"max_position_embeddings": 512, |
|
"max_relative_positions": -1, |
|
"model_type": "deberta-v2", |
|
"norm_rel_ebd": "layer_norm", |
|
"num_attention_heads": 16, |
|
"num_hidden_layers": 24, |
|
"pad_token_id": 0, |
|
"pooler_dropout": 0, |
|
"pooler_hidden_act": "gelu", |
|
"pooler_hidden_size": 1024, |
|
"pos_att_type": [ |
|
"p2c", |
|
"c2p" |
|
], |
|
"position_biased_input": false, |
|
"position_buckets": 256, |
|
"relative_attention": true, |
|
"share_att_key": true, |
|
"tasks": [ |
|
"glue/mnli", |
|
"glue/qnli", |
|
"glue/rte", |
|
"glue/wnli", |
|
"glue/mrpc", |
|
"glue/qqp", |
|
"glue/stsb", |
|
"super_glue/boolq", |
|
"super_glue/cb", |
|
"super_glue/multirc", |
|
"super_glue/wic", |
|
"super_glue/axg", |
|
"anli/a1", |
|
"anli/a2", |
|
"anli/a3", |
|
"sick/label", |
|
"sick/relatedness", |
|
"sick/entailment_AB", |
|
"snli", |
|
"scitail/snli_format", |
|
"hans", |
|
"WANLI", |
|
"recast/recast_verbnet", |
|
"recast/recast_ner", |
|
"recast/recast_factuality", |
|
"recast/recast_puns", |
|
"recast/recast_kg_relations", |
|
"recast/recast_sentiment", |
|
"recast/recast_megaveridicality", |
|
"recast/recast_verbcorner", |
|
"probability_words_nli/usnli", |
|
"probability_words_nli/reasoning_2hop", |
|
"probability_words_nli/reasoning_1hop", |
|
"nan-nli/joey234--nan-nli", |
|
"nli_fever", |
|
"breaking_nli", |
|
"conj_nli", |
|
"fracas", |
|
"dialogue_nli", |
|
"mpe", |
|
"dnc", |
|
"gpt3_nli", |
|
"recast_white/fnplus", |
|
"recast_white/sprl", |
|
"recast_white/dpr", |
|
"joci", |
|
"contrast_nli", |
|
"robust_nli/IS_CS", |
|
"robust_nli/LI_LI", |
|
"robust_nli/ST_WO", |
|
"robust_nli/PI_SP", |
|
"robust_nli/PI_CD", |
|
"robust_nli/ST_SE", |
|
"robust_nli/ST_NE", |
|
"robust_nli/ST_LM", |
|
"robust_nli_is_sd", |
|
"robust_nli_li_ts", |
|
"gen_debiased_nli/snli_seq_z", |
|
"gen_debiased_nli/snli_z_aug", |
|
"gen_debiased_nli/snli_par_z", |
|
"gen_debiased_nli/mnli_par_z", |
|
"gen_debiased_nli/mnli_z_aug", |
|
"gen_debiased_nli/mnli_seq_z", |
|
"add_one_rte", |
|
"imppres/presupposition_change_of_state/presupposition", |
|
"imppres/presupposition_all_n_presupposition/presupposition", |
|
"imppres/presupposition_question_presupposition/presupposition", |
|
"imppres/presupposition_possessed_definites_uniqueness/presupposition", |
|
"imppres/presupposition_possessed_definites_existence/presupposition", |
|
"imppres/presupposition_only_presupposition/presupposition", |
|
"imppres/presupposition_cleft_uniqueness/presupposition", |
|
"imppres/presupposition_cleft_existence/presupposition", |
|
"imppres/presupposition_both_presupposition/presupposition", |
|
"imppres/implicature_quantifiers/prag", |
|
"imppres/implicature_numerals_2_3/prag", |
|
"imppres/implicature_modals/prag", |
|
"imppres/implicature_numerals_10_100/prag", |
|
"imppres/implicature_gradable_adjective/prag", |
|
"imppres/implicature_connectives/prag", |
|
"imppres/implicature_gradable_verb/prag", |
|
"imppres/implicature_quantifiers/log", |
|
"imppres/implicature_numerals_2_3/log", |
|
"imppres/implicature_numerals_10_100/log", |
|
"imppres/implicature_modals/log", |
|
"imppres/implicature_gradable_verb/log", |
|
"imppres/implicature_gradable_adjective/log", |
|
"imppres/implicature_connectives/log", |
|
"glue_diagnostics/diagnostics", |
|
"hlgd", |
|
"paws/labeled_final", |
|
"paws/labeled_swap", |
|
"quora", |
|
"medical_questions_pairs", |
|
"conll2003/pos_tags", |
|
"conll2003/chunk_tags", |
|
"conll2003/ner_tags", |
|
"hh-rlhf", |
|
"model-written-evals", |
|
"truthful_qa/multiple_choice", |
|
"fig-qa", |
|
"bigbench/bbq_lite_json", |
|
"bigbench/social_support", |
|
"bigbench/irony_identification", |
|
"bigbench/snarks", |
|
"bigbench/presuppositions_as_nli", |
|
"bigbench/logical_fallacy_detection", |
|
"bigbench/mathematical_induction", |
|
"bigbench/timedial", |
|
"bigbench/logical_args", |
|
"bigbench/causal_judgment", |
|
"bigbench/authorship_verification", |
|
"bigbench/discourse_marker_prediction", |
|
"bigbench/identify_odd_metaphor", |
|
"bigbench/mnist_ascii", |
|
"bigbench/empirical_judgments", |
|
"bigbench/human_organs_senses", |
|
"bigbench/phrase_relatedness", |
|
"bigbench/similarities_abstraction", |
|
"bigbench/elementary_math_qa", |
|
"bigbench/general_knowledge", |
|
"bigbench/odd_one_out", |
|
"bigbench/physical_intuition", |
|
"bigbench/english_proverbs", |
|
"bigbench/key_value_maps", |
|
"bigbench/fact_checker", |
|
"bigbench/real_or_fake_text", |
|
"bigbench/movie_recommendation", |
|
"bigbench/implicit_relations", |
|
"bigbench/cause_and_effect", |
|
"bigbench/crass_ai", |
|
"bigbench/emoji_movie", |
|
"bigbench/formal_fallacies_syllogisms_negation", |
|
"bigbench/cs_algorithms", |
|
"bigbench/salient_translation_error_detection", |
|
"bigbench/metaphor_understanding", |
|
"bigbench/play_dialog_same_or_different", |
|
"bigbench/sports_understanding", |
|
"bigbench/understanding_fables", |
|
"bigbench/nonsense_words_grammar", |
|
"bigbench/misconceptions", |
|
"bigbench/moral_permissibility", |
|
"bigbench/color", |
|
"bigbench/metaphor_boolean", |
|
"bigbench/hyperbaton", |
|
"bigbench/arithmetic", |
|
"bigbench/reasoning_about_colored_objects", |
|
"bigbench/social_iqa", |
|
"bigbench/symbol_interpretation", |
|
"bigbench/crash_blossom", |
|
"bigbench/undo_permutation", |
|
"bigbench/implicatures", |
|
"bigbench/question_selection", |
|
"bigbench/code_line_description", |
|
"bigbench/dark_humor_detection", |
|
"bigbench/disambiguation_qa", |
|
"bigbench/contextual_parametric_knowledge_conflicts", |
|
"bigbench/entailed_polarity", |
|
"bigbench/ruin_names", |
|
"bigbench/international_phonetic_alphabet_nli", |
|
"bigbench/conceptual_combinations", |
|
"bigbench/strange_stories", |
|
"bigbench/evaluating_information_essentiality", |
|
"bigbench/fantasy_reasoning", |
|
"bigbench/physics", |
|
"bigbench/sentence_ambiguity", |
|
"bigbench/intent_recognition", |
|
"bigbench/checkmate_in_one", |
|
"bigbench/analogical_similarity", |
|
"bigbench/identify_math_theorems", |
|
"bigbench/movie_dialog_same_or_different", |
|
"bigbench/unit_interpretation", |
|
"bigbench/logical_deduction", |
|
"bigbench/abstract_narrative_understanding", |
|
"bigbench/strategyqa", |
|
"bigbench/penguins_in_a_table", |
|
"bigbench/goal_step_wikihow", |
|
"bigbench/hhh_alignment", |
|
"bigbench/date_understanding", |
|
"bigbench/cifar10_classification", |
|
"bigbench/riddle_sense", |
|
"bigbench/logic_grid_puzzle", |
|
"bigbench/suicide_risk", |
|
"bigbench/hindu_knowledge", |
|
"bigbench/navigate", |
|
"bigbench/temporal_sequences", |
|
"bigbench/anachronisms", |
|
"bigbench/geometric_shapes", |
|
"bigbench/figure_of_speech_detection", |
|
"bigbench/tracking_shuffled_objects", |
|
"bigbench/epistemic_reasoning", |
|
"bigbench/simple_ethical_questions", |
|
"bigbench/dyck_languages", |
|
"bigbench/analytic_entailment", |
|
"bigbench/winowhy", |
|
"bigbench/gre_reading_comprehension", |
|
"bigbench/logical_sequence", |
|
"bigbench/emojis_emotion_prediction", |
|
"bigbench/novel_concepts", |
|
"bigbench/vitaminc_fact_verification", |
|
"bigbench/known_unknowns", |
|
"cos_e/v1.0", |
|
"cosmos_qa", |
|
"dream", |
|
"openbookqa", |
|
"qasc", |
|
"quartz", |
|
"quail", |
|
"head_qa/en", |
|
"sciq", |
|
"social_i_qa", |
|
"wiki_hop", |
|
"wiqa", |
|
"piqa", |
|
"hellaswag", |
|
"super_glue/copa", |
|
"balanced-copa", |
|
"e-CARE", |
|
"art", |
|
"winogrande/winogrande_xl", |
|
"codah/codah", |
|
"ai2_arc/ARC-Easy/challenge", |
|
"ai2_arc/ARC-Challenge/challenge", |
|
"definite_pronoun_resolution", |
|
"swag", |
|
"math_qa", |
|
"glue/cola", |
|
"glue/sst2", |
|
"utilitarianism", |
|
"amazon_counterfactual/en", |
|
"insincere-questions", |
|
"toxic_conversations", |
|
"TuringBench", |
|
"trec", |
|
"vitaminc/tals--vitaminc", |
|
"hope_edi/english", |
|
"rumoureval_2019/RumourEval2019", |
|
"ethos/binary", |
|
"ethos/multilabel", |
|
"tweet_eval/emoji", |
|
"tweet_eval/emotion", |
|
"tweet_eval/hate", |
|
"tweet_eval/offensive", |
|
"tweet_eval/sentiment", |
|
"tweet_eval/irony", |
|
"tweet_eval/stance_atheism", |
|
"tweet_eval/stance_climate", |
|
"tweet_eval/stance_hillary", |
|
"tweet_eval/stance_feminist", |
|
"tweet_eval/stance_abortion", |
|
"discovery/discovery", |
|
"pragmeval/squinky-informativeness", |
|
"pragmeval/verifiability", |
|
"pragmeval/squinky-formality", |
|
"pragmeval/squinky-implicature", |
|
"pragmeval/emobank-dominance", |
|
"pragmeval/emobank-arousal", |
|
"pragmeval/switchboard", |
|
"pragmeval/mrda", |
|
"pragmeval/emobank-valence", |
|
"pragmeval/persuasiveness-premisetype", |
|
"pragmeval/persuasiveness-relevance", |
|
"pragmeval/pdtb", |
|
"pragmeval/stac", |
|
"pragmeval/persuasiveness-strength", |
|
"pragmeval/sarcasm", |
|
"pragmeval/gum", |
|
"pragmeval/persuasiveness-eloquence", |
|
"pragmeval/persuasiveness-claimtype", |
|
"pragmeval/persuasiveness-specificity", |
|
"pragmeval/emergent", |
|
"silicone/maptask", |
|
"silicone/oasis", |
|
"silicone/meld_s", |
|
"silicone/meld_e", |
|
"silicone/iemocap", |
|
"silicone/dyda_e", |
|
"silicone/dyda_da", |
|
"silicone/sem", |
|
"lex_glue/eurlex", |
|
"lex_glue/scotus", |
|
"lex_glue/ledgar", |
|
"lex_glue/unfair_tos", |
|
"lex_glue/case_hold", |
|
"language-identification", |
|
"imdb", |
|
"rotten_tomatoes", |
|
"ag_news", |
|
"yelp_review_full/yelp_review_full", |
|
"financial_phrasebank/sentences_allagree", |
|
"poem_sentiment", |
|
"dbpedia_14/dbpedia_14", |
|
"amazon_polarity/amazon_polarity", |
|
"app_reviews", |
|
"hate_speech18", |
|
"sms_spam", |
|
"humicroedit/subtask-1", |
|
"humicroedit/subtask-2", |
|
"snips_built_in_intents", |
|
"banking77", |
|
"hate_speech_offensive", |
|
"yahoo_answers_topics", |
|
"stackoverflow-questions", |
|
"hyperpartisan_news", |
|
"sciie", |
|
"citation_intent", |
|
"go_emotions/simplified", |
|
"scicite", |
|
"liar", |
|
"lexical_relation_classification/EVALution", |
|
"lexical_relation_classification/BLESS", |
|
"lexical_relation_classification/CogALexV", |
|
"lexical_relation_classification/ROOT09", |
|
"lexical_relation_classification/K&H+N", |
|
"linguisticprobing/bigram_shift", |
|
"linguisticprobing/odd_man_out", |
|
"linguisticprobing/sentence_length", |
|
"linguisticprobing/coordination_inversion", |
|
"linguisticprobing/tree_depth", |
|
"linguisticprobing/subj_number", |
|
"linguisticprobing/obj_number", |
|
"linguisticprobing/past_present", |
|
"linguisticprobing/top_constituents", |
|
"crowdflower/sentiment_nuclear_power", |
|
"crowdflower/tweet_global_warming", |
|
"crowdflower/airline-sentiment", |
|
"crowdflower/corporate-messaging", |
|
"crowdflower/economic-news", |
|
"crowdflower/political-media-bias", |
|
"crowdflower/political-media-audience", |
|
"crowdflower/political-media-message", |
|
"crowdflower/text_emotion", |
|
"ethics/commonsense", |
|
"ethics/deontology", |
|
"ethics/justice", |
|
"ethics/virtue", |
|
"emo/emo2019", |
|
"google_wellformed_query", |
|
"tweets_hate_speech_detection", |
|
"has_part", |
|
"wnut_17/wnut_17", |
|
"ncbi_disease/ncbi_disease", |
|
"acronym_identification", |
|
"jnlpba/jnlpba", |
|
"ontonotes_english/SpeedOfMagic--ontonotes_english", |
|
"blog_authorship_corpus/gender", |
|
"blog_authorship_corpus/age", |
|
"blog_authorship_corpus/horoscope", |
|
"blog_authorship_corpus/job", |
|
"open_question_type", |
|
"health_fact", |
|
"commonsense_qa", |
|
"mc_taco", |
|
"ade_corpus_v2/Ade_corpus_v2_classification", |
|
"discosense", |
|
"circa", |
|
"EffectiveFeedbackStudentWriting", |
|
"promptSentiment", |
|
"promptNLI", |
|
"promptSpoke", |
|
"promptProficiency", |
|
"promptGrammar", |
|
"promptCoherence", |
|
"phrase_similarity", |
|
"scientific-exaggeration-detection", |
|
"quarel", |
|
"fever-evidence-related/mwong--fever-related", |
|
"numer_sense", |
|
"dynasent/dynabench.dynasent.r1.all/r1", |
|
"dynasent/dynabench.dynasent.r2.all/r2", |
|
"Sarcasm_News_Headline", |
|
"sem_eval_2010_task_8", |
|
"auditor_review/demo-org--auditor_review", |
|
"medmcqa", |
|
"aqua_rat/tokenized", |
|
"Dynasent_Disagreement", |
|
"Politeness_Disagreement", |
|
"SBIC_Disagreement", |
|
"SChem_Disagreement", |
|
"Dilemmas_Disagreement", |
|
"logiqa", |
|
"wiki_qa", |
|
"cycic_classification", |
|
"cycic_multiplechoice", |
|
"sts-companion", |
|
"commonsense_qa_2.0", |
|
"lingnli", |
|
"monotonicity-entailment", |
|
"arct", |
|
"scinli", |
|
"naturallogic", |
|
"onestop_qa", |
|
"moral_stories/full", |
|
"prost", |
|
"dynahate", |
|
"syntactic-augmentation-nli", |
|
"autotnli", |
|
"CONDAQA", |
|
"webgpt_comparisons", |
|
"synthetic-instruct-gptj-pairwise", |
|
"scruples", |
|
"wouldyourather", |
|
"attempto-nli", |
|
"defeasible-nli/snli", |
|
"defeasible-nli/atomic", |
|
"nli-veridicality-transitivity", |
|
"natural-language-satisfiability", |
|
"lonli", |
|
"dadc-limit-nli", |
|
"FLUTE", |
|
"strategy-qa", |
|
"summarize_from_feedback/comparisons", |
|
"folio", |
|
"tomi-nli", |
|
"avicenna", |
|
"SHP", |
|
"MedQA-USMLE-4-options-hf", |
|
"wikimedqa/medwiki", |
|
"cicero", |
|
"CREAK", |
|
"mutual", |
|
"NeQA", |
|
"quote-repetition", |
|
"redefine-math", |
|
"puzzte", |
|
"implicatures", |
|
"babi_nli", |
|
"blimp-2", |
|
"mmlu-4" |
|
], |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.25.1", |
|
"type_vocab_size": 0, |
|
"vocab_size": 128100 |
|
} |
|
|