deberta-base-long-nli / config.json
sileod's picture
Upload DebertaV2ForSequenceClassification
9941d36 verified
{
"_name_or_path": "microsoft/deberta-v3-base",
"architectures": [
"DebertaV2ForSequenceClassification"
],
"attention_probs_dropout_prob": 0.1,
"classifiers_size": [
3,
2,
2,
2,
2,
2,
1,
2,
3,
2,
2,
2,
3,
3,
3,
3,
1,
3,
3,
2,
2,
3,
2,
2,
6,
2,
2,
2,
2,
2,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
2,
5,
3,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
2,
2,
20,
50,
3,
3,
4,
2,
8,
20,
2,
4,
2,
2,
3,
3,
3,
3,
3,
3,
174,
2,
41,
3,
51,
2,
2,
2,
2,
2,
2,
16,
3,
2,
8,
2,
2,
18,
2,
17,
3,
3,
7,
11,
12,
7,
4,
42,
3,
100,
13,
100,
8,
1,
20,
2,
2,
4,
5,
3,
4,
14,
2,
6,
4,
2,
1,
3,
10,
3,
10,
4,
2,
7,
6,
28,
3,
6,
7,
3,
4,
5,
6,
2,
2,
2,
6,
20,
7,
2,
2,
2,
9,
13,
2,
3,
4,
3,
2,
4,
2,
2,
2,
2,
2,
4,
1,
2,
1,
2,
49,
40,
10,
4,
1,
2,
2,
1,
5,
2,
3,
2,
2,
12,
3,
3,
2,
19,
3,
1,
2,
2,
2,
2,
2,
1,
2,
2,
1,
1,
2,
3,
2,
1,
4,
3,
1,
1,
1,
2,
3,
2,
3,
1,
1,
2,
1,
2,
2,
2,
2,
3,
2,
2,
2,
1,
3,
2,
2,
1,
1,
1,
1,
2,
1,
1,
1,
1,
4,
1,
1,
1,
1,
3,
1,
3,
1,
2,
2,
1,
2,
3,
3,
2,
1,
3,
1,
1,
3,
1,
3,
2,
1,
1,
1,
2,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2,
2,
2,
2,
77,
3,
2,
2,
1,
1,
2,
3,
18,
13,
2,
2,
2,
2,
2,
4,
2,
24,
3,
2,
2,
1,
2,
2,
3,
1,
2,
3,
2,
3,
3,
2,
2,
4,
1,
17,
3,
3,
2,
3,
2,
3,
3,
2,
1,
1,
3,
2,
9,
2,
1,
1,
1,
1,
1,
2,
2,
3,
2,
3,
4,
3,
3,
2,
1,
1,
2,
1,
1,
6,
2,
3,
3,
1,
1,
2,
2,
10,
2,
2,
2,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
4,
1,
7,
6,
6,
6,
6,
6,
6,
3,
2,
1,
2,
2,
3,
3,
3,
1
],
"hidden_act": "gelu",
"hidden_dropout_prob": 0.1,
"hidden_size": 768,
"id2label": {
"0": "entailment",
"1": "neutral",
"2": "contradiction"
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"contradiction": 2,
"entailment": 0,
"neutral": 1
},
"layer_norm_eps": 1e-07,
"max_position_embeddings": 1280,
"max_relative_positions": -1,
"model_type": "deberta-v2",
"norm_rel_ebd": "layer_norm",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"pad_token_id": 0,
"pooler_dropout": 0,
"pooler_hidden_act": "gelu",
"pooler_hidden_size": 768,
"pos_att_type": [
"p2c",
"c2p"
],
"position_biased_input": false,
"position_buckets": 256,
"relative_attention": true,
"share_att_key": true,
"tasks": [
"glue/mnli",
"glue/qnli",
"glue/rte",
"glue/wnli",
"glue/mrpc",
"glue/qqp",
"glue/stsb",
"super_glue/boolq",
"super_glue/cb",
"super_glue/multirc",
"super_glue/wic",
"super_glue/axg",
"anli/a1",
"anli/a2",
"anli/a3",
"sick/label",
"sick/relatedness",
"sick/entailment_AB",
"snli",
"scitail/snli_format",
"hans",
"WANLI",
"recast/recast_puns",
"recast/recast_factuality",
"recast/recast_kg_relations",
"recast/recast_verbnet",
"recast/recast_verbcorner",
"recast/recast_ner",
"recast/recast_megaveridicality",
"recast/recast_sentiment",
"probability_words_nli/reasoning_1hop",
"probability_words_nli/usnli",
"probability_words_nli/reasoning_2hop",
"nan-nli",
"nli_fever",
"breaking_nli",
"conj_nli",
"fracas",
"dialogue_nli",
"mpe",
"dnc",
"recast_white/fnplus",
"recast_white/sprl",
"recast_white/dpr",
"joci",
"robust_nli/IS_CS",
"robust_nli/LI_LI",
"robust_nli/ST_WO",
"robust_nli/PI_SP",
"robust_nli/PI_CD",
"robust_nli/ST_SE",
"robust_nli/ST_NE",
"robust_nli/ST_LM",
"robust_nli_is_sd",
"robust_nli_li_ts",
"add_one_rte",
"imppres/implicature_gradable_adjective/log",
"imppres/implicature_modals/log",
"imppres/implicature_numerals_10_100/log",
"imppres/implicature_quantifiers/log",
"imppres/implicature_connectives/log",
"imppres/implicature_gradable_verb/log",
"imppres/implicature_numerals_2_3/log",
"hlgd",
"paws/labeled_final",
"paws/labeled_swap",
"medical_questions_pairs",
"model-written-evals",
"truthful_qa/multiple_choice",
"fig-qa",
"bigbench/discourse_marker_prediction",
"bigbench/gre_reading_comprehension",
"bigbench/simple_ethical_questions",
"bigbench/known_unknowns",
"bigbench/fact_checker",
"bigbench/phrase_relatedness",
"bigbench/irony_identification",
"bigbench/crash_blossom",
"bigbench/physical_intuition",
"bigbench/analytic_entailment",
"bigbench/play_dialog_same_or_different",
"bigbench/metaphor_boolean",
"bigbench/undo_permutation",
"bigbench/disambiguation_qa",
"bigbench/metaphor_understanding",
"bigbench/nonsense_words_grammar",
"bigbench/logical_deduction",
"bigbench/sentence_ambiguity",
"bigbench/key_value_maps",
"bigbench/checkmate_in_one",
"bigbench/code_line_description",
"bigbench/crass_ai",
"bigbench/movie_dialog_same_or_different",
"bigbench/strategyqa",
"bigbench/hhh_alignment",
"bigbench/english_proverbs",
"bigbench/cs_algorithms",
"bigbench/formal_fallacies_syllogisms_negation",
"bigbench/conceptual_combinations",
"bigbench/geometric_shapes",
"bigbench/tracking_shuffled_objects",
"bigbench/arithmetic",
"bigbench/misconceptions",
"bigbench/odd_one_out",
"bigbench/international_phonetic_alphabet_nli",
"bigbench/social_iqa",
"bigbench/logical_fallacy_detection",
"bigbench/hyperbaton",
"bigbench/novel_concepts",
"bigbench/similarities_abstraction",
"bigbench/timedial",
"bigbench/salient_translation_error_detection",
"bigbench/cifar10_classification",
"bigbench/winowhy",
"bigbench/mathematical_induction",
"bigbench/real_or_fake_text",
"bigbench/epistemic_reasoning",
"bigbench/riddle_sense",
"bigbench/identify_odd_metaphor",
"bigbench/human_organs_senses",
"bigbench/emojis_emotion_prediction",
"bigbench/logical_args",
"bigbench/anachronisms",
"bigbench/bbq_lite_json",
"bigbench/general_knowledge",
"bigbench/logical_sequence",
"bigbench/navigate",
"bigbench/analogical_similarity",
"bigbench/authorship_verification",
"bigbench/elementary_math_qa",
"bigbench/goal_step_wikihow",
"bigbench/empirical_judgments",
"bigbench/question_selection",
"bigbench/temporal_sequences",
"bigbench/mnist_ascii",
"bigbench/color",
"bigbench/date_understanding",
"bigbench/figure_of_speech_detection",
"bigbench/contextual_parametric_knowledge_conflicts",
"bigbench/ruin_names",
"bigbench/abstract_narrative_understanding",
"bigbench/physics",
"bigbench/understanding_fables",
"bigbench/identify_math_theorems",
"bigbench/penguins_in_a_table",
"bigbench/vitaminc_fact_verification",
"bigbench/social_support",
"bigbench/strange_stories",
"bigbench/dark_humor_detection",
"bigbench/fantasy_reasoning",
"bigbench/logic_grid_puzzle",
"bigbench/emoji_movie",
"bigbench/moral_permissibility",
"bigbench/symbol_interpretation",
"bigbench/presuppositions_as_nli",
"bigbench/dyck_languages",
"bigbench/implicatures",
"bigbench/suicide_risk",
"bigbench/unit_interpretation",
"bigbench/intent_recognition",
"bigbench/movie_recommendation",
"bigbench/snarks",
"bigbench/evaluating_information_essentiality",
"bigbench/sports_understanding",
"bigbench/entailed_polarity",
"bigbench/causal_judgment",
"bigbench/cause_and_effect",
"bigbench/implicit_relations",
"bigbench/reasoning_about_colored_objects",
"bigbench/hindu_knowledge",
"cos_e/v1.0",
"cosmos_qa",
"dream",
"openbookqa",
"qasc",
"quartz",
"quail",
"head_qa/en",
"sciq",
"social_i_qa",
"wiki_hop/original",
"wiqa",
"piqa",
"hellaswag",
"super_glue/copa",
"balanced-copa",
"e-CARE",
"art",
"winogrande/winogrande_xl",
"codah/codah",
"ai2_arc/ARC-Easy/challenge",
"ai2_arc/ARC-Challenge/challenge",
"definite_pronoun_resolution",
"swag/regular",
"math_qa",
"glue/cola",
"glue/sst2",
"utilitarianism",
"amazon_counterfactual/en",
"insincere-questions",
"toxic_conversations",
"TuringBench",
"trec",
"vitaminc",
"hope_edi/english",
"rumoureval_2019/RumourEval2019",
"ethos/binary",
"ethos/multilabel",
"tweet_eval/emoji",
"tweet_eval/offensive",
"tweet_eval/emotion",
"tweet_eval/hate",
"tweet_eval/irony",
"tweet_eval/sentiment",
"tweet_eval/stance_abortion",
"tweet_eval/stance_atheism",
"tweet_eval/stance_climate",
"tweet_eval/stance_feminist",
"tweet_eval/stance_hillary",
"discovery/discovery",
"pragmeval/squinky-formality",
"pragmeval/switchboard",
"pragmeval/verifiability",
"pragmeval/mrda",
"pragmeval/squinky-informativeness",
"pragmeval/squinky-implicature",
"pragmeval/emobank-arousal",
"pragmeval/emobank-dominance",
"pragmeval/emobank-valence",
"pragmeval/sarcasm",
"pragmeval/pdtb",
"pragmeval/persuasiveness-claimtype",
"pragmeval/persuasiveness-relevance",
"pragmeval/persuasiveness-premisetype",
"pragmeval/persuasiveness-eloquence",
"pragmeval/persuasiveness-strength",
"pragmeval/stac",
"pragmeval/persuasiveness-specificity",
"pragmeval/gum",
"pragmeval/emergent",
"silicone/sem",
"silicone/meld_e",
"silicone/iemocap",
"silicone/maptask",
"silicone/dyda_e",
"silicone/dyda_da",
"silicone/oasis",
"silicone/meld_s",
"lex_glue/eurlex",
"lex_glue/scotus",
"lex_glue/ledgar",
"lex_glue/unfair_tos",
"lex_glue/case_hold",
"language-identification",
"imdb",
"rotten_tomatoes",
"ag_news",
"yelp_review_full/yelp_review_full",
"financial_phrasebank/sentences_allagree",
"poem_sentiment",
"dbpedia_14/dbpedia_14",
"amazon_polarity/amazon_polarity",
"app_reviews",
"hate_speech18",
"sms_spam",
"humicroedit/subtask-1",
"humicroedit/subtask-2",
"snips_built_in_intents",
"hate_speech_offensive",
"yahoo_answers_topics",
"stackoverflow-questions",
"hyperpartisan_news",
"sciie",
"citation_intent",
"go_emotions/simplified",
"scicite",
"liar",
"lexical_relation_classification/EVALution",
"lexical_relation_classification/ROOT09",
"lexical_relation_classification/K&H+N",
"lexical_relation_classification/CogALexV",
"lexical_relation_classification/BLESS",
"linguisticprobing/subj_number",
"linguisticprobing/past_present",
"linguisticprobing/obj_number",
"linguisticprobing/sentence_length",
"linguisticprobing/top_constituents",
"linguisticprobing/tree_depth",
"linguisticprobing/coordination_inversion",
"linguisticprobing/odd_man_out",
"linguisticprobing/bigram_shift",
"crowdflower/political-media-message",
"crowdflower/text_emotion",
"crowdflower/political-media-audience",
"crowdflower/economic-news",
"crowdflower/corporate-messaging",
"crowdflower/airline-sentiment",
"crowdflower/tweet_global_warming",
"crowdflower/sentiment_nuclear_power",
"crowdflower/political-media-bias",
"ethics/commonsense",
"ethics/deontology",
"ethics/justice",
"ethics/virtue",
"emo/emo2019",
"google_wellformed_query",
"tweets_hate_speech_detection",
"has_part",
"blog_authorship_corpus/gender",
"blog_authorship_corpus/age",
"blog_authorship_corpus/job",
"open_question_type",
"health_fact",
"commonsense_qa",
"mc_taco",
"ade_corpus_v2/Ade_corpus_v2_classification",
"discosense",
"circa",
"phrase_similarity",
"scientific-exaggeration-detection",
"quarel",
"fever-evidence-related",
"numer_sense",
"dynasent/dynabench.dynasent.r1.all/r1",
"dynasent/dynabench.dynasent.r2.all/r2",
"Sarcasm_News_Headline",
"sem_eval_2010_task_8",
"auditor_review",
"medmcqa",
"Dynasent_Disagreement",
"Politeness_Disagreement",
"SBIC_Disagreement",
"SChem_Disagreement",
"Dilemmas_Disagreement",
"logiqa",
"wiki_qa",
"cycic_classification",
"cycic_multiplechoice",
"sts-companion",
"commonsense_qa_2.0",
"lingnli",
"monotonicity-entailment",
"arct",
"scinli",
"naturallogic",
"onestop_qa",
"moral_stories/full",
"prost",
"dynahate",
"syntactic-augmentation-nli",
"autotnli",
"CONDAQA",
"webgpt_comparisons",
"synthetic-instruct-gptj-pairwise",
"scruples",
"wouldyourather",
"defeasible-nli/atomic",
"defeasible-nli/snli",
"help-nli",
"nli-veridicality-transitivity",
"lonli",
"dadc-limit-nli",
"FLUTE",
"strategy-qa",
"summarize_from_feedback/comparisons",
"folio",
"tomi-nli",
"avicenna",
"SHP",
"MedQA-USMLE-4-options-hf",
"wikimedqa/medwiki",
"cicero",
"CREAK",
"mutual",
"NeQA",
"quote-repetition",
"redefine-math",
"puzzte",
"implicatures",
"race/high",
"race/middle",
"race-c",
"spartqa-yn",
"spartqa-mchoice",
"temporal-nli",
"riddle_sense",
"clcd-english",
"twentyquestions",
"reclor",
"counterfactually-augmented-imdb",
"counterfactually-augmented-snli",
"cnli",
"boolq-natural-perturbations",
"acceptability-prediction",
"equate",
"ScienceQA_text_only",
"ekar_english",
"implicit-hate-stg1",
"chaos-mnli-ambiguity",
"headline_cause/en_simple",
"logiqa-2.0-nli",
"oasst2_dense_flat/quality",
"oasst2_dense_flat/toxicity",
"oasst2_dense_flat/helpfulness",
"mindgames",
"ambient",
"path-naturalness-prediction",
"civil_comments/toxicity",
"civil_comments/severe_toxicity",
"civil_comments/obscene",
"civil_comments/threat",
"civil_comments/insult",
"civil_comments/identity_attack",
"civil_comments/sexual_explicit",
"cloth",
"dgen",
"I2D2",
"args_me",
"Touche23-ValueEval",
"starcon",
"banking77",
"ConTRoL-nli",
"tracie",
"sherliic",
"sen-making/1",
"sen-making/2",
"winowhy",
"robustLR",
"v1/gen_train234_test2to10",
"logical-fallacy",
"parade",
"cladder",
"subjectivity",
"MOH",
"VUAC",
"sharc_modified/mod",
"conceptrules_v2",
"disrpt/eng.dep.scidtb.rels",
"zero-shot-label-nli",
"com2sense",
"scone",
"winodict",
"fool-me-twice",
"monli",
"corr2cause",
"lsat_qa/all",
"apt",
"twitter-financial-news-sentiment",
"icl-symbol-tuning-instruct",
"SpaceNLI",
"propsegment/nli",
"HatemojiBuild",
"regset",
"esci",
"chatbot_arena_conversations",
"dnd_style_intents",
"FLD.v2/default",
"FLD.v2/star",
"SDOH-NLI",
"scifact_entailment",
"feasibilityQA",
"simple_pair",
"AdjectiveScaleProbe-nli",
"resnli",
"SpaRTUN",
"ReSQ",
"semantic_fragments_nli",
"dataset_train_nli",
"stepgame",
"nlgraph",
"oasst2_pairwise_rlhf_reward",
"hh-rlhf/helpful-online",
"hh-rlhf/helpful-rejection-sampled",
"hh-rlhf/helpful-base",
"hh-rlhf/harmless-base",
"ruletaker",
"PARARULE-Plus",
"proofwriter",
"logical-entailment",
"nope",
"LogicNLI",
"contract-nli/contractnli_a/seg",
"contract-nli/contractnli_b/full",
"nli4ct_semeval2024",
"lsat-ar",
"lsat-rc",
"biosift-nli",
"brainteasers/SP",
"brainteasers/WP",
"persuasion",
"AmbigNQ-clarifying-question",
"SIGA-nli",
"FOL-nli",
"goal-step-wikihow/order",
"PARADISE",
"doc-nli",
"mctest-nli",
"patent-phrase-similarity",
"natural-language-satisfiability",
"idioms-nli",
"lifecycle-entailment",
"HelpSteer/helpfulness",
"HelpSteer/correctness",
"HelpSteer/coherence",
"HelpSteer/complexity",
"HelpSteer/verbosity",
"HelpSteer2/helpfulness",
"HelpSteer2/correctness",
"HelpSteer2/coherence",
"HelpSteer2/complexity",
"HelpSteer2/verbosity",
"MSciNLI",
"UltraFeedback-paired",
"AES2-essay-scoring",
"english-grading/cohesion",
"english-grading/syntax",
"english-grading/vocabulary",
"english-grading/phraseology",
"english-grading/grammar",
"english-grading/conventions",
"wice",
"hover",
"tasksource_dpo_pairs",
"seahorse_summarization_evaluation",
"babi_nli",
"gen_debiased_nli",
"imppres/presupposition",
"/prag",
"blimp-2"
],
"torch_dtype": "float32",
"transformers_version": "4.42.3",
"type_vocab_size": 0,
"vocab_size": 128100
}