{ "_name_or_path": "tasksource/deberta-small-long-nli", "architectures": [ "DebertaV2ForSequenceClassification" ], "attention_probs_dropout_prob": 0.1, "classifiers_size": [ 3, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 2, 3, 3, 3, 3, 1, 3, 3, 2, 2, 3, 2, 6, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 5, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 20, 50, 3, 3, 4, 2, 8, 2, 3, 2, 2, 4, 20, 3, 3, 3, 3, 3, 174, 2, 2, 2, 2, 41, 3, 51, 2, 2, 2, 2, 16, 17, 18, 2, 2, 2, 3, 8, 3, 7, 7, 11, 3, 3, 42, 12, 4, 100, 13, 100, 8, 1, 20, 2, 2, 4, 5, 3, 4, 14, 2, 6, 4, 2, 1, 3, 10, 3, 10, 4, 2, 7, 6, 28, 3, 6, 5, 7, 4, 6, 3, 2, 2, 2, 6, 2, 7, 20, 2, 2, 4, 2, 9, 13, 4, 2, 3, 2, 3, 2, 2, 2, 2, 4, 1, 2, 1, 2, 49, 40, 10, 4, 1, 2, 2, 1, 5, 2, 3, 2, 2, 12, 3, 3, 2, 19, 3, 1, 2, 2, 2, 2, 2, 1, 2, 2, 1, 1, 2, 3, 2, 1, 4, 3, 1, 1, 1, 2, 3, 2, 3, 1, 1, 2, 1, 2, 2, 2, 2, 3, 2, 2, 2, 1, 3, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 4, 1, 1, 1, 1, 3, 1, 3, 1, 2, 2, 1, 2, 3, 3, 2, 1, 3, 1, 1, 3, 1, 3, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 77, 3, 2, 2, 1, 1, 2, 3, 18, 13, 2, 2, 2, 2, 2, 4, 2, 24, 3, 2, 2, 1, 2, 2, 3, 1, 2, 3, 2, 3, 3, 2, 2, 4, 1, 17, 3, 3, 2, 3, 2, 3, 3, 2, 1, 1, 3, 2, 9, 2, 1, 1, 1, 1, 1, 2, 2, 3, 2, 3, 4, 3, 3, 2, 1, 1, 2, 1, 1, 6, 2, 3, 3, 1, 1, 2, 2, 10, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 1, 7, 6, 6, 6, 6, 6, 6, 3, 2, 1, 2, 2, 2, 3, 3, 3, 1 ], "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "id2label": { "0": "not biased", "1": "biased" }, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "biased": 1, "not biased": 0 }, "layer_norm_eps": 1e-07, "max_position_embeddings": 1680, "max_relative_positions": -1, "model_type": "deberta-v2", "norm_rel_ebd": "layer_norm", "num_attention_heads": 12, "num_hidden_layers": 6, "pad_token_id": 0, "pooler_dropout": 0, "pooler_hidden_act": "gelu", "pooler_hidden_size": 768, "pos_att_type": [ "p2c", "c2p" ], "position_biased_input": false, "position_buckets": 256, "problem_type": "multi_label_classification", "relative_attention": true, "share_att_key": true, "tasks": [ "glue/mnli", "glue/qnli", "glue/rte", "glue/wnli", "glue/mrpc", "glue/qqp", "glue/stsb", "super_glue/boolq", "super_glue/cb", "super_glue/multirc", "super_glue/wic", "super_glue/axg", "anli/a1", "anli/a2", "anli/a3", "sick/label", "sick/relatedness", "sick/entailment_AB", "snli", "scitail/snli_format", "hans", "WANLI", "recast/recast_ner", "recast/recast_kg_relations", "recast/recast_puns", "recast/recast_verbcorner", "recast/recast_sentiment", "recast/recast_verbnet", "recast/recast_factuality", "recast/recast_megaveridicality", "probability_words_nli/reasoning_2hop", "probability_words_nli/reasoning_1hop", "probability_words_nli/usnli", "nan-nli", "nli_fever", "breaking_nli", "conj_nli", "fracas", "dialogue_nli", "mpe", "dnc", "recast_white/fnplus", "recast_white/sprl", "recast_white/dpr", "joci", "robust_nli/IS_CS", "robust_nli/LI_LI", "robust_nli/ST_WO", "robust_nli/PI_SP", "robust_nli/PI_CD", "robust_nli/ST_SE", "robust_nli/ST_NE", "robust_nli/ST_LM", "robust_nli_is_sd", "robust_nli_li_ts", "add_one_rte", "imppres/implicature_quantifiers/log", "imppres/implicature_numerals_2_3/log", "imppres/implicature_gradable_adjective/log", "imppres/implicature_connectives/log", "imppres/implicature_modals/log", "imppres/implicature_numerals_10_100/log", "imppres/implicature_gradable_verb/log", "hlgd", "paws/labeled_final", "paws/labeled_swap", "medical_questions_pairs", "model-written-evals", "truthful_qa/multiple_choice", "fig-qa", "bigbench/bbq_lite_json", "bigbench/english_proverbs", "bigbench/figure_of_speech_detection", "bigbench/emoji_movie", "bigbench/date_understanding", "bigbench/metaphor_understanding", "bigbench/logical_fallacy_detection", "bigbench/analogical_similarity", "bigbench/mnist_ascii", "bigbench/elementary_math_qa", "bigbench/snarks", "bigbench/social_support", "bigbench/logical_deduction", "bigbench/emojis_emotion_prediction", "bigbench/sports_understanding", "bigbench/cifar10_classification", "bigbench/tracking_shuffled_objects", "bigbench/physics", "bigbench/mathematical_induction", "bigbench/movie_dialog_same_or_different", "bigbench/goal_step_wikihow", "bigbench/strategyqa", "bigbench/identify_math_theorems", "bigbench/gre_reading_comprehension", "bigbench/novel_concepts", "bigbench/conceptual_combinations", "bigbench/hyperbaton", "bigbench/strange_stories", "bigbench/contextual_parametric_knowledge_conflicts", "bigbench/implicatures", "bigbench/penguins_in_a_table", "bigbench/logical_sequence", "bigbench/simple_ethical_questions", "bigbench/dyck_languages", "bigbench/geometric_shapes", "bigbench/irony_identification", "bigbench/intent_recognition", "bigbench/logical_args", "bigbench/known_unknowns", "bigbench/formal_fallacies_syllogisms_negation", "bigbench/suicide_risk", "bigbench/crash_blossom", "bigbench/logic_grid_puzzle", "bigbench/analytic_entailment", "bigbench/dark_humor_detection", "bigbench/timedial", "bigbench/presuppositions_as_nli", "bigbench/arithmetic", "bigbench/implicit_relations", "bigbench/understanding_fables", "bigbench/salient_translation_error_detection", "bigbench/anachronisms", "bigbench/moral_permissibility", "bigbench/abstract_narrative_understanding", "bigbench/misconceptions", "bigbench/nonsense_words_grammar", "bigbench/code_line_description", "bigbench/sentence_ambiguity", "bigbench/disambiguation_qa", "bigbench/crass_ai", "bigbench/similarities_abstraction", "bigbench/authorship_verification", "bigbench/phrase_relatedness", "bigbench/color", "bigbench/hhh_alignment", "bigbench/metaphor_boolean", "bigbench/fantasy_reasoning", "bigbench/ruin_names", "bigbench/cause_and_effect", "bigbench/temporal_sequences", "bigbench/navigate", "bigbench/symbol_interpretation", "bigbench/key_value_maps", "bigbench/entailed_polarity", "bigbench/riddle_sense", "bigbench/discourse_marker_prediction", "bigbench/reasoning_about_colored_objects", "bigbench/empirical_judgments", "bigbench/fact_checker", "bigbench/movie_recommendation", "bigbench/checkmate_in_one", "bigbench/epistemic_reasoning", "bigbench/vitaminc_fact_verification", "bigbench/general_knowledge", "bigbench/identify_odd_metaphor", "bigbench/physical_intuition", "bigbench/winowhy", "bigbench/cs_algorithms", "bigbench/undo_permutation", "bigbench/evaluating_information_essentiality", "bigbench/unit_interpretation", "bigbench/question_selection", "bigbench/international_phonetic_alphabet_nli", "bigbench/play_dialog_same_or_different", "bigbench/real_or_fake_text", "bigbench/human_organs_senses", "bigbench/hindu_knowledge", "bigbench/social_iqa", "bigbench/odd_one_out", "bigbench/causal_judgment", "cos_e/v1.0", "cosmos_qa", "dream", "openbookqa", "qasc", "quartz", "quail", "head_qa/en", "sciq", "social_i_qa", "wiki_hop/original", "wiqa", "piqa", "hellaswag", "super_glue/copa", "balanced-copa", "e-CARE", "art", "winogrande/winogrande_xl", "codah/codah", "ai2_arc/ARC-Easy/challenge", "ai2_arc/ARC-Challenge/challenge", "definite_pronoun_resolution", "swag/regular", "math_qa", "glue/cola", "glue/sst2", "utilitarianism", "amazon_counterfactual/en", "insincere-questions", "toxic_conversations", "TuringBench", "trec", "vitaminc", "hope_edi/english", "rumoureval_2019/RumourEval2019", "ethos/binary", "ethos/multilabel", "tweet_eval/offensive", "tweet_eval/sentiment", "tweet_eval/irony", "tweet_eval/hate", "tweet_eval/emotion", "tweet_eval/emoji", "tweet_eval/stance_abortion", "tweet_eval/stance_atheism", "tweet_eval/stance_climate", "tweet_eval/stance_feminist", "tweet_eval/stance_hillary", "discovery/discovery", "pragmeval/squinky-formality", "pragmeval/emobank-arousal", "pragmeval/squinky-implicature", "pragmeval/squinky-informativeness", "pragmeval/switchboard", "pragmeval/verifiability", "pragmeval/mrda", "pragmeval/emobank-valence", "pragmeval/emobank-dominance", "pragmeval/persuasiveness-strength", "pragmeval/persuasiveness-relevance", "pragmeval/pdtb", "pragmeval/gum", "pragmeval/stac", "pragmeval/persuasiveness-specificity", "pragmeval/sarcasm", "pragmeval/persuasiveness-eloquence", "pragmeval/emergent", "pragmeval/persuasiveness-premisetype", "pragmeval/persuasiveness-claimtype", "silicone/meld_e", "silicone/dyda_e", "silicone/iemocap", "silicone/sem", "silicone/meld_s", "silicone/oasis", "silicone/maptask", "silicone/dyda_da", "lex_glue/eurlex", "lex_glue/scotus", "lex_glue/ledgar", "lex_glue/unfair_tos", "lex_glue/case_hold", "language-identification", "imdb", "rotten_tomatoes", "ag_news", "yelp_review_full/yelp_review_full", "financial_phrasebank/sentences_allagree", "poem_sentiment", "dbpedia_14/dbpedia_14", "amazon_polarity/amazon_polarity", "app_reviews", "hate_speech18", "sms_spam", "humicroedit/subtask-1", "humicroedit/subtask-2", "snips_built_in_intents", "hate_speech_offensive", "yahoo_answers_topics", "stackoverflow-questions", "hyperpartisan_news", "sciie", "citation_intent", "go_emotions/simplified", "scicite", "liar", "lexical_relation_classification/CogALexV", "lexical_relation_classification/EVALution", "lexical_relation_classification/K&H+N", "lexical_relation_classification/BLESS", "lexical_relation_classification/ROOT09", "linguisticprobing/obj_number", "linguisticprobing/bigram_shift", "linguisticprobing/subj_number", "linguisticprobing/sentence_length", "linguisticprobing/odd_man_out", "linguisticprobing/tree_depth", "linguisticprobing/top_constituents", "linguisticprobing/coordination_inversion", "linguisticprobing/past_present", "crowdflower/sentiment_nuclear_power", "crowdflower/tweet_global_warming", "crowdflower/political-media-message", "crowdflower/text_emotion", "crowdflower/corporate-messaging", "crowdflower/political-media-audience", "crowdflower/airline-sentiment", "crowdflower/political-media-bias", "crowdflower/economic-news", "ethics/commonsense", "ethics/deontology", "ethics/justice", "ethics/virtue", "emo/emo2019", "google_wellformed_query", "tweets_hate_speech_detection", "has_part", "blog_authorship_corpus/gender", "blog_authorship_corpus/age", "blog_authorship_corpus/job", "open_question_type", "health_fact", "commonsense_qa", "mc_taco", "ade_corpus_v2/Ade_corpus_v2_classification", "discosense", "circa", "phrase_similarity", "scientific-exaggeration-detection", "quarel", "fever-evidence-related", "numer_sense", "dynasent/dynabench.dynasent.r1.all/r1", "dynasent/dynabench.dynasent.r2.all/r2", "Sarcasm_News_Headline", "sem_eval_2010_task_8", "auditor_review", "medmcqa", "Dynasent_Disagreement", "Politeness_Disagreement", "SBIC_Disagreement", "SChem_Disagreement", "Dilemmas_Disagreement", "logiqa", "wiki_qa", "cycic_classification", "cycic_multiplechoice", "sts-companion", "commonsense_qa_2.0", "lingnli", "monotonicity-entailment", "arct", "scinli", "naturallogic", "onestop_qa", "moral_stories/full", "prost", "dynahate", "syntactic-augmentation-nli", "autotnli", "CONDAQA", "webgpt_comparisons", "synthetic-instruct-gptj-pairwise", "scruples", "wouldyourather", "defeasible-nli/atomic", "defeasible-nli/snli", "help-nli", "nli-veridicality-transitivity", "lonli", "dadc-limit-nli", "FLUTE", "strategy-qa", "summarize_from_feedback/comparisons", "folio", "tomi-nli", "avicenna", "SHP", "MedQA-USMLE-4-options-hf", "wikimedqa/medwiki", "cicero", "CREAK", "mutual", "NeQA", "quote-repetition", "redefine-math", "puzzte", "implicatures", "race/middle", "race/high", "race-c", "spartqa-yn", "spartqa-mchoice", "temporal-nli", "riddle_sense", "clcd-english", "twentyquestions", "reclor", "counterfactually-augmented-imdb", "counterfactually-augmented-snli", "cnli", "boolq-natural-perturbations", "acceptability-prediction", "equate", "ScienceQA_text_only", "ekar_english", "implicit-hate-stg1", "chaos-mnli-ambiguity", "headline_cause/en_simple", "logiqa-2.0-nli", "oasst2_dense_flat/quality", "oasst2_dense_flat/toxicity", "oasst2_dense_flat/helpfulness", "mindgames", "ambient", "path-naturalness-prediction", "civil_comments/toxicity", "civil_comments/severe_toxicity", "civil_comments/obscene", "civil_comments/threat", "civil_comments/insult", "civil_comments/identity_attack", "civil_comments/sexual_explicit", "cloth", "dgen", "I2D2", "args_me", "Touche23-ValueEval", "starcon", "banking77", "ConTRoL-nli", "tracie", "sherliic", "sen-making/1", "sen-making/2", "winowhy", "robustLR", "v1/gen_train234_test2to10", "logical-fallacy", "parade", "cladder", "subjectivity", "MOH", "VUAC", "sharc_modified/mod", "conceptrules_v2", "disrpt/eng.dep.scidtb.rels", "zero-shot-label-nli", "com2sense", "scone", "winodict", "fool-me-twice", "monli", "corr2cause", "lsat_qa/all", "apt", "twitter-financial-news-sentiment", "icl-symbol-tuning-instruct", "SpaceNLI", "propsegment/nli", "HatemojiBuild", "regset", "esci", "chatbot_arena_conversations", "dnd_style_intents", "FLD.v2/default", "FLD.v2/star", "SDOH-NLI", "scifact_entailment", "feasibilityQA", "simple_pair", "AdjectiveScaleProbe-nli", "resnli", "SpaRTUN", "ReSQ", "semantic_fragments_nli", "dataset_train_nli", "stepgame", "nlgraph", "oasst2_pairwise_rlhf_reward", "hh-rlhf/helpful-base", "hh-rlhf/helpful-online", "hh-rlhf/helpful-rejection-sampled", "hh-rlhf/harmless-base", "ruletaker", "PARARULE-Plus", "proofwriter", "logical-entailment", "nope", "LogicNLI", "contract-nli/contractnli_a/seg", "contract-nli/contractnli_b/full", "nli4ct_semeval2024", "lsat-ar", "lsat-rc", "biosift-nli", "brainteasers/WP", "brainteasers/SP", "persuasion", "AmbigNQ-clarifying-question", "SIGA-nli", "FOL-nli", "goal-step-wikihow/order", "PARADISE", "doc-nli", "mctest-nli", "patent-phrase-similarity", "natural-language-satisfiability", "idioms-nli", "lifecycle-entailment", "HelpSteer/helpfulness", "HelpSteer/correctness", "HelpSteer/coherence", "HelpSteer/complexity", "HelpSteer/verbosity", "HelpSteer2/helpfulness", "HelpSteer2/correctness", "HelpSteer2/coherence", "HelpSteer2/complexity", "HelpSteer2/verbosity", "MSciNLI", "UltraFeedback-paired", "AES2-essay-scoring", "english-grading/cohesion", "english-grading/syntax", "english-grading/vocabulary", "english-grading/phraseology", "english-grading/grammar", "english-grading/conventions", "wice", "hover", "tasksource_dpo_pairs", "seahorse_summarization_evaluation", "missing-item-prediction/contrastive", "babi_nli", "gen_debiased_nli", "imppres/presupposition", "/prag", "blimp-2" ], "torch_dtype": "bfloat16", "transformers_version": "4.46.2", "type_vocab_size": 0, "vocab_size": 128100 }