sileod committed on
Commit
9f72a81
1 Parent(s): ee3f917

Upload DebertaV2ForSequenceClassification

Browse files
Files changed (2) hide show
  1. config.json +146 -142
  2. model.safetensors +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "sileod/deberta-v3-base-tasksource-nli",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
@@ -27,13 +27,13 @@
27
  2,
28
  2,
29
  3,
 
30
  2,
31
  2,
32
  2,
33
  2,
34
  2,
35
  2,
36
- 6,
37
  2,
38
  2,
39
  2,
@@ -213,46 +213,46 @@
213
  4,
214
  2,
215
  8,
216
- 20,
217
  2,
218
  2,
219
- 4,
220
  2,
 
221
  3,
 
222
  3,
223
  3,
224
  3,
225
  3,
226
  3,
227
  174,
228
- 2,
229
- 3,
230
- 51,
231
  41,
 
232
  2,
 
233
  2,
234
  2,
235
  2,
236
  2,
237
  2,
238
- 2,
239
- 18,
240
  3,
241
  16,
242
  17,
243
- 3,
244
  2,
245
  8,
246
  2,
247
- 2,
248
  3,
 
 
 
 
249
  3,
250
- 42,
251
- 4,
252
  7,
253
- 11,
254
  12,
 
255
  7,
 
 
 
256
  100,
257
  13,
258
  100,
@@ -283,27 +283,27 @@
283
  3,
284
  6,
285
  6,
286
- 7,
287
  5,
 
288
  4,
289
  3,
290
- 7,
291
  20,
292
  2,
 
293
  2,
294
  2,
295
  2,
296
  2,
297
- 6,
298
  2,
299
- 9,
300
- 4,
301
  2,
302
- 4,
303
  13,
304
- 3,
305
- 3,
306
  2,
 
 
 
 
307
  2,
308
  2,
309
  2,
@@ -528,6 +528,8 @@
528
  6,
529
  3,
530
  2,
 
 
531
  3,
532
  3,
533
  3,
@@ -549,7 +551,7 @@
549
  "neutral": 1
550
  },
551
  "layer_norm_eps": 1e-07,
552
- "max_position_embeddings": 1024,
553
  "max_relative_positions": -1,
554
  "model_type": "deberta-v2",
555
  "norm_rel_ebd": "layer_norm",
@@ -590,14 +592,14 @@
590
  "scitail/snli_format",
591
  "hans",
592
  "WANLI",
 
593
  "recast/recast_sentiment",
594
- "recast/recast_ner",
595
- "recast/recast_verbcorner",
596
  "recast/recast_verbnet",
 
597
  "recast/recast_factuality",
598
- "recast/recast_puns",
599
- "recast/recast_kg_relations",
600
  "recast/recast_megaveridicality",
 
601
  "probability_words_nli/reasoning_1hop",
602
  "probability_words_nli/reasoning_2hop",
603
  "probability_words_nli/usnli",
@@ -624,13 +626,13 @@
624
  "robust_nli_is_sd",
625
  "robust_nli_li_ts",
626
  "add_one_rte",
627
- "imppres/implicature_connectives/log",
628
- "imppres/implicature_gradable_adjective/log",
629
- "imppres/implicature_quantifiers/log",
630
  "imppres/implicature_numerals_10_100/log",
631
  "imppres/implicature_modals/log",
632
  "imppres/implicature_gradable_verb/log",
 
 
633
  "imppres/implicature_numerals_2_3/log",
 
634
  "hlgd",
635
  "paws/labeled_final",
636
  "paws/labeled_swap",
@@ -638,106 +640,106 @@
638
  "model-written-evals",
639
  "truthful_qa/multiple_choice",
640
  "fig-qa",
641
- "bigbench/general_knowledge",
642
- "bigbench/physical_intuition",
643
- "bigbench/bbq_lite_json",
644
- "bigbench/elementary_math_qa",
645
- "bigbench/human_organs_senses",
 
 
 
646
  "bigbench/misconceptions",
 
 
 
 
 
 
 
 
 
 
647
  "bigbench/social_iqa",
648
- "bigbench/causal_judgment",
 
 
 
 
 
 
 
 
 
 
 
649
  "bigbench/physics",
650
- "bigbench/identify_math_theorems",
651
- "bigbench/fact_checker",
652
- "bigbench/empirical_judgments",
 
653
  "bigbench/key_value_maps",
654
- "bigbench/hhh_alignment",
655
- "bigbench/logical_deduction",
656
- "bigbench/similarities_abstraction",
657
- "bigbench/color",
658
- "bigbench/evaluating_information_essentiality",
659
- "bigbench/temporal_sequences",
660
- "bigbench/authorship_verification",
661
- "bigbench/conceptual_combinations",
662
- "bigbench/gre_reading_comprehension",
663
- "bigbench/symbol_interpretation",
664
  "bigbench/undo_permutation",
665
- "bigbench/snarks",
666
- "bigbench/mnist_ascii",
667
- "bigbench/arithmetic",
668
  "bigbench/nonsense_words_grammar",
669
  "bigbench/logical_sequence",
 
 
 
 
 
 
 
 
670
  "bigbench/movie_recommendation",
671
- "bigbench/cs_algorithms",
672
- "bigbench/anachronisms",
673
- "bigbench/known_unknowns",
674
  "bigbench/intent_recognition",
675
- "bigbench/entailed_polarity",
676
- "bigbench/tracking_shuffled_objects",
 
677
  "bigbench/salient_translation_error_detection",
 
 
 
 
678
  "bigbench/implicatures",
679
- "bigbench/international_phonetic_alphabet_nli",
680
- "bigbench/mathematical_induction",
681
- "bigbench/irony_identification",
682
- "bigbench/movie_dialog_same_or_different",
683
- "bigbench/moral_permissibility",
684
- "bigbench/goal_step_wikihow",
685
- "bigbench/fantasy_reasoning",
686
- "bigbench/timedial",
687
- "bigbench/figure_of_speech_detection",
688
  "bigbench/checkmate_in_one",
689
- "bigbench/winowhy",
690
- "bigbench/dyck_languages",
691
- "bigbench/social_support",
692
- "bigbench/unit_interpretation",
693
- "bigbench/english_proverbs",
694
  "bigbench/ruin_names",
695
- "bigbench/crash_blossom",
 
 
 
 
 
 
 
696
  "bigbench/emojis_emotion_prediction",
697
- "bigbench/understanding_fables",
698
- "bigbench/implicit_relations",
699
- "bigbench/cifar10_classification",
700
- "bigbench/analytic_entailment",
701
- "bigbench/vitaminc_fact_verification",
702
- "bigbench/phrase_relatedness",
703
- "bigbench/epistemic_reasoning",
704
- "bigbench/penguins_in_a_table",
705
- "bigbench/question_selection",
706
- "bigbench/reasoning_about_colored_objects",
707
- "bigbench/strategyqa",
708
  "bigbench/dark_humor_detection",
709
- "bigbench/formal_fallacies_syllogisms_negation",
710
- "bigbench/odd_one_out",
711
- "bigbench/novel_concepts",
712
- "bigbench/hyperbaton",
713
- "bigbench/suicide_risk",
714
- "bigbench/navigate",
715
  "bigbench/crass_ai",
716
- "bigbench/hindu_knowledge",
717
- "bigbench/disambiguation_qa",
718
- "bigbench/riddle_sense",
719
- "bigbench/presuppositions_as_nli",
720
- "bigbench/strange_stories",
721
- "bigbench/emoji_movie",
722
- "bigbench/metaphor_understanding",
723
  "bigbench/logical_args",
724
- "bigbench/identify_odd_metaphor",
725
- "bigbench/logical_fallacy_detection",
726
- "bigbench/code_line_description",
727
- "bigbench/geometric_shapes",
728
- "bigbench/discourse_marker_prediction",
729
- "bigbench/sentence_ambiguity",
730
- "bigbench/abstract_narrative_understanding",
731
  "bigbench/analogical_similarity",
732
- "bigbench/cause_and_effect",
733
- "bigbench/simple_ethical_questions",
734
- "bigbench/sports_understanding",
735
- "bigbench/date_understanding",
736
- "bigbench/metaphor_boolean",
737
- "bigbench/logic_grid_puzzle",
738
- "bigbench/contextual_parametric_knowledge_conflicts",
739
- "bigbench/real_or_fake_text",
740
- "bigbench/play_dialog_same_or_different",
741
  "cos_e/v1.0",
742
  "cosmos_qa",
743
  "dream",
@@ -776,46 +778,46 @@
776
  "rumoureval_2019/RumourEval2019",
777
  "ethos/binary",
778
  "ethos/multilabel",
779
- "tweet_eval/emoji",
780
- "tweet_eval/hate",
781
  "tweet_eval/offensive",
 
782
  "tweet_eval/emotion",
783
- "tweet_eval/irony",
784
  "tweet_eval/sentiment",
 
785
  "tweet_eval/stance_abortion",
786
  "tweet_eval/stance_atheism",
787
  "tweet_eval/stance_climate",
788
  "tweet_eval/stance_feminist",
789
  "tweet_eval/stance_hillary",
790
  "discovery/discovery",
 
 
791
  "pragmeval/squinky-informativeness",
792
  "pragmeval/verifiability",
793
- "pragmeval/mrda",
794
- "pragmeval/switchboard",
795
- "pragmeval/emobank-arousal",
796
  "pragmeval/emobank-valence",
797
  "pragmeval/emobank-dominance",
798
- "pragmeval/squinky-formality",
799
- "pragmeval/squinky-implicature",
800
- "pragmeval/sarcasm",
801
- "pragmeval/persuasiveness-specificity",
802
- "pragmeval/stac",
803
  "pragmeval/emergent",
804
  "pragmeval/pdtb",
805
  "pragmeval/gum",
806
- "pragmeval/persuasiveness-claimtype",
807
- "pragmeval/persuasiveness-strength",
808
  "pragmeval/persuasiveness-premisetype",
809
  "pragmeval/persuasiveness-relevance",
810
- "pragmeval/persuasiveness-eloquence",
 
 
 
 
811
  "silicone/sem",
812
- "silicone/meld_s",
813
- "silicone/oasis",
814
  "silicone/dyda_da",
815
  "silicone/dyda_e",
 
 
816
  "silicone/iemocap",
817
- "silicone/maptask",
818
- "silicone/meld_e",
819
  "lex_glue/eurlex",
820
  "lex_glue/scotus",
821
  "lex_glue/ledgar",
@@ -846,28 +848,28 @@
846
  "scicite",
847
  "liar",
848
  "lexical_relation_classification/BLESS",
849
- "lexical_relation_classification/EVALution",
850
  "lexical_relation_classification/CogALexV",
 
851
  "lexical_relation_classification/K&H+N",
852
  "lexical_relation_classification/ROOT09",
853
- "linguisticprobing/tree_depth",
854
  "linguisticprobing/top_constituents",
 
 
855
  "linguisticprobing/subj_number",
 
 
856
  "linguisticprobing/bigram_shift",
857
  "linguisticprobing/odd_man_out",
858
- "linguisticprobing/coordination_inversion",
859
- "linguisticprobing/past_present",
860
- "linguisticprobing/sentence_length",
861
- "linguisticprobing/obj_number",
862
- "crowdflower/political-media-message",
863
- "crowdflower/corporate-messaging",
864
- "crowdflower/tweet_global_warming",
865
- "crowdflower/sentiment_nuclear_power",
866
- "crowdflower/text_emotion",
867
- "crowdflower/airline-sentiment",
868
  "crowdflower/economic-news",
869
  "crowdflower/political-media-bias",
 
870
  "crowdflower/political-media-audience",
 
 
 
 
 
871
  "ethics/commonsense",
872
  "ethics/deontology",
873
  "ethics/justice",
@@ -1090,6 +1092,8 @@
1090
  "english-grading/grammar",
1091
  "english-grading/conventions",
1092
  "wice",
 
 
1093
  "babi_nli",
1094
  "gen_debiased_nli",
1095
  "imppres/presupposition",
 
1
  {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
 
27
  2,
28
  2,
29
  3,
30
+ 6,
31
  2,
32
  2,
33
  2,
34
  2,
35
  2,
36
  2,
 
37
  2,
38
  2,
39
  2,
 
213
  4,
214
  2,
215
  8,
 
216
  2,
217
  2,
 
218
  2,
219
+ 4,
220
  3,
221
+ 20,
222
  3,
223
  3,
224
  3,
225
  3,
226
  3,
227
  174,
 
 
 
228
  41,
229
+ 51,
230
  2,
231
+ 3,
232
  2,
233
  2,
234
  2,
235
  2,
236
  2,
 
 
237
  3,
238
  16,
239
  17,
 
240
  2,
241
  8,
242
  2,
 
243
  3,
244
+ 2,
245
+ 2,
246
+ 18,
247
+ 2,
248
  3,
 
 
249
  7,
 
250
  12,
251
+ 4,
252
  7,
253
+ 42,
254
+ 3,
255
+ 11,
256
  100,
257
  13,
258
  100,
 
283
  3,
284
  6,
285
  6,
 
286
  5,
287
+ 7,
288
  4,
289
  3,
290
+ 6,
291
  20,
292
  2,
293
+ 7,
294
  2,
295
  2,
296
  2,
297
  2,
 
298
  2,
299
+ 3,
 
300
  2,
 
301
  13,
 
 
302
  2,
303
+ 4,
304
+ 9,
305
+ 4,
306
+ 3,
307
  2,
308
  2,
309
  2,
 
528
  6,
529
  3,
530
  2,
531
+ 1,
532
+ 2,
533
  3,
534
  3,
535
  3,
 
551
  "neutral": 1
552
  },
553
  "layer_norm_eps": 1e-07,
554
+ "max_position_embeddings": 1280,
555
  "max_relative_positions": -1,
556
  "model_type": "deberta-v2",
557
  "norm_rel_ebd": "layer_norm",
 
592
  "scitail/snli_format",
593
  "hans",
594
  "WANLI",
595
+ "recast/recast_kg_relations",
596
  "recast/recast_sentiment",
597
+ "recast/recast_puns",
 
598
  "recast/recast_verbnet",
599
+ "recast/recast_verbcorner",
600
  "recast/recast_factuality",
 
 
601
  "recast/recast_megaveridicality",
602
+ "recast/recast_ner",
603
  "probability_words_nli/reasoning_1hop",
604
  "probability_words_nli/reasoning_2hop",
605
  "probability_words_nli/usnli",
 
626
  "robust_nli_is_sd",
627
  "robust_nli_li_ts",
628
  "add_one_rte",
 
 
 
629
  "imppres/implicature_numerals_10_100/log",
630
  "imppres/implicature_modals/log",
631
  "imppres/implicature_gradable_verb/log",
632
+ "imppres/implicature_connectives/log",
633
+ "imppres/implicature_gradable_adjective/log",
634
  "imppres/implicature_numerals_2_3/log",
635
+ "imppres/implicature_quantifiers/log",
636
  "hlgd",
637
  "paws/labeled_final",
638
  "paws/labeled_swap",
 
640
  "model-written-evals",
641
  "truthful_qa/multiple_choice",
642
  "fig-qa",
643
+ "bigbench/known_unknowns",
644
+ "bigbench/formal_fallacies_syllogisms_negation",
645
+ "bigbench/presuppositions_as_nli",
646
+ "bigbench/metaphor_understanding",
647
+ "bigbench/cause_and_effect",
648
+ "bigbench/simple_ethical_questions",
649
+ "bigbench/color",
650
+ "bigbench/similarities_abstraction",
651
  "bigbench/misconceptions",
652
+ "bigbench/english_proverbs",
653
+ "bigbench/sports_understanding",
654
+ "bigbench/metaphor_boolean",
655
+ "bigbench/logical_fallacy_detection",
656
+ "bigbench/logic_grid_puzzle",
657
+ "bigbench/empirical_judgments",
658
+ "bigbench/mnist_ascii",
659
+ "bigbench/entailed_polarity",
660
+ "bigbench/elementary_math_qa",
661
+ "bigbench/gre_reading_comprehension",
662
  "bigbench/social_iqa",
663
+ "bigbench/figure_of_speech_detection",
664
+ "bigbench/timedial",
665
+ "bigbench/moral_permissibility",
666
+ "bigbench/evaluating_information_essentiality",
667
+ "bigbench/implicit_relations",
668
+ "bigbench/real_or_fake_text",
669
+ "bigbench/hyperbaton",
670
+ "bigbench/odd_one_out",
671
+ "bigbench/play_dialog_same_or_different",
672
+ "bigbench/vitaminc_fact_verification",
673
+ "bigbench/goal_step_wikihow",
674
+ "bigbench/unit_interpretation",
675
  "bigbench/physics",
676
+ "bigbench/code_line_description",
677
+ "bigbench/phrase_relatedness",
678
+ "bigbench/general_knowledge",
679
+ "bigbench/identify_odd_metaphor",
680
  "bigbench/key_value_maps",
681
+ "bigbench/sentence_ambiguity",
682
+ "bigbench/disambiguation_qa",
683
+ "bigbench/movie_dialog_same_or_different",
684
+ "bigbench/discourse_marker_prediction",
685
+ "bigbench/social_support",
686
+ "bigbench/anachronisms",
 
 
 
 
687
  "bigbench/undo_permutation",
688
+ "bigbench/irony_identification",
 
 
689
  "bigbench/nonsense_words_grammar",
690
  "bigbench/logical_sequence",
691
+ "bigbench/reasoning_about_colored_objects",
692
+ "bigbench/causal_judgment",
693
+ "bigbench/geometric_shapes",
694
+ "bigbench/logical_deduction",
695
+ "bigbench/question_selection",
696
+ "bigbench/hhh_alignment",
697
+ "bigbench/analytic_entailment",
698
+ "bigbench/riddle_sense",
699
  "bigbench/movie_recommendation",
700
+ "bigbench/penguins_in_a_table",
 
 
701
  "bigbench/intent_recognition",
702
+ "bigbench/conceptual_combinations",
703
+ "bigbench/cs_algorithms",
704
+ "bigbench/temporal_sequences",
705
  "bigbench/salient_translation_error_detection",
706
+ "bigbench/abstract_narrative_understanding",
707
+ "bigbench/authorship_verification",
708
+ "bigbench/winowhy",
709
+ "bigbench/strategyqa",
710
  "bigbench/implicatures",
711
+ "bigbench/snarks",
712
+ "bigbench/human_organs_senses",
713
+ "bigbench/arithmetic",
714
+ "bigbench/tracking_shuffled_objects",
715
+ "bigbench/date_understanding",
716
+ "bigbench/understanding_fables",
 
 
 
717
  "bigbench/checkmate_in_one",
718
+ "bigbench/cifar10_classification",
719
+ "bigbench/navigate",
 
 
 
720
  "bigbench/ruin_names",
721
+ "bigbench/bbq_lite_json",
722
+ "bigbench/suicide_risk",
723
+ "bigbench/dyck_languages",
724
+ "bigbench/emoji_movie",
725
+ "bigbench/contextual_parametric_knowledge_conflicts",
726
+ "bigbench/novel_concepts",
727
+ "bigbench/fantasy_reasoning",
728
+ "bigbench/mathematical_induction",
729
  "bigbench/emojis_emotion_prediction",
730
+ "bigbench/symbol_interpretation",
731
+ "bigbench/strange_stories",
732
+ "bigbench/identify_math_theorems",
 
 
 
 
 
 
 
 
733
  "bigbench/dark_humor_detection",
 
 
 
 
 
 
734
  "bigbench/crass_ai",
735
+ "bigbench/international_phonetic_alphabet_nli",
 
 
 
 
 
 
736
  "bigbench/logical_args",
737
+ "bigbench/epistemic_reasoning",
738
+ "bigbench/hindu_knowledge",
739
+ "bigbench/physical_intuition",
740
+ "bigbench/crash_blossom",
 
 
 
741
  "bigbench/analogical_similarity",
742
+ "bigbench/fact_checker",
 
 
 
 
 
 
 
 
743
  "cos_e/v1.0",
744
  "cosmos_qa",
745
  "dream",
 
778
  "rumoureval_2019/RumourEval2019",
779
  "ethos/binary",
780
  "ethos/multilabel",
781
+ "tweet_eval/irony",
 
782
  "tweet_eval/offensive",
783
+ "tweet_eval/hate",
784
  "tweet_eval/emotion",
 
785
  "tweet_eval/sentiment",
786
+ "tweet_eval/emoji",
787
  "tweet_eval/stance_abortion",
788
  "tweet_eval/stance_atheism",
789
  "tweet_eval/stance_climate",
790
  "tweet_eval/stance_feminist",
791
  "tweet_eval/stance_hillary",
792
  "discovery/discovery",
793
+ "pragmeval/switchboard",
794
+ "pragmeval/mrda",
795
  "pragmeval/squinky-informativeness",
796
  "pragmeval/verifiability",
797
+ "pragmeval/squinky-implicature",
798
+ "pragmeval/squinky-formality",
 
799
  "pragmeval/emobank-valence",
800
  "pragmeval/emobank-dominance",
801
+ "pragmeval/emobank-arousal",
 
 
 
 
802
  "pragmeval/emergent",
803
  "pragmeval/pdtb",
804
  "pragmeval/gum",
805
+ "pragmeval/persuasiveness-eloquence",
 
806
  "pragmeval/persuasiveness-premisetype",
807
  "pragmeval/persuasiveness-relevance",
808
+ "pragmeval/persuasiveness-claimtype",
809
+ "pragmeval/sarcasm",
810
+ "pragmeval/persuasiveness-strength",
811
+ "pragmeval/stac",
812
+ "pragmeval/persuasiveness-specificity",
813
  "silicone/sem",
814
+ "silicone/meld_e",
815
+ "silicone/maptask",
816
  "silicone/dyda_da",
817
  "silicone/dyda_e",
818
+ "silicone/oasis",
819
+ "silicone/meld_s",
820
  "silicone/iemocap",
 
 
821
  "lex_glue/eurlex",
822
  "lex_glue/scotus",
823
  "lex_glue/ledgar",
 
848
  "scicite",
849
  "liar",
850
  "lexical_relation_classification/BLESS",
 
851
  "lexical_relation_classification/CogALexV",
852
+ "lexical_relation_classification/EVALution",
853
  "lexical_relation_classification/K&H+N",
854
  "lexical_relation_classification/ROOT09",
855
+ "linguisticprobing/sentence_length",
856
  "linguisticprobing/top_constituents",
857
+ "linguisticprobing/coordination_inversion",
858
+ "linguisticprobing/tree_depth",
859
  "linguisticprobing/subj_number",
860
+ "linguisticprobing/obj_number",
861
+ "linguisticprobing/past_present",
862
  "linguisticprobing/bigram_shift",
863
  "linguisticprobing/odd_man_out",
 
 
 
 
 
 
 
 
 
 
864
  "crowdflower/economic-news",
865
  "crowdflower/political-media-bias",
866
+ "crowdflower/text_emotion",
867
  "crowdflower/political-media-audience",
868
+ "crowdflower/sentiment_nuclear_power",
869
+ "crowdflower/political-media-message",
870
+ "crowdflower/corporate-messaging",
871
+ "crowdflower/airline-sentiment",
872
+ "crowdflower/tweet_global_warming",
873
  "ethics/commonsense",
874
  "ethics/deontology",
875
  "ethics/justice",
 
1092
  "english-grading/grammar",
1093
  "english-grading/conventions",
1094
  "wice",
1095
+ "hover",
1096
+ "tasksource_dpo_pairs",
1097
  "babi_nli",
1098
  "gen_debiased_nli",
1099
  "imppres/presupposition",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00dfda6b5886f4aca861bc5234850d74358a3e8dcbe78c3d55038dded6fd5c17
3
  size 737722356
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae1e040afa694928b43261010315a2ccbe630744935c65fba2eb308d2daa26e
3
  size 737722356