{
  "_name_or_path": "microsoft/mdeberta-v3-base",
  "architectures": [
    "DebertaV2ForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifiers_size": [
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    2,
    2,
    2,
    2,
    2,
    2,
    2,
    11,
    15,
    31,
    12,
    31,
    2,
    2,
    3,
    5,
    6,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    3,
    2,
    2,
    2,
    2,
    6,
    6,
    6,
    69,
    2,
    1,
    8,
    10,
    2,
    2,
    5,
    2,
    2,
    2,
    2,
    1,
    1,
    1,
    20,
    235,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    1,
    9,
    17
  ],
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "contradiction": 2,
    "entailment": 0,
    "neutral": 1
  },
  "layer_norm_eps": 1e-07,
  "max_position_embeddings": 512,
  "max_relative_positions": -1,
  "model_type": "deberta-v2",
  "norm_rel_ebd": "layer_norm",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "pooler_dropout": 0,
  "pooler_hidden_act": "gelu",
  "pooler_hidden_size": 768,
  "pos_att_type": [
    "p2c",
    "c2p"
  ],
  "position_biased_input": false,
  "position_buckets": 256,
  "relative_attention": true,
  "share_att_key": true,
  "tasks": [
    "xnli/ur",
    "xnli/ar",
    "xnli/bg",
    "xnli/de",
    "xnli/el",
    "xnli/zh",
    "xnli/vi",
    "xnli/tr",
    "xnli/th",
    "xnli/sw",
    "xnli/en",
    "xnli/ru",
    "xnli/es",
    "xnli/fr",
    "xnli/hi",
    "americas_nli/all_languages",
    "multilingual-NLI-26lang-2mil7",
    "stsb_multi_mt/pl",
    "stsb_multi_mt/it",
    "stsb_multi_mt/fr",
    "stsb_multi_mt/nl",
    "stsb_multi_mt/es",
    "stsb_multi_mt/de",
    "stsb_multi_mt/en",
    "stsb_multi_mt/pt",
    "stsb_multi_mt/ru",
    "stsb_multi_mt/zh",
    "paws-x/zh",
    "paws-x/ko",
    "paws-x/fr",
    "paws-x/es",
    "paws-x/de",
    "paws-x/en",
    "paws-x/ja",
    "miam/dihana",
    "miam/ilisten",
    "miam/loria",
    "miam/maptask",
    "miam/vm2",
    "x-stance/de",
    "x-stance/fr",
    "multilingual-sentiments/all",
    "universal-joy",
    "amazon_reviews_multi/all_languages",
    "tweet_sentiment_multilingual/all",
    "tweet_sentiment_multilingual/portuguese",
    "tweet_sentiment_multilingual/italian",
    "tweet_sentiment_multilingual/spanish",
    "tweet_sentiment_multilingual/german",
    "tweet_sentiment_multilingual/french",
    "tweet_sentiment_multilingual/english",
    "tweet_sentiment_multilingual/arabic",
    "tweet_sentiment_multilingual/hindi",
    "offenseval_2020/tr",
    "offenseval_2020/ar",
    "offenseval_2020/da",
    "offenseval_2020/gr",
    "offenseval_dravidian/tamil",
    "offenseval_dravidian/malayalam",
    "offenseval_dravidian/kannada",
    "MLMA_hate_speech",
    "xglue/qam",
    "xsum_factuality",
    "x-fact",
    "xglue/nc",
    "xglue/qadsm",
    "xglue/qam",
    "xglue/wpr",
    "xlwic/xlwic_en_ko",
    "xlwic/xlwic_fr_fr",
    "xlwic/xlwic_it_it",
    "xlwic/xlwic_de_de",
    "oasst1_dense_flat/quality",
    "oasst1_dense_flat/toxicity",
    "oasst1_dense_flat/helpfulness",
    "language-identification",
    "wili_2018",
    "exams/multilingual",
    "xcsr/X-CSQA-es",
    "xcsr/X-CSQA-en",
    "xcsr/X-CODAH-ur",
    "xcsr/X-CODAH-sw",
    "xcsr/X-CODAH-hi",
    "xcsr/X-CODAH-vi",
    "xcsr/X-CODAH-ar",
    "xcsr/X-CODAH-ru",
    "xcsr/X-CODAH-pt",
    "xcsr/X-CODAH-pl",
    "xcsr/X-CODAH-nl",
    "xcsr/X-CODAH-jap",
    "xcsr/X-CODAH-it",
    "xcsr/X-CODAH-fr",
    "xcsr/X-CODAH-es",
    "xcsr/X-CODAH-de",
    "xcsr/X-CODAH-zh",
    "xcsr/X-CODAH-en",
    "xcsr/X-CSQA-ur",
    "xcsr/X-CSQA-sw",
    "xcsr/X-CSQA-zh",
    "xcsr/X-CSQA-vi",
    "xcsr/X-CSQA-ar",
    "xcsr/X-CSQA-ru",
    "xcsr/X-CSQA-pt",
    "xcsr/X-CSQA-pl",
    "xcsr/X-CSQA-nl",
    "xcsr/X-CSQA-jap",
    "xcsr/X-CSQA-it",
    "xcsr/X-CSQA-fr",
    "xcsr/X-CSQA-de",
    "xcsr/X-CSQA-hi",
    "xcopa/translation-th",
    "xcopa/sw",
    "xcopa/et",
    "xcopa/ht",
    "xcopa/it",
    "xcopa/id",
    "xcopa/qu",
    "xcopa/translation-vi",
    "xcopa/zh",
    "xcopa/ta",
    "xcopa/th",
    "xcopa/vi",
    "xcopa/tr",
    "xcopa/translation-ht",
    "xcopa/translation-it",
    "xcopa/translation-id",
    "xcopa/translation-sw",
    "xcopa/translation-zh",
    "xcopa/translation-ta",
    "xcopa/translation-tr",
    "xcopa/translation-et",
    "xstory_cloze/ar",
    "xstory_cloze/te",
    "xstory_cloze/sw",
    "xstory_cloze/id",
    "xstory_cloze/hi",
    "xstory_cloze/es",
    "xstory_cloze/my",
    "xstory_cloze/ru",
    "xstory_cloze/en",
    "xstory_cloze/eu",
    "xstory_cloze/zh",
    "hh-rlhf",
    "xglue/ner",
    "xglue/pos"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.26.1",
  "type_vocab_size": 0,
  "vocab_size": 251000
}