Add original model
Browse files- classes.dict +3 -0
- config.json +39 -0
- pytorch_model.bin +3 -0
- rubert_sentiment.json +116 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
- vocab.txt +0 -0
classes.dict
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
1 90717
|
2 |
+
2 49736
|
3 |
+
0 49438
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "../huggingface/rubert-base-cased-sentiment",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"directionality": "bidi",
|
8 |
+
"gradient_checkpointing": false,
|
9 |
+
"hidden_act": "gelu",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"id2label": {
|
13 |
+
"0": "NEUTRAL",
|
14 |
+
"1": "POSITIVE",
|
15 |
+
"2": "NEGATIVE"
|
16 |
+
},
|
17 |
+
"initializer_range": 0.02,
|
18 |
+
"intermediate_size": 3072,
|
19 |
+
"label2id": {
|
20 |
+
"NEGATIVE": 2,
|
21 |
+
"NEUTRAL": 0,
|
22 |
+
"POSITIVE": 1
|
23 |
+
},
|
24 |
+
"layer_norm_eps": 1e-12,
|
25 |
+
"max_position_embeddings": 512,
|
26 |
+
"model_type": "bert",
|
27 |
+
"num_attention_heads": 12,
|
28 |
+
"num_hidden_layers": 12,
|
29 |
+
"output_past": true,
|
30 |
+
"pad_token_id": 0,
|
31 |
+
"pooler_fc_size": 768,
|
32 |
+
"pooler_num_attention_heads": 12,
|
33 |
+
"pooler_num_fc_layers": 3,
|
34 |
+
"pooler_size_per_head": 128,
|
35 |
+
"pooler_type": "first_token_transform",
|
36 |
+
"return_dict": true,
|
37 |
+
"type_vocab_size": 2,
|
38 |
+
"vocab_size": 119547
|
39 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17c5e6f11e5672c158b12ed629edb94a2d5adfb0c0eacf55c21d250c7381dac1
|
3 |
+
size 711509513
|
rubert_sentiment.json
ADDED
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"dataset_reader": {
|
3 |
+
"class_name": "basic_classification_reader",
|
4 |
+
"x": "text",
|
5 |
+
"y": "sentiment",
|
6 |
+
"data_path": "/content/drive/MyDrive/BERT/train/",
|
7 |
+
"train": "train.csv",
|
8 |
+
"valid": "valid.csv"
|
9 |
+
},
|
10 |
+
"dataset_iterator": {
|
11 |
+
"class_name": "basic_classification_iterator",
|
12 |
+
"seed": 42
|
13 |
+
},
|
14 |
+
"chainer": {
|
15 |
+
"in": [
|
16 |
+
"x"
|
17 |
+
],
|
18 |
+
"in_y": [
|
19 |
+
"y"
|
20 |
+
],
|
21 |
+
"pipe": [
|
22 |
+
{
|
23 |
+
"id": "classes_vocab",
|
24 |
+
"class_name": "simple_vocab",
|
25 |
+
"fit_on": [
|
26 |
+
"y"
|
27 |
+
],
|
28 |
+
"save_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/classes.dict",
|
29 |
+
"load_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/classes.dict",
|
30 |
+
"in": "y",
|
31 |
+
"out": "y_ids"
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"class_name": "torch_transformers_preprocessor",
|
35 |
+
"vocab_file": "/content/drive/MyDrive/BERT/rubert-base-cased-sentiment/",
|
36 |
+
"do_lower_case": true,
|
37 |
+
"max_seq_length": 512,
|
38 |
+
"in": [
|
39 |
+
"x"
|
40 |
+
],
|
41 |
+
"out": [
|
42 |
+
"bert_features"
|
43 |
+
]
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"in": "y_ids",
|
47 |
+
"out": "y_onehot",
|
48 |
+
"class_name": "one_hotter",
|
49 |
+
"depth": "#classes_vocab.len",
|
50 |
+
"single_vector": true
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"class_name": "torch_transformers_classifier",
|
54 |
+
"n_classes": 3,
|
55 |
+
"return_probas": true,
|
56 |
+
"pretrained_bert": "/content/drive/MyDrive/BERT/rubert-base-cased-sentiment/",
|
57 |
+
"save_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/model",
|
58 |
+
"load_path": "/content/drive/MyDrive/BERT/sentiment_bert_model/model",
|
59 |
+
"optimizer": "AdamW",
|
60 |
+
"optimizer_parameters": {
|
61 |
+
"lr": 1e-05
|
62 |
+
},
|
63 |
+
"learning_rate_drop_patience": 5,
|
64 |
+
"learning_rate_drop_div": 2.0,
|
65 |
+
"in": [
|
66 |
+
"bert_features"
|
67 |
+
],
|
68 |
+
"in_y": [
|
69 |
+
"y_ids"
|
70 |
+
],
|
71 |
+
"out": [
|
72 |
+
"y_pred_probas"
|
73 |
+
]
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"in": "y_pred_probas",
|
77 |
+
"out": "y_pred_ids",
|
78 |
+
"class_name": "proba2labels",
|
79 |
+
"max_proba": true
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"in": "y_pred_ids",
|
83 |
+
"out": "y_pred_labels",
|
84 |
+
"ref": "classes_vocab"
|
85 |
+
}
|
86 |
+
],
|
87 |
+
"out": [
|
88 |
+
"y_pred_labels"
|
89 |
+
]
|
90 |
+
},
|
91 |
+
"train": {
|
92 |
+
"epochs": 5,
|
93 |
+
"batch_size": 8,
|
94 |
+
"metrics": [
|
95 |
+
"accuracy",
|
96 |
+
"f1_macro",
|
97 |
+
"f1_weighted",
|
98 |
+
{
|
99 |
+
"name": "roc_auc",
|
100 |
+
"inputs": [
|
101 |
+
"y_onehot",
|
102 |
+
"y_pred_probas"
|
103 |
+
]
|
104 |
+
}
|
105 |
+
],
|
106 |
+
"validation_patience": 2,
|
107 |
+
"val_every_n_epochs": 1,
|
108 |
+
"log_every_n_epochs": 1,
|
109 |
+
"show_examples": false,
|
110 |
+
"evaluation_targets": [
|
111 |
+
"train",
|
112 |
+
"valid"
|
113 |
+
],
|
114 |
+
"class_name": "nn_trainer"
|
115 |
+
}
|
116 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": "/home/igor/.cache/torch/transformers/1f428acdde727eed5de979d6856ce350a470be2a64e134a1fdae04af78a27301.dd8bd9bfd3664b530ea4e645105f557769387b3da9f79bdb55ed556bdd80611d", "name_or_path": "DeepPavlov/rubert-base-cased-conversational", "do_basic_tokenize": true, "never_split": null}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|