RabidUmarell committed on
Commit
03c89fb
1 Parent(s): b5855f0
checkpoint-160/config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "SZTAKI-HLT/hubert-base-cc",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "NEUTRAL",
14
+ "1": "BIT-TOXIC",
15
+ "2": "QUITE-TOXIC",
16
+ "3": "TOXIC",
17
+ "4": "VERY-TOXIC",
18
+ "5": "HIGHLY-TOXIC"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "BIT-TOXIC": 1,
24
+ "HIGHLY-TOXIC": 5,
25
+ "NEUTRAL": 0,
26
+ "QUITE-TOXIC": 2,
27
+ "TOXIC": 3,
28
+ "VERY-TOXIC": 4
29
+ },
30
+ "layer_norm_eps": 1e-12,
31
+ "max_position_embeddings": 512,
32
+ "model_type": "bert",
33
+ "num_attention_heads": 12,
34
+ "num_hidden_layers": 12,
35
+ "pad_token_id": 0,
36
+ "position_embedding_type": "absolute",
37
+ "problem_type": "single_label_classification",
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.36.2",
40
+ "type_vocab_size": 2,
41
+ "use_cache": true,
42
+ "vocab_size": 32001
43
+ }
checkpoint-160/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9fbcb3c57172640144ad106b31c97d623cd07ddc95969d63c7257e6e81b7182
3
+ size 442514440
checkpoint-160/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8083959f936114a680e91bd369681538d78867febf313fea0f233696b5d8107
3
+ size 885149946
checkpoint-160/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:030f797d3abd18cb53e527cf6cf1e4f89fa57bfcdd9f31cb25473191fa4c03db
3
+ size 14244
checkpoint-160/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:590ffa6d84ac5f11e0c202149e9478c4654d2bbbb7a0458f8e623121da4cef9a
3
+ size 1064
checkpoint-160/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
checkpoint-160/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-160/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": false,
48
+ "full_tokenizer_file": null,
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
checkpoint-160/trainer_state.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2997977137565613,
3
+ "best_model_checkpoint": "models/toxic-bert-hubert/checkpoint-150",
4
+ "epoch": 0.23088023088023088,
5
+ "eval_steps": 10,
6
+ "global_step": 160,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "learning_rate": 1.9600000000000002e-05,
14
+ "loss": 0.206,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.01,
19
+ "eval_f1": 0.7730744144170906,
20
+ "eval_loss": 0.6420192122459412,
21
+ "eval_runtime": 22.3453,
22
+ "eval_samples_per_second": 247.837,
23
+ "eval_steps_per_second": 7.787,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 0.03,
28
+ "learning_rate": 1.9200000000000003e-05,
29
+ "loss": 0.5054,
30
+ "step": 20
31
+ },
32
+ {
33
+ "epoch": 0.03,
34
+ "eval_f1": 0.8048163482788003,
35
+ "eval_loss": 0.5228331685066223,
36
+ "eval_runtime": 22.8204,
37
+ "eval_samples_per_second": 242.677,
38
+ "eval_steps_per_second": 7.625,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 0.04,
43
+ "learning_rate": 1.88e-05,
44
+ "loss": 0.5716,
45
+ "step": 30
46
+ },
47
+ {
48
+ "epoch": 0.04,
49
+ "eval_f1": 0.8136074625725318,
50
+ "eval_loss": 0.4328407943248749,
51
+ "eval_runtime": 23.1876,
52
+ "eval_samples_per_second": 238.834,
53
+ "eval_steps_per_second": 7.504,
54
+ "step": 30
55
+ },
56
+ {
57
+ "epoch": 0.06,
58
+ "learning_rate": 1.8400000000000003e-05,
59
+ "loss": 0.4257,
60
+ "step": 40
61
+ },
62
+ {
63
+ "epoch": 0.06,
64
+ "eval_f1": 0.8259426500452947,
65
+ "eval_loss": 0.4285435080528259,
66
+ "eval_runtime": 23.2092,
67
+ "eval_samples_per_second": 238.613,
68
+ "eval_steps_per_second": 7.497,
69
+ "step": 40
70
+ },
71
+ {
72
+ "epoch": 0.07,
73
+ "learning_rate": 1.8e-05,
74
+ "loss": 0.4937,
75
+ "step": 50
76
+ },
77
+ {
78
+ "epoch": 0.07,
79
+ "eval_f1": 0.8317824244449398,
80
+ "eval_loss": 0.39296483993530273,
81
+ "eval_runtime": 23.2217,
82
+ "eval_samples_per_second": 238.484,
83
+ "eval_steps_per_second": 7.493,
84
+ "step": 50
85
+ },
86
+ {
87
+ "epoch": 0.09,
88
+ "learning_rate": 1.76e-05,
89
+ "loss": 0.4411,
90
+ "step": 60
91
+ },
92
+ {
93
+ "epoch": 0.09,
94
+ "eval_f1": 0.8356587695730946,
95
+ "eval_loss": 0.3781413733959198,
96
+ "eval_runtime": 23.4641,
97
+ "eval_samples_per_second": 236.021,
98
+ "eval_steps_per_second": 7.416,
99
+ "step": 60
100
+ },
101
+ {
102
+ "epoch": 0.1,
103
+ "learning_rate": 1.72e-05,
104
+ "loss": 0.4197,
105
+ "step": 70
106
+ },
107
+ {
108
+ "epoch": 0.1,
109
+ "eval_f1": 0.8497564694972085,
110
+ "eval_loss": 0.3615292012691498,
111
+ "eval_runtime": 23.1269,
112
+ "eval_samples_per_second": 239.462,
113
+ "eval_steps_per_second": 7.524,
114
+ "step": 70
115
+ },
116
+ {
117
+ "epoch": 0.12,
118
+ "learning_rate": 1.6800000000000002e-05,
119
+ "loss": 0.3702,
120
+ "step": 80
121
+ },
122
+ {
123
+ "epoch": 0.12,
124
+ "eval_f1": 0.8725646676802798,
125
+ "eval_loss": 0.3257770836353302,
126
+ "eval_runtime": 22.9073,
127
+ "eval_samples_per_second": 241.757,
128
+ "eval_steps_per_second": 7.596,
129
+ "step": 80
130
+ },
131
+ {
132
+ "epoch": 0.13,
133
+ "learning_rate": 1.64e-05,
134
+ "loss": 0.3357,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.13,
139
+ "eval_f1": 0.8735307760659007,
140
+ "eval_loss": 0.31817400455474854,
141
+ "eval_runtime": 23.2006,
142
+ "eval_samples_per_second": 238.701,
143
+ "eval_steps_per_second": 7.5,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 0.14,
148
+ "learning_rate": 1.6000000000000003e-05,
149
+ "loss": 0.4082,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 0.14,
154
+ "eval_f1": 0.8404628911456754,
155
+ "eval_loss": 0.35047250986099243,
156
+ "eval_runtime": 22.7192,
157
+ "eval_samples_per_second": 243.758,
158
+ "eval_steps_per_second": 7.659,
159
+ "step": 100
160
+ },
161
+ {
162
+ "epoch": 0.16,
163
+ "learning_rate": 1.5600000000000003e-05,
164
+ "loss": 0.4107,
165
+ "step": 110
166
+ },
167
+ {
168
+ "epoch": 0.16,
169
+ "eval_f1": 0.8586341129440507,
170
+ "eval_loss": 0.3375680148601532,
171
+ "eval_runtime": 23.371,
172
+ "eval_samples_per_second": 236.96,
173
+ "eval_steps_per_second": 7.445,
174
+ "step": 110
175
+ },
176
+ {
177
+ "epoch": 0.17,
178
+ "learning_rate": 1.5200000000000002e-05,
179
+ "loss": 0.2864,
180
+ "step": 120
181
+ },
182
+ {
183
+ "epoch": 0.17,
184
+ "eval_f1": 0.8561894871027176,
185
+ "eval_loss": 0.32658323645591736,
186
+ "eval_runtime": 23.2113,
187
+ "eval_samples_per_second": 238.591,
188
+ "eval_steps_per_second": 7.496,
189
+ "step": 120
190
+ },
191
+ {
192
+ "epoch": 0.19,
193
+ "learning_rate": 1.48e-05,
194
+ "loss": 0.3483,
195
+ "step": 130
196
+ },
197
+ {
198
+ "epoch": 0.19,
199
+ "eval_f1": 0.8645368802572093,
200
+ "eval_loss": 0.3245397210121155,
201
+ "eval_runtime": 23.0332,
202
+ "eval_samples_per_second": 240.436,
203
+ "eval_steps_per_second": 7.554,
204
+ "step": 130
205
+ },
206
+ {
207
+ "epoch": 0.2,
208
+ "learning_rate": 1.4400000000000001e-05,
209
+ "loss": 0.317,
210
+ "step": 140
211
+ },
212
+ {
213
+ "epoch": 0.2,
214
+ "eval_f1": 0.8735823993344022,
215
+ "eval_loss": 0.32588475942611694,
216
+ "eval_runtime": 23.3446,
217
+ "eval_samples_per_second": 237.228,
218
+ "eval_steps_per_second": 7.454,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.22,
223
+ "learning_rate": 1.4e-05,
224
+ "loss": 0.3114,
225
+ "step": 150
226
+ },
227
+ {
228
+ "epoch": 0.22,
229
+ "eval_f1": 0.8704014314485559,
230
+ "eval_loss": 0.2997977137565613,
231
+ "eval_runtime": 22.6714,
232
+ "eval_samples_per_second": 244.273,
233
+ "eval_steps_per_second": 7.675,
234
+ "step": 150
235
+ },
236
+ {
237
+ "epoch": 0.23,
238
+ "learning_rate": 1.3600000000000002e-05,
239
+ "loss": 0.3323,
240
+ "step": 160
241
+ },
242
+ {
243
+ "epoch": 0.23,
244
+ "eval_f1": 0.8661724356256255,
245
+ "eval_loss": 0.3152276575565338,
246
+ "eval_runtime": 23.046,
247
+ "eval_samples_per_second": 240.302,
248
+ "eval_steps_per_second": 7.55,
249
+ "step": 160
250
+ }
251
+ ],
252
+ "logging_steps": 10,
253
+ "max_steps": 500,
254
+ "num_input_tokens_seen": 0,
255
+ "num_train_epochs": 1,
256
+ "save_steps": 10,
257
+ "total_flos": 359241714253440.0,
258
+ "train_batch_size": 32,
259
+ "trial_name": null,
260
+ "trial_params": null
261
+ }
checkpoint-160/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e056d88700e4c1253c7100c9c94587a6124eb1203d56f1cd161bc83611a778
3
+ size 4728
checkpoint-160/vocab.txt ADDED
The diff for this file is too large to render. See raw diff