carted-ml committed on
Commit
a92ef25
·
1 Parent(s): 7216636

carted-nlp/categorization-finetuned-20220721-164940-pruned-20220803-184651

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.8760195530726257,
4
+ "eval_f1": 0.8750784966730759,
5
+ "eval_loss": 0.46725794672966003,
6
+ "eval_runtime": 42.6171,
7
+ "eval_samples_per_second": 3360.157,
8
+ "eval_steps_per_second": 70.019,
9
+ "test_samples": 143200,
10
+ "train_loss": 0.4135634679527994,
11
+ "train_runtime": 19096.9194,
12
+ "train_samples": 1138117,
13
+ "train_samples_per_second": 893.953,
14
+ "train_steps_per_second": 3.103
15
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c68108da767a7a02ee990ff8344d49ed41b34bd4062dd87a4229844ecd4b525
3
  size 488228637
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5f963c6319f63a4c53614c92da3838876574304c3d7339fd3508e5b5a49766
3
  size 488228637
recipe.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 1.1.0
2
+
3
+ __metadata__:
4
+ per_device_train_batch_size: 48
5
+ per_device_eval_batch_size: 48
6
+ fp16: False
7
+ framework_metadata:
8
+ python_version: 3.8.13
9
+ sparseml_version: 1.0.1
10
+ torch_version: 1.9.1+cu111
11
+
12
+ modifiers:
13
+ - !GMPruningModifier
14
+ end_epoch: 11
15
+ final_sparsity: 0.7
16
+ global_sparsity: False
17
+ init_sparsity: 0.0
18
+ inter_func: cubic
19
+ leave_enabled: True
20
+ mask_type: unstructured
21
+ params: ['re:roberta.encoder.layer.*.attention.self.query.weight', 're:roberta.encoder.layer.*.attention.self.key.weight', 're:roberta.encoder.layer.*.attention.self.value.weight', 're:roberta.encoder.layer.*.attention.output.dense.weight', 're:roberta.encoder.layer.*.intermediate.dense.weight', 're:roberta.encoder.layer.*.output.dense.weight', 're:classifier.dense.weight']
22
+ start_epoch: 2
23
+ update_frequency: 0.05
24
+
25
+ - !EpochRangeModifier
26
+ end_epoch: 15
27
+ start_epoch: 0.0
test_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.8760195530726257,
4
+ "eval_f1": 0.8750784966730759,
5
+ "eval_loss": 0.46725794672966003,
6
+ "eval_runtime": 42.6171,
7
+ "eval_samples_per_second": 3360.157,
8
+ "eval_steps_per_second": 70.019,
9
+ "test_samples": 143200
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "train_loss": 0.4135634679527994,
4
+ "train_runtime": 19096.9194,
5
+ "train_samples": 1138117,
6
+ "train_samples_per_second": 893.953,
7
+ "train_steps_per_second": 3.103
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 14.999789127409219,
5
+ "global_step": 59265,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.51,
12
+ "learning_rate": 6.994913523145705e-06,
13
+ "loss": 0.3404,
14
+ "step": 2000
15
+ },
16
+ {
17
+ "epoch": 0.51,
18
+ "eval_accuracy": 0.887192844025166,
19
+ "eval_f1": 0.886488607488543,
20
+ "eval_loss": 0.4329048991203308,
21
+ "eval_runtime": 42.8766,
22
+ "eval_samples_per_second": 3340.027,
23
+ "eval_steps_per_second": 69.595,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 1.01,
28
+ "learning_rate": 6.954310370186513e-06,
29
+ "loss": 0.3433,
30
+ "step": 4000
31
+ },
32
+ {
33
+ "epoch": 1.01,
34
+ "eval_accuracy": 0.8883170750441662,
35
+ "eval_f1": 0.8876090017723502,
36
+ "eval_loss": 0.4280129075050354,
37
+ "eval_runtime": 46.562,
38
+ "eval_samples_per_second": 3075.664,
39
+ "eval_steps_per_second": 64.087,
40
+ "step": 4000
41
+ },
42
+ {
43
+ "epoch": 1.52,
44
+ "learning_rate": 6.873575782958537e-06,
45
+ "loss": 0.3281,
46
+ "step": 6000
47
+ },
48
+ {
49
+ "epoch": 1.52,
50
+ "eval_accuracy": 0.8889804411733899,
51
+ "eval_f1": 0.8882523897529724,
52
+ "eval_loss": 0.43023422360420227,
53
+ "eval_runtime": 45.3226,
54
+ "eval_samples_per_second": 3159.767,
55
+ "eval_steps_per_second": 65.839,
56
+ "step": 6000
57
+ },
58
+ {
59
+ "epoch": 2.02,
60
+ "learning_rate": 6.753647718516397e-06,
61
+ "loss": 0.331,
62
+ "step": 8000
63
+ },
64
+ {
65
+ "epoch": 2.02,
66
+ "eval_accuracy": 0.8891340628033154,
67
+ "eval_f1": 0.8885291871869458,
68
+ "eval_loss": 0.42653217911720276,
69
+ "eval_runtime": 54.726,
70
+ "eval_samples_per_second": 2616.839,
71
+ "eval_steps_per_second": 54.526,
72
+ "step": 8000
73
+ },
74
+ {
75
+ "epoch": 2.53,
76
+ "learning_rate": 6.5959194752957715e-06,
77
+ "loss": 0.3224,
78
+ "step": 10000
79
+ },
80
+ {
81
+ "epoch": 2.53,
82
+ "eval_accuracy": 0.8881355222087998,
83
+ "eval_f1": 0.8873919365623191,
84
+ "eval_loss": 0.43002641201019287,
85
+ "eval_runtime": 54.91,
86
+ "eval_samples_per_second": 2608.066,
87
+ "eval_steps_per_second": 54.343,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 3.04,
92
+ "learning_rate": 6.402223506072121e-06,
93
+ "loss": 0.3361,
94
+ "step": 12000
95
+ },
96
+ {
97
+ "epoch": 3.04,
98
+ "eval_accuracy": 0.8889175959611477,
99
+ "eval_f1": 0.8882468792028413,
100
+ "eval_loss": 0.42911502718925476,
101
+ "eval_runtime": 50.9856,
102
+ "eval_samples_per_second": 2808.81,
103
+ "eval_steps_per_second": 58.526,
104
+ "step": 12000
105
+ },
106
+ {
107
+ "epoch": 3.54,
108
+ "learning_rate": 6.1748101289188055e-06,
109
+ "loss": 0.3323,
110
+ "step": 14000
111
+ },
112
+ {
113
+ "epoch": 3.54,
114
+ "eval_accuracy": 0.8877584509353462,
115
+ "eval_f1": 0.8871109456457335,
116
+ "eval_loss": 0.4336757957935333,
117
+ "eval_runtime": 51.7236,
118
+ "eval_samples_per_second": 2768.739,
119
+ "eval_steps_per_second": 57.691,
120
+ "step": 14000
121
+ },
122
+ {
123
+ "epoch": 4.05,
124
+ "learning_rate": 5.916321383496139e-06,
125
+ "loss": 0.3556,
126
+ "step": 16000
127
+ },
128
+ {
129
+ "epoch": 4.05,
130
+ "eval_accuracy": 0.8856845589313521,
131
+ "eval_f1": 0.8851065217265167,
132
+ "eval_loss": 0.4344990849494934,
133
+ "eval_runtime": 50.059,
134
+ "eval_samples_per_second": 2860.803,
135
+ "eval_steps_per_second": 59.61,
136
+ "step": 16000
137
+ },
138
+ {
139
+ "epoch": 4.56,
140
+ "learning_rate": 5.629760336403372e-06,
141
+ "loss": 0.3663,
142
+ "step": 18000
143
+ },
144
+ {
145
+ "epoch": 4.56,
146
+ "eval_accuracy": 0.8836455809341591,
147
+ "eval_f1": 0.8828440694403326,
148
+ "eval_loss": 0.44171223044395447,
149
+ "eval_runtime": 52.3029,
150
+ "eval_samples_per_second": 2738.072,
151
+ "eval_steps_per_second": 57.052,
152
+ "step": 18000
153
+ },
154
+ {
155
+ "epoch": 5.06,
156
+ "learning_rate": 5.318456192197413e-06,
157
+ "loss": 0.3902,
158
+ "step": 20000
159
+ },
160
+ {
161
+ "epoch": 5.06,
162
+ "eval_accuracy": 0.8789252072146304,
163
+ "eval_f1": 0.8780871328644805,
164
+ "eval_loss": 0.4555477797985077,
165
+ "eval_runtime": 50.7366,
166
+ "eval_samples_per_second": 2822.599,
167
+ "eval_steps_per_second": 58.814,
168
+ "step": 20000
169
+ },
170
+ {
171
+ "epoch": 5.57,
172
+ "learning_rate": 4.986025615410962e-06,
173
+ "loss": 0.4036,
174
+ "step": 22000
175
+ },
176
+ {
177
+ "epoch": 5.57,
178
+ "eval_accuracy": 0.8788483963996676,
179
+ "eval_f1": 0.877917228234661,
180
+ "eval_loss": 0.4555710554122925,
181
+ "eval_runtime": 49.8006,
182
+ "eval_samples_per_second": 2875.647,
183
+ "eval_steps_per_second": 59.919,
184
+ "step": 22000
185
+ },
186
+ {
187
+ "epoch": 6.07,
188
+ "learning_rate": 4.636330712922542e-06,
189
+ "loss": 0.4305,
190
+ "step": 24000
191
+ },
192
+ {
193
+ "epoch": 6.07,
194
+ "eval_accuracy": 0.8750637180624122,
195
+ "eval_f1": 0.8741648169470942,
196
+ "eval_loss": 0.46971216797828674,
197
+ "eval_runtime": 50.8439,
198
+ "eval_samples_per_second": 2816.642,
199
+ "eval_steps_per_second": 58.689,
200
+ "step": 24000
201
+ },
202
+ {
203
+ "epoch": 6.58,
204
+ "learning_rate": 4.273434164830173e-06,
205
+ "loss": 0.4501,
206
+ "step": 26000
207
+ },
208
+ {
209
+ "epoch": 6.58,
210
+ "eval_accuracy": 0.8737788826121263,
211
+ "eval_f1": 0.8725169321502271,
212
+ "eval_loss": 0.4762924611568451,
213
+ "eval_runtime": 50.5449,
214
+ "eval_samples_per_second": 2833.301,
215
+ "eval_steps_per_second": 59.037,
216
+ "step": 26000
217
+ },
218
+ {
219
+ "epoch": 7.09,
220
+ "learning_rate": 3.901552025108571e-06,
221
+ "loss": 0.4733,
222
+ "step": 28000
223
+ },
224
+ {
225
+ "epoch": 7.09,
226
+ "eval_accuracy": 0.8710486072802687,
227
+ "eval_f1": 0.8700483733487925,
228
+ "eval_loss": 0.48572415113449097,
229
+ "eval_runtime": 51.8675,
230
+ "eval_samples_per_second": 2761.054,
231
+ "eval_steps_per_second": 57.531,
232
+ "step": 28000
233
+ },
234
+ {
235
+ "epoch": 7.59,
236
+ "learning_rate": 3.5250047404016715e-06,
237
+ "loss": 0.4851,
238
+ "step": 30000
239
+ },
240
+ {
241
+ "epoch": 7.59,
242
+ "eval_accuracy": 0.8705109315755295,
243
+ "eval_f1": 0.8695447653344123,
244
+ "eval_loss": 0.4862508177757263,
245
+ "eval_runtime": 50.7033,
246
+ "eval_samples_per_second": 2824.454,
247
+ "eval_steps_per_second": 58.852,
248
+ "step": 30000
249
+ },
250
+ {
251
+ "epoch": 8.1,
252
+ "learning_rate": 3.1481669560035742e-06,
253
+ "loss": 0.4846,
254
+ "step": 32000
255
+ },
256
+ {
257
+ "epoch": 8.1,
258
+ "eval_accuracy": 0.8708251576367407,
259
+ "eval_f1": 0.869757227158951,
260
+ "eval_loss": 0.48489654064178467,
261
+ "eval_runtime": 50.2252,
262
+ "eval_samples_per_second": 2851.338,
263
+ "eval_steps_per_second": 59.412,
264
+ "step": 32000
265
+ },
266
+ {
267
+ "epoch": 8.61,
268
+ "learning_rate": 2.775416692171258e-06,
269
+ "loss": 0.4856,
270
+ "step": 34000
271
+ },
272
+ {
273
+ "epoch": 8.61,
274
+ "eval_accuracy": 0.870664553205455,
275
+ "eval_f1": 0.8694665590028052,
276
+ "eval_loss": 0.4835050404071808,
277
+ "eval_runtime": 50.112,
278
+ "eval_samples_per_second": 2857.776,
279
+ "eval_steps_per_second": 59.547,
280
+ "step": 34000
281
+ },
282
+ {
283
+ "epoch": 9.11,
284
+ "learning_rate": 2.411084481227692e-06,
285
+ "loss": 0.4774,
286
+ "step": 36000
287
+ },
288
+ {
289
+ "epoch": 9.11,
290
+ "eval_accuracy": 0.8719214574502999,
291
+ "eval_f1": 0.870818507712314,
292
+ "eval_loss": 0.47967976331710815,
293
+ "eval_runtime": 51.643,
294
+ "eval_samples_per_second": 2773.058,
295
+ "eval_steps_per_second": 57.781,
296
+ "step": 36000
297
+ },
298
+ {
299
+ "epoch": 9.62,
300
+ "learning_rate": 2.059403056369544e-06,
301
+ "loss": 0.4635,
302
+ "step": 38000
303
+ },
304
+ {
305
+ "epoch": 9.62,
306
+ "eval_accuracy": 0.8728222388257721,
307
+ "eval_f1": 0.8716593868274884,
308
+ "eval_loss": 0.47762736678123474,
309
+ "eval_runtime": 49.924,
310
+ "eval_samples_per_second": 2868.539,
311
+ "eval_steps_per_second": 59.771,
312
+ "step": 38000
313
+ },
314
+ {
315
+ "epoch": 10.12,
316
+ "learning_rate": 1.7244581766840846e-06,
317
+ "loss": 0.4561,
318
+ "step": 40000
319
+ },
320
+ {
321
+ "epoch": 10.12,
322
+ "eval_accuracy": 0.8738906074338904,
323
+ "eval_f1": 0.8728714900247343,
324
+ "eval_loss": 0.4745886027812958,
325
+ "eval_runtime": 52.1859,
326
+ "eval_samples_per_second": 2744.206,
327
+ "eval_steps_per_second": 57.18,
328
+ "step": 40000
329
+ },
330
+ {
331
+ "epoch": 10.63,
332
+ "learning_rate": 1.4101411596796607e-06,
333
+ "loss": 0.4475,
334
+ "step": 42000
335
+ },
336
+ {
337
+ "epoch": 10.63,
338
+ "eval_accuracy": 0.8749380276379278,
339
+ "eval_f1": 0.8739688992879336,
340
+ "eval_loss": 0.4705161154270172,
341
+ "eval_runtime": 52.4489,
342
+ "eval_samples_per_second": 2730.449,
343
+ "eval_steps_per_second": 56.893,
344
+ "step": 42000
345
+ },
346
+ {
347
+ "epoch": 11.14,
348
+ "learning_rate": 1.1201036727965609e-06,
349
+ "loss": 0.4413,
350
+ "step": 44000
351
+ },
352
+ {
353
+ "epoch": 11.14,
354
+ "eval_accuracy": 0.8754268237331453,
355
+ "eval_f1": 0.8743869003587098,
356
+ "eval_loss": 0.46908074617385864,
357
+ "eval_runtime": 50.2149,
358
+ "eval_samples_per_second": 2851.921,
359
+ "eval_steps_per_second": 59.425,
360
+ "step": 44000
361
+ },
362
+ {
363
+ "epoch": 11.64,
364
+ "learning_rate": 8.577153091208042e-07,
365
+ "loss": 0.4389,
366
+ "step": 46000
367
+ },
368
+ {
369
+ "epoch": 11.64,
370
+ "eval_accuracy": 0.8759644994378845,
371
+ "eval_f1": 0.875008769768229,
372
+ "eval_loss": 0.46789219975471497,
373
+ "eval_runtime": 50.3984,
374
+ "eval_samples_per_second": 2841.539,
375
+ "eval_steps_per_second": 59.208,
376
+ "step": 46000
377
+ },
378
+ {
379
+ "epoch": 12.15,
380
+ "learning_rate": 6.260244401770287e-07,
381
+ "loss": 0.4361,
382
+ "step": 48000
383
+ },
384
+ {
385
+ "epoch": 12.15,
386
+ "eval_accuracy": 0.875894671424282,
387
+ "eval_f1": 0.8749081220424403,
388
+ "eval_loss": 0.46769237518310547,
389
+ "eval_runtime": 50.042,
390
+ "eval_samples_per_second": 2861.778,
391
+ "eval_steps_per_second": 59.63,
392
+ "step": 48000
393
+ },
394
+ {
395
+ "epoch": 12.65,
396
+ "learning_rate": 4.2772280060435426e-07,
397
+ "loss": 0.4362,
398
+ "step": 50000
399
+ },
400
+ {
401
+ "epoch": 12.65,
402
+ "eval_accuracy": 0.876257777095015,
403
+ "eval_f1": 0.875265867499827,
404
+ "eval_loss": 0.4671792984008789,
405
+ "eval_runtime": 52.5275,
406
+ "eval_samples_per_second": 2726.361,
407
+ "eval_steps_per_second": 56.808,
408
+ "step": 50000
409
+ },
410
+ {
411
+ "epoch": 13.16,
412
+ "learning_rate": 2.6511421616278837e-07,
413
+ "loss": 0.4309,
414
+ "step": 52000
415
+ },
416
+ {
417
+ "epoch": 13.16,
418
+ "eval_accuracy": 0.87611812106781,
419
+ "eval_f1": 0.8751424993393537,
420
+ "eval_loss": 0.4671061038970947,
421
+ "eval_runtime": 51.1386,
422
+ "eval_samples_per_second": 2800.408,
423
+ "eval_steps_per_second": 58.351,
424
+ "step": 52000
425
+ },
426
+ {
427
+ "epoch": 13.67,
428
+ "learning_rate": 1.400878383814987e-07,
429
+ "loss": 0.4316,
430
+ "step": 54000
431
+ },
432
+ {
433
+ "epoch": 13.67,
434
+ "eval_accuracy": 0.8764183815263007,
435
+ "eval_f1": 0.8754265388560385,
436
+ "eval_loss": 0.4670025110244751,
437
+ "eval_runtime": 51.6718,
438
+ "eval_samples_per_second": 2771.511,
439
+ "eval_steps_per_second": 57.749,
440
+ "step": 54000
441
+ },
442
+ {
443
+ "epoch": 14.17,
444
+ "learning_rate": 5.409619680302491e-08,
445
+ "loss": 0.4321,
446
+ "step": 56000
447
+ },
448
+ {
449
+ "epoch": 14.17,
450
+ "eval_accuracy": 0.8764393299303814,
451
+ "eval_f1": 0.8754605862048482,
452
+ "eval_loss": 0.4668178856372833,
453
+ "eval_runtime": 50.8951,
454
+ "eval_samples_per_second": 2813.807,
455
+ "eval_steps_per_second": 58.63,
456
+ "step": 56000
457
+ },
458
+ {
459
+ "epoch": 14.68,
460
+ "learning_rate": 8.138323807738212e-09,
461
+ "loss": 0.4311,
462
+ "step": 58000
463
+ },
464
+ {
465
+ "epoch": 14.68,
466
+ "eval_accuracy": 0.8763904503208597,
467
+ "eval_f1": 0.8754165344722714,
468
+ "eval_loss": 0.46675533056259155,
469
+ "eval_runtime": 50.6992,
470
+ "eval_samples_per_second": 2824.68,
471
+ "eval_steps_per_second": 58.857,
472
+ "step": 58000
473
+ },
474
+ {
475
+ "epoch": 15.0,
476
+ "step": 59265,
477
+ "total_flos": 3.822729571998278e+17,
478
+ "train_loss": 0.4135634679527994,
479
+ "train_runtime": 19096.9194,
480
+ "train_samples_per_second": 893.953,
481
+ "train_steps_per_second": 3.103
482
+ }
483
+ ],
484
+ "max_steps": 59265,
485
+ "num_train_epochs": 15,
486
+ "total_flos": 3.822729571998278e+17,
487
+ "trial_name": null,
488
+ "trial_params": null
489
+ }