chizhik committed on
Commit
42350de
1 Parent(s): b039ab8

all deleted prior to update

Browse files
README.md DELETED
@@ -1,9 +0,0 @@
1
- ---
2
- language:
3
- - es
4
- tags:
5
- - biomedical
6
- - clinical
7
- - text classification
8
- inference: false
9
- ---
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,65 +0,0 @@
1
- {
2
- "_name_or_path": "Models/roberta-base-biomedical-clinical-es",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "gradient_checkpointing": false,
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 768,
14
- "id2label": {
15
- "0": "LABEL_0",
16
- "1": "LABEL_1",
17
- "2": "LABEL_2",
18
- "3": "LABEL_3",
19
- "4": "LABEL_4",
20
- "5": "LABEL_5",
21
- "6": "LABEL_6",
22
- "7": "LABEL_7",
23
- "8": "LABEL_8",
24
- "9": "LABEL_9",
25
- "10": "LABEL_10",
26
- "11": "LABEL_11",
27
- "12": "LABEL_12",
28
- "13": "LABEL_13",
29
- "14": "LABEL_14",
30
- "15": "LABEL_15"
31
- },
32
- "initializer_range": 0.02,
33
- "intermediate_size": 3072,
34
- "label2id": {
35
- "LABEL_0": 0,
36
- "LABEL_1": 1,
37
- "LABEL_10": 10,
38
- "LABEL_11": 11,
39
- "LABEL_12": 12,
40
- "LABEL_13": 13,
41
- "LABEL_14": 14,
42
- "LABEL_15": 15,
43
- "LABEL_2": 2,
44
- "LABEL_3": 3,
45
- "LABEL_4": 4,
46
- "LABEL_5": 5,
47
- "LABEL_6": 6,
48
- "LABEL_7": 7,
49
- "LABEL_8": 8,
50
- "LABEL_9": 9
51
- },
52
- "layer_norm_eps": 1e-05,
53
- "max_position_embeddings": 514,
54
- "model_type": "roberta",
55
- "num_attention_heads": 12,
56
- "num_hidden_layers": 12,
57
- "pad_token_id": 1,
58
- "position_embedding_type": "absolute",
59
- "problem_type": "multi_label_classification",
60
- "torch_dtype": "float32",
61
- "transformers_version": "4.19.2",
62
- "type_vocab_size": 1,
63
- "use_cache": true,
64
- "vocab_size": 52000
65
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb0df800a0717e27a653a11d7f8346b293ebec23f5e161f45ac4940172e19941
3
- size 1008039837
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae6e41143e7af891ffe98910ef36cc712dbb46d86b33a08904be3bd8a2543f6a
3
- size 504033325
 
 
 
 
rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c92db6b019bcc5daa66264ee6196af75e462e009a9f3d24a99e49aae2e98453
3
- size 14567
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:052b473372c295fdcecaddd4e0fe02e5f1ad19b0e470cf4538018b0a63f4f2cb
3
- size 623
 
 
 
 
special_tokens_map.json DELETED
@@ -1 +0,0 @@
1
- {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
 
 
tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json DELETED
@@ -1 +0,0 @@
1
- {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": true, "trim_offsets": true, "max_len": 512, "special_tokens_map_file": null, "name_or_path": "Models/roberta-base-biomedical-clinical-es", "tokenizer_class": "RobertaTokenizer"}
 
 
trainer_state.json DELETED
@@ -1,578 +0,0 @@
1
- {
2
- "best_metric": 0.7414142113821449,
3
- "best_model_checkpoint": "./CARES/checkpoints/roberta/checkpoint-2698",
4
- "epoch": 38.0,
5
- "global_step": 2698,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 1.0,
12
- "eval_loss": 0.2509869933128357,
13
- "eval_macro_f1": 0.04342407975460123,
14
- "eval_macro_precision": 0.034867610837438424,
15
- "eval_macro_recall": 0.057545731707317076,
16
- "eval_micro_f1": 0.38635394456289984,
17
- "eval_micro_precision": 0.5578817733990148,
18
- "eval_micro_recall": 0.29549902152641877,
19
- "eval_runtime": 2.7138,
20
- "eval_samples_per_second": 355.964,
21
- "eval_steps_per_second": 11.423,
22
- "step": 71
23
- },
24
- {
25
- "epoch": 2.0,
26
- "eval_loss": 0.21916496753692627,
27
- "eval_macro_f1": 0.10341134166953839,
28
- "eval_macro_precision": 0.1498316489705596,
29
- "eval_macro_recall": 0.08601393938480183,
30
- "eval_micro_f1": 0.42796410014171,
31
- "eval_micro_precision": 0.7756849315068494,
32
- "eval_micro_recall": 0.29549902152641877,
33
- "eval_runtime": 2.7022,
34
- "eval_samples_per_second": 357.485,
35
- "eval_steps_per_second": 11.472,
36
- "step": 142
37
- },
38
- {
39
- "epoch": 3.0,
40
- "eval_loss": 0.1901940405368805,
41
- "eval_macro_f1": 0.17433694331035438,
42
- "eval_macro_precision": 0.1996622297635695,
43
- "eval_macro_recall": 0.1554066444975929,
44
- "eval_micro_f1": 0.5823627287853577,
45
- "eval_micro_precision": 0.8036739380022963,
46
- "eval_micro_recall": 0.45662100456621,
47
- "eval_runtime": 2.7029,
48
- "eval_samples_per_second": 357.398,
49
- "eval_steps_per_second": 11.469,
50
- "step": 213
51
- },
52
- {
53
- "epoch": 4.0,
54
- "eval_loss": 0.16621476411819458,
55
- "eval_macro_f1": 0.20771764577178875,
56
- "eval_macro_precision": 0.3206539932628853,
57
- "eval_macro_recall": 0.18121219739914096,
58
- "eval_micro_f1": 0.6394934705184012,
59
- "eval_micro_precision": 0.8128772635814889,
60
- "eval_micro_recall": 0.5270711024135681,
61
- "eval_runtime": 2.7016,
62
- "eval_samples_per_second": 357.565,
63
- "eval_steps_per_second": 11.475,
64
- "step": 284
65
- },
66
- {
67
- "epoch": 5.0,
68
- "eval_loss": 0.1592407524585724,
69
- "eval_macro_f1": 0.32374988901102003,
70
- "eval_macro_precision": 0.36319303902027156,
71
- "eval_macro_recall": 0.30985669516000114,
72
- "eval_micro_f1": 0.6886094674556215,
73
- "eval_micro_precision": 0.7950469684030743,
74
- "eval_micro_recall": 0.6073059360730594,
75
- "eval_runtime": 2.7015,
76
- "eval_samples_per_second": 357.579,
77
- "eval_steps_per_second": 11.475,
78
- "step": 355
79
- },
80
- {
81
- "epoch": 6.0,
82
- "eval_loss": 0.1491735577583313,
83
- "eval_macro_f1": 0.3533190399166411,
84
- "eval_macro_precision": 0.4078813390257998,
85
- "eval_macro_recall": 0.33542234175118296,
86
- "eval_micro_f1": 0.7270094134685011,
87
- "eval_micro_precision": 0.8169243287225386,
88
- "eval_micro_recall": 0.6549249836921069,
89
- "eval_runtime": 2.7029,
90
- "eval_samples_per_second": 357.392,
91
- "eval_steps_per_second": 11.469,
92
- "step": 426
93
- },
94
- {
95
- "epoch": 7.0,
96
- "eval_loss": 0.14026637375354767,
97
- "eval_macro_f1": 0.47073084873229765,
98
- "eval_macro_precision": 0.6837800649883113,
99
- "eval_macro_recall": 0.423964742702268,
100
- "eval_micro_f1": 0.75768156424581,
101
- "eval_micro_precision": 0.8151765589782118,
102
- "eval_micro_recall": 0.7077625570776256,
103
- "eval_runtime": 2.7322,
104
- "eval_samples_per_second": 353.558,
105
- "eval_steps_per_second": 11.346,
106
- "step": 497
107
- },
108
- {
109
- "epoch": 7.04,
110
- "learning_rate": 4.647887323943662e-05,
111
- "loss": 0.1794,
112
- "step": 500
113
- },
114
- {
115
- "epoch": 8.0,
116
- "eval_loss": 0.13919810950756073,
117
- "eval_macro_f1": 0.4762066395538287,
118
- "eval_macro_precision": 0.6922175380127314,
119
- "eval_macro_recall": 0.4248427568334097,
120
- "eval_micro_f1": 0.7554770318021201,
121
- "eval_micro_precision": 0.8242097147262915,
122
- "eval_micro_recall": 0.6973255055446836,
123
- "eval_runtime": 2.7464,
124
- "eval_samples_per_second": 351.729,
125
- "eval_steps_per_second": 11.287,
126
- "step": 568
127
- },
128
- {
129
- "epoch": 9.0,
130
- "eval_loss": 0.14247503876686096,
131
- "eval_macro_f1": 0.49866416753622766,
132
- "eval_macro_precision": 0.6531542097723891,
133
- "eval_macro_recall": 0.4611636373916834,
134
- "eval_micro_f1": 0.7498267498267498,
135
- "eval_micro_precision": 0.7997043606799704,
136
- "eval_micro_recall": 0.7058056099151989,
137
- "eval_runtime": 2.7469,
138
- "eval_samples_per_second": 351.665,
139
- "eval_steps_per_second": 11.285,
140
- "step": 639
141
- },
142
- {
143
- "epoch": 10.0,
144
- "eval_loss": 0.14413639903068542,
145
- "eval_macro_f1": 0.515652512172511,
146
- "eval_macro_precision": 0.5714508837953802,
147
- "eval_macro_recall": 0.494438079407737,
148
- "eval_micro_f1": 0.7669675693747909,
149
- "eval_micro_precision": 0.7866941015089163,
150
- "eval_micro_recall": 0.7482061317677756,
151
- "eval_runtime": 2.752,
152
- "eval_samples_per_second": 351.019,
153
- "eval_steps_per_second": 11.265,
154
- "step": 710
155
- },
156
- {
157
- "epoch": 11.0,
158
- "eval_loss": 0.13881583511829376,
159
- "eval_macro_f1": 0.5815495436036973,
160
- "eval_macro_precision": 0.6306553976911301,
161
- "eval_macro_recall": 0.5606746536385887,
162
- "eval_micro_f1": 0.7825223435948361,
163
- "eval_micro_precision": 0.7943548387096774,
164
- "eval_micro_recall": 0.7710371819960861,
165
- "eval_runtime": 2.7353,
166
- "eval_samples_per_second": 353.165,
167
- "eval_steps_per_second": 11.333,
168
- "step": 781
169
- },
170
- {
171
- "epoch": 12.0,
172
- "eval_loss": 0.14889590442180634,
173
- "eval_macro_f1": 0.582949261540157,
174
- "eval_macro_precision": 0.6154355859721446,
175
- "eval_macro_recall": 0.5660348822272816,
176
- "eval_micro_f1": 0.7689724647414371,
177
- "eval_micro_precision": 0.7923875432525952,
178
- "eval_micro_recall": 0.7469015003261579,
179
- "eval_runtime": 2.7291,
180
- "eval_samples_per_second": 353.967,
181
- "eval_steps_per_second": 11.359,
182
- "step": 852
183
- },
184
- {
185
- "epoch": 13.0,
186
- "eval_loss": 0.14177829027175903,
187
- "eval_macro_f1": 0.6003936149639709,
188
- "eval_macro_precision": 0.6178459654131079,
189
- "eval_macro_recall": 0.5986040907728625,
190
- "eval_micro_f1": 0.7824089268132589,
191
- "eval_micro_precision": 0.7873183619550859,
192
- "eval_micro_recall": 0.7775603392041748,
193
- "eval_runtime": 2.7236,
194
- "eval_samples_per_second": 354.674,
195
- "eval_steps_per_second": 11.382,
196
- "step": 923
197
- },
198
- {
199
- "epoch": 14.0,
200
- "eval_loss": 0.13778340816497803,
201
- "eval_macro_f1": 0.6071892145561097,
202
- "eval_macro_precision": 0.6420473960894854,
203
- "eval_macro_recall": 0.5902771317099681,
204
- "eval_micro_f1": 0.7962716378162449,
205
- "eval_micro_precision": 0.8130523453433038,
206
- "eval_micro_recall": 0.7801696020874103,
207
- "eval_runtime": 4.6013,
208
- "eval_samples_per_second": 209.942,
209
- "eval_steps_per_second": 6.737,
210
- "step": 994
211
- },
212
- {
213
- "epoch": 14.08,
214
- "learning_rate": 4.295774647887324e-05,
215
- "loss": 0.0459,
216
- "step": 1000
217
- },
218
- {
219
- "epoch": 15.0,
220
- "eval_loss": 0.14022594690322876,
221
- "eval_macro_f1": 0.6215256196989195,
222
- "eval_macro_precision": 0.7025580891227308,
223
- "eval_macro_recall": 0.5957635875240769,
224
- "eval_micro_f1": 0.7986776859504131,
225
- "eval_micro_precision": 0.8096514745308311,
226
- "eval_micro_recall": 0.7879973907371167,
227
- "eval_runtime": 2.7033,
228
- "eval_samples_per_second": 357.348,
229
- "eval_steps_per_second": 11.468,
230
- "step": 1065
231
- },
232
- {
233
- "epoch": 16.0,
234
- "eval_loss": 0.1448926478624344,
235
- "eval_macro_f1": 0.608747774411539,
236
- "eval_macro_precision": 0.621954970496014,
237
- "eval_macro_recall": 0.6062377476848362,
238
- "eval_micro_f1": 0.79816813869807,
239
- "eval_micro_precision": 0.800524934383202,
240
- "eval_micro_recall": 0.7958251793868232,
241
- "eval_runtime": 2.7015,
242
- "eval_samples_per_second": 357.574,
243
- "eval_steps_per_second": 11.475,
244
- "step": 1136
245
- },
246
- {
247
- "epoch": 17.0,
248
- "eval_loss": 0.1469811201095581,
249
- "eval_macro_f1": 0.6153866789938162,
250
- "eval_macro_precision": 0.650591338815516,
251
- "eval_macro_recall": 0.5950841704161889,
252
- "eval_micro_f1": 0.7996005326231691,
253
- "eval_micro_precision": 0.8164513936097892,
254
- "eval_micro_recall": 0.7834311806914547,
255
- "eval_runtime": 2.701,
256
- "eval_samples_per_second": 357.647,
257
- "eval_steps_per_second": 11.477,
258
- "step": 1207
259
- },
260
- {
261
- "epoch": 18.0,
262
- "eval_loss": 0.15327712893486023,
263
- "eval_macro_f1": 0.6316443999429009,
264
- "eval_macro_precision": 0.6512229785159827,
265
- "eval_macro_recall": 0.6349187975925275,
266
- "eval_micro_f1": 0.7879777850375694,
267
- "eval_micro_precision": 0.7892670157068062,
268
- "eval_micro_recall": 0.786692759295499,
269
- "eval_runtime": 5.7579,
270
- "eval_samples_per_second": 167.769,
271
- "eval_steps_per_second": 5.384,
272
- "step": 1278
273
- },
274
- {
275
- "epoch": 19.0,
276
- "eval_loss": 0.15278153121471405,
277
- "eval_macro_f1": 0.6310003042032536,
278
- "eval_macro_precision": 0.6839818740829384,
279
- "eval_macro_recall": 0.6255359551717035,
280
- "eval_micro_f1": 0.7923659098387628,
281
- "eval_micro_precision": 0.799468791500664,
282
- "eval_micro_recall": 0.7853881278538812,
283
- "eval_runtime": 2.7,
284
- "eval_samples_per_second": 357.774,
285
- "eval_steps_per_second": 11.481,
286
- "step": 1349
287
- },
288
- {
289
- "epoch": 20.0,
290
- "eval_loss": 0.15630246698856354,
291
- "eval_macro_f1": 0.632349332888396,
292
- "eval_macro_precision": 0.6992914076324832,
293
- "eval_macro_recall": 0.6115304182846466,
294
- "eval_micro_f1": 0.7856437273625287,
295
- "eval_micro_precision": 0.793218085106383,
296
- "eval_micro_recall": 0.7782126549249837,
297
- "eval_runtime": 2.7011,
298
- "eval_samples_per_second": 357.638,
299
- "eval_steps_per_second": 11.477,
300
- "step": 1420
301
- },
302
- {
303
- "epoch": 21.0,
304
- "eval_loss": 0.15509752929210663,
305
- "eval_macro_f1": 0.6092726479340159,
306
- "eval_macro_precision": 0.6680318673287149,
307
- "eval_macro_recall": 0.5717336891200013,
308
- "eval_micro_f1": 0.7991967871485944,
309
- "eval_micro_precision": 0.8206185567010309,
310
- "eval_micro_recall": 0.7788649706457925,
311
- "eval_runtime": 2.6986,
312
- "eval_samples_per_second": 357.967,
313
- "eval_steps_per_second": 11.488,
314
- "step": 1491
315
- },
316
- {
317
- "epoch": 21.13,
318
- "learning_rate": 3.943661971830986e-05,
319
- "loss": 0.0169,
320
- "step": 1500
321
- },
322
- {
323
- "epoch": 22.0,
324
- "eval_loss": 0.15633134543895721,
325
- "eval_macro_f1": 0.6559542184820522,
326
- "eval_macro_precision": 0.6920252568569637,
327
- "eval_macro_recall": 0.6423031165953985,
328
- "eval_micro_f1": 0.8009213557091149,
329
- "eval_micro_precision": 0.8081009296148738,
330
- "eval_micro_recall": 0.7938682322243966,
331
- "eval_runtime": 2.7117,
332
- "eval_samples_per_second": 356.232,
333
- "eval_steps_per_second": 11.432,
334
- "step": 1562
335
- },
336
- {
337
- "epoch": 23.0,
338
- "eval_loss": 0.16708678007125854,
339
- "eval_macro_f1": 0.6341964700327825,
340
- "eval_macro_precision": 0.6730301960786685,
341
- "eval_macro_recall": 0.6361571334823197,
342
- "eval_micro_f1": 0.7845195145949493,
343
- "eval_micro_precision": 0.7889182058047494,
344
- "eval_micro_recall": 0.7801696020874103,
345
- "eval_runtime": 2.7072,
346
- "eval_samples_per_second": 356.825,
347
- "eval_steps_per_second": 11.451,
348
- "step": 1633
349
- },
350
- {
351
- "epoch": 24.0,
352
- "eval_loss": 0.15924513339996338,
353
- "eval_macro_f1": 0.6567583708222104,
354
- "eval_macro_precision": 0.6932259526572662,
355
- "eval_macro_recall": 0.6481414988629403,
356
- "eval_micro_f1": 0.8018252933507171,
357
- "eval_micro_precision": 0.8013029315960912,
358
- "eval_micro_recall": 0.8023483365949119,
359
- "eval_runtime": 2.7129,
360
- "eval_samples_per_second": 356.081,
361
- "eval_steps_per_second": 11.427,
362
- "step": 1704
363
- },
364
- {
365
- "epoch": 25.0,
366
- "eval_loss": 0.15955589711666107,
367
- "eval_macro_f1": 0.6499053948607745,
368
- "eval_macro_precision": 0.7063535942579273,
369
- "eval_macro_recall": 0.622573757353082,
370
- "eval_micro_f1": 0.8029100529100529,
371
- "eval_micro_precision": 0.8142186452045607,
372
- "eval_micro_recall": 0.79191128506197,
373
- "eval_runtime": 2.7378,
374
- "eval_samples_per_second": 352.84,
375
- "eval_steps_per_second": 11.323,
376
- "step": 1775
377
- },
378
- {
379
- "epoch": 26.0,
380
- "eval_loss": 0.1635105311870575,
381
- "eval_macro_f1": 0.6483066019452002,
382
- "eval_macro_precision": 0.6824077806874724,
383
- "eval_macro_recall": 0.6413983597695092,
384
- "eval_micro_f1": 0.7965879265091863,
385
- "eval_micro_precision": 0.8013201320132013,
386
- "eval_micro_recall": 0.79191128506197,
387
- "eval_runtime": 2.7006,
388
- "eval_samples_per_second": 357.694,
389
- "eval_steps_per_second": 11.479,
390
- "step": 1846
391
- },
392
- {
393
- "epoch": 27.0,
394
- "eval_loss": 0.16744764149188995,
395
- "eval_macro_f1": 0.6758901394131414,
396
- "eval_macro_precision": 0.7249401901181491,
397
- "eval_macro_recall": 0.6772836080088236,
398
- "eval_micro_f1": 0.789776357827476,
399
- "eval_micro_precision": 0.7739511584220413,
400
- "eval_micro_recall": 0.8062622309197651,
401
- "eval_runtime": 2.7031,
402
- "eval_samples_per_second": 357.373,
403
- "eval_steps_per_second": 11.468,
404
- "step": 1917
405
- },
406
- {
407
- "epoch": 28.0,
408
- "eval_loss": 0.17063026130199432,
409
- "eval_macro_f1": 0.6882911714431834,
410
- "eval_macro_precision": 0.7320265820262954,
411
- "eval_macro_recall": 0.6788510173157892,
412
- "eval_micro_f1": 0.7945659377070908,
413
- "eval_micro_precision": 0.8074074074074075,
414
- "eval_micro_recall": 0.7821265492498369,
415
- "eval_runtime": 2.7065,
416
- "eval_samples_per_second": 356.912,
417
- "eval_steps_per_second": 11.454,
418
- "step": 1988
419
- },
420
- {
421
- "epoch": 28.17,
422
- "learning_rate": 3.5915492957746486e-05,
423
- "loss": 0.0088,
424
- "step": 2000
425
- },
426
- {
427
- "epoch": 29.0,
428
- "eval_loss": 0.17283257842063904,
429
- "eval_macro_f1": 0.6771985368510574,
430
- "eval_macro_precision": 0.7332109505236407,
431
- "eval_macro_recall": 0.663425919860526,
432
- "eval_micro_f1": 0.7905294311081881,
433
- "eval_micro_precision": 0.7970822281167109,
434
- "eval_micro_recall": 0.7840834964122635,
435
- "eval_runtime": 2.7052,
436
- "eval_samples_per_second": 357.094,
437
- "eval_steps_per_second": 11.46,
438
- "step": 2059
439
- },
440
- {
441
- "epoch": 30.0,
442
- "eval_loss": 0.18444736301898956,
443
- "eval_macro_f1": 0.6746367426025414,
444
- "eval_macro_precision": 0.7201523247516791,
445
- "eval_macro_recall": 0.6728013388910854,
446
- "eval_micro_f1": 0.7788242936018187,
447
- "eval_micro_precision": 0.7755498059508409,
448
- "eval_micro_recall": 0.7821265492498369,
449
- "eval_runtime": 2.704,
450
- "eval_samples_per_second": 357.254,
451
- "eval_steps_per_second": 11.465,
452
- "step": 2130
453
- },
454
- {
455
- "epoch": 31.0,
456
- "eval_loss": 0.1695714294910431,
457
- "eval_macro_f1": 0.6879346286499437,
458
- "eval_macro_precision": 0.7516673286655658,
459
- "eval_macro_recall": 0.6669919763081706,
460
- "eval_micro_f1": 0.802747791952895,
461
- "eval_micro_precision": 0.8051181102362205,
462
- "eval_micro_recall": 0.8003913894324853,
463
- "eval_runtime": 2.7049,
464
- "eval_samples_per_second": 357.124,
465
- "eval_steps_per_second": 11.46,
466
- "step": 2201
467
- },
468
- {
469
- "epoch": 32.0,
470
- "eval_loss": 0.16994765400886536,
471
- "eval_macro_f1": 0.7053100252741045,
472
- "eval_macro_precision": 0.805278845304122,
473
- "eval_macro_recall": 0.6824655471979887,
474
- "eval_micro_f1": 0.8006472491909384,
475
- "eval_micro_precision": 0.7944765574823378,
476
- "eval_micro_recall": 0.806914546640574,
477
- "eval_runtime": 2.7034,
478
- "eval_samples_per_second": 357.325,
479
- "eval_steps_per_second": 11.467,
480
- "step": 2272
481
- },
482
- {
483
- "epoch": 33.0,
484
- "eval_loss": 0.17551660537719727,
485
- "eval_macro_f1": 0.7235647899419184,
486
- "eval_macro_precision": 0.7984745552617538,
487
- "eval_macro_recall": 0.6960462941676209,
488
- "eval_micro_f1": 0.8010352636687157,
489
- "eval_micro_precision": 0.7946084724005135,
490
- "eval_micro_recall": 0.8075668623613829,
491
- "eval_runtime": 2.7047,
492
- "eval_samples_per_second": 357.151,
493
- "eval_steps_per_second": 11.461,
494
- "step": 2343
495
- },
496
- {
497
- "epoch": 34.0,
498
- "eval_loss": 0.17244330048561096,
499
- "eval_macro_f1": 0.6948832290211537,
500
- "eval_macro_precision": 0.7553961335643664,
501
- "eval_macro_recall": 0.6730022583492952,
502
- "eval_micro_f1": 0.8063989552726085,
503
- "eval_micro_precision": 0.8071895424836601,
504
- "eval_micro_recall": 0.8056099151989563,
505
- "eval_runtime": 2.7035,
506
- "eval_samples_per_second": 357.318,
507
- "eval_steps_per_second": 11.467,
508
- "step": 2414
509
- },
510
- {
511
- "epoch": 35.0,
512
- "eval_loss": 0.17627869546413422,
513
- "eval_macro_f1": 0.7275662557839568,
514
- "eval_macro_precision": 0.7750438946995515,
515
- "eval_macro_recall": 0.717891607952716,
516
- "eval_micro_f1": 0.8019261637239165,
517
- "eval_micro_precision": 0.7895069532237674,
518
- "eval_micro_recall": 0.8147423352902805,
519
- "eval_runtime": 2.7043,
520
- "eval_samples_per_second": 357.211,
521
- "eval_steps_per_second": 11.463,
522
- "step": 2485
523
- },
524
- {
525
- "epoch": 35.21,
526
- "learning_rate": 3.23943661971831e-05,
527
- "loss": 0.0051,
528
- "step": 2500
529
- },
530
- {
531
- "epoch": 36.0,
532
- "eval_loss": 0.1767842173576355,
533
- "eval_macro_f1": 0.6877210668749649,
534
- "eval_macro_precision": 0.7524115221848844,
535
- "eval_macro_recall": 0.6652074916473143,
536
- "eval_micro_f1": 0.7996077149395227,
537
- "eval_micro_precision": 0.8014416775884666,
538
- "eval_micro_recall": 0.7977821265492498,
539
- "eval_runtime": 2.7004,
540
- "eval_samples_per_second": 357.724,
541
- "eval_steps_per_second": 11.48,
542
- "step": 2556
543
- },
544
- {
545
- "epoch": 37.0,
546
- "eval_loss": 0.17860282957553864,
547
- "eval_macro_f1": 0.7341275703601238,
548
- "eval_macro_precision": 0.8208336935763849,
549
- "eval_macro_recall": 0.6963532745066767,
550
- "eval_micro_f1": 0.8019512195121952,
551
- "eval_micro_precision": 0.7996108949416343,
552
- "eval_micro_recall": 0.8043052837573386,
553
- "eval_runtime": 2.7018,
554
- "eval_samples_per_second": 357.539,
555
- "eval_steps_per_second": 11.474,
556
- "step": 2627
557
- },
558
- {
559
- "epoch": 38.0,
560
- "eval_loss": 0.18057870864868164,
561
- "eval_macro_f1": 0.7414142113821449,
562
- "eval_macro_precision": 0.8229012131281167,
563
- "eval_macro_recall": 0.7070558641306933,
564
- "eval_micro_f1": 0.8049575994781474,
565
- "eval_micro_precision": 0.8049575994781474,
566
- "eval_micro_recall": 0.8049575994781474,
567
- "eval_runtime": 2.703,
568
- "eval_samples_per_second": 357.385,
569
- "eval_steps_per_second": 11.469,
570
- "step": 2698
571
- }
572
- ],
573
- "max_steps": 7100,
574
- "num_train_epochs": 100,
575
- "total_flos": 2.2147253694876576e+16,
576
- "trial_name": null,
577
- "trial_params": null
578
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3415febb816dc8caa278fa1560a2ebbb18bac1da83bdbbd581a6a1409f9c4b4d
3
- size 3183
 
 
 
 
vocab.json DELETED
The diff for this file is too large to render. See raw diff