venetis committed on
Commit
c9f3e36
1 Parent(s): 965f6cf

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,44 @@
1
  {
2
  "epoch": 3.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 2118227382190080.0,
4
- "train_loss": 0.374498660454969,
5
- "train_runtime": 306.4673,
6
- "train_samples_per_second": 52.156,
7
- "train_steps_per_second": 1.635
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 1.0,
4
+ "eval_f1": 1.0,
5
+ "eval_loss": 0.01563212461769581,
6
+ "eval_precision": 1.0,
7
+ "eval_recall": 1.0,
8
+ "eval_runtime": 4.7878,
9
+ "eval_samples_per_second": 139.103,
10
+ "eval_steps_per_second": 4.386,
11
+ "test_accuracy": 1.0,
12
+ "test_f1": 1.0,
13
+ "test_loss": 0.015602031722664833,
14
+ "test_precision": 1.0,
15
+ "test_recall": 1.0,
16
+ "test_runtime": 3.7938,
17
+ "test_samples_per_second": 175.812,
18
+ "test_steps_per_second": 5.535,
19
+ "testaccuracy": 1.0,
20
+ "testf1": 1.0,
21
+ "testloss": 0.015602031722664833,
22
+ "testprecision": 1.0,
23
+ "testrecall": 1.0,
24
+ "testruntime": 3.8386,
25
+ "testsamples_per_second": 173.763,
26
+ "teststeps_per_second": 5.471,
27
  "total_flos": 2118227382190080.0,
28
+ "train_accuracy": 0.9981231231231231,
29
+ "train_f1": 0.9981237024802164,
30
+ "train_loss": 0.02615901082754135,
31
+ "train_precision": 0.9981332041882663,
32
+ "train_recall": 0.9981231231231231,
33
+ "train_runtime": 33.3791,
34
+ "train_samples_per_second": 159.621,
35
+ "train_steps_per_second": 5.003,
36
+ "valid_accuracy": 1.0,
37
+ "valid_f1": 1.0,
38
+ "valid_loss": 0.01563212461769581,
39
+ "valid_precision": 1.0,
40
+ "valid_recall": 1.0,
41
+ "valid_runtime": 4.1534,
42
+ "valid_samples_per_second": 160.349,
43
+ "valid_steps_per_second": 5.056
44
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95118faba19834f44966d1ab6e2f64c0e519c8c15503cd58ee3c51f0d779ba3d
3
  size 267926253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08deb6849fe67d26281d2b1bf52aa27b8e8ce8ec312683bc555bf4a2085139d1
3
  size 267926253
runs/Mar10_12-04-59_96c678b480cf/events.out.tfevents.1678452496.96c678b480cf.1488.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ac649eea94beb6fb8e7bb32b9052bc0e9b524604ec6cf907b04aeb3ee81aa79
3
- size 14162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdf3ce9529f776c11ccd571ba6031c022c4ee710078bffa150095682e3bd76a7
3
+ size 14516
runs/Mar10_12-04-59_96c678b480cf/events.out.tfevents.1678453656.96c678b480cf.1488.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9ee7068b830d374b680fd3796e24d3ca4cd9c05b94270ede480852a62fb73d2
3
+ size 4288
test_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "test_accuracy": 1.0,
4
+ "test_f1": 1.0,
5
+ "test_loss": 0.015602031722664833,
6
+ "test_precision": 1.0,
7
+ "test_recall": 1.0,
8
+ "test_runtime": 3.7938,
9
+ "test_samples_per_second": 175.812,
10
+ "test_steps_per_second": 5.535
11
+ }
train_more_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_accuracy": 0.9981231231231231,
4
+ "train_f1": 0.9981237024802164,
5
+ "train_loss": 0.02615901082754135,
6
+ "train_precision": 0.9981332041882663,
7
+ "train_recall": 0.9981231231231231,
8
+ "train_runtime": 33.3791,
9
+ "train_samples_per_second": 159.621,
10
+ "train_steps_per_second": 5.003
11
+ }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2118227382190080.0,
4
- "train_loss": 0.374498660454969,
5
- "train_runtime": 306.4673,
6
- "train_samples_per_second": 52.156,
7
- "train_steps_per_second": 1.635
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 2118227382190080.0,
4
+ "train_loss": 0.08387424607952673,
5
+ "train_runtime": 368.9515,
6
+ "train_samples_per_second": 43.323,
7
+ "train_steps_per_second": 1.358
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.9849327674451294,
3
- "best_model_checkpoint": "distilbert-base-uncased_finetuned_text_2_disease/checkpoint-167",
4
  "epoch": 3.0,
5
  "global_step": 501,
6
  "is_hyper_param_search": false,
@@ -9,348 +9,456 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
- "learning_rate": 0.0,
13
- "loss": 0.3927,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.12,
18
- "learning_rate": 0.0,
19
- "loss": 0.3871,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.18,
24
- "learning_rate": 0.0,
25
- "loss": 0.3545,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.24,
30
- "learning_rate": 0.0,
31
- "loss": 0.389,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.3,
36
- "learning_rate": 0.0,
37
- "loss": 0.3796,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.36,
42
- "learning_rate": 0.0,
43
- "loss": 0.3484,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.42,
48
- "learning_rate": 0.0,
49
- "loss": 0.3691,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.48,
54
- "learning_rate": 0.0,
55
- "loss": 0.3773,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.54,
60
- "learning_rate": 0.0,
61
- "loss": 0.3739,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.6,
66
- "learning_rate": 0.0,
67
- "loss": 0.3687,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.66,
72
- "learning_rate": 0.0,
73
- "loss": 0.3946,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.72,
78
- "learning_rate": 0.0,
79
- "loss": 0.3614,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.78,
84
- "learning_rate": 0.0,
85
- "loss": 0.3963,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.84,
90
- "learning_rate": 0.0,
91
- "loss": 0.3555,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.9,
96
- "learning_rate": 0.0,
97
- "loss": 0.3627,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.96,
102
- "learning_rate": 0.0,
103
- "loss": 0.3641,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 1.0,
108
- "eval_accuracy": 0.984984984984985,
109
- "eval_f1": 0.9849327674451294,
110
- "eval_loss": 0.2975810170173645,
111
- "eval_precision": 0.9865442976554087,
112
- "eval_recall": 0.984984984984985,
113
- "eval_runtime": 3.8994,
114
- "eval_samples_per_second": 170.795,
115
- "eval_steps_per_second": 5.385,
116
  "step": 167
117
  },
118
  {
119
  "epoch": 1.02,
120
- "learning_rate": 0.0,
121
- "loss": 0.3898,
122
  "step": 170
123
  },
124
  {
125
  "epoch": 1.08,
126
- "learning_rate": 0.0,
127
- "loss": 0.3551,
128
  "step": 180
129
  },
130
  {
131
  "epoch": 1.14,
132
- "learning_rate": 0.0,
133
- "loss": 0.3864,
134
  "step": 190
135
  },
136
  {
137
  "epoch": 1.2,
138
- "learning_rate": 0.0,
139
- "loss": 0.3539,
140
  "step": 200
141
  },
142
  {
143
  "epoch": 1.26,
144
- "learning_rate": 0.0,
145
- "loss": 0.3792,
146
  "step": 210
147
  },
148
  {
149
  "epoch": 1.32,
150
- "learning_rate": 0.0,
151
- "loss": 0.3929,
152
  "step": 220
153
  },
154
  {
155
  "epoch": 1.38,
156
- "learning_rate": 0.0,
157
- "loss": 0.3688,
158
  "step": 230
159
  },
160
  {
161
  "epoch": 1.44,
162
- "learning_rate": 0.0,
163
- "loss": 0.4244,
164
  "step": 240
165
  },
166
  {
167
  "epoch": 1.5,
168
- "learning_rate": 0.0,
169
- "loss": 0.3677,
170
  "step": 250
171
  },
172
  {
173
  "epoch": 1.56,
174
- "learning_rate": 0.0,
175
- "loss": 0.371,
176
  "step": 260
177
  },
178
  {
179
  "epoch": 1.62,
180
- "learning_rate": 0.0,
181
- "loss": 0.3686,
182
  "step": 270
183
  },
184
  {
185
  "epoch": 1.68,
186
- "learning_rate": 0.0,
187
- "loss": 0.3774,
188
  "step": 280
189
  },
190
  {
191
  "epoch": 1.74,
192
- "learning_rate": 0.0,
193
- "loss": 0.3703,
194
  "step": 290
195
  },
196
  {
197
  "epoch": 1.8,
198
- "learning_rate": 0.0,
199
- "loss": 0.3731,
200
  "step": 300
201
  },
202
  {
203
  "epoch": 1.86,
204
- "learning_rate": 0.0,
205
- "loss": 0.3763,
206
  "step": 310
207
  },
208
  {
209
  "epoch": 1.92,
210
- "learning_rate": 0.0,
211
- "loss": 0.3712,
212
  "step": 320
213
  },
214
  {
215
  "epoch": 1.98,
216
- "learning_rate": 0.0,
217
- "loss": 0.3762,
218
  "step": 330
219
  },
220
  {
221
  "epoch": 2.0,
222
- "eval_accuracy": 0.984984984984985,
223
- "eval_f1": 0.9849327674451294,
224
- "eval_loss": 0.2975810170173645,
225
- "eval_precision": 0.9865442976554087,
226
- "eval_recall": 0.984984984984985,
227
- "eval_runtime": 3.6994,
228
- "eval_samples_per_second": 180.03,
229
- "eval_steps_per_second": 5.677,
230
  "step": 334
231
  },
232
  {
233
  "epoch": 2.04,
234
- "learning_rate": 0.0,
235
- "loss": 0.3601,
236
  "step": 340
237
  },
238
  {
239
  "epoch": 2.1,
240
- "learning_rate": 0.0,
241
- "loss": 0.3885,
242
  "step": 350
243
  },
244
  {
245
  "epoch": 2.16,
246
- "learning_rate": 0.0,
247
- "loss": 0.3864,
248
  "step": 360
249
  },
250
  {
251
  "epoch": 2.22,
252
- "learning_rate": 0.0,
253
- "loss": 0.3701,
254
  "step": 370
255
  },
256
  {
257
  "epoch": 2.28,
258
- "learning_rate": 0.0,
259
- "loss": 0.3744,
260
  "step": 380
261
  },
262
  {
263
  "epoch": 2.34,
264
- "learning_rate": 0.0,
265
- "loss": 0.3791,
266
  "step": 390
267
  },
268
  {
269
  "epoch": 2.4,
270
- "learning_rate": 0.0,
271
- "loss": 0.3572,
272
  "step": 400
273
  },
274
  {
275
  "epoch": 2.46,
276
- "learning_rate": 0.0,
277
- "loss": 0.3809,
278
  "step": 410
279
  },
280
  {
281
  "epoch": 2.51,
282
- "learning_rate": 0.0,
283
- "loss": 0.3796,
284
  "step": 420
285
  },
286
  {
287
  "epoch": 2.57,
288
- "learning_rate": 0.0,
289
- "loss": 0.3517,
290
  "step": 430
291
  },
292
  {
293
  "epoch": 2.63,
294
- "learning_rate": 0.0,
295
- "loss": 0.3723,
296
  "step": 440
297
  },
298
  {
299
  "epoch": 2.69,
300
- "learning_rate": 0.0,
301
- "loss": 0.3895,
302
  "step": 450
303
  },
304
  {
305
  "epoch": 2.75,
306
- "learning_rate": 0.0,
307
- "loss": 0.3786,
308
  "step": 460
309
  },
310
  {
311
  "epoch": 2.81,
312
- "learning_rate": 0.0,
313
- "loss": 0.3844,
314
  "step": 470
315
  },
316
  {
317
  "epoch": 2.87,
318
- "learning_rate": 0.0,
319
- "loss": 0.3575,
320
  "step": 480
321
  },
322
  {
323
  "epoch": 2.93,
324
- "learning_rate": 0.0,
325
- "loss": 0.3547,
326
  "step": 490
327
  },
328
  {
329
  "epoch": 2.99,
330
- "learning_rate": 0.0,
331
- "loss": 0.3783,
332
  "step": 500
333
  },
334
  {
335
  "epoch": 3.0,
336
- "eval_accuracy": 0.984984984984985,
337
- "eval_f1": 0.9849327674451294,
338
- "eval_loss": 0.2975810170173645,
339
- "eval_precision": 0.9865442976554087,
340
- "eval_recall": 0.984984984984985,
341
- "eval_runtime": 3.693,
342
- "eval_samples_per_second": 180.342,
343
- "eval_steps_per_second": 5.686,
344
  "step": 501
345
  },
346
  {
347
  "epoch": 3.0,
348
  "step": 501,
349
  "total_flos": 2118227382190080.0,
350
- "train_loss": 0.374498660454969,
351
- "train_runtime": 306.4673,
352
- "train_samples_per_second": 52.156,
353
- "train_steps_per_second": 1.635
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  }
355
  ],
356
  "max_steps": 501,
 
1
  {
2
+ "best_metric": 1.0,
3
+ "best_model_checkpoint": "distilbert-base-uncased_finetuned_text_2_disease/checkpoint-334",
4
  "epoch": 3.0,
5
  "global_step": 501,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.06,
12
+ "learning_rate": 1.9600798403193614e-05,
13
+ "loss": 0.3519,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.12,
18
+ "learning_rate": 1.920159680638723e-05,
19
+ "loss": 0.3228,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.18,
24
+ "learning_rate": 1.8802395209580838e-05,
25
+ "loss": 0.3043,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.24,
30
+ "learning_rate": 1.8403193612774454e-05,
31
+ "loss": 0.232,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.3,
36
+ "learning_rate": 1.8003992015968066e-05,
37
+ "loss": 0.2346,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.36,
42
+ "learning_rate": 1.7604790419161678e-05,
43
+ "loss": 0.2027,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.42,
48
+ "learning_rate": 1.720558882235529e-05,
49
+ "loss": 0.1758,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.48,
54
+ "learning_rate": 1.6806387225548902e-05,
55
+ "loss": 0.1465,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.54,
60
+ "learning_rate": 1.6407185628742518e-05,
61
+ "loss": 0.1593,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.6,
66
+ "learning_rate": 1.600798403193613e-05,
67
+ "loss": 0.1201,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.66,
72
+ "learning_rate": 1.5608782435129742e-05,
73
+ "loss": 0.1107,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.72,
78
+ "learning_rate": 1.5209580838323354e-05,
79
+ "loss": 0.1138,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.78,
84
+ "learning_rate": 1.4810379241516968e-05,
85
+ "loss": 0.0947,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.84,
90
+ "learning_rate": 1.4411177644710582e-05,
91
+ "loss": 0.0911,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.9,
96
+ "learning_rate": 1.4011976047904192e-05,
97
+ "loss": 0.0613,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.96,
102
+ "learning_rate": 1.3612774451097806e-05,
103
+ "loss": 0.0585,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 1.0,
108
+ "eval_accuracy": 0.9984984984984985,
109
+ "eval_f1": 0.9984973421631103,
110
+ "eval_loss": 0.03541606292128563,
111
+ "eval_precision": 0.9985541096652208,
112
+ "eval_recall": 0.9984984984984985,
113
+ "eval_runtime": 3.7271,
114
+ "eval_samples_per_second": 178.69,
115
+ "eval_steps_per_second": 5.634,
116
  "step": 167
117
  },
118
  {
119
  "epoch": 1.02,
120
+ "learning_rate": 1.3213572854291418e-05,
121
+ "loss": 0.0779,
122
  "step": 170
123
  },
124
  {
125
  "epoch": 1.08,
126
+ "learning_rate": 1.2814371257485032e-05,
127
+ "loss": 0.0501,
128
  "step": 180
129
  },
130
  {
131
  "epoch": 1.14,
132
+ "learning_rate": 1.2415169660678643e-05,
133
+ "loss": 0.0756,
134
  "step": 190
135
  },
136
  {
137
  "epoch": 1.2,
138
+ "learning_rate": 1.2015968063872256e-05,
139
+ "loss": 0.0417,
140
  "step": 200
141
  },
142
  {
143
  "epoch": 1.26,
144
+ "learning_rate": 1.161676646706587e-05,
145
+ "loss": 0.0505,
146
  "step": 210
147
  },
148
  {
149
  "epoch": 1.32,
150
+ "learning_rate": 1.1217564870259482e-05,
151
+ "loss": 0.0609,
152
  "step": 220
153
  },
154
  {
155
  "epoch": 1.38,
156
+ "learning_rate": 1.0818363273453095e-05,
157
+ "loss": 0.0764,
158
  "step": 230
159
  },
160
  {
161
  "epoch": 1.44,
162
+ "learning_rate": 1.0419161676646707e-05,
163
+ "loss": 0.0677,
164
  "step": 240
165
  },
166
  {
167
  "epoch": 1.5,
168
+ "learning_rate": 1.001996007984032e-05,
169
+ "loss": 0.0617,
170
  "step": 250
171
  },
172
  {
173
  "epoch": 1.56,
174
+ "learning_rate": 9.620758483033933e-06,
175
+ "loss": 0.0365,
176
  "step": 260
177
  },
178
  {
179
  "epoch": 1.62,
180
+ "learning_rate": 9.221556886227547e-06,
181
+ "loss": 0.0315,
182
  "step": 270
183
  },
184
  {
185
  "epoch": 1.68,
186
+ "learning_rate": 8.822355289421159e-06,
187
+ "loss": 0.0297,
188
  "step": 280
189
  },
190
  {
191
  "epoch": 1.74,
192
+ "learning_rate": 8.42315369261477e-06,
193
+ "loss": 0.0505,
194
  "step": 290
195
  },
196
  {
197
  "epoch": 1.8,
198
+ "learning_rate": 8.023952095808385e-06,
199
+ "loss": 0.0275,
200
  "step": 300
201
  },
202
  {
203
  "epoch": 1.86,
204
+ "learning_rate": 7.624750499001997e-06,
205
+ "loss": 0.0366,
206
  "step": 310
207
  },
208
  {
209
  "epoch": 1.92,
210
+ "learning_rate": 7.225548902195609e-06,
211
+ "loss": 0.0269,
212
  "step": 320
213
  },
214
  {
215
  "epoch": 1.98,
216
+ "learning_rate": 6.826347305389223e-06,
217
+ "loss": 0.0457,
218
  "step": 330
219
  },
220
  {
221
  "epoch": 2.0,
222
+ "eval_accuracy": 1.0,
223
+ "eval_f1": 1.0,
224
+ "eval_loss": 0.01563212461769581,
225
+ "eval_precision": 1.0,
226
+ "eval_recall": 1.0,
227
+ "eval_runtime": 4.0223,
228
+ "eval_samples_per_second": 165.575,
229
+ "eval_steps_per_second": 5.221,
230
  "step": 334
231
  },
232
  {
233
  "epoch": 2.04,
234
+ "learning_rate": 6.427145708582835e-06,
235
+ "loss": 0.0284,
236
  "step": 340
237
  },
238
  {
239
  "epoch": 2.1,
240
+ "learning_rate": 6.027944111776448e-06,
241
+ "loss": 0.0241,
242
  "step": 350
243
  },
244
  {
245
  "epoch": 2.16,
246
+ "learning_rate": 5.62874251497006e-06,
247
+ "loss": 0.0424,
248
  "step": 360
249
  },
250
  {
251
  "epoch": 2.22,
252
+ "learning_rate": 5.229540918163674e-06,
253
+ "loss": 0.0444,
254
  "step": 370
255
  },
256
  {
257
  "epoch": 2.28,
258
+ "learning_rate": 4.830339321357286e-06,
259
+ "loss": 0.0222,
260
  "step": 380
261
  },
262
  {
263
  "epoch": 2.34,
264
+ "learning_rate": 4.431137724550898e-06,
265
+ "loss": 0.0248,
266
  "step": 390
267
  },
268
  {
269
  "epoch": 2.4,
270
+ "learning_rate": 4.031936127744511e-06,
271
+ "loss": 0.0344,
272
  "step": 400
273
  },
274
  {
275
  "epoch": 2.46,
276
+ "learning_rate": 3.6327345309381242e-06,
277
+ "loss": 0.0214,
278
  "step": 410
279
  },
280
  {
281
  "epoch": 2.51,
282
+ "learning_rate": 3.2335329341317368e-06,
283
+ "loss": 0.0216,
284
  "step": 420
285
  },
286
  {
287
  "epoch": 2.57,
288
+ "learning_rate": 2.8343313373253494e-06,
289
+ "loss": 0.0512,
290
  "step": 430
291
  },
292
  {
293
  "epoch": 2.63,
294
+ "learning_rate": 2.4351297405189623e-06,
295
+ "loss": 0.0437,
296
  "step": 440
297
  },
298
  {
299
  "epoch": 2.69,
300
+ "learning_rate": 2.035928143712575e-06,
301
+ "loss": 0.0439,
302
  "step": 450
303
  },
304
  {
305
  "epoch": 2.75,
306
+ "learning_rate": 1.6367265469061877e-06,
307
+ "loss": 0.0212,
308
  "step": 460
309
  },
310
  {
311
  "epoch": 2.81,
312
+ "learning_rate": 1.2375249500998005e-06,
313
+ "loss": 0.0203,
314
  "step": 470
315
  },
316
  {
317
  "epoch": 2.87,
318
+ "learning_rate": 8.383233532934132e-07,
319
+ "loss": 0.0483,
320
  "step": 480
321
  },
322
  {
323
  "epoch": 2.93,
324
+ "learning_rate": 4.39121756487026e-07,
325
+ "loss": 0.0594,
326
  "step": 490
327
  },
328
  {
329
  "epoch": 2.99,
330
+ "learning_rate": 3.992015968063872e-08,
331
+ "loss": 0.0207,
332
  "step": 500
333
  },
334
  {
335
  "epoch": 3.0,
336
+ "eval_accuracy": 1.0,
337
+ "eval_f1": 1.0,
338
+ "eval_loss": 0.012468785047531128,
339
+ "eval_precision": 1.0,
340
+ "eval_recall": 1.0,
341
+ "eval_runtime": 4.1959,
342
+ "eval_samples_per_second": 158.726,
343
+ "eval_steps_per_second": 5.005,
344
  "step": 501
345
  },
346
  {
347
  "epoch": 3.0,
348
  "step": 501,
349
  "total_flos": 2118227382190080.0,
350
+ "train_loss": 0.08387424607952673,
351
+ "train_runtime": 368.9515,
352
+ "train_samples_per_second": 43.323,
353
+ "train_steps_per_second": 1.358
354
+ },
355
+ {
356
+ "epoch": 3.0,
357
+ "eval_accuracy": 0.9981231231231231,
358
+ "eval_f1": 0.9981237024802164,
359
+ "eval_loss": 0.02615901082754135,
360
+ "eval_precision": 0.9981332041882663,
361
+ "eval_recall": 0.9981231231231231,
362
+ "eval_runtime": 31.1804,
363
+ "eval_samples_per_second": 170.877,
364
+ "eval_steps_per_second": 5.356,
365
+ "step": 501
366
+ },
367
+ {
368
+ "epoch": 3.0,
369
+ "eval_accuracy": 0.9981231231231231,
370
+ "eval_f1": 0.9981237024802164,
371
+ "eval_loss": 0.02615901082754135,
372
+ "eval_precision": 0.9981332041882663,
373
+ "eval_recall": 0.9981231231231231,
374
+ "eval_runtime": 31.2125,
375
+ "eval_samples_per_second": 170.701,
376
+ "eval_steps_per_second": 5.35,
377
+ "step": 501
378
+ },
379
+ {
380
+ "epoch": 3.0,
381
+ "eval_accuracy": 0.9981231231231231,
382
+ "eval_f1": 0.9981237024802164,
383
+ "eval_loss": 0.02615901082754135,
384
+ "eval_precision": 0.9981332041882663,
385
+ "eval_recall": 0.9981231231231231,
386
+ "eval_runtime": 33.039,
387
+ "eval_samples_per_second": 161.264,
388
+ "eval_steps_per_second": 5.055,
389
+ "step": 501
390
+ },
391
+ {
392
+ "epoch": 3.0,
393
+ "eval_accuracy": 0.9981231231231231,
394
+ "eval_f1": 0.9981237024802164,
395
+ "eval_loss": 0.02615901082754135,
396
+ "eval_precision": 0.9981332041882663,
397
+ "eval_recall": 0.9981231231231231,
398
+ "eval_runtime": 33.3791,
399
+ "eval_samples_per_second": 159.621,
400
+ "eval_steps_per_second": 5.003,
401
+ "step": 501
402
+ },
403
+ {
404
+ "epoch": 3.0,
405
+ "eval_accuracy": 1.0,
406
+ "eval_f1": 1.0,
407
+ "eval_loss": 0.01563212461769581,
408
+ "eval_precision": 1.0,
409
+ "eval_recall": 1.0,
410
+ "eval_runtime": 4.7878,
411
+ "eval_samples_per_second": 139.103,
412
+ "eval_steps_per_second": 4.386,
413
+ "step": 501
414
+ },
415
+ {
416
+ "epoch": 3.0,
417
+ "eval_accuracy": 1.0,
418
+ "eval_f1": 1.0,
419
+ "eval_loss": 0.015602031722664833,
420
+ "eval_precision": 1.0,
421
+ "eval_recall": 1.0,
422
+ "eval_runtime": 3.8386,
423
+ "eval_samples_per_second": 173.763,
424
+ "eval_steps_per_second": 5.471,
425
+ "step": 501
426
+ },
427
+ {
428
+ "epoch": 3.0,
429
+ "eval_accuracy": 1.0,
430
+ "eval_f1": 1.0,
431
+ "eval_loss": 0.015602031722664833,
432
+ "eval_precision": 1.0,
433
+ "eval_recall": 1.0,
434
+ "eval_runtime": 4.2166,
435
+ "eval_samples_per_second": 158.186,
436
+ "eval_steps_per_second": 4.98,
437
+ "step": 501
438
+ },
439
+ {
440
+ "epoch": 3.0,
441
+ "eval_accuracy": 1.0,
442
+ "eval_f1": 1.0,
443
+ "eval_loss": 0.01563212461769581,
444
+ "eval_precision": 1.0,
445
+ "eval_recall": 1.0,
446
+ "eval_runtime": 4.1534,
447
+ "eval_samples_per_second": 160.349,
448
+ "eval_steps_per_second": 5.056,
449
+ "step": 501
450
+ },
451
+ {
452
+ "epoch": 3.0,
453
+ "eval_accuracy": 1.0,
454
+ "eval_f1": 1.0,
455
+ "eval_loss": 0.015602031722664833,
456
+ "eval_precision": 1.0,
457
+ "eval_recall": 1.0,
458
+ "eval_runtime": 3.7938,
459
+ "eval_samples_per_second": 175.812,
460
+ "eval_steps_per_second": 5.535,
461
+ "step": 501
462
  }
463
  ],
464
  "max_steps": 501,
valid_results.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "valid_accuracy": 1.0,
4
+ "valid_f1": 1.0,
5
+ "valid_loss": 0.01563212461769581,
6
+ "valid_precision": 1.0,
7
+ "valid_recall": 1.0,
8
+ "valid_runtime": 4.1534,
9
+ "valid_samples_per_second": 160.349,
10
+ "valid_steps_per_second": 5.056
11
+ }