amyeroberts HF staff committed on
Commit
3f563dc
1 Parent(s): 5c1e91c

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 6787.48388671875,
4
- "eval_runtime": 2.1947,
5
- "eval_samples_per_second": 14.58,
6
- "eval_steps_per_second": 1.823,
7
- "train_loss": 6234.263671875,
8
- "train_runtime": 98.3256,
9
- "train_samples_per_second": 1.627,
10
- "train_steps_per_second": 0.203
11
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 6719.021484375,
4
+ "eval_runtime": 17.2616,
5
+ "eval_samples_per_second": 14.831,
6
+ "eval_steps_per_second": 1.854,
7
+ "train_loss": 6244.167712402344,
8
+ "train_runtime": 818.3257,
9
+ "train_samples_per_second": 6.257,
10
+ "train_steps_per_second": 0.782
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_loss": 6787.48388671875,
4
- "eval_runtime": 2.1947,
5
- "eval_samples_per_second": 14.58,
6
- "eval_steps_per_second": 1.823
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_loss": 6719.021484375,
4
+ "eval_runtime": 17.2616,
5
+ "eval_samples_per_second": 14.831,
6
+ "eval_steps_per_second": 1.854
7
  }
runs/Jul19_09-30-19_amy-2-gpu/events.out.tfevents.1689759879.amy-2-gpu.68506.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86799bdb45bf61756b3b354bc2e63cac40c9c98d96eadc47d09aa142b3109ccb
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 6234.263671875,
4
- "train_runtime": 98.3256,
5
- "train_samples_per_second": 1.627,
6
- "train_steps_per_second": 0.203
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 6244.167712402344,
4
+ "train_runtime": 818.3257,
5
+ "train_samples_per_second": 6.257,
6
+ "train_steps_per_second": 0.782
7
  }
trainer_state.json CHANGED
@@ -1,77 +1,449 @@
1
  {
2
- "best_metric": 6787.48388671875,
3
- "best_model_checkpoint": "./coco_outputs/checkpoint-20",
4
  "epoch": 5.0,
5
- "global_step": 20,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 6788.2998046875,
13
- "eval_runtime": 2.0726,
14
- "eval_samples_per_second": 15.44,
15
- "eval_steps_per_second": 1.93,
16
- "step": 4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 2.0,
20
- "eval_loss": 6787.97509765625,
21
- "eval_runtime": 2.0833,
22
- "eval_samples_per_second": 15.361,
23
- "eval_steps_per_second": 1.92,
24
- "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  },
26
  {
27
  "epoch": 2.5,
28
  "learning_rate": 1e-05,
29
- "loss": 6175.932,
30
- "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 6787.7900390625,
35
- "eval_runtime": 2.1929,
36
- "eval_samples_per_second": 14.592,
37
- "eval_steps_per_second": 1.824,
38
- "step": 12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 4.0,
42
- "eval_loss": 6787.5068359375,
43
- "eval_runtime": 2.0906,
44
- "eval_samples_per_second": 15.307,
45
- "eval_steps_per_second": 1.913,
46
- "step": 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  },
48
  {
49
  "epoch": 5.0,
50
  "learning_rate": 0.0,
51
- "loss": 6292.5953,
52
- "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
- "eval_loss": 6787.48388671875,
57
- "eval_runtime": 2.2266,
58
- "eval_samples_per_second": 14.372,
59
- "eval_steps_per_second": 1.796,
60
- "step": 20
61
  },
62
  {
63
  "epoch": 5.0,
64
- "step": 20,
65
- "total_flos": 7.65002115072e+16,
66
- "train_loss": 6234.263671875,
67
- "train_runtime": 98.3256,
68
- "train_samples_per_second": 1.627,
69
- "train_steps_per_second": 0.203
70
  }
71
  ],
72
- "max_steps": 20,
73
  "num_train_epochs": 5,
74
- "total_flos": 7.65002115072e+16,
75
  "trial_name": null,
76
  "trial_params": null
77
  }
 
1
  {
2
+ "best_metric": 6719.021484375,
3
+ "best_model_checkpoint": "./coco_outputs/checkpoint-640",
4
  "epoch": 5.0,
5
+ "global_step": 640,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 1.96875e-05,
13
+ "loss": 6346.0008,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 1.9375e-05,
19
+ "loss": 6400.0195,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 1.9062500000000003e-05,
25
+ "loss": 6295.0742,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 1.8750000000000002e-05,
31
+ "loss": 6280.2168,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.39,
36
+ "learning_rate": 1.84375e-05,
37
+ "loss": 6138.5656,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.47,
42
+ "learning_rate": 1.8125e-05,
43
+ "loss": 6088.3699,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.55,
48
+ "learning_rate": 1.7812500000000003e-05,
49
+ "loss": 6224.8137,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.62,
54
+ "learning_rate": 1.7500000000000002e-05,
55
+ "loss": 6147.6773,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.7,
60
+ "learning_rate": 1.71875e-05,
61
+ "loss": 6426.423,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.78,
66
+ "learning_rate": 1.6875e-05,
67
+ "loss": 6195.2883,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.86,
72
+ "learning_rate": 1.6562500000000003e-05,
73
+ "loss": 6409.0055,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 0.94,
78
+ "learning_rate": 1.6250000000000002e-05,
79
+ "loss": 6457.3832,
80
+ "step": 120
81
+ },
82
  {
83
  "epoch": 1.0,
84
+ "eval_loss": 6720.3701171875,
85
+ "eval_runtime": 16.7008,
86
+ "eval_samples_per_second": 15.329,
87
+ "eval_steps_per_second": 1.916,
88
+ "step": 128
89
+ },
90
+ {
91
+ "epoch": 1.02,
92
+ "learning_rate": 1.59375e-05,
93
+ "loss": 6196.2672,
94
+ "step": 130
95
+ },
96
+ {
97
+ "epoch": 1.09,
98
+ "learning_rate": 1.5625e-05,
99
+ "loss": 6187.7188,
100
+ "step": 140
101
+ },
102
+ {
103
+ "epoch": 1.17,
104
+ "learning_rate": 1.5312500000000003e-05,
105
+ "loss": 6357.3324,
106
+ "step": 150
107
+ },
108
+ {
109
+ "epoch": 1.25,
110
+ "learning_rate": 1.5000000000000002e-05,
111
+ "loss": 6108.043,
112
+ "step": 160
113
+ },
114
+ {
115
+ "epoch": 1.33,
116
+ "learning_rate": 1.4687500000000001e-05,
117
+ "loss": 6499.2922,
118
+ "step": 170
119
+ },
120
+ {
121
+ "epoch": 1.41,
122
+ "learning_rate": 1.4375e-05,
123
+ "loss": 6227.1672,
124
+ "step": 180
125
+ },
126
+ {
127
+ "epoch": 1.48,
128
+ "learning_rate": 1.4062500000000001e-05,
129
+ "loss": 6416.4754,
130
+ "step": 190
131
+ },
132
+ {
133
+ "epoch": 1.56,
134
+ "learning_rate": 1.375e-05,
135
+ "loss": 6024.552,
136
+ "step": 200
137
+ },
138
+ {
139
+ "epoch": 1.64,
140
+ "learning_rate": 1.3437500000000001e-05,
141
+ "loss": 5964.7684,
142
+ "step": 210
143
+ },
144
+ {
145
+ "epoch": 1.72,
146
+ "learning_rate": 1.3125e-05,
147
+ "loss": 5876.4055,
148
+ "step": 220
149
+ },
150
+ {
151
+ "epoch": 1.8,
152
+ "learning_rate": 1.2812500000000001e-05,
153
+ "loss": 6149.1523,
154
+ "step": 230
155
+ },
156
+ {
157
+ "epoch": 1.88,
158
+ "learning_rate": 1.25e-05,
159
+ "loss": 6330.3543,
160
+ "step": 240
161
+ },
162
+ {
163
+ "epoch": 1.95,
164
+ "learning_rate": 1.2187500000000001e-05,
165
+ "loss": 6234.0246,
166
+ "step": 250
167
  },
168
  {
169
  "epoch": 2.0,
170
+ "eval_loss": 6719.166015625,
171
+ "eval_runtime": 16.7578,
172
+ "eval_samples_per_second": 15.276,
173
+ "eval_steps_per_second": 1.91,
174
+ "step": 256
175
+ },
176
+ {
177
+ "epoch": 2.03,
178
+ "learning_rate": 1.1875e-05,
179
+ "loss": 6279.7289,
180
+ "step": 260
181
+ },
182
+ {
183
+ "epoch": 2.11,
184
+ "learning_rate": 1.1562500000000002e-05,
185
+ "loss": 6253.7887,
186
+ "step": 270
187
+ },
188
+ {
189
+ "epoch": 2.19,
190
+ "learning_rate": 1.125e-05,
191
+ "loss": 6329.3629,
192
+ "step": 280
193
+ },
194
+ {
195
+ "epoch": 2.27,
196
+ "learning_rate": 1.0937500000000002e-05,
197
+ "loss": 6185.052,
198
+ "step": 290
199
+ },
200
+ {
201
+ "epoch": 2.34,
202
+ "learning_rate": 1.0625e-05,
203
+ "loss": 5918.475,
204
+ "step": 300
205
+ },
206
+ {
207
+ "epoch": 2.42,
208
+ "learning_rate": 1.0312500000000002e-05,
209
+ "loss": 6114.2844,
210
+ "step": 310
211
  },
212
  {
213
  "epoch": 2.5,
214
  "learning_rate": 1e-05,
215
+ "loss": 6149.1941,
216
+ "step": 320
217
+ },
218
+ {
219
+ "epoch": 2.58,
220
+ "learning_rate": 9.6875e-06,
221
+ "loss": 6154.8313,
222
+ "step": 330
223
+ },
224
+ {
225
+ "epoch": 2.66,
226
+ "learning_rate": 9.375000000000001e-06,
227
+ "loss": 6146.2449,
228
+ "step": 340
229
+ },
230
+ {
231
+ "epoch": 2.73,
232
+ "learning_rate": 9.0625e-06,
233
+ "loss": 6099.1227,
234
+ "step": 350
235
+ },
236
+ {
237
+ "epoch": 2.81,
238
+ "learning_rate": 8.750000000000001e-06,
239
+ "loss": 6083.9359,
240
+ "step": 360
241
+ },
242
+ {
243
+ "epoch": 2.89,
244
+ "learning_rate": 8.4375e-06,
245
+ "loss": 6283.482,
246
+ "step": 370
247
+ },
248
+ {
249
+ "epoch": 2.97,
250
+ "learning_rate": 8.125000000000001e-06,
251
+ "loss": 6335.5293,
252
+ "step": 380
253
  },
254
  {
255
  "epoch": 3.0,
256
+ "eval_loss": 6719.03076171875,
257
+ "eval_runtime": 16.7226,
258
+ "eval_samples_per_second": 15.309,
259
+ "eval_steps_per_second": 1.914,
260
+ "step": 384
261
+ },
262
+ {
263
+ "epoch": 3.05,
264
+ "learning_rate": 7.8125e-06,
265
+ "loss": 6466.4867,
266
+ "step": 390
267
+ },
268
+ {
269
+ "epoch": 3.12,
270
+ "learning_rate": 7.500000000000001e-06,
271
+ "loss": 6327.5625,
272
+ "step": 400
273
+ },
274
+ {
275
+ "epoch": 3.2,
276
+ "learning_rate": 7.1875e-06,
277
+ "loss": 6213.9516,
278
+ "step": 410
279
+ },
280
+ {
281
+ "epoch": 3.28,
282
+ "learning_rate": 6.875e-06,
283
+ "loss": 6337.8066,
284
+ "step": 420
285
+ },
286
+ {
287
+ "epoch": 3.36,
288
+ "learning_rate": 6.5625e-06,
289
+ "loss": 6188.782,
290
+ "step": 430
291
+ },
292
+ {
293
+ "epoch": 3.44,
294
+ "learning_rate": 6.25e-06,
295
+ "loss": 6432.0715,
296
+ "step": 440
297
+ },
298
+ {
299
+ "epoch": 3.52,
300
+ "learning_rate": 5.9375e-06,
301
+ "loss": 6178.6059,
302
+ "step": 450
303
+ },
304
+ {
305
+ "epoch": 3.59,
306
+ "learning_rate": 5.625e-06,
307
+ "loss": 6356.5617,
308
+ "step": 460
309
+ },
310
+ {
311
+ "epoch": 3.67,
312
+ "learning_rate": 5.3125e-06,
313
+ "loss": 6572.6281,
314
+ "step": 470
315
+ },
316
+ {
317
+ "epoch": 3.75,
318
+ "learning_rate": 5e-06,
319
+ "loss": 6376.5836,
320
+ "step": 480
321
+ },
322
+ {
323
+ "epoch": 3.83,
324
+ "learning_rate": 4.6875000000000004e-06,
325
+ "loss": 6186.7969,
326
+ "step": 490
327
+ },
328
+ {
329
+ "epoch": 3.91,
330
+ "learning_rate": 4.3750000000000005e-06,
331
+ "loss": 6402.0125,
332
+ "step": 500
333
+ },
334
+ {
335
+ "epoch": 3.98,
336
+ "learning_rate": 4.0625000000000005e-06,
337
+ "loss": 6520.8047,
338
+ "step": 510
339
  },
340
  {
341
  "epoch": 4.0,
342
+ "eval_loss": 6719.060546875,
343
+ "eval_runtime": 16.8687,
344
+ "eval_samples_per_second": 15.176,
345
+ "eval_steps_per_second": 1.897,
346
+ "step": 512
347
+ },
348
+ {
349
+ "epoch": 4.06,
350
+ "learning_rate": 3.7500000000000005e-06,
351
+ "loss": 6016.2836,
352
+ "step": 520
353
+ },
354
+ {
355
+ "epoch": 4.14,
356
+ "learning_rate": 3.4375e-06,
357
+ "loss": 5934.718,
358
+ "step": 530
359
+ },
360
+ {
361
+ "epoch": 4.22,
362
+ "learning_rate": 3.125e-06,
363
+ "loss": 6232.1816,
364
+ "step": 540
365
+ },
366
+ {
367
+ "epoch": 4.3,
368
+ "learning_rate": 2.8125e-06,
369
+ "loss": 6584.9633,
370
+ "step": 550
371
+ },
372
+ {
373
+ "epoch": 4.38,
374
+ "learning_rate": 2.5e-06,
375
+ "loss": 6533.6297,
376
+ "step": 560
377
+ },
378
+ {
379
+ "epoch": 4.45,
380
+ "learning_rate": 2.1875000000000002e-06,
381
+ "loss": 6380.1676,
382
+ "step": 570
383
+ },
384
+ {
385
+ "epoch": 4.53,
386
+ "learning_rate": 1.8750000000000003e-06,
387
+ "loss": 6350.5738,
388
+ "step": 580
389
+ },
390
+ {
391
+ "epoch": 4.61,
392
+ "learning_rate": 1.5625e-06,
393
+ "loss": 5988.5664,
394
+ "step": 590
395
+ },
396
+ {
397
+ "epoch": 4.69,
398
+ "learning_rate": 1.25e-06,
399
+ "loss": 6169.0441,
400
+ "step": 600
401
+ },
402
+ {
403
+ "epoch": 4.77,
404
+ "learning_rate": 9.375000000000001e-07,
405
+ "loss": 6024.5156,
406
+ "step": 610
407
+ },
408
+ {
409
+ "epoch": 4.84,
410
+ "learning_rate": 6.25e-07,
411
+ "loss": 6515.2469,
412
+ "step": 620
413
+ },
414
+ {
415
+ "epoch": 4.92,
416
+ "learning_rate": 3.125e-07,
417
+ "loss": 6284.4926,
418
+ "step": 630
419
  },
420
  {
421
  "epoch": 5.0,
422
  "learning_rate": 0.0,
423
+ "loss": 5738.273,
424
+ "step": 640
425
  },
426
  {
427
  "epoch": 5.0,
428
+ "eval_loss": 6719.021484375,
429
+ "eval_runtime": 17.1732,
430
+ "eval_samples_per_second": 14.907,
431
+ "eval_steps_per_second": 1.863,
432
+ "step": 640
433
  },
434
  {
435
  "epoch": 5.0,
436
+ "step": 640,
437
+ "total_flos": 2.4480067682304e+18,
438
+ "train_loss": 6244.167712402344,
439
+ "train_runtime": 818.3257,
440
+ "train_samples_per_second": 6.257,
441
+ "train_steps_per_second": 0.782
442
  }
443
  ],
444
+ "max_steps": 640,
445
  "num_train_epochs": 5,
446
+ "total_flos": 2.4480067682304e+18,
447
  "trial_name": null,
448
  "trial_params": null
449
  }