c14kevincardenas commited on
Commit
d933eb3
·
verified ·
1 Parent(s): 9570d6d

End of training

Browse files
README.md CHANGED
@@ -3,6 +3,9 @@ library_name: transformers
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
 
 
 
6
  - generated_from_trainer
7
  model-index:
8
  - name: limbxy_pose_2heads_1layers_8embeddim
@@ -14,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # limbxy_pose_2heads_1layers_8embeddim
16
 
17
- This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
  - Loss: 0.0077
20
  - Rmse: 0.0876
 
3
  license: apache-2.0
4
  base_model: c14kevincardenas/beit-large-patch16-384-limb
5
  tags:
6
+ - image-regression
7
+ - human-movement
8
+ - vision
9
  - generated_from_trainer
10
  model-index:
11
  - name: limbxy_pose_2heads_1layers_8embeddim
 
17
 
18
  # limbxy_pose_2heads_1layers_8embeddim
19
 
20
+ This model is a fine-tuned version of [c14kevincardenas/beit-large-patch16-384-limb](https://huggingface.co/c14kevincardenas/beit-large-patch16-384-limb) on the c14kevincardenas/beta_caller_284_limbxy_pose dataset.
21
  It achieves the following results on the evaluation set:
22
  - Loss: 0.0077
23
  - Rmse: 0.0876
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.1402168720960617,
4
- "eval_rmse": 0.37445545196533203,
5
- "eval_runtime": 9.652,
6
- "eval_samples_per_second": 103.605,
7
- "eval_steps_per_second": 1.658,
8
  "total_flos": 0.0,
9
- "train_loss": 0.16565035152971075,
10
- "train_runtime": 3332.9842,
11
- "train_samples_per_second": 33.976,
12
- "train_steps_per_second": 0.534
13
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.007665493991225958,
4
+ "eval_rmse": 0.08755281567573547,
5
+ "eval_runtime": 9.8779,
6
+ "eval_samples_per_second": 101.236,
7
+ "eval_steps_per_second": 1.62,
8
  "total_flos": 0.0,
9
+ "train_loss": 0.10928233507602067,
10
+ "train_runtime": 3299.6169,
11
+ "train_samples_per_second": 34.319,
12
+ "train_steps_per_second": 0.539
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_loss": 0.1402168720960617,
4
- "eval_rmse": 0.37445545196533203,
5
- "eval_runtime": 9.652,
6
- "eval_samples_per_second": 103.605,
7
- "eval_steps_per_second": 1.658
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_loss": 0.007665493991225958,
4
+ "eval_rmse": 0.08755281567573547,
5
+ "eval_runtime": 9.8779,
6
+ "eval_samples_per_second": 101.236,
7
+ "eval_steps_per_second": 1.62
8
  }
runs/Feb19_16-14-54_galactica.ad.cirange.net/events.out.tfevents.1739985392.galactica.ad.cirange.net.2991274.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b8477a11731996ee379c0788791671e5683f7e7ef5c81c72beb57a45b87300
3
+ size 407
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.16565035152971075,
5
- "train_runtime": 3332.9842,
6
- "train_samples_per_second": 33.976,
7
- "train_steps_per_second": 0.534
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.10928233507602067,
5
+ "train_runtime": 3299.6169,
6
+ "train_samples_per_second": 34.319,
7
+ "train_steps_per_second": 0.539
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.1402168720960617,
3
  "best_model_checkpoint": "limbxy_pose/checkpoint-1780",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
@@ -10,689 +10,689 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
- "grad_norm": 1.4306482431017398e-12,
14
  "learning_rate": 5e-06,
15
- "loss": 0.3282,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
- "grad_norm": 7.286453206209118e-14,
21
  "learning_rate": 1e-05,
22
- "loss": 0.3304,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
- "grad_norm": 1.7020333059614146e-11,
28
  "learning_rate": 1.5e-05,
29
- "loss": 0.3414,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
- "eval_loss": 0.33112141489982605,
35
- "eval_rmse": 0.5754314661026001,
36
- "eval_runtime": 9.461,
37
- "eval_samples_per_second": 105.697,
38
- "eval_steps_per_second": 1.691,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
- "grad_norm": 4.0378258462508976e-13,
44
  "learning_rate": 2e-05,
45
- "loss": 0.3408,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
- "grad_norm": 38181.53515625,
51
  "learning_rate": 2.5e-05,
52
- "loss": 0.2674,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
- "grad_norm": 1195767.75,
58
  "learning_rate": 3e-05,
59
- "loss": 0.1862,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
- "grad_norm": 4560497.0,
65
  "learning_rate": 3.5e-05,
66
- "loss": 0.1834,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
- "eval_loss": 0.2025543600320816,
72
- "eval_rmse": 0.4500603973865509,
73
- "eval_runtime": 9.155,
74
- "eval_samples_per_second": 109.23,
75
- "eval_steps_per_second": 1.748,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
- "grad_norm": 2378441.75,
81
  "learning_rate": 4e-05,
82
- "loss": 0.1947,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
- "grad_norm": 1214816.375,
88
  "learning_rate": 4.5e-05,
89
- "loss": 0.1846,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
- "grad_norm": 596520.4375,
95
  "learning_rate": 5e-05,
96
- "loss": 0.1645,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
- "eval_loss": 0.14911922812461853,
102
- "eval_rmse": 0.38615959882736206,
103
- "eval_runtime": 9.1063,
104
- "eval_samples_per_second": 109.815,
105
- "eval_steps_per_second": 1.757,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
- "grad_norm": 789416.375,
111
  "learning_rate": 4.918300653594771e-05,
112
- "loss": 0.1692,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
- "grad_norm": 3581939.75,
118
  "learning_rate": 4.8366013071895424e-05,
119
- "loss": 0.1954,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
- "grad_norm": 2062457.125,
125
  "learning_rate": 4.7549019607843135e-05,
126
- "loss": 0.1924,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
- "grad_norm": 2538814.0,
132
  "learning_rate": 4.673202614379085e-05,
133
- "loss": 0.1729,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
- "eval_loss": 0.2542487680912018,
139
- "eval_rmse": 0.5042308568954468,
140
- "eval_runtime": 9.4787,
141
- "eval_samples_per_second": 105.5,
142
- "eval_steps_per_second": 1.688,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
- "grad_norm": 5207696.0,
148
  "learning_rate": 4.5915032679738564e-05,
149
- "loss": 0.1845,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
- "grad_norm": 2629999.75,
155
  "learning_rate": 4.5098039215686275e-05,
156
- "loss": 0.1854,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
- "grad_norm": 764233.0625,
162
  "learning_rate": 4.4281045751633986e-05,
163
- "loss": 0.1612,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
- "eval_loss": 0.15393178164958954,
169
- "eval_rmse": 0.39234140515327454,
170
- "eval_runtime": 9.1662,
171
- "eval_samples_per_second": 109.097,
172
- "eval_steps_per_second": 1.746,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
- "grad_norm": 5359111.0,
178
  "learning_rate": 4.3464052287581704e-05,
179
- "loss": 0.1679,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
- "grad_norm": 5016195.0,
185
  "learning_rate": 4.2647058823529415e-05,
186
- "loss": 0.1658,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
- "grad_norm": 1101185.75,
192
  "learning_rate": 4.1830065359477126e-05,
193
- "loss": 0.1567,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
- "grad_norm": 2115526.0,
199
  "learning_rate": 4.101307189542484e-05,
200
- "loss": 0.1578,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
- "eval_loss": 0.14643920958042145,
206
- "eval_rmse": 0.38267379999160767,
207
- "eval_runtime": 9.2072,
208
- "eval_samples_per_second": 108.61,
209
- "eval_steps_per_second": 1.738,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
- "grad_norm": 1441159.25,
215
  "learning_rate": 4.0196078431372555e-05,
216
- "loss": 0.1535,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
- "grad_norm": 1516853.375,
222
  "learning_rate": 3.9379084967320266e-05,
223
- "loss": 0.164,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
- "grad_norm": 668387.4375,
229
  "learning_rate": 3.8562091503267977e-05,
230
- "loss": 0.1594,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
- "eval_loss": 0.18601085245609283,
236
- "eval_rmse": 0.43128976225852966,
237
- "eval_runtime": 9.5674,
238
- "eval_samples_per_second": 104.522,
239
- "eval_steps_per_second": 1.672,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
- "grad_norm": 3315089.5,
245
  "learning_rate": 3.774509803921569e-05,
246
- "loss": 0.1673,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
- "grad_norm": 178060.9375,
252
  "learning_rate": 3.6928104575163405e-05,
253
- "loss": 0.1514,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
- "grad_norm": 924463.8125,
259
  "learning_rate": 3.611111111111111e-05,
260
- "loss": 0.1454,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
- "grad_norm": 44355.859375,
266
  "learning_rate": 3.529411764705883e-05,
267
- "loss": 0.1546,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
- "eval_loss": 0.14328204095363617,
273
- "eval_rmse": 0.37852615118026733,
274
- "eval_runtime": 9.3971,
275
- "eval_samples_per_second": 106.416,
276
  "eval_steps_per_second": 1.703,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
- "grad_norm": 2000040.75,
282
  "learning_rate": 3.447712418300654e-05,
283
- "loss": 0.1503,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
- "grad_norm": 1668223.5,
289
  "learning_rate": 3.366013071895425e-05,
290
- "loss": 0.1469,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
- "grad_norm": 969523.8125,
296
  "learning_rate": 3.284313725490196e-05,
297
- "loss": 0.1525,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
- "grad_norm": 399384.6875,
303
  "learning_rate": 3.202614379084967e-05,
304
- "loss": 0.1517,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
- "eval_loss": 0.14162829518318176,
310
- "eval_rmse": 0.37633535265922546,
311
- "eval_runtime": 9.2193,
312
- "eval_samples_per_second": 108.468,
313
- "eval_steps_per_second": 1.735,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
- "grad_norm": 1699236.625,
319
  "learning_rate": 3.120915032679739e-05,
320
- "loss": 0.1475,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
- "grad_norm": 200852.296875,
326
  "learning_rate": 3.0392156862745097e-05,
327
- "loss": 0.1481,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
- "grad_norm": 837677.625,
333
  "learning_rate": 2.957516339869281e-05,
334
- "loss": 0.1461,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
- "eval_loss": 0.15756502747535706,
340
- "eval_rmse": 0.3969446122646332,
341
- "eval_runtime": 9.8254,
342
- "eval_samples_per_second": 101.777,
343
- "eval_steps_per_second": 1.628,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
- "grad_norm": 1678043.0,
349
  "learning_rate": 2.8758169934640522e-05,
350
- "loss": 0.1571,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
- "grad_norm": 417715.75,
356
  "learning_rate": 2.7941176470588236e-05,
357
- "loss": 0.1508,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
- "grad_norm": 296561.8125,
363
  "learning_rate": 2.7124183006535947e-05,
364
- "loss": 0.1456,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
- "grad_norm": 1708742.5,
370
  "learning_rate": 2.630718954248366e-05,
371
- "loss": 0.1519,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
- "eval_loss": 0.1622958481311798,
377
- "eval_rmse": 0.4028595983982086,
378
- "eval_runtime": 9.6192,
379
- "eval_samples_per_second": 103.958,
380
- "eval_steps_per_second": 1.663,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
- "grad_norm": 923283.0,
386
  "learning_rate": 2.5490196078431373e-05,
387
- "loss": 0.1527,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
- "grad_norm": 78371.8671875,
393
  "learning_rate": 2.4673202614379087e-05,
394
- "loss": 0.1499,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
- "grad_norm": 1358486.375,
400
  "learning_rate": 2.38562091503268e-05,
401
- "loss": 0.1491,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
- "eval_loss": 0.14110486209392548,
407
- "eval_rmse": 0.37563925981521606,
408
- "eval_runtime": 9.459,
409
- "eval_samples_per_second": 105.719,
410
- "eval_steps_per_second": 1.692,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
- "grad_norm": 677287.375,
416
  "learning_rate": 2.303921568627451e-05,
417
- "loss": 0.1458,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
- "grad_norm": 1671154.75,
423
  "learning_rate": 2.2222222222222223e-05,
424
- "loss": 0.1446,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
- "grad_norm": 1566995.875,
430
  "learning_rate": 2.1405228758169934e-05,
431
- "loss": 0.1449,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
- "grad_norm": 457053.46875,
437
  "learning_rate": 2.058823529411765e-05,
438
- "loss": 0.1489,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
- "eval_loss": 0.1416008174419403,
444
- "eval_rmse": 0.3762988746166229,
445
- "eval_runtime": 9.5408,
446
- "eval_samples_per_second": 104.813,
447
- "eval_steps_per_second": 1.677,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
- "grad_norm": 1004067.375,
453
  "learning_rate": 1.977124183006536e-05,
454
- "loss": 0.1459,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
- "grad_norm": 2169427.0,
460
  "learning_rate": 1.895424836601307e-05,
461
- "loss": 0.1453,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
- "grad_norm": 408455.125,
467
  "learning_rate": 1.8137254901960785e-05,
468
- "loss": 0.1425,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
- "eval_loss": 0.14256992936134338,
474
- "eval_rmse": 0.3775843381881714,
475
- "eval_runtime": 9.5439,
476
- "eval_samples_per_second": 104.779,
477
- "eval_steps_per_second": 1.676,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
- "grad_norm": 582970.4375,
483
  "learning_rate": 1.7320261437908496e-05,
484
- "loss": 0.147,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
- "grad_norm": 524589.5625,
490
  "learning_rate": 1.650326797385621e-05,
491
- "loss": 0.1424,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
- "grad_norm": 1342928.875,
497
  "learning_rate": 1.568627450980392e-05,
498
- "loss": 0.1433,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
- "grad_norm": 1165010.25,
504
  "learning_rate": 1.4869281045751634e-05,
505
- "loss": 0.145,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
- "eval_loss": 0.14070571959018707,
511
- "eval_rmse": 0.37510761618614197,
512
- "eval_runtime": 9.3011,
513
- "eval_samples_per_second": 107.514,
514
- "eval_steps_per_second": 1.72,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
- "grad_norm": 1536645.125,
520
  "learning_rate": 1.4052287581699347e-05,
521
- "loss": 0.1397,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
- "grad_norm": 121041.3984375,
527
  "learning_rate": 1.323529411764706e-05,
528
- "loss": 0.145,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
- "grad_norm": 850638.375,
534
  "learning_rate": 1.2418300653594772e-05,
535
- "loss": 0.1418,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
- "eval_loss": 0.144333153963089,
541
- "eval_rmse": 0.37991204857826233,
542
- "eval_runtime": 9.7712,
543
- "eval_samples_per_second": 102.342,
544
- "eval_steps_per_second": 1.637,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
- "grad_norm": 1658046.0,
550
  "learning_rate": 1.1601307189542485e-05,
551
- "loss": 0.1432,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
- "grad_norm": 291296.34375,
557
  "learning_rate": 1.0784313725490197e-05,
558
- "loss": 0.1422,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
- "grad_norm": 71275.6953125,
564
  "learning_rate": 9.96732026143791e-06,
565
- "loss": 0.1429,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
- "grad_norm": 717235.9375,
571
  "learning_rate": 9.150326797385621e-06,
572
- "loss": 0.1411,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
- "eval_loss": 0.1402604728937149,
578
- "eval_rmse": 0.3745136559009552,
579
- "eval_runtime": 9.4156,
580
- "eval_samples_per_second": 106.207,
581
- "eval_steps_per_second": 1.699,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
- "grad_norm": 956427.5625,
587
  "learning_rate": 8.333333333333334e-06,
588
- "loss": 0.1424,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
- "grad_norm": 391686.90625,
594
  "learning_rate": 7.5163398692810456e-06,
595
- "loss": 0.1434,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
- "grad_norm": 178194.640625,
601
  "learning_rate": 6.699346405228758e-06,
602
- "loss": 0.1396,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
- "grad_norm": 315803.8125,
608
  "learning_rate": 5.882352941176471e-06,
609
- "loss": 0.1398,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
- "eval_loss": 0.14033755660057068,
615
- "eval_rmse": 0.37461650371551514,
616
- "eval_runtime": 9.5116,
617
- "eval_samples_per_second": 105.135,
618
- "eval_steps_per_second": 1.682,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
- "grad_norm": 201845.0,
624
  "learning_rate": 5.065359477124184e-06,
625
- "loss": 0.1396,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
- "grad_norm": 346229.6875,
631
  "learning_rate": 4.2483660130718954e-06,
632
- "loss": 0.1386,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
- "grad_norm": 75540.28125,
638
  "learning_rate": 3.431372549019608e-06,
639
- "loss": 0.143,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
- "eval_loss": 0.14052481949329376,
645
- "eval_rmse": 0.37486639618873596,
646
- "eval_runtime": 9.3754,
647
- "eval_samples_per_second": 106.662,
648
- "eval_steps_per_second": 1.707,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
- "grad_norm": 240692.8125,
654
  "learning_rate": 2.6143790849673204e-06,
655
- "loss": 0.1409,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
- "grad_norm": 214180.84375,
661
  "learning_rate": 1.7973856209150326e-06,
662
- "loss": 0.141,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
- "grad_norm": 108064.1484375,
668
  "learning_rate": 9.80392156862745e-07,
669
- "loss": 0.1407,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
- "grad_norm": 161086.03125,
675
  "learning_rate": 1.6339869281045752e-07,
676
- "loss": 0.1395,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
- "eval_loss": 0.1402168720960617,
682
- "eval_rmse": 0.37445545196533203,
683
- "eval_runtime": 9.6855,
684
- "eval_samples_per_second": 103.247,
685
- "eval_steps_per_second": 1.652,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
- "train_loss": 0.16565035152971075,
693
- "train_runtime": 3332.9842,
694
- "train_samples_per_second": 33.976,
695
- "train_steps_per_second": 0.534
696
  }
697
  ],
698
  "logging_steps": 25,
 
1
  {
2
+ "best_metric": 0.007665493991225958,
3
  "best_model_checkpoint": "limbxy_pose/checkpoint-1780",
4
  "epoch": 20.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2808988764044944,
13
+ "grad_norm": 2355601.5,
14
  "learning_rate": 5e-06,
15
+ "loss": 0.3253,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.5617977528089888,
20
+ "grad_norm": 873035.5,
21
  "learning_rate": 1e-05,
22
+ "loss": 0.2139,
23
  "step": 50
24
  },
25
  {
26
  "epoch": 0.8426966292134831,
27
+ "grad_norm": 1139656.125,
28
  "learning_rate": 1.5e-05,
29
+ "loss": 0.174,
30
  "step": 75
31
  },
32
  {
33
  "epoch": 1.0,
34
+ "eval_loss": 0.14710678160190582,
35
+ "eval_rmse": 0.38354501128196716,
36
+ "eval_runtime": 9.3728,
37
+ "eval_samples_per_second": 106.692,
38
+ "eval_steps_per_second": 1.707,
39
  "step": 89
40
  },
41
  {
42
  "epoch": 1.1235955056179776,
43
+ "grad_norm": 959863.5625,
44
  "learning_rate": 2e-05,
45
+ "loss": 0.1598,
46
  "step": 100
47
  },
48
  {
49
  "epoch": 1.404494382022472,
50
+ "grad_norm": 368922.84375,
51
  "learning_rate": 2.5e-05,
52
+ "loss": 0.1569,
53
  "step": 125
54
  },
55
  {
56
  "epoch": 1.6853932584269664,
57
+ "grad_norm": 1642762.375,
58
  "learning_rate": 3e-05,
59
+ "loss": 0.1584,
60
  "step": 150
61
  },
62
  {
63
  "epoch": 1.9662921348314608,
64
+ "grad_norm": 2866672.75,
65
  "learning_rate": 3.5e-05,
66
+ "loss": 0.1559,
67
  "step": 175
68
  },
69
  {
70
  "epoch": 2.0,
71
+ "eval_loss": 0.14660561084747314,
72
+ "eval_rmse": 0.38289114832878113,
73
+ "eval_runtime": 9.5918,
74
+ "eval_samples_per_second": 104.256,
75
+ "eval_steps_per_second": 1.668,
76
  "step": 178
77
  },
78
  {
79
  "epoch": 2.247191011235955,
80
+ "grad_norm": 140559.03125,
81
  "learning_rate": 4e-05,
82
+ "loss": 0.1467,
83
  "step": 200
84
  },
85
  {
86
  "epoch": 2.5280898876404496,
87
+ "grad_norm": 2080408.125,
88
  "learning_rate": 4.5e-05,
89
+ "loss": 0.1534,
90
  "step": 225
91
  },
92
  {
93
  "epoch": 2.808988764044944,
94
+ "grad_norm": 2639697.5,
95
  "learning_rate": 5e-05,
96
+ "loss": 0.1485,
97
  "step": 250
98
  },
99
  {
100
  "epoch": 3.0,
101
+ "eval_loss": 0.16608364880084991,
102
+ "eval_rmse": 0.4075336158275604,
103
+ "eval_runtime": 9.3878,
104
+ "eval_samples_per_second": 106.521,
105
+ "eval_steps_per_second": 1.704,
106
  "step": 267
107
  },
108
  {
109
  "epoch": 3.0898876404494384,
110
+ "grad_norm": 1161722.875,
111
  "learning_rate": 4.918300653594771e-05,
112
+ "loss": 0.1613,
113
  "step": 275
114
  },
115
  {
116
  "epoch": 3.370786516853933,
117
+ "grad_norm": 3452693.5,
118
  "learning_rate": 4.8366013071895424e-05,
119
+ "loss": 0.1791,
120
  "step": 300
121
  },
122
  {
123
  "epoch": 3.6516853932584272,
124
+ "grad_norm": 1499728.0,
125
  "learning_rate": 4.7549019607843135e-05,
126
+ "loss": 0.1599,
127
  "step": 325
128
  },
129
  {
130
  "epoch": 3.932584269662921,
131
+ "grad_norm": 2438932.5,
132
  "learning_rate": 4.673202614379085e-05,
133
+ "loss": 0.1624,
134
  "step": 350
135
  },
136
  {
137
  "epoch": 4.0,
138
+ "eval_loss": 0.14177033305168152,
139
+ "eval_rmse": 0.3765240013599396,
140
+ "eval_runtime": 9.4578,
141
+ "eval_samples_per_second": 105.733,
142
+ "eval_steps_per_second": 1.692,
143
  "step": 356
144
  },
145
  {
146
  "epoch": 4.213483146067416,
147
+ "grad_norm": 1148349.625,
148
  "learning_rate": 4.5915032679738564e-05,
149
+ "loss": 0.1568,
150
  "step": 375
151
  },
152
  {
153
  "epoch": 4.49438202247191,
154
+ "grad_norm": 2919198.5,
155
  "learning_rate": 4.5098039215686275e-05,
156
+ "loss": 0.1513,
157
  "step": 400
158
  },
159
  {
160
  "epoch": 4.775280898876405,
161
+ "grad_norm": 1855904.125,
162
  "learning_rate": 4.4281045751633986e-05,
163
+ "loss": 0.1457,
164
  "step": 425
165
  },
166
  {
167
  "epoch": 5.0,
168
+ "eval_loss": 0.14367185533046722,
169
+ "eval_rmse": 0.3790406882762909,
170
+ "eval_runtime": 9.4902,
171
+ "eval_samples_per_second": 105.372,
172
+ "eval_steps_per_second": 1.686,
173
  "step": 445
174
  },
175
  {
176
  "epoch": 5.056179775280899,
177
+ "grad_norm": 3503330.5,
178
  "learning_rate": 4.3464052287581704e-05,
179
+ "loss": 0.1512,
180
  "step": 450
181
  },
182
  {
183
  "epoch": 5.337078651685394,
184
+ "grad_norm": 334371.0,
185
  "learning_rate": 4.2647058823529415e-05,
186
+ "loss": 0.1517,
187
  "step": 475
188
  },
189
  {
190
  "epoch": 5.617977528089888,
191
+ "grad_norm": 987476.0625,
192
  "learning_rate": 4.1830065359477126e-05,
193
+ "loss": 0.1511,
194
  "step": 500
195
  },
196
  {
197
  "epoch": 5.898876404494382,
198
+ "grad_norm": 1318160.375,
199
  "learning_rate": 4.101307189542484e-05,
200
+ "loss": 0.1635,
201
  "step": 525
202
  },
203
  {
204
  "epoch": 6.0,
205
+ "eval_loss": 0.14237765967845917,
206
+ "eval_rmse": 0.37732964754104614,
207
+ "eval_runtime": 9.6289,
208
+ "eval_samples_per_second": 103.854,
209
+ "eval_steps_per_second": 1.662,
210
  "step": 534
211
  },
212
  {
213
  "epoch": 6.179775280898877,
214
+ "grad_norm": 2755903.5,
215
  "learning_rate": 4.0196078431372555e-05,
216
+ "loss": 0.1616,
217
  "step": 550
218
  },
219
  {
220
  "epoch": 6.460674157303371,
221
+ "grad_norm": 647614.5625,
222
  "learning_rate": 3.9379084967320266e-05,
223
+ "loss": 0.1498,
224
  "step": 575
225
  },
226
  {
227
  "epoch": 6.741573033707866,
228
+ "grad_norm": 1388723.25,
229
  "learning_rate": 3.8562091503267977e-05,
230
+ "loss": 0.1428,
231
  "step": 600
232
  },
233
  {
234
  "epoch": 7.0,
235
+ "eval_loss": 0.15840163826942444,
236
+ "eval_rmse": 0.39799708127975464,
237
+ "eval_runtime": 9.5792,
238
+ "eval_samples_per_second": 104.393,
239
+ "eval_steps_per_second": 1.67,
240
  "step": 623
241
  },
242
  {
243
  "epoch": 7.022471910112359,
244
+ "grad_norm": 3193922.25,
245
  "learning_rate": 3.774509803921569e-05,
246
+ "loss": 0.15,
247
  "step": 625
248
  },
249
  {
250
  "epoch": 7.303370786516854,
251
+ "grad_norm": 634225.0,
252
  "learning_rate": 3.6928104575163405e-05,
253
+ "loss": 0.1491,
254
  "step": 650
255
  },
256
  {
257
  "epoch": 7.584269662921348,
258
+ "grad_norm": 1895702.75,
259
  "learning_rate": 3.611111111111111e-05,
260
+ "loss": 0.1521,
261
  "step": 675
262
  },
263
  {
264
  "epoch": 7.865168539325842,
265
+ "grad_norm": 410481.09375,
266
  "learning_rate": 3.529411764705883e-05,
267
+ "loss": 0.1481,
268
  "step": 700
269
  },
270
  {
271
  "epoch": 8.0,
272
+ "eval_loss": 0.140840545296669,
273
+ "eval_rmse": 0.3752872943878174,
274
+ "eval_runtime": 9.3963,
275
+ "eval_samples_per_second": 106.424,
276
  "eval_steps_per_second": 1.703,
277
  "step": 712
278
  },
279
  {
280
  "epoch": 8.146067415730338,
281
+ "grad_norm": 121664.78125,
282
  "learning_rate": 3.447712418300654e-05,
283
+ "loss": 0.1497,
284
  "step": 725
285
  },
286
  {
287
  "epoch": 8.426966292134832,
288
+ "grad_norm": 691517.125,
289
  "learning_rate": 3.366013071895425e-05,
290
+ "loss": 0.1423,
291
  "step": 750
292
  },
293
  {
294
  "epoch": 8.707865168539326,
295
+ "grad_norm": 2580666.25,
296
  "learning_rate": 3.284313725490196e-05,
297
+ "loss": 0.1451,
298
  "step": 775
299
  },
300
  {
301
  "epoch": 8.98876404494382,
302
+ "grad_norm": 1733368.625,
303
  "learning_rate": 3.202614379084967e-05,
304
+ "loss": 0.1494,
305
  "step": 800
306
  },
307
  {
308
  "epoch": 9.0,
309
+ "eval_loss": 0.1478467881679535,
310
+ "eval_rmse": 0.38450852036476135,
311
+ "eval_runtime": 9.5629,
312
+ "eval_samples_per_second": 104.571,
313
+ "eval_steps_per_second": 1.673,
314
  "step": 801
315
  },
316
  {
317
  "epoch": 9.269662921348315,
318
+ "grad_norm": 312437.125,
319
  "learning_rate": 3.120915032679739e-05,
320
+ "loss": 0.1429,
321
  "step": 825
322
  },
323
  {
324
  "epoch": 9.55056179775281,
325
+ "grad_norm": 231154.46875,
326
  "learning_rate": 3.0392156862745097e-05,
327
+ "loss": 0.1444,
328
  "step": 850
329
  },
330
  {
331
  "epoch": 9.831460674157304,
332
+ "grad_norm": 1203652.5,
333
  "learning_rate": 2.957516339869281e-05,
334
+ "loss": 0.1417,
335
  "step": 875
336
  },
337
  {
338
  "epoch": 10.0,
339
+ "eval_loss": 0.1544562131166458,
340
+ "eval_rmse": 0.393009215593338,
341
+ "eval_runtime": 9.6401,
342
+ "eval_samples_per_second": 103.733,
343
+ "eval_steps_per_second": 1.66,
344
  "step": 890
345
  },
346
  {
347
  "epoch": 10.112359550561798,
348
+ "grad_norm": 1199470.25,
349
  "learning_rate": 2.8758169934640522e-05,
350
+ "loss": 0.158,
351
  "step": 900
352
  },
353
  {
354
  "epoch": 10.393258426966293,
355
+ "grad_norm": 192614.96875,
356
  "learning_rate": 2.7941176470588236e-05,
357
+ "loss": 0.15,
358
  "step": 925
359
  },
360
  {
361
  "epoch": 10.674157303370787,
362
+ "grad_norm": 1780062.125,
363
  "learning_rate": 2.7124183006535947e-05,
364
+ "loss": 0.1439,
365
  "step": 950
366
  },
367
  {
368
  "epoch": 10.955056179775282,
369
+ "grad_norm": 1194267.125,
370
  "learning_rate": 2.630718954248366e-05,
371
+ "loss": 0.1421,
372
  "step": 975
373
  },
374
  {
375
  "epoch": 11.0,
376
+ "eval_loss": 0.1432497501373291,
377
+ "eval_rmse": 0.37848347425460815,
378
+ "eval_runtime": 9.3096,
379
+ "eval_samples_per_second": 107.416,
380
+ "eval_steps_per_second": 1.719,
381
  "step": 979
382
  },
383
  {
384
  "epoch": 11.235955056179776,
385
+ "grad_norm": 527445.1875,
386
  "learning_rate": 2.5490196078431373e-05,
387
+ "loss": 0.1392,
388
  "step": 1000
389
  },
390
  {
391
  "epoch": 11.51685393258427,
392
+ "grad_norm": 186754.59375,
393
  "learning_rate": 2.4673202614379087e-05,
394
+ "loss": 0.1455,
395
  "step": 1025
396
  },
397
  {
398
  "epoch": 11.797752808988765,
399
+ "grad_norm": 184548.09375,
400
  "learning_rate": 2.38562091503268e-05,
401
+ "loss": 0.145,
402
  "step": 1050
403
  },
404
  {
405
  "epoch": 12.0,
406
+ "eval_loss": 0.14026711881160736,
407
+ "eval_rmse": 0.37452250719070435,
408
+ "eval_runtime": 9.4209,
409
+ "eval_samples_per_second": 106.147,
410
+ "eval_steps_per_second": 1.698,
411
  "step": 1068
412
  },
413
  {
414
  "epoch": 12.07865168539326,
415
+ "grad_norm": 234157.390625,
416
  "learning_rate": 2.303921568627451e-05,
417
+ "loss": 0.1445,
418
  "step": 1075
419
  },
420
  {
421
  "epoch": 12.359550561797754,
422
+ "grad_norm": 1915960.625,
423
  "learning_rate": 2.2222222222222223e-05,
424
+ "loss": 0.144,
425
  "step": 1100
426
  },
427
  {
428
  "epoch": 12.640449438202246,
429
+ "grad_norm": 664816.875,
430
  "learning_rate": 2.1405228758169934e-05,
431
+ "loss": 0.142,
432
  "step": 1125
433
  },
434
  {
435
  "epoch": 12.921348314606742,
436
+ "grad_norm": 237225.984375,
437
  "learning_rate": 2.058823529411765e-05,
438
+ "loss": 0.1466,
439
  "step": 1150
440
  },
441
  {
442
  "epoch": 13.0,
443
+ "eval_loss": 0.14430756866931915,
444
+ "eval_rmse": 0.37987837195396423,
445
+ "eval_runtime": 9.2838,
446
+ "eval_samples_per_second": 107.715,
447
+ "eval_steps_per_second": 1.723,
448
  "step": 1157
449
  },
450
  {
451
  "epoch": 13.202247191011235,
452
+ "grad_norm": 322684.4375,
453
  "learning_rate": 1.977124183006536e-05,
454
+ "loss": 0.1446,
455
  "step": 1175
456
  },
457
  {
458
  "epoch": 13.48314606741573,
459
+ "grad_norm": 474081.25,
460
  "learning_rate": 1.895424836601307e-05,
461
+ "loss": 0.1414,
462
  "step": 1200
463
  },
464
  {
465
  "epoch": 13.764044943820224,
466
+ "grad_norm": 1984481.125,
467
  "learning_rate": 1.8137254901960785e-05,
468
+ "loss": 0.0601,
469
  "step": 1225
470
  },
471
  {
472
  "epoch": 14.0,
473
+ "eval_loss": 0.02082459256052971,
474
+ "eval_rmse": 0.14430728554725647,
475
+ "eval_runtime": 9.6578,
476
+ "eval_samples_per_second": 103.543,
477
+ "eval_steps_per_second": 1.657,
478
  "step": 1246
479
  },
480
  {
481
  "epoch": 14.044943820224718,
482
+ "grad_norm": 3539425.25,
483
  "learning_rate": 1.7320261437908496e-05,
484
+ "loss": 0.0386,
485
  "step": 1250
486
  },
487
  {
488
  "epoch": 14.325842696629213,
489
+ "grad_norm": 975899.9375,
490
  "learning_rate": 1.650326797385621e-05,
491
+ "loss": 0.0274,
492
  "step": 1275
493
  },
494
  {
495
  "epoch": 14.606741573033707,
496
+ "grad_norm": 3280766.0,
497
  "learning_rate": 1.568627450980392e-05,
498
+ "loss": 0.0194,
499
  "step": 1300
500
  },
501
  {
502
  "epoch": 14.887640449438202,
503
+ "grad_norm": 339197.90625,
504
  "learning_rate": 1.4869281045751634e-05,
505
+ "loss": 0.0154,
506
  "step": 1325
507
  },
508
  {
509
  "epoch": 15.0,
510
+ "eval_loss": 0.012424159795045853,
511
+ "eval_rmse": 0.11146371811628342,
512
+ "eval_runtime": 9.3192,
513
+ "eval_samples_per_second": 107.305,
514
+ "eval_steps_per_second": 1.717,
515
  "step": 1335
516
  },
517
  {
518
  "epoch": 15.168539325842696,
519
+ "grad_norm": 325666.3125,
520
  "learning_rate": 1.4052287581699347e-05,
521
+ "loss": 0.0106,
522
  "step": 1350
523
  },
524
  {
525
  "epoch": 15.44943820224719,
526
+ "grad_norm": 407296.9375,
527
  "learning_rate": 1.323529411764706e-05,
528
+ "loss": 0.013,
529
  "step": 1375
530
  },
531
  {
532
  "epoch": 15.730337078651685,
533
+ "grad_norm": 2163835.5,
534
  "learning_rate": 1.2418300653594772e-05,
535
+ "loss": 0.0102,
536
  "step": 1400
537
  },
538
  {
539
  "epoch": 16.0,
540
+ "eval_loss": 0.012837632559239864,
541
+ "eval_rmse": 0.1133032739162445,
542
+ "eval_runtime": 9.5459,
543
+ "eval_samples_per_second": 104.757,
544
+ "eval_steps_per_second": 1.676,
545
  "step": 1424
546
  },
547
  {
548
  "epoch": 16.01123595505618,
549
+ "grad_norm": 1708998.75,
550
  "learning_rate": 1.1601307189542485e-05,
551
+ "loss": 0.0127,
552
  "step": 1425
553
  },
554
  {
555
  "epoch": 16.292134831460675,
556
+ "grad_norm": 324622.65625,
557
  "learning_rate": 1.0784313725490197e-05,
558
+ "loss": 0.0063,
559
  "step": 1450
560
  },
561
  {
562
  "epoch": 16.573033707865168,
563
+ "grad_norm": 533753.9375,
564
  "learning_rate": 9.96732026143791e-06,
565
+ "loss": 0.0067,
566
  "step": 1475
567
  },
568
  {
569
  "epoch": 16.853932584269664,
570
+ "grad_norm": 20416.09765625,
571
  "learning_rate": 9.150326797385621e-06,
572
+ "loss": 0.0071,
573
  "step": 1500
574
  },
575
  {
576
  "epoch": 17.0,
577
+ "eval_loss": 0.01293737068772316,
578
+ "eval_rmse": 0.11374256014823914,
579
+ "eval_runtime": 9.3021,
580
+ "eval_samples_per_second": 107.502,
581
+ "eval_steps_per_second": 1.72,
582
  "step": 1513
583
  },
584
  {
585
  "epoch": 17.134831460674157,
586
+ "grad_norm": 350699.0625,
587
  "learning_rate": 8.333333333333334e-06,
588
+ "loss": 0.0086,
589
  "step": 1525
590
  },
591
  {
592
  "epoch": 17.415730337078653,
593
+ "grad_norm": 1733482.25,
594
  "learning_rate": 7.5163398692810456e-06,
595
+ "loss": 0.0079,
596
  "step": 1550
597
  },
598
  {
599
  "epoch": 17.696629213483146,
600
+ "grad_norm": 2389954.0,
601
  "learning_rate": 6.699346405228758e-06,
602
+ "loss": 0.0095,
603
  "step": 1575
604
  },
605
  {
606
  "epoch": 17.97752808988764,
607
+ "grad_norm": 451414.53125,
608
  "learning_rate": 5.882352941176471e-06,
609
+ "loss": 0.0076,
610
  "step": 1600
611
  },
612
  {
613
  "epoch": 18.0,
614
+ "eval_loss": 0.008463106118142605,
615
+ "eval_rmse": 0.09199514985084534,
616
+ "eval_runtime": 9.6031,
617
+ "eval_samples_per_second": 104.133,
618
+ "eval_steps_per_second": 1.666,
619
  "step": 1602
620
  },
621
  {
622
  "epoch": 18.258426966292134,
623
+ "grad_norm": 1180487.125,
624
  "learning_rate": 5.065359477124184e-06,
625
+ "loss": 0.0055,
626
  "step": 1625
627
  },
628
  {
629
  "epoch": 18.53932584269663,
630
+ "grad_norm": 1367445.375,
631
  "learning_rate": 4.2483660130718954e-06,
632
+ "loss": 0.0057,
633
  "step": 1650
634
  },
635
  {
636
  "epoch": 18.820224719101123,
637
+ "grad_norm": 903850.0,
638
  "learning_rate": 3.431372549019608e-06,
639
+ "loss": 0.0057,
640
  "step": 1675
641
  },
642
  {
643
  "epoch": 19.0,
644
+ "eval_loss": 0.007886779494583607,
645
+ "eval_rmse": 0.08880754560232162,
646
+ "eval_runtime": 9.2481,
647
+ "eval_samples_per_second": 108.13,
648
+ "eval_steps_per_second": 1.73,
649
  "step": 1691
650
  },
651
  {
652
  "epoch": 19.10112359550562,
653
+ "grad_norm": 801771.375,
654
  "learning_rate": 2.6143790849673204e-06,
655
+ "loss": 0.0048,
656
  "step": 1700
657
  },
658
  {
659
  "epoch": 19.382022471910112,
660
+ "grad_norm": 485753.125,
661
  "learning_rate": 1.7973856209150326e-06,
662
+ "loss": 0.0049,
663
  "step": 1725
664
  },
665
  {
666
  "epoch": 19.662921348314608,
667
+ "grad_norm": 370014.5625,
668
  "learning_rate": 9.80392156862745e-07,
669
+ "loss": 0.0044,
670
  "step": 1750
671
  },
672
  {
673
  "epoch": 19.9438202247191,
674
+ "grad_norm": 474841.78125,
675
  "learning_rate": 1.6339869281045752e-07,
676
+ "loss": 0.0046,
677
  "step": 1775
678
  },
679
  {
680
  "epoch": 20.0,
681
+ "eval_loss": 0.007665493991225958,
682
+ "eval_rmse": 0.08755281567573547,
683
+ "eval_runtime": 9.5227,
684
+ "eval_samples_per_second": 105.012,
685
+ "eval_steps_per_second": 1.68,
686
  "step": 1780
687
  },
688
  {
689
  "epoch": 20.0,
690
  "step": 1780,
691
  "total_flos": 0.0,
692
+ "train_loss": 0.10928233507602067,
693
+ "train_runtime": 3299.6169,
694
+ "train_samples_per_second": 34.319,
695
+ "train_steps_per_second": 0.539
696
  }
697
  ],
698
  "logging_steps": 25,