joe611 commited on
Commit
69cd30f
·
verified ·
1 Parent(s): 94af384

Training in progress, epoch 149, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:270afe67c62359d34e6ac5c65609a12e7a0fec1bcf98e2d737cc3c52911eb7bd
3
  size 166496880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b050b46f4abbe104a662b3e7daff91ad7aa7b0dc5b6b45c2c1e21223b9dd9a
3
  size 166496880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c198a07a34779506d028d89bce2b45f51089ed7ab2b873708ae0a98d3bb7c5de
3
  size 330495866
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90435ff37114bd6582ded1147a5d07d469037df489412cd78505236316b1afb6
3
  size 330495866
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:795c218f3ddbb3e76a77ad88cdf831b8efe198149593238e9275c9118253ec02
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ff39ce5bc1f6039b31922a6e443ab8d8a8f93d5528fc0d576340ae329fb493
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30c145ebabe049cbd8d4c8ceced4c707816290a109bf20ccefa837d8cb555a00
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb678f76da1c9347406d38fe82346b2ac3acd84e6118cb46f17ee79a3da28612
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.23615802824497223,
3
  "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-w-transform-metrics-test/checkpoint-123000",
4
- "epoch": 148.0,
5
  "eval_steps": 500,
6
- "global_step": 148000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -38386,6 +38386,263 @@
38386
  "eval_samples_per_second": 15.389,
38387
  "eval_steps_per_second": 1.924,
38388
  "step": 148000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38389
  }
38390
  ],
38391
  "logging_steps": 30,
@@ -38405,7 +38662,7 @@
38405
  "attributes": {}
38406
  }
38407
  },
38408
- "total_flos": 5.09154207105024e+19,
38409
  "train_batch_size": 2,
38410
  "trial_name": null,
38411
  "trial_params": null
 
1
  {
2
  "best_metric": 0.23615802824497223,
3
  "best_model_checkpoint": "chickens-composite-403232323232-150-epochs-w-transform-metrics-test/checkpoint-123000",
4
+ "epoch": 149.0,
5
  "eval_steps": 500,
6
+ "global_step": 149000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
38386
  "eval_samples_per_second": 15.389,
38387
  "eval_steps_per_second": 1.924,
38388
  "step": 148000
38389
+ },
38390
+ {
38391
+ "epoch": 148.02,
38392
+ "grad_norm": 23.768529891967773,
38393
+ "learning_rate": 4.298583608501328e-09,
38394
+ "loss": 0.2354,
38395
+ "step": 148020
38396
+ },
38397
+ {
38398
+ "epoch": 148.05,
38399
+ "grad_norm": 74.93134307861328,
38400
+ "learning_rate": 4.169328287299545e-09,
38401
+ "loss": 0.3732,
38402
+ "step": 148050
38403
+ },
38404
+ {
38405
+ "epoch": 148.08,
38406
+ "grad_norm": 37.484535217285156,
38407
+ "learning_rate": 4.042045240927883e-09,
38408
+ "loss": 0.4355,
38409
+ "step": 148080
38410
+ },
38411
+ {
38412
+ "epoch": 148.11,
38413
+ "grad_norm": 49.3876838684082,
38414
+ "learning_rate": 3.9167345196361454e-09,
38415
+ "loss": 0.2641,
38416
+ "step": 148110
38417
+ },
38418
+ {
38419
+ "epoch": 148.14,
38420
+ "grad_norm": 85.84772491455078,
38421
+ "learning_rate": 3.793396172895314e-09,
38422
+ "loss": 0.2854,
38423
+ "step": 148140
38424
+ },
38425
+ {
38426
+ "epoch": 148.17,
38427
+ "grad_norm": 42.09956359863281,
38428
+ "learning_rate": 3.672030249396441e-09,
38429
+ "loss": 0.3354,
38430
+ "step": 148170
38431
+ },
38432
+ {
38433
+ "epoch": 148.2,
38434
+ "grad_norm": 62.16353988647461,
38435
+ "learning_rate": 3.5526367970539765e-09,
38436
+ "loss": 0.3456,
38437
+ "step": 148200
38438
+ },
38439
+ {
38440
+ "epoch": 148.23,
38441
+ "grad_norm": 63.247642517089844,
38442
+ "learning_rate": 3.4352158630018837e-09,
38443
+ "loss": 0.242,
38444
+ "step": 148230
38445
+ },
38446
+ {
38447
+ "epoch": 148.26,
38448
+ "grad_norm": 28.344297409057617,
38449
+ "learning_rate": 3.31976749359586e-09,
38450
+ "loss": 0.321,
38451
+ "step": 148260
38452
+ },
38453
+ {
38454
+ "epoch": 148.29,
38455
+ "grad_norm": 29.875398635864258,
38456
+ "learning_rate": 3.206291734413891e-09,
38457
+ "loss": 0.3615,
38458
+ "step": 148290
38459
+ },
38460
+ {
38461
+ "epoch": 148.32,
38462
+ "grad_norm": 136.5042724609375,
38463
+ "learning_rate": 3.094788630254031e-09,
38464
+ "loss": 0.3272,
38465
+ "step": 148320
38466
+ },
38467
+ {
38468
+ "epoch": 148.35,
38469
+ "grad_norm": 45.96992492675781,
38470
+ "learning_rate": 2.9852582251355124e-09,
38471
+ "loss": 0.316,
38472
+ "step": 148350
38473
+ },
38474
+ {
38475
+ "epoch": 148.38,
38476
+ "grad_norm": 73.72418975830078,
38477
+ "learning_rate": 2.8777005622998567e-09,
38478
+ "loss": 0.274,
38479
+ "step": 148380
38480
+ },
38481
+ {
38482
+ "epoch": 148.41,
38483
+ "grad_norm": 56.12461471557617,
38484
+ "learning_rate": 2.772115684209209e-09,
38485
+ "loss": 0.3025,
38486
+ "step": 148410
38487
+ },
38488
+ {
38489
+ "epoch": 148.44,
38490
+ "grad_norm": 64.38630676269531,
38491
+ "learning_rate": 2.6685036325457826e-09,
38492
+ "loss": 0.2964,
38493
+ "step": 148440
38494
+ },
38495
+ {
38496
+ "epoch": 148.47,
38497
+ "grad_norm": 78.73645782470703,
38498
+ "learning_rate": 2.5668644482151892e-09,
38499
+ "loss": 0.2818,
38500
+ "step": 148470
38501
+ },
38502
+ {
38503
+ "epoch": 148.5,
38504
+ "grad_norm": 51.524024963378906,
38505
+ "learning_rate": 2.4671981713420003e-09,
38506
+ "loss": 0.2586,
38507
+ "step": 148500
38508
+ },
38509
+ {
38510
+ "epoch": 148.53,
38511
+ "grad_norm": 61.21031188964844,
38512
+ "learning_rate": 2.369504841273629e-09,
38513
+ "loss": 0.2708,
38514
+ "step": 148530
38515
+ },
38516
+ {
38517
+ "epoch": 148.56,
38518
+ "grad_norm": 36.56247329711914,
38519
+ "learning_rate": 2.2737844965775578e-09,
38520
+ "loss": 0.3043,
38521
+ "step": 148560
38522
+ },
38523
+ {
38524
+ "epoch": 148.59,
38525
+ "grad_norm": 50.6976432800293,
38526
+ "learning_rate": 2.1800371750430037e-09,
38527
+ "loss": 0.2613,
38528
+ "step": 148590
38529
+ },
38530
+ {
38531
+ "epoch": 148.62,
38532
+ "grad_norm": 104.1881103515625,
38533
+ "learning_rate": 2.088262913679251e-09,
38534
+ "loss": 0.2819,
38535
+ "step": 148620
38536
+ },
38537
+ {
38538
+ "epoch": 148.65,
38539
+ "grad_norm": 77.15486907958984,
38540
+ "learning_rate": 1.9984617487173174e-09,
38541
+ "loss": 0.4705,
38542
+ "step": 148650
38543
+ },
38544
+ {
38545
+ "epoch": 148.68,
38546
+ "grad_norm": 257.2453308105469,
38547
+ "learning_rate": 1.9106337156099553e-09,
38548
+ "loss": 0.3147,
38549
+ "step": 148680
38550
+ },
38551
+ {
38552
+ "epoch": 148.71,
38553
+ "grad_norm": 94.51361083984375,
38554
+ "learning_rate": 1.8247788490299846e-09,
38555
+ "loss": 0.2612,
38556
+ "step": 148710
38557
+ },
38558
+ {
38559
+ "epoch": 148.74,
38560
+ "grad_norm": 54.03693771362305,
38561
+ "learning_rate": 1.740897182871404e-09,
38562
+ "loss": 0.2791,
38563
+ "step": 148740
38564
+ },
38565
+ {
38566
+ "epoch": 148.77,
38567
+ "grad_norm": 49.87909698486328,
38568
+ "learning_rate": 1.6589887502493907e-09,
38569
+ "loss": 0.3407,
38570
+ "step": 148770
38571
+ },
38572
+ {
38573
+ "epoch": 148.8,
38574
+ "grad_norm": 28.649518966674805,
38575
+ "learning_rate": 1.5790535835003006e-09,
38576
+ "loss": 0.3216,
38577
+ "step": 148800
38578
+ },
38579
+ {
38580
+ "epoch": 148.83,
38581
+ "grad_norm": 4.305846232455224e-05,
38582
+ "learning_rate": 1.5010917141811132e-09,
38583
+ "loss": 0.2869,
38584
+ "step": 148830
38585
+ },
38586
+ {
38587
+ "epoch": 148.86,
38588
+ "grad_norm": 52.96532440185547,
38589
+ "learning_rate": 1.425103173069986e-09,
38590
+ "loss": 0.3236,
38591
+ "step": 148860
38592
+ },
38593
+ {
38594
+ "epoch": 148.89,
38595
+ "grad_norm": 67.40062713623047,
38596
+ "learning_rate": 1.3510879901657003e-09,
38597
+ "loss": 0.3153,
38598
+ "step": 148890
38599
+ },
38600
+ {
38601
+ "epoch": 148.92,
38602
+ "grad_norm": 231.43629455566406,
38603
+ "learning_rate": 1.2790461946887712e-09,
38604
+ "loss": 0.3195,
38605
+ "step": 148920
38606
+ },
38607
+ {
38608
+ "epoch": 148.95,
38609
+ "grad_norm": 79.40615844726562,
38610
+ "learning_rate": 1.2089778150797816e-09,
38611
+ "loss": 0.3172,
38612
+ "step": 148950
38613
+ },
38614
+ {
38615
+ "epoch": 148.98,
38616
+ "grad_norm": 70.4350357055664,
38617
+ "learning_rate": 1.1408828790010484e-09,
38618
+ "loss": 0.2945,
38619
+ "step": 148980
38620
+ },
38621
+ {
38622
+ "epoch": 149.0,
38623
+ "eval_loss": 0.23936256766319275,
38624
+ "eval_map": 0.8413,
38625
+ "eval_map_50": 0.9641,
38626
+ "eval_map_75": 0.9341,
38627
+ "eval_map_chicken": 0.8309,
38628
+ "eval_map_duck": 0.7956,
38629
+ "eval_map_large": 0.8507,
38630
+ "eval_map_medium": 0.8408,
38631
+ "eval_map_plant": 0.8973,
38632
+ "eval_map_small": 0.3268,
38633
+ "eval_mar_1": 0.3376,
38634
+ "eval_mar_10": 0.8711,
38635
+ "eval_mar_100": 0.8749,
38636
+ "eval_mar_100_chicken": 0.8738,
38637
+ "eval_mar_100_duck": 0.8294,
38638
+ "eval_mar_100_plant": 0.9215,
38639
+ "eval_mar_large": 0.881,
38640
+ "eval_mar_medium": 0.8792,
38641
+ "eval_mar_small": 0.3947,
38642
+ "eval_runtime": 13.2099,
38643
+ "eval_samples_per_second": 15.14,
38644
+ "eval_steps_per_second": 1.893,
38645
+ "step": 149000
38646
  }
38647
  ],
38648
  "logging_steps": 30,
 
38662
  "attributes": {}
38663
  }
38664
  },
38665
+ "total_flos": 5.12594438234112e+19,
38666
  "train_batch_size": 2,
38667
  "trial_name": null,
38668
  "trial_params": null