Rakhman16 committed
Commit e98f59a · verified · 1 Parent(s): 7ea8c4e

Training in progress, step 3000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6638d29deb2797875b6b7ea73679773560f05533895012d7a4f636bb12105954
+oid sha256:44ea6b7a6a2fff075f708703afadf30e3e8c57e3163b493b4645497b86d2384f
 size 891558696
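Each checkpoint file is stored as a Git LFS pointer, so the diff only swaps the sha256 oid while the byte size stays the same. A minimal sketch (not part of this commit) for checking that a locally downloaded model.safetensors matches the new oid above; the local path is an assumption:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so large checkpoints (this one is ~891 MB) fit in constant memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# New oid recorded in the pointer above; the local path is a placeholder.
expected = "44ea6b7a6a2fff075f708703afadf30e3e8c57e3163b493b4645497b86d2384f"
print(sha256_of("last-checkpoint/model.safetensors") == expected)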
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:70295e49fa85a076268aa566f4c6ef9d4a75879dc008935976c485916dd382d5
+oid sha256:f98d95db80478cfe0e9aee8fa102c9b668166fcc2a159d0c1e6bcd8518b7bc45
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b88a2b02a5ecabad292a0ab85bac7e2fb5ae0291cf62ebc51c5807b0bc02f8b
+oid sha256:572b27dd4aaf18aaca2cbee93be2b885a21373fc2b4cd02fc4f0e4185393316a
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:337fb6485f673c443fc806f75192c189e4589e981ee99a1f7362abb2e1c82bb8
+oid sha256:4b2b446c840bb8969c73f2b1624f62cc7b296b26a6d87dc45e0852da9ec5c8d2
 size 1064
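The four pointer files above all follow the same three-line Git LFS format (version, oid, size). A small illustrative sketch, not taken from this repo, that parses one of them; the sample text is the new scheduler.pt pointer shown above:

def parse_lfs_pointer(text):
    # Each pointer line is "<key> <value>"; split on the first space only.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:4b2b446c840bb8969c73f2b1624f62cc7b296b26a6d87dc45e0852da9ec5c8d2
size 1064
"""
print(parse_lfs_pointer(pointer))
# {'oid': '4b2b446c840bb8969c73f2b1624f62cc7b296b26a6d87dc45e0852da9ec5c8d2', 'size': 1064}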
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.11480703204870224,
-  "best_model_checkpoint": "./fine-tuned/checkpoint-2500",
-  "epoch": 0.43909721612364977,
+  "best_metric": 0.11349175125360489,
+  "best_model_checkpoint": "./fine-tuned/checkpoint-3000",
+  "epoch": 0.5269166593483797,
   "eval_steps": 100,
-  "global_step": 2500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -557,6 +557,116 @@
       "eval_samples_per_second": 25.474,
       "eval_steps_per_second": 3.187,
       "step": 2500
+    },
+    {
+      "epoch": 0.44787916044612275,
+      "grad_norm": 9090.5810546875,
+      "learning_rate": 2.664061127700685e-05,
+      "loss": 0.1183,
+      "step": 2550
+    },
+    {
+      "epoch": 0.4566611047685958,
+      "grad_norm": 13992.6572265625,
+      "learning_rate": 2.657474090988934e-05,
+      "loss": 0.1204,
+      "step": 2600
+    },
+    {
+      "epoch": 0.4566611047685958,
+      "eval_loss": 0.11466159671545029,
+      "eval_runtime": 175.0561,
+      "eval_samples_per_second": 25.478,
+      "eval_steps_per_second": 3.188,
+      "step": 2600
+    },
+    {
+      "epoch": 0.46544304909106876,
+      "grad_norm": 10754.3505859375,
+      "learning_rate": 2.6508870542771825e-05,
+      "loss": 0.1233,
+      "step": 2650
+    },
+    {
+      "epoch": 0.47422499341354174,
+      "grad_norm": 10475.4765625,
+      "learning_rate": 2.6443000175654315e-05,
+      "loss": 0.1226,
+      "step": 2700
+    },
+    {
+      "epoch": 0.47422499341354174,
+      "eval_loss": 0.11424204707145691,
+      "eval_runtime": 174.9273,
+      "eval_samples_per_second": 25.496,
+      "eval_steps_per_second": 3.19,
+      "step": 2700
+    },
+    {
+      "epoch": 0.4830069377360148,
+      "grad_norm": 16770.1015625,
+      "learning_rate": 2.63771298085368e-05,
+      "loss": 0.1331,
+      "step": 2750
+    },
+    {
+      "epoch": 0.49178888205848775,
+      "grad_norm": 193283.25,
+      "learning_rate": 2.631125944141929e-05,
+      "loss": 0.1193,
+      "step": 2800
+    },
+    {
+      "epoch": 0.49178888205848775,
+      "eval_loss": 0.11391730606555939,
+      "eval_runtime": 175.0052,
+      "eval_samples_per_second": 25.485,
+      "eval_steps_per_second": 3.188,
+      "step": 2800
+    },
+    {
+      "epoch": 0.5005708263809607,
+      "grad_norm": 8650.1865234375,
+      "learning_rate": 2.6245389074301775e-05,
+      "loss": 0.1307,
+      "step": 2850
+    },
+    {
+      "epoch": 0.5093527707034338,
+      "grad_norm": 11343.3427734375,
+      "learning_rate": 2.6179518707184262e-05,
+      "loss": 0.1173,
+      "step": 2900
+    },
+    {
+      "epoch": 0.5093527707034338,
+      "eval_loss": 0.11382684111595154,
+      "eval_runtime": 175.1185,
+      "eval_samples_per_second": 25.468,
+      "eval_steps_per_second": 3.186,
+      "step": 2900
+    },
+    {
+      "epoch": 0.5181347150259067,
+      "grad_norm": 9844.486328125,
+      "learning_rate": 2.611364834006675e-05,
+      "loss": 0.1229,
+      "step": 2950
+    },
+    {
+      "epoch": 0.5269166593483797,
+      "grad_norm": 8915.7255859375,
+      "learning_rate": 2.6047777972949235e-05,
+      "loss": 0.125,
+      "step": 3000
+    },
+    {
+      "epoch": 0.5269166593483797,
+      "eval_loss": 0.11349175125360489,
+      "eval_runtime": 175.2134,
+      "eval_samples_per_second": 25.455,
+      "eval_steps_per_second": 3.185,
+      "step": 3000
     }
   ],
   "logging_steps": 50,
@@ -576,7 +686,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.21791578112e+16,
+  "total_flos": 1.461498937344e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null