g4rg committed (verified)
Commit ee7abc5 · Parent(s): 186490b

Training in progress, step 96, checkpoint

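This commit advances the saved checkpoint from step 64 to step 96: the LoRA adapter weights, DeepSpeed ZeRO shards, per-rank RNG states, scheduler state, and trainer state are all refreshed. As a minimal sketch (not part of the commit itself), a run that produced this layout would typically be resumed by pointing an already-configured transformers `Trainer` at the checkpoint directory; the directory name and the `resume` wrapper below are assumptions of this sketch.

```python
# Hypothetical resume sketch: the checkpoint directory name and the wrapper
# function are assumptions; only trainer.train(resume_from_checkpoint=...)
# mirrors the standard transformers Trainer API.
from transformers import Trainer  # assumes a Trainer already built with model/args/data


def resume(trainer: Trainer, checkpoint_dir: str = "last-checkpoint") -> None:
    # Trainer restores model, optimizer, scheduler, and RNG state from the
    # directory written at step 96 (adapter weights, global_step96/ shards,
    # rng_state_*.pth, scheduler.pt, trainer_state.json).
    trainer.train(resume_from_checkpoint=checkpoint_dir)
```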
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4c4a5e2add8c903d966ce8710e97157c591e27dae3825fe3c2d2fd8594e8da90
+ oid sha256:1d281dead310dc912d96ea9c1ecf041030d9b9eda5a70050289f56893a32f795
  size 763470136
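The diff above touches a Git LFS pointer file, not the adapter itself: only the `oid` changes while the `size` stays at 763470136 bytes, i.e. a same-sized adapter with new weights. As a hedged sketch (the local path is an assumption), a downloaded object can be checked against its pointer fields like this:

```python
# Hedged sketch: verify a locally downloaded LFS object against its pointer's
# sha256 oid and byte size. Only the standard library is used.
import hashlib
import os


def matches_pointer(path: str, oid_hex: str, size: int) -> bool:
    # Cheap check first: the pointer records the exact byte size.
    if os.path.getsize(path) != size:
        return False
    # Then hash the file in 1 MiB chunks and compare with the pointer oid.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid_hex


# Example (path is hypothetical; oid/size are taken from the diff above):
# matches_pointer("last-checkpoint/adapter_model.safetensors",
#                 "1d281dead310dc912d96ea9c1ecf041030d9b9eda5a70050289f56893a32f795",
#                 763470136)
```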
last-checkpoint/global_step96/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7b861109736f9e25e086aed1a1fddd63c6942bd39bba0da383a0542dae65588
+ size 385019984
last-checkpoint/global_step96/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d86e4d1c0014d1412435b331c85ccc3cdf61eb589f4ae020739e77a7221b66d
+ size 385019984
last-checkpoint/global_step96/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbef0bf2e51a29abe24d04075d92f103231b3c2658b794facc9ffa01dcc2bc48
+ size 385019984
last-checkpoint/global_step96/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:754d3b5c90d2e73fd13dcd360b2c6eafe627fc7f15db6632aea2afbdbbf98e7b
+ size 385019984
last-checkpoint/global_step96/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a3a11c3ac404043c019564950aa61d5dcd5ff0e804350a911a40348e024bae6
+ size 385019984
last-checkpoint/global_step96/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b27ef6303c05d781a8e7e681180069a7499343a77c1f3e312c7e06e4df24fa7
+ size 385019984
last-checkpoint/global_step96/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:22cf195c16fd65966316397268f3d87870fa9bdbd53df64960236049b16bcd7d
+ size 348711830
last-checkpoint/global_step96/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2f23c09b06ab3c2d67b09265f6ae5a7d6677f76a8ceb8fc4fee7a9f9355273d9
+ size 348711830
last-checkpoint/global_step96/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ee3353cc58853b3bf08488fb8200b0044f2c1714bf66c0bc0d4fb45b954307a
+ size 348711830
last-checkpoint/global_step96/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:46dddca3867947d351db07f7330fc7f0365395ebbac13c640f830609c22d5a6e
+ size 348711830
last-checkpoint/global_step96/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f5a128a4eefd61ce663446c0d456bd8626e7497c843aa9a87db61097e46e07b
+ size 348711830
last-checkpoint/global_step96/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5bc982405297d8a24ceb926a3dcc148a9533c0b0aec2af68187233e64b39806e
+ size 348711830
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step64
+ global_step96
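The `latest` file is the tag DeepSpeed reads to find the newest checkpoint directory, and it now points to `global_step96/`, which holds the six per-rank bf16 optimizer-state shards and six model-state shards added above. As a hedged sketch of consolidating those ZeRO shards into a single fp32 state dict, using the helper that DeepSpeed's `zero_to_fp32` utility provides (treat the exact import path and signature as an assumption of this sketch):

```python
# Hedged sketch: merge the per-rank ZeRO shards under global_step96/ into one
# fp32 state dict. Paths are assumptions; the helper name follows DeepSpeed's
# zero_to_fp32 utility as commonly documented.
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint",     # directory containing `latest` and global_step96/
    tag="global_step96",   # same value the `latest` file now records
)
torch.save(state_dict, "consolidated_fp32.pt")
```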
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c1d2706b93a662ac3d076f62ac0cc0bf9da89b22f28d7aedb79e2d221480bf22
+ oid sha256:18fb5e6493092cd8f71e5b3842d879509d55ae6cab2bf942dd56c48e7b8cc9fc
  size 15472
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e012a473e8960670cde6d00f32b72516b3b678ea6c28d0bc75a8a6872b87726
+ oid sha256:9fc4d72ba97fb58bd464a1fddcf7c6f2d733fa949f29b90e179d642b44eb624e
  size 15472
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2d15d65d436172526002aa34da5c198ac75a338a871f9c7b229bad29a0998f16
+ oid sha256:0c2da6915b4a587863bc60c730f828ac7764e70c41ebff8c368e95f045073b7f
  size 15472
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:82fade3599ddd40984f8a744d9e09f0f5ed5dde67c4de699c5efada15e9b546c
+ oid sha256:89e0de0d83a177df7de41df7ea72b48a294a2d0589d8ca1035b9dd419b036e81
  size 15472
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3553f81d93560a998b742fcb2c39d12dae1b25a8fe484f0f025ea17be9815aeb
+ oid sha256:0d6218d38ef6fd867f07998ab60de53f729af52921318b25e9685aae0a3d9044
  size 15472
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f45b6ba5697e78673c882ff2682372ece017522f58d35821dcf57b76881f3694
+ oid sha256:10040054714cbe309f578c161884fad07491098cb4b32684d3d08fcb91914d00
  size 15472
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3db122c9205c73d5e7e1d59359a230de2aae1e7f2b50ca89f28123120a2430ad
+ oid sha256:1c8d252fdd44d3c445fb84b7c408c35fe553a7fa8fd66113443f48d6125c89b0
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.4129032258064516,
+ "epoch": 0.6193548387096774,
  "eval_steps": 32,
- "global_step": 64,
+ "global_step": 96,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -479,6 +479,238 @@
  "eval_samples_per_second": 1.608,
  "eval_steps_per_second": 0.064,
  "step": 64
+ },
+ {
+ "epoch": 0.41935483870967744,
+ "grad_norm": 0.42898014496026954,
+ "learning_rate": 3.8899703564688187e-05,
+ "loss": 1.3098,
+ "step": 65
+ },
+ {
+ "epoch": 0.4258064516129032,
+ "grad_norm": 0.3729718619879595,
+ "learning_rate": 3.8448600767754265e-05,
+ "loss": 1.3267,
+ "step": 66
+ },
+ {
+ "epoch": 0.432258064516129,
+ "grad_norm": 0.5652836221912215,
+ "learning_rate": 3.7991655975072834e-05,
+ "loss": 1.3008,
+ "step": 67
+ },
+ {
+ "epoch": 0.43870967741935485,
+ "grad_norm": 0.3611571783806379,
+ "learning_rate": 3.752911300497212e-05,
+ "loss": 1.2365,
+ "step": 68
+ },
+ {
+ "epoch": 0.44516129032258067,
+ "grad_norm": 0.4101622999668487,
+ "learning_rate": 3.706121866287699e-05,
+ "loss": 1.2805,
+ "step": 69
+ },
+ {
+ "epoch": 0.45161290322580644,
+ "grad_norm": 0.4194502800160711,
+ "learning_rate": 3.658822260961763e-05,
+ "loss": 1.2627,
+ "step": 70
+ },
+ {
+ "epoch": 0.45806451612903226,
+ "grad_norm": 0.4464572963409143,
+ "learning_rate": 3.611037722821452e-05,
+ "loss": 1.3269,
+ "step": 71
+ },
+ {
+ "epoch": 0.4645161290322581,
+ "grad_norm": 0.43900384749780696,
+ "learning_rate": 3.562793748921095e-05,
+ "loss": 1.0625,
+ "step": 72
+ },
+ {
+ "epoch": 0.47096774193548385,
+ "grad_norm": 0.3492561062627179,
+ "learning_rate": 3.514116081462488e-05,
+ "loss": 1.2854,
+ "step": 73
+ },
+ {
+ "epoch": 0.4774193548387097,
+ "grad_norm": 1.004303081481083,
+ "learning_rate": 3.4650306940592784e-05,
+ "loss": 1.3114,
+ "step": 74
+ },
+ {
+ "epoch": 0.4838709677419355,
+ "grad_norm": 0.372149762179685,
+ "learning_rate": 3.415563777877859e-05,
+ "loss": 1.1604,
+ "step": 75
+ },
+ {
+ "epoch": 0.49032258064516127,
+ "grad_norm": 0.36620109818968666,
+ "learning_rate": 3.365741727662187e-05,
+ "loss": 1.2055,
+ "step": 76
+ },
+ {
+ "epoch": 0.4967741935483871,
+ "grad_norm": 0.3209403988829257,
+ "learning_rate": 3.315591127649981e-05,
+ "loss": 1.2652,
+ "step": 77
+ },
+ {
+ "epoch": 0.5032258064516129,
+ "grad_norm": 0.6268869630058581,
+ "learning_rate": 3.265138737387802e-05,
+ "loss": 1.3451,
+ "step": 78
+ },
+ {
+ "epoch": 0.5096774193548387,
+ "grad_norm": 0.37710251621094776,
+ "learning_rate": 3.214411477452589e-05,
+ "loss": 1.1998,
+ "step": 79
+ },
+ {
+ "epoch": 0.5161290322580645,
+ "grad_norm": 0.3965119239115867,
+ "learning_rate": 3.1634364150872836e-05,
+ "loss": 1.198,
+ "step": 80
+ },
+ {
+ "epoch": 0.5225806451612903,
+ "grad_norm": 0.38914331784636286,
+ "learning_rate": 3.112240749758179e-05,
+ "loss": 1.3164,
+ "step": 81
+ },
+ {
+ "epoch": 0.5290322580645161,
+ "grad_norm": 0.4854967858248665,
+ "learning_rate": 3.060851798641735e-05,
+ "loss": 1.1669,
+ "step": 82
+ },
+ {
+ "epoch": 0.535483870967742,
+ "grad_norm": 0.4486571105935308,
+ "learning_rate": 3.00929698204857e-05,
+ "loss": 1.3611,
+ "step": 83
+ },
+ {
+ "epoch": 0.5419354838709678,
+ "grad_norm": 0.5816885351466946,
+ "learning_rate": 2.9576038087924297e-05,
+ "loss": 1.2272,
+ "step": 84
+ },
+ {
+ "epoch": 0.5483870967741935,
+ "grad_norm": 0.3242743003758612,
+ "learning_rate": 2.905799861511932e-05,
+ "loss": 1.1925,
+ "step": 85
+ },
+ {
+ "epoch": 0.5548387096774193,
+ "grad_norm": 0.3110545851314829,
+ "learning_rate": 2.8539127819529143e-05,
+ "loss": 0.9746,
+ "step": 86
+ },
+ {
+ "epoch": 0.5612903225806452,
+ "grad_norm": 0.3102061641971853,
+ "learning_rate": 2.801970256219253e-05,
+ "loss": 1.352,
+ "step": 87
+ },
+ {
+ "epoch": 0.567741935483871,
+ "grad_norm": 0.30361763618294724,
+ "learning_rate": 2.7500000000000004e-05,
+ "loss": 1.2039,
+ "step": 88
+ },
+ {
+ "epoch": 0.5741935483870968,
+ "grad_norm": 0.5030242942383549,
+ "learning_rate": 2.698029743780748e-05,
+ "loss": 1.2757,
+ "step": 89
+ },
+ {
+ "epoch": 0.5806451612903226,
+ "grad_norm": 0.5902079797954521,
+ "learning_rate": 2.6460872180470865e-05,
+ "loss": 1.1542,
+ "step": 90
+ },
+ {
+ "epoch": 0.5870967741935483,
+ "grad_norm": 0.4650188539079032,
+ "learning_rate": 2.594200138488069e-05,
+ "loss": 1.1455,
+ "step": 91
+ },
+ {
+ "epoch": 0.5935483870967742,
+ "grad_norm": 0.6953375177526994,
+ "learning_rate": 2.5423961912075712e-05,
+ "loss": 1.2476,
+ "step": 92
+ },
+ {
+ "epoch": 0.6,
+ "grad_norm": 0.324295911329268,
+ "learning_rate": 2.4907030179514307e-05,
+ "loss": 1.0578,
+ "step": 93
+ },
+ {
+ "epoch": 0.6064516129032258,
+ "grad_norm": 0.36056444973850205,
+ "learning_rate": 2.4391482013582657e-05,
+ "loss": 1.3128,
+ "step": 94
+ },
+ {
+ "epoch": 0.6129032258064516,
+ "grad_norm": 0.31638336845784404,
+ "learning_rate": 2.387759250241821e-05,
+ "loss": 1.1412,
+ "step": 95
+ },
+ {
+ "epoch": 0.6193548387096774,
+ "grad_norm": 0.3807737813278726,
+ "learning_rate": 2.3365635849127166e-05,
+ "loss": 1.301,
+ "step": 96
+ },
+ {
+ "epoch": 0.6193548387096774,
+ "eval_loss": 1.0401562452316284,
+ "eval_runtime": 62.5349,
+ "eval_samples_per_second": 1.599,
+ "eval_steps_per_second": 0.064,
+ "step": 96
  }
  ],
  "logging_steps": 1,
@@ -498,7 +730,7 @@
  "attributes": {}
  }
  },
- "total_flos": 131039452200960.0,
+ "total_flos": 196559178301440.0,
  "train_batch_size": 5,
  "trial_name": null,
  "trial_params": null