Training in progress, step 3800, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:212ab0d0fa8e6b7844ee509ef0c2f36fa6a5b98f4314314e8ae31a26f8061e14
 size 1370666272
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2d52db6458b426b6bd77979be5e39e5045f2ca07bf67445e3563381fe27948ec
 size 697294462
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0763a0ee6c789e6c090cd10cd205450c6c3acb74dd37b4d57a02c7f9152991aa
 size 1064
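The three pointer diffs above only change the `oid` line; the payloads themselves live in LFS storage. A quick way to check that a downloaded blob matches its pointer is to recompute the digest and size. A minimal sketch, assuming the files were fetched into a local `last-checkpoint/` directory (the path is an assumption):

```python
import hashlib
from pathlib import Path

def lfs_digest(path: str) -> tuple[str, int]:
    """Return the (sha256 hex digest, byte size) of a downloaded LFS blob."""
    digest = hashlib.sha256()
    p = Path(path)
    with p.open("rb") as f:
        # Stream in 1 MiB chunks so large checkpoints don't need to fit in RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest(), p.stat().st_size

# Compare against the scheduler.pt pointer above (local path is hypothetical).
oid, size = lfs_digest("last-checkpoint/scheduler.pt")
assert oid == "0763a0ee6c789e6c090cd10cd205450c6c3acb74dd37b4d57a02c7f9152991aa"
assert size == 1064
```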
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.8819774863641638,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 3800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26257,6 +26257,356 @@
       "learning_rate": 0.00018545017643798129,
       "loss": 0.8464,
       "step": 3750
+    },
+    {
+      "epoch": 0.8706046187768365,
+      "grad_norm": 0.4808577299118042,
+      "learning_rate": 0.00018544259941234085,
+      "loss": 0.8261,
+      "step": 3751
+    },
+    {
+      "epoch": 0.8708367181153533,
+      "grad_norm": 0.4224403202533722,
+      "learning_rate": 0.00018543502056916536,
+      "loss": 0.8425,
+      "step": 3752
+    },
+    {
+      "epoch": 0.8710688174538702,
+      "grad_norm": 0.5509401559829712,
+      "learning_rate": 0.000185427439908616,
+      "loss": 0.7942,
+      "step": 3753
+    },
+    {
+      "epoch": 0.8713009167923872,
+      "grad_norm": 0.47354769706726074,
+      "learning_rate": 0.00018541985743085405,
+      "loss": 0.8791,
+      "step": 3754
+    },
+    {
+      "epoch": 0.8715330161309041,
+      "grad_norm": 0.48359885811805725,
+      "learning_rate": 0.00018541227313604078,
+      "loss": 0.8611,
+      "step": 3755
+    },
+    {
+      "epoch": 0.8717651154694209,
+      "grad_norm": 0.48973050713539124,
+      "learning_rate": 0.00018540468702433758,
+      "loss": 0.8084,
+      "step": 3756
+    },
+    {
+      "epoch": 0.8719972148079378,
+      "grad_norm": 0.4059913158416748,
+      "learning_rate": 0.00018539709909590576,
+      "loss": 0.8935,
+      "step": 3757
+    },
+    {
+      "epoch": 0.8722293141464547,
+      "grad_norm": 0.44113290309906006,
+      "learning_rate": 0.00018538950935090677,
+      "loss": 0.8441,
+      "step": 3758
+    },
+    {
+      "epoch": 0.8724614134849715,
+      "grad_norm": 0.4337928891181946,
+      "learning_rate": 0.00018538191778950204,
+      "loss": 0.8548,
+      "step": 3759
+    },
+    {
+      "epoch": 0.8726935128234885,
+      "grad_norm": 0.41244831681251526,
+      "learning_rate": 0.00018537432441185304,
+      "loss": 0.8356,
+      "step": 3760
+    },
+    {
+      "epoch": 0.8729256121620054,
+      "grad_norm": 0.44714102149009705,
+      "learning_rate": 0.00018536672921812134,
+      "loss": 0.8085,
+      "step": 3761
+    },
+    {
+      "epoch": 0.8731577115005222,
+      "grad_norm": 0.43154868483543396,
+      "learning_rate": 0.00018535913220846847,
+      "loss": 0.7995,
+      "step": 3762
+    },
+    {
+      "epoch": 0.8733898108390391,
+      "grad_norm": 0.4167262017726898,
+      "learning_rate": 0.00018535153338305603,
+      "loss": 0.8501,
+      "step": 3763
+    },
+    {
+      "epoch": 0.873621910177556,
+      "grad_norm": 0.398404598236084,
+      "learning_rate": 0.00018534393274204574,
+      "loss": 0.8162,
+      "step": 3764
+    },
+    {
+      "epoch": 0.8738540095160728,
+      "grad_norm": 0.3954335153102875,
+      "learning_rate": 0.00018533633028559917,
+      "loss": 0.8697,
+      "step": 3765
+    },
+    {
+      "epoch": 0.8740861088545898,
+      "grad_norm": 0.4284425377845764,
+      "learning_rate": 0.00018532872601387807,
+      "loss": 0.8538,
+      "step": 3766
+    },
+    {
+      "epoch": 0.8743182081931067,
+      "grad_norm": 0.3925730586051941,
+      "learning_rate": 0.00018532111992704424,
+      "loss": 0.8329,
+      "step": 3767
+    },
+    {
+      "epoch": 0.8745503075316235,
+      "grad_norm": 0.42586302757263184,
+      "learning_rate": 0.00018531351202525945,
+      "loss": 0.8452,
+      "step": 3768
+    },
+    {
+      "epoch": 0.8747824068701404,
+      "grad_norm": 0.41396793723106384,
+      "learning_rate": 0.00018530590230868556,
+      "loss": 0.84,
+      "step": 3769
+    },
+    {
+      "epoch": 0.8750145062086573,
+      "grad_norm": 0.421150267124176,
+      "learning_rate": 0.00018529829077748442,
+      "loss": 0.8413,
+      "step": 3770
+    },
+    {
+      "epoch": 0.8752466055471742,
+      "grad_norm": 0.4445338249206543,
+      "learning_rate": 0.00018529067743181793,
+      "loss": 0.8299,
+      "step": 3771
+    },
+    {
+      "epoch": 0.8754787048856911,
+      "grad_norm": 0.6780601143836975,
+      "learning_rate": 0.00018528306227184806,
+      "loss": 0.8644,
+      "step": 3772
+    },
+    {
+      "epoch": 0.875710804224208,
+      "grad_norm": 0.4485917091369629,
+      "learning_rate": 0.0001852754452977368,
+      "loss": 0.8571,
+      "step": 3773
+    },
+    {
+      "epoch": 0.8759429035627249,
+      "grad_norm": 0.43445441126823425,
+      "learning_rate": 0.00018526782650964618,
+      "loss": 0.8499,
+      "step": 3774
+    },
+    {
+      "epoch": 0.8761750029012417,
+      "grad_norm": 0.43933218717575073,
+      "learning_rate": 0.00018526020590773823,
+      "loss": 0.8504,
+      "step": 3775
+    },
+    {
+      "epoch": 0.8764071022397586,
+      "grad_norm": 0.4182621240615845,
+      "learning_rate": 0.0001852525834921751,
+      "loss": 0.8153,
+      "step": 3776
+    },
+    {
+      "epoch": 0.8766392015782755,
+      "grad_norm": 0.4337303638458252,
+      "learning_rate": 0.00018524495926311893,
+      "loss": 0.8355,
+      "step": 3777
+    },
+    {
+      "epoch": 0.8768713009167924,
+      "grad_norm": 0.45787665247917175,
+      "learning_rate": 0.00018523733322073192,
+      "loss": 0.8477,
+      "step": 3778
+    },
+    {
+      "epoch": 0.8771034002553093,
+      "grad_norm": 0.44240859150886536,
+      "learning_rate": 0.00018522970536517623,
+      "loss": 0.8366,
+      "step": 3779
+    },
+    {
+      "epoch": 0.8773354995938262,
+      "grad_norm": 0.5221067070960999,
+      "learning_rate": 0.00018522207569661415,
+      "loss": 0.7688,
+      "step": 3780
+    },
+    {
+      "epoch": 0.877567598932343,
+      "grad_norm": 0.41580215096473694,
+      "learning_rate": 0.000185214444215208,
+      "loss": 0.8615,
+      "step": 3781
+    },
+    {
+      "epoch": 0.8777996982708599,
+      "grad_norm": 0.43873414397239685,
+      "learning_rate": 0.00018520681092112006,
+      "loss": 0.8547,
+      "step": 3782
+    },
+    {
+      "epoch": 0.8780317976093768,
+      "grad_norm": 0.37557294964790344,
+      "learning_rate": 0.0001851991758145128,
+      "loss": 0.8184,
+      "step": 3783
+    },
+    {
+      "epoch": 0.8782638969478938,
+      "grad_norm": 0.4067942500114441,
+      "learning_rate": 0.00018519153889554854,
+      "loss": 0.8072,
+      "step": 3784
+    },
+    {
+      "epoch": 0.8784959962864106,
+      "grad_norm": 0.38345038890838623,
+      "learning_rate": 0.0001851839001643898,
+      "loss": 0.8175,
+      "step": 3785
+    },
+    {
+      "epoch": 0.8787280956249275,
+      "grad_norm": 0.4141753613948822,
+      "learning_rate": 0.00018517625962119905,
+      "loss": 0.7874,
+      "step": 3786
+    },
+    {
+      "epoch": 0.8789601949634444,
+      "grad_norm": 0.410163015127182,
+      "learning_rate": 0.00018516861726613877,
+      "loss": 0.8337,
+      "step": 3787
+    },
+    {
+      "epoch": 0.8791922943019612,
+      "grad_norm": 0.4118984639644623,
+      "learning_rate": 0.00018516097309937156,
+      "loss": 0.8398,
+      "step": 3788
+    },
+    {
+      "epoch": 0.8794243936404781,
+      "grad_norm": 0.42656370997428894,
+      "learning_rate": 0.0001851533271210601,
+      "loss": 0.8233,
+      "step": 3789
+    },
+    {
+      "epoch": 0.8796564929789951,
+      "grad_norm": 0.4185142517089844,
+      "learning_rate": 0.00018514567933136693,
+      "loss": 0.8372,
+      "step": 3790
+    },
+    {
+      "epoch": 0.8798885923175119,
+      "grad_norm": 0.4009787440299988,
+      "learning_rate": 0.0001851380297304548,
+      "loss": 0.8194,
+      "step": 3791
+    },
+    {
+      "epoch": 0.8801206916560288,
+      "grad_norm": 0.43166425824165344,
+      "learning_rate": 0.00018513037831848639,
+      "loss": 0.8333,
+      "step": 3792
+    },
+    {
+      "epoch": 0.8803527909945457,
+      "grad_norm": 0.4240741431713104,
+      "learning_rate": 0.00018512272509562446,
+      "loss": 0.8087,
+      "step": 3793
+    },
+    {
+      "epoch": 0.8805848903330625,
+      "grad_norm": 0.4606071710586548,
+      "learning_rate": 0.00018511507006203188,
+      "loss": 0.7864,
+      "step": 3794
+    },
+    {
+      "epoch": 0.8808169896715794,
+      "grad_norm": 0.4614354372024536,
+      "learning_rate": 0.0001851074132178714,
+      "loss": 0.8406,
+      "step": 3795
+    },
+    {
+      "epoch": 0.8810490890100964,
+      "grad_norm": 0.46533650159835815,
+      "learning_rate": 0.00018509975456330592,
+      "loss": 0.8695,
+      "step": 3796
+    },
+    {
+      "epoch": 0.8812811883486132,
+      "grad_norm": 0.46137019991874695,
+      "learning_rate": 0.00018509209409849843,
+      "loss": 0.8355,
+      "step": 3797
+    },
+    {
+      "epoch": 0.8815132876871301,
+      "grad_norm": 0.4653560221195221,
+      "learning_rate": 0.00018508443182361175,
+      "loss": 0.8749,
+      "step": 3798
+    },
+    {
+      "epoch": 0.881745387025647,
+      "grad_norm": 0.39684346318244934,
+      "learning_rate": 0.00018507676773880897,
+      "loss": 0.8362,
+      "step": 3799
+    },
+    {
+      "epoch": 0.8819774863641638,
+      "grad_norm": 0.4175236225128174,
+      "learning_rate": 0.0001850691018442531,
+      "loss": 0.8549,
+      "step": 3800
     }
   ],
   "logging_steps": 1,
@@ -26276,7 +26626,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.6867315219955712e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
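The updated `trainer_state.json` records `global_step` 3800 at epoch ≈ 0.88, so the run can be resumed from this directory rather than restarted. A minimal sketch using the `transformers` `Trainer`; the base model id, dataset, and output paths below are placeholders that would come from the original launch script:

```python
from transformers import AutoModelForCausalLM, Trainer, TrainingArguments

# Placeholders: the real base model and dataset come from the original run.
model = AutoModelForCausalLM.from_pretrained("base-model-id")  # hypothetical id
train_dataset = ...  # rebuild the same training dataset used for steps 1-3800

args = TrainingArguments(
    output_dir="outputs",            # directory that contains last-checkpoint/
    per_device_train_batch_size=32,  # matches "train_batch_size": 32 above
    logging_steps=1,                 # matches "logging_steps": 1 above
)

trainer = Trainer(model=model, args=args, train_dataset=train_dataset)

# resume_from_checkpoint restores optimizer.pt, scheduler.pt, and
# trainer_state.json, then continues training from global_step 3800.
trainer.train(resume_from_checkpoint="outputs/last-checkpoint")
```

Note that the checkpoint stores a LoRA adapter (`adapter_model.safetensors`), so the real setup presumably wraps the base model with `peft` before handing it to the `Trainer`.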