Training in progress, step 2900, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:789f226e44ff6175f0650db489f0554e7f69dc5b63c5b19f6f8f90422e097bc3
 size 1370666272

 version https://git-lfs.github.com/spec/v1
+oid sha256:85e10776ed7d2feec702f85a92294fc572495be458fad36bc37e21242039a14d
 size 1370666272

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53c7ca855a09c6703804528921ba002a4454692bef620396449f5abdd6380228
 size 697294462

 version https://git-lfs.github.com/spec/v1
+oid sha256:f328674c88f0186255ee5dbf4ac7f148eb4bef19de18a361c1ed0eb9ce9660bb
 size 697294462

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:afaac9fbe4271faaba5196ab94e52163e6bf1b95bd8386498fc1f2c58b28a4a4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:605d9f0439096f21199e65a6f7490d22d8285df735f81d56920505482985be35
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6614831147731229,
   "eval_steps": 500,
-  "global_step": 2850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19957,6 +19957,356 @@
       "learning_rate": 0.00019150812636190874,
       "loss": 0.8451,
       "step": 2850
     }
   ],
   "logging_steps": 1,
@@ -19976,7 +20326,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2650486414966784e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6730880816989672,
   "eval_steps": 500,
+  "global_step": 2900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019150812636190874,
       "loss": 0.8451,
       "step": 2850
+    },
+    {
+      "epoch": 0.6617152141116398,
+      "grad_norm": 0.45293116569519043,
+      "learning_rate": 0.00019150224373375174,
+      "loss": 0.9152,
+      "step": 2851
+    },
+    {
+      "epoch": 0.6619473134501567,
+      "grad_norm": 0.49578094482421875,
+      "learning_rate": 0.00019149635915915889,
+      "loss": 0.8429,
+      "step": 2852
+    },
+    {
+      "epoch": 0.6621794127886735,
+      "grad_norm": 0.45070314407348633,
+      "learning_rate": 0.00019149047263825538,
+      "loss": 0.829,
+      "step": 2853
+    },
+    {
+      "epoch": 0.6624115121271904,
+      "grad_norm": 0.44752323627471924,
+      "learning_rate": 0.00019148458417116645,
+      "loss": 0.874,
+      "step": 2854
+    },
+    {
+      "epoch": 0.6626436114657073,
+      "grad_norm": 0.4903758466243744,
+      "learning_rate": 0.00019147869375801734,
+      "loss": 0.8787,
+      "step": 2855
+    },
+    {
+      "epoch": 0.6628757108042242,
+      "grad_norm": 0.43119940161705017,
+      "learning_rate": 0.00019147280139893337,
+      "loss": 0.8978,
+      "step": 2856
+    },
+    {
+      "epoch": 0.6631078101427411,
+      "grad_norm": 0.5306719541549683,
+      "learning_rate": 0.00019146690709403988,
+      "loss": 0.9067,
+      "step": 2857
+    },
+    {
+      "epoch": 0.663339909481258,
+      "grad_norm": 0.45615947246551514,
+      "learning_rate": 0.0001914610108434622,
+      "loss": 0.839,
+      "step": 2858
+    },
+    {
+      "epoch": 0.6635720088197749,
+      "grad_norm": 0.4449672996997833,
+      "learning_rate": 0.00019145511264732584,
+      "loss": 0.8675,
+      "step": 2859
+    },
+    {
+      "epoch": 0.6638041081582917,
+      "grad_norm": 0.4791627526283264,
+      "learning_rate": 0.00019144921250575619,
+      "loss": 0.8853,
+      "step": 2860
+    },
+    {
+      "epoch": 0.6640362074968086,
+      "grad_norm": 0.4788571298122406,
+      "learning_rate": 0.00019144331041887882,
+      "loss": 0.9273,
+      "step": 2861
+    },
+    {
+      "epoch": 0.6642683068353256,
+      "grad_norm": 0.42612382769584656,
+      "learning_rate": 0.00019143740638681922,
+      "loss": 0.893,
+      "step": 2862
+    },
+    {
+      "epoch": 0.6645004061738424,
+      "grad_norm": 0.47250261902809143,
+      "learning_rate": 0.000191431500409703,
+      "loss": 0.8795,
+      "step": 2863
+    },
+    {
+      "epoch": 0.6647325055123593,
+      "grad_norm": 0.4971529543399811,
+      "learning_rate": 0.00019142559248765587,
+      "loss": 0.9454,
+      "step": 2864
+    },
+    {
+      "epoch": 0.6649646048508762,
+      "grad_norm": 0.5304151177406311,
+      "learning_rate": 0.00019141968262080335,
+      "loss": 0.9761,
+      "step": 2865
+    },
+    {
+      "epoch": 0.665196704189393,
+      "grad_norm": 0.4551432728767395,
+      "learning_rate": 0.00019141377080927132,
+      "loss": 0.8263,
+      "step": 2866
+    },
+    {
+      "epoch": 0.6654288035279099,
+      "grad_norm": 0.4839153289794922,
+      "learning_rate": 0.0001914078570531854,
+      "loss": 0.8516,
+      "step": 2867
+    },
+    {
+      "epoch": 0.6656609028664269,
+      "grad_norm": 0.501598060131073,
+      "learning_rate": 0.0001914019413526715,
+      "loss": 0.8753,
+      "step": 2868
+    },
+    {
+      "epoch": 0.6658930022049437,
+      "grad_norm": 0.49526655673980713,
+      "learning_rate": 0.00019139602370785538,
+      "loss": 0.8342,
+      "step": 2869
+    },
+    {
+      "epoch": 0.6661251015434606,
+      "grad_norm": 0.49142616987228394,
+      "learning_rate": 0.00019139010411886291,
+      "loss": 0.8389,
+      "step": 2870
+    },
+    {
+      "epoch": 0.6663572008819775,
+      "grad_norm": 0.5267114639282227,
+      "learning_rate": 0.00019138418258582006,
+      "loss": 0.8339,
+      "step": 2871
+    },
+    {
+      "epoch": 0.6665893002204943,
+      "grad_norm": 0.42393583059310913,
+      "learning_rate": 0.0001913782591088528,
+      "loss": 0.8576,
+      "step": 2872
+    },
+    {
+      "epoch": 0.6668213995590112,
+      "grad_norm": 0.4962637722492218,
+      "learning_rate": 0.0001913723336880871,
+      "loss": 0.837,
+      "step": 2873
+    },
+    {
+      "epoch": 0.6670534988975282,
+      "grad_norm": 0.4471946656703949,
+      "learning_rate": 0.000191366406323649,
+      "loss": 0.8259,
+      "step": 2874
+    },
+    {
+      "epoch": 0.667285598236045,
+      "grad_norm": 0.48034703731536865,
+      "learning_rate": 0.00019136047701566464,
+      "loss": 0.8537,
+      "step": 2875
+    },
+    {
+      "epoch": 0.6675176975745619,
+      "grad_norm": 0.47116121649742126,
+      "learning_rate": 0.0001913545457642601,
+      "loss": 0.8252,
+      "step": 2876
+    },
+    {
+      "epoch": 0.6677497969130788,
+      "grad_norm": 0.5071761012077332,
+      "learning_rate": 0.00019134861256956155,
+      "loss": 0.898,
+      "step": 2877
+    },
+    {
+      "epoch": 0.6679818962515957,
+      "grad_norm": 0.4993492662906647,
+      "learning_rate": 0.00019134267743169524,
+      "loss": 0.8555,
+      "step": 2878
+    },
+    {
+      "epoch": 0.6682139955901125,
+      "grad_norm": 0.5150817036628723,
+      "learning_rate": 0.00019133674035078736,
+      "loss": 0.8624,
+      "step": 2879
+    },
+    {
+      "epoch": 0.6684460949286295,
+      "grad_norm": 0.5153425931930542,
+      "learning_rate": 0.00019133080132696426,
+      "loss": 0.8093,
+      "step": 2880
+    },
+    {
+      "epoch": 0.6686781942671464,
+      "grad_norm": 0.4248557686805725,
+      "learning_rate": 0.00019132486036035226,
+      "loss": 0.8488,
+      "step": 2881
+    },
+    {
+      "epoch": 0.6689102936056632,
+      "grad_norm": 0.4647797644138336,
+      "learning_rate": 0.0001913189174510777,
+      "loss": 0.9239,
+      "step": 2882
+    },
+    {
+      "epoch": 0.6691423929441801,
+      "grad_norm": 0.5158550143241882,
+      "learning_rate": 0.00019131297259926706,
+      "loss": 0.8746,
+      "step": 2883
+    },
+    {
+      "epoch": 0.669374492282697,
+      "grad_norm": 0.4511086344718933,
+      "learning_rate": 0.00019130702580504676,
+      "loss": 0.897,
+      "step": 2884
+    },
+    {
+      "epoch": 0.6696065916212138,
+      "grad_norm": 0.5059782862663269,
+      "learning_rate": 0.0001913010770685433,
+      "loss": 0.8666,
+      "step": 2885
+    },
+    {
+      "epoch": 0.6698386909597308,
+      "grad_norm": 0.4928185045719147,
+      "learning_rate": 0.00019129512638988322,
+      "loss": 0.842,
+      "step": 2886
+    },
+    {
+      "epoch": 0.6700707902982477,
+      "grad_norm": 0.5002438426017761,
+      "learning_rate": 0.00019128917376919313,
+      "loss": 0.9076,
+      "step": 2887
+    },
+    {
+      "epoch": 0.6703028896367645,
+      "grad_norm": 0.427513986825943,
+      "learning_rate": 0.0001912832192065996,
+      "loss": 0.8238,
+      "step": 2888
+    },
+    {
+      "epoch": 0.6705349889752814,
+      "grad_norm": 0.45401087403297424,
+      "learning_rate": 0.0001912772627022294,
+      "loss": 0.8605,
+      "step": 2889
+    },
+    {
+      "epoch": 0.6707670883137983,
+      "grad_norm": 0.43657442927360535,
+      "learning_rate": 0.0001912713042562091,
+      "loss": 0.8506,
+      "step": 2890
+    },
+    {
+      "epoch": 0.6709991876523151,
+      "grad_norm": 0.41969212889671326,
+      "learning_rate": 0.00019126534386866556,
+      "loss": 0.8791,
+      "step": 2891
+    },
+    {
+      "epoch": 0.6712312869908321,
+      "grad_norm": 0.46783447265625,
+      "learning_rate": 0.00019125938153972548,
+      "loss": 0.8774,
+      "step": 2892
+    },
+    {
+      "epoch": 0.671463386329349,
+      "grad_norm": 0.44763606786727905,
+      "learning_rate": 0.00019125341726951577,
+      "loss": 0.9214,
+      "step": 2893
+    },
+    {
+      "epoch": 0.6716954856678659,
+      "grad_norm": 0.46709761023521423,
+      "learning_rate": 0.00019124745105816325,
+      "loss": 0.8276,
+      "step": 2894
+    },
+    {
+      "epoch": 0.6719275850063827,
+      "grad_norm": 0.471754252910614,
+      "learning_rate": 0.0001912414829057949,
+      "loss": 0.8645,
+      "step": 2895
+    },
+    {
+      "epoch": 0.6721596843448996,
+      "grad_norm": 0.4268680810928345,
+      "learning_rate": 0.00019123551281253757,
+      "loss": 0.8376,
+      "step": 2896
+    },
+    {
+      "epoch": 0.6723917836834165,
+      "grad_norm": 0.4184396266937256,
+      "learning_rate": 0.00019122954077851833,
+      "loss": 0.87,
+      "step": 2897
+    },
+    {
+      "epoch": 0.6726238830219334,
+      "grad_norm": 0.48813703656196594,
+      "learning_rate": 0.0001912235668038642,
+      "loss": 0.866,
+      "step": 2898
+    },
+    {
+      "epoch": 0.6728559823604503,
+      "grad_norm": 0.4599473774433136,
+      "learning_rate": 0.00019121759088870226,
+      "loss": 0.834,
+      "step": 2899
+    },
+    {
+      "epoch": 0.6730880816989672,
+      "grad_norm": 0.4024162292480469,
+      "learning_rate": 0.00019121161303315963,
+      "loss": 0.8731,
+      "step": 2900
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.2872424773124096e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null