Training in progress, step 2850, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44791fb686c0f92a3ca53186840e2c8355789932001a028b47646af1a8a1b45c
 size 1370666272

 version https://git-lfs.github.com/spec/v1
+oid sha256:789f226e44ff6175f0650db489f0554e7f69dc5b63c5b19f6f8f90422e097bc3
 size 1370666272

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d586e1619bf49bc533732763da9dc62537363ab53027df7eb9192650c866d327
 size 697294462

 version https://git-lfs.github.com/spec/v1
+oid sha256:53c7ca855a09c6703804528921ba002a4454692bef620396449f5abdd6380228
 size 697294462

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54c9effe6f4720ddc5037c1f923116bf7c70164eba829d93d939ea303faa1268
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:afaac9fbe4271faaba5196ab94e52163e6bf1b95bd8386498fc1f2c58b28a4a4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.6498781478472786,
   "eval_steps": 500,
-  "global_step": 2800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -19607,6 +19607,356 @@
       "learning_rate": 0.00019179977330980487,
       "loss": 0.8965,
       "step": 2800
     }
   ],
   "logging_steps": 1,
@@ -19626,7 +19976,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2428548056809472e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.6614831147731229,
   "eval_steps": 500,
+  "global_step": 2850,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019179977330980487,
       "loss": 0.8965,
       "step": 2800
+    },
+    {
+      "epoch": 0.6501102471857956,
+      "grad_norm": 0.4750087857246399,
+      "learning_rate": 0.00019179398816212382,
+      "loss": 0.9313,
+      "step": 2801
+    },
+    {
+      "epoch": 0.6503423465243124,
+      "grad_norm": 0.4137982130050659,
+      "learning_rate": 0.00019178820106180094,
+      "loss": 0.8269,
+      "step": 2802
+    },
+    {
+      "epoch": 0.6505744458628293,
+      "grad_norm": 0.48815712332725525,
+      "learning_rate": 0.00019178241200895935,
+      "loss": 0.8957,
+      "step": 2803
+    },
+    {
+      "epoch": 0.6508065452013462,
+      "grad_norm": 0.5212056636810303,
+      "learning_rate": 0.0001917766210037222,
+      "loss": 0.826,
+      "step": 2804
+    },
+    {
+      "epoch": 0.651038644539863,
+      "grad_norm": 0.46167051792144775,
+      "learning_rate": 0.0001917708280462126,
+      "loss": 0.9014,
+      "step": 2805
+    },
+    {
+      "epoch": 0.6512707438783799,
+      "grad_norm": 0.47623032331466675,
+      "learning_rate": 0.00019176503313655393,
+      "loss": 0.8882,
+      "step": 2806
+    },
+    {
+      "epoch": 0.6515028432168968,
+      "grad_norm": 0.46180862188339233,
+      "learning_rate": 0.00019175923627486936,
+      "loss": 0.9117,
+      "step": 2807
+    },
+    {
+      "epoch": 0.6517349425554138,
+      "grad_norm": 0.4569379687309265,
+      "learning_rate": 0.0001917534374612822,
+      "loss": 0.8399,
+      "step": 2808
+    },
+    {
+      "epoch": 0.6519670418939306,
+      "grad_norm": 0.42162245512008667,
+      "learning_rate": 0.00019174763669591583,
+      "loss": 0.8652,
+      "step": 2809
+    },
+    {
+      "epoch": 0.6521991412324475,
+      "grad_norm": 0.4374902546405792,
+      "learning_rate": 0.0001917418339788936,
+      "loss": 0.895,
+      "step": 2810
+    },
+    {
+      "epoch": 0.6524312405709644,
+      "grad_norm": 0.4497464895248413,
+      "learning_rate": 0.000191736029310339,
+      "loss": 0.8953,
+      "step": 2811
+    },
+    {
+      "epoch": 0.6526633399094812,
+      "grad_norm": 0.4323320686817169,
+      "learning_rate": 0.00019173022269037548,
+      "loss": 0.8703,
+      "step": 2812
+    },
+    {
+      "epoch": 0.6528954392479981,
+      "grad_norm": 0.45908528566360474,
+      "learning_rate": 0.00019172441411912657,
+      "loss": 0.8765,
+      "step": 2813
+    },
+    {
+      "epoch": 0.6531275385865151,
+      "grad_norm": 0.41703182458877563,
+      "learning_rate": 0.00019171860359671583,
+      "loss": 0.8681,
+      "step": 2814
+    },
+    {
+      "epoch": 0.6533596379250319,
+      "grad_norm": 0.45060259103775024,
+      "learning_rate": 0.00019171279112326683,
+      "loss": 0.8919,
+      "step": 2815
+    },
+    {
+      "epoch": 0.6535917372635488,
+      "grad_norm": 0.4701296389102936,
+      "learning_rate": 0.00019170697669890324,
+      "loss": 0.8749,
+      "step": 2816
+    },
+    {
+      "epoch": 0.6538238366020657,
+      "grad_norm": 0.4668188691139221,
+      "learning_rate": 0.00019170116032374876,
+      "loss": 0.8601,
+      "step": 2817
+    },
+    {
+      "epoch": 0.6540559359405825,
+      "grad_norm": 0.42963141202926636,
+      "learning_rate": 0.0001916953419979271,
+      "loss": 0.884,
+      "step": 2818
+    },
+    {
+      "epoch": 0.6542880352790994,
+      "grad_norm": 0.5206764340400696,
+      "learning_rate": 0.00019168952172156202,
+      "loss": 0.8831,
+      "step": 2819
+    },
+    {
+      "epoch": 0.6545201346176164,
+      "grad_norm": 0.4822680652141571,
+      "learning_rate": 0.0001916836994947773,
+      "loss": 0.8141,
+      "step": 2820
+    },
+    {
+      "epoch": 0.6547522339561332,
+      "grad_norm": 0.44132062792778015,
+      "learning_rate": 0.00019167787531769684,
+      "loss": 0.8837,
+      "step": 2821
+    },
+    {
+      "epoch": 0.6549843332946501,
+      "grad_norm": 0.47267404198646545,
+      "learning_rate": 0.00019167204919044451,
+      "loss": 0.9059,
+      "step": 2822
+    },
+    {
+      "epoch": 0.655216432633167,
+      "grad_norm": 0.4189220070838928,
+      "learning_rate": 0.00019166622111314426,
+      "loss": 0.8696,
+      "step": 2823
+    },
+    {
+      "epoch": 0.6554485319716838,
+      "grad_norm": 0.41616180539131165,
+      "learning_rate": 0.0001916603910859201,
+      "loss": 0.8296,
+      "step": 2824
+    },
+    {
+      "epoch": 0.6556806313102007,
+      "grad_norm": 0.4162457287311554,
+      "learning_rate": 0.00019165455910889593,
+      "loss": 0.8204,
+      "step": 2825
+    },
+    {
+      "epoch": 0.6559127306487177,
+      "grad_norm": 0.4778987467288971,
+      "learning_rate": 0.0001916487251821959,
+      "loss": 0.8528,
+      "step": 2826
+    },
+    {
+      "epoch": 0.6561448299872346,
+      "grad_norm": 0.4973873198032379,
+      "learning_rate": 0.0001916428893059441,
+      "loss": 0.8403,
+      "step": 2827
+    },
+    {
+      "epoch": 0.6563769293257514,
+      "grad_norm": 0.4930678904056549,
+      "learning_rate": 0.00019163705148026464,
+      "loss": 0.8223,
+      "step": 2828
+    },
+    {
+      "epoch": 0.6566090286642683,
+      "grad_norm": 0.44355422258377075,
+      "learning_rate": 0.00019163121170528175,
+      "loss": 0.8361,
+      "step": 2829
+    },
+    {
+      "epoch": 0.6568411280027852,
+      "grad_norm": 0.45476454496383667,
+      "learning_rate": 0.0001916253699811196,
+      "loss": 0.8712,
+      "step": 2830
+    },
+    {
+      "epoch": 0.657073227341302,
+      "grad_norm": 0.4533182382583618,
+      "learning_rate": 0.00019161952630790248,
+      "loss": 0.8984,
+      "step": 2831
+    },
+    {
+      "epoch": 0.657305326679819,
+      "grad_norm": 0.4435712695121765,
+      "learning_rate": 0.0001916136806857547,
+      "loss": 0.8294,
+      "step": 2832
+    },
+    {
+      "epoch": 0.6575374260183359,
+      "grad_norm": 0.5167298316955566,
+      "learning_rate": 0.00019160783311480061,
+      "loss": 0.9074,
+      "step": 2833
+    },
+    {
+      "epoch": 0.6577695253568527,
+      "grad_norm": 0.48255985975265503,
+      "learning_rate": 0.00019160198359516456,
+      "loss": 0.8771,
+      "step": 2834
+    },
+    {
+      "epoch": 0.6580016246953696,
+      "grad_norm": 0.49954113364219666,
+      "learning_rate": 0.00019159613212697108,
+      "loss": 0.837,
+      "step": 2835
+    },
+    {
+      "epoch": 0.6582337240338865,
+      "grad_norm": 0.45875173807144165,
+      "learning_rate": 0.00019159027871034452,
+      "loss": 0.9007,
+      "step": 2836
+    },
+    {
+      "epoch": 0.6584658233724033,
+      "grad_norm": 0.4180905818939209,
+      "learning_rate": 0.00019158442334540947,
+      "loss": 0.9139,
+      "step": 2837
+    },
+    {
+      "epoch": 0.6586979227109203,
+      "grad_norm": 0.492866188287735,
+      "learning_rate": 0.00019157856603229048,
+      "loss": 0.8481,
+      "step": 2838
+    },
+    {
+      "epoch": 0.6589300220494372,
+      "grad_norm": 0.45765408873558044,
+      "learning_rate": 0.0001915727067711121,
+      "loss": 0.8913,
+      "step": 2839
+    },
+    {
+      "epoch": 0.659162121387954,
+      "grad_norm": 0.4523009657859802,
+      "learning_rate": 0.00019156684556199903,
+      "loss": 0.8815,
+      "step": 2840
+    },
+    {
+      "epoch": 0.6593942207264709,
+      "grad_norm": 0.463329941034317,
+      "learning_rate": 0.00019156098240507592,
+      "loss": 0.8844,
+      "step": 2841
+    },
+    {
+      "epoch": 0.6596263200649878,
+      "grad_norm": 0.4301539957523346,
+      "learning_rate": 0.00019155511730046748,
+      "loss": 0.8209,
+      "step": 2842
+    },
+    {
+      "epoch": 0.6598584194035046,
+      "grad_norm": 0.4687608480453491,
+      "learning_rate": 0.0001915492502482985,
+      "loss": 0.8791,
+      "step": 2843
+    },
+    {
+      "epoch": 0.6600905187420216,
+      "grad_norm": 0.46065258979797363,
+      "learning_rate": 0.00019154338124869377,
+      "loss": 0.8791,
+      "step": 2844
+    },
+    {
+      "epoch": 0.6603226180805385,
+      "grad_norm": 0.4436477720737457,
+      "learning_rate": 0.0001915375103017781,
+      "loss": 0.879,
+      "step": 2845
+    },
+    {
+      "epoch": 0.6605547174190554,
+      "grad_norm": 0.4415607750415802,
+      "learning_rate": 0.0001915316374076764,
+      "loss": 0.8601,
+      "step": 2846
+    },
+    {
+      "epoch": 0.6607868167575722,
+      "grad_norm": 0.46711909770965576,
+      "learning_rate": 0.00019152576256651366,
+      "loss": 0.8796,
+      "step": 2847
+    },
+    {
+      "epoch": 0.6610189160960891,
+      "grad_norm": 0.4268472194671631,
+      "learning_rate": 0.0001915198857784148,
+      "loss": 0.8689,
+      "step": 2848
+    },
+    {
+      "epoch": 0.661251015434606,
+      "grad_norm": 0.3973580002784729,
+      "learning_rate": 0.0001915140070435048,
+      "loss": 0.8466,
+      "step": 2849
+    },
+    {
+      "epoch": 0.6614831147731229,
+      "grad_norm": 0.4282270669937134,
+      "learning_rate": 0.00019150812636190874,
+      "loss": 0.8451,
+      "step": 2850
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.2650486414966784e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null