Training in progress, step 265, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62dd58f5f07e9cfe5165e4ee91690a196110eda1989f229f57e089616cbea092
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:d3e35886bdcf00795dc56e4c37d7f8b9156e3af207c4f5197e9b99f856611064
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6e698cb8a9f1b2feee9bca00e0a141b2948dbdee0009ec8f7c9aeccb939cad8
 size 49846644

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e666486e8cc62b97d906ea52ffc730c72ad81956be23d6dde2d1167414d1fd1
 size 49846644

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ab3419023c7faeaec921cfbb345173fbd2d7548f26f89a44a67a4f452b92d5d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:db7e8cfd6e7b1242076647967dbc1bd8afa97abb68b4228e97c7e65538d0340d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1b8e5ce2718054463127476242d5f2ee90d1229b9c9ce677374004099bb2fb5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e07629d02aee1dc6fe60d67e7c3e731bc344bba13e8c3da66188957923131f6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.33646069233257847,
   "eval_steps": 386,
-  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1835,6 +1835,41 @@
       "learning_rate": 9.925227316586316e-05,
       "loss": 0.8119,
       "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1854,7 +1889,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.906636646403277e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.3429310902620511,
   "eval_steps": 386,
+  "global_step": 265,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 9.925227316586316e-05,
       "loss": 0.8119,
       "step": 260
+    },
+    {
+      "epoch": 0.337754771918473,
+      "grad_norm": 0.8329979181289673,
+      "learning_rate": 9.924515301760606e-05,
+      "loss": 0.8467,
+      "step": 261
+    },
+    {
+      "epoch": 0.33904885150436753,
+      "grad_norm": 0.801017701625824,
+      "learning_rate": 9.923799938720488e-05,
+      "loss": 0.8333,
+      "step": 262
+    },
+    {
+      "epoch": 0.34034293109026204,
+      "grad_norm": 0.9083892703056335,
+      "learning_rate": 9.923081227952347e-05,
+      "loss": 0.8727,
+      "step": 263
+    },
+    {
+      "epoch": 0.3416370106761566,
+      "grad_norm": 0.7917154431343079,
+      "learning_rate": 9.922359169944834e-05,
+      "loss": 1.0341,
+      "step": 264
+    },
+    {
+      "epoch": 0.3429310902620511,
+      "grad_norm": 0.6865798234939575,
+      "learning_rate": 9.921633765188886e-05,
+      "loss": 0.9117,
+      "step": 265
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 2.962533504987955e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null