Training in progress, step 35, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7c25e36ef1bea88e955b04a392995659ce2efa2958e9a824de856926f2f78b2
 size 97307544

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a0904a44c1929f97e8bd4e5c46a96a8a3044de6f935bcf5630acdb0cdb6d739
 size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc69a4cb3de67ebaa3a95161b888f3e6a62143841950ec7d93681c428ce896bf
 size 49846260

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ef6f282d85d8e63c53b00534e682ea5282bac1edb49ab13b1b78e144354038b
 size 49846260

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e8303210a87e6366e53f9d2ad1dc5984114aa017ddbff7d118553d8efe51202
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e24fd0a4708a45e9b32be5aab9d4589ac1e498dcf2ba55e9e776e2a6e66e9b62
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a73e1ff9beffc13aa54f4adf4df9ed4ad8819cc503c53ddfd100ef74e91d520
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a97edf5ef9280d040e46685cc4e47c24383c42a51949ff834379ab1766a8b0a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.038822387576835975,
   "eval_steps": 386,
-  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -225,6 +225,41 @@
       "learning_rate": 6e-05,
       "loss": 1.3808,
       "step": 30
     }
   ],
   "logging_steps": 1,
@@ -244,7 +279,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.353811515080704e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.045292785506308636,
   "eval_steps": 386,
+  "global_step": 35,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6e-05,
       "loss": 1.3808,
       "step": 30
+    },
+    {
+      "epoch": 0.040116467162730506,
+      "grad_norm": 1.353873372077942,
+      "learning_rate": 6.2e-05,
+      "loss": 1.229,
+      "step": 31
+    },
+    {
+      "epoch": 0.04141054674862504,
+      "grad_norm": 1.2547746896743774,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 1.1668,
+      "step": 32
+    },
+    {
+      "epoch": 0.042704626334519574,
+      "grad_norm": 1.3806778192520142,
+      "learning_rate": 6.6e-05,
+      "loss": 1.0691,
+      "step": 33
+    },
+    {
+      "epoch": 0.043998705920414105,
+      "grad_norm": 1.2815773487091064,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 1.2409,
+      "step": 34
+    },
+    {
+      "epoch": 0.045292785506308636,
+      "grad_norm": 1.3677266836166382,
+      "learning_rate": 7e-05,
+      "loss": 0.9668,
+      "step": 35
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.912780100927488e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null