Training in progress, step 640, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c42e3144125416adcb9a1ec0d9b0eb55619b6a3619ed6542a06de661bbf161b2
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:e59a37c02b250fde83ba038c839a2e952bf520ce910f43a7857db234f396d3f0
 size 100697728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9177ecdd03afda4d13d5d2eb2a0c1d5fbe522c00f0127e7917ebf328b282a0c8
 size 201541754

 version https://git-lfs.github.com/spec/v1
+oid sha256:79a9df9fefae970af178ffc5c6daff5bac43c4ffccc8bb655e4c8b0717bde90a
 size 201541754

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57a916e4fd36223dffeadbeb32e21c87fb935df188a4e2e19aafc7b1c3d84241
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e72fb99776019a0f4753f71a958f1e0ab8cd89837117e79e8970f4ea20b12a6d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:398bdc23ac4d9e39643bb660fbaeca4b591289face79e5178809ed45f99c413b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbd83824b10c800d2a1e10e6af2da6cf8778074505a180484fec6f86647c2253
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.22182468694096602,
   "eval_steps": 500,
-  "global_step": 620,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -503,14 +503,30 @@
       "loss": 0.4495,
       "num_input_tokens_seen": 413867,
       "step": 620
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
-  "num_input_tokens_seen": 413867,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 9306407303387136.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.22898032200357782,
   "eval_steps": 500,
+  "global_step": 640,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "loss": 0.4495,
       "num_input_tokens_seen": 413867,
       "step": 620
+    },
+    {
+      "epoch": 0.22540250447227192,
+      "grad_norm": 0.41710999608039856,
+      "learning_rate": 0.00015491949910554563,
+      "loss": 0.426,
+      "num_input_tokens_seen": 422712,
+      "step": 630
+    },
+    {
+      "epoch": 0.22898032200357782,
+      "grad_norm": 0.42847341299057007,
+      "learning_rate": 0.00015420393559928446,
+      "loss": 0.4163,
+      "num_input_tokens_seen": 428523,
+      "step": 640
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
+  "num_input_tokens_seen": 428523,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 9635968987305984.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null