ncbateman committed
Commit
2690518
1 Parent(s): 5ae408e

Training in progress, step 360, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f630f599375b6f9e8b3d899cdec9f7ee694632d31c165a103037286f91bdb40d
+oid sha256:3aa9893a84cb0e84de34ca04383d58aed137598edfa5d6b72d961fad0f27735d
 size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f881a0164766ec0cdc916551a149a6e90b7c7bff3e56a1b2314770aebd4e485f
+oid sha256:1b961e1395983b9869fa73b1df3863725b49dac36d70d0a4af83933dde9e72f8
 size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e01ddaf507ffae0b11987877fc582c16ae0f24690bb23d34f7cf537c451f099
+oid sha256:ea983627119c27e833d8278aa2759c5160e60ebbfa2503f097a57924edc56a60
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3758bcabbf6a982afc57e0ef5b43cd5da58e8541454f29024c32acf06fb153c5
+oid sha256:3808f18a91c29579db27addbed742cebf6b7639d12834c99b15b5738ffd35626
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.45292785506308636,
+  "epoch": 0.4658686509220317,
   "eval_steps": 386,
-  "global_step": 350,
+  "global_step": 360,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2465,6 +2465,76 @@
       "learning_rate": 9.847799323991234e-05,
       "loss": 1.077,
       "step": 350
+    },
+    {
+      "epoch": 0.4542219346489809,
+      "grad_norm": 0.9179444909095764,
+      "learning_rate": 9.8467881843081e-05,
+      "loss": 0.8353,
+      "step": 351
+    },
+    {
+      "epoch": 0.4555160142348754,
+      "grad_norm": 0.855802595615387,
+      "learning_rate": 9.845773749257804e-05,
+      "loss": 0.9764,
+      "step": 352
+    },
+    {
+      "epoch": 0.45681009382077,
+      "grad_norm": 0.9086332321166992,
+      "learning_rate": 9.844756019530066e-05,
+      "loss": 1.0526,
+      "step": 353
+    },
+    {
+      "epoch": 0.4581041734066645,
+      "grad_norm": 0.890271782875061,
+      "learning_rate": 9.843734995816848e-05,
+      "loss": 0.8905,
+      "step": 354
+    },
+    {
+      "epoch": 0.45939825299255904,
+      "grad_norm": 0.7878096699714661,
+      "learning_rate": 9.842710678812351e-05,
+      "loss": 0.8706,
+      "step": 355
+    },
+    {
+      "epoch": 0.4606923325784536,
+      "grad_norm": 0.9886014461517334,
+      "learning_rate": 9.841683069213017e-05,
+      "loss": 0.9579,
+      "step": 356
+    },
+    {
+      "epoch": 0.4619864121643481,
+      "grad_norm": 0.8265432119369507,
+      "learning_rate": 9.840652167717526e-05,
+      "loss": 0.8528,
+      "step": 357
+    },
+    {
+      "epoch": 0.46328049175024266,
+      "grad_norm": 0.7354372143745422,
+      "learning_rate": 9.839617975026793e-05,
+      "loss": 0.775,
+      "step": 358
+    },
+    {
+      "epoch": 0.46457457133613717,
+      "grad_norm": 0.8311409950256348,
+      "learning_rate": 9.838580491843976e-05,
+      "loss": 1.0374,
+      "step": 359
+    },
+    {
+      "epoch": 0.4658686509220317,
+      "grad_norm": 0.8180521130561829,
+      "learning_rate": 9.837539718874464e-05,
+      "loss": 0.818,
+      "step": 360
     }
   ],
   "logging_steps": 1,
@@ -2484,7 +2554,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.912780100927488e+17,
+  "total_flos": 4.024573818096845e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null