Upload v2 model

Browse files

Files changed (7) hide show

optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scaler.pt +1 -1
scheduler.pt +1 -1
trainer_state.json +17 -65
training_args.bin +1 -1

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8bd3f6a9c08e12f0995b8987b6aa0be12f6f347b899e266e2a788f00e75afe7d
 size 10524831541

 version https://git-lfs.github.com/spec/v1
+oid sha256:beb1dfe9e3fb3e4565d46305a1c359d55c6d170b2ea69871ebfc0920f690f10a
 size 10524831541

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2eca4b63e00c0cf643a0b4cbbeda9874195025489d5dd449f7db2b9e80ab0db0
 size 5363104725

 version https://git-lfs.github.com/spec/v1
+oid sha256:209f413dc67f384b46885e98571da206fc6c4457c89d15a2ff44ca826f01b17d
 size 5363104725

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b77d9a5a81ac8b69e0670f5933e23ce7468b78385324a6bbb8b86fd0ba74fb6b
 size 15597

 version https://git-lfs.github.com/spec/v1
+oid sha256:369852fda86a4e5f67261d035b21273f6fcf461fb59c7bbda54eb04e9cbed42f
 size 15597

scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5b77c67ca3fb0d7234d39f939d0791074eda83a0f90bb8b136d320c3a473fa62
 size 557

 version https://git-lfs.github.com/spec/v1
+oid sha256:93dcbb27405421069b5f31003e3f224646e375ff4c9d7ba4b0d265614f660d0f
 size 557

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35d421c455531e443ef68bd5042b7f963d2347c483d8dfcb6aeb3c14fd4ba202
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4ad8ded642fb7c5cc9603be3aae650f1ddaee94d88bc96085f01683245bac67
 size 627

trainer_state.json CHANGED Viewed

@@ -1,88 +1,40 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.875739644970414,
-  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.74,
-      "learning_rate": 4.96e-05,
-      "loss": 1.8134,
       "step": 500
     },
     {
-      "epoch": 1.48,
-      "learning_rate": 4.996328645447816e-05,
-      "loss": 1.3742,
       "step": 1000
     },
     {
-      "epoch": 2.22,
-      "learning_rate": 4.992627683197632e-05,
-      "loss": 1.122,
       "step": 1500
     },
     {
-      "epoch": 2.96,
-      "learning_rate": 4.988934122871947e-05,
-      "loss": 0.8645,
       "step": 2000
-    },
-    {
-      "epoch": 3.7,
-      "learning_rate": 4.9852553663952626e-05,
-      "loss": 0.6174,
-      "step": 2500
-    },
-    {
-      "epoch": 4.44,
-      "learning_rate": 4.9815618060695786e-05,
-      "loss": 0.5355,
-      "step": 3000
-    },
-    {
-      "epoch": 5.18,
-      "learning_rate": 4.977868245743893e-05,
-      "loss": 0.4336,
-      "step": 3500
-    },
-    {
-      "epoch": 5.92,
-      "learning_rate": 4.974167283493709e-05,
-      "loss": 0.3695,
-      "step": 4000
-    },
-    {
-      "epoch": 6.66,
-      "learning_rate": 4.970466321243524e-05,
-      "loss": 0.3013,
-      "step": 4500
-    },
-    {
-      "epoch": 7.4,
-      "learning_rate": 4.966772760917839e-05,
-      "loss": 0.2954,
-      "step": 5000
-    },
-    {
-      "epoch": 8.14,
-      "learning_rate": 4.963071798667654e-05,
-      "loss": 0.2812,
-      "step": 5500
-    },
-    {
-      "epoch": 8.88,
-      "learning_rate": 4.959370836417469e-05,
-      "loss": 0.2718,
-      "step": 6000
     }
   ],
-  "max_steps": 676000,
-  "num_train_epochs": 1000,
-  "total_flos": 1.2118642683671347e+17,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.760546642899584,
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 1.19,
+      "learning_rate": 1.9920000000000002e-05,
+      "loss": 1.8071,
       "step": 500
     },
     {
+      "epoch": 2.38,
+      "learning_rate": 3.9920000000000004e-05,
+      "loss": 1.2711,
       "step": 1000
     },
     {
+      "epoch": 3.57,
+      "learning_rate": 3.951414634146342e-05,
+      "loss": 0.9311,
       "step": 1500
     },
     {
+      "epoch": 4.76,
+      "learning_rate": 3.9026341463414634e-05,
+      "loss": 0.6448,
       "step": 2000
     }
   ],
+  "max_steps": 42000,
+  "num_train_epochs": 100,
+  "total_flos": 1.4237038007161651e+17,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9949c173097804108f0625268b24c63f3f1c70d9342dfee2164c0e5dc8f16ef3
 size 3259

 version https://git-lfs.github.com/spec/v1
+oid sha256:79a1524a3e0cf9ed521d405e1d37b687d354f40a4093feacd6350515bf4392c1
 size 3259