ncbateman commited on
Commit
42f4184
1 Parent(s): 863f91a

Training in progress, step 460, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b043cd7a58160e6996c90fab1beb1517bb2613e4b0999b16dca09195ed9daad8
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8e766c93c4cb51d235429ab576d2269c0fe74ccc12cbec07722e592fc31d83
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa5d29c4411fdeb35a1244b024f4c5c0c57180230285a448b1e43fc79ce803fc
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f249a9f18d595322f53870ff656ff9f0fb190feac83ed8e8e07c86b79d402d
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:450d4f0e3a0bdbd8c489459f618a362a3c0456d0f149f06ab048c544e79c9d17
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4d5ea361bd8f109ccf9deae94ad5b06097316390b345e2c5b8dfcae47e6460a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ef7e9caf74d940d149631b7d434e8f3ed3d9f20dbfe42784c1a25924da5d43b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702e6ac2a5684998fe08dbec2461764b61e07652058ef3b71a2777c5464d9e27
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5888062115820123,
5
  "eval_steps": 386,
6
- "global_step": 455,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3208,6 +3208,41 @@
3208
  "learning_rate": 9.72377690802055e-05,
3209
  "loss": 0.9959,
3210
  "step": 455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3211
  }
3212
  ],
3213
  "logging_steps": 1,
@@ -3227,7 +3262,7 @@
3227
  "attributes": {}
3228
  }
3229
  },
3230
- "total_flos": 5.0866141312057344e+17,
3231
  "train_batch_size": 4,
3232
  "trial_name": null,
3233
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.595276609511485,
5
  "eval_steps": 386,
6
+ "global_step": 460,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3208
  "learning_rate": 9.72377690802055e-05,
3209
  "loss": 0.9959,
3210
  "step": 455
3211
+ },
3212
+ {
3213
+ "epoch": 0.5901002911679069,
3214
+ "grad_norm": 0.8143338561058044,
3215
+ "learning_rate": 9.722423938195922e-05,
3216
+ "loss": 0.9954,
3217
+ "step": 456
3218
+ },
3219
+ {
3220
+ "epoch": 0.5913943707538014,
3221
+ "grad_norm": 0.6839838027954102,
3222
+ "learning_rate": 9.721067757560303e-05,
3223
+ "loss": 0.7288,
3224
+ "step": 457
3225
+ },
3226
+ {
3227
+ "epoch": 0.5926884503396959,
3228
+ "grad_norm": 0.8407920598983765,
3229
+ "learning_rate": 9.719708367035767e-05,
3230
+ "loss": 0.858,
3231
+ "step": 458
3232
+ },
3233
+ {
3234
+ "epoch": 0.5939825299255904,
3235
+ "grad_norm": 0.8388239741325378,
3236
+ "learning_rate": 9.718345767546576e-05,
3237
+ "loss": 0.8455,
3238
+ "step": 459
3239
+ },
3240
+ {
3241
+ "epoch": 0.595276609511485,
3242
+ "grad_norm": 0.7476726770401001,
3243
+ "learning_rate": 9.716979960019173e-05,
3244
+ "loss": 0.8261,
3245
+ "step": 460
3246
  }
3247
  ],
3248
  "logging_steps": 1,
 
3262
  "attributes": {}
3263
  }
3264
  },
3265
+ "total_flos": 5.142510989790413e+17,
3266
  "train_batch_size": 4,
3267
  "trial_name": null,
3268
  "trial_params": null