pilotj commited on
Commit
6651aef
·
verified ·
1 Parent(s): 257b0a6

Training in progress, step 10000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e1f2a52271c8cfdc722db3684a53097db8aee70296a993415177e22b8b3c5e0
3
  size 267906392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:486af49129a31e1b7c1e43e65eab8e26b8c4cc8ddc136711eef3f671ce00c3ec
3
  size 267906392
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b54f41f4d888087803f506752e2901f35a2c89665696ccb7156217f7828f3d6
3
  size 535874874
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde14063113d3963bdceb328049937cf5555849d1cc907876d7f93ae43926358
3
  size 535874874
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44a41369b5f42949ac33bbe224edbd4520b7fe7d1b7605b66225d2966ebb6e77
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb2ca652b13d24cd9ca0437acca204dd7dc408e95308fc4b43867a99c53a4ed
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2eda9f617ae6489ee1b2a0db69ddf3c24a61fa0c7670292a9c9be1991125aad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058930117d0715c41bf93049e22ed1e989ddac193b68a49ee9225dc685ba128a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.9059617519378662,
3
- "best_model_checkpoint": "/kaggle/working/results/checkpoint-8000",
4
- "epoch": 0.6233928153978026,
5
  "eval_steps": 1000,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -183,6 +183,50 @@
183
  "eval_samples_per_second": 104.044,
184
  "eval_steps_per_second": 0.82,
185
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  }
187
  ],
188
  "logging_steps": 500,
@@ -202,7 +246,7 @@
202
  "attributes": {}
203
  }
204
  },
205
- "total_flos": 3.3926168444928e+16,
206
  "train_batch_size": 32,
207
  "trial_name": null,
208
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8291246891021729,
3
+ "best_model_checkpoint": "/kaggle/working/results/checkpoint-10000",
4
+ "epoch": 0.7792410192472532,
5
  "eval_steps": 1000,
6
+ "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
183
  "eval_samples_per_second": 104.044,
184
  "eval_steps_per_second": 0.82,
185
  "step": 8000
186
+ },
187
+ {
188
+ "epoch": 0.6623548663601652,
189
+ "grad_norm": 3.0767014026641846,
190
+ "learning_rate": 3.376451336398348e-05,
191
+ "loss": 0.497,
192
+ "step": 8500
193
+ },
194
+ {
195
+ "epoch": 0.7013169173225279,
196
+ "grad_norm": 3.861445903778076,
197
+ "learning_rate": 2.9868308267747215e-05,
198
+ "loss": 0.4688,
199
+ "step": 9000
200
+ },
201
+ {
202
+ "epoch": 0.7013169173225279,
203
+ "eval_loss": 0.8691270351409912,
204
+ "eval_runtime": 50.1753,
205
+ "eval_samples_per_second": 103.637,
206
+ "eval_steps_per_second": 0.817,
207
+ "step": 9000
208
+ },
209
+ {
210
+ "epoch": 0.7402789682848905,
211
+ "grad_norm": 3.7857894897460938,
212
+ "learning_rate": 2.5972103171510948e-05,
213
+ "loss": 0.4853,
214
+ "step": 9500
215
+ },
216
+ {
217
+ "epoch": 0.7792410192472532,
218
+ "grad_norm": 6.56783390045166,
219
+ "learning_rate": 2.2075898075274684e-05,
220
+ "loss": 0.483,
221
+ "step": 10000
222
+ },
223
+ {
224
+ "epoch": 0.7792410192472532,
225
+ "eval_loss": 0.8291246891021729,
226
+ "eval_runtime": 49.9917,
227
+ "eval_samples_per_second": 104.017,
228
+ "eval_steps_per_second": 0.82,
229
+ "step": 10000
230
  }
231
  ],
232
  "logging_steps": 500,
 
246
  "attributes": {}
247
  }
248
  },
249
+ "total_flos": 4.240771055616e+16,
250
  "train_batch_size": 32,
251
  "trial_name": null,
252
  "trial_params": null