mattbonnell committed on
Commit
3c03d57
·
verified ·
1 Parent(s): 14e7c29

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/global_step8500/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b00bfac3db53b521d614ce880e37feda5c474c7fba505a80a6d87d89fa371d6
3
+ size 197282509
last-checkpoint/global_step8500/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91cd20ad8ecf66c964fee0c8463fbedecf792a1a57c7fdec1d0fad4e4e1d8b35
3
+ size 180416968
last-checkpoint/global_step8500/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b04765786097e4990a2170914e6f9d7c27447c8ec041bca13a538f7ff9b0eecc
3
+ size 180416776
last-checkpoint/global_step8500/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:549824e4bab052048978b41ec5aa592913f15740cf737e38321c0b486d0527a9
3
+ size 180416776
last-checkpoint/global_step8500/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b82f439bd80358859433ccfec8b6f84a61360667568e37f9ff58752b32be9b5
3
+ size 180416904
last-checkpoint/global_step8500/zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8e2d0d67801717d7ec7333d951d1911fdc8344f8fa1ce8a7c4fd95796ec354
3
+ size 180416712
last-checkpoint/global_step8500/zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29df9d086627bd5788f2c85fc4c8e2f1f7f432ae7fe8b1242018731c04f2930
3
+ size 180417096
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step8000
 
1
+ global_step8500
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e2ccd66b0d3eac61fdd022c65f61fec2848db2ff54d1c43e2a14a9f026c0394
3
  size 188836816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7d95a2a6398a3976684f9f296988a5e39dde3afb33e0d969993bcbd7dcacaca
3
  size 188836816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea500e099e374ab8f40a21f5238cc4b753325af0d8cddf9294790e0f04153e7a
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:564a5d159c6784b209d5ba2261c4de8c25ee8803b9c92f51fd1e9fd52c1b3fc4
3
  size 15536
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11e26d2216e18df43db8309fcc95dba493bd2243b1ee081f23d44d1d51767fbd
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cbf33bcff4e9bc7aead0f12ef6877a895f2040493ae419349b5a3b110b5036e
3
  size 15536
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5dbf9199d9fbbe4519352983976ccd7fc8c6c8f988fbb95736f5368013dce0c
3
  size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ce0b2876ec405f01219dce5c2551d896b5f10a920f11726ba040105793eaabb
3
  size 15536
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83e6add4149c20ee1bbd11a39e647cd83b6d5e92df39787f3e4dd6d4c2c9362a
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a83285a5f0517959619b1f7f7827cf51e67685851a9aed6f10647402c51355
3
+ size 15472
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdd4141f9ac586703ff022ae4a72874450b5b0b99397d5a0cb6dd7caffc6ebd4
3
- size 15536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45bda445d230c33252857d7ba14a68c6656232ceb63109a76eba6e8e0d43306
3
+ size 15472
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca1450055d7dfb8743ae0d1ace8a5ace9c1bef6e7e75026e510a4b2960a133be
3
- size 15472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:976477c91777116536dc7d0dffbe92366832796b5f9e9ba3c59be4799747b19a
3
+ size 15536
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cad4268cfb586000e1a64ea1e79a3a6d50e4d70a575ea40791dc76489a8bd7bb
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b71db0e061be5d6991df2e521349de0fd5a5ff99a4bfc1c920bc3894a6254c9f
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 216.21621621621622,
5
  "eval_steps": 1500,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1163,6 +1163,76 @@
1163
  "learning_rate": 0.0001,
1164
  "loss": 0.022,
1165
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1166
  }
1167
  ],
1168
  "logging_steps": 50,
@@ -1182,7 +1252,7 @@
1182
  "attributes": {}
1183
  }
1184
  },
1185
- "total_flos": 2.2456068419295904e+20,
1186
  "train_batch_size": 64,
1187
  "trial_name": null,
1188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 229.72972972972974,
5
  "eval_steps": 1500,
6
+ "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1163
  "learning_rate": 0.0001,
1164
  "loss": 0.022,
1165
  "step": 8000
1166
+ },
1167
+ {
1168
+ "epoch": 217.56756756756758,
1169
+ "grad_norm": 0.38513001799583435,
1170
+ "learning_rate": 0.0001,
1171
+ "loss": 0.0215,
1172
+ "step": 8050
1173
+ },
1174
+ {
1175
+ "epoch": 218.9189189189189,
1176
+ "grad_norm": 0.400036484003067,
1177
+ "learning_rate": 0.0001,
1178
+ "loss": 0.021,
1179
+ "step": 8100
1180
+ },
1181
+ {
1182
+ "epoch": 220.27027027027026,
1183
+ "grad_norm": 0.3203113377094269,
1184
+ "learning_rate": 0.0001,
1185
+ "loss": 0.0207,
1186
+ "step": 8150
1187
+ },
1188
+ {
1189
+ "epoch": 221.6216216216216,
1190
+ "grad_norm": 0.3765117824077606,
1191
+ "learning_rate": 0.0001,
1192
+ "loss": 0.0197,
1193
+ "step": 8200
1194
+ },
1195
+ {
1196
+ "epoch": 222.97297297297297,
1197
+ "grad_norm": 0.3336365222930908,
1198
+ "learning_rate": 0.0001,
1199
+ "loss": 0.0211,
1200
+ "step": 8250
1201
+ },
1202
+ {
1203
+ "epoch": 224.32432432432432,
1204
+ "grad_norm": 0.29828354716300964,
1205
+ "learning_rate": 0.0001,
1206
+ "loss": 0.0188,
1207
+ "step": 8300
1208
+ },
1209
+ {
1210
+ "epoch": 225.67567567567568,
1211
+ "grad_norm": 0.34553930163383484,
1212
+ "learning_rate": 0.0001,
1213
+ "loss": 0.0199,
1214
+ "step": 8350
1215
+ },
1216
+ {
1217
+ "epoch": 227.02702702702703,
1218
+ "grad_norm": 0.3510328531265259,
1219
+ "learning_rate": 0.0001,
1220
+ "loss": 0.0215,
1221
+ "step": 8400
1222
+ },
1223
+ {
1224
+ "epoch": 228.3783783783784,
1225
+ "grad_norm": 0.48810675740242004,
1226
+ "learning_rate": 0.0001,
1227
+ "loss": 0.0217,
1228
+ "step": 8450
1229
+ },
1230
+ {
1231
+ "epoch": 229.72972972972974,
1232
+ "grad_norm": 0.34023284912109375,
1233
+ "learning_rate": 0.0001,
1234
+ "loss": 0.0225,
1235
+ "step": 8500
1236
  }
1237
  ],
1238
  "logging_steps": 50,
 
1252
  "attributes": {}
1253
  }
1254
  },
1255
+ "total_flos": 2.3861586914904637e+20,
1256
  "train_batch_size": 64,
1257
  "trial_name": null,
1258
  "trial_params": null