Training in progress, step 9000, checkpoint
Browse files- last-checkpoint/global_step9000/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step9000/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +82 -3
last-checkpoint/global_step9000/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:679e19d21666fbd176c864156b8c7c5845f88b7b624f9f324e9eb22bab3c33a8
|
3 |
+
size 197282509
|
last-checkpoint/global_step9000/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6356d5f6dbd695e172a73eb0cd4421c7df86c325fd2f348d4dd4f6b845ac6360
|
3 |
+
size 180416968
|
last-checkpoint/global_step9000/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daa070acd5ce24cc23b06b7e3a05c59cfe018dedd7e0549cb622be5c6e28c3ee
|
3 |
+
size 180416776
|
last-checkpoint/global_step9000/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:919f84c4ecb818f8992b84076e993b1f58c302d515e1a4ae2b55e824c168f7bf
|
3 |
+
size 180416776
|
last-checkpoint/global_step9000/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99c1939255a8747c77a8d857a22c8263f74db18d6ae16f0202ffd6e0337c417d
|
3 |
+
size 180416904
|
last-checkpoint/global_step9000/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3107ded7bc86a29baf90b127f260aa7a678cf5b4b3a2a5e5a6dd2a12337bfa1c
|
3 |
+
size 180416712
|
last-checkpoint/global_step9000/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:830ba11f654bbe488f96b088b48dfdea45f74335584890df96d7dfcd32adbaa9
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step9000
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ab65fcd6eef541b0eab0651d0717f1badda42d7fd92bfb4075b331da2037828
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:04a7c505ffdf7349443f4571264a08aa5d00e509401833cfbc8ee36e3f78ae54
|
3 |
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95d7ca58acb8d3876c3a7f75f1478ea294c4dc3f3a656bcf55937f21b39bafeb
|
3 |
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b892c9e25d561484c4d46a997b69468880c163891890ad309278d371355e9ba
|
3 |
size 15536
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8a81bb1b67d6e9cc674626ce2fca3b1db95526692490a7cca8768ee12d6e2e7
|
3 |
+
size 15536
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15472
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfa5c5c42cdaccb27d45e096ddb8bbb4eafd2783ecc504d34c4ed6a4e70b1732
|
3 |
size 15472
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abd2f2fb50fa21042e610837b6b9a95060d6b9c41e23ab7ce19a6228e9dd5bb9
|
3 |
size 15536
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc228f89b1f6600cf303d1bda56efe53cce614e2f06ad7a65194b3b38bd74bd7
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1233,6 +1233,85 @@
|
|
1233 |
"learning_rate": 0.0001,
|
1234 |
"loss": 0.0225,
|
1235 |
"step": 8500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1236 |
}
|
1237 |
],
|
1238 |
"logging_steps": 50,
|
@@ -1252,7 +1331,7 @@
|
|
1252 |
"attributes": {}
|
1253 |
}
|
1254 |
},
|
1255 |
-
"total_flos": 2.
|
1256 |
"train_batch_size": 64,
|
1257 |
"trial_name": null,
|
1258 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 243.24324324324326,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 9000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1233 |
"learning_rate": 0.0001,
|
1234 |
"loss": 0.0225,
|
1235 |
"step": 8500
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 231.0810810810811,
|
1239 |
+
"grad_norm": 0.31986966729164124,
|
1240 |
+
"learning_rate": 0.0001,
|
1241 |
+
"loss": 0.0217,
|
1242 |
+
"step": 8550
|
1243 |
+
},
|
1244 |
+
{
|
1245 |
+
"epoch": 232.43243243243242,
|
1246 |
+
"grad_norm": 0.27697187662124634,
|
1247 |
+
"learning_rate": 0.0001,
|
1248 |
+
"loss": 0.0205,
|
1249 |
+
"step": 8600
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"epoch": 233.78378378378378,
|
1253 |
+
"grad_norm": 0.3078053593635559,
|
1254 |
+
"learning_rate": 0.0001,
|
1255 |
+
"loss": 0.0185,
|
1256 |
+
"step": 8650
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 235.13513513513513,
|
1260 |
+
"grad_norm": 0.24676857888698578,
|
1261 |
+
"learning_rate": 0.0001,
|
1262 |
+
"loss": 0.0202,
|
1263 |
+
"step": 8700
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"epoch": 236.48648648648648,
|
1267 |
+
"grad_norm": 0.2980283498764038,
|
1268 |
+
"learning_rate": 0.0001,
|
1269 |
+
"loss": 0.0202,
|
1270 |
+
"step": 8750
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"epoch": 237.83783783783784,
|
1274 |
+
"grad_norm": 0.34748488664627075,
|
1275 |
+
"learning_rate": 0.0001,
|
1276 |
+
"loss": 0.0188,
|
1277 |
+
"step": 8800
|
1278 |
+
},
|
1279 |
+
{
|
1280 |
+
"epoch": 239.1891891891892,
|
1281 |
+
"grad_norm": 0.31379759311676025,
|
1282 |
+
"learning_rate": 0.0001,
|
1283 |
+
"loss": 0.0195,
|
1284 |
+
"step": 8850
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 240.54054054054055,
|
1288 |
+
"grad_norm": 0.31512585282325745,
|
1289 |
+
"learning_rate": 0.0001,
|
1290 |
+
"loss": 0.0197,
|
1291 |
+
"step": 8900
|
1292 |
+
},
|
1293 |
+
{
|
1294 |
+
"epoch": 241.8918918918919,
|
1295 |
+
"grad_norm": 0.28801149129867554,
|
1296 |
+
"learning_rate": 0.0001,
|
1297 |
+
"loss": 0.0188,
|
1298 |
+
"step": 8950
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 243.24324324324326,
|
1302 |
+
"grad_norm": 0.29776033759117126,
|
1303 |
+
"learning_rate": 0.0001,
|
1304 |
+
"loss": 0.0196,
|
1305 |
+
"step": 9000
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"epoch": 243.24324324324326,
|
1309 |
+
"eval_loss": 0.6647829413414001,
|
1310 |
+
"eval_runtime": 22.108,
|
1311 |
+
"eval_samples_per_second": 70.879,
|
1312 |
+
"eval_steps_per_second": 0.226,
|
1313 |
+
"eval_wer": 0.19295266397792402,
|
1314 |
+
"step": 9000
|
1315 |
}
|
1316 |
],
|
1317 |
"logging_steps": 50,
|
|
|
1331 |
"attributes": {}
|
1332 |
}
|
1333 |
},
|
1334 |
+
"total_flos": 2.5263079974749025e+20,
|
1335 |
"train_batch_size": 64,
|
1336 |
"trial_name": null,
|
1337 |
"trial_params": null
|