jonathanjordan21 committed
Upload folder using huggingface_hub

- data/model.safetensors +1 -1
- data/optimizer.pt +1 -1
- data/rng_state.pth +1 -1
- data/scheduler.pt +1 -1
- data/trainer_state.json +703 -3
- data/training_args.bin +1 -1
data/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:502786fd8a726e2156aa16f8cdb0a508eaeff2e6b7935f2a126e5c7fb3fe4875
 size 576008736
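Each binary in this commit is stored via Git LFS, so the diff above only touches the three-line pointer file: the spec version, the SHA-256 of the blob, and its size in bytes. After downloading, the local file's digest can be checked against the pointer's oid. A minimal sketch, assuming the repository has been cloned locally (the path is illustrative, not part of the commit):

import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    # Hash in 1 MiB chunks so a multi-GB checkpoint never sits in memory at once.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected value taken from the new pointer for data/model.safetensors above.
expected = "502786fd8a726e2156aa16f8cdb0a508eaeff2e6b7935f2a126e5c7fb3fe4875"
print(lfs_sha256("data/model.safetensors") == expected)

The same check applies to the optimizer, scheduler, RNG-state, and training-args pointers below.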
data/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:31920983e209ab4ab448d37797fc73ec6f5a2c677beb1998c0c67cde4d40d85a
 size 1152256984
data/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c31ec632e18c6039ab923941c5b9a34579988b2eae98f203c7491579abcdc560
 size 14244
data/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cc76b89758f0a3c24f4601172ddc998440db62e30dfdfe43a93a78b9b974a013
 size 1064
data/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.02921027111026293,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 15000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6307,6 +6307,706 @@
       "learning_rate": 2.9948870643768915e-05,
       "loss": 1.7891,
       "step": 13500
+    },
+    {
+      "epoch": 0.026318454270346903,
+      "grad_norm": 5.705179214477539,
+      "learning_rate": 2.9948757024626645e-05,
+      "loss": 1.8502,
+      "step": 13515
+    },
+    {
+      "epoch": 0.026347664541457164,
+      "grad_norm": 4.427610397338867,
+      "learning_rate": 2.994864327959879e-05,
+      "loss": 1.746,
+      "step": 13530
+    },
+    {
+      "epoch": 0.026376874812567428,
+      "grad_norm": 2.9682793617248535,
+      "learning_rate": 2.994852940868631e-05,
+      "loss": 1.8766,
+      "step": 13545
+    },
+    {
+      "epoch": 0.02640608508367769,
+      "grad_norm": 2.8406543731689453,
+      "learning_rate": 2.9948415411890164e-05,
+      "loss": 1.8637,
+      "step": 13560
+    },
+    {
+      "epoch": 0.026435295354787954,
+      "grad_norm": 2.9661149978637695,
+      "learning_rate": 2.9948301289211308e-05,
+      "loss": 1.7703,
+      "step": 13575
+    },
+    {
+      "epoch": 0.026464505625898214,
+      "grad_norm": 2.961155652999878,
+      "learning_rate": 2.99481870406507e-05,
+      "loss": 1.8123,
+      "step": 13590
+    },
+    {
+      "epoch": 0.02649371589700848,
+      "grad_norm": 3.7241668701171875,
+      "learning_rate": 2.9948072666209308e-05,
+      "loss": 2.018,
+      "step": 13605
+    },
+    {
+      "epoch": 0.026522926168118743,
+      "grad_norm": 2.8102498054504395,
+      "learning_rate": 2.9947958165888096e-05,
+      "loss": 1.8577,
+      "step": 13620
+    },
+    {
+      "epoch": 0.026552136439229004,
+      "grad_norm": 2.061007022857666,
+      "learning_rate": 2.9947843539688027e-05,
+      "loss": 1.9684,
+      "step": 13635
+    },
+    {
+      "epoch": 0.02658134671033927,
+      "grad_norm": 4.699859619140625,
+      "learning_rate": 2.994772878761006e-05,
+      "loss": 1.9438,
+      "step": 13650
+    },
+    {
+      "epoch": 0.02661055698144953,
+      "grad_norm": 5.8805952072143555,
+      "learning_rate": 2.994761390965517e-05,
+      "loss": 1.8862,
+      "step": 13665
+    },
+    {
+      "epoch": 0.026639767252559794,
+      "grad_norm": 3.6178531646728516,
+      "learning_rate": 2.994749890582432e-05,
+      "loss": 1.9754,
+      "step": 13680
+    },
+    {
+      "epoch": 0.026668977523670055,
+      "grad_norm": 2.891448497772217,
+      "learning_rate": 2.9947383776118482e-05,
+      "loss": 1.8838,
+      "step": 13695
+    },
+    {
+      "epoch": 0.02669818779478032,
+      "grad_norm": 2.5380797386169434,
+      "learning_rate": 2.994726852053862e-05,
+      "loss": 2.0006,
+      "step": 13710
+    },
+    {
+      "epoch": 0.02672739806589058,
+      "grad_norm": 3.083801031112671,
+      "learning_rate": 2.994715313908571e-05,
+      "loss": 1.9287,
+      "step": 13725
+    },
+    {
+      "epoch": 0.026756608337000845,
+      "grad_norm": 3.9220306873321533,
+      "learning_rate": 2.9947037631760717e-05,
+      "loss": 2.0063,
+      "step": 13740
+    },
+    {
+      "epoch": 0.02678581860811111,
+      "grad_norm": 2.41329288482666,
+      "learning_rate": 2.994692199856462e-05,
+      "loss": 1.7779,
+      "step": 13755
+    },
+    {
+      "epoch": 0.02681502887922137,
+      "grad_norm": 3.137281656265259,
+      "learning_rate": 2.9946806239498392e-05,
+      "loss": 1.7686,
+      "step": 13770
+    },
+    {
+      "epoch": 0.026844239150331635,
+      "grad_norm": 3.8897507190704346,
+      "learning_rate": 2.994669035456301e-05,
+      "loss": 1.9879,
+      "step": 13785
+    },
+    {
+      "epoch": 0.026873449421441896,
+      "grad_norm": 2.888145685195923,
+      "learning_rate": 2.994657434375944e-05,
+      "loss": 2.0012,
+      "step": 13800
+    },
+    {
+      "epoch": 0.02690265969255216,
+      "grad_norm": 2.683145523071289,
+      "learning_rate": 2.9946458207088667e-05,
+      "loss": 1.8579,
+      "step": 13815
+    },
+    {
+      "epoch": 0.02693186996366242,
+      "grad_norm": 2.5023186206817627,
+      "learning_rate": 2.9946341944551668e-05,
+      "loss": 1.8899,
+      "step": 13830
+    },
+    {
+      "epoch": 0.026961080234772686,
+      "grad_norm": 4.522122383117676,
+      "learning_rate": 2.994622555614942e-05,
+      "loss": 1.8373,
+      "step": 13845
+    },
+    {
+      "epoch": 0.02699029050588295,
+      "grad_norm": 1.9197810888290405,
+      "learning_rate": 2.9946109041882902e-05,
+      "loss": 1.874,
+      "step": 13860
+    },
+    {
+      "epoch": 0.02701950077699321,
+      "grad_norm": 2.0907135009765625,
+      "learning_rate": 2.9945992401753103e-05,
+      "loss": 1.9878,
+      "step": 13875
+    },
+    {
+      "epoch": 0.027048711048103476,
+      "grad_norm": 3.0691592693328857,
+      "learning_rate": 2.9945875635761e-05,
+      "loss": 1.8859,
+      "step": 13890
+    },
+    {
+      "epoch": 0.027077921319213737,
+      "grad_norm": 2.0707552433013916,
+      "learning_rate": 2.9945758743907573e-05,
+      "loss": 1.7612,
+      "step": 13905
+    },
+    {
+      "epoch": 0.027107131590324,
+      "grad_norm": 2.2770462036132812,
+      "learning_rate": 2.994564172619381e-05,
+      "loss": 1.8028,
+      "step": 13920
+    },
+    {
+      "epoch": 0.027136341861434262,
+      "grad_norm": 2.681814193725586,
+      "learning_rate": 2.9945524582620695e-05,
+      "loss": 1.7967,
+      "step": 13935
+    },
+    {
+      "epoch": 0.027165552132544526,
+      "grad_norm": 3.0529186725616455,
+      "learning_rate": 2.994540731318922e-05,
+      "loss": 1.7972,
+      "step": 13950
+    },
+    {
+      "epoch": 0.027194762403654787,
+      "grad_norm": 3.369091033935547,
+      "learning_rate": 2.9945289917900368e-05,
+      "loss": 1.8092,
+      "step": 13965
+    },
+    {
+      "epoch": 0.027223972674765052,
+      "grad_norm": 2.190134048461914,
+      "learning_rate": 2.9945172396755124e-05,
+      "loss": 2.0228,
+      "step": 13980
+    },
+    {
+      "epoch": 0.027253182945875316,
+      "grad_norm": 2.805100202560425,
+      "learning_rate": 2.9945054749754483e-05,
+      "loss": 1.9312,
+      "step": 13995
+    },
+    {
+      "epoch": 0.027282393216985577,
+      "grad_norm": 2.195697546005249,
+      "learning_rate": 2.9944936976899433e-05,
+      "loss": 1.9791,
+      "step": 14010
+    },
+    {
+      "epoch": 0.027311603488095842,
+      "grad_norm": 1.723713755607605,
+      "learning_rate": 2.9944819078190967e-05,
+      "loss": 1.8542,
+      "step": 14025
+    },
+    {
+      "epoch": 0.027340813759206103,
+      "grad_norm": 2.633101463317871,
+      "learning_rate": 2.9944701053630075e-05,
+      "loss": 1.8127,
+      "step": 14040
+    },
+    {
+      "epoch": 0.027370024030316367,
+      "grad_norm": 1.9390171766281128,
+      "learning_rate": 2.9944582903217756e-05,
+      "loss": 1.9183,
+      "step": 14055
+    },
+    {
+      "epoch": 0.027399234301426628,
+      "grad_norm": 3.9491968154907227,
+      "learning_rate": 2.9944464626955003e-05,
+      "loss": 2.0849,
+      "step": 14070
+    },
+    {
+      "epoch": 0.027428444572536893,
+      "grad_norm": 2.4679179191589355,
+      "learning_rate": 2.9944346224842812e-05,
+      "loss": 1.9285,
+      "step": 14085
+    },
+    {
+      "epoch": 0.027457654843647157,
+      "grad_norm": 2.999509334564209,
+      "learning_rate": 2.994422769688218e-05,
+      "loss": 1.9523,
+      "step": 14100
+    },
+    {
+      "epoch": 0.027486865114757418,
+      "grad_norm": 3.8798091411590576,
+      "learning_rate": 2.9944109043074104e-05,
+      "loss": 1.9014,
+      "step": 14115
+    },
+    {
+      "epoch": 0.027516075385867683,
+      "grad_norm": 2.5288240909576416,
+      "learning_rate": 2.9943990263419582e-05,
+      "loss": 2.1135,
+      "step": 14130
+    },
+    {
+      "epoch": 0.027545285656977943,
+      "grad_norm": 2.2120304107666016,
+      "learning_rate": 2.994387135791962e-05,
+      "loss": 1.7418,
+      "step": 14145
+    },
+    {
+      "epoch": 0.027574495928088208,
+      "grad_norm": 2.805328607559204,
+      "learning_rate": 2.994375232657521e-05,
+      "loss": 1.8776,
+      "step": 14160
+    },
+    {
+      "epoch": 0.02760370619919847,
+      "grad_norm": 2.8841097354888916,
+      "learning_rate": 2.9943633169387365e-05,
+      "loss": 1.9106,
+      "step": 14175
+    },
+    {
+      "epoch": 0.027632916470308733,
+      "grad_norm": 1.8887025117874146,
+      "learning_rate": 2.994351388635708e-05,
+      "loss": 1.8916,
+      "step": 14190
+    },
+    {
+      "epoch": 0.027662126741418994,
+      "grad_norm": 2.8623757362365723,
+      "learning_rate": 2.9943394477485363e-05,
+      "loss": 1.8735,
+      "step": 14205
+    },
+    {
+      "epoch": 0.02769133701252926,
+      "grad_norm": 3.1046249866485596,
+      "learning_rate": 2.994327494277322e-05,
+      "loss": 1.9124,
+      "step": 14220
+    },
+    {
+      "epoch": 0.027720547283639523,
+      "grad_norm": 2.653933525085449,
+      "learning_rate": 2.9943155282221663e-05,
+      "loss": 1.9387,
+      "step": 14235
+    },
+    {
+      "epoch": 0.027749757554749784,
+      "grad_norm": 2.975820779800415,
+      "learning_rate": 2.9943035495831688e-05,
+      "loss": 1.8232,
+      "step": 14250
+    },
+    {
+      "epoch": 0.02777896782586005,
+      "grad_norm": 5.906015396118164,
+      "learning_rate": 2.9942915583604307e-05,
+      "loss": 1.9167,
+      "step": 14265
+    },
+    {
+      "epoch": 0.02780817809697031,
+      "grad_norm": 2.592456102371216,
+      "learning_rate": 2.994279554554054e-05,
+      "loss": 1.7433,
+      "step": 14280
+    },
+    {
+      "epoch": 0.027837388368080574,
+      "grad_norm": 5.042680263519287,
+      "learning_rate": 2.994267538164138e-05,
+      "loss": 1.7878,
+      "step": 14295
+    },
+    {
+      "epoch": 0.027866598639190835,
+      "grad_norm": 4.092184066772461,
+      "learning_rate": 2.9942555091907853e-05,
+      "loss": 1.6955,
+      "step": 14310
+    },
+    {
+      "epoch": 0.0278958089103011,
+      "grad_norm": 4.623755931854248,
+      "learning_rate": 2.994243467634097e-05,
+      "loss": 1.7866,
+      "step": 14325
+    },
+    {
+      "epoch": 0.027925019181411364,
+      "grad_norm": 3.042306661605835,
+      "learning_rate": 2.994231413494174e-05,
+      "loss": 1.8891,
+      "step": 14340
+    },
+    {
+      "epoch": 0.027954229452521625,
+      "grad_norm": 2.784275531768799,
+      "learning_rate": 2.9942193467711184e-05,
+      "loss": 2.0112,
+      "step": 14355
+    },
+    {
+      "epoch": 0.02798343972363189,
+      "grad_norm": 1.9308695793151855,
+      "learning_rate": 2.9942072674650317e-05,
+      "loss": 1.7964,
+      "step": 14370
+    },
+    {
+      "epoch": 0.02801264999474215,
+      "grad_norm": 3.7377004623413086,
+      "learning_rate": 2.994195175576015e-05,
+      "loss": 1.8661,
+      "step": 14385
+    },
+    {
+      "epoch": 0.028041860265852415,
+      "grad_norm": 2.484870195388794,
+      "learning_rate": 2.994183071104171e-05,
+      "loss": 1.7358,
+      "step": 14400
+    },
+    {
+      "epoch": 0.028071070536962676,
+      "grad_norm": 2.6344974040985107,
+      "learning_rate": 2.9941709540496013e-05,
+      "loss": 1.9183,
+      "step": 14415
+    },
+    {
+      "epoch": 0.02810028080807294,
+      "grad_norm": 2.168701410293579,
+      "learning_rate": 2.9941588244124072e-05,
+      "loss": 1.999,
+      "step": 14430
+    },
+    {
+      "epoch": 0.028129491079183205,
+      "grad_norm": 2.986727476119995,
+      "learning_rate": 2.994146682192692e-05,
+      "loss": 1.8344,
+      "step": 14445
+    },
+    {
+      "epoch": 0.028158701350293466,
+      "grad_norm": 3.3715713024139404,
+      "learning_rate": 2.9941345273905573e-05,
+      "loss": 2.0468,
+      "step": 14460
+    },
+    {
+      "epoch": 0.02818791162140373,
+      "grad_norm": 2.2077038288116455,
+      "learning_rate": 2.9941223600061054e-05,
+      "loss": 2.0255,
+      "step": 14475
+    },
+    {
+      "epoch": 0.02821712189251399,
+      "grad_norm": 3.4651224613189697,
+      "learning_rate": 2.994110180039439e-05,
+      "loss": 1.8604,
+      "step": 14490
+    },
+    {
+      "epoch": 0.028246332163624256,
+      "grad_norm": 2.0584287643432617,
+      "learning_rate": 2.994097987490661e-05,
+      "loss": 2.0636,
+      "step": 14505
+    },
+    {
+      "epoch": 0.028275542434734516,
+      "grad_norm": 3.1285014152526855,
+      "learning_rate": 2.9940857823598736e-05,
+      "loss": 1.7656,
+      "step": 14520
+    },
+    {
+      "epoch": 0.02830475270584478,
+      "grad_norm": 2.541280746459961,
+      "learning_rate": 2.9940735646471793e-05,
+      "loss": 1.8682,
+      "step": 14535
+    },
+    {
+      "epoch": 0.028333962976955042,
+      "grad_norm": 2.4748847484588623,
+      "learning_rate": 2.9940613343526817e-05,
+      "loss": 2.0047,
+      "step": 14550
+    },
+    {
+      "epoch": 0.028363173248065306,
+      "grad_norm": 3.2105560302734375,
+      "learning_rate": 2.9940490914764834e-05,
+      "loss": 1.7709,
+      "step": 14565
+    },
+    {
+      "epoch": 0.02839238351917557,
+      "grad_norm": 3.491591215133667,
+      "learning_rate": 2.9940368360186878e-05,
+      "loss": 1.786,
+      "step": 14580
+    },
+    {
+      "epoch": 0.028421593790285832,
+      "grad_norm": 3.315342903137207,
+      "learning_rate": 2.9940245679793978e-05,
+      "loss": 1.917,
+      "step": 14595
+    },
+    {
+      "epoch": 0.028450804061396096,
+      "grad_norm": 1.7594997882843018,
+      "learning_rate": 2.9940122873587164e-05,
+      "loss": 1.877,
+      "step": 14610
+    },
+    {
+      "epoch": 0.028480014332506357,
+      "grad_norm": 2.442725896835327,
+      "learning_rate": 2.9939999941567474e-05,
+      "loss": 1.9577,
+      "step": 14625
+    },
+    {
+      "epoch": 0.02850922460361662,
+      "grad_norm": 3.146977663040161,
+      "learning_rate": 2.993987688373595e-05,
+      "loss": 1.8722,
+      "step": 14640
+    },
+    {
+      "epoch": 0.028538434874726883,
+      "grad_norm": 2.6678929328918457,
+      "learning_rate": 2.9939753700093618e-05,
+      "loss": 1.659,
+      "step": 14655
+    },
+    {
+      "epoch": 0.028567645145837147,
+      "grad_norm": 2.4769906997680664,
+      "learning_rate": 2.9939630390641518e-05,
+      "loss": 1.8257,
+      "step": 14670
+    },
+    {
+      "epoch": 0.02859685541694741,
+      "grad_norm": 2.9314770698547363,
+      "learning_rate": 2.993950695538069e-05,
+      "loss": 1.8004,
+      "step": 14685
+    },
+    {
+      "epoch": 0.028626065688057672,
+      "grad_norm": 3.2279980182647705,
+      "learning_rate": 2.993938339431217e-05,
+      "loss": 1.9438,
+      "step": 14700
+    },
+    {
+      "epoch": 0.028655275959167937,
+      "grad_norm": 2.8929495811462402,
+      "learning_rate": 2.9939259707437002e-05,
+      "loss": 1.7995,
+      "step": 14715
+    },
+    {
+      "epoch": 0.028684486230278198,
+      "grad_norm": 4.861998558044434,
+      "learning_rate": 2.9939135894756232e-05,
+      "loss": 1.8188,
+      "step": 14730
+    },
+    {
+      "epoch": 0.028713696501388462,
+      "grad_norm": 5.37394905090332,
+      "learning_rate": 2.9939011956270893e-05,
+      "loss": 2.061,
+      "step": 14745
+    },
+    {
+      "epoch": 0.028742906772498723,
+      "grad_norm": 2.2253520488739014,
+      "learning_rate": 2.9938887891982035e-05,
+      "loss": 1.963,
+      "step": 14760
+    },
+    {
+      "epoch": 0.028772117043608988,
+      "grad_norm": 3.424954414367676,
+      "learning_rate": 2.99387637018907e-05,
+      "loss": 2.0077,
+      "step": 14775
+    },
+    {
+      "epoch": 0.02880132731471925,
+      "grad_norm": 2.8398706912994385,
+      "learning_rate": 2.9938639385997934e-05,
+      "loss": 2.0516,
+      "step": 14790
+    },
+    {
+      "epoch": 0.028830537585829513,
+      "grad_norm": 2.371492385864258,
+      "learning_rate": 2.9938514944304788e-05,
+      "loss": 1.8057,
+      "step": 14805
+    },
+    {
+      "epoch": 0.028859747856939778,
+      "grad_norm": 1.870301365852356,
+      "learning_rate": 2.9938390376812304e-05,
+      "loss": 1.8335,
+      "step": 14820
+    },
+    {
+      "epoch": 0.02888895812805004,
+      "grad_norm": 3.1508800983428955,
+      "learning_rate": 2.9938265683521533e-05,
+      "loss": 2.0272,
+      "step": 14835
+    },
+    {
+      "epoch": 0.028918168399160303,
+      "grad_norm": 2.8456640243530273,
+      "learning_rate": 2.9938140864433528e-05,
+      "loss": 1.889,
+      "step": 14850
+    },
+    {
+      "epoch": 0.028947378670270564,
+      "grad_norm": 2.3040804862976074,
+      "learning_rate": 2.9938015919549337e-05,
+      "loss": 1.9274,
+      "step": 14865
+    },
+    {
+      "epoch": 0.02897658894138083,
+      "grad_norm": 3.075559139251709,
+      "learning_rate": 2.9937890848870012e-05,
+      "loss": 1.9239,
+      "step": 14880
+    },
+    {
+      "epoch": 0.02900579921249109,
+      "grad_norm": 2.4840190410614014,
+      "learning_rate": 2.9937765652396608e-05,
+      "loss": 1.9836,
+      "step": 14895
+    },
+    {
+      "epoch": 0.029035009483601354,
+      "grad_norm": 2.915515422821045,
+      "learning_rate": 2.9937640330130182e-05,
+      "loss": 2.0196,
+      "step": 14910
+    },
+    {
+      "epoch": 0.02906421975471162,
+      "grad_norm": 2.684401035308838,
+      "learning_rate": 2.993751488207178e-05,
+      "loss": 1.9699,
+      "step": 14925
+    },
+    {
+      "epoch": 0.02909343002582188,
+      "grad_norm": 1.7906841039657593,
+      "learning_rate": 2.9937389308222468e-05,
+      "loss": 1.9435,
+      "step": 14940
+    },
+    {
+      "epoch": 0.029122640296932144,
+      "grad_norm": 2.7629384994506836,
+      "learning_rate": 2.9937263608583297e-05,
+      "loss": 1.9266,
+      "step": 14955
+    },
+    {
+      "epoch": 0.029151850568042405,
+      "grad_norm": 3.65447735786438,
+      "learning_rate": 2.9937137783155326e-05,
+      "loss": 1.8818,
+      "step": 14970
+    },
+    {
+      "epoch": 0.02918106083915267,
+      "grad_norm": 2.684885025024414,
+      "learning_rate": 2.993701183193962e-05,
+      "loss": 1.744,
+      "step": 14985
+    },
+    {
+      "epoch": 0.02921027111026293,
+      "grad_norm": 2.995678424835205,
+      "learning_rate": 2.9936885754937237e-05,
+      "loss": 1.868,
+      "step": 15000
     }
   ],
   "logging_steps": 15,
@@ -6326,7 +7026,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.384291701225267e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
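The bulk of this commit is the append to log_history above: 100 new entries covering steps 13515 through 15000, one every 15 steps, matching the file's "logging_steps": 15. A short sketch, assuming a local copy of the file, that summarizes the appended range:

import json

with open("data/trainer_state.json") as f:
    state = json.load(f)

# Entries added by this commit: steps 13515..15000 inclusive.
new_entries = [e for e in state["log_history"] if 13515 <= e.get("step", 0) <= 15000]
losses = [e["loss"] for e in new_entries]
print(f"{len(new_entries)} entries, mean loss {sum(losses) / len(losses):.4f}")
print(f"checkpoint state: step {state['global_step']}, epoch {state['epoch']:.6f}")

Note the loss still hovers around 1.8-2.0 across the new window and the learning rate has barely decayed (2.9949e-05 to 2.9937e-05), consistent with the run being under 3% of the way through its first epoch.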
data/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2ec2ab14e2ce9ef4d500efa223a38f8e5ca01386342399ff2338f06b1fa66a7a
 size 5304
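Together, these six files are the standard per-checkpoint layout written by the transformers Trainer (weights, optimizer and LR-scheduler state, RNG state, trainer state, and serialized TrainingArguments), so the upload is usable as a resume point. A hedged sketch of fetching it back with huggingface_hub; the repo id is a placeholder, since the commit page does not show it:

from huggingface_hub import snapshot_download

# Hypothetical repo id -- the repository name is not shown in this diff.
local_dir = snapshot_download(repo_id="jonathanjordan21/<repo-name>")
# A reconstructed Trainer could then continue from step 15000 with, e.g.:
#   trainer.train(resume_from_checkpoint=f"{local_dir}/data")

Resuming assumes the surrounding model, dataset, and TrainingArguments are rebuilt to match the original run.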