Training in progress, step 1800, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c19a8c4ff78657e1ad6849d03827d74a166332e2c92b5ee4c34966f79e091caa
 size 1370666272
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:10c0470d53e83293b301fcaf8b6ed1125194ec8f54fe9618703c1367bf9a41e7
 size 697294462
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d290a2c62404485bacce37c57039bbf078af94d6cf0884b19d2a678f11aec096
 size 1064
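Note: the three binary files above are stored as Git LFS pointers, so each diff touches only the pointer text (spec version, sha256 oid, byte size); this commit replaces the oid line for each file. A minimal sketch of checking a downloaded blob against its pointer, assuming hypothetical local paths for both files:

import hashlib
import os

def verify_lfs_pointer(pointer_path: str, blob_path: str) -> bool:
    # Parse the three "key value" lines of a Git LFS pointer file.
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Hash the blob in 1 MiB chunks to avoid loading it into memory.
    h = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == expected_oid and os.path.getsize(blob_path) == expected_size

# Hypothetical usage:
# verify_lfs_pointer("adapter_model.safetensors.pointer", "adapter_model.safetensors")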
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.4177788093303934,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12257,6 +12257,356 @@
       "learning_rate": 0.0001967750823269455,
       "loss": 0.8951,
       "step": 1750
+    },
+    {
+      "epoch": 0.40640594174306605,
+      "grad_norm": 0.6410776972770691,
+      "learning_rate": 0.00019677140721822734,
+      "loss": 0.8931,
+      "step": 1751
+    },
+    {
+      "epoch": 0.4066380410815829,
+      "grad_norm": 0.7138279676437378,
+      "learning_rate": 0.00019676773005098766,
+      "loss": 0.9195,
+      "step": 1752
+    },
+    {
+      "epoch": 0.4068701404200998,
+      "grad_norm": 0.5348248481750488,
+      "learning_rate": 0.00019676405082530476,
+      "loss": 0.9232,
+      "step": 1753
+    },
+    {
+      "epoch": 0.4071022397586167,
+      "grad_norm": 0.6424762010574341,
+      "learning_rate": 0.00019676036954125684,
+      "loss": 0.9068,
+      "step": 1754
+    },
+    {
+      "epoch": 0.40733433909713357,
+      "grad_norm": 0.6323909759521484,
+      "learning_rate": 0.00019675668619892228,
+      "loss": 0.929,
+      "step": 1755
+    },
+    {
+      "epoch": 0.40756643843565044,
+      "grad_norm": 0.541854977607727,
+      "learning_rate": 0.00019675300079837935,
+      "loss": 0.9729,
+      "step": 1756
+    },
+    {
+      "epoch": 0.40779853777416736,
+      "grad_norm": 0.7092952132225037,
+      "learning_rate": 0.00019674931333970647,
+      "loss": 0.9005,
+      "step": 1757
+    },
+    {
+      "epoch": 0.4080306371126842,
+      "grad_norm": 0.5437161922454834,
+      "learning_rate": 0.0001967456238229821,
+      "loss": 0.9944,
+      "step": 1758
+    },
+    {
+      "epoch": 0.4082627364512011,
+      "grad_norm": 0.5253750681877136,
+      "learning_rate": 0.00019674193224828473,
+      "loss": 0.9535,
+      "step": 1759
+    },
+    {
+      "epoch": 0.408494835789718,
+      "grad_norm": 0.6329406499862671,
+      "learning_rate": 0.00019673823861569286,
+      "loss": 0.8904,
+      "step": 1760
+    },
+    {
+      "epoch": 0.4087269351282349,
+      "grad_norm": 0.5530345439910889,
+      "learning_rate": 0.00019673454292528508,
+      "loss": 0.934,
+      "step": 1761
+    },
+    {
+      "epoch": 0.40895903446675175,
+      "grad_norm": 0.6421835422515869,
+      "learning_rate": 0.00019673084517714,
+      "loss": 0.9014,
+      "step": 1762
+    },
+    {
+      "epoch": 0.40919113380526867,
+      "grad_norm": 0.5271580219268799,
+      "learning_rate": 0.00019672714537133628,
+      "loss": 0.923,
+      "step": 1763
+    },
+    {
+      "epoch": 0.40942323314378554,
+      "grad_norm": 0.5356336236000061,
+      "learning_rate": 0.00019672344350795258,
+      "loss": 0.9246,
+      "step": 1764
+    },
+    {
+      "epoch": 0.4096553324823024,
+      "grad_norm": 0.6168617606163025,
+      "learning_rate": 0.0001967197395870677,
+      "loss": 0.923,
+      "step": 1765
+    },
+    {
+      "epoch": 0.4098874318208193,
+      "grad_norm": 0.49557581543922424,
+      "learning_rate": 0.00019671603360876043,
+      "loss": 0.9448,
+      "step": 1766
+    },
+    {
+      "epoch": 0.4101195311593362,
+      "grad_norm": 0.5493084192276001,
+      "learning_rate": 0.00019671232557310958,
+      "loss": 0.9362,
+      "step": 1767
+    },
+    {
+      "epoch": 0.41035163049785306,
+      "grad_norm": 0.6057862639427185,
+      "learning_rate": 0.00019670861548019405,
+      "loss": 0.9443,
+      "step": 1768
+    },
+    {
+      "epoch": 0.41058372983637,
+      "grad_norm": 0.5347152948379517,
+      "learning_rate": 0.0001967049033300927,
+      "loss": 0.9054,
+      "step": 1769
+    },
+    {
+      "epoch": 0.41081582917488685,
+      "grad_norm": 0.5570089817047119,
+      "learning_rate": 0.0001967011891228846,
+      "loss": 0.9094,
+      "step": 1770
+    },
+    {
+      "epoch": 0.4110479285134037,
+      "grad_norm": 0.5425180792808533,
+      "learning_rate": 0.00019669747285864863,
+      "loss": 0.9072,
+      "step": 1771
+    },
+    {
+      "epoch": 0.41128002785192064,
+      "grad_norm": 0.5784744024276733,
+      "learning_rate": 0.00019669375453746396,
+      "loss": 1.0027,
+      "step": 1772
+    },
+    {
+      "epoch": 0.4115121271904375,
+      "grad_norm": 0.6552026867866516,
+      "learning_rate": 0.0001966900341594096,
+      "loss": 0.9353,
+      "step": 1773
+    },
+    {
+      "epoch": 0.41174422652895437,
+      "grad_norm": 0.4845140874385834,
+      "learning_rate": 0.0001966863117245648,
+      "loss": 0.9227,
+      "step": 1774
+    },
+    {
+      "epoch": 0.4119763258674713,
+      "grad_norm": 0.5522558689117432,
+      "learning_rate": 0.0001966825872330086,
+      "loss": 0.9159,
+      "step": 1775
+    },
+    {
+      "epoch": 0.41220842520598816,
+      "grad_norm": 0.6886111497879028,
+      "learning_rate": 0.0001966788606848203,
+      "loss": 0.8733,
+      "step": 1776
+    },
+    {
+      "epoch": 0.412440524544505,
+      "grad_norm": 0.5358473062515259,
+      "learning_rate": 0.0001966751320800792,
+      "loss": 0.8916,
+      "step": 1777
+    },
+    {
+      "epoch": 0.41267262388302195,
+      "grad_norm": 0.574971079826355,
+      "learning_rate": 0.0001966714014188646,
+      "loss": 0.8828,
+      "step": 1778
+    },
+    {
+      "epoch": 0.4129047232215388,
+      "grad_norm": 0.5384320616722107,
+      "learning_rate": 0.0001966676687012558,
+      "loss": 0.8881,
+      "step": 1779
+    },
+    {
+      "epoch": 0.4131368225600557,
+      "grad_norm": 0.6178532838821411,
+      "learning_rate": 0.00019666393392733228,
+      "loss": 0.9724,
+      "step": 1780
+    },
+    {
+      "epoch": 0.4133689218985726,
+      "grad_norm": 0.5532113313674927,
+      "learning_rate": 0.00019666019709717344,
+      "loss": 0.9535,
+      "step": 1781
+    },
+    {
+      "epoch": 0.4136010212370895,
+      "grad_norm": 0.5668889880180359,
+      "learning_rate": 0.00019665645821085876,
+      "loss": 0.9127,
+      "step": 1782
+    },
+    {
+      "epoch": 0.41383312057560634,
+      "grad_norm": 0.5764045715332031,
+      "learning_rate": 0.00019665271726846783,
+      "loss": 0.9412,
+      "step": 1783
+    },
+    {
+      "epoch": 0.41406521991412326,
+      "grad_norm": 0.5341030955314636,
+      "learning_rate": 0.00019664897427008014,
+      "loss": 0.9349,
+      "step": 1784
+    },
+    {
+      "epoch": 0.41429731925264013,
+      "grad_norm": 0.6231575012207031,
+      "learning_rate": 0.00019664522921577544,
+      "loss": 0.8928,
+      "step": 1785
+    },
+    {
+      "epoch": 0.414529418591157,
+      "grad_norm": 0.5901029706001282,
+      "learning_rate": 0.00019664148210563328,
+      "loss": 0.9054,
+      "step": 1786
+    },
+    {
+      "epoch": 0.4147615179296739,
+      "grad_norm": 0.5409894585609436,
+      "learning_rate": 0.0001966377329397334,
+      "loss": 0.8859,
+      "step": 1787
+    },
+    {
+      "epoch": 0.4149936172681908,
+      "grad_norm": 0.6134136915206909,
+      "learning_rate": 0.00019663398171815554,
+      "loss": 0.8984,
+      "step": 1788
+    },
+    {
+      "epoch": 0.41522571660670765,
+      "grad_norm": 0.5341612696647644,
+      "learning_rate": 0.00019663022844097956,
+      "loss": 0.8723,
+      "step": 1789
+    },
+    {
+      "epoch": 0.4154578159452246,
+      "grad_norm": 0.5658878684043884,
+      "learning_rate": 0.00019662647310828523,
+      "loss": 0.8559,
+      "step": 1790
+    },
+    {
+      "epoch": 0.41568991528374144,
+      "grad_norm": 0.49125760793685913,
+      "learning_rate": 0.00019662271572015247,
+      "loss": 0.8786,
+      "step": 1791
+    },
+    {
+      "epoch": 0.4159220146222583,
+      "grad_norm": 0.6301273703575134,
+      "learning_rate": 0.00019661895627666115,
+      "loss": 0.8943,
+      "step": 1792
+    },
+    {
+      "epoch": 0.41615411396077523,
+      "grad_norm": 0.5750293731689453,
+      "learning_rate": 0.00019661519477789135,
+      "loss": 0.8957,
+      "step": 1793
+    },
+    {
+      "epoch": 0.4163862132992921,
+      "grad_norm": 0.5299922823905945,
+      "learning_rate": 0.000196611431223923,
+      "loss": 0.963,
+      "step": 1794
+    },
+    {
+      "epoch": 0.41661831263780896,
+      "grad_norm": 0.7080173492431641,
+      "learning_rate": 0.00019660766561483618,
+      "loss": 0.9599,
+      "step": 1795
+    },
+    {
+      "epoch": 0.4168504119763259,
+      "grad_norm": 0.5741339921951294,
+      "learning_rate": 0.00019660389795071097,
+      "loss": 0.941,
+      "step": 1796
+    },
+    {
+      "epoch": 0.41708251131484275,
+      "grad_norm": 0.5957292318344116,
+      "learning_rate": 0.00019660012823162755,
+      "loss": 0.91,
+      "step": 1797
+    },
+    {
+      "epoch": 0.4173146106533596,
+      "grad_norm": 0.5832741856575012,
+      "learning_rate": 0.0001965963564576661,
+      "loss": 0.9038,
+      "step": 1798
+    },
+    {
+      "epoch": 0.41754670999187654,
+      "grad_norm": 0.613530158996582,
+      "learning_rate": 0.00019659258262890683,
+      "loss": 0.9392,
+      "step": 1799
+    },
+    {
+      "epoch": 0.4177788093303934,
+      "grad_norm": 0.5645830631256104,
+      "learning_rate": 0.00019658880674543004,
+      "loss": 0.9393,
+      "step": 1800
     }
   ],
   "logging_steps": 1,
@@ -12276,7 +12626,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.
+  "total_flos": 7.989780893663232e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null
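Note: the trainer_state.json hunks above append log_history entries for steps 1751 through 1800 and advance epoch and global_step to match. A minimal sketch of inspecting that window from the checkpoint, assuming the file sits at the relative path shown in the diff:

import json

# Load the checkpoint's trainer state (path is an assumption).
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Pull the entries this commit added (steps 1751-1800).
window = [e for e in state["log_history"] if 1751 <= e.get("step", 0) <= 1800]
losses = [e["loss"] for e in window]

print(f"global_step={state['global_step']}, epoch={state['epoch']:.4f}")
print(f"steps 1751-1800: mean loss={sum(losses) / len(losses):.4f}, "
      f"last lr={window[-1]['learning_rate']:.3e}")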