Upload folder using huggingface_hub
Browse files- pytorch_model-00001-of-00003.bin +2 -2
- pytorch_model-00002-of-00003.bin +2 -2
- pytorch_model-00003-of-00003.bin +2 -2
- trainer_state.json +848 -3
pytorch_model-00001-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7aaa6098082d792fee0be032e0dd660213189af96f0213a828d4cc55d626d222
|
3 |
+
size 9877988050
|
pytorch_model-00002-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:372de6c40b11b32b0b29f6304b8a22f2c81e56f9516f7ea134115145bb5525be
|
3 |
+
size 9894799542
|
pytorch_model-00003-of-00003.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5049d5c4b55dada711ff08977660e67d02d7a9741e09b2539913368037efaca
|
3 |
+
size 7180989689
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4342,11 +4342,856 @@
|
|
4342 |
"learning_rate": 1.3271869055905495e-06,
|
4343 |
"loss": 0.1769,
|
4344 |
"step": 720
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4345 |
}
|
4346 |
],
|
4347 |
"max_steps": 858,
|
4348 |
"num_train_epochs": 3,
|
4349 |
-
"total_flos":
|
4350 |
"trial_name": null,
|
4351 |
"trial_params": null
|
4352 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 858,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4342 |
"learning_rate": 1.3271869055905495e-06,
|
4343 |
"loss": 0.1769,
|
4344 |
"step": 720
|
4345 |
+
},
|
4346 |
+
{
|
4347 |
+
"epoch": 2.52,
|
4348 |
+
"learning_rate": 1.3084513916284913e-06,
|
4349 |
+
"loss": 0.1691,
|
4350 |
+
"step": 721
|
4351 |
+
},
|
4352 |
+
{
|
4353 |
+
"epoch": 2.52,
|
4354 |
+
"learning_rate": 1.2898398000448441e-06,
|
4355 |
+
"loss": 0.1835,
|
4356 |
+
"step": 722
|
4357 |
+
},
|
4358 |
+
{
|
4359 |
+
"epoch": 2.53,
|
4360 |
+
"learning_rate": 1.2713523961999996e-06,
|
4361 |
+
"loss": 0.1878,
|
4362 |
+
"step": 723
|
4363 |
+
},
|
4364 |
+
{
|
4365 |
+
"epoch": 2.53,
|
4366 |
+
"learning_rate": 1.2529894436836965e-06,
|
4367 |
+
"loss": 0.178,
|
4368 |
+
"step": 724
|
4369 |
+
},
|
4370 |
+
{
|
4371 |
+
"epoch": 2.53,
|
4372 |
+
"learning_rate": 1.2347512043112753e-06,
|
4373 |
+
"loss": 0.1772,
|
4374 |
+
"step": 725
|
4375 |
+
},
|
4376 |
+
{
|
4377 |
+
"epoch": 2.54,
|
4378 |
+
"learning_rate": 1.2166379381199423e-06,
|
4379 |
+
"loss": 0.1823,
|
4380 |
+
"step": 726
|
4381 |
+
},
|
4382 |
+
{
|
4383 |
+
"epoch": 2.54,
|
4384 |
+
"learning_rate": 1.1986499033650557e-06,
|
4385 |
+
"loss": 0.176,
|
4386 |
+
"step": 727
|
4387 |
+
},
|
4388 |
+
{
|
4389 |
+
"epoch": 2.55,
|
4390 |
+
"learning_rate": 1.1807873565164507e-06,
|
4391 |
+
"loss": 0.1685,
|
4392 |
+
"step": 728
|
4393 |
+
},
|
4394 |
+
{
|
4395 |
+
"epoch": 2.55,
|
4396 |
+
"learning_rate": 1.1630505522547853e-06,
|
4397 |
+
"loss": 0.1721,
|
4398 |
+
"step": 729
|
4399 |
+
},
|
4400 |
+
{
|
4401 |
+
"epoch": 2.55,
|
4402 |
+
"learning_rate": 1.1454397434679022e-06,
|
4403 |
+
"loss": 0.1601,
|
4404 |
+
"step": 730
|
4405 |
+
},
|
4406 |
+
{
|
4407 |
+
"epoch": 2.56,
|
4408 |
+
"learning_rate": 1.12795518124722e-06,
|
4409 |
+
"loss": 0.1755,
|
4410 |
+
"step": 731
|
4411 |
+
},
|
4412 |
+
{
|
4413 |
+
"epoch": 2.56,
|
4414 |
+
"learning_rate": 1.11059711488417e-06,
|
4415 |
+
"loss": 0.1735,
|
4416 |
+
"step": 732
|
4417 |
+
},
|
4418 |
+
{
|
4419 |
+
"epoch": 2.56,
|
4420 |
+
"learning_rate": 1.0933657918666175e-06,
|
4421 |
+
"loss": 0.1627,
|
4422 |
+
"step": 733
|
4423 |
+
},
|
4424 |
+
{
|
4425 |
+
"epoch": 2.57,
|
4426 |
+
"learning_rate": 1.0762614578753571e-06,
|
4427 |
+
"loss": 0.1694,
|
4428 |
+
"step": 734
|
4429 |
+
},
|
4430 |
+
{
|
4431 |
+
"epoch": 2.57,
|
4432 |
+
"learning_rate": 1.0592843567805944e-06,
|
4433 |
+
"loss": 0.1933,
|
4434 |
+
"step": 735
|
4435 |
+
},
|
4436 |
+
{
|
4437 |
+
"epoch": 2.57,
|
4438 |
+
"learning_rate": 1.042434730638473e-06,
|
4439 |
+
"loss": 0.1702,
|
4440 |
+
"step": 736
|
4441 |
+
},
|
4442 |
+
{
|
4443 |
+
"epoch": 2.58,
|
4444 |
+
"learning_rate": 1.0257128196876233e-06,
|
4445 |
+
"loss": 0.1776,
|
4446 |
+
"step": 737
|
4447 |
+
},
|
4448 |
+
{
|
4449 |
+
"epoch": 2.58,
|
4450 |
+
"learning_rate": 1.0091188623457415e-06,
|
4451 |
+
"loss": 0.1745,
|
4452 |
+
"step": 738
|
4453 |
+
},
|
4454 |
+
{
|
4455 |
+
"epoch": 2.58,
|
4456 |
+
"learning_rate": 9.926530952061831e-07,
|
4457 |
+
"loss": 0.1746,
|
4458 |
+
"step": 739
|
4459 |
+
},
|
4460 |
+
{
|
4461 |
+
"epoch": 2.59,
|
4462 |
+
"learning_rate": 9.763157530345957e-07,
|
4463 |
+
"loss": 0.1832,
|
4464 |
+
"step": 740
|
4465 |
+
},
|
4466 |
+
{
|
4467 |
+
"epoch": 2.59,
|
4468 |
+
"learning_rate": 9.601070687655667e-07,
|
4469 |
+
"loss": 0.1721,
|
4470 |
+
"step": 741
|
4471 |
+
},
|
4472 |
+
{
|
4473 |
+
"epoch": 2.59,
|
4474 |
+
"learning_rate": 9.440272734993072e-07,
|
4475 |
+
"loss": 0.1767,
|
4476 |
+
"step": 742
|
4477 |
+
},
|
4478 |
+
{
|
4479 |
+
"epoch": 2.6,
|
4480 |
+
"learning_rate": 9.280765964983529e-07,
|
4481 |
+
"loss": 0.1743,
|
4482 |
+
"step": 743
|
4483 |
+
},
|
4484 |
+
{
|
4485 |
+
"epoch": 2.6,
|
4486 |
+
"learning_rate": 9.122552651842931e-07,
|
4487 |
+
"loss": 0.1906,
|
4488 |
+
"step": 744
|
4489 |
+
},
|
4490 |
+
{
|
4491 |
+
"epoch": 2.6,
|
4492 |
+
"learning_rate": 8.965635051345411e-07,
|
4493 |
+
"loss": 0.1703,
|
4494 |
+
"step": 745
|
4495 |
+
},
|
4496 |
+
{
|
4497 |
+
"epoch": 2.61,
|
4498 |
+
"learning_rate": 8.810015400790994e-07,
|
4499 |
+
"loss": 0.1847,
|
4500 |
+
"step": 746
|
4501 |
+
},
|
4502 |
+
{
|
4503 |
+
"epoch": 2.61,
|
4504 |
+
"learning_rate": 8.655695918973861e-07,
|
4505 |
+
"loss": 0.178,
|
4506 |
+
"step": 747
|
4507 |
+
},
|
4508 |
+
{
|
4509 |
+
"epoch": 2.62,
|
4510 |
+
"learning_rate": 8.502678806150588e-07,
|
4511 |
+
"loss": 0.1808,
|
4512 |
+
"step": 748
|
4513 |
+
},
|
4514 |
+
{
|
4515 |
+
"epoch": 2.62,
|
4516 |
+
"learning_rate": 8.350966244008896e-07,
|
4517 |
+
"loss": 0.1768,
|
4518 |
+
"step": 749
|
4519 |
+
},
|
4520 |
+
{
|
4521 |
+
"epoch": 2.62,
|
4522 |
+
"learning_rate": 8.200560395636414e-07,
|
4523 |
+
"loss": 0.1871,
|
4524 |
+
"step": 750
|
4525 |
+
},
|
4526 |
+
{
|
4527 |
+
"epoch": 2.63,
|
4528 |
+
"learning_rate": 8.051463405489956e-07,
|
4529 |
+
"loss": 0.176,
|
4530 |
+
"step": 751
|
4531 |
+
},
|
4532 |
+
{
|
4533 |
+
"epoch": 2.63,
|
4534 |
+
"learning_rate": 7.903677399364839e-07,
|
4535 |
+
"loss": 0.1715,
|
4536 |
+
"step": 752
|
4537 |
+
},
|
4538 |
+
{
|
4539 |
+
"epoch": 2.63,
|
4540 |
+
"learning_rate": 7.757204484364699e-07,
|
4541 |
+
"loss": 0.1839,
|
4542 |
+
"step": 753
|
4543 |
+
},
|
4544 |
+
{
|
4545 |
+
"epoch": 2.64,
|
4546 |
+
"learning_rate": 7.612046748871327e-07,
|
4547 |
+
"loss": 0.1799,
|
4548 |
+
"step": 754
|
4549 |
+
},
|
4550 |
+
{
|
4551 |
+
"epoch": 2.64,
|
4552 |
+
"learning_rate": 7.468206262514965e-07,
|
4553 |
+
"loss": 0.1664,
|
4554 |
+
"step": 755
|
4555 |
+
},
|
4556 |
+
{
|
4557 |
+
"epoch": 2.64,
|
4558 |
+
"learning_rate": 7.325685076144795e-07,
|
4559 |
+
"loss": 0.1742,
|
4560 |
+
"step": 756
|
4561 |
+
},
|
4562 |
+
{
|
4563 |
+
"epoch": 2.65,
|
4564 |
+
"learning_rate": 7.184485221799631e-07,
|
4565 |
+
"loss": 0.1811,
|
4566 |
+
"step": 757
|
4567 |
+
},
|
4568 |
+
{
|
4569 |
+
"epoch": 2.65,
|
4570 |
+
"learning_rate": 7.044608712679058e-07,
|
4571 |
+
"loss": 0.1805,
|
4572 |
+
"step": 758
|
4573 |
+
},
|
4574 |
+
{
|
4575 |
+
"epoch": 2.65,
|
4576 |
+
"learning_rate": 6.90605754311462e-07,
|
4577 |
+
"loss": 0.1733,
|
4578 |
+
"step": 759
|
4579 |
+
},
|
4580 |
+
{
|
4581 |
+
"epoch": 2.66,
|
4582 |
+
"learning_rate": 6.768833688541443e-07,
|
4583 |
+
"loss": 0.1746,
|
4584 |
+
"step": 760
|
4585 |
+
},
|
4586 |
+
{
|
4587 |
+
"epoch": 2.66,
|
4588 |
+
"learning_rate": 6.632939105470049e-07,
|
4589 |
+
"loss": 0.1745,
|
4590 |
+
"step": 761
|
4591 |
+
},
|
4592 |
+
{
|
4593 |
+
"epoch": 2.66,
|
4594 |
+
"learning_rate": 6.498375731458529e-07,
|
4595 |
+
"loss": 0.176,
|
4596 |
+
"step": 762
|
4597 |
+
},
|
4598 |
+
{
|
4599 |
+
"epoch": 2.67,
|
4600 |
+
"learning_rate": 6.365145485084767e-07,
|
4601 |
+
"loss": 0.1773,
|
4602 |
+
"step": 763
|
4603 |
+
},
|
4604 |
+
{
|
4605 |
+
"epoch": 2.67,
|
4606 |
+
"learning_rate": 6.233250265919266e-07,
|
4607 |
+
"loss": 0.1801,
|
4608 |
+
"step": 764
|
4609 |
+
},
|
4610 |
+
{
|
4611 |
+
"epoch": 2.67,
|
4612 |
+
"learning_rate": 6.102691954497908e-07,
|
4613 |
+
"loss": 0.177,
|
4614 |
+
"step": 765
|
4615 |
+
},
|
4616 |
+
{
|
4617 |
+
"epoch": 2.68,
|
4618 |
+
"learning_rate": 5.973472412295256e-07,
|
4619 |
+
"loss": 0.1753,
|
4620 |
+
"step": 766
|
4621 |
+
},
|
4622 |
+
{
|
4623 |
+
"epoch": 2.68,
|
4624 |
+
"learning_rate": 5.845593481697931e-07,
|
4625 |
+
"loss": 0.1721,
|
4626 |
+
"step": 767
|
4627 |
+
},
|
4628 |
+
{
|
4629 |
+
"epoch": 2.69,
|
4630 |
+
"learning_rate": 5.719056985978388e-07,
|
4631 |
+
"loss": 0.1906,
|
4632 |
+
"step": 768
|
4633 |
+
},
|
4634 |
+
{
|
4635 |
+
"epoch": 2.69,
|
4636 |
+
"learning_rate": 5.59386472926895e-07,
|
4637 |
+
"loss": 0.192,
|
4638 |
+
"step": 769
|
4639 |
+
},
|
4640 |
+
{
|
4641 |
+
"epoch": 2.69,
|
4642 |
+
"learning_rate": 5.470018496535967e-07,
|
4643 |
+
"loss": 0.1909,
|
4644 |
+
"step": 770
|
4645 |
+
},
|
4646 |
+
{
|
4647 |
+
"epoch": 2.7,
|
4648 |
+
"learning_rate": 5.347520053554544e-07,
|
4649 |
+
"loss": 0.1836,
|
4650 |
+
"step": 771
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 2.7,
|
4654 |
+
"learning_rate": 5.22637114688318e-07,
|
4655 |
+
"loss": 0.1815,
|
4656 |
+
"step": 772
|
4657 |
+
},
|
4658 |
+
{
|
4659 |
+
"epoch": 2.7,
|
4660 |
+
"learning_rate": 5.106573503839018e-07,
|
4661 |
+
"loss": 0.1717,
|
4662 |
+
"step": 773
|
4663 |
+
},
|
4664 |
+
{
|
4665 |
+
"epoch": 2.71,
|
4666 |
+
"learning_rate": 4.988128832473105e-07,
|
4667 |
+
"loss": 0.1843,
|
4668 |
+
"step": 774
|
4669 |
+
},
|
4670 |
+
{
|
4671 |
+
"epoch": 2.71,
|
4672 |
+
"learning_rate": 4.871038821546104e-07,
|
4673 |
+
"loss": 0.1721,
|
4674 |
+
"step": 775
|
4675 |
+
},
|
4676 |
+
{
|
4677 |
+
"epoch": 2.71,
|
4678 |
+
"learning_rate": 4.755305140504185e-07,
|
4679 |
+
"loss": 0.1919,
|
4680 |
+
"step": 776
|
4681 |
+
},
|
4682 |
+
{
|
4683 |
+
"epoch": 2.72,
|
4684 |
+
"learning_rate": 4.6409294394552774e-07,
|
4685 |
+
"loss": 0.1768,
|
4686 |
+
"step": 777
|
4687 |
+
},
|
4688 |
+
{
|
4689 |
+
"epoch": 2.72,
|
4690 |
+
"learning_rate": 4.5279133491454406e-07,
|
4691 |
+
"loss": 0.1926,
|
4692 |
+
"step": 778
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 2.72,
|
4696 |
+
"learning_rate": 4.416258480935731e-07,
|
4697 |
+
"loss": 0.1749,
|
4698 |
+
"step": 779
|
4699 |
+
},
|
4700 |
+
{
|
4701 |
+
"epoch": 2.73,
|
4702 |
+
"learning_rate": 4.305966426779118e-07,
|
4703 |
+
"loss": 0.176,
|
4704 |
+
"step": 780
|
4705 |
+
},
|
4706 |
+
{
|
4707 |
+
"epoch": 2.73,
|
4708 |
+
"learning_rate": 4.197038759197869e-07,
|
4709 |
+
"loss": 0.1823,
|
4710 |
+
"step": 781
|
4711 |
+
},
|
4712 |
+
{
|
4713 |
+
"epoch": 2.73,
|
4714 |
+
"learning_rate": 4.089477031261113e-07,
|
4715 |
+
"loss": 0.1808,
|
4716 |
+
"step": 782
|
4717 |
+
},
|
4718 |
+
{
|
4719 |
+
"epoch": 2.74,
|
4720 |
+
"learning_rate": 3.983282776562647e-07,
|
4721 |
+
"loss": 0.1722,
|
4722 |
+
"step": 783
|
4723 |
+
},
|
4724 |
+
{
|
4725 |
+
"epoch": 2.74,
|
4726 |
+
"learning_rate": 3.878457509199107e-07,
|
4727 |
+
"loss": 0.174,
|
4728 |
+
"step": 784
|
4729 |
+
},
|
4730 |
+
{
|
4731 |
+
"epoch": 2.74,
|
4732 |
+
"learning_rate": 3.7750027237484e-07,
|
4733 |
+
"loss": 0.1742,
|
4734 |
+
"step": 785
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 2.75,
|
4738 |
+
"learning_rate": 3.6729198952483725e-07,
|
4739 |
+
"loss": 0.178,
|
4740 |
+
"step": 786
|
4741 |
+
},
|
4742 |
+
{
|
4743 |
+
"epoch": 2.75,
|
4744 |
+
"learning_rate": 3.572210479175753e-07,
|
4745 |
+
"loss": 0.1695,
|
4746 |
+
"step": 787
|
4747 |
+
},
|
4748 |
+
{
|
4749 |
+
"epoch": 2.76,
|
4750 |
+
"learning_rate": 3.4728759114254774e-07,
|
4751 |
+
"loss": 0.1676,
|
4752 |
+
"step": 788
|
4753 |
+
},
|
4754 |
+
{
|
4755 |
+
"epoch": 2.76,
|
4756 |
+
"learning_rate": 3.374917608290107e-07,
|
4757 |
+
"loss": 0.1848,
|
4758 |
+
"step": 789
|
4759 |
+
},
|
4760 |
+
{
|
4761 |
+
"epoch": 2.76,
|
4762 |
+
"learning_rate": 3.278336966439744e-07,
|
4763 |
+
"loss": 0.187,
|
4764 |
+
"step": 790
|
4765 |
+
},
|
4766 |
+
{
|
4767 |
+
"epoch": 2.77,
|
4768 |
+
"learning_rate": 3.1831353629020345e-07,
|
4769 |
+
"loss": 0.1757,
|
4770 |
+
"step": 791
|
4771 |
+
},
|
4772 |
+
{
|
4773 |
+
"epoch": 2.77,
|
4774 |
+
"learning_rate": 3.089314155042589e-07,
|
4775 |
+
"loss": 0.1715,
|
4776 |
+
"step": 792
|
4777 |
+
},
|
4778 |
+
{
|
4779 |
+
"epoch": 2.77,
|
4780 |
+
"learning_rate": 2.996874680545603e-07,
|
4781 |
+
"loss": 0.1661,
|
4782 |
+
"step": 793
|
4783 |
+
},
|
4784 |
+
{
|
4785 |
+
"epoch": 2.78,
|
4786 |
+
"learning_rate": 2.905818257394799e-07,
|
4787 |
+
"loss": 0.184,
|
4788 |
+
"step": 794
|
4789 |
+
},
|
4790 |
+
{
|
4791 |
+
"epoch": 2.78,
|
4792 |
+
"learning_rate": 2.816146183854618e-07,
|
4793 |
+
"loss": 0.1794,
|
4794 |
+
"step": 795
|
4795 |
+
},
|
4796 |
+
{
|
4797 |
+
"epoch": 2.78,
|
4798 |
+
"learning_rate": 2.727859738451721e-07,
|
4799 |
+
"loss": 0.1744,
|
4800 |
+
"step": 796
|
4801 |
+
},
|
4802 |
+
{
|
4803 |
+
"epoch": 2.79,
|
4804 |
+
"learning_rate": 2.640960179956764e-07,
|
4805 |
+
"loss": 0.1644,
|
4806 |
+
"step": 797
|
4807 |
+
},
|
4808 |
+
{
|
4809 |
+
"epoch": 2.79,
|
4810 |
+
"learning_rate": 2.5554487473664404e-07,
|
4811 |
+
"loss": 0.1781,
|
4812 |
+
"step": 798
|
4813 |
+
},
|
4814 |
+
{
|
4815 |
+
"epoch": 2.79,
|
4816 |
+
"learning_rate": 2.471326659885809e-07,
|
4817 |
+
"loss": 0.1831,
|
4818 |
+
"step": 799
|
4819 |
+
},
|
4820 |
+
{
|
4821 |
+
"epoch": 2.8,
|
4822 |
+
"learning_rate": 2.388595116910919e-07,
|
4823 |
+
"loss": 0.1818,
|
4824 |
+
"step": 800
|
4825 |
+
},
|
4826 |
+
{
|
4827 |
+
"epoch": 2.8,
|
4828 |
+
"learning_rate": 2.3072552980117568e-07,
|
4829 |
+
"loss": 0.191,
|
4830 |
+
"step": 801
|
4831 |
+
},
|
4832 |
+
{
|
4833 |
+
"epoch": 2.8,
|
4834 |
+
"learning_rate": 2.2273083629153148e-07,
|
4835 |
+
"loss": 0.1834,
|
4836 |
+
"step": 802
|
4837 |
+
},
|
4838 |
+
{
|
4839 |
+
"epoch": 2.81,
|
4840 |
+
"learning_rate": 2.1487554514891706e-07,
|
4841 |
+
"loss": 0.1709,
|
4842 |
+
"step": 803
|
4843 |
+
},
|
4844 |
+
{
|
4845 |
+
"epoch": 2.81,
|
4846 |
+
"learning_rate": 2.0715976837251793e-07,
|
4847 |
+
"loss": 0.1698,
|
4848 |
+
"step": 804
|
4849 |
+
},
|
4850 |
+
{
|
4851 |
+
"epoch": 2.81,
|
4852 |
+
"learning_rate": 1.9958361597235076e-07,
|
4853 |
+
"loss": 0.1833,
|
4854 |
+
"step": 805
|
4855 |
+
},
|
4856 |
+
{
|
4857 |
+
"epoch": 2.82,
|
4858 |
+
"learning_rate": 1.921471959676957e-07,
|
4859 |
+
"loss": 0.1738,
|
4860 |
+
"step": 806
|
4861 |
+
},
|
4862 |
+
{
|
4863 |
+
"epoch": 2.82,
|
4864 |
+
"learning_rate": 1.8485061438555552e-07,
|
4865 |
+
"loss": 0.168,
|
4866 |
+
"step": 807
|
4867 |
+
},
|
4868 |
+
{
|
4869 |
+
"epoch": 2.83,
|
4870 |
+
"learning_rate": 1.7769397525914668e-07,
|
4871 |
+
"loss": 0.1849,
|
4872 |
+
"step": 808
|
4873 |
+
},
|
4874 |
+
{
|
4875 |
+
"epoch": 2.83,
|
4876 |
+
"learning_rate": 1.706773806264106e-07,
|
4877 |
+
"loss": 0.1741,
|
4878 |
+
"step": 809
|
4879 |
+
},
|
4880 |
+
{
|
4881 |
+
"epoch": 2.83,
|
4882 |
+
"learning_rate": 1.6380093052856482e-07,
|
4883 |
+
"loss": 0.1777,
|
4884 |
+
"step": 810
|
4885 |
+
},
|
4886 |
+
{
|
4887 |
+
"epoch": 2.84,
|
4888 |
+
"learning_rate": 1.5706472300867082e-07,
|
4889 |
+
"loss": 0.2177,
|
4890 |
+
"step": 811
|
4891 |
+
},
|
4892 |
+
{
|
4893 |
+
"epoch": 2.84,
|
4894 |
+
"learning_rate": 1.5046885411024393e-07,
|
4895 |
+
"loss": 0.175,
|
4896 |
+
"step": 812
|
4897 |
+
},
|
4898 |
+
{
|
4899 |
+
"epoch": 2.84,
|
4900 |
+
"learning_rate": 1.4401341787587454e-07,
|
4901 |
+
"loss": 0.1731,
|
4902 |
+
"step": 813
|
4903 |
+
},
|
4904 |
+
{
|
4905 |
+
"epoch": 2.85,
|
4906 |
+
"learning_rate": 1.3769850634589356e-07,
|
4907 |
+
"loss": 0.1778,
|
4908 |
+
"step": 814
|
4909 |
+
},
|
4910 |
+
{
|
4911 |
+
"epoch": 2.85,
|
4912 |
+
"learning_rate": 1.3152420955706014e-07,
|
4913 |
+
"loss": 0.1704,
|
4914 |
+
"step": 815
|
4915 |
+
},
|
4916 |
+
{
|
4917 |
+
"epoch": 2.85,
|
4918 |
+
"learning_rate": 1.2549061554127494e-07,
|
4919 |
+
"loss": 0.1729,
|
4920 |
+
"step": 816
|
4921 |
+
},
|
4922 |
+
{
|
4923 |
+
"epoch": 2.86,
|
4924 |
+
"learning_rate": 1.195978103243234e-07,
|
4925 |
+
"loss": 0.1831,
|
4926 |
+
"step": 817
|
4927 |
+
},
|
4928 |
+
{
|
4929 |
+
"epoch": 2.86,
|
4930 |
+
"learning_rate": 1.1384587792465873e-07,
|
4931 |
+
"loss": 0.166,
|
4932 |
+
"step": 818
|
4933 |
+
},
|
4934 |
+
{
|
4935 |
+
"epoch": 2.86,
|
4936 |
+
"learning_rate": 1.0823490035218986e-07,
|
4937 |
+
"loss": 0.1784,
|
4938 |
+
"step": 819
|
4939 |
+
},
|
4940 |
+
{
|
4941 |
+
"epoch": 2.87,
|
4942 |
+
"learning_rate": 1.0276495760712768e-07,
|
4943 |
+
"loss": 0.1825,
|
4944 |
+
"step": 820
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 2.87,
|
4948 |
+
"learning_rate": 9.743612767882937e-08,
|
4949 |
+
"loss": 0.1838,
|
4950 |
+
"step": 821
|
4951 |
+
},
|
4952 |
+
{
|
4953 |
+
"epoch": 2.87,
|
4954 |
+
"learning_rate": 9.224848654469932e-08,
|
4955 |
+
"loss": 0.1793,
|
4956 |
+
"step": 822
|
4957 |
+
},
|
4958 |
+
{
|
4959 |
+
"epoch": 2.88,
|
4960 |
+
"learning_rate": 8.720210816909436e-08,
|
4961 |
+
"loss": 0.1735,
|
4962 |
+
"step": 823
|
4963 |
+
},
|
4964 |
+
{
|
4965 |
+
"epoch": 2.88,
|
4966 |
+
"learning_rate": 8.229706450227804e-08,
|
4967 |
+
"loss": 0.1773,
|
4968 |
+
"step": 824
|
4969 |
+
},
|
4970 |
+
{
|
4971 |
+
"epoch": 2.88,
|
4972 |
+
"learning_rate": 7.753342547939357e-08,
|
4973 |
+
"loss": 0.1783,
|
4974 |
+
"step": 825
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 2.89,
|
4978 |
+
"learning_rate": 7.291125901946027e-08,
|
4979 |
+
"loss": 0.1779,
|
4980 |
+
"step": 826
|
4981 |
+
},
|
4982 |
+
{
|
4983 |
+
"epoch": 2.89,
|
4984 |
+
"learning_rate": 6.843063102441317e-08,
|
4985 |
+
"loss": 0.1824,
|
4986 |
+
"step": 827
|
4987 |
+
},
|
4988 |
+
{
|
4989 |
+
"epoch": 2.9,
|
4990 |
+
"learning_rate": 6.409160537815818e-08,
|
4991 |
+
"loss": 0.1727,
|
4992 |
+
"step": 828
|
4993 |
+
},
|
4994 |
+
{
|
4995 |
+
"epoch": 2.9,
|
4996 |
+
"learning_rate": 5.9894243945664e-08,
|
4997 |
+
"loss": 0.1756,
|
4998 |
+
"step": 829
|
4999 |
+
},
|
5000 |
+
{
|
5001 |
+
"epoch": 2.9,
|
5002 |
+
"learning_rate": 5.5838606572078404e-08,
|
5003 |
+
"loss": 0.1828,
|
5004 |
+
"step": 830
|
5005 |
+
},
|
5006 |
+
{
|
5007 |
+
"epoch": 2.91,
|
5008 |
+
"learning_rate": 5.192475108187545e-08,
|
5009 |
+
"loss": 0.1693,
|
5010 |
+
"step": 831
|
5011 |
+
},
|
5012 |
+
{
|
5013 |
+
"epoch": 2.91,
|
5014 |
+
"learning_rate": 4.815273327803183e-08,
|
5015 |
+
"loss": 0.1911,
|
5016 |
+
"step": 832
|
5017 |
+
},
|
5018 |
+
{
|
5019 |
+
"epoch": 2.91,
|
5020 |
+
"learning_rate": 4.4522606941228564e-08,
|
5021 |
+
"loss": 0.1885,
|
5022 |
+
"step": 833
|
5023 |
+
},
|
5024 |
+
{
|
5025 |
+
"epoch": 2.92,
|
5026 |
+
"learning_rate": 4.103442382909051e-08,
|
5027 |
+
"loss": 0.1746,
|
5028 |
+
"step": 834
|
5029 |
+
},
|
5030 |
+
{
|
5031 |
+
"epoch": 2.92,
|
5032 |
+
"learning_rate": 3.7688233675439164e-08,
|
5033 |
+
"loss": 0.1931,
|
5034 |
+
"step": 835
|
5035 |
+
},
|
5036 |
+
{
|
5037 |
+
"epoch": 2.92,
|
5038 |
+
"learning_rate": 3.448408418959326e-08,
|
5039 |
+
"loss": 0.1707,
|
5040 |
+
"step": 836
|
5041 |
+
},
|
5042 |
+
{
|
5043 |
+
"epoch": 2.93,
|
5044 |
+
"learning_rate": 3.1422021055679266e-08,
|
5045 |
+
"loss": 0.1713,
|
5046 |
+
"step": 837
|
5047 |
+
},
|
5048 |
+
{
|
5049 |
+
"epoch": 2.93,
|
5050 |
+
"learning_rate": 2.850208793198861e-08,
|
5051 |
+
"loss": 0.1746,
|
5052 |
+
"step": 838
|
5053 |
+
},
|
5054 |
+
{
|
5055 |
+
"epoch": 2.93,
|
5056 |
+
"learning_rate": 2.572432645034817e-08,
|
5057 |
+
"loss": 0.1714,
|
5058 |
+
"step": 839
|
5059 |
+
},
|
5060 |
+
{
|
5061 |
+
"epoch": 2.94,
|
5062 |
+
"learning_rate": 2.308877621553185e-08,
|
5063 |
+
"loss": 0.1758,
|
5064 |
+
"step": 840
|
5065 |
+
},
|
5066 |
+
{
|
5067 |
+
"epoch": 2.94,
|
5068 |
+
"learning_rate": 2.059547480469104e-08,
|
5069 |
+
"loss": 0.1741,
|
5070 |
+
"step": 841
|
5071 |
+
},
|
5072 |
+
{
|
5073 |
+
"epoch": 2.94,
|
5074 |
+
"learning_rate": 1.824445776682504e-08,
|
5075 |
+
"loss": 0.179,
|
5076 |
+
"step": 842
|
5077 |
+
},
|
5078 |
+
{
|
5079 |
+
"epoch": 2.95,
|
5080 |
+
"learning_rate": 1.603575862226925e-08,
|
5081 |
+
"loss": 0.1797,
|
5082 |
+
"step": 843
|
5083 |
+
},
|
5084 |
+
{
|
5085 |
+
"epoch": 2.95,
|
5086 |
+
"learning_rate": 1.396940886221776e-08,
|
5087 |
+
"loss": 0.1738,
|
5088 |
+
"step": 844
|
5089 |
+
},
|
5090 |
+
{
|
5091 |
+
"epoch": 2.95,
|
5092 |
+
"learning_rate": 1.2045437948275952e-08,
|
5093 |
+
"loss": 0.1819,
|
5094 |
+
"step": 845
|
5095 |
+
},
|
5096 |
+
{
|
5097 |
+
"epoch": 2.96,
|
5098 |
+
"learning_rate": 1.0263873312040818e-08,
|
5099 |
+
"loss": 0.1698,
|
5100 |
+
"step": 846
|
5101 |
+
},
|
5102 |
+
{
|
5103 |
+
"epoch": 2.96,
|
5104 |
+
"learning_rate": 8.62474035470795e-09,
|
5105 |
+
"loss": 0.182,
|
5106 |
+
"step": 847
|
5107 |
+
},
|
5108 |
+
{
|
5109 |
+
"epoch": 2.97,
|
5110 |
+
"learning_rate": 7.128062446709605e-09,
|
5111 |
+
"loss": 0.1692,
|
5112 |
+
"step": 848
|
5113 |
+
},
|
5114 |
+
{
|
5115 |
+
"epoch": 2.97,
|
5116 |
+
"learning_rate": 5.773860927383856e-09,
|
5117 |
+
"loss": 0.1778,
|
5118 |
+
"step": 849
|
5119 |
+
},
|
5120 |
+
{
|
5121 |
+
"epoch": 2.97,
|
5122 |
+
"learning_rate": 4.562155104665955e-09,
|
5123 |
+
"loss": 0.1786,
|
5124 |
+
"step": 850
|
5125 |
+
},
|
5126 |
+
{
|
5127 |
+
"epoch": 2.98,
|
5128 |
+
"learning_rate": 3.492962254819654e-09,
|
5129 |
+
"loss": 0.1657,
|
5130 |
+
"step": 851
|
5131 |
+
},
|
5132 |
+
{
|
5133 |
+
"epoch": 2.98,
|
5134 |
+
"learning_rate": 2.5662976221840772e-09,
|
5135 |
+
"loss": 0.1741,
|
5136 |
+
"step": 852
|
5137 |
+
},
|
5138 |
+
{
|
5139 |
+
"epoch": 2.98,
|
5140 |
+
"learning_rate": 1.7821744189605583e-09,
|
5141 |
+
"loss": 0.1843,
|
5142 |
+
"step": 853
|
5143 |
+
},
|
5144 |
+
{
|
5145 |
+
"epoch": 2.99,
|
5146 |
+
"learning_rate": 1.1406038250205699e-09,
|
5147 |
+
"loss": 0.1725,
|
5148 |
+
"step": 854
|
5149 |
+
},
|
5150 |
+
{
|
5151 |
+
"epoch": 2.99,
|
5152 |
+
"learning_rate": 6.41594987752514e-10,
|
5153 |
+
"loss": 0.1748,
|
5154 |
+
"step": 855
|
5155 |
+
},
|
5156 |
+
{
|
5157 |
+
"epoch": 2.99,
|
5158 |
+
"learning_rate": 2.851550219240551e-10,
|
5159 |
+
"loss": 0.1902,
|
5160 |
+
"step": 856
|
5161 |
+
},
|
5162 |
+
{
|
5163 |
+
"epoch": 3.0,
|
5164 |
+
"learning_rate": 7.128900958774942e-11,
|
5165 |
+
"loss": 0.1918,
|
5166 |
+
"step": 857
|
5167 |
+
},
|
5168 |
+
{
|
5169 |
+
"epoch": 3.0,
|
5170 |
+
"learning_rate": 0.0,
|
5171 |
+
"loss": 0.1749,
|
5172 |
+
"step": 858
|
5173 |
+
},
|
5174 |
+
{
|
5175 |
+
"epoch": 3.0,
|
5176 |
+
"eval_loss": 0.322337806224823,
|
5177 |
+
"eval_runtime": 42.6722,
|
5178 |
+
"eval_samples_per_second": 17.506,
|
5179 |
+
"eval_steps_per_second": 0.562,
|
5180 |
+
"step": 858
|
5181 |
+
},
|
5182 |
+
{
|
5183 |
+
"epoch": 3.0,
|
5184 |
+
"step": 858,
|
5185 |
+
"total_flos": 2.1832875959648256e+18,
|
5186 |
+
"train_loss": 0.43623367181191075,
|
5187 |
+
"train_runtime": 26982.3823,
|
5188 |
+
"train_samples_per_second": 4.067,
|
5189 |
+
"train_steps_per_second": 0.032
|
5190 |
}
|
5191 |
],
|
5192 |
"max_steps": 858,
|
5193 |
"num_train_epochs": 3,
|
5194 |
+
"total_flos": 2.1832875959648256e+18,
|
5195 |
"trial_name": null,
|
5196 |
"trial_params": null
|
5197 |
}
|