Training in progress, step 225, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18fdbc0c4f0a856bb851756d41e848c7f8da8cdd3c8bb06d2d6bed157cd6e47f
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846260
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:593b4b5069ea3e92bd5dd2bd90ebd76f7ec8bccb3efe79a2301c144ab29a07b4
|
3 |
size 49846260
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74a5cf5e988307b7bc5ac99195c5626f83509282efb2453403481b5a9c2074a3
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70b35dc00d38fbc94393b01bb750de67a9e73be3bb058ea334b16afbeab55729
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1555,6 +1555,41 @@
|
|
1555 |
"learning_rate": 9.950957071399357e-05,
|
1556 |
"loss": 0.8541,
|
1557 |
"step": 220
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1558 |
}
|
1559 |
],
|
1560 |
"logging_steps": 1,
|
@@ -1574,7 +1609,7 @@
|
|
1574 |
"attributes": {}
|
1575 |
}
|
1576 |
},
|
1577 |
-
"total_flos": 2.
|
1578 |
"train_batch_size": 4,
|
1579 |
"trial_name": null,
|
1580 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.2911679068262698,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 225,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1555 |
"learning_rate": 9.950957071399357e-05,
|
1556 |
"loss": 0.8541,
|
1557 |
"step": 220
|
1558 |
+
},
|
1559 |
+
{
|
1560 |
+
"epoch": 0.2859915884826917,
|
1561 |
+
"grad_norm": 0.8993477821350098,
|
1562 |
+
"learning_rate": 9.950379357812543e-05,
|
1563 |
+
"loss": 1.0253,
|
1564 |
+
"step": 221
|
1565 |
+
},
|
1566 |
+
{
|
1567 |
+
"epoch": 0.28728566806858624,
|
1568 |
+
"grad_norm": 1.073880910873413,
|
1569 |
+
"learning_rate": 9.949798278426158e-05,
|
1570 |
+
"loss": 1.115,
|
1571 |
+
"step": 222
|
1572 |
+
},
|
1573 |
+
{
|
1574 |
+
"epoch": 0.28857974765448074,
|
1575 |
+
"grad_norm": 0.7941976189613342,
|
1576 |
+
"learning_rate": 9.949213833635285e-05,
|
1577 |
+
"loss": 0.9398,
|
1578 |
+
"step": 223
|
1579 |
+
},
|
1580 |
+
{
|
1581 |
+
"epoch": 0.2898738272403753,
|
1582 |
+
"grad_norm": 0.798089325428009,
|
1583 |
+
"learning_rate": 9.948626023837291e-05,
|
1584 |
+
"loss": 0.8523,
|
1585 |
+
"step": 224
|
1586 |
+
},
|
1587 |
+
{
|
1588 |
+
"epoch": 0.2911679068262698,
|
1589 |
+
"grad_norm": 1.0251280069351196,
|
1590 |
+
"learning_rate": 9.948034849431831e-05,
|
1591 |
+
"loss": 0.939,
|
1592 |
+
"step": 225
|
1593 |
}
|
1594 |
],
|
1595 |
"logging_steps": 1,
|
|
|
1609 |
"attributes": {}
|
1610 |
}
|
1611 |
},
|
1612 |
+
"total_flos": 2.515358636310528e+17,
|
1613 |
"train_batch_size": 4,
|
1614 |
"trial_name": null,
|
1615 |
"trial_params": null
|