Training in progress, step 680, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aa4cdb306bf7294222f7b3f970f532c7b4cdc22e6cc372d92a68a5832ae3ac9
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3eb132123db17680d8840722840709fbc913fe87e08366059ecb3b185ef779ee
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:477ac7793a58a9aee87e63f0ed0db383646c754c91fbf8cf6726681a3e1bf55c
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59c557a3a19fcbe03c6517f32635519665f582ddfe4481e7465a155c1f572f8b
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4748,6 +4748,41 @@
|
|
4748 |
"learning_rate": 9.350593706906651e-05,
|
4749 |
"loss": 0.787,
|
4750 |
"step": 675
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4751 |
}
|
4752 |
],
|
4753 |
"logging_steps": 1,
|
@@ -4767,7 +4802,7 @@
|
|
4767 |
"attributes": {}
|
4768 |
}
|
4769 |
},
|
4770 |
-
"total_flos": 7.
|
4771 |
"train_batch_size": 4,
|
4772 |
"trial_name": null,
|
4773 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8799741184082821,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 680,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4748 |
"learning_rate": 9.350593706906651e-05,
|
4749 |
"loss": 0.787,
|
4750 |
"step": 675
|
4751 |
+
},
|
4752 |
+
{
|
4753 |
+
"epoch": 0.874797800064704,
|
4754 |
+
"grad_norm": 0.7540378570556641,
|
4755 |
+
"learning_rate": 9.348560326234381e-05,
|
4756 |
+
"loss": 0.8578,
|
4757 |
+
"step": 676
|
4758 |
+
},
|
4759 |
+
{
|
4760 |
+
"epoch": 0.8760918796505985,
|
4761 |
+
"grad_norm": 0.7626041173934937,
|
4762 |
+
"learning_rate": 9.346523988943758e-05,
|
4763 |
+
"loss": 0.9294,
|
4764 |
+
"step": 677
|
4765 |
+
},
|
4766 |
+
{
|
4767 |
+
"epoch": 0.8773859592364931,
|
4768 |
+
"grad_norm": 0.8360442519187927,
|
4769 |
+
"learning_rate": 9.3444846964193e-05,
|
4770 |
+
"loss": 0.8635,
|
4771 |
+
"step": 678
|
4772 |
+
},
|
4773 |
+
{
|
4774 |
+
"epoch": 0.8786800388223875,
|
4775 |
+
"grad_norm": 0.9039386510848999,
|
4776 |
+
"learning_rate": 9.342442450047537e-05,
|
4777 |
+
"loss": 0.83,
|
4778 |
+
"step": 679
|
4779 |
+
},
|
4780 |
+
{
|
4781 |
+
"epoch": 0.8799741184082821,
|
4782 |
+
"grad_norm": 0.7554466724395752,
|
4783 |
+
"learning_rate": 9.340397251217009e-05,
|
4784 |
+
"loss": 0.8103,
|
4785 |
+
"step": 680
|
4786 |
}
|
4787 |
],
|
4788 |
"logging_steps": 1,
|
|
|
4802 |
"attributes": {}
|
4803 |
}
|
4804 |
},
|
4805 |
+
"total_flos": 7.601972767516262e+17,
|
4806 |
"train_batch_size": 4,
|
4807 |
"trial_name": null,
|
4808 |
"trial_params": null
|