Training in progress, step 870, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91bc801b9c6caba8d2de27b5b887e4de8b7defb87eb66b3cacc1b8ec971f50b0
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f71c68db9cf60853128c313c3b9d8c9cfd204245155f1a9dec9e7c0c970a90f1
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:213b6578b38160a37abb0e775f318cf97620a9fbc0b45491cba9a0e1b773ff55
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1fea4e1f6790394bab44f5b295cba869de89257525462490220ac4d8ec4fec6
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6086,6 +6086,41 @@
|
|
6086 |
"learning_rate": 8.912826898448561e-05,
|
6087 |
"loss": 0.6884,
|
6088 |
"step": 865
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6089 |
}
|
6090 |
],
|
6091 |
"logging_steps": 1,
|
@@ -6105,7 +6140,7 @@
|
|
6105 |
"attributes": {}
|
6106 |
}
|
6107 |
},
|
6108 |
-
"total_flos": 9.
|
6109 |
"train_batch_size": 4,
|
6110 |
"trial_name": null,
|
6111 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.1258492397282434,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 870,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6086 |
"learning_rate": 8.912826898448561e-05,
|
6087 |
"loss": 0.6884,
|
6088 |
"step": 865
|
6089 |
+
},
|
6090 |
+
{
|
6091 |
+
"epoch": 1.1206729213846651,
|
6092 |
+
"grad_norm": 0.8322413563728333,
|
6093 |
+
"learning_rate": 8.91025882794697e-05,
|
6094 |
+
"loss": 0.5871,
|
6095 |
+
"step": 866
|
6096 |
+
},
|
6097 |
+
{
|
6098 |
+
"epoch": 1.1219670009705598,
|
6099 |
+
"grad_norm": 0.9673650860786438,
|
6100 |
+
"learning_rate": 8.907688098831454e-05,
|
6101 |
+
"loss": 0.9124,
|
6102 |
+
"step": 867
|
6103 |
+
},
|
6104 |
+
{
|
6105 |
+
"epoch": 1.1232610805564542,
|
6106 |
+
"grad_norm": 1.0415488481521606,
|
6107 |
+
"learning_rate": 8.905114712849875e-05,
|
6108 |
+
"loss": 0.7218,
|
6109 |
+
"step": 868
|
6110 |
+
},
|
6111 |
+
{
|
6112 |
+
"epoch": 1.1245551601423487,
|
6113 |
+
"grad_norm": 0.8599552512168884,
|
6114 |
+
"learning_rate": 8.902538671751897e-05,
|
6115 |
+
"loss": 0.8724,
|
6116 |
+
"step": 869
|
6117 |
+
},
|
6118 |
+
{
|
6119 |
+
"epoch": 1.1258492397282434,
|
6120 |
+
"grad_norm": 0.8935772180557251,
|
6121 |
+
"learning_rate": 8.899959977288987e-05,
|
6122 |
+
"loss": 0.7266,
|
6123 |
+
"step": 870
|
6124 |
}
|
6125 |
],
|
6126 |
"logging_steps": 1,
|
|
|
6140 |
"attributes": {}
|
6141 |
}
|
6142 |
},
|
6143 |
+
"total_flos": 9.725354683001733e+17,
|
6144 |
"train_batch_size": 4,
|
6145 |
"trial_name": null,
|
6146 |
"trial_params": null
|