Training in progress, step 2240, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100697728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0632b909fd1c9aca69067d0bed149621d32fc888cd885edfe8a9c9167000cd0
|
3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201541754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12617d04abf8d5849ea18ccd1caeaec1b356dd520401e75c5bd06ee3fc8e8aa6
|
3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b4460282b72eb6f8903bafdf89fcbbe259019be8c29ef42adde2e2a7ce8a185
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1f05e22fba1c2918a10ec5938a0705454f8a1b28f2198ed555d6f8a3e293559a
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1783,14 +1783,30 @@
|
|
1783 |
"loss": 0.3809,
|
1784 |
"num_input_tokens_seen": 1503138,
|
1785 |
"step": 2220
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1786 |
}
|
1787 |
],
|
1788 |
"logging_steps": 10,
|
1789 |
"max_steps": 2795,
|
1790 |
-
"num_input_tokens_seen":
|
1791 |
"num_train_epochs": 1,
|
1792 |
"save_steps": 20,
|
1793 |
-
"total_flos": 3.
|
1794 |
"train_batch_size": 1,
|
1795 |
"trial_name": null,
|
1796 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8014311270125224,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2240,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1783 |
"loss": 0.3809,
|
1784 |
"num_input_tokens_seen": 1503138,
|
1785 |
"step": 2220
|
1786 |
+
},
|
1787 |
+
{
|
1788 |
+
"epoch": 0.7978533094812165,
|
1789 |
+
"grad_norm": 0.39187124371528625,
|
1790 |
+
"learning_rate": 4.042933810375671e-05,
|
1791 |
+
"loss": 0.3779,
|
1792 |
+
"num_input_tokens_seen": 1509279,
|
1793 |
+
"step": 2230
|
1794 |
+
},
|
1795 |
+
{
|
1796 |
+
"epoch": 0.8014311270125224,
|
1797 |
+
"grad_norm": 0.3644355833530426,
|
1798 |
+
"learning_rate": 3.971377459749553e-05,
|
1799 |
+
"loss": 0.3883,
|
1800 |
+
"num_input_tokens_seen": 1515223,
|
1801 |
+
"step": 2240
|
1802 |
}
|
1803 |
],
|
1804 |
"logging_steps": 10,
|
1805 |
"max_steps": 2795,
|
1806 |
+
"num_input_tokens_seen": 1515223,
|
1807 |
"num_train_epochs": 1,
|
1808 |
"save_steps": 20,
|
1809 |
+
"total_flos": 3.4072014423619584e+16,
|
1810 |
"train_batch_size": 1,
|
1811 |
"trial_name": null,
|
1812 |
"trial_params": null
|