bhuvanmdev
committed on
Commit
•
e1a2b04
1
Parent(s):
d325efa
Training in progress, step 2220, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 100697728
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe4cb03d3561140a6dc960340bfc20239d79b812171f8665033b7d5950060c16
|
3 |
size 100697728
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 201541754
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02194e64ab0e46b57c09b90a41edf3c57c35eb6a761d021610ca25fe7722be96
|
3 |
size 201541754
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2417621c0c251f9779f9e28a0066e04b9dea3b724de678a9d2863d36bd10fb07
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dcda9d1b5e596e9a16f8101054c6161f73e1500ea7e6c07e8c2aa6c3a698c4fe
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step": 2200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1767,14 +1767,30 @@
|
|
1767 |
"loss": 0.4195,
|
1768 |
"num_input_tokens_seen": 1488275,
|
1769 |
"step": 2200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1770 |
}
|
1771 |
],
|
1772 |
"logging_steps": 10,
|
1773 |
"max_steps": 2795,
|
1774 |
-
"num_input_tokens_seen": 1488275,
|
1775 |
"num_train_epochs": 1,
|
1776 |
"save_steps": 20,
|
1777 |
-
"total_flos": 3.
|
1778 |
"train_batch_size": 1,
|
1779 |
"trial_name": null,
|
1780 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7942754919499105,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2220,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1767 |
"loss": 0.4195,
|
1768 |
"num_input_tokens_seen": 1488275,
|
1769 |
"step": 2200
|
1770 |
+
},
|
1771 |
+
{
|
1772 |
+
"epoch": 0.7906976744186046,
|
1773 |
+
"grad_norm": 0.31744202971458435,
|
1774 |
+
"learning_rate": 4.186046511627907e-05,
|
1775 |
+
"loss": 0.3672,
|
1776 |
+
"num_input_tokens_seen": 1496031,
|
1777 |
+
"step": 2210
|
1778 |
+
},
|
1779 |
+
{
|
1780 |
+
"epoch": 0.7942754919499105,
|
1781 |
+
"grad_norm": 0.3008958697319031,
|
1782 |
+
"learning_rate": 4.114490161001789e-05,
|
1783 |
+
"loss": 0.3809,
|
1784 |
+
"num_input_tokens_seen": 1503138,
|
1785 |
+
"step": 2220
|
1786 |
}
|
1787 |
],
|
1788 |
"logging_steps": 10,
|
1789 |
"max_steps": 2795,
|
1790 |
+
"num_input_tokens_seen": 1503138,
|
1791 |
"num_train_epochs": 1,
|
1792 |
"save_steps": 20,
|
1793 |
+
"total_flos": 3.3800265450491904e+16,
|
1794 |
"train_batch_size": 1,
|
1795 |
"trial_name": null,
|
1796 |
"trial_params": null
|