Training in progress, step 830, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c97f065b63924129e4c663d454d849b99fe21c9944aa43d8df3d63a14aac99d
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08f9ba6c10be2f0554bf200b13df21893e6b5e45fcbf74b40417cfc144aea9fe
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8e4587789d75916daa4e015c3e1d3b012bb77220cf17f2065c98733029f501f8
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:904b9d02e88bf351afd7f5ee6e6ab8b6ff7fc6380ccab1f4d6df6695877c352d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5806,6 +5806,41 @@
|
|
5806 |
"learning_rate": 9.013349813901859e-05,
|
5807 |
"loss": 0.6574,
|
5808 |
"step": 825
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5809 |
}
|
5810 |
],
|
5811 |
"logging_steps": 1,
|
@@ -5825,7 +5860,7 @@
|
|
5825 |
"attributes": {}
|
5826 |
}
|
5827 |
},
|
5828 |
-
"total_flos": 9.
|
5829 |
"train_batch_size": 4,
|
5830 |
"trial_name": null,
|
5831 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.074086056292462,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 830,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5806 |
"learning_rate": 9.013349813901859e-05,
|
5807 |
"loss": 0.6574,
|
5808 |
"step": 825
|
5809 |
+
},
|
5810 |
+
{
|
5811 |
+
"epoch": 1.0689097379488839,
|
5812 |
+
"grad_norm": 0.9867215752601624,
|
5813 |
+
"learning_rate": 9.010889500317294e-05,
|
5814 |
+
"loss": 0.8753,
|
5815 |
+
"step": 826
|
5816 |
+
},
|
5817 |
+
{
|
5818 |
+
"epoch": 1.0702038175347783,
|
5819 |
+
"grad_norm": 0.9969760775566101,
|
5820 |
+
"learning_rate": 9.008426459699269e-05,
|
5821 |
+
"loss": 0.878,
|
5822 |
+
"step": 827
|
5823 |
+
},
|
5824 |
+
{
|
5825 |
+
"epoch": 1.071497897120673,
|
5826 |
+
"grad_norm": 0.971978485584259,
|
5827 |
+
"learning_rate": 9.005960693722422e-05,
|
5828 |
+
"loss": 0.8938,
|
5829 |
+
"step": 828
|
5830 |
+
},
|
5831 |
+
{
|
5832 |
+
"epoch": 1.0727919767065675,
|
5833 |
+
"grad_norm": 0.9560033679008484,
|
5834 |
+
"learning_rate": 9.003492204063247e-05,
|
5835 |
+
"loss": 0.7956,
|
5836 |
+
"step": 829
|
5837 |
+
},
|
5838 |
+
{
|
5839 |
+
"epoch": 1.074086056292462,
|
5840 |
+
"grad_norm": 0.8045957088470459,
|
5841 |
+
"learning_rate": 9.001020992400087e-05,
|
5842 |
+
"loss": 0.7999,
|
5843 |
+
"step": 830
|
5844 |
}
|
5845 |
],
|
5846 |
"logging_steps": 1,
|
|
|
5860 |
"attributes": {}
|
5861 |
}
|
5862 |
},
|
5863 |
+
"total_flos": 9.278179814324306e+17,
|
5864 |
"train_batch_size": 4,
|
5865 |
"trial_name": null,
|
5866 |
"trial_params": null
|