Training in progress, step 840, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 97307544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84ebecbf2ca75ad57ca7b58af0e968ef2090cf0cbf2d9de997a064b018738921
|
3 |
size 97307544
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 49846644
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b00e7236a16f54612acd3cd1712b00ba0e0a5ebd702d01df069c98828b504973
|
3 |
size 49846644
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9305dad521301e27731e7687a716b13bb515e0747793e6ae9190a1ac95e77259
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab189b99109032b154209762c91c1d401e345f9b0f9b7ebe3392ef6b914baeeb
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 386,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5876,6 +5876,41 @@
|
|
5876 |
"learning_rate": 8.988624162896057e-05,
|
5877 |
"loss": 0.8475,
|
5878 |
"step": 835
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5879 |
}
|
5880 |
],
|
5881 |
"logging_steps": 1,
|
@@ -5895,7 +5930,7 @@
|
|
5895 |
"attributes": {}
|
5896 |
}
|
5897 |
},
|
5898 |
-
"total_flos": 9.
|
5899 |
"train_batch_size": 4,
|
5900 |
"trial_name": null,
|
5901 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0870268521514073,
|
5 |
"eval_steps": 386,
|
6 |
+
"global_step": 840,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5876 |
"learning_rate": 8.988624162896057e-05,
|
5877 |
"loss": 0.8475,
|
5878 |
"step": 835
|
5879 |
+
},
|
5880 |
+
{
|
5881 |
+
"epoch": 1.0818505338078293,
|
5882 |
+
"grad_norm": 0.9945587515830994,
|
5883 |
+
"learning_rate": 8.986136654557784e-05,
|
5884 |
+
"loss": 0.8112,
|
5885 |
+
"step": 836
|
5886 |
+
},
|
5887 |
+
{
|
5888 |
+
"epoch": 1.0831446133937237,
|
5889 |
+
"grad_norm": 0.8039492964744568,
|
5890 |
+
"learning_rate": 8.983646436015692e-05,
|
5891 |
+
"loss": 0.6536,
|
5892 |
+
"step": 837
|
5893 |
+
},
|
5894 |
+
{
|
5895 |
+
"epoch": 1.0844386929796181,
|
5896 |
+
"grad_norm": 0.8176629543304443,
|
5897 |
+
"learning_rate": 8.981153508962899e-05,
|
5898 |
+
"loss": 0.7452,
|
5899 |
+
"step": 838
|
5900 |
+
},
|
5901 |
+
{
|
5902 |
+
"epoch": 1.0857327725655128,
|
5903 |
+
"grad_norm": 0.945672869682312,
|
5904 |
+
"learning_rate": 8.978657875094368e-05,
|
5905 |
+
"loss": 0.7925,
|
5906 |
+
"step": 839
|
5907 |
+
},
|
5908 |
+
{
|
5909 |
+
"epoch": 1.0870268521514073,
|
5910 |
+
"grad_norm": 0.9550118446350098,
|
5911 |
+
"learning_rate": 8.976159536106894e-05,
|
5912 |
+
"loss": 0.8496,
|
5913 |
+
"step": 840
|
5914 |
}
|
5915 |
],
|
5916 |
"logging_steps": 1,
|
|
|
5930 |
"attributes": {}
|
5931 |
}
|
5932 |
},
|
5933 |
+
"total_flos": 9.389973531493663e+17,
|
5934 |
"train_batch_size": 4,
|
5935 |
"trial_name": null,
|
5936 |
"trial_params": null
|