Training in progress, step 460000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3851379e290e595ff406c21b8b10ddb1e73359dd0f6752ee66fd50b92159710
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca511f19607ffe13077a404fa6aa93a3f99da5d803f11fc8cc4ffd982f7eaa96
|
3 |
+
size 14439
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4c5e63367d8a48da7291ac4cad9dbf22e45f23c04ad6dc36fb819eee5567b30
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5ef310867dca4f3b22b044f838f0a52fca0a2ae9be44b0e97a33f54ca14b37c
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07690c863ee7f95b67ca0330c6c558692a4c5296256a3c0c1e6edf6ee11f4a99
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd623ae3bafd7bee294fb51f9ab76259bcc1a1e3cb80be656ec13fa7aea6663f
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18f7a345bdac720f75c3e4ebf8a9d6d8895e230a0405992893aeb1c567a9dd75
|
3 |
+
size 14439
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:099663ce4dbefceb2b014dd475ab003207ed8b9ddbfafed2491187b9c10f927a
|
3 |
+
size 14439
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4435aedaa43e7ef631652a5bc79634e81959ca100d2c5dc8b85db021834925b4
|
3 |
+
size 14439
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d60fd80961b777bf4901f5c7189278f8f31f61a50c51a19e170f6a1919a5ce33
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -9006,11 +9006,211 @@
|
|
9006 |
"eval_samples_per_second": 1989.909,
|
9007 |
"eval_steps_per_second": 31.839,
|
9008 |
"step": 450000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9009 |
}
|
9010 |
],
|
9011 |
"max_steps": 500000,
|
9012 |
"num_train_epochs": 16,
|
9013 |
-
"total_flos": 1.
|
9014 |
"trial_name": null,
|
9015 |
"trial_params": null
|
9016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 14.093569043169214,
|
5 |
+
"global_step": 460000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
9006 |
"eval_samples_per_second": 1989.909,
|
9007 |
"eval_steps_per_second": 31.839,
|
9008 |
"step": 450000
|
9009 |
+
},
|
9010 |
+
{
|
9011 |
+
"epoch": 13.8,
|
9012 |
+
"learning_rate": 1.770155470293445e-05,
|
9013 |
+
"loss": 0.3141,
|
9014 |
+
"step": 450500
|
9015 |
+
},
|
9016 |
+
{
|
9017 |
+
"epoch": 13.82,
|
9018 |
+
"learning_rate": 1.7548110926261522e-05,
|
9019 |
+
"loss": 0.3144,
|
9020 |
+
"step": 451000
|
9021 |
+
},
|
9022 |
+
{
|
9023 |
+
"epoch": 13.82,
|
9024 |
+
"eval_loss": 0.7752067446708679,
|
9025 |
+
"eval_runtime": 0.5146,
|
9026 |
+
"eval_samples_per_second": 1943.251,
|
9027 |
+
"eval_steps_per_second": 31.092,
|
9028 |
+
"step": 451000
|
9029 |
+
},
|
9030 |
+
{
|
9031 |
+
"epoch": 13.83,
|
9032 |
+
"learning_rate": 1.7396170301425777e-05,
|
9033 |
+
"loss": 0.3141,
|
9034 |
+
"step": 451500
|
9035 |
+
},
|
9036 |
+
{
|
9037 |
+
"epoch": 13.85,
|
9038 |
+
"learning_rate": 1.7245734490025544e-05,
|
9039 |
+
"loss": 0.3142,
|
9040 |
+
"step": 452000
|
9041 |
+
},
|
9042 |
+
{
|
9043 |
+
"epoch": 13.85,
|
9044 |
+
"eval_loss": 0.7712000012397766,
|
9045 |
+
"eval_runtime": 0.5015,
|
9046 |
+
"eval_samples_per_second": 1994.171,
|
9047 |
+
"eval_steps_per_second": 31.907,
|
9048 |
+
"step": 452000
|
9049 |
+
},
|
9050 |
+
{
|
9051 |
+
"epoch": 13.86,
|
9052 |
+
"learning_rate": 1.7096805137202738e-05,
|
9053 |
+
"loss": 0.3139,
|
9054 |
+
"step": 452500
|
9055 |
+
},
|
9056 |
+
{
|
9057 |
+
"epoch": 13.88,
|
9058 |
+
"learning_rate": 1.6949383871624917e-05,
|
9059 |
+
"loss": 0.3139,
|
9060 |
+
"step": 453000
|
9061 |
+
},
|
9062 |
+
{
|
9063 |
+
"epoch": 13.88,
|
9064 |
+
"eval_loss": 0.7725095152854919,
|
9065 |
+
"eval_runtime": 0.5022,
|
9066 |
+
"eval_samples_per_second": 1991.07,
|
9067 |
+
"eval_steps_per_second": 31.857,
|
9068 |
+
"step": 453000
|
9069 |
+
},
|
9070 |
+
{
|
9071 |
+
"epoch": 13.89,
|
9072 |
+
"learning_rate": 1.6803472305467368e-05,
|
9073 |
+
"loss": 0.3137,
|
9074 |
+
"step": 453500
|
9075 |
+
},
|
9076 |
+
{
|
9077 |
+
"epoch": 13.91,
|
9078 |
+
"learning_rate": 1.665907203439568e-05,
|
9079 |
+
"loss": 0.3139,
|
9080 |
+
"step": 454000
|
9081 |
+
},
|
9082 |
+
{
|
9083 |
+
"epoch": 13.91,
|
9084 |
+
"eval_loss": 0.7726877927780151,
|
9085 |
+
"eval_runtime": 0.5045,
|
9086 |
+
"eval_samples_per_second": 1982.357,
|
9087 |
+
"eval_steps_per_second": 31.718,
|
9088 |
+
"step": 454000
|
9089 |
+
},
|
9090 |
+
{
|
9091 |
+
"epoch": 13.93,
|
9092 |
+
"learning_rate": 1.6516184637548058e-05,
|
9093 |
+
"loss": 0.3138,
|
9094 |
+
"step": 454500
|
9095 |
+
},
|
9096 |
+
{
|
9097 |
+
"epoch": 13.94,
|
9098 |
+
"learning_rate": 1.6374811677518142e-05,
|
9099 |
+
"loss": 0.314,
|
9100 |
+
"step": 455000
|
9101 |
+
},
|
9102 |
+
{
|
9103 |
+
"epoch": 13.94,
|
9104 |
+
"eval_loss": 0.7759785056114197,
|
9105 |
+
"eval_runtime": 0.5189,
|
9106 |
+
"eval_samples_per_second": 1927.23,
|
9107 |
+
"eval_steps_per_second": 30.836,
|
9108 |
+
"step": 455000
|
9109 |
+
},
|
9110 |
+
{
|
9111 |
+
"epoch": 13.96,
|
9112 |
+
"learning_rate": 1.6234954700338025e-05,
|
9113 |
+
"loss": 0.3135,
|
9114 |
+
"step": 455500
|
9115 |
+
},
|
9116 |
+
{
|
9117 |
+
"epoch": 13.97,
|
9118 |
+
"learning_rate": 1.6096615235461148e-05,
|
9119 |
+
"loss": 0.3135,
|
9120 |
+
"step": 456000
|
9121 |
+
},
|
9122 |
+
{
|
9123 |
+
"epoch": 13.97,
|
9124 |
+
"eval_loss": 0.773828387260437,
|
9125 |
+
"eval_runtime": 0.5012,
|
9126 |
+
"eval_samples_per_second": 1995.149,
|
9127 |
+
"eval_steps_per_second": 31.922,
|
9128 |
+
"step": 456000
|
9129 |
+
},
|
9130 |
+
{
|
9131 |
+
"epoch": 13.99,
|
9132 |
+
"learning_rate": 1.59597947957458e-05,
|
9133 |
+
"loss": 0.3138,
|
9134 |
+
"step": 456500
|
9135 |
+
},
|
9136 |
+
{
|
9137 |
+
"epoch": 14.0,
|
9138 |
+
"learning_rate": 1.5824494877438344e-05,
|
9139 |
+
"loss": 0.3137,
|
9140 |
+
"step": 457000
|
9141 |
+
},
|
9142 |
+
{
|
9143 |
+
"epoch": 14.0,
|
9144 |
+
"eval_loss": 0.7719975709915161,
|
9145 |
+
"eval_runtime": 0.5107,
|
9146 |
+
"eval_samples_per_second": 1958.237,
|
9147 |
+
"eval_steps_per_second": 31.332,
|
9148 |
+
"step": 457000
|
9149 |
+
},
|
9150 |
+
{
|
9151 |
+
"epoch": 14.02,
|
9152 |
+
"learning_rate": 1.569071696015702e-05,
|
9153 |
+
"loss": 0.3135,
|
9154 |
+
"step": 457500
|
9155 |
+
},
|
9156 |
+
{
|
9157 |
+
"epoch": 14.03,
|
9158 |
+
"learning_rate": 1.555846250687569e-05,
|
9159 |
+
"loss": 0.3138,
|
9160 |
+
"step": 458000
|
9161 |
+
},
|
9162 |
+
{
|
9163 |
+
"epoch": 14.03,
|
9164 |
+
"eval_loss": 0.7755674123764038,
|
9165 |
+
"eval_runtime": 0.5013,
|
9166 |
+
"eval_samples_per_second": 1994.642,
|
9167 |
+
"eval_steps_per_second": 31.914,
|
9168 |
+
"step": 458000
|
9169 |
+
},
|
9170 |
+
{
|
9171 |
+
"epoch": 14.05,
|
9172 |
+
"learning_rate": 1.542773296390789e-05,
|
9173 |
+
"loss": 0.3134,
|
9174 |
+
"step": 458500
|
9175 |
+
},
|
9176 |
+
{
|
9177 |
+
"epoch": 14.06,
|
9178 |
+
"learning_rate": 1.5298529760890945e-05,
|
9179 |
+
"loss": 0.3135,
|
9180 |
+
"step": 459000
|
9181 |
+
},
|
9182 |
+
{
|
9183 |
+
"epoch": 14.06,
|
9184 |
+
"eval_loss": 0.7749778032302856,
|
9185 |
+
"eval_runtime": 0.5253,
|
9186 |
+
"eval_samples_per_second": 1903.509,
|
9187 |
+
"eval_steps_per_second": 30.456,
|
9188 |
+
"step": 459000
|
9189 |
+
},
|
9190 |
+
{
|
9191 |
+
"epoch": 14.08,
|
9192 |
+
"learning_rate": 1.5170854310770376e-05,
|
9193 |
+
"loss": 0.3136,
|
9194 |
+
"step": 459500
|
9195 |
+
},
|
9196 |
+
{
|
9197 |
+
"epoch": 14.09,
|
9198 |
+
"learning_rate": 1.5044708009784457e-05,
|
9199 |
+
"loss": 0.3134,
|
9200 |
+
"step": 460000
|
9201 |
+
},
|
9202 |
+
{
|
9203 |
+
"epoch": 14.09,
|
9204 |
+
"eval_loss": 0.7761635184288025,
|
9205 |
+
"eval_runtime": 0.5003,
|
9206 |
+
"eval_samples_per_second": 1998.696,
|
9207 |
+
"eval_steps_per_second": 31.979,
|
9208 |
+
"step": 460000
|
9209 |
}
|
9210 |
],
|
9211 |
"max_steps": 500000,
|
9212 |
"num_train_epochs": 16,
|
9213 |
+
"total_flos": 1.4696296552299338e+22,
|
9214 |
"trial_name": null,
|
9215 |
"trial_params": null
|
9216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51cc55a03d0db3d8e5de6630971dddba20e9587291496d77623230dc60cf541c
|
3 |
size 102501541
|