Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbce419654e8e44df2eb4f9682536b881548d65724339ba4c292532cb71337f7
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf27ccfc5825e3575b2b31b80d5eae840d89a2e45fea29d5a456ebd076f43b0c
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:038137270b1b91e0cf2030358ba2f522fddbf586ecfbe80c6c2eb72908a158a2
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7006,11 +7006,211 @@
|
|
7006 |
"eval_samples_per_second": 1146.703,
|
7007 |
"eval_steps_per_second": 17.972,
|
7008 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7009 |
}
|
7010 |
],
|
7011 |
"max_steps": 500000,
|
7012 |
"num_train_epochs": 12,
|
7013 |
-
"total_flos": 1.
|
7014 |
"trial_name": null,
|
7015 |
"trial_params": null
|
7016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.029978586723768,
|
5 |
+
"global_step": 360000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7006 |
"eval_samples_per_second": 1146.703,
|
7007 |
"eval_steps_per_second": 17.972,
|
7008 |
"step": 350000
|
7009 |
+
},
|
7010 |
+
{
|
7011 |
+
"epoch": 7.82,
|
7012 |
+
"learning_rate": 7.529152489465592e-05,
|
7013 |
+
"loss": 0.2638,
|
7014 |
+
"step": 350500
|
7015 |
+
},
|
7016 |
+
{
|
7017 |
+
"epoch": 7.83,
|
7018 |
+
"learning_rate": 7.489140439617708e-05,
|
7019 |
+
"loss": 0.2632,
|
7020 |
+
"step": 351000
|
7021 |
+
},
|
7022 |
+
{
|
7023 |
+
"epoch": 7.83,
|
7024 |
+
"eval_loss": 0.2466663420200348,
|
7025 |
+
"eval_runtime": 1.9675,
|
7026 |
+
"eval_samples_per_second": 1167.492,
|
7027 |
+
"eval_steps_per_second": 18.298,
|
7028 |
+
"step": 351000
|
7029 |
+
},
|
7030 |
+
{
|
7031 |
+
"epoch": 7.84,
|
7032 |
+
"learning_rate": 7.449215995246522e-05,
|
7033 |
+
"loss": 0.263,
|
7034 |
+
"step": 351500
|
7035 |
+
},
|
7036 |
+
{
|
7037 |
+
"epoch": 7.85,
|
7038 |
+
"learning_rate": 7.409379592959367e-05,
|
7039 |
+
"loss": 0.2631,
|
7040 |
+
"step": 352000
|
7041 |
+
},
|
7042 |
+
{
|
7043 |
+
"epoch": 7.85,
|
7044 |
+
"eval_loss": 0.24644367396831512,
|
7045 |
+
"eval_runtime": 1.9998,
|
7046 |
+
"eval_samples_per_second": 1148.612,
|
7047 |
+
"eval_steps_per_second": 18.002,
|
7048 |
+
"step": 352000
|
7049 |
+
},
|
7050 |
+
{
|
7051 |
+
"epoch": 7.86,
|
7052 |
+
"learning_rate": 7.369631668400746e-05,
|
7053 |
+
"loss": 0.2632,
|
7054 |
+
"step": 352500
|
7055 |
+
},
|
7056 |
+
{
|
7057 |
+
"epoch": 7.87,
|
7058 |
+
"learning_rate": 7.3299726562476e-05,
|
7059 |
+
"loss": 0.2629,
|
7060 |
+
"step": 353000
|
7061 |
+
},
|
7062 |
+
{
|
7063 |
+
"epoch": 7.87,
|
7064 |
+
"eval_loss": 0.24639040231704712,
|
7065 |
+
"eval_runtime": 1.996,
|
7066 |
+
"eval_samples_per_second": 1150.775,
|
7067 |
+
"eval_steps_per_second": 18.036,
|
7068 |
+
"step": 353000
|
7069 |
+
},
|
7070 |
+
{
|
7071 |
+
"epoch": 7.88,
|
7072 |
+
"learning_rate": 7.290402990204531e-05,
|
7073 |
+
"loss": 0.2628,
|
7074 |
+
"step": 353500
|
7075 |
+
},
|
7076 |
+
{
|
7077 |
+
"epoch": 7.9,
|
7078 |
+
"learning_rate": 7.250923102999073e-05,
|
7079 |
+
"loss": 0.2629,
|
7080 |
+
"step": 354000
|
7081 |
+
},
|
7082 |
+
{
|
7083 |
+
"epoch": 7.9,
|
7084 |
+
"eval_loss": 0.24618536233901978,
|
7085 |
+
"eval_runtime": 1.9784,
|
7086 |
+
"eval_samples_per_second": 1161.045,
|
7087 |
+
"eval_steps_per_second": 18.197,
|
7088 |
+
"step": 354000
|
7089 |
+
},
|
7090 |
+
{
|
7091 |
+
"epoch": 7.91,
|
7092 |
+
"learning_rate": 7.211533426376934e-05,
|
7093 |
+
"loss": 0.2629,
|
7094 |
+
"step": 354500
|
7095 |
+
},
|
7096 |
+
{
|
7097 |
+
"epoch": 7.92,
|
7098 |
+
"learning_rate": 7.172234391097317e-05,
|
7099 |
+
"loss": 0.2625,
|
7100 |
+
"step": 355000
|
7101 |
+
},
|
7102 |
+
{
|
7103 |
+
"epoch": 7.92,
|
7104 |
+
"eval_loss": 0.24593985080718994,
|
7105 |
+
"eval_runtime": 2.059,
|
7106 |
+
"eval_samples_per_second": 1115.596,
|
7107 |
+
"eval_steps_per_second": 17.484,
|
7108 |
+
"step": 355000
|
7109 |
+
},
|
7110 |
+
{
|
7111 |
+
"epoch": 7.93,
|
7112 |
+
"learning_rate": 7.133026426928173e-05,
|
7113 |
+
"loss": 0.2626,
|
7114 |
+
"step": 355500
|
7115 |
+
},
|
7116 |
+
{
|
7117 |
+
"epoch": 7.94,
|
7118 |
+
"learning_rate": 7.093909962641514e-05,
|
7119 |
+
"loss": 0.2626,
|
7120 |
+
"step": 356000
|
7121 |
+
},
|
7122 |
+
{
|
7123 |
+
"epoch": 7.94,
|
7124 |
+
"eval_loss": 0.24694356322288513,
|
7125 |
+
"eval_runtime": 1.9849,
|
7126 |
+
"eval_samples_per_second": 1157.222,
|
7127 |
+
"eval_steps_per_second": 18.137,
|
7128 |
+
"step": 356000
|
7129 |
+
},
|
7130 |
+
{
|
7131 |
+
"epoch": 7.95,
|
7132 |
+
"learning_rate": 7.054885426008737e-05,
|
7133 |
+
"loss": 0.2624,
|
7134 |
+
"step": 356500
|
7135 |
+
},
|
7136 |
+
{
|
7137 |
+
"epoch": 7.96,
|
7138 |
+
"learning_rate": 7.015953243795907e-05,
|
7139 |
+
"loss": 0.2625,
|
7140 |
+
"step": 357000
|
7141 |
+
},
|
7142 |
+
{
|
7143 |
+
"epoch": 7.96,
|
7144 |
+
"eval_loss": 0.24489082396030426,
|
7145 |
+
"eval_runtime": 2.0439,
|
7146 |
+
"eval_samples_per_second": 1123.842,
|
7147 |
+
"eval_steps_per_second": 17.614,
|
7148 |
+
"step": 357000
|
7149 |
+
},
|
7150 |
+
{
|
7151 |
+
"epoch": 7.97,
|
7152 |
+
"learning_rate": 6.97711384175914e-05,
|
7153 |
+
"loss": 0.2623,
|
7154 |
+
"step": 357500
|
7155 |
+
},
|
7156 |
+
{
|
7157 |
+
"epoch": 7.99,
|
7158 |
+
"learning_rate": 6.938367644639911e-05,
|
7159 |
+
"loss": 0.4432,
|
7160 |
+
"step": 358000
|
7161 |
+
},
|
7162 |
+
{
|
7163 |
+
"epoch": 7.99,
|
7164 |
+
"eval_loss": 0.6783205270767212,
|
7165 |
+
"eval_runtime": 2.0172,
|
7166 |
+
"eval_samples_per_second": 1138.683,
|
7167 |
+
"eval_steps_per_second": 17.846,
|
7168 |
+
"step": 358000
|
7169 |
+
},
|
7170 |
+
{
|
7171 |
+
"epoch": 8.0,
|
7172 |
+
"learning_rate": 6.899715076160425e-05,
|
7173 |
+
"loss": 0.6785,
|
7174 |
+
"step": 358500
|
7175 |
+
},
|
7176 |
+
{
|
7177 |
+
"epoch": 8.01,
|
7178 |
+
"learning_rate": 6.861156559018986e-05,
|
7179 |
+
"loss": 0.6774,
|
7180 |
+
"step": 359000
|
7181 |
+
},
|
7182 |
+
{
|
7183 |
+
"epoch": 8.01,
|
7184 |
+
"eval_loss": 0.6766601204872131,
|
7185 |
+
"eval_runtime": 2.0317,
|
7186 |
+
"eval_samples_per_second": 1130.554,
|
7187 |
+
"eval_steps_per_second": 17.719,
|
7188 |
+
"step": 359000
|
7189 |
+
},
|
7190 |
+
{
|
7191 |
+
"epoch": 8.02,
|
7192 |
+
"learning_rate": 6.822692514885346e-05,
|
7193 |
+
"loss": 0.6773,
|
7194 |
+
"step": 359500
|
7195 |
+
},
|
7196 |
+
{
|
7197 |
+
"epoch": 8.03,
|
7198 |
+
"learning_rate": 6.784323364396135e-05,
|
7199 |
+
"loss": 0.6773,
|
7200 |
+
"step": 360000
|
7201 |
+
},
|
7202 |
+
{
|
7203 |
+
"epoch": 8.03,
|
7204 |
+
"eval_loss": 0.6772929430007935,
|
7205 |
+
"eval_runtime": 2.0141,
|
7206 |
+
"eval_samples_per_second": 1140.463,
|
7207 |
+
"eval_steps_per_second": 17.874,
|
7208 |
+
"step": 360000
|
7209 |
}
|
7210 |
],
|
7211 |
"max_steps": 500000,
|
7212 |
"num_train_epochs": 12,
|
7213 |
+
"total_flos": 1.1501309361790678e+22,
|
7214 |
"trial_name": null,
|
7215 |
"trial_params": null
|
7216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf27ccfc5825e3575b2b31b80d5eae840d89a2e45fea29d5a456ebd076f43b0c
|
3 |
size 102501541
|