Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +2 -2
- last-checkpoint/rng_state_1.pth +2 -2
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +2 -2
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e16c64f372a1e3d1a1bc418a8e836e1cdfb665e54de868c2b11e7665a8124bd
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:005b2e75d9554e018ed97e7633dc38306c4b6264c09e33e752be796ffbb52bec
|
3 |
+
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f90d85a554f4240712d12f4bc1ca42d7122c1ea252e7027b0c918e99e4e1de85
|
3 |
+
size 14439
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:795aa2456aa093dc231a0ad24cc817827fd817979cb85490c41b919c7e2ac93d
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3a87c23715b7e025f85b63ac84fed0695e2db4c59ea40d605d467271ee7eaff
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2870379f6b63b47a9122657f592b0d575a5671da1a1fb7b494f10475da4ee08
|
3 |
+
size 14439
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccc684958440988665bb83e21073c7e935f44d96d06218e7d486fc15f417721c
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7790e1f5d1d539cc24a5c7f36951077137e9c4173691b395bbb8021eb1098267
|
3 |
+
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:296d7ff2fcb06c6f8604bd8a18f2173bb33ea1cf17b1b7e10c614ffd53051e05
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7006,11 +7006,211 @@
|
|
7006 |
"eval_samples_per_second": 1982.304,
|
7007 |
"eval_steps_per_second": 31.717,
|
7008 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7009 |
}
|
7010 |
],
|
7011 |
"max_steps": 500000,
|
7012 |
"num_train_epochs": 16,
|
7013 |
-
"total_flos": 1.
|
7014 |
"trial_name": null,
|
7015 |
"trial_params": null
|
7016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.029749685958516,
|
5 |
+
"global_step": 360000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7006 |
"eval_samples_per_second": 1982.304,
|
7007 |
"eval_steps_per_second": 31.717,
|
7008 |
"step": 350000
|
7009 |
+
},
|
7010 |
+
{
|
7011 |
+
"epoch": 10.74,
|
7012 |
+
"learning_rate": 7.529152489465592e-05,
|
7013 |
+
"loss": 0.3237,
|
7014 |
+
"step": 350500
|
7015 |
+
},
|
7016 |
+
{
|
7017 |
+
"epoch": 10.75,
|
7018 |
+
"learning_rate": 7.489140439617708e-05,
|
7019 |
+
"loss": 0.3239,
|
7020 |
+
"step": 351000
|
7021 |
+
},
|
7022 |
+
{
|
7023 |
+
"epoch": 10.75,
|
7024 |
+
"eval_loss": 0.7802942991256714,
|
7025 |
+
"eval_runtime": 0.4971,
|
7026 |
+
"eval_samples_per_second": 2011.848,
|
7027 |
+
"eval_steps_per_second": 32.19,
|
7028 |
+
"step": 351000
|
7029 |
+
},
|
7030 |
+
{
|
7031 |
+
"epoch": 10.77,
|
7032 |
+
"learning_rate": 7.449215995246522e-05,
|
7033 |
+
"loss": 0.3236,
|
7034 |
+
"step": 351500
|
7035 |
+
},
|
7036 |
+
{
|
7037 |
+
"epoch": 10.78,
|
7038 |
+
"learning_rate": 7.409379592959367e-05,
|
7039 |
+
"loss": 0.3237,
|
7040 |
+
"step": 352000
|
7041 |
+
},
|
7042 |
+
{
|
7043 |
+
"epoch": 10.78,
|
7044 |
+
"eval_loss": 0.7798171043395996,
|
7045 |
+
"eval_runtime": 0.501,
|
7046 |
+
"eval_samples_per_second": 1995.947,
|
7047 |
+
"eval_steps_per_second": 31.935,
|
7048 |
+
"step": 352000
|
7049 |
+
},
|
7050 |
+
{
|
7051 |
+
"epoch": 10.8,
|
7052 |
+
"learning_rate": 7.369631668400746e-05,
|
7053 |
+
"loss": 0.3234,
|
7054 |
+
"step": 352500
|
7055 |
+
},
|
7056 |
+
{
|
7057 |
+
"epoch": 10.82,
|
7058 |
+
"learning_rate": 7.3299726562476e-05,
|
7059 |
+
"loss": 0.3231,
|
7060 |
+
"step": 353000
|
7061 |
+
},
|
7062 |
+
{
|
7063 |
+
"epoch": 10.82,
|
7064 |
+
"eval_loss": 0.7781672477722168,
|
7065 |
+
"eval_runtime": 0.5053,
|
7066 |
+
"eval_samples_per_second": 1979.072,
|
7067 |
+
"eval_steps_per_second": 31.665,
|
7068 |
+
"step": 353000
|
7069 |
+
},
|
7070 |
+
{
|
7071 |
+
"epoch": 10.83,
|
7072 |
+
"learning_rate": 7.290402990204531e-05,
|
7073 |
+
"loss": 0.3233,
|
7074 |
+
"step": 353500
|
7075 |
+
},
|
7076 |
+
{
|
7077 |
+
"epoch": 10.85,
|
7078 |
+
"learning_rate": 7.250923102999073e-05,
|
7079 |
+
"loss": 0.3234,
|
7080 |
+
"step": 354000
|
7081 |
+
},
|
7082 |
+
{
|
7083 |
+
"epoch": 10.85,
|
7084 |
+
"eval_loss": 0.7746726870536804,
|
7085 |
+
"eval_runtime": 0.5021,
|
7086 |
+
"eval_samples_per_second": 1991.707,
|
7087 |
+
"eval_steps_per_second": 31.867,
|
7088 |
+
"step": 354000
|
7089 |
+
},
|
7090 |
+
{
|
7091 |
+
"epoch": 10.86,
|
7092 |
+
"learning_rate": 7.211533426376934e-05,
|
7093 |
+
"loss": 0.3234,
|
7094 |
+
"step": 354500
|
7095 |
+
},
|
7096 |
+
{
|
7097 |
+
"epoch": 10.88,
|
7098 |
+
"learning_rate": 7.172234391097317e-05,
|
7099 |
+
"loss": 0.3232,
|
7100 |
+
"step": 355000
|
7101 |
+
},
|
7102 |
+
{
|
7103 |
+
"epoch": 10.88,
|
7104 |
+
"eval_loss": 0.7761996984481812,
|
7105 |
+
"eval_runtime": 0.5166,
|
7106 |
+
"eval_samples_per_second": 1935.585,
|
7107 |
+
"eval_steps_per_second": 30.969,
|
7108 |
+
"step": 355000
|
7109 |
+
},
|
7110 |
+
{
|
7111 |
+
"epoch": 10.89,
|
7112 |
+
"learning_rate": 7.133026426928173e-05,
|
7113 |
+
"loss": 0.3231,
|
7114 |
+
"step": 355500
|
7115 |
+
},
|
7116 |
+
{
|
7117 |
+
"epoch": 10.91,
|
7118 |
+
"learning_rate": 7.093909962641514e-05,
|
7119 |
+
"loss": 0.3254,
|
7120 |
+
"step": 356000
|
7121 |
+
},
|
7122 |
+
{
|
7123 |
+
"epoch": 10.91,
|
7124 |
+
"eval_loss": 0.7848865985870361,
|
7125 |
+
"eval_runtime": 0.5114,
|
7126 |
+
"eval_samples_per_second": 1955.496,
|
7127 |
+
"eval_steps_per_second": 31.288,
|
7128 |
+
"step": 356000
|
7129 |
+
},
|
7130 |
+
{
|
7131 |
+
"epoch": 10.92,
|
7132 |
+
"learning_rate": 7.054885426008737e-05,
|
7133 |
+
"loss": 0.3229,
|
7134 |
+
"step": 356500
|
7135 |
+
},
|
7136 |
+
{
|
7137 |
+
"epoch": 10.94,
|
7138 |
+
"learning_rate": 7.015953243795907e-05,
|
7139 |
+
"loss": 0.3229,
|
7140 |
+
"step": 357000
|
7141 |
+
},
|
7142 |
+
{
|
7143 |
+
"epoch": 10.94,
|
7144 |
+
"eval_loss": 0.7789940237998962,
|
7145 |
+
"eval_runtime": 0.5055,
|
7146 |
+
"eval_samples_per_second": 1978.178,
|
7147 |
+
"eval_steps_per_second": 31.651,
|
7148 |
+
"step": 357000
|
7149 |
+
},
|
7150 |
+
{
|
7151 |
+
"epoch": 10.95,
|
7152 |
+
"learning_rate": 6.97711384175914e-05,
|
7153 |
+
"loss": 0.3244,
|
7154 |
+
"step": 357500
|
7155 |
+
},
|
7156 |
+
{
|
7157 |
+
"epoch": 10.97,
|
7158 |
+
"learning_rate": 6.938367644639911e-05,
|
7159 |
+
"loss": 0.3227,
|
7160 |
+
"step": 358000
|
7161 |
+
},
|
7162 |
+
{
|
7163 |
+
"epoch": 10.97,
|
7164 |
+
"eval_loss": 0.7808487415313721,
|
7165 |
+
"eval_runtime": 0.5081,
|
7166 |
+
"eval_samples_per_second": 1968.006,
|
7167 |
+
"eval_steps_per_second": 31.488,
|
7168 |
+
"step": 358000
|
7169 |
+
},
|
7170 |
+
{
|
7171 |
+
"epoch": 10.98,
|
7172 |
+
"learning_rate": 6.899715076160425e-05,
|
7173 |
+
"loss": 0.3226,
|
7174 |
+
"step": 358500
|
7175 |
+
},
|
7176 |
+
{
|
7177 |
+
"epoch": 11.0,
|
7178 |
+
"learning_rate": 6.861156559018986e-05,
|
7179 |
+
"loss": 0.323,
|
7180 |
+
"step": 359000
|
7181 |
+
},
|
7182 |
+
{
|
7183 |
+
"epoch": 11.0,
|
7184 |
+
"eval_loss": 0.7747591137886047,
|
7185 |
+
"eval_runtime": 0.493,
|
7186 |
+
"eval_samples_per_second": 2028.496,
|
7187 |
+
"eval_steps_per_second": 32.456,
|
7188 |
+
"step": 359000
|
7189 |
+
},
|
7190 |
+
{
|
7191 |
+
"epoch": 11.01,
|
7192 |
+
"learning_rate": 6.822692514885346e-05,
|
7193 |
+
"loss": 0.3225,
|
7194 |
+
"step": 359500
|
7195 |
+
},
|
7196 |
+
{
|
7197 |
+
"epoch": 11.03,
|
7198 |
+
"learning_rate": 6.784323364396135e-05,
|
7199 |
+
"loss": 0.3224,
|
7200 |
+
"step": 360000
|
7201 |
+
},
|
7202 |
+
{
|
7203 |
+
"epoch": 11.03,
|
7204 |
+
"eval_loss": 0.7760407328605652,
|
7205 |
+
"eval_runtime": 0.5204,
|
7206 |
+
"eval_samples_per_second": 1921.599,
|
7207 |
+
"eval_steps_per_second": 30.746,
|
7208 |
+
"step": 360000
|
7209 |
}
|
7210 |
],
|
7211 |
"max_steps": 500000,
|
7212 |
"num_train_epochs": 16,
|
7213 |
+
"total_flos": 1.1501449128410266e+22,
|
7214 |
"trial_name": null,
|
7215 |
"trial_params": null
|
7216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc2fe2e5c738c8106278780fab9f7f87cb208c3a8d2ade89df93aa0343c89bdb
|
3 |
size 102501541
|