marinone94
committed on
Commit
•
7ac2142
1
Parent(s):
881b745
Training in progress, step 3300
Browse files- {checkpoint-2900 → checkpoint-3200}/config.json +0 -0
- {checkpoint-2900 → checkpoint-3200}/optimizer.pt +1 -1
- {checkpoint-2900 → checkpoint-3200}/preprocessor_config.json +0 -0
- {checkpoint-3000 → checkpoint-3200}/pytorch_model.bin +1 -1
- {checkpoint-3000 → checkpoint-3200}/rng_state.pth +2 -2
- {checkpoint-3000 → checkpoint-3200}/scaler.pt +1 -1
- {checkpoint-2900 → checkpoint-3200}/scheduler.pt +1 -1
- {checkpoint-3000 → checkpoint-3200}/trainer_state.json +81 -3
- {checkpoint-2900 → checkpoint-3200}/training_args.bin +0 -0
- {checkpoint-3000 → checkpoint-3300}/config.json +0 -0
- {checkpoint-3000 → checkpoint-3300}/optimizer.pt +1 -1
- {checkpoint-3000 → checkpoint-3300}/preprocessor_config.json +0 -0
- {checkpoint-2900 → checkpoint-3300}/pytorch_model.bin +1 -1
- {checkpoint-2900 → checkpoint-3300}/rng_state.pth +2 -2
- {checkpoint-2900 → checkpoint-3300}/scaler.pt +1 -1
- {checkpoint-3000 → checkpoint-3300}/scheduler.pt +1 -1
- {checkpoint-2900 → checkpoint-3300}/trainer_state.json +159 -3
- {checkpoint-3000 → checkpoint-3300}/training_args.bin +0 -0
{checkpoint-2900 → checkpoint-3200}/config.json
RENAMED
File without changes
|
{checkpoint-2900 → checkpoint-3200}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d0ef9d1762f7ffe1cc3a0e4748e6ebb583dea4f49da96382216e78f5647702c
|
3 |
size 2490337809
|
{checkpoint-2900 → checkpoint-3200}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-3000 → checkpoint-3200}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0ae84798d472963cd276f2962517c928f65dbe900552c2adf1ac952b2b09cb73
|
3 |
size 1262063089
|
{checkpoint-3000 → checkpoint-3200}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2cdfe7c6a1cd08c22115fc108dd9766a68df8ce8189a197857ef6e07d3e9f573
|
3 |
+
size 14503
|
{checkpoint-3000 → checkpoint-3200}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:adc06e1a20206447d0abcf52e13d1dede8b4a0f2043d99c40e3b996eae19e163
|
3 |
size 559
|
{checkpoint-2900 → checkpoint-3200}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d6d7554ac67cd9c5c482a68804a21e6ce04d359c4a6dab9309204dc50f02e7d
|
3 |
size 623
|
{checkpoint-3000 → checkpoint-3200}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1176,11 +1176,89 @@
|
|
1176 |
"eval_steps_per_second": 0.794,
|
1177 |
"eval_wer": 0.1401362475163213,
|
1178 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1179 |
}
|
1180 |
],
|
1181 |
"max_steps": 4550,
|
1182 |
"num_train_epochs": 50,
|
1183 |
-
"total_flos": 4.
|
1184 |
"trial_name": null,
|
1185 |
"trial_params": null
|
1186 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 35.16393442622951,
|
5 |
+
"global_step": 3200,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1176 |
"eval_steps_per_second": 0.794,
|
1177 |
"eval_wer": 0.1401362475163213,
|
1178 |
"step": 3000
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"epoch": 33.19,
|
1182 |
+
"learning_rate": 0.00015019607843137257,
|
1183 |
+
"loss": 0.8263,
|
1184 |
+
"step": 3020
|
1185 |
+
},
|
1186 |
+
{
|
1187 |
+
"epoch": 33.4,
|
1188 |
+
"learning_rate": 0.00014823529411764705,
|
1189 |
+
"loss": 0.8203,
|
1190 |
+
"step": 3040
|
1191 |
+
},
|
1192 |
+
{
|
1193 |
+
"epoch": 33.62,
|
1194 |
+
"learning_rate": 0.00014627450980392157,
|
1195 |
+
"loss": 0.8277,
|
1196 |
+
"step": 3060
|
1197 |
+
},
|
1198 |
+
{
|
1199 |
+
"epoch": 33.84,
|
1200 |
+
"learning_rate": 0.00014431372549019607,
|
1201 |
+
"loss": 0.8183,
|
1202 |
+
"step": 3080
|
1203 |
+
},
|
1204 |
+
{
|
1205 |
+
"epoch": 34.07,
|
1206 |
+
"learning_rate": 0.0001423529411764706,
|
1207 |
+
"loss": 0.8395,
|
1208 |
+
"step": 3100
|
1209 |
+
},
|
1210 |
+
{
|
1211 |
+
"epoch": 34.07,
|
1212 |
+
"eval_loss": 0.1606692373752594,
|
1213 |
+
"eval_runtime": 190.8222,
|
1214 |
+
"eval_samples_per_second": 25.38,
|
1215 |
+
"eval_steps_per_second": 0.797,
|
1216 |
+
"eval_wer": 0.13755322168606302,
|
1217 |
+
"step": 3100
|
1218 |
+
},
|
1219 |
+
{
|
1220 |
+
"epoch": 34.28,
|
1221 |
+
"learning_rate": 0.0001403921568627451,
|
1222 |
+
"loss": 0.8207,
|
1223 |
+
"step": 3120
|
1224 |
+
},
|
1225 |
+
{
|
1226 |
+
"epoch": 34.5,
|
1227 |
+
"learning_rate": 0.0001384313725490196,
|
1228 |
+
"loss": 0.8106,
|
1229 |
+
"step": 3140
|
1230 |
+
},
|
1231 |
+
{
|
1232 |
+
"epoch": 34.72,
|
1233 |
+
"learning_rate": 0.00013647058823529413,
|
1234 |
+
"loss": 0.8329,
|
1235 |
+
"step": 3160
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 34.94,
|
1239 |
+
"learning_rate": 0.00013450980392156863,
|
1240 |
+
"loss": 0.8048,
|
1241 |
+
"step": 3180
|
1242 |
+
},
|
1243 |
+
{
|
1244 |
+
"epoch": 35.16,
|
1245 |
+
"learning_rate": 0.00013254901960784313,
|
1246 |
+
"loss": 0.83,
|
1247 |
+
"step": 3200
|
1248 |
+
},
|
1249 |
+
{
|
1250 |
+
"epoch": 35.16,
|
1251 |
+
"eval_loss": 0.15384173393249512,
|
1252 |
+
"eval_runtime": 191.1912,
|
1253 |
+
"eval_samples_per_second": 25.331,
|
1254 |
+
"eval_steps_per_second": 0.795,
|
1255 |
+
"eval_wer": 0.1379222253760999,
|
1256 |
+
"step": 3200
|
1257 |
}
|
1258 |
],
|
1259 |
"max_steps": 4550,
|
1260 |
"num_train_epochs": 50,
|
1261 |
+
"total_flos": 4.949777145440599e+19,
|
1262 |
"trial_name": null,
|
1263 |
"trial_params": null
|
1264 |
}
|
{checkpoint-2900 → checkpoint-3200}/training_args.bin
RENAMED
File without changes
|
{checkpoint-3000 → checkpoint-3300}/config.json
RENAMED
File without changes
|
{checkpoint-3000 → checkpoint-3300}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2490337809
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:806c8e4e7fd28f0571714cc9f9a0055970d60ee26b8981016c52d353bb4430e3
|
3 |
size 2490337809
|
{checkpoint-3000 → checkpoint-3300}/preprocessor_config.json
RENAMED
File without changes
|
{checkpoint-2900 → checkpoint-3300}/pytorch_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262063089
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0e5ce31e623a5f978a7686e5b6636e0db65bea23a7d3b791715106209b73f8c
|
3 |
size 1262063089
|
{checkpoint-2900 → checkpoint-3300}/rng_state.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a716b25bdaec66a312a035315a78027e767ae161c16b37a11eeba450f275b66e
|
3 |
+
size 14503
|
{checkpoint-2900 → checkpoint-3300}/scaler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:011849dafd5feecbd9c7cd405b92e51d3198c6a38da3d9f70b7ac2eb65d83b8f
|
3 |
size 559
|
{checkpoint-3000 → checkpoint-3300}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa9db20ef4c55522e29abf089521aed25de7e4d0ceb54efd10455bf35f8ac946
|
3 |
size 623
|
{checkpoint-2900 → checkpoint-3300}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1137,11 +1137,167 @@
|
|
1137 |
"eval_steps_per_second": 0.799,
|
1138 |
"eval_wer": 0.1397104740278172,
|
1139 |
"step": 2900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1140 |
}
|
1141 |
],
|
1142 |
"max_steps": 4550,
|
1143 |
"num_train_epochs": 50,
|
1144 |
-
"total_flos":
|
1145 |
"trial_name": null,
|
1146 |
"trial_params": null
|
1147 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 36.26229508196721,
|
5 |
+
"global_step": 3300,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1137 |
"eval_steps_per_second": 0.799,
|
1138 |
"eval_wer": 0.1397104740278172,
|
1139 |
"step": 2900
|
1140 |
+
},
|
1141 |
+
{
|
1142 |
+
"epoch": 32.09,
|
1143 |
+
"learning_rate": 0.00016,
|
1144 |
+
"loss": 0.8634,
|
1145 |
+
"step": 2920
|
1146 |
+
},
|
1147 |
+
{
|
1148 |
+
"epoch": 32.31,
|
1149 |
+
"learning_rate": 0.00015803921568627451,
|
1150 |
+
"loss": 0.8351,
|
1151 |
+
"step": 2940
|
1152 |
+
},
|
1153 |
+
{
|
1154 |
+
"epoch": 32.52,
|
1155 |
+
"learning_rate": 0.00015607843137254901,
|
1156 |
+
"loss": 0.8427,
|
1157 |
+
"step": 2960
|
1158 |
+
},
|
1159 |
+
{
|
1160 |
+
"epoch": 32.74,
|
1161 |
+
"learning_rate": 0.00015411764705882352,
|
1162 |
+
"loss": 0.8324,
|
1163 |
+
"step": 2980
|
1164 |
+
},
|
1165 |
+
{
|
1166 |
+
"epoch": 32.96,
|
1167 |
+
"learning_rate": 0.00015215686274509804,
|
1168 |
+
"loss": 0.8491,
|
1169 |
+
"step": 3000
|
1170 |
+
},
|
1171 |
+
{
|
1172 |
+
"epoch": 32.96,
|
1173 |
+
"eval_loss": 0.1594749242067337,
|
1174 |
+
"eval_runtime": 191.4631,
|
1175 |
+
"eval_samples_per_second": 25.295,
|
1176 |
+
"eval_steps_per_second": 0.794,
|
1177 |
+
"eval_wer": 0.1401362475163213,
|
1178 |
+
"step": 3000
|
1179 |
+
},
|
1180 |
+
{
|
1181 |
+
"epoch": 33.19,
|
1182 |
+
"learning_rate": 0.00015019607843137257,
|
1183 |
+
"loss": 0.8263,
|
1184 |
+
"step": 3020
|
1185 |
+
},
|
1186 |
+
{
|
1187 |
+
"epoch": 33.4,
|
1188 |
+
"learning_rate": 0.00014823529411764705,
|
1189 |
+
"loss": 0.8203,
|
1190 |
+
"step": 3040
|
1191 |
+
},
|
1192 |
+
{
|
1193 |
+
"epoch": 33.62,
|
1194 |
+
"learning_rate": 0.00014627450980392157,
|
1195 |
+
"loss": 0.8277,
|
1196 |
+
"step": 3060
|
1197 |
+
},
|
1198 |
+
{
|
1199 |
+
"epoch": 33.84,
|
1200 |
+
"learning_rate": 0.00014431372549019607,
|
1201 |
+
"loss": 0.8183,
|
1202 |
+
"step": 3080
|
1203 |
+
},
|
1204 |
+
{
|
1205 |
+
"epoch": 34.07,
|
1206 |
+
"learning_rate": 0.0001423529411764706,
|
1207 |
+
"loss": 0.8395,
|
1208 |
+
"step": 3100
|
1209 |
+
},
|
1210 |
+
{
|
1211 |
+
"epoch": 34.07,
|
1212 |
+
"eval_loss": 0.1606692373752594,
|
1213 |
+
"eval_runtime": 190.8222,
|
1214 |
+
"eval_samples_per_second": 25.38,
|
1215 |
+
"eval_steps_per_second": 0.797,
|
1216 |
+
"eval_wer": 0.13755322168606302,
|
1217 |
+
"step": 3100
|
1218 |
+
},
|
1219 |
+
{
|
1220 |
+
"epoch": 34.28,
|
1221 |
+
"learning_rate": 0.0001403921568627451,
|
1222 |
+
"loss": 0.8207,
|
1223 |
+
"step": 3120
|
1224 |
+
},
|
1225 |
+
{
|
1226 |
+
"epoch": 34.5,
|
1227 |
+
"learning_rate": 0.0001384313725490196,
|
1228 |
+
"loss": 0.8106,
|
1229 |
+
"step": 3140
|
1230 |
+
},
|
1231 |
+
{
|
1232 |
+
"epoch": 34.72,
|
1233 |
+
"learning_rate": 0.00013647058823529413,
|
1234 |
+
"loss": 0.8329,
|
1235 |
+
"step": 3160
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 34.94,
|
1239 |
+
"learning_rate": 0.00013450980392156863,
|
1240 |
+
"loss": 0.8048,
|
1241 |
+
"step": 3180
|
1242 |
+
},
|
1243 |
+
{
|
1244 |
+
"epoch": 35.16,
|
1245 |
+
"learning_rate": 0.00013254901960784313,
|
1246 |
+
"loss": 0.83,
|
1247 |
+
"step": 3200
|
1248 |
+
},
|
1249 |
+
{
|
1250 |
+
"epoch": 35.16,
|
1251 |
+
"eval_loss": 0.15384173393249512,
|
1252 |
+
"eval_runtime": 191.1912,
|
1253 |
+
"eval_samples_per_second": 25.331,
|
1254 |
+
"eval_steps_per_second": 0.795,
|
1255 |
+
"eval_wer": 0.1379222253760999,
|
1256 |
+
"step": 3200
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 35.38,
|
1260 |
+
"learning_rate": 0.00013058823529411764,
|
1261 |
+
"loss": 0.7963,
|
1262 |
+
"step": 3220
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"epoch": 35.6,
|
1266 |
+
"learning_rate": 0.00012862745098039216,
|
1267 |
+
"loss": 0.7895,
|
1268 |
+
"step": 3240
|
1269 |
+
},
|
1270 |
+
{
|
1271 |
+
"epoch": 35.82,
|
1272 |
+
"learning_rate": 0.0001266666666666667,
|
1273 |
+
"loss": 0.7964,
|
1274 |
+
"step": 3260
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"epoch": 36.04,
|
1278 |
+
"learning_rate": 0.0001247058823529412,
|
1279 |
+
"loss": 0.7931,
|
1280 |
+
"step": 3280
|
1281 |
+
},
|
1282 |
+
{
|
1283 |
+
"epoch": 36.26,
|
1284 |
+
"learning_rate": 0.0001227450980392157,
|
1285 |
+
"loss": 0.7835,
|
1286 |
+
"step": 3300
|
1287 |
+
},
|
1288 |
+
{
|
1289 |
+
"epoch": 36.26,
|
1290 |
+
"eval_loss": 0.16020993888378143,
|
1291 |
+
"eval_runtime": 192.1405,
|
1292 |
+
"eval_samples_per_second": 25.206,
|
1293 |
+
"eval_steps_per_second": 0.791,
|
1294 |
+
"eval_wer": 0.1408458699971615,
|
1295 |
+
"step": 3300
|
1296 |
}
|
1297 |
],
|
1298 |
"max_steps": 4550,
|
1299 |
"num_train_epochs": 50,
|
1300 |
+
"total_flos": 5.103177199568347e+19,
|
1301 |
"trial_name": null,
|
1302 |
"trial_params": null
|
1303 |
}
|
{checkpoint-3000 → checkpoint-3300}/training_args.bin
RENAMED
File without changes
|