|
{
  "best_metric": 2.310106039047241,
  "best_model_checkpoint": "./checkpoint-500",
  "epoch": 71.42857142857143,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.43,
      "learning_rate": 5.2499999999999994e-08,
      "loss": 3.5252,
      "step": 10
    },
    {
      "epoch": 2.86,
      "learning_rate": 1.275e-07,
      "loss": 3.509,
      "step": 20
    },
    {
      "epoch": 4.29,
      "learning_rate": 2.025e-07,
      "loss": 3.481,
      "step": 30
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.775e-07,
      "loss": 3.4536,
      "step": 40
    },
    {
      "epoch": 7.14,
      "learning_rate": 2.86875e-07,
      "loss": 3.3407,
      "step": 50
    },
    {
      "epoch": 8.57,
      "learning_rate": 2.6812500000000003e-07,
      "loss": 3.1837,
      "step": 60
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.49375e-07,
      "loss": 3.1471,
      "step": 70
    },
    {
      "epoch": 11.43,
      "learning_rate": 2.3062500000000002e-07,
      "loss": 3.0484,
      "step": 80
    },
    {
      "epoch": 12.86,
      "learning_rate": 2.11875e-07,
      "loss": 2.9713,
      "step": 90
    },
    {
      "epoch": 14.29,
      "learning_rate": 1.93125e-07,
      "loss": 2.9393,
      "step": 100
    },
    {
      "epoch": 14.29,
      "eval_loss": 2.902493715286255,
      "eval_runtime": 131.7731,
      "eval_samples_per_second": 3.885,
      "eval_steps_per_second": 0.121,
      "eval_wer": 369.26452784503635,
      "step": 100
    },
    {
      "epoch": 15.71,
      "learning_rate": 1.74375e-07,
      "loss": 2.8874,
      "step": 110
    },
    {
      "epoch": 17.14,
      "learning_rate": 1.5562500000000002e-07,
      "loss": 2.8192,
      "step": 120
    },
    {
      "epoch": 18.57,
      "learning_rate": 1.3687499999999999e-07,
      "loss": 2.8025,
      "step": 130
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.18125e-07,
      "loss": 2.7738,
      "step": 140
    },
    {
      "epoch": 21.43,
      "learning_rate": 9.937499999999999e-08,
      "loss": 2.7684,
      "step": 150
    },
    {
      "epoch": 22.86,
      "learning_rate": 8.0625e-08,
      "loss": 2.7294,
      "step": 160
    },
    {
      "epoch": 24.29,
      "learning_rate": 6.187499999999999e-08,
      "loss": 2.7318,
      "step": 170
    },
    {
      "epoch": 25.71,
      "learning_rate": 4.3125e-08,
      "loss": 2.7239,
      "step": 180
    },
    {
      "epoch": 27.14,
      "learning_rate": 2.4374999999999998e-08,
      "loss": 2.6988,
      "step": 190
    },
    {
      "epoch": 28.57,
      "learning_rate": 5.625e-09,
      "loss": 2.7124,
      "step": 200
    },
    {
      "epoch": 28.57,
      "eval_loss": 2.716507911682129,
      "eval_runtime": 138.682,
      "eval_samples_per_second": 3.692,
      "eval_steps_per_second": 0.115,
      "eval_wer": 407.92221549636804,
      "step": 200
    },
    {
      "epoch": 30.0,
      "learning_rate": 1.9108695652173912e-07,
      "loss": 2.6955,
      "step": 210
    },
    {
      "epoch": 31.43,
      "learning_rate": 1.8456521739130434e-07,
      "loss": 2.6743,
      "step": 220
    },
    {
      "epoch": 32.86,
      "learning_rate": 1.7804347826086957e-07,
      "loss": 2.6255,
      "step": 230
    },
    {
      "epoch": 34.29,
      "learning_rate": 1.715217391304348e-07,
      "loss": 2.6113,
      "step": 240
    },
    {
      "epoch": 35.71,
      "learning_rate": 1.65e-07,
      "loss": 2.5752,
      "step": 250
    },
    {
      "epoch": 37.14,
      "learning_rate": 1.584782608695652e-07,
      "loss": 2.5542,
      "step": 260
    },
    {
      "epoch": 38.57,
      "learning_rate": 1.5195652173913042e-07,
      "loss": 2.514,
      "step": 270
    },
    {
      "epoch": 40.0,
      "learning_rate": 1.4543478260869565e-07,
      "loss": 2.5026,
      "step": 280
    },
    {
      "epoch": 41.43,
      "learning_rate": 1.3891304347826087e-07,
      "loss": 2.4645,
      "step": 290
    },
    {
      "epoch": 42.86,
      "learning_rate": 1.323913043478261e-07,
      "loss": 2.4773,
      "step": 300
    },
    {
      "epoch": 42.86,
      "eval_loss": 2.4649124145507812,
      "eval_runtime": 248.4635,
      "eval_samples_per_second": 2.061,
      "eval_steps_per_second": 0.064,
      "eval_wer": 449.50060532687655,
      "step": 300
    },
    {
      "epoch": 44.29,
      "learning_rate": 1.258695652173913e-07,
      "loss": 2.428,
      "step": 310
    },
    {
      "epoch": 45.71,
      "learning_rate": 1.193478260869565e-07,
      "loss": 2.4309,
      "step": 320
    },
    {
      "epoch": 47.14,
      "learning_rate": 1.1282608695652174e-07,
      "loss": 2.395,
      "step": 330
    },
    {
      "epoch": 48.57,
      "learning_rate": 1.0630434782608696e-07,
      "loss": 2.3836,
      "step": 340
    },
    {
      "epoch": 50.0,
      "learning_rate": 9.978260869565218e-08,
      "loss": 2.3801,
      "step": 350
    },
    {
      "epoch": 51.43,
      "learning_rate": 9.326086956521739e-08,
      "loss": 2.3529,
      "step": 360
    },
    {
      "epoch": 52.86,
      "learning_rate": 8.673913043478261e-08,
      "loss": 2.3635,
      "step": 370
    },
    {
      "epoch": 54.29,
      "learning_rate": 8.021739130434783e-08,
      "loss": 2.3338,
      "step": 380
    },
    {
      "epoch": 55.71,
      "learning_rate": 7.369565217391304e-08,
      "loss": 2.3501,
      "step": 390
    },
    {
      "epoch": 57.14,
      "learning_rate": 6.717391304347826e-08,
      "loss": 2.3142,
      "step": 400
    },
    {
      "epoch": 57.14,
      "eval_loss": 2.346592664718628,
      "eval_runtime": 217.0162,
      "eval_samples_per_second": 2.359,
      "eval_steps_per_second": 0.074,
      "eval_wer": 473.600181598063,
      "step": 400
    },
    {
      "epoch": 58.57,
      "learning_rate": 6.065217391304348e-08,
      "loss": 2.3374,
      "step": 410
    },
    {
      "epoch": 60.0,
      "learning_rate": 5.4130434782608695e-08,
      "loss": 2.2977,
      "step": 420
    },
    {
      "epoch": 61.43,
      "learning_rate": 4.760869565217391e-08,
      "loss": 2.3,
      "step": 430
    },
    {
      "epoch": 62.86,
      "learning_rate": 4.1086956521739124e-08,
      "loss": 2.2979,
      "step": 440
    },
    {
      "epoch": 64.29,
      "learning_rate": 3.4565217391304345e-08,
      "loss": 2.2983,
      "step": 450
    },
    {
      "epoch": 65.71,
      "learning_rate": 2.8043478260869563e-08,
      "loss": 2.3046,
      "step": 460
    },
    {
      "epoch": 67.14,
      "learning_rate": 2.152173913043478e-08,
      "loss": 2.2772,
      "step": 470
    },
    {
      "epoch": 68.57,
      "learning_rate": 1.5e-08,
      "loss": 2.2866,
      "step": 480
    },
    {
      "epoch": 70.0,
      "learning_rate": 8.478260869565216e-09,
      "loss": 2.2822,
      "step": 490
    },
    {
      "epoch": 71.43,
      "learning_rate": 1.9565217391304347e-09,
      "loss": 2.2942,
      "step": 500
    },
    {
      "epoch": 71.43,
      "eval_loss": 2.310106039047241,
      "eval_runtime": 207.4467,
      "eval_samples_per_second": 2.468,
      "eval_steps_per_second": 0.077,
      "eval_wer": 485.9866828087167,
      "step": 500
    },
    {
      "epoch": 71.43,
      "step": 500,
      "total_flos": 7.5808932950016e+17,
      "train_loss": 0.4595182914733887,
      "train_runtime": 383.5233,
      "train_samples_per_second": 83.437,
      "train_steps_per_second": 1.304
    }
  ],
  "max_steps": 500,
  "num_train_epochs": 72,
  "total_flos": 7.5808932950016e+17,
  "trial_name": null,
  "trial_params": null
}
|
|