{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 100.0,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.25,
      "learning_rate": 6.789999999999999e-06,
      "loss": 15.499,
      "step": 100
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.379e-05,
      "loss": 5.6988,
      "step": 200
    },
    {
      "epoch": 3.75,
      "learning_rate": 2.0789999999999996e-05,
      "loss": 3.9135,
      "step": 300
    },
    {
      "epoch": 5.0,
      "learning_rate": 2.779e-05,
      "loss": 3.315,
      "step": 400
    },
    {
      "epoch": 6.25,
      "learning_rate": 3.479e-05,
      "loss": 3.0713,
      "step": 500
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.178999999999999e-05,
      "loss": 2.9727,
      "step": 600
    },
    {
      "epoch": 8.75,
      "learning_rate": 4.878999999999999e-05,
      "loss": 2.8912,
      "step": 700
    },
    {
      "epoch": 10.0,
      "learning_rate": 5.579e-05,
      "loss": 2.8514,
      "step": 800
    },
    {
      "epoch": 11.25,
      "learning_rate": 6.279e-05,
      "loss": 2.5765,
      "step": 900
    },
    {
      "epoch": 12.5,
      "learning_rate": 6.979e-05,
      "loss": 1.785,
      "step": 1000
    },
    {
      "epoch": 12.5,
      "eval_loss": 0.7464718222618103,
      "eval_runtime": 42.2657,
      "eval_samples_per_second": 28.013,
      "eval_steps_per_second": 1.751,
      "eval_wer": 0.6812214190883611,
      "step": 1000
    },
    {
      "epoch": 13.75,
      "learning_rate": 6.903e-05,
      "loss": 1.4773,
      "step": 1100
    },
    {
      "epoch": 15.0,
      "learning_rate": 6.803e-05,
      "loss": 1.3507,
      "step": 1200
    },
    {
      "epoch": 16.25,
      "learning_rate": 6.702999999999999e-05,
      "loss": 1.2818,
      "step": 1300
    },
    {
      "epoch": 17.5,
      "learning_rate": 6.602999999999999e-05,
      "loss": 1.233,
      "step": 1400
    },
    {
      "epoch": 18.75,
      "learning_rate": 6.502999999999999e-05,
      "loss": 1.161,
      "step": 1500
    },
    {
      "epoch": 20.0,
      "learning_rate": 6.403e-05,
      "loss": 1.1004,
      "step": 1600
    },
    {
      "epoch": 21.25,
      "learning_rate": 6.303e-05,
      "loss": 1.0246,
      "step": 1700
    },
    {
      "epoch": 22.5,
      "learning_rate": 6.202999999999999e-05,
      "loss": 0.9693,
      "step": 1800
    },
    {
      "epoch": 23.75,
      "learning_rate": 6.103e-05,
      "loss": 0.9507,
      "step": 1900
    },
    {
      "epoch": 25.0,
      "learning_rate": 6.002999999999999e-05,
      "loss": 0.8989,
      "step": 2000
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.24948318302631378,
      "eval_runtime": 40.7976,
      "eval_samples_per_second": 29.021,
      "eval_steps_per_second": 1.814,
      "eval_wer": 0.27319663667207555,
      "step": 2000
    },
    {
      "epoch": 26.25,
      "learning_rate": 5.904e-05,
      "loss": 0.8743,
      "step": 2100
    },
    {
      "epoch": 27.5,
      "learning_rate": 5.804e-05,
      "loss": 0.8558,
      "step": 2200
    },
    {
      "epoch": 28.75,
      "learning_rate": 5.7039999999999996e-05,
      "loss": 0.8228,
      "step": 2300
    },
    {
      "epoch": 30.0,
      "learning_rate": 5.604e-05,
      "loss": 0.8154,
      "step": 2400
    },
    {
      "epoch": 31.25,
      "learning_rate": 5.5039999999999995e-05,
      "loss": 0.7932,
      "step": 2500
    },
    {
      "epoch": 32.5,
      "learning_rate": 5.404e-05,
      "loss": 0.7755,
      "step": 2600
    },
    {
      "epoch": 33.75,
      "learning_rate": 5.3039999999999994e-05,
      "loss": 0.7585,
      "step": 2700
    },
    {
      "epoch": 35.0,
      "learning_rate": 5.2039999999999996e-05,
      "loss": 0.7472,
      "step": 2800
    },
    {
      "epoch": 36.25,
      "learning_rate": 5.103999999999999e-05,
      "loss": 0.734,
      "step": 2900
    },
    {
      "epoch": 37.5,
      "learning_rate": 5.0039999999999995e-05,
      "loss": 0.7118,
      "step": 3000
    },
    {
      "epoch": 37.5,
      "eval_loss": 0.21255508065223694,
      "eval_runtime": 40.7166,
      "eval_samples_per_second": 29.079,
      "eval_steps_per_second": 1.817,
      "eval_wer": 0.22835226434577371,
      "step": 3000
    },
    {
      "epoch": 38.75,
      "learning_rate": 4.904e-05,
      "loss": 0.7163,
      "step": 3100
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.8039999999999994e-05,
      "loss": 0.7066,
      "step": 3200
    },
    {
      "epoch": 41.25,
      "learning_rate": 4.704e-05,
      "loss": 0.7067,
      "step": 3300
    },
    {
      "epoch": 42.5,
      "learning_rate": 4.603999999999999e-05,
      "loss": 0.688,
      "step": 3400
    },
    {
      "epoch": 43.75,
      "learning_rate": 4.5039999999999996e-05,
      "loss": 0.6777,
      "step": 3500
    },
    {
      "epoch": 45.0,
      "learning_rate": 4.403999999999999e-05,
      "loss": 0.6621,
      "step": 3600
    },
    {
      "epoch": 46.25,
      "learning_rate": 4.3039999999999994e-05,
      "loss": 0.6494,
      "step": 3700
    },
    {
      "epoch": 47.5,
      "learning_rate": 4.203999999999999e-05,
      "loss": 0.646,
      "step": 3800
    },
    {
      "epoch": 48.75,
      "learning_rate": 4.104e-05,
      "loss": 0.6356,
      "step": 3900
    },
    {
      "epoch": 50.0,
      "learning_rate": 4.0039999999999996e-05,
      "loss": 0.6367,
      "step": 4000
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.20487311482429504,
      "eval_runtime": 43.4458,
      "eval_samples_per_second": 27.252,
      "eval_steps_per_second": 1.703,
      "eval_wer": 0.20489747750405665,
      "step": 4000
    },
    {
      "epoch": 51.25,
      "learning_rate": 3.904e-05,
      "loss": 0.6272,
      "step": 4100
    },
    {
      "epoch": 52.5,
      "learning_rate": 3.804e-05,
      "loss": 0.6187,
      "step": 4200
    },
    {
      "epoch": 53.75,
      "learning_rate": 3.704e-05,
      "loss": 0.6116,
      "step": 4300
    },
    {
      "epoch": 55.0,
      "learning_rate": 3.604e-05,
      "loss": 0.6074,
      "step": 4400
    },
    {
      "epoch": 56.25,
      "learning_rate": 3.504999999999999e-05,
      "loss": 0.5945,
      "step": 4500
    },
    {
      "epoch": 57.5,
      "learning_rate": 3.4049999999999994e-05,
      "loss": 0.6074,
      "step": 4600
    },
    {
      "epoch": 58.75,
      "learning_rate": 3.305e-05,
      "loss": 0.6011,
      "step": 4700
    },
    {
      "epoch": 60.0,
      "learning_rate": 3.205e-05,
      "loss": 0.5815,
      "step": 4800
    },
    {
      "epoch": 61.25,
      "learning_rate": 3.1049999999999996e-05,
      "loss": 0.5704,
      "step": 4900
    },
    {
      "epoch": 62.5,
      "learning_rate": 3.0049999999999995e-05,
      "loss": 0.5763,
      "step": 5000
    },
    {
      "epoch": 62.5,
      "eval_loss": 0.21160605549812317,
      "eval_runtime": 41.2219,
      "eval_samples_per_second": 28.723,
      "eval_steps_per_second": 1.795,
      "eval_wer": 0.20548753503466588,
      "step": 5000
    },
    {
      "epoch": 63.75,
      "learning_rate": 2.9049999999999995e-05,
      "loss": 0.5742,
      "step": 5100
    },
    {
      "epoch": 65.0,
      "learning_rate": 2.8049999999999997e-05,
      "loss": 0.58,
      "step": 5200
    },
    {
      "epoch": 66.25,
      "learning_rate": 2.705e-05,
      "loss": 0.5542,
      "step": 5300
    },
    {
      "epoch": 67.5,
      "learning_rate": 2.605e-05,
      "loss": 0.5472,
      "step": 5400
    },
    {
      "epoch": 68.75,
      "learning_rate": 2.505e-05,
      "loss": 0.5433,
      "step": 5500
    },
    {
      "epoch": 70.0,
      "learning_rate": 2.405e-05,
      "loss": 0.5448,
      "step": 5600
    },
    {
      "epoch": 71.25,
      "learning_rate": 2.3049999999999998e-05,
      "loss": 0.5469,
      "step": 5700
    },
    {
      "epoch": 72.5,
      "learning_rate": 2.2049999999999997e-05,
      "loss": 0.5424,
      "step": 5800
    },
    {
      "epoch": 73.75,
      "learning_rate": 2.1049999999999997e-05,
      "loss": 0.5221,
      "step": 5900
    },
    {
      "epoch": 75.0,
      "learning_rate": 2.0049999999999996e-05,
      "loss": 0.5196,
      "step": 6000
    },
    {
      "epoch": 75.0,
      "eval_loss": 0.21106497943401337,
      "eval_runtime": 41.193,
      "eval_samples_per_second": 28.743,
      "eval_steps_per_second": 1.796,
      "eval_wer": 0.19103112553473964,
      "step": 6000
    },
    {
      "epoch": 76.25,
      "learning_rate": 1.905e-05,
      "loss": 0.5215,
      "step": 6100
    },
    {
      "epoch": 77.5,
      "learning_rate": 1.8049999999999998e-05,
      "loss": 0.5141,
      "step": 6200
    },
    {
      "epoch": 78.75,
      "learning_rate": 1.7049999999999998e-05,
      "loss": 0.5087,
      "step": 6300
    },
    {
      "epoch": 80.0,
      "learning_rate": 1.6049999999999997e-05,
      "loss": 0.5136,
      "step": 6400
    },
    {
      "epoch": 81.25,
      "learning_rate": 1.5049999999999998e-05,
      "loss": 0.4973,
      "step": 6500
    },
    {
      "epoch": 82.5,
      "learning_rate": 1.4049999999999998e-05,
      "loss": 0.5064,
      "step": 6600
    },
    {
      "epoch": 83.75,
      "learning_rate": 1.3049999999999999e-05,
      "loss": 0.4985,
      "step": 6700
    },
    {
      "epoch": 85.0,
      "learning_rate": 1.205e-05,
      "loss": 0.4926,
      "step": 6800
    },
    {
      "epoch": 86.25,
      "learning_rate": 1.105e-05,
      "loss": 0.4973,
      "step": 6900
    },
    {
      "epoch": 87.5,
      "learning_rate": 1.0049999999999999e-05,
      "loss": 0.4949,
      "step": 7000
    },
    {
      "epoch": 87.5,
      "eval_loss": 0.21311460435390472,
      "eval_runtime": 43.2323,
      "eval_samples_per_second": 27.387,
      "eval_steps_per_second": 1.712,
      "eval_wer": 0.19309632689187195,
      "step": 7000
    },
    {
      "epoch": 88.75,
      "learning_rate": 9.05e-06,
      "loss": 0.5008,
      "step": 7100
    },
    {
      "epoch": 90.0,
      "learning_rate": 8.05e-06,
      "loss": 0.4834,
      "step": 7200
    },
    {
      "epoch": 91.25,
      "learning_rate": 7.049999999999999e-06,
      "loss": 0.4717,
      "step": 7300
    },
    {
      "epoch": 92.5,
      "learning_rate": 6.049999999999999e-06,
      "loss": 0.485,
      "step": 7400
    },
    {
      "epoch": 93.75,
      "learning_rate": 5.05e-06,
      "loss": 0.4835,
      "step": 7500
    },
    {
      "epoch": 95.0,
      "learning_rate": 4.049999999999999e-06,
      "loss": 0.4787,
      "step": 7600
    },
    {
      "epoch": 96.25,
      "learning_rate": 3.05e-06,
      "loss": 0.4747,
      "step": 7700
    },
    {
      "epoch": 97.5,
      "learning_rate": 2.05e-06,
      "loss": 0.4804,
      "step": 7800
    },
    {
      "epoch": 98.75,
      "learning_rate": 1.05e-06,
      "loss": 0.4731,
      "step": 7900
    },
    {
      "epoch": 100.0,
      "learning_rate": 6e-08,
      "loss": 0.4797,
      "step": 8000
    },
    {
      "epoch": 100.0,
      "eval_loss": 0.20929841697216034,
      "eval_runtime": 42.6795,
      "eval_samples_per_second": 27.742,
      "eval_steps_per_second": 1.734,
      "eval_wer": 0.190736096769435,
      "step": 8000
    },
    {
      "epoch": 100.0,
      "step": 8000,
      "total_flos": 2.997146399308124e+19,
      "train_loss": 1.1495286922454835,
      "train_runtime": 11669.3128,
      "train_samples_per_second": 21.921,
      "train_steps_per_second": 0.686
    }
  ],
  "max_steps": 8000,
  "num_train_epochs": 100,
  "total_flos": 2.997146399308124e+19,
  "trial_name": null,
  "trial_params": null
}