|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.3074691805656273, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.75e-05, |
|
"loss": 12.1562, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.125e-05, |
|
"loss": 8.7679, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 7.398952095808383e-05, |
|
"loss": 5.3683, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 7.286676646706586e-05, |
|
"loss": 4.3219, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 7.17440119760479e-05, |
|
"loss": 3.7182, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 3.836604595184326, |
|
"eval_runtime": 133.4846, |
|
"eval_samples_per_second": 34.611, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 1.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 7.062125748502993e-05, |
|
"loss": 3.478, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 6.949850299401197e-05, |
|
"loss": 3.4492, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.837574850299401e-05, |
|
"loss": 3.3928, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.725299401197604e-05, |
|
"loss": 3.3183, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 6.613023952095809e-05, |
|
"loss": 3.2075, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 3.258362293243408, |
|
"eval_runtime": 126.6078, |
|
"eval_samples_per_second": 36.491, |
|
"eval_steps_per_second": 4.565, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6.500748502994012e-05, |
|
"loss": 3.14, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 6.388473053892215e-05, |
|
"loss": 3.1281, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.276197604790418e-05, |
|
"loss": 3.0987, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.163922155688622e-05, |
|
"loss": 3.1003, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.0516467065868256e-05, |
|
"loss": 3.0922, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 3.127869129180908, |
|
"eval_runtime": 126.3837, |
|
"eval_samples_per_second": 36.555, |
|
"eval_steps_per_second": 4.573, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.9393712574850293e-05, |
|
"loss": 3.0588, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 5.827095808383233e-05, |
|
"loss": 3.0477, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5.714820359281436e-05, |
|
"loss": 3.045, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 5.602544910179641e-05, |
|
"loss": 3.0439, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 5.490269461077844e-05, |
|
"loss": 3.0846, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 3.079519271850586, |
|
"eval_runtime": 125.7215, |
|
"eval_samples_per_second": 36.748, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.3779940119760477e-05, |
|
"loss": 3.0512, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 5.265718562874251e-05, |
|
"loss": 3.0143, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.1534431137724546e-05, |
|
"loss": 3.0387, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.0411676646706584e-05, |
|
"loss": 3.0311, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.9288922155688615e-05, |
|
"loss": 3.0417, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 3.069390058517456, |
|
"eval_runtime": 125.7339, |
|
"eval_samples_per_second": 36.744, |
|
"eval_steps_per_second": 4.597, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.816616766467066e-05, |
|
"loss": 3.0219, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.704341317365269e-05, |
|
"loss": 3.0194, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.592065868263473e-05, |
|
"loss": 2.9974, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.479790419161676e-05, |
|
"loss": 2.9996, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.36751497005988e-05, |
|
"loss": 3.0016, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 3.0347490310668945, |
|
"eval_runtime": 132.3039, |
|
"eval_samples_per_second": 34.92, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.255239520958083e-05, |
|
"loss": 3.0052, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.142964071856287e-05, |
|
"loss": 2.9826, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.030688622754491e-05, |
|
"loss": 2.9747, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.918413173652694e-05, |
|
"loss": 2.9617, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.806137724550898e-05, |
|
"loss": 3.2053, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.984886407852173, |
|
"eval_runtime": 149.1508, |
|
"eval_samples_per_second": 30.975, |
|
"eval_steps_per_second": 3.875, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.693862275449102e-05, |
|
"loss": 2.9665, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.581586826347305e-05, |
|
"loss": 2.9641, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.469311377245509e-05, |
|
"loss": 2.9484, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.3570359281437126e-05, |
|
"loss": 2.9494, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.244760479041916e-05, |
|
"loss": 2.9698, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 2.989494562149048, |
|
"eval_runtime": 150.6903, |
|
"eval_samples_per_second": 30.659, |
|
"eval_steps_per_second": 3.836, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.1324850299401195e-05, |
|
"loss": 2.9664, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.020209580838323e-05, |
|
"loss": 2.9494, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9079341317365265e-05, |
|
"loss": 2.935, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.7956586826347306e-05, |
|
"loss": 2.9397, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.683383233532934e-05, |
|
"loss": 2.9485, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.9584460258483887, |
|
"eval_runtime": 140.6358, |
|
"eval_samples_per_second": 32.851, |
|
"eval_steps_per_second": 4.11, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
} |
|
], |
|
"max_steps": 688, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.336010418574825e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|