|
{ |
|
  "best_metric": null, |
|
"best_model_checkpoint": "./checkpoint-1000", |
|
"epoch": 4.999928330824912, |
|
"global_step": 17440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.7499999999999997e-06, |
|
"loss": 12.8121, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.499999999999999e-06, |
|
"loss": 6.7805, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.1249999999999999e-05, |
|
"loss": 4.6147, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 3.955, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.875e-05, |
|
"loss": 3.5002, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.2499999999999998e-05, |
|
"loss": 3.1912, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.6249999999999998e-05, |
|
"loss": 3.0349, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.99625e-05, |
|
"loss": 3.0058, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.37125e-05, |
|
"loss": 2.9392, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.7462499999999996e-05, |
|
"loss": 2.9114, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
      "eval_loss": null, |
|
"eval_runtime": 711.5482, |
|
"eval_samples_per_second": 22.516, |
|
"eval_steps_per_second": 1.408, |
|
"eval_wer": 0.9997048122028068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.12125e-05, |
|
"loss": 2.8881, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4962499999999995e-05, |
|
"loss": 2.7567, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.871249999999999e-05, |
|
"loss": 2.2193, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5.2462499999999994e-05, |
|
"loss": 1.8226, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5.62125e-05, |
|
"loss": 1.6558, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 5.9962499999999994e-05, |
|
"loss": 1.5479, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 6.37125e-05, |
|
"loss": 1.4422, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 6.746249999999999e-05, |
|
"loss": 1.3521, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.121249999999999e-05, |
|
"loss": 1.2988, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.49625e-05, |
|
"loss": 1.2436, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
      "eval_loss": null, |
|
"eval_runtime": 723.1036, |
|
"eval_samples_per_second": 22.156, |
|
"eval_steps_per_second": 1.386, |
|
"eval_wer": 0.43096134964994576, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.451910621761658e-05, |
|
"loss": 1.2041, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.403335492227979e-05, |
|
"loss": 1.1714, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.354760362694299e-05, |
|
"loss": 1.1481, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.306185233160621e-05, |
|
"loss": 1.1268, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.258095854922278e-05, |
|
"loss": 1.1271, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.2095207253886e-05, |
|
"loss": 1.0927, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.160945595854922e-05, |
|
"loss": 1.0879, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 7.112370466321243e-05, |
|
"loss": 1.0699, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 7.063795336787565e-05, |
|
"loss": 1.0669, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.015220207253885e-05, |
|
"loss": 1.0552, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
      "eval_loss": null, |
|
"eval_runtime": 706.7689, |
|
"eval_samples_per_second": 22.668, |
|
"eval_steps_per_second": 1.418, |
|
"eval_wer": 0.3144455923969891, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.966645077720206e-05, |
|
"loss": 1.0525, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.918069948186528e-05, |
|
"loss": 1.0475, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.86949481865285e-05, |
|
"loss": 1.0333, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.82091968911917e-05, |
|
"loss": 1.0227, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.772344559585491e-05, |
|
"loss": 1.0154, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 6.723769430051813e-05, |
|
"loss": 1.0084, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 6.675194300518135e-05, |
|
"loss": 1.0153, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 6.626619170984455e-05, |
|
"loss": 1.0059, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 6.578529792746114e-05, |
|
"loss": 1.0069, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 6.529954663212434e-05, |
|
"loss": 1.0044, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
      "eval_loss": null, |
|
"eval_runtime": 720.8047, |
|
"eval_samples_per_second": 22.227, |
|
"eval_steps_per_second": 1.39, |
|
"eval_wer": 0.2814038104894342, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 6.481379533678756e-05, |
|
"loss": 0.9918, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.432804404145077e-05, |
|
"loss": 0.9913, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.384229274611399e-05, |
|
"loss": 0.9955, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.33565414507772e-05, |
|
"loss": 0.989, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.287079015544041e-05, |
|
"loss": 0.9825, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 6.238503886010362e-05, |
|
"loss": 0.9793, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 6.189928756476684e-05, |
|
"loss": 0.9768, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 6.141353626943006e-05, |
|
"loss": 0.9712, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 6.092778497409326e-05, |
|
"loss": 0.9777, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 6.0442033678756475e-05, |
|
"loss": 0.9718, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
      "eval_loss": null, |
|
"eval_runtime": 706.8206, |
|
"eval_samples_per_second": 22.666, |
|
"eval_steps_per_second": 1.418, |
|
"eval_wer": 0.26581661137243073, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 5.995628238341968e-05, |
|
"loss": 0.967, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 5.947053108808289e-05, |
|
"loss": 0.9688, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5.898477979274611e-05, |
|
"loss": 0.9677, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 5.8499028497409324e-05, |
|
"loss": 0.961, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 5.8013277202072536e-05, |
|
"loss": 0.9528, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 5.752752590673574e-05, |
|
"loss": 0.959, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5.7041774611398954e-05, |
|
"loss": 0.9615, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 5.6556023316062174e-05, |
|
"loss": 0.9587, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5.607512953367875e-05, |
|
"loss": 0.9573, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 5.5589378238341966e-05, |
|
"loss": 0.9502, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
      "eval_loss": null, |
|
"eval_runtime": 705.7107, |
|
"eval_samples_per_second": 22.702, |
|
"eval_steps_per_second": 1.42, |
|
"eval_wer": 0.2566016183991837, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5.510362694300518e-05, |
|
"loss": 0.9515, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5.461787564766839e-05, |
|
"loss": 0.9479, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5.4132124352331596e-05, |
|
"loss": 0.94, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 5.3646373056994816e-05, |
|
"loss": 0.9363, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 5.316062176165803e-05, |
|
"loss": 0.9419, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5.267487046632124e-05, |
|
"loss": 0.9435, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 5.218911917098445e-05, |
|
"loss": 0.9406, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 5.170336787564766e-05, |
|
"loss": 0.9457, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 5.121761658031088e-05, |
|
"loss": 0.9343, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 5.073186528497409e-05, |
|
"loss": 0.9418, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
      "eval_loss": null, |
|
"eval_runtime": 712.8454, |
|
"eval_samples_per_second": 22.475, |
|
"eval_steps_per_second": 1.406, |
|
"eval_wer": 0.24762405908889645, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5.02461139896373e-05, |
|
"loss": 0.9401, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 4.9760362694300515e-05, |
|
"loss": 0.9375, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 4.927461139896372e-05, |
|
"loss": 0.9362, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.878886010362694e-05, |
|
"loss": 0.9249, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 4.830310880829015e-05, |
|
"loss": 0.9283, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.782221502590673e-05, |
|
"loss": 0.9371, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.7336463730569944e-05, |
|
"loss": 0.9346, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 4.6850712435233157e-05, |
|
"loss": 0.9266, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.636496113989637e-05, |
|
"loss": 0.9285, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.587920984455958e-05, |
|
"loss": 0.9215, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
      "eval_loss": null, |
|
"eval_runtime": 699.6113, |
|
"eval_samples_per_second": 22.9, |
|
"eval_steps_per_second": 1.432, |
|
"eval_wer": 0.24195131968196723, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.5393458549222794e-05, |
|
"loss": 0.9211, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.4907707253886006e-05, |
|
"loss": 0.9198, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.442195595854922e-05, |
|
"loss": 0.9257, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.393620466321243e-05, |
|
"loss": 0.9279, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.345045336787564e-05, |
|
"loss": 0.9111, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.296470207253886e-05, |
|
"loss": 0.9176, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 4.247895077720207e-05, |
|
"loss": 0.9169, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.199319948186528e-05, |
|
"loss": 0.9103, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.150744818652849e-05, |
|
"loss": 0.9102, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 4.102655440414507e-05, |
|
"loss": 0.9236, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
      "eval_loss": null, |
|
"eval_runtime": 704.4849, |
|
"eval_samples_per_second": 22.741, |
|
"eval_steps_per_second": 1.422, |
|
"eval_wer": 0.23878125942515385, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.0540803108808285e-05, |
|
"loss": 0.9129, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 4.0055051813471504e-05, |
|
"loss": 0.9117, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.956930051813472e-05, |
|
"loss": 0.912, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.908354922279792e-05, |
|
"loss": 0.9013, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 3.8597797927461135e-05, |
|
"loss": 0.9128, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 3.811204663212435e-05, |
|
"loss": 0.9061, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7626295336787566e-05, |
|
"loss": 0.9055, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.714054404145078e-05, |
|
"loss": 0.9019, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.6654792746113984e-05, |
|
"loss": 0.9, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.61690414507772e-05, |
|
"loss": 0.9014, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
      "eval_loss": null, |
|
"eval_runtime": 702.0047, |
|
"eval_samples_per_second": 22.822, |
|
"eval_steps_per_second": 1.427, |
|
"eval_wer": 0.2353801826314067, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.5683290155440416e-05, |
|
"loss": 0.9045, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 3.519753886010362e-05, |
|
"loss": 0.8981, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.471178756476684e-05, |
|
"loss": 0.898, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.4226036269430046e-05, |
|
"loss": 0.8926, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.374028497409326e-05, |
|
"loss": 0.9, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.325453367875648e-05, |
|
"loss": 0.8928, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.276878238341968e-05, |
|
"loss": 0.8844, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.22830310880829e-05, |
|
"loss": 0.8844, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.179727979274611e-05, |
|
"loss": 0.89, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.131152849740932e-05, |
|
"loss": 0.8814, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
      "eval_loss": null, |
|
"eval_runtime": 712.5543, |
|
"eval_samples_per_second": 22.484, |
|
"eval_steps_per_second": 1.406, |
|
"eval_wer": 0.23118979933646916, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.082577720207254e-05, |
|
"loss": 0.8912, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3.034002590673575e-05, |
|
"loss": 0.8956, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.985427461139896e-05, |
|
"loss": 0.8866, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.9368523316062173e-05, |
|
"loss": 0.8851, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.888277202072539e-05, |
|
"loss": 0.8897, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.8397020725388598e-05, |
|
"loss": 0.8832, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 2.791126943005181e-05, |
|
"loss": 0.8887, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 2.7425518134715023e-05, |
|
"loss": 0.8847, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.6939766839378235e-05, |
|
"loss": 0.8754, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 2.645401554404145e-05, |
|
"loss": 0.8809, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
      "eval_loss": null, |
|
"eval_runtime": 721.0245, |
|
"eval_samples_per_second": 22.22, |
|
"eval_steps_per_second": 1.39, |
|
"eval_wer": 0.22846893790147144, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.596826424870466e-05, |
|
"loss": 0.8739, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.5482512953367873e-05, |
|
"loss": 0.8713, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.4996761658031085e-05, |
|
"loss": 0.8833, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 2.4511010362694297e-05, |
|
"loss": 0.8756, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.4025259067357513e-05, |
|
"loss": 0.8705, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.3539507772020722e-05, |
|
"loss": 0.874, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 2.3053756476683938e-05, |
|
"loss": 0.8681, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.2568005181347147e-05, |
|
"loss": 0.8699, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.208225388601036e-05, |
|
"loss": 0.8749, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.1596502590673575e-05, |
|
"loss": 0.8717, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
      "eval_loss": null, |
|
"eval_runtime": 704.6479, |
|
"eval_samples_per_second": 22.736, |
|
"eval_steps_per_second": 1.422, |
|
"eval_wer": 0.22629353217867845, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 2.1110751295336784e-05, |
|
"loss": 0.8725, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 2.0625e-05, |
|
"loss": 0.8678, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.0139248704663212e-05, |
|
"loss": 0.8618, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.9658354922279792e-05, |
|
"loss": 0.8646, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.9172603626943005e-05, |
|
"loss": 0.8746, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.8686852331606217e-05, |
|
"loss": 0.863, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.8205958549222797e-05, |
|
"loss": 0.862, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.772020725388601e-05, |
|
"loss": 0.8678, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 1.723445595854922e-05, |
|
"loss": 0.8694, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.6748704663212434e-05, |
|
"loss": 0.8787, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
      "eval_loss": null, |
|
"eval_runtime": 708.1909, |
|
"eval_samples_per_second": 22.622, |
|
"eval_steps_per_second": 1.415, |
|
"eval_wer": 0.2217630412043662, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.6262953367875646e-05, |
|
"loss": 0.8657, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 1.577720207253886e-05, |
|
"loss": 0.8631, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 1.529630829015544e-05, |
|
"loss": 0.8658, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.4810556994818651e-05, |
|
"loss": 0.855, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.4324805699481864e-05, |
|
"loss": 0.8502, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.3839054404145076e-05, |
|
"loss": 0.8625, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 1.335330310880829e-05, |
|
"loss": 0.8598, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.2867551813471502e-05, |
|
"loss": 0.8574, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.2381800518134713e-05, |
|
"loss": 0.8575, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.1896049222797926e-05, |
|
"loss": 0.8567, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
      "eval_loss": null, |
|
"eval_runtime": 705.2463, |
|
"eval_samples_per_second": 22.717, |
|
"eval_steps_per_second": 1.421, |
|
"eval_wer": 0.21933736756656164, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 1.1410297927461138e-05, |
|
"loss": 0.8474, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.0924546632124352e-05, |
|
"loss": 0.8519, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.0438795336787564e-05, |
|
"loss": 0.8487, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 9.953044041450777e-06, |
|
"loss": 0.8479, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 9.467292746113987e-06, |
|
"loss": 0.8463, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 8.981541450777202e-06, |
|
"loss": 0.8459, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 8.500647668393781e-06, |
|
"loss": 0.853, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 8.014896373056994e-06, |
|
"loss": 0.8456, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 7.529145077720206e-06, |
|
"loss": 0.8501, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 7.0433937823834195e-06, |
|
"loss": 0.8488, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
      "eval_loss": null, |
|
"eval_runtime": 702.6461, |
|
"eval_samples_per_second": 22.801, |
|
"eval_steps_per_second": 1.426, |
|
"eval_wer": 0.2187084892160197, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 6.557642487046632e-06, |
|
"loss": 0.8491, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 6.0718911917098434e-06, |
|
"loss": 0.8415, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 5.586139896373057e-06, |
|
"loss": 0.8467, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 5.100388601036269e-06, |
|
"loss": 0.8356, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.6146373056994814e-06, |
|
"loss": 0.8444, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.128886010362694e-06, |
|
"loss": 0.8485, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 3.6431347150259062e-06, |
|
"loss": 0.8424, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 3.1573834196891186e-06, |
|
"loss": 0.8435, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.6716321243523314e-06, |
|
"loss": 0.8444, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.185880829015544e-06, |
|
"loss": 0.8359, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
      "eval_loss": null, |
|
"eval_runtime": 698.574, |
|
"eval_samples_per_second": 22.934, |
|
"eval_steps_per_second": 1.434, |
|
"eval_wer": 0.2171683789697946, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.7001295336787564e-06, |
|
"loss": 0.8419, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.2143782383419686e-06, |
|
"loss": 0.8392, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 7.286269430051813e-07, |
|
"loss": 0.8369, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.4287564766839375e-07, |
|
"loss": 0.8428, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 17440, |
|
"total_flos": 3.487279250614594e+20, |
|
"train_loss": 1.187884036554109, |
|
"train_runtime": 143109.8834, |
|
"train_samples_per_second": 15.599, |
|
"train_steps_per_second": 0.122 |
|
} |
|
], |
|
"max_steps": 17440, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.487279250614594e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|