{
  "best_metric": 0.9087967049132097,
  "best_model_checkpoint": "trillsson3-ft-keyword-spotting-11/checkpoint-7980",
  "epoch": 19.999373825923605,
  "global_step": 7980,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "learning_rate": 3.571428571428571e-05,
      "loss": 7.4329,
      "step": 100
    },
    {
      "epoch": 0.5,
      "learning_rate": 7.330827067669172e-05,
      "loss": 4.5207,
      "step": 200
    },
    {
      "epoch": 0.75,
      "learning_rate": 0.00011090225563909774,
      "loss": 2.9219,
      "step": 300
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6216534274786703,
      "eval_loss": 1.202303409576416,
      "eval_runtime": 139.1467,
      "eval_samples_per_second": 48.855,
      "eval_steps_per_second": 1.531,
      "step": 399
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.00014849624060150375,
      "loss": 2.0315,
      "step": 400
    },
    {
      "epoch": 1.25,
      "learning_rate": 0.00018609022556390976,
      "loss": 1.4541,
      "step": 500
    },
    {
      "epoch": 1.5,
      "learning_rate": 0.00022368421052631576,
      "loss": 1.1541,
      "step": 600
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00026127819548872177,
      "loss": 0.9604,
      "step": 700
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8117093262724331,
      "eval_loss": 0.5436997413635254,
      "eval_runtime": 140.1586,
      "eval_samples_per_second": 48.502,
      "eval_steps_per_second": 1.52,
      "step": 798
    },
    {
      "epoch": 2.01,
      "learning_rate": 0.0002988721804511278,
      "loss": 0.8683,
      "step": 800
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.00029594820384294066,
      "loss": 0.8309,
      "step": 900
    },
    {
      "epoch": 2.51,
      "learning_rate": 0.0002917710944026733,
      "loss": 0.7717,
      "step": 1000
    },
    {
      "epoch": 2.76,
      "learning_rate": 0.000287593984962406,
      "loss": 0.7608,
      "step": 1100
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8887908208296558,
      "eval_loss": 0.42219480872154236,
      "eval_runtime": 140.2584,
      "eval_samples_per_second": 48.468,
      "eval_steps_per_second": 1.519,
      "step": 1197
    },
    {
      "epoch": 3.01,
      "learning_rate": 0.00028341687552213866,
      "loss": 0.7384,
      "step": 1200
    },
    {
      "epoch": 3.26,
      "learning_rate": 0.0002792397660818713,
      "loss": 0.7127,
      "step": 1300
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.000275062656641604,
      "loss": 0.7025,
      "step": 1400
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.00027088554720133666,
      "loss": 0.7045,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8932038834951457,
      "eval_loss": 0.3880767524242401,
      "eval_runtime": 139.5385,
      "eval_samples_per_second": 48.718,
      "eval_steps_per_second": 1.526,
      "step": 1596
    },
    {
      "epoch": 4.01,
      "learning_rate": 0.0002667084377610693,
      "loss": 0.7082,
      "step": 1600
    },
    {
      "epoch": 4.26,
      "learning_rate": 0.000262531328320802,
      "loss": 0.6833,
      "step": 1700
    },
    {
      "epoch": 4.51,
      "learning_rate": 0.00025835421888053466,
      "loss": 0.6881,
      "step": 1800
    },
    {
      "epoch": 4.76,
      "learning_rate": 0.0002541771094402673,
      "loss": 0.659,
      "step": 1900
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8846719623418653,
      "eval_loss": 0.37056076526641846,
      "eval_runtime": 141.2433,
      "eval_samples_per_second": 48.13,
      "eval_steps_per_second": 1.508,
      "step": 1995
    },
    {
      "epoch": 5.01,
      "learning_rate": 0.00025,
      "loss": 0.6621,
      "step": 2000
    },
    {
      "epoch": 5.26,
      "learning_rate": 0.00024582289055973265,
      "loss": 0.6643,
      "step": 2100
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.00024164578111946533,
      "loss": 0.6421,
      "step": 2200
    },
    {
      "epoch": 5.76,
      "learning_rate": 0.00023746867167919798,
      "loss": 0.6541,
      "step": 2300
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.891732862606649,
      "eval_loss": 0.3553307056427002,
      "eval_runtime": 140.013,
      "eval_samples_per_second": 48.553,
      "eval_steps_per_second": 1.521,
      "step": 2394
    },
    {
      "epoch": 6.02,
      "learning_rate": 0.00023329156223893065,
      "loss": 0.6477,
      "step": 2400
    },
    {
      "epoch": 6.27,
      "learning_rate": 0.00022911445279866333,
      "loss": 0.651,
      "step": 2500
    },
    {
      "epoch": 6.52,
      "learning_rate": 0.00022493734335839597,
      "loss": 0.6509,
      "step": 2600
    },
    {
      "epoch": 6.77,
      "learning_rate": 0.00022076023391812865,
      "loss": 0.6448,
      "step": 2700
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8952633127390409,
      "eval_loss": 0.3482230305671692,
      "eval_runtime": 140.3109,
      "eval_samples_per_second": 48.45,
      "eval_steps_per_second": 1.518,
      "step": 2793
    },
    {
      "epoch": 7.02,
      "learning_rate": 0.00021658312447786132,
      "loss": 0.6421,
      "step": 2800
    },
    {
      "epoch": 7.27,
      "learning_rate": 0.00021240601503759397,
      "loss": 0.6418,
      "step": 2900
    },
    {
      "epoch": 7.52,
      "learning_rate": 0.00020822890559732665,
      "loss": 0.6453,
      "step": 3000
    },
    {
      "epoch": 7.77,
      "learning_rate": 0.00020405179615705932,
      "loss": 0.6288,
      "step": 3100
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8989408649602825,
      "eval_loss": 0.34094032645225525,
      "eval_runtime": 141.0541,
      "eval_samples_per_second": 48.194,
      "eval_steps_per_second": 1.51,
      "step": 3192
    },
    {
      "epoch": 8.02,
      "learning_rate": 0.00019987468671679197,
      "loss": 0.6336,
      "step": 3200
    },
    {
      "epoch": 8.27,
      "learning_rate": 0.00019569757727652464,
      "loss": 0.6441,
      "step": 3300
    },
    {
      "epoch": 8.52,
      "learning_rate": 0.00019152046783625732,
      "loss": 0.6557,
      "step": 3400
    },
    {
      "epoch": 8.77,
      "learning_rate": 0.00018734335839598997,
      "loss": 0.641,
      "step": 3500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9051191526919682,
      "eval_loss": 0.3297290503978729,
      "eval_runtime": 140.3281,
      "eval_samples_per_second": 48.444,
      "eval_steps_per_second": 1.518,
      "step": 3591
    },
    {
      "epoch": 9.02,
      "learning_rate": 0.00018316624895572264,
      "loss": 0.6411,
      "step": 3600
    },
    {
      "epoch": 9.27,
      "learning_rate": 0.00017898913951545532,
      "loss": 0.6455,
      "step": 3700
    },
    {
      "epoch": 9.52,
      "learning_rate": 0.00017481203007518797,
      "loss": 0.6464,
      "step": 3800
    },
    {
      "epoch": 9.77,
      "learning_rate": 0.00017063492063492064,
      "loss": 0.6369,
      "step": 3900
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9042365401588702,
      "eval_loss": 0.33252373337745667,
      "eval_runtime": 140.3318,
      "eval_samples_per_second": 48.442,
      "eval_steps_per_second": 1.518,
      "step": 3990
    },
    {
      "epoch": 10.03,
      "learning_rate": 0.0001664578111946533,
      "loss": 0.6225,
      "step": 4000
    },
    {
      "epoch": 10.28,
      "learning_rate": 0.00016228070175438596,
      "loss": 0.6295,
      "step": 4100
    },
    {
      "epoch": 10.53,
      "learning_rate": 0.0001581453634085213,
      "loss": 0.6255,
      "step": 4200
    },
    {
      "epoch": 10.78,
      "learning_rate": 0.00015396825396825397,
      "loss": 0.6218,
      "step": 4300
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9064430714916152,
      "eval_loss": 0.3250460922718048,
      "eval_runtime": 140.1683,
      "eval_samples_per_second": 48.499,
      "eval_steps_per_second": 1.52,
      "step": 4389
    },
    {
      "epoch": 11.03,
      "learning_rate": 0.00014979114452798661,
      "loss": 0.6507,
      "step": 4400
    },
    {
      "epoch": 11.28,
      "learning_rate": 0.0001456140350877193,
      "loss": 0.623,
      "step": 4500
    },
    {
      "epoch": 11.53,
      "learning_rate": 0.00014143692564745194,
      "loss": 0.6278,
      "step": 4600
    },
    {
      "epoch": 11.78,
      "learning_rate": 0.0001372598162071846,
      "loss": 0.6247,
      "step": 4700
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8958517210944396,
      "eval_loss": 0.3311932682991028,
      "eval_runtime": 140.5537,
      "eval_samples_per_second": 48.366,
      "eval_steps_per_second": 1.515,
      "step": 4788
    },
    {
      "epoch": 12.03,
      "learning_rate": 0.0001330827067669173,
      "loss": 0.6149,
      "step": 4800
    },
    {
      "epoch": 12.28,
      "learning_rate": 0.00012890559732664993,
      "loss": 0.6244,
      "step": 4900
    },
    {
      "epoch": 12.53,
      "learning_rate": 0.0001247284878863826,
      "loss": 0.6294,
      "step": 5000
    },
    {
      "epoch": 12.78,
      "learning_rate": 0.00012055137844611527,
      "loss": 0.6284,
      "step": 5100
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9068843777581642,
      "eval_loss": 0.321657657623291,
      "eval_runtime": 139.9572,
      "eval_samples_per_second": 48.572,
      "eval_steps_per_second": 1.522,
      "step": 5187
    },
    {
      "epoch": 13.03,
      "learning_rate": 0.00011637426900584793,
      "loss": 0.6317,
      "step": 5200
    },
    {
      "epoch": 13.28,
      "learning_rate": 0.00011219715956558061,
      "loss": 0.6397,
      "step": 5300
    },
    {
      "epoch": 13.53,
      "learning_rate": 0.00010802005012531327,
      "loss": 0.6041,
      "step": 5400
    },
    {
      "epoch": 13.78,
      "learning_rate": 0.00010384294068504593,
      "loss": 0.6213,
      "step": 5500
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.8977640482494852,
      "eval_loss": 0.33012473583221436,
      "eval_runtime": 139.8104,
      "eval_samples_per_second": 48.623,
      "eval_steps_per_second": 1.523,
      "step": 5586
    },
    {
      "epoch": 14.04,
      "learning_rate": 9.96658312447786e-05,
      "loss": 0.6206,
      "step": 5600
    },
    {
      "epoch": 14.29,
      "learning_rate": 9.548872180451127e-05,
      "loss": 0.6379,
      "step": 5700
    },
    {
      "epoch": 14.54,
      "learning_rate": 9.131161236424393e-05,
      "loss": 0.6269,
      "step": 5800
    },
    {
      "epoch": 14.79,
      "learning_rate": 8.713450292397659e-05,
      "loss": 0.6274,
      "step": 5900
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9080611944689615,
      "eval_loss": 0.31795457005500793,
      "eval_runtime": 139.5988,
      "eval_samples_per_second": 48.697,
      "eval_steps_per_second": 1.526,
      "step": 5985
    },
    {
      "epoch": 15.04,
      "learning_rate": 8.295739348370926e-05,
      "loss": 0.6263,
      "step": 6000
    },
    {
      "epoch": 15.29,
      "learning_rate": 7.878028404344193e-05,
      "loss": 0.6171,
      "step": 6100
    },
    {
      "epoch": 15.54,
      "learning_rate": 7.460317460317459e-05,
      "loss": 0.626,
      "step": 6200
    },
    {
      "epoch": 15.79,
      "learning_rate": 7.042606516290726e-05,
      "loss": 0.627,
      "step": 6300
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9020300088261254,
      "eval_loss": 0.32565683126449585,
      "eval_runtime": 139.2707,
      "eval_samples_per_second": 48.811,
      "eval_steps_per_second": 1.529,
      "step": 6384
    },
    {
      "epoch": 16.04,
      "learning_rate": 6.624895572263992e-05,
      "loss": 0.6166,
      "step": 6400
    },
    {
      "epoch": 16.29,
      "learning_rate": 6.207184628237259e-05,
      "loss": 0.6146,
      "step": 6500
    },
    {
      "epoch": 16.54,
      "learning_rate": 5.7894736842105253e-05,
      "loss": 0.6126,
      "step": 6600
    },
    {
      "epoch": 16.79,
      "learning_rate": 5.3759398496240595e-05,
      "loss": 0.6227,
      "step": 6700
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.9055604589585172,
      "eval_loss": 0.3193184435367584,
      "eval_runtime": 140.199,
      "eval_samples_per_second": 48.488,
      "eval_steps_per_second": 1.519,
      "step": 6783
    },
    {
      "epoch": 17.04,
      "learning_rate": 4.9582289055973256e-05,
      "loss": 0.629,
      "step": 6800
    },
    {
      "epoch": 17.29,
      "learning_rate": 4.5405179615705925e-05,
      "loss": 0.6055,
      "step": 6900
    },
    {
      "epoch": 17.54,
      "learning_rate": 4.122807017543859e-05,
      "loss": 0.6088,
      "step": 7000
    },
    {
      "epoch": 17.79,
      "learning_rate": 3.7050960735171254e-05,
      "loss": 0.6192,
      "step": 7100
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.9065901735804649,
      "eval_loss": 0.31994640827178955,
      "eval_runtime": 139.8576,
      "eval_samples_per_second": 48.607,
      "eval_steps_per_second": 1.523,
      "step": 7182
    },
    {
      "epoch": 18.05,
      "learning_rate": 3.287385129490392e-05,
      "loss": 0.624,
      "step": 7200
    },
    {
      "epoch": 18.3,
      "learning_rate": 2.8696741854636587e-05,
      "loss": 0.6102,
      "step": 7300
    },
    {
      "epoch": 18.55,
      "learning_rate": 2.4519632414369252e-05,
      "loss": 0.6261,
      "step": 7400
    },
    {
      "epoch": 18.8,
      "learning_rate": 2.0342522974101917e-05,
      "loss": 0.6075,
      "step": 7500
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.9073256840247131,
      "eval_loss": 0.3183441460132599,
      "eval_runtime": 145.1619,
      "eval_samples_per_second": 46.83,
      "eval_steps_per_second": 1.467,
      "step": 7581
    },
    {
      "epoch": 19.05,
      "learning_rate": 1.6165413533834585e-05,
      "loss": 0.6066,
      "step": 7600
    },
    {
      "epoch": 19.3,
      "learning_rate": 1.198830409356725e-05,
      "loss": 0.6057,
      "step": 7700
    },
    {
      "epoch": 19.55,
      "learning_rate": 7.811194653299915e-06,
      "loss": 0.6276,
      "step": 7800
    },
    {
      "epoch": 19.8,
      "learning_rate": 3.634085213032581e-06,
      "loss": 0.6196,
      "step": 7900
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.9087967049132097,
      "eval_loss": 0.3166075348854065,
      "eval_runtime": 146.0633,
      "eval_samples_per_second": 46.541,
      "eval_steps_per_second": 1.458,
      "step": 7980
    },
    {
      "epoch": 20.0,
      "step": 7980,
      "total_flos": 0.0,
      "train_loss": 0.8469889265552798,
      "train_runtime": 24285.4703,
      "train_samples_per_second": 42.078,
      "train_steps_per_second": 0.329
    }
  ],
  "max_steps": 7980,
  "num_train_epochs": 20,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}