|
{ |
|
"best_metric": 0.9149749926448956, |
|
"best_model_checkpoint": "trillsson3-ft-keyword-spotting-14/checkpoint-23955", |
|
"epoch": 20.0, |
|
"global_step": 31940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.73512836568566e-06, |
|
"loss": 8.0793, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.812773951158422e-05, |
|
"loss": 7.1754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.7520350657482777e-05, |
|
"loss": 5.6723, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.691296180338134e-05, |
|
"loss": 4.1468, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.6305572949279896e-05, |
|
"loss": 3.3576, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5.5698184095178454e-05, |
|
"loss": 2.955, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 6.509079524107701e-05, |
|
"loss": 2.7689, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.448340638697557e-05, |
|
"loss": 2.5858, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.387601753287413e-05, |
|
"loss": 2.3275, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.326862867877268e-05, |
|
"loss": 2.0771, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00010266123982467126, |
|
"loss": 1.8958, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001120538509705698, |
|
"loss": 1.6778, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00012144646211646837, |
|
"loss": 1.5346, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00013083907326236693, |
|
"loss": 1.4238, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001402316844082655, |
|
"loss": 1.2824, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6891732862606649, |
|
"eval_loss": 0.7817752957344055, |
|
"eval_runtime": 152.2428, |
|
"eval_samples_per_second": 44.652, |
|
"eval_steps_per_second": 0.703, |
|
"step": 1597 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00014962429555416404, |
|
"loss": 1.2121, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0001590169067000626, |
|
"loss": 1.1391, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00016840951784596116, |
|
"loss": 1.12, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00017780212899185972, |
|
"loss": 1.053, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0001871947401377583, |
|
"loss": 1.0245, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019658735128365683, |
|
"loss": 0.9887, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002059799624295554, |
|
"loss": 0.9671, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00021537257357545395, |
|
"loss": 0.8912, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00022476518472135253, |
|
"loss": 0.9015, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002341577958672511, |
|
"loss": 0.89, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00024355040701314962, |
|
"loss": 0.8173, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0002529430181590482, |
|
"loss": 0.8338, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00026233562930494674, |
|
"loss": 0.8313, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002717282404508453, |
|
"loss": 0.8395, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00028112085159674385, |
|
"loss": 0.8036, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00029051346274264244, |
|
"loss": 0.8003, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8734922035892909, |
|
"eval_loss": 0.44425612688064575, |
|
"eval_runtime": 147.8186, |
|
"eval_samples_per_second": 45.989, |
|
"eval_steps_per_second": 0.724, |
|
"step": 3194 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00029990607388854097, |
|
"loss": 0.7951, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0002989668127739511, |
|
"loss": 0.8114, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00029792318931329574, |
|
"loss": 0.8343, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00029687956585264035, |
|
"loss": 0.7617, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00029583594239198497, |
|
"loss": 0.7779, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0002947923189313295, |
|
"loss": 0.7512, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00029374869547067414, |
|
"loss": 0.7962, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00029270507201001875, |
|
"loss": 0.7134, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00029166144854936337, |
|
"loss": 0.786, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.000290617825088708, |
|
"loss": 0.7808, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0002895742016280526, |
|
"loss": 0.7379, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00028853057816739715, |
|
"loss": 0.7545, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00028748695470674177, |
|
"loss": 0.7557, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0002864433312460864, |
|
"loss": 0.7633, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.000285399707785431, |
|
"loss": 0.7414, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0002843560843247756, |
|
"loss": 0.7232, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8833480435422183, |
|
"eval_loss": 0.372787207365036, |
|
"eval_runtime": 148.4355, |
|
"eval_samples_per_second": 45.798, |
|
"eval_steps_per_second": 0.721, |
|
"step": 4791 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.00028331246086412017, |
|
"loss": 0.7661, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 0.00028226883740346484, |
|
"loss": 0.7224, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 0.0002812252139428094, |
|
"loss": 0.7186, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 0.0002801920267167606, |
|
"loss": 0.7151, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 0.00027914840325610516, |
|
"loss": 0.7175, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 0.0002781047797954498, |
|
"loss": 0.6871, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 0.0002770611563347944, |
|
"loss": 0.7446, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.000276017532874139, |
|
"loss": 0.7128, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.0002749739094134836, |
|
"loss": 0.6896, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0002739302859528282, |
|
"loss": 0.714, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0002728866624921728, |
|
"loss": 0.7478, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0002718430390315174, |
|
"loss": 0.6853, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 0.000270799415570862, |
|
"loss": 0.7165, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002697557921102066, |
|
"loss": 0.704, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00026871216864955125, |
|
"loss": 0.6954, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.0002676685451888958, |
|
"loss": 0.73, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8973227419829362, |
|
"eval_loss": 0.346542090177536, |
|
"eval_runtime": 147.4068, |
|
"eval_samples_per_second": 46.117, |
|
"eval_steps_per_second": 0.726, |
|
"step": 6388 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0002666249217282404, |
|
"loss": 0.7149, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00026558129826758503, |
|
"loss": 0.7107, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.00026453767480692965, |
|
"loss": 0.6744, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0002634940513462742, |
|
"loss": 0.727, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.0002624504278856189, |
|
"loss": 0.7, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.00026140680442496343, |
|
"loss": 0.7239, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00026036318096430805, |
|
"loss": 0.6847, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.00025931955750365266, |
|
"loss": 0.7151, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.0002582759340429973, |
|
"loss": 0.7228, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0002572427468169484, |
|
"loss": 0.6925, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 0.000256199123356293, |
|
"loss": 0.7064, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.00025515549989563765, |
|
"loss": 0.6861, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.0002541118764349822, |
|
"loss": 0.7057, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.0002530682529743269, |
|
"loss": 0.6811, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.00025202462951367144, |
|
"loss": 0.6676, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00025098100605301605, |
|
"loss": 0.7015, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.910856134157105, |
|
"eval_loss": 0.3211327791213989, |
|
"eval_runtime": 150.4771, |
|
"eval_samples_per_second": 45.176, |
|
"eval_steps_per_second": 0.711, |
|
"step": 7985 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00024993738259236067, |
|
"loss": 0.708, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 0.0002488937591317053, |
|
"loss": 0.7372, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00024785013567104984, |
|
"loss": 0.7089, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00024680651221039445, |
|
"loss": 0.6962, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00024576288874973907, |
|
"loss": 0.7209, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.0002447192652890837, |
|
"loss": 0.7091, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0002436756418284283, |
|
"loss": 0.6898, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.00024263201836777288, |
|
"loss": 0.726, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.00024158839490711747, |
|
"loss": 0.682, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 0.0002405447714464621, |
|
"loss": 0.6855, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 0.0002395011479858067, |
|
"loss": 0.7108, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00023845752452515128, |
|
"loss": 0.6945, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 0.00023741390106449592, |
|
"loss": 0.6621, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0002363702776038405, |
|
"loss": 0.7176, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00023532665414318512, |
|
"loss": 0.6824, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.00023428303068252974, |
|
"loss": 0.6981, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9080611944689615, |
|
"eval_loss": 0.3200249671936035, |
|
"eval_runtime": 148.3702, |
|
"eval_samples_per_second": 45.818, |
|
"eval_steps_per_second": 0.721, |
|
"step": 9582 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.00023323940722187433, |
|
"loss": 0.6934, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00023219578376121894, |
|
"loss": 0.7101, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 0.00023115216030056353, |
|
"loss": 0.6922, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.00023010853683990814, |
|
"loss": 0.7027, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00022906491337925275, |
|
"loss": 0.6931, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 0.00022802128991859734, |
|
"loss": 0.6782, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00022697766645794195, |
|
"loss": 0.6842, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00022593404299728657, |
|
"loss": 0.7295, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00022489041953663115, |
|
"loss": 0.7145, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00022384679607597577, |
|
"loss": 0.6637, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00022280317261532038, |
|
"loss": 0.6666, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.00022175954915466497, |
|
"loss": 0.7068, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 0.00022071592569400955, |
|
"loss": 0.6759, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.0002196723022333542, |
|
"loss": 0.6926, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 0.00021862867877269878, |
|
"loss": 0.6825, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.00021758505531204342, |
|
"loss": 0.6807, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9058546631362165, |
|
"eval_loss": 0.3208906650543213, |
|
"eval_runtime": 149.169, |
|
"eval_samples_per_second": 45.572, |
|
"eval_steps_per_second": 0.717, |
|
"step": 11179 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 0.000216541431851388, |
|
"loss": 0.677, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00021550824462533916, |
|
"loss": 0.7379, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00021447505739929034, |
|
"loss": 0.7156, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00021343143393863493, |
|
"loss": 0.6406, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0002123878104779795, |
|
"loss": 0.6888, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00021134418701732413, |
|
"loss": 0.6968, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 0.00021030056355666874, |
|
"loss": 0.6989, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.00020925694009601333, |
|
"loss": 0.6751, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 0.00020821331663535794, |
|
"loss": 0.6879, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00020716969317470255, |
|
"loss": 0.678, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00020612606971404714, |
|
"loss": 0.6501, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.00020508244625339176, |
|
"loss": 0.6679, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00020403882279273637, |
|
"loss": 0.7116, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 0.00020299519933208098, |
|
"loss": 0.6899, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 0.00020195157587142557, |
|
"loss": 0.6892, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.00020090795241077016, |
|
"loss": 0.6873, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.902177110914975, |
|
"eval_loss": 0.3205910921096802, |
|
"eval_runtime": 147.3348, |
|
"eval_samples_per_second": 46.14, |
|
"eval_steps_per_second": 0.726, |
|
"step": 12776 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 0.0001998643289501148, |
|
"loss": 0.6847, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 0.00019882070548945938, |
|
"loss": 0.6833, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.00019777708202880397, |
|
"loss": 0.6737, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0001967334585681486, |
|
"loss": 0.6568, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 0.0001956898351074932, |
|
"loss": 0.7059, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.00019464621164683778, |
|
"loss": 0.6831, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.00019360258818618243, |
|
"loss": 0.7174, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.00019256940096013358, |
|
"loss": 0.6966, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 0.00019152577749947816, |
|
"loss": 0.699, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 0.00019048215403882278, |
|
"loss": 0.6774, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0001894385305781674, |
|
"loss": 0.6848, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.00018839490711751198, |
|
"loss": 0.6901, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 0.00018735128365685656, |
|
"loss": 0.6994, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 0.0001863076601962012, |
|
"loss": 0.6739, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 0.0001852640367355458, |
|
"loss": 0.6854, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.00018422041327489038, |
|
"loss": 0.6416, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9057075610473668, |
|
"eval_loss": 0.31237688660621643, |
|
"eval_runtime": 148.9632, |
|
"eval_samples_per_second": 45.635, |
|
"eval_steps_per_second": 0.718, |
|
"step": 14373 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 0.00018317678981423502, |
|
"loss": 0.6711, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 0.0001821331663535796, |
|
"loss": 0.6965, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.00018108954289292422, |
|
"loss": 0.6896, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.00018004591943226883, |
|
"loss": 0.6944, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 0.00017900229597161342, |
|
"loss": 0.6981, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.00017795867251095803, |
|
"loss": 0.6819, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 0.00017691504905030265, |
|
"loss": 0.6869, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 0.00017587142558964723, |
|
"loss": 0.6974, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 0.00017482780212899185, |
|
"loss": 0.6621, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.00017378417866833646, |
|
"loss": 0.6732, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 0.00017274055520768105, |
|
"loss": 0.678, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 0.00017169693174702566, |
|
"loss": 0.6494, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 0.00017065330828637025, |
|
"loss": 0.6909, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.00016960968482571486, |
|
"loss": 0.687, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.00016856606136505948, |
|
"loss": 0.6705, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 0.00016752243790440406, |
|
"loss": 0.6698, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8949691085613416, |
|
"eval_loss": 0.3288457989692688, |
|
"eval_runtime": 150.3856, |
|
"eval_samples_per_second": 45.204, |
|
"eval_steps_per_second": 0.712, |
|
"step": 15970 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 0.00016648925067835524, |
|
"loss": 0.6701, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.00016544562721769983, |
|
"loss": 0.6771, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 0.00016440200375704444, |
|
"loss": 0.6877, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.00016335838029638906, |
|
"loss": 0.6495, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.00016231475683573364, |
|
"loss": 0.6925, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 0.00016127113337507826, |
|
"loss": 0.647, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 0.00016022750991442287, |
|
"loss": 0.658, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.00015918388645376746, |
|
"loss": 0.7033, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 0.00015814026299311207, |
|
"loss": 0.6675, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 0.00015709663953245666, |
|
"loss": 0.6905, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 0.0001560530160718013, |
|
"loss": 0.6766, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 0.00015500939261114588, |
|
"loss": 0.684, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 10.77, |
|
"learning_rate": 0.00015396576915049047, |
|
"loss": 0.6382, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.0001529221456898351, |
|
"loss": 0.6737, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 0.0001518785222291797, |
|
"loss": 0.691, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 10.96, |
|
"learning_rate": 0.00015083489876852429, |
|
"loss": 0.716, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8998234774933804, |
|
"eval_loss": 0.31469690799713135, |
|
"eval_runtime": 150.5759, |
|
"eval_samples_per_second": 45.147, |
|
"eval_steps_per_second": 0.711, |
|
"step": 17567 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.0001497912753078689, |
|
"loss": 0.7326, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 0.0001487476518472135, |
|
"loss": 0.6747, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 0.00014770402838655813, |
|
"loss": 0.7075, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 0.0001466604049259027, |
|
"loss": 0.69, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 0.00014561678146524733, |
|
"loss": 0.6793, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 0.00014457315800459194, |
|
"loss": 0.6782, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.00014352953454393655, |
|
"loss": 0.6532, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 0.00014249634731788768, |
|
"loss": 0.7053, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 0.0001414527238572323, |
|
"loss": 0.6476, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 0.0001404091003965769, |
|
"loss": 0.6308, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 0.0001393654769359215, |
|
"loss": 0.6886, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 0.0001383218534752661, |
|
"loss": 0.6631, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 0.00013727823001461072, |
|
"loss": 0.7056, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 0.00013623460655395533, |
|
"loss": 0.6602, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 0.00013519098309329992, |
|
"loss": 0.6728, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 0.00013414735963264453, |
|
"loss": 0.6514, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9111503383348043, |
|
"eval_loss": 0.3034283220767975, |
|
"eval_runtime": 145.7542, |
|
"eval_samples_per_second": 46.64, |
|
"eval_steps_per_second": 0.734, |
|
"step": 19164 |
|
}, |
|
{ |
|
"epoch": 12.02, |
|
"learning_rate": 0.00013310373617198915, |
|
"loss": 0.6567, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 0.00013206011271133373, |
|
"loss": 0.6882, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 0.00013101648925067835, |
|
"loss": 0.6511, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 0.00012997286579002296, |
|
"loss": 0.6705, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 0.00012892924232936755, |
|
"loss": 0.6693, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.00012788561886871216, |
|
"loss": 0.68, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 0.00012684199540805675, |
|
"loss": 0.6767, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.00012579837194740136, |
|
"loss": 0.6768, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 0.00012475474848674598, |
|
"loss": 0.6662, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 0.00012371112502609056, |
|
"loss": 0.6511, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 0.00012266750156543518, |
|
"loss": 0.7057, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.00012163431433938634, |
|
"loss": 0.6699, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 0.00012059069087873094, |
|
"loss": 0.6541, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 0.00011954706741807554, |
|
"loss": 0.6741, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.00011850344395742016, |
|
"loss": 0.658, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.00011745982049676476, |
|
"loss": 0.6513, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9092380111797588, |
|
"eval_loss": 0.30905914306640625, |
|
"eval_runtime": 146.3169, |
|
"eval_samples_per_second": 46.461, |
|
"eval_steps_per_second": 0.731, |
|
"step": 20761 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 0.00011641619703610936, |
|
"loss": 0.6568, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.00011537257357545397, |
|
"loss": 0.6853, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 0.00011432895011479858, |
|
"loss": 0.6699, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 0.00011328532665414317, |
|
"loss": 0.6494, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 0.00011224170319348778, |
|
"loss": 0.7118, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 13.34, |
|
"learning_rate": 0.00011119807973283239, |
|
"loss": 0.6649, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 0.00011015445627217699, |
|
"loss": 0.6646, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 13.46, |
|
"learning_rate": 0.0001091108328115216, |
|
"loss": 0.6436, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 0.0001080672093508662, |
|
"loss": 0.6258, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 0.0001070235858902108, |
|
"loss": 0.6754, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 0.0001059799624295554, |
|
"loss": 0.6737, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.00010493633896890001, |
|
"loss": 0.6511, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 13.78, |
|
"learning_rate": 0.00010389271550824463, |
|
"loss": 0.6472, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 13.84, |
|
"learning_rate": 0.00010284909204758921, |
|
"loss": 0.6571, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.00010180546858693383, |
|
"loss": 0.693, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 13.96, |
|
"learning_rate": 0.00010076184512627843, |
|
"loss": 0.652, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.909973521624007, |
|
"eval_loss": 0.30560359358787537, |
|
"eval_runtime": 146.3619, |
|
"eval_samples_per_second": 46.447, |
|
"eval_steps_per_second": 0.731, |
|
"step": 22358 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 9.971822166562303e-05, |
|
"loss": 0.6286, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 14.09, |
|
"learning_rate": 9.867459820496764e-05, |
|
"loss": 0.6503, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 9.763097474431224e-05, |
|
"loss": 0.6514, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 9.659778751826341e-05, |
|
"loss": 0.6728, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 9.555416405760801e-05, |
|
"loss": 0.6621, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 9.451054059695261e-05, |
|
"loss": 0.6771, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 9.346691713629722e-05, |
|
"loss": 0.6689, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 9.242329367564181e-05, |
|
"loss": 0.6712, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 9.137967021498642e-05, |
|
"loss": 0.6761, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 9.033604675433104e-05, |
|
"loss": 0.6327, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 8.929242329367564e-05, |
|
"loss": 0.6671, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 8.824879983302024e-05, |
|
"loss": 0.6598, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 8.720517637236485e-05, |
|
"loss": 0.6317, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 8.616155291170945e-05, |
|
"loss": 0.6615, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 8.511792945105405e-05, |
|
"loss": 0.6087, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 8.407430599039865e-05, |
|
"loss": 0.7105, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9149749926448956, |
|
"eval_loss": 0.30149412155151367, |
|
"eval_runtime": 145.6909, |
|
"eval_samples_per_second": 46.66, |
|
"eval_steps_per_second": 0.734, |
|
"step": 23955 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 8.303068252974326e-05, |
|
"loss": 0.6911, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 8.198705906908788e-05, |
|
"loss": 0.6717, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 15.15, |
|
"learning_rate": 8.094343560843246e-05, |
|
"loss": 0.6564, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 7.989981214777708e-05, |
|
"loss": 0.6446, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 7.885618868712169e-05, |
|
"loss": 0.6431, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 15.34, |
|
"learning_rate": 7.781256522646628e-05, |
|
"loss": 0.6762, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 7.676894176581089e-05, |
|
"loss": 0.6656, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 7.572531830515549e-05, |
|
"loss": 0.6337, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 7.468169484450009e-05, |
|
"loss": 0.6541, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 7.363807138384469e-05, |
|
"loss": 0.6772, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 7.260488415779586e-05, |
|
"loss": 0.629, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 15.72, |
|
"learning_rate": 7.156126069714046e-05, |
|
"loss": 0.6998, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 7.051763723648507e-05, |
|
"loss": 0.6686, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 6.947401377582967e-05, |
|
"loss": 0.6822, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 6.843039031517429e-05, |
|
"loss": 0.6143, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 6.738676685451889e-05, |
|
"loss": 0.6337, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9090909090909091, |
|
"eval_loss": 0.30700910091400146, |
|
"eval_runtime": 146.384, |
|
"eval_samples_per_second": 46.44, |
|
"eval_steps_per_second": 0.731, |
|
"step": 25552 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 6.634314339386349e-05, |
|
"loss": 0.6502, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 6.52995199332081e-05, |
|
"loss": 0.6448, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 6.42558964725527e-05, |
|
"loss": 0.6272, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 6.32122730118973e-05, |
|
"loss": 0.6316, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 6.21686495512419e-05, |
|
"loss": 0.6442, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 6.11250260905865e-05, |
|
"loss": 0.6489, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 6.0081402629931114e-05, |
|
"loss": 0.6603, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 5.9037779169275714e-05, |
|
"loss": 0.6449, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 5.799415570862033e-05, |
|
"loss": 0.661, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 5.695053224796493e-05, |
|
"loss": 0.642, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 16.66, |
|
"learning_rate": 5.5906908787309535e-05, |
|
"loss": 0.6498, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 5.4863285326654135e-05, |
|
"loss": 0.6902, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 5.381966186599874e-05, |
|
"loss": 0.6336, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 5.277603840534335e-05, |
|
"loss": 0.6393, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 5.173241494468795e-05, |
|
"loss": 0.6496, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 5.0688791484032557e-05, |
|
"loss": 0.63, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.913503971756399, |
|
"eval_loss": 0.30175167322158813, |
|
"eval_runtime": 149.8031, |
|
"eval_samples_per_second": 45.38, |
|
"eval_steps_per_second": 0.714, |
|
"step": 27149 |
|
}, |
|
{ |
|
"epoch": 17.03, |
|
"learning_rate": 4.9645168023377163e-05, |
|
"loss": 0.6195, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 4.861198079732832e-05, |
|
"loss": 0.6299, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 4.756835733667292e-05, |
|
"loss": 0.6939, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 4.652473387601752e-05, |
|
"loss": 0.6416, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 4.5481110415362136e-05, |
|
"loss": 0.6626, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 4.4437486954706736e-05, |
|
"loss": 0.6444, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 4.339386349405134e-05, |
|
"loss": 0.6238, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 17.47, |
|
"learning_rate": 4.2350240033395944e-05, |
|
"loss": 0.6505, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 17.53, |
|
"learning_rate": 4.130661657274056e-05, |
|
"loss": 0.6346, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 4.026299311208516e-05, |
|
"loss": 0.6319, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 3.921936965142976e-05, |
|
"loss": 0.6811, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 3.8175746190774365e-05, |
|
"loss": 0.6645, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 17.78, |
|
"learning_rate": 3.713212273011897e-05, |
|
"loss": 0.6512, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 3.608849926946357e-05, |
|
"loss": 0.6578, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 3.504487580880818e-05, |
|
"loss": 0.6358, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"learning_rate": 3.4001252348152786e-05, |
|
"loss": 0.6672, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9087967049132097, |
|
"eval_loss": 0.30836355686187744, |
|
"eval_runtime": 148.2197, |
|
"eval_samples_per_second": 45.864, |
|
"eval_steps_per_second": 0.722, |
|
"step": 28746 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 3.295762888749739e-05, |
|
"loss": 0.65, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 3.191400542684199e-05, |
|
"loss": 0.6293, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 3.087038196618659e-05, |
|
"loss": 0.6059, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 2.98267585055312e-05, |
|
"loss": 0.5975, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 2.8783135044875807e-05, |
|
"loss": 0.6759, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 2.773951158422041e-05, |
|
"loss": 0.6457, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 2.6695888123565017e-05, |
|
"loss": 0.6715, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 18.47, |
|
"learning_rate": 2.565226466290962e-05, |
|
"loss": 0.6642, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.460864120225422e-05, |
|
"loss": 0.6432, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 18.6, |
|
"learning_rate": 2.3565017741598828e-05, |
|
"loss": 0.6441, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 2.2521394280943432e-05, |
|
"loss": 0.6843, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 18.72, |
|
"learning_rate": 2.147777082028804e-05, |
|
"loss": 0.6459, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 2.0434147359632642e-05, |
|
"loss": 0.6233, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 1.939052389897725e-05, |
|
"loss": 0.6634, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 1.8346900438321853e-05, |
|
"loss": 0.6701, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"learning_rate": 1.7303276977666456e-05, |
|
"loss": 0.6479, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9101206237128567, |
|
"eval_loss": 0.3060016632080078, |
|
"eval_runtime": 149.555, |
|
"eval_samples_per_second": 45.455, |
|
"eval_steps_per_second": 0.715, |
|
"step": 30343 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 1.625965351701106e-05, |
|
"loss": 0.6155, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 1.5216030056355665e-05, |
|
"loss": 0.6079, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 1.417240659570027e-05, |
|
"loss": 0.6709, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 1.3128783135044874e-05, |
|
"loss": 0.6604, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 19.29, |
|
"learning_rate": 1.208515967438948e-05, |
|
"loss": 0.6222, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 1.1041536213734085e-05, |
|
"loss": 0.6428, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 9.997912753078688e-06, |
|
"loss": 0.6664, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 8.954289292423293e-06, |
|
"loss": 0.6489, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 7.910665831767897e-06, |
|
"loss": 0.6414, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 6.877478605719056e-06, |
|
"loss": 0.6599, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 19.66, |
|
"learning_rate": 5.83385514506366e-06, |
|
"loss": 0.6433, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 4.790231684408265e-06, |
|
"loss": 0.6325, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 3.7466082237528697e-06, |
|
"loss": 0.6363, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 2.7029847630974745e-06, |
|
"loss": 0.6408, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 1.6593613024420787e-06, |
|
"loss": 0.645, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 6.157378417866834e-07, |
|
"loss": 0.6658, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9089438070020595, |
|
"eval_loss": 0.3071773946285248, |
|
"eval_runtime": 150.0563, |
|
"eval_samples_per_second": 45.303, |
|
"eval_steps_per_second": 0.713, |
|
"step": 31940 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 31940, |
|
"total_flos": 0.0, |
|
"train_loss": 0.8136612295581911, |
|
"train_runtime": 27726.7705, |
|
"train_samples_per_second": 36.855, |
|
"train_steps_per_second": 1.152 |
|
} |
|
], |
|
"max_steps": 31940, |
|
"num_train_epochs": 20, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|