trillsson3-ft-keyword-spotting-14 / trainer_state.json
vumichien's picture
End of training
ad0dc1d
{
"best_metric": 0.9149749926448956,
"best_model_checkpoint": "trillsson3-ft-keyword-spotting-14/checkpoint-23955",
"epoch": 20.0,
"global_step": 31940,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 8.73512836568566e-06,
"loss": 8.0793,
"step": 100
},
{
"epoch": 0.13,
"learning_rate": 1.812773951158422e-05,
"loss": 7.1754,
"step": 200
},
{
"epoch": 0.19,
"learning_rate": 2.7520350657482777e-05,
"loss": 5.6723,
"step": 300
},
{
"epoch": 0.25,
"learning_rate": 3.691296180338134e-05,
"loss": 4.1468,
"step": 400
},
{
"epoch": 0.31,
"learning_rate": 4.6305572949279896e-05,
"loss": 3.3576,
"step": 500
},
{
"epoch": 0.38,
"learning_rate": 5.5698184095178454e-05,
"loss": 2.955,
"step": 600
},
{
"epoch": 0.44,
"learning_rate": 6.509079524107701e-05,
"loss": 2.7689,
"step": 700
},
{
"epoch": 0.5,
"learning_rate": 7.448340638697557e-05,
"loss": 2.5858,
"step": 800
},
{
"epoch": 0.56,
"learning_rate": 8.387601753287413e-05,
"loss": 2.3275,
"step": 900
},
{
"epoch": 0.63,
"learning_rate": 9.326862867877268e-05,
"loss": 2.0771,
"step": 1000
},
{
"epoch": 0.69,
"learning_rate": 0.00010266123982467126,
"loss": 1.8958,
"step": 1100
},
{
"epoch": 0.75,
"learning_rate": 0.0001120538509705698,
"loss": 1.6778,
"step": 1200
},
{
"epoch": 0.81,
"learning_rate": 0.00012144646211646837,
"loss": 1.5346,
"step": 1300
},
{
"epoch": 0.88,
"learning_rate": 0.00013083907326236693,
"loss": 1.4238,
"step": 1400
},
{
"epoch": 0.94,
"learning_rate": 0.0001402316844082655,
"loss": 1.2824,
"step": 1500
},
{
"epoch": 1.0,
"eval_accuracy": 0.6891732862606649,
"eval_loss": 0.7817752957344055,
"eval_runtime": 152.2428,
"eval_samples_per_second": 44.652,
"eval_steps_per_second": 0.703,
"step": 1597
},
{
"epoch": 1.0,
"learning_rate": 0.00014962429555416404,
"loss": 1.2121,
"step": 1600
},
{
"epoch": 1.06,
"learning_rate": 0.0001590169067000626,
"loss": 1.1391,
"step": 1700
},
{
"epoch": 1.13,
"learning_rate": 0.00016840951784596116,
"loss": 1.12,
"step": 1800
},
{
"epoch": 1.19,
"learning_rate": 0.00017780212899185972,
"loss": 1.053,
"step": 1900
},
{
"epoch": 1.25,
"learning_rate": 0.0001871947401377583,
"loss": 1.0245,
"step": 2000
},
{
"epoch": 1.31,
"learning_rate": 0.00019658735128365683,
"loss": 0.9887,
"step": 2100
},
{
"epoch": 1.38,
"learning_rate": 0.0002059799624295554,
"loss": 0.9671,
"step": 2200
},
{
"epoch": 1.44,
"learning_rate": 0.00021537257357545395,
"loss": 0.8912,
"step": 2300
},
{
"epoch": 1.5,
"learning_rate": 0.00022476518472135253,
"loss": 0.9015,
"step": 2400
},
{
"epoch": 1.57,
"learning_rate": 0.0002341577958672511,
"loss": 0.89,
"step": 2500
},
{
"epoch": 1.63,
"learning_rate": 0.00024355040701314962,
"loss": 0.8173,
"step": 2600
},
{
"epoch": 1.69,
"learning_rate": 0.0002529430181590482,
"loss": 0.8338,
"step": 2700
},
{
"epoch": 1.75,
"learning_rate": 0.00026233562930494674,
"loss": 0.8313,
"step": 2800
},
{
"epoch": 1.82,
"learning_rate": 0.0002717282404508453,
"loss": 0.8395,
"step": 2900
},
{
"epoch": 1.88,
"learning_rate": 0.00028112085159674385,
"loss": 0.8036,
"step": 3000
},
{
"epoch": 1.94,
"learning_rate": 0.00029051346274264244,
"loss": 0.8003,
"step": 3100
},
{
"epoch": 2.0,
"eval_accuracy": 0.8734922035892909,
"eval_loss": 0.44425612688064575,
"eval_runtime": 147.8186,
"eval_samples_per_second": 45.989,
"eval_steps_per_second": 0.724,
"step": 3194
},
{
"epoch": 2.0,
"learning_rate": 0.00029990607388854097,
"loss": 0.7951,
"step": 3200
},
{
"epoch": 2.07,
"learning_rate": 0.0002989668127739511,
"loss": 0.8114,
"step": 3300
},
{
"epoch": 2.13,
"learning_rate": 0.00029792318931329574,
"loss": 0.8343,
"step": 3400
},
{
"epoch": 2.19,
"learning_rate": 0.00029687956585264035,
"loss": 0.7617,
"step": 3500
},
{
"epoch": 2.25,
"learning_rate": 0.00029583594239198497,
"loss": 0.7779,
"step": 3600
},
{
"epoch": 2.32,
"learning_rate": 0.0002947923189313295,
"loss": 0.7512,
"step": 3700
},
{
"epoch": 2.38,
"learning_rate": 0.00029374869547067414,
"loss": 0.7962,
"step": 3800
},
{
"epoch": 2.44,
"learning_rate": 0.00029270507201001875,
"loss": 0.7134,
"step": 3900
},
{
"epoch": 2.5,
"learning_rate": 0.00029166144854936337,
"loss": 0.786,
"step": 4000
},
{
"epoch": 2.57,
"learning_rate": 0.000290617825088708,
"loss": 0.7808,
"step": 4100
},
{
"epoch": 2.63,
"learning_rate": 0.0002895742016280526,
"loss": 0.7379,
"step": 4200
},
{
"epoch": 2.69,
"learning_rate": 0.00028853057816739715,
"loss": 0.7545,
"step": 4300
},
{
"epoch": 2.76,
"learning_rate": 0.00028748695470674177,
"loss": 0.7557,
"step": 4400
},
{
"epoch": 2.82,
"learning_rate": 0.0002864433312460864,
"loss": 0.7633,
"step": 4500
},
{
"epoch": 2.88,
"learning_rate": 0.000285399707785431,
"loss": 0.7414,
"step": 4600
},
{
"epoch": 2.94,
"learning_rate": 0.0002843560843247756,
"loss": 0.7232,
"step": 4700
},
{
"epoch": 3.0,
"eval_accuracy": 0.8833480435422183,
"eval_loss": 0.372787207365036,
"eval_runtime": 148.4355,
"eval_samples_per_second": 45.798,
"eval_steps_per_second": 0.721,
"step": 4791
},
{
"epoch": 3.01,
"learning_rate": 0.00028331246086412017,
"loss": 0.7661,
"step": 4800
},
{
"epoch": 3.07,
"learning_rate": 0.00028226883740346484,
"loss": 0.7224,
"step": 4900
},
{
"epoch": 3.13,
"learning_rate": 0.0002812252139428094,
"loss": 0.7186,
"step": 5000
},
{
"epoch": 3.19,
"learning_rate": 0.0002801920267167606,
"loss": 0.7151,
"step": 5100
},
{
"epoch": 3.26,
"learning_rate": 0.00027914840325610516,
"loss": 0.7175,
"step": 5200
},
{
"epoch": 3.32,
"learning_rate": 0.0002781047797954498,
"loss": 0.6871,
"step": 5300
},
{
"epoch": 3.38,
"learning_rate": 0.0002770611563347944,
"loss": 0.7446,
"step": 5400
},
{
"epoch": 3.44,
"learning_rate": 0.000276017532874139,
"loss": 0.7128,
"step": 5500
},
{
"epoch": 3.51,
"learning_rate": 0.0002749739094134836,
"loss": 0.6896,
"step": 5600
},
{
"epoch": 3.57,
"learning_rate": 0.0002739302859528282,
"loss": 0.714,
"step": 5700
},
{
"epoch": 3.63,
"learning_rate": 0.0002728866624921728,
"loss": 0.7478,
"step": 5800
},
{
"epoch": 3.69,
"learning_rate": 0.0002718430390315174,
"loss": 0.6853,
"step": 5900
},
{
"epoch": 3.76,
"learning_rate": 0.000270799415570862,
"loss": 0.7165,
"step": 6000
},
{
"epoch": 3.82,
"learning_rate": 0.0002697557921102066,
"loss": 0.704,
"step": 6100
},
{
"epoch": 3.88,
"learning_rate": 0.00026871216864955125,
"loss": 0.6954,
"step": 6200
},
{
"epoch": 3.94,
"learning_rate": 0.0002676685451888958,
"loss": 0.73,
"step": 6300
},
{
"epoch": 4.0,
"eval_accuracy": 0.8973227419829362,
"eval_loss": 0.346542090177536,
"eval_runtime": 147.4068,
"eval_samples_per_second": 46.117,
"eval_steps_per_second": 0.726,
"step": 6388
},
{
"epoch": 4.01,
"learning_rate": 0.0002666249217282404,
"loss": 0.7149,
"step": 6400
},
{
"epoch": 4.07,
"learning_rate": 0.00026558129826758503,
"loss": 0.7107,
"step": 6500
},
{
"epoch": 4.13,
"learning_rate": 0.00026453767480692965,
"loss": 0.6744,
"step": 6600
},
{
"epoch": 4.2,
"learning_rate": 0.0002634940513462742,
"loss": 0.727,
"step": 6700
},
{
"epoch": 4.26,
"learning_rate": 0.0002624504278856189,
"loss": 0.7,
"step": 6800
},
{
"epoch": 4.32,
"learning_rate": 0.00026140680442496343,
"loss": 0.7239,
"step": 6900
},
{
"epoch": 4.38,
"learning_rate": 0.00026036318096430805,
"loss": 0.6847,
"step": 7000
},
{
"epoch": 4.45,
"learning_rate": 0.00025931955750365266,
"loss": 0.7151,
"step": 7100
},
{
"epoch": 4.51,
"learning_rate": 0.0002582759340429973,
"loss": 0.7228,
"step": 7200
},
{
"epoch": 4.57,
"learning_rate": 0.0002572427468169484,
"loss": 0.6925,
"step": 7300
},
{
"epoch": 4.63,
"learning_rate": 0.000256199123356293,
"loss": 0.7064,
"step": 7400
},
{
"epoch": 4.7,
"learning_rate": 0.00025515549989563765,
"loss": 0.6861,
"step": 7500
},
{
"epoch": 4.76,
"learning_rate": 0.0002541118764349822,
"loss": 0.7057,
"step": 7600
},
{
"epoch": 4.82,
"learning_rate": 0.0002530682529743269,
"loss": 0.6811,
"step": 7700
},
{
"epoch": 4.88,
"learning_rate": 0.00025202462951367144,
"loss": 0.6676,
"step": 7800
},
{
"epoch": 4.95,
"learning_rate": 0.00025098100605301605,
"loss": 0.7015,
"step": 7900
},
{
"epoch": 5.0,
"eval_accuracy": 0.910856134157105,
"eval_loss": 0.3211327791213989,
"eval_runtime": 150.4771,
"eval_samples_per_second": 45.176,
"eval_steps_per_second": 0.711,
"step": 7985
},
{
"epoch": 5.01,
"learning_rate": 0.00024993738259236067,
"loss": 0.708,
"step": 8000
},
{
"epoch": 5.07,
"learning_rate": 0.0002488937591317053,
"loss": 0.7372,
"step": 8100
},
{
"epoch": 5.13,
"learning_rate": 0.00024785013567104984,
"loss": 0.7089,
"step": 8200
},
{
"epoch": 5.2,
"learning_rate": 0.00024680651221039445,
"loss": 0.6962,
"step": 8300
},
{
"epoch": 5.26,
"learning_rate": 0.00024576288874973907,
"loss": 0.7209,
"step": 8400
},
{
"epoch": 5.32,
"learning_rate": 0.0002447192652890837,
"loss": 0.7091,
"step": 8500
},
{
"epoch": 5.39,
"learning_rate": 0.0002436756418284283,
"loss": 0.6898,
"step": 8600
},
{
"epoch": 5.45,
"learning_rate": 0.00024263201836777288,
"loss": 0.726,
"step": 8700
},
{
"epoch": 5.51,
"learning_rate": 0.00024158839490711747,
"loss": 0.682,
"step": 8800
},
{
"epoch": 5.57,
"learning_rate": 0.0002405447714464621,
"loss": 0.6855,
"step": 8900
},
{
"epoch": 5.64,
"learning_rate": 0.0002395011479858067,
"loss": 0.7108,
"step": 9000
},
{
"epoch": 5.7,
"learning_rate": 0.00023845752452515128,
"loss": 0.6945,
"step": 9100
},
{
"epoch": 5.76,
"learning_rate": 0.00023741390106449592,
"loss": 0.6621,
"step": 9200
},
{
"epoch": 5.82,
"learning_rate": 0.0002363702776038405,
"loss": 0.7176,
"step": 9300
},
{
"epoch": 5.89,
"learning_rate": 0.00023532665414318512,
"loss": 0.6824,
"step": 9400
},
{
"epoch": 5.95,
"learning_rate": 0.00023428303068252974,
"loss": 0.6981,
"step": 9500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9080611944689615,
"eval_loss": 0.3200249671936035,
"eval_runtime": 148.3702,
"eval_samples_per_second": 45.818,
"eval_steps_per_second": 0.721,
"step": 9582
},
{
"epoch": 6.01,
"learning_rate": 0.00023323940722187433,
"loss": 0.6934,
"step": 9600
},
{
"epoch": 6.07,
"learning_rate": 0.00023219578376121894,
"loss": 0.7101,
"step": 9700
},
{
"epoch": 6.14,
"learning_rate": 0.00023115216030056353,
"loss": 0.6922,
"step": 9800
},
{
"epoch": 6.2,
"learning_rate": 0.00023010853683990814,
"loss": 0.7027,
"step": 9900
},
{
"epoch": 6.26,
"learning_rate": 0.00022906491337925275,
"loss": 0.6931,
"step": 10000
},
{
"epoch": 6.32,
"learning_rate": 0.00022802128991859734,
"loss": 0.6782,
"step": 10100
},
{
"epoch": 6.39,
"learning_rate": 0.00022697766645794195,
"loss": 0.6842,
"step": 10200
},
{
"epoch": 6.45,
"learning_rate": 0.00022593404299728657,
"loss": 0.7295,
"step": 10300
},
{
"epoch": 6.51,
"learning_rate": 0.00022489041953663115,
"loss": 0.7145,
"step": 10400
},
{
"epoch": 6.57,
"learning_rate": 0.00022384679607597577,
"loss": 0.6637,
"step": 10500
},
{
"epoch": 6.64,
"learning_rate": 0.00022280317261532038,
"loss": 0.6666,
"step": 10600
},
{
"epoch": 6.7,
"learning_rate": 0.00022175954915466497,
"loss": 0.7068,
"step": 10700
},
{
"epoch": 6.76,
"learning_rate": 0.00022071592569400955,
"loss": 0.6759,
"step": 10800
},
{
"epoch": 6.83,
"learning_rate": 0.0002196723022333542,
"loss": 0.6926,
"step": 10900
},
{
"epoch": 6.89,
"learning_rate": 0.00021862867877269878,
"loss": 0.6825,
"step": 11000
},
{
"epoch": 6.95,
"learning_rate": 0.00021758505531204342,
"loss": 0.6807,
"step": 11100
},
{
"epoch": 7.0,
"eval_accuracy": 0.9058546631362165,
"eval_loss": 0.3208906650543213,
"eval_runtime": 149.169,
"eval_samples_per_second": 45.572,
"eval_steps_per_second": 0.717,
"step": 11179
},
{
"epoch": 7.01,
"learning_rate": 0.000216541431851388,
"loss": 0.677,
"step": 11200
},
{
"epoch": 7.08,
"learning_rate": 0.00021550824462533916,
"loss": 0.7379,
"step": 11300
},
{
"epoch": 7.14,
"learning_rate": 0.00021447505739929034,
"loss": 0.7156,
"step": 11400
},
{
"epoch": 7.2,
"learning_rate": 0.00021343143393863493,
"loss": 0.6406,
"step": 11500
},
{
"epoch": 7.26,
"learning_rate": 0.0002123878104779795,
"loss": 0.6888,
"step": 11600
},
{
"epoch": 7.33,
"learning_rate": 0.00021134418701732413,
"loss": 0.6968,
"step": 11700
},
{
"epoch": 7.39,
"learning_rate": 0.00021030056355666874,
"loss": 0.6989,
"step": 11800
},
{
"epoch": 7.45,
"learning_rate": 0.00020925694009601333,
"loss": 0.6751,
"step": 11900
},
{
"epoch": 7.51,
"learning_rate": 0.00020821331663535794,
"loss": 0.6879,
"step": 12000
},
{
"epoch": 7.58,
"learning_rate": 0.00020716969317470255,
"loss": 0.678,
"step": 12100
},
{
"epoch": 7.64,
"learning_rate": 0.00020612606971404714,
"loss": 0.6501,
"step": 12200
},
{
"epoch": 7.7,
"learning_rate": 0.00020508244625339176,
"loss": 0.6679,
"step": 12300
},
{
"epoch": 7.76,
"learning_rate": 0.00020403882279273637,
"loss": 0.7116,
"step": 12400
},
{
"epoch": 7.83,
"learning_rate": 0.00020299519933208098,
"loss": 0.6899,
"step": 12500
},
{
"epoch": 7.89,
"learning_rate": 0.00020195157587142557,
"loss": 0.6892,
"step": 12600
},
{
"epoch": 7.95,
"learning_rate": 0.00020090795241077016,
"loss": 0.6873,
"step": 12700
},
{
"epoch": 8.0,
"eval_accuracy": 0.902177110914975,
"eval_loss": 0.3205910921096802,
"eval_runtime": 147.3348,
"eval_samples_per_second": 46.14,
"eval_steps_per_second": 0.726,
"step": 12776
},
{
"epoch": 8.02,
"learning_rate": 0.0001998643289501148,
"loss": 0.6847,
"step": 12800
},
{
"epoch": 8.08,
"learning_rate": 0.00019882070548945938,
"loss": 0.6833,
"step": 12900
},
{
"epoch": 8.14,
"learning_rate": 0.00019777708202880397,
"loss": 0.6737,
"step": 13000
},
{
"epoch": 8.2,
"learning_rate": 0.0001967334585681486,
"loss": 0.6568,
"step": 13100
},
{
"epoch": 8.27,
"learning_rate": 0.0001956898351074932,
"loss": 0.7059,
"step": 13200
},
{
"epoch": 8.33,
"learning_rate": 0.00019464621164683778,
"loss": 0.6831,
"step": 13300
},
{
"epoch": 8.39,
"learning_rate": 0.00019360258818618243,
"loss": 0.7174,
"step": 13400
},
{
"epoch": 8.45,
"learning_rate": 0.00019256940096013358,
"loss": 0.6966,
"step": 13500
},
{
"epoch": 8.52,
"learning_rate": 0.00019152577749947816,
"loss": 0.699,
"step": 13600
},
{
"epoch": 8.58,
"learning_rate": 0.00019048215403882278,
"loss": 0.6774,
"step": 13700
},
{
"epoch": 8.64,
"learning_rate": 0.0001894385305781674,
"loss": 0.6848,
"step": 13800
},
{
"epoch": 8.7,
"learning_rate": 0.00018839490711751198,
"loss": 0.6901,
"step": 13900
},
{
"epoch": 8.77,
"learning_rate": 0.00018735128365685656,
"loss": 0.6994,
"step": 14000
},
{
"epoch": 8.83,
"learning_rate": 0.0001863076601962012,
"loss": 0.6739,
"step": 14100
},
{
"epoch": 8.89,
"learning_rate": 0.0001852640367355458,
"loss": 0.6854,
"step": 14200
},
{
"epoch": 8.95,
"learning_rate": 0.00018422041327489038,
"loss": 0.6416,
"step": 14300
},
{
"epoch": 9.0,
"eval_accuracy": 0.9057075610473668,
"eval_loss": 0.31237688660621643,
"eval_runtime": 148.9632,
"eval_samples_per_second": 45.635,
"eval_steps_per_second": 0.718,
"step": 14373
},
{
"epoch": 9.02,
"learning_rate": 0.00018317678981423502,
"loss": 0.6711,
"step": 14400
},
{
"epoch": 9.08,
"learning_rate": 0.0001821331663535796,
"loss": 0.6965,
"step": 14500
},
{
"epoch": 9.14,
"learning_rate": 0.00018108954289292422,
"loss": 0.6896,
"step": 14600
},
{
"epoch": 9.2,
"learning_rate": 0.00018004591943226883,
"loss": 0.6944,
"step": 14700
},
{
"epoch": 9.27,
"learning_rate": 0.00017900229597161342,
"loss": 0.6981,
"step": 14800
},
{
"epoch": 9.33,
"learning_rate": 0.00017795867251095803,
"loss": 0.6819,
"step": 14900
},
{
"epoch": 9.39,
"learning_rate": 0.00017691504905030265,
"loss": 0.6869,
"step": 15000
},
{
"epoch": 9.46,
"learning_rate": 0.00017587142558964723,
"loss": 0.6974,
"step": 15100
},
{
"epoch": 9.52,
"learning_rate": 0.00017482780212899185,
"loss": 0.6621,
"step": 15200
},
{
"epoch": 9.58,
"learning_rate": 0.00017378417866833646,
"loss": 0.6732,
"step": 15300
},
{
"epoch": 9.64,
"learning_rate": 0.00017274055520768105,
"loss": 0.678,
"step": 15400
},
{
"epoch": 9.71,
"learning_rate": 0.00017169693174702566,
"loss": 0.6494,
"step": 15500
},
{
"epoch": 9.77,
"learning_rate": 0.00017065330828637025,
"loss": 0.6909,
"step": 15600
},
{
"epoch": 9.83,
"learning_rate": 0.00016960968482571486,
"loss": 0.687,
"step": 15700
},
{
"epoch": 9.89,
"learning_rate": 0.00016856606136505948,
"loss": 0.6705,
"step": 15800
},
{
"epoch": 9.96,
"learning_rate": 0.00016752243790440406,
"loss": 0.6698,
"step": 15900
},
{
"epoch": 10.0,
"eval_accuracy": 0.8949691085613416,
"eval_loss": 0.3288457989692688,
"eval_runtime": 150.3856,
"eval_samples_per_second": 45.204,
"eval_steps_per_second": 0.712,
"step": 15970
},
{
"epoch": 10.02,
"learning_rate": 0.00016648925067835524,
"loss": 0.6701,
"step": 16000
},
{
"epoch": 10.08,
"learning_rate": 0.00016544562721769983,
"loss": 0.6771,
"step": 16100
},
{
"epoch": 10.14,
"learning_rate": 0.00016440200375704444,
"loss": 0.6877,
"step": 16200
},
{
"epoch": 10.21,
"learning_rate": 0.00016335838029638906,
"loss": 0.6495,
"step": 16300
},
{
"epoch": 10.27,
"learning_rate": 0.00016231475683573364,
"loss": 0.6925,
"step": 16400
},
{
"epoch": 10.33,
"learning_rate": 0.00016127113337507826,
"loss": 0.647,
"step": 16500
},
{
"epoch": 10.39,
"learning_rate": 0.00016022750991442287,
"loss": 0.658,
"step": 16600
},
{
"epoch": 10.46,
"learning_rate": 0.00015918388645376746,
"loss": 0.7033,
"step": 16700
},
{
"epoch": 10.52,
"learning_rate": 0.00015814026299311207,
"loss": 0.6675,
"step": 16800
},
{
"epoch": 10.58,
"learning_rate": 0.00015709663953245666,
"loss": 0.6905,
"step": 16900
},
{
"epoch": 10.64,
"learning_rate": 0.0001560530160718013,
"loss": 0.6766,
"step": 17000
},
{
"epoch": 10.71,
"learning_rate": 0.00015500939261114588,
"loss": 0.684,
"step": 17100
},
{
"epoch": 10.77,
"learning_rate": 0.00015396576915049047,
"loss": 0.6382,
"step": 17200
},
{
"epoch": 10.83,
"learning_rate": 0.0001529221456898351,
"loss": 0.6737,
"step": 17300
},
{
"epoch": 10.9,
"learning_rate": 0.0001518785222291797,
"loss": 0.691,
"step": 17400
},
{
"epoch": 10.96,
"learning_rate": 0.00015083489876852429,
"loss": 0.716,
"step": 17500
},
{
"epoch": 11.0,
"eval_accuracy": 0.8998234774933804,
"eval_loss": 0.31469690799713135,
"eval_runtime": 150.5759,
"eval_samples_per_second": 45.147,
"eval_steps_per_second": 0.711,
"step": 17567
},
{
"epoch": 11.02,
"learning_rate": 0.0001497912753078689,
"loss": 0.7326,
"step": 17600
},
{
"epoch": 11.08,
"learning_rate": 0.0001487476518472135,
"loss": 0.6747,
"step": 17700
},
{
"epoch": 11.15,
"learning_rate": 0.00014770402838655813,
"loss": 0.7075,
"step": 17800
},
{
"epoch": 11.21,
"learning_rate": 0.0001466604049259027,
"loss": 0.69,
"step": 17900
},
{
"epoch": 11.27,
"learning_rate": 0.00014561678146524733,
"loss": 0.6793,
"step": 18000
},
{
"epoch": 11.33,
"learning_rate": 0.00014457315800459194,
"loss": 0.6782,
"step": 18100
},
{
"epoch": 11.4,
"learning_rate": 0.00014352953454393655,
"loss": 0.6532,
"step": 18200
},
{
"epoch": 11.46,
"learning_rate": 0.00014249634731788768,
"loss": 0.7053,
"step": 18300
},
{
"epoch": 11.52,
"learning_rate": 0.0001414527238572323,
"loss": 0.6476,
"step": 18400
},
{
"epoch": 11.58,
"learning_rate": 0.0001404091003965769,
"loss": 0.6308,
"step": 18500
},
{
"epoch": 11.65,
"learning_rate": 0.0001393654769359215,
"loss": 0.6886,
"step": 18600
},
{
"epoch": 11.71,
"learning_rate": 0.0001383218534752661,
"loss": 0.6631,
"step": 18700
},
{
"epoch": 11.77,
"learning_rate": 0.00013727823001461072,
"loss": 0.7056,
"step": 18800
},
{
"epoch": 11.83,
"learning_rate": 0.00013623460655395533,
"loss": 0.6602,
"step": 18900
},
{
"epoch": 11.9,
"learning_rate": 0.00013519098309329992,
"loss": 0.6728,
"step": 19000
},
{
"epoch": 11.96,
"learning_rate": 0.00013414735963264453,
"loss": 0.6514,
"step": 19100
},
{
"epoch": 12.0,
"eval_accuracy": 0.9111503383348043,
"eval_loss": 0.3034283220767975,
"eval_runtime": 145.7542,
"eval_samples_per_second": 46.64,
"eval_steps_per_second": 0.734,
"step": 19164
},
{
"epoch": 12.02,
"learning_rate": 0.00013310373617198915,
"loss": 0.6567,
"step": 19200
},
{
"epoch": 12.09,
"learning_rate": 0.00013206011271133373,
"loss": 0.6882,
"step": 19300
},
{
"epoch": 12.15,
"learning_rate": 0.00013101648925067835,
"loss": 0.6511,
"step": 19400
},
{
"epoch": 12.21,
"learning_rate": 0.00012997286579002296,
"loss": 0.6705,
"step": 19500
},
{
"epoch": 12.27,
"learning_rate": 0.00012892924232936755,
"loss": 0.6693,
"step": 19600
},
{
"epoch": 12.34,
"learning_rate": 0.00012788561886871216,
"loss": 0.68,
"step": 19700
},
{
"epoch": 12.4,
"learning_rate": 0.00012684199540805675,
"loss": 0.6767,
"step": 19800
},
{
"epoch": 12.46,
"learning_rate": 0.00012579837194740136,
"loss": 0.6768,
"step": 19900
},
{
"epoch": 12.52,
"learning_rate": 0.00012475474848674598,
"loss": 0.6662,
"step": 20000
},
{
"epoch": 12.59,
"learning_rate": 0.00012371112502609056,
"loss": 0.6511,
"step": 20100
},
{
"epoch": 12.65,
"learning_rate": 0.00012266750156543518,
"loss": 0.7057,
"step": 20200
},
{
"epoch": 12.71,
"learning_rate": 0.00012163431433938634,
"loss": 0.6699,
"step": 20300
},
{
"epoch": 12.77,
"learning_rate": 0.00012059069087873094,
"loss": 0.6541,
"step": 20400
},
{
"epoch": 12.84,
"learning_rate": 0.00011954706741807554,
"loss": 0.6741,
"step": 20500
},
{
"epoch": 12.9,
"learning_rate": 0.00011850344395742016,
"loss": 0.658,
"step": 20600
},
{
"epoch": 12.96,
"learning_rate": 0.00011745982049676476,
"loss": 0.6513,
"step": 20700
},
{
"epoch": 13.0,
"eval_accuracy": 0.9092380111797588,
"eval_loss": 0.30905914306640625,
"eval_runtime": 146.3169,
"eval_samples_per_second": 46.461,
"eval_steps_per_second": 0.731,
"step": 20761
},
{
"epoch": 13.02,
"learning_rate": 0.00011641619703610936,
"loss": 0.6568,
"step": 20800
},
{
"epoch": 13.09,
"learning_rate": 0.00011537257357545397,
"loss": 0.6853,
"step": 20900
},
{
"epoch": 13.15,
"learning_rate": 0.00011432895011479858,
"loss": 0.6699,
"step": 21000
},
{
"epoch": 13.21,
"learning_rate": 0.00011328532665414317,
"loss": 0.6494,
"step": 21100
},
{
"epoch": 13.27,
"learning_rate": 0.00011224170319348778,
"loss": 0.7118,
"step": 21200
},
{
"epoch": 13.34,
"learning_rate": 0.00011119807973283239,
"loss": 0.6649,
"step": 21300
},
{
"epoch": 13.4,
"learning_rate": 0.00011015445627217699,
"loss": 0.6646,
"step": 21400
},
{
"epoch": 13.46,
"learning_rate": 0.0001091108328115216,
"loss": 0.6436,
"step": 21500
},
{
"epoch": 13.53,
"learning_rate": 0.0001080672093508662,
"loss": 0.6258,
"step": 21600
},
{
"epoch": 13.59,
"learning_rate": 0.0001070235858902108,
"loss": 0.6754,
"step": 21700
},
{
"epoch": 13.65,
"learning_rate": 0.0001059799624295554,
"loss": 0.6737,
"step": 21800
},
{
"epoch": 13.71,
"learning_rate": 0.00010493633896890001,
"loss": 0.6511,
"step": 21900
},
{
"epoch": 13.78,
"learning_rate": 0.00010389271550824463,
"loss": 0.6472,
"step": 22000
},
{
"epoch": 13.84,
"learning_rate": 0.00010284909204758921,
"loss": 0.6571,
"step": 22100
},
{
"epoch": 13.9,
"learning_rate": 0.00010180546858693383,
"loss": 0.693,
"step": 22200
},
{
"epoch": 13.96,
"learning_rate": 0.00010076184512627843,
"loss": 0.652,
"step": 22300
},
{
"epoch": 14.0,
"eval_accuracy": 0.909973521624007,
"eval_loss": 0.30560359358787537,
"eval_runtime": 146.3619,
"eval_samples_per_second": 46.447,
"eval_steps_per_second": 0.731,
"step": 22358
},
{
"epoch": 14.03,
"learning_rate": 9.971822166562303e-05,
"loss": 0.6286,
"step": 22400
},
{
"epoch": 14.09,
"learning_rate": 9.867459820496764e-05,
"loss": 0.6503,
"step": 22500
},
{
"epoch": 14.15,
"learning_rate": 9.763097474431224e-05,
"loss": 0.6514,
"step": 22600
},
{
"epoch": 14.21,
"learning_rate": 9.659778751826341e-05,
"loss": 0.6728,
"step": 22700
},
{
"epoch": 14.28,
"learning_rate": 9.555416405760801e-05,
"loss": 0.6621,
"step": 22800
},
{
"epoch": 14.34,
"learning_rate": 9.451054059695261e-05,
"loss": 0.6771,
"step": 22900
},
{
"epoch": 14.4,
"learning_rate": 9.346691713629722e-05,
"loss": 0.6689,
"step": 23000
},
{
"epoch": 14.46,
"learning_rate": 9.242329367564181e-05,
"loss": 0.6712,
"step": 23100
},
{
"epoch": 14.53,
"learning_rate": 9.137967021498642e-05,
"loss": 0.6761,
"step": 23200
},
{
"epoch": 14.59,
"learning_rate": 9.033604675433104e-05,
"loss": 0.6327,
"step": 23300
},
{
"epoch": 14.65,
"learning_rate": 8.929242329367564e-05,
"loss": 0.6671,
"step": 23400
},
{
"epoch": 14.72,
"learning_rate": 8.824879983302024e-05,
"loss": 0.6598,
"step": 23500
},
{
"epoch": 14.78,
"learning_rate": 8.720517637236485e-05,
"loss": 0.6317,
"step": 23600
},
{
"epoch": 14.84,
"learning_rate": 8.616155291170945e-05,
"loss": 0.6615,
"step": 23700
},
{
"epoch": 14.9,
"learning_rate": 8.511792945105405e-05,
"loss": 0.6087,
"step": 23800
},
{
"epoch": 14.97,
"learning_rate": 8.407430599039865e-05,
"loss": 0.7105,
"step": 23900
},
{
"epoch": 15.0,
"eval_accuracy": 0.9149749926448956,
"eval_loss": 0.30149412155151367,
"eval_runtime": 145.6909,
"eval_samples_per_second": 46.66,
"eval_steps_per_second": 0.734,
"step": 23955
},
{
"epoch": 15.03,
"learning_rate": 8.303068252974326e-05,
"loss": 0.6911,
"step": 24000
},
{
"epoch": 15.09,
"learning_rate": 8.198705906908788e-05,
"loss": 0.6717,
"step": 24100
},
{
"epoch": 15.15,
"learning_rate": 8.094343560843246e-05,
"loss": 0.6564,
"step": 24200
},
{
"epoch": 15.22,
"learning_rate": 7.989981214777708e-05,
"loss": 0.6446,
"step": 24300
},
{
"epoch": 15.28,
"learning_rate": 7.885618868712169e-05,
"loss": 0.6431,
"step": 24400
},
{
"epoch": 15.34,
"learning_rate": 7.781256522646628e-05,
"loss": 0.6762,
"step": 24500
},
{
"epoch": 15.4,
"learning_rate": 7.676894176581089e-05,
"loss": 0.6656,
"step": 24600
},
{
"epoch": 15.47,
"learning_rate": 7.572531830515549e-05,
"loss": 0.6337,
"step": 24700
},
{
"epoch": 15.53,
"learning_rate": 7.468169484450009e-05,
"loss": 0.6541,
"step": 24800
},
{
"epoch": 15.59,
"learning_rate": 7.363807138384469e-05,
"loss": 0.6772,
"step": 24900
},
{
"epoch": 15.65,
"learning_rate": 7.260488415779586e-05,
"loss": 0.629,
"step": 25000
},
{
"epoch": 15.72,
"learning_rate": 7.156126069714046e-05,
"loss": 0.6998,
"step": 25100
},
{
"epoch": 15.78,
"learning_rate": 7.051763723648507e-05,
"loss": 0.6686,
"step": 25200
},
{
"epoch": 15.84,
"learning_rate": 6.947401377582967e-05,
"loss": 0.6822,
"step": 25300
},
{
"epoch": 15.9,
"learning_rate": 6.843039031517429e-05,
"loss": 0.6143,
"step": 25400
},
{
"epoch": 15.97,
"learning_rate": 6.738676685451889e-05,
"loss": 0.6337,
"step": 25500
},
{
"epoch": 16.0,
"eval_accuracy": 0.9090909090909091,
"eval_loss": 0.30700910091400146,
"eval_runtime": 146.384,
"eval_samples_per_second": 46.44,
"eval_steps_per_second": 0.731,
"step": 25552
},
{
"epoch": 16.03,
"learning_rate": 6.634314339386349e-05,
"loss": 0.6502,
"step": 25600
},
{
"epoch": 16.09,
"learning_rate": 6.52995199332081e-05,
"loss": 0.6448,
"step": 25700
},
{
"epoch": 16.16,
"learning_rate": 6.42558964725527e-05,
"loss": 0.6272,
"step": 25800
},
{
"epoch": 16.22,
"learning_rate": 6.32122730118973e-05,
"loss": 0.6316,
"step": 25900
},
{
"epoch": 16.28,
"learning_rate": 6.21686495512419e-05,
"loss": 0.6442,
"step": 26000
},
{
"epoch": 16.34,
"learning_rate": 6.11250260905865e-05,
"loss": 0.6489,
"step": 26100
},
{
"epoch": 16.41,
"learning_rate": 6.0081402629931114e-05,
"loss": 0.6603,
"step": 26200
},
{
"epoch": 16.47,
"learning_rate": 5.9037779169275714e-05,
"loss": 0.6449,
"step": 26300
},
{
"epoch": 16.53,
"learning_rate": 5.799415570862033e-05,
"loss": 0.661,
"step": 26400
},
{
"epoch": 16.59,
"learning_rate": 5.695053224796493e-05,
"loss": 0.642,
"step": 26500
},
{
"epoch": 16.66,
"learning_rate": 5.5906908787309535e-05,
"loss": 0.6498,
"step": 26600
},
{
"epoch": 16.72,
"learning_rate": 5.4863285326654135e-05,
"loss": 0.6902,
"step": 26700
},
{
"epoch": 16.78,
"learning_rate": 5.381966186599874e-05,
"loss": 0.6336,
"step": 26800
},
{
"epoch": 16.84,
"learning_rate": 5.277603840534335e-05,
"loss": 0.6393,
"step": 26900
},
{
"epoch": 16.91,
"learning_rate": 5.173241494468795e-05,
"loss": 0.6496,
"step": 27000
},
{
"epoch": 16.97,
"learning_rate": 5.0688791484032557e-05,
"loss": 0.63,
"step": 27100
},
{
"epoch": 17.0,
"eval_accuracy": 0.913503971756399,
"eval_loss": 0.30175167322158813,
"eval_runtime": 149.8031,
"eval_samples_per_second": 45.38,
"eval_steps_per_second": 0.714,
"step": 27149
},
{
"epoch": 17.03,
"learning_rate": 4.9645168023377163e-05,
"loss": 0.6195,
"step": 27200
},
{
"epoch": 17.09,
"learning_rate": 4.861198079732832e-05,
"loss": 0.6299,
"step": 27300
},
{
"epoch": 17.16,
"learning_rate": 4.756835733667292e-05,
"loss": 0.6939,
"step": 27400
},
{
"epoch": 17.22,
"learning_rate": 4.652473387601752e-05,
"loss": 0.6416,
"step": 27500
},
{
"epoch": 17.28,
"learning_rate": 4.5481110415362136e-05,
"loss": 0.6626,
"step": 27600
},
{
"epoch": 17.35,
"learning_rate": 4.4437486954706736e-05,
"loss": 0.6444,
"step": 27700
},
{
"epoch": 17.41,
"learning_rate": 4.339386349405134e-05,
"loss": 0.6238,
"step": 27800
},
{
"epoch": 17.47,
"learning_rate": 4.2350240033395944e-05,
"loss": 0.6505,
"step": 27900
},
{
"epoch": 17.53,
"learning_rate": 4.130661657274056e-05,
"loss": 0.6346,
"step": 28000
},
{
"epoch": 17.6,
"learning_rate": 4.026299311208516e-05,
"loss": 0.6319,
"step": 28100
},
{
"epoch": 17.66,
"learning_rate": 3.921936965142976e-05,
"loss": 0.6811,
"step": 28200
},
{
"epoch": 17.72,
"learning_rate": 3.8175746190774365e-05,
"loss": 0.6645,
"step": 28300
},
{
"epoch": 17.78,
"learning_rate": 3.713212273011897e-05,
"loss": 0.6512,
"step": 28400
},
{
"epoch": 17.85,
"learning_rate": 3.608849926946357e-05,
"loss": 0.6578,
"step": 28500
},
{
"epoch": 17.91,
"learning_rate": 3.504487580880818e-05,
"loss": 0.6358,
"step": 28600
},
{
"epoch": 17.97,
"learning_rate": 3.4001252348152786e-05,
"loss": 0.6672,
"step": 28700
},
{
"epoch": 18.0,
"eval_accuracy": 0.9087967049132097,
"eval_loss": 0.30836355686187744,
"eval_runtime": 148.2197,
"eval_samples_per_second": 45.864,
"eval_steps_per_second": 0.722,
"step": 28746
},
{
"epoch": 18.03,
"learning_rate": 3.295762888749739e-05,
"loss": 0.65,
"step": 28800
},
{
"epoch": 18.1,
"learning_rate": 3.191400542684199e-05,
"loss": 0.6293,
"step": 28900
},
{
"epoch": 18.16,
"learning_rate": 3.087038196618659e-05,
"loss": 0.6059,
"step": 29000
},
{
"epoch": 18.22,
"learning_rate": 2.98267585055312e-05,
"loss": 0.5975,
"step": 29100
},
{
"epoch": 18.28,
"learning_rate": 2.8783135044875807e-05,
"loss": 0.6759,
"step": 29200
},
{
"epoch": 18.35,
"learning_rate": 2.773951158422041e-05,
"loss": 0.6457,
"step": 29300
},
{
"epoch": 18.41,
"learning_rate": 2.6695888123565017e-05,
"loss": 0.6715,
"step": 29400
},
{
"epoch": 18.47,
"learning_rate": 2.565226466290962e-05,
"loss": 0.6642,
"step": 29500
},
{
"epoch": 18.53,
"learning_rate": 2.460864120225422e-05,
"loss": 0.6432,
"step": 29600
},
{
"epoch": 18.6,
"learning_rate": 2.3565017741598828e-05,
"loss": 0.6441,
"step": 29700
},
{
"epoch": 18.66,
"learning_rate": 2.2521394280943432e-05,
"loss": 0.6843,
"step": 29800
},
{
"epoch": 18.72,
"learning_rate": 2.147777082028804e-05,
"loss": 0.6459,
"step": 29900
},
{
"epoch": 18.79,
"learning_rate": 2.0434147359632642e-05,
"loss": 0.6233,
"step": 30000
},
{
"epoch": 18.85,
"learning_rate": 1.939052389897725e-05,
"loss": 0.6634,
"step": 30100
},
{
"epoch": 18.91,
"learning_rate": 1.8346900438321853e-05,
"loss": 0.6701,
"step": 30200
},
{
"epoch": 18.97,
"learning_rate": 1.7303276977666456e-05,
"loss": 0.6479,
"step": 30300
},
{
"epoch": 19.0,
"eval_accuracy": 0.9101206237128567,
"eval_loss": 0.3060016632080078,
"eval_runtime": 149.555,
"eval_samples_per_second": 45.455,
"eval_steps_per_second": 0.715,
"step": 30343
},
{
"epoch": 19.04,
"learning_rate": 1.625965351701106e-05,
"loss": 0.6155,
"step": 30400
},
{
"epoch": 19.1,
"learning_rate": 1.5216030056355665e-05,
"loss": 0.6079,
"step": 30500
},
{
"epoch": 19.16,
"learning_rate": 1.417240659570027e-05,
"loss": 0.6709,
"step": 30600
},
{
"epoch": 19.22,
"learning_rate": 1.3128783135044874e-05,
"loss": 0.6604,
"step": 30700
},
{
"epoch": 19.29,
"learning_rate": 1.208515967438948e-05,
"loss": 0.6222,
"step": 30800
},
{
"epoch": 19.35,
"learning_rate": 1.1041536213734085e-05,
"loss": 0.6428,
"step": 30900
},
{
"epoch": 19.41,
"learning_rate": 9.997912753078688e-06,
"loss": 0.6664,
"step": 31000
},
{
"epoch": 19.47,
"learning_rate": 8.954289292423293e-06,
"loss": 0.6489,
"step": 31100
},
{
"epoch": 19.54,
"learning_rate": 7.910665831767897e-06,
"loss": 0.6414,
"step": 31200
},
{
"epoch": 19.6,
"learning_rate": 6.877478605719056e-06,
"loss": 0.6599,
"step": 31300
},
{
"epoch": 19.66,
"learning_rate": 5.83385514506366e-06,
"loss": 0.6433,
"step": 31400
},
{
"epoch": 19.72,
"learning_rate": 4.790231684408265e-06,
"loss": 0.6325,
"step": 31500
},
{
"epoch": 19.79,
"learning_rate": 3.7466082237528697e-06,
"loss": 0.6363,
"step": 31600
},
{
"epoch": 19.85,
"learning_rate": 2.7029847630974745e-06,
"loss": 0.6408,
"step": 31700
},
{
"epoch": 19.91,
"learning_rate": 1.6593613024420787e-06,
"loss": 0.645,
"step": 31800
},
{
"epoch": 19.97,
"learning_rate": 6.157378417866834e-07,
"loss": 0.6658,
"step": 31900
},
{
"epoch": 20.0,
"eval_accuracy": 0.9089438070020595,
"eval_loss": 0.3071773946285248,
"eval_runtime": 150.0563,
"eval_samples_per_second": 45.303,
"eval_steps_per_second": 0.713,
"step": 31940
},
{
"epoch": 20.0,
"step": 31940,
"total_flos": 0.0,
"train_loss": 0.8136612295581911,
"train_runtime": 27726.7705,
"train_samples_per_second": 36.855,
"train_steps_per_second": 1.152
}
],
"max_steps": 31940,
"num_train_epochs": 20,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}