wav2vec2-large-xlsr-53-greek / trainer_state.json
lighteternal's picture
Added new model weights and info, trained on CV and CSS
626bc84
raw
history blame
44.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 49.93538067818298,
"global_step": 39000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.26,
"learning_rate": 2e-05,
"loss": 16.6458,
"step": 200
},
{
"epoch": 0.51,
"learning_rate": 4e-05,
"loss": 4.0476,
"step": 400
},
{
"epoch": 0.51,
"eval_loss": 3.31887149810791,
"eval_runtime": 123.9389,
"eval_samples_per_second": 12.28,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 0.77,
"learning_rate": 6e-05,
"loss": 3.2819,
"step": 600
},
{
"epoch": 1.02,
"learning_rate": 8e-05,
"loss": 3.2548,
"step": 800
},
{
"epoch": 1.02,
"eval_loss": 3.226016044616699,
"eval_runtime": 123.3474,
"eval_samples_per_second": 12.339,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 1.28,
"learning_rate": 0.0001,
"loss": 3.1788,
"step": 1000
},
{
"epoch": 1.54,
"learning_rate": 9.947437582128778e-05,
"loss": 2.064,
"step": 1200
},
{
"epoch": 1.54,
"eval_loss": 0.9218049049377441,
"eval_runtime": 123.4543,
"eval_samples_per_second": 12.328,
"eval_wer": 0.8255778742303422,
"step": 1200
},
{
"epoch": 1.79,
"learning_rate": 9.894875164257556e-05,
"loss": 0.9611,
"step": 1400
},
{
"epoch": 2.05,
"learning_rate": 9.842312746386335e-05,
"loss": 0.741,
"step": 1600
},
{
"epoch": 2.05,
"eval_loss": 0.5120046138763428,
"eval_runtime": 124.3691,
"eval_samples_per_second": 12.238,
"eval_wer": 0.6447966084586656,
"step": 1600
},
{
"epoch": 2.3,
"learning_rate": 9.789750328515113e-05,
"loss": 0.6514,
"step": 1800
},
{
"epoch": 2.56,
"learning_rate": 9.73718791064389e-05,
"loss": 0.5501,
"step": 2000
},
{
"epoch": 2.56,
"eval_loss": 0.4019804298877716,
"eval_runtime": 124.8901,
"eval_samples_per_second": 12.187,
"eval_wer": 0.5592005652568891,
"step": 2000
},
{
"epoch": 2.82,
"learning_rate": 9.684625492772667e-05,
"loss": 0.5241,
"step": 2200
},
{
"epoch": 3.07,
"learning_rate": 9.632063074901446e-05,
"loss": 0.4649,
"step": 2400
},
{
"epoch": 3.07,
"eval_loss": 0.3544917106628418,
"eval_runtime": 124.4823,
"eval_samples_per_second": 12.227,
"eval_wer": 0.49167255475926114,
"step": 2400
},
{
"epoch": 3.33,
"learning_rate": 9.579500657030223e-05,
"loss": 0.409,
"step": 2600
},
{
"epoch": 3.58,
"learning_rate": 9.526938239159002e-05,
"loss": 0.4302,
"step": 2800
},
{
"epoch": 3.58,
"eval_loss": 0.3078452944755554,
"eval_runtime": 116.543,
"eval_samples_per_second": 13.06,
"eval_wer": 0.46431815887756134,
"step": 2800
},
{
"epoch": 3.84,
"learning_rate": 9.47437582128778e-05,
"loss": 0.3978,
"step": 3000
},
{
"epoch": 4.1,
"learning_rate": 9.421813403416557e-05,
"loss": 0.3713,
"step": 3200
},
{
"epoch": 4.1,
"eval_loss": 0.29381415247917175,
"eval_runtime": 125.5692,
"eval_samples_per_second": 12.121,
"eval_wer": 0.44190976077520944,
"step": 3200
},
{
"epoch": 4.35,
"learning_rate": 9.369250985545336e-05,
"loss": 0.326,
"step": 3400
},
{
"epoch": 4.61,
"learning_rate": 9.316688567674113e-05,
"loss": 0.3284,
"step": 3600
},
{
"epoch": 4.61,
"eval_loss": 0.27045169472694397,
"eval_runtime": 117.0161,
"eval_samples_per_second": 13.007,
"eval_wer": 0.4268698899767841,
"step": 3600
},
{
"epoch": 4.87,
"learning_rate": 9.26412614980289e-05,
"loss": 0.3261,
"step": 3800
},
{
"epoch": 5.12,
"learning_rate": 9.21156373193167e-05,
"loss": 0.3229,
"step": 4000
},
{
"epoch": 5.12,
"eval_loss": 0.2579388916492462,
"eval_runtime": 117.216,
"eval_samples_per_second": 12.985,
"eval_wer": 0.40022206520641973,
"step": 4000
},
{
"epoch": 5.38,
"learning_rate": 9.159001314060447e-05,
"loss": 0.2779,
"step": 4200
},
{
"epoch": 5.63,
"learning_rate": 9.106438896189226e-05,
"loss": 0.2622,
"step": 4400
},
{
"epoch": 5.63,
"eval_loss": 0.24829687178134918,
"eval_runtime": 125.5848,
"eval_samples_per_second": 12.119,
"eval_wer": 0.3847784394872312,
"step": 4400
},
{
"epoch": 5.89,
"learning_rate": 9.053876478318003e-05,
"loss": 0.2819,
"step": 4600
},
{
"epoch": 6.15,
"learning_rate": 9.00131406044678e-05,
"loss": 0.2831,
"step": 4800
},
{
"epoch": 6.15,
"eval_loss": 0.22981220483779907,
"eval_runtime": 120.9279,
"eval_samples_per_second": 12.586,
"eval_wer": 0.37579489250025233,
"step": 4800
},
{
"epoch": 6.4,
"learning_rate": 8.948751642575559e-05,
"loss": 0.2358,
"step": 5000
},
{
"epoch": 6.66,
"learning_rate": 8.896189224704337e-05,
"loss": 0.2505,
"step": 5200
},
{
"epoch": 6.66,
"eval_loss": 0.2343754768371582,
"eval_runtime": 125.1098,
"eval_samples_per_second": 12.165,
"eval_wer": 0.36943575249823357,
"step": 5200
},
{
"epoch": 6.91,
"learning_rate": 8.843626806833116e-05,
"loss": 0.2277,
"step": 5400
},
{
"epoch": 7.17,
"learning_rate": 8.791064388961893e-05,
"loss": 0.2436,
"step": 5600
},
{
"epoch": 7.17,
"eval_loss": 0.22785206139087677,
"eval_runtime": 118.7977,
"eval_samples_per_second": 12.812,
"eval_wer": 0.370142323609569,
"step": 5600
},
{
"epoch": 7.43,
"learning_rate": 8.73850197109067e-05,
"loss": 0.2253,
"step": 5800
},
{
"epoch": 7.68,
"learning_rate": 8.685939553219448e-05,
"loss": 0.2139,
"step": 6000
},
{
"epoch": 7.68,
"eval_loss": 0.23501864075660706,
"eval_runtime": 125.2542,
"eval_samples_per_second": 12.151,
"eval_wer": 0.35853436963762997,
"step": 6000
},
{
"epoch": 7.94,
"learning_rate": 8.633377135348227e-05,
"loss": 0.2083,
"step": 6200
},
{
"epoch": 8.19,
"learning_rate": 8.580814717477005e-05,
"loss": 0.2075,
"step": 6400
},
{
"epoch": 8.19,
"eval_loss": 0.21803195774555206,
"eval_runtime": 117.7604,
"eval_samples_per_second": 12.925,
"eval_wer": 0.3443020086807308,
"step": 6400
},
{
"epoch": 8.45,
"learning_rate": 8.528252299605783e-05,
"loss": 0.1983,
"step": 6600
},
{
"epoch": 8.71,
"learning_rate": 8.47568988173456e-05,
"loss": 0.2165,
"step": 6800
},
{
"epoch": 8.71,
"eval_loss": 0.19772419333457947,
"eval_runtime": 117.7225,
"eval_samples_per_second": 12.929,
"eval_wer": 0.34490764106187544,
"step": 6800
},
{
"epoch": 8.96,
"learning_rate": 8.423127463863338e-05,
"loss": 0.1871,
"step": 7000
},
{
"epoch": 9.22,
"learning_rate": 8.370565045992115e-05,
"loss": 0.1695,
"step": 7200
},
{
"epoch": 9.22,
"eval_loss": 0.2186300903558731,
"eval_runtime": 116.8691,
"eval_samples_per_second": 13.023,
"eval_wer": 0.3477339255072171,
"step": 7200
},
{
"epoch": 9.47,
"learning_rate": 8.318002628120894e-05,
"loss": 0.181,
"step": 7400
},
{
"epoch": 9.73,
"learning_rate": 8.265440210249672e-05,
"loss": 0.1998,
"step": 7600
},
{
"epoch": 9.73,
"eval_loss": 0.20928959548473358,
"eval_runtime": 125.7928,
"eval_samples_per_second": 12.099,
"eval_wer": 0.33057434137478553,
"step": 7600
},
{
"epoch": 9.99,
"learning_rate": 8.21287779237845e-05,
"loss": 0.1776,
"step": 7800
},
{
"epoch": 10.24,
"learning_rate": 8.160315374507228e-05,
"loss": 0.1778,
"step": 8000
},
{
"epoch": 10.24,
"eval_loss": 0.2176637053489685,
"eval_runtime": 118.7135,
"eval_samples_per_second": 12.821,
"eval_wer": 0.32734430200868075,
"step": 8000
},
{
"epoch": 10.5,
"learning_rate": 8.107752956636005e-05,
"loss": 0.1779,
"step": 8200
},
{
"epoch": 10.75,
"learning_rate": 8.055190538764782e-05,
"loss": 0.1748,
"step": 8400
},
{
"epoch": 10.75,
"eval_loss": 0.21325238049030304,
"eval_runtime": 126.2474,
"eval_samples_per_second": 12.056,
"eval_wer": 0.3220954880387605,
"step": 8400
},
{
"epoch": 11.01,
"learning_rate": 8.002628120893562e-05,
"loss": 0.1676,
"step": 8600
},
{
"epoch": 11.27,
"learning_rate": 7.95006570302234e-05,
"loss": 0.1591,
"step": 8800
},
{
"epoch": 11.27,
"eval_loss": 0.21119922399520874,
"eval_runtime": 116.6989,
"eval_samples_per_second": 13.042,
"eval_wer": 0.32340769153124055,
"step": 8800
},
{
"epoch": 11.52,
"learning_rate": 7.897503285151118e-05,
"loss": 0.1613,
"step": 9000
},
{
"epoch": 11.78,
"learning_rate": 7.844940867279895e-05,
"loss": 0.1697,
"step": 9200
},
{
"epoch": 11.78,
"eval_loss": 0.19785504043102264,
"eval_runtime": 117.4774,
"eval_samples_per_second": 12.956,
"eval_wer": 0.319168264863228,
"step": 9200
},
{
"epoch": 12.04,
"learning_rate": 7.792378449408672e-05,
"loss": 0.1434,
"step": 9400
},
{
"epoch": 12.29,
"learning_rate": 7.739816031537451e-05,
"loss": 0.1429,
"step": 9600
},
{
"epoch": 12.29,
"eval_loss": 0.19984780251979828,
"eval_runtime": 116.8648,
"eval_samples_per_second": 13.024,
"eval_wer": 0.3129100635914,
"step": 9600
},
{
"epoch": 12.55,
"learning_rate": 7.687253613666229e-05,
"loss": 0.1492,
"step": 9800
},
{
"epoch": 12.8,
"learning_rate": 7.634691195795008e-05,
"loss": 0.158,
"step": 10000
},
{
"epoch": 12.8,
"eval_loss": 0.1837874799966812,
"eval_runtime": 117.4413,
"eval_samples_per_second": 12.96,
"eval_wer": 0.3110931664479661,
"step": 10000
},
{
"epoch": 13.06,
"learning_rate": 7.582128777923785e-05,
"loss": 0.1671,
"step": 10200
},
{
"epoch": 13.32,
"learning_rate": 7.529566360052562e-05,
"loss": 0.1486,
"step": 10400
},
{
"epoch": 13.32,
"eval_loss": 0.1937190741300583,
"eval_runtime": 117.8624,
"eval_samples_per_second": 12.913,
"eval_wer": 0.30897345311395985,
"step": 10400
},
{
"epoch": 13.57,
"learning_rate": 7.47700394218134e-05,
"loss": 0.1446,
"step": 10600
},
{
"epoch": 13.83,
"learning_rate": 7.424441524310118e-05,
"loss": 0.1486,
"step": 10800
},
{
"epoch": 13.83,
"eval_loss": 0.21288040280342102,
"eval_runtime": 118.4975,
"eval_samples_per_second": 12.844,
"eval_wer": 0.30806500454224284,
"step": 10800
},
{
"epoch": 14.08,
"learning_rate": 7.371879106438898e-05,
"loss": 0.1455,
"step": 11000
},
{
"epoch": 14.34,
"learning_rate": 7.319316688567675e-05,
"loss": 0.1407,
"step": 11200
},
{
"epoch": 14.34,
"eval_loss": 0.1925038844347,
"eval_runtime": 125.2228,
"eval_samples_per_second": 12.154,
"eval_wer": 0.30493590390632885,
"step": 11200
},
{
"epoch": 14.6,
"learning_rate": 7.266754270696452e-05,
"loss": 0.1342,
"step": 11400
},
{
"epoch": 14.85,
"learning_rate": 7.21419185282523e-05,
"loss": 0.148,
"step": 11600
},
{
"epoch": 14.85,
"eval_loss": 0.19629527628421783,
"eval_runtime": 117.7213,
"eval_samples_per_second": 12.929,
"eval_wer": 0.3043302715251842,
"step": 11600
},
{
"epoch": 15.11,
"learning_rate": 7.161629434954008e-05,
"loss": 0.1295,
"step": 11800
},
{
"epoch": 15.36,
"learning_rate": 7.109067017082786e-05,
"loss": 0.1307,
"step": 12000
},
{
"epoch": 15.36,
"eval_loss": 0.19587725400924683,
"eval_runtime": 116.5086,
"eval_samples_per_second": 13.063,
"eval_wer": 0.30301806803270415,
"step": 12000
},
{
"epoch": 15.62,
"learning_rate": 7.056504599211565e-05,
"loss": 0.1371,
"step": 12200
},
{
"epoch": 15.88,
"learning_rate": 7.003942181340342e-05,
"loss": 0.1356,
"step": 12400
},
{
"epoch": 15.88,
"eval_loss": 0.2000432014465332,
"eval_runtime": 117.211,
"eval_samples_per_second": 12.985,
"eval_wer": 0.2992833350156455,
"step": 12400
},
{
"epoch": 16.13,
"learning_rate": 6.95137976346912e-05,
"loss": 0.1351,
"step": 12600
},
{
"epoch": 16.39,
"learning_rate": 6.898817345597897e-05,
"loss": 0.1213,
"step": 12800
},
{
"epoch": 16.39,
"eval_loss": 0.19138583540916443,
"eval_runtime": 117.6478,
"eval_samples_per_second": 12.937,
"eval_wer": 0.2961542343797315,
"step": 12800
},
{
"epoch": 16.64,
"learning_rate": 6.846254927726675e-05,
"loss": 0.122,
"step": 13000
},
{
"epoch": 16.9,
"learning_rate": 6.793692509855453e-05,
"loss": 0.1266,
"step": 13200
},
{
"epoch": 16.9,
"eval_loss": 0.1783333718776703,
"eval_runtime": 117.3319,
"eval_samples_per_second": 12.972,
"eval_wer": 0.2945392146966791,
"step": 13200
},
{
"epoch": 17.16,
"learning_rate": 6.741130091984232e-05,
"loss": 0.1292,
"step": 13400
},
{
"epoch": 17.41,
"learning_rate": 6.68856767411301e-05,
"loss": 0.1128,
"step": 13600
},
{
"epoch": 17.41,
"eval_loss": 0.1909104287624359,
"eval_runtime": 117.9127,
"eval_samples_per_second": 12.908,
"eval_wer": 0.29575047945896843,
"step": 13600
},
{
"epoch": 17.67,
"learning_rate": 6.636005256241787e-05,
"loss": 0.1293,
"step": 13800
},
{
"epoch": 17.93,
"learning_rate": 6.583442838370564e-05,
"loss": 0.1236,
"step": 14000
},
{
"epoch": 17.93,
"eval_loss": 0.19043225049972534,
"eval_runtime": 116.9176,
"eval_samples_per_second": 13.018,
"eval_wer": 0.2986777026345009,
"step": 14000
},
{
"epoch": 18.18,
"learning_rate": 6.530880420499343e-05,
"loss": 0.1228,
"step": 14200
},
{
"epoch": 18.44,
"learning_rate": 6.478318002628122e-05,
"loss": 0.1183,
"step": 14400
},
{
"epoch": 18.44,
"eval_loss": 0.19979140162467957,
"eval_runtime": 118.1055,
"eval_samples_per_second": 12.887,
"eval_wer": 0.29231856263248207,
"step": 14400
},
{
"epoch": 18.69,
"learning_rate": 6.4257555847569e-05,
"loss": 0.1195,
"step": 14600
},
{
"epoch": 18.95,
"learning_rate": 6.373193166885677e-05,
"loss": 0.1238,
"step": 14800
},
{
"epoch": 18.95,
"eval_loss": 0.18403638899326324,
"eval_runtime": 121.6258,
"eval_samples_per_second": 12.514,
"eval_wer": 0.287877258504088,
"step": 14800
},
{
"epoch": 19.21,
"learning_rate": 6.320630749014454e-05,
"loss": 0.1044,
"step": 15000
},
{
"epoch": 19.46,
"learning_rate": 6.268068331143233e-05,
"loss": 0.1266,
"step": 15200
},
{
"epoch": 19.46,
"eval_loss": 0.193573996424675,
"eval_runtime": 121.7733,
"eval_samples_per_second": 12.499,
"eval_wer": 0.2893913394569496,
"step": 15200
},
{
"epoch": 19.72,
"learning_rate": 6.21550591327201e-05,
"loss": 0.1104,
"step": 15400
},
{
"epoch": 19.97,
"learning_rate": 6.162943495400789e-05,
"loss": 0.1232,
"step": 15600
},
{
"epoch": 19.97,
"eval_loss": 0.18593670427799225,
"eval_runtime": 125.602,
"eval_samples_per_second": 12.118,
"eval_wer": 0.28474815786817403,
"step": 15600
},
{
"epoch": 20.23,
"learning_rate": 6.110381077529567e-05,
"loss": 0.1072,
"step": 15800
},
{
"epoch": 20.49,
"learning_rate": 6.057818659658344e-05,
"loss": 0.1093,
"step": 16000
},
{
"epoch": 20.49,
"eval_loss": 0.1964750736951828,
"eval_runtime": 120.8874,
"eval_samples_per_second": 12.59,
"eval_wer": 0.28484909659836477,
"step": 16000
},
{
"epoch": 20.74,
"learning_rate": 6.005256241787123e-05,
"loss": 0.1223,
"step": 16200
},
{
"epoch": 21.0,
"learning_rate": 5.9526938239159005e-05,
"loss": 0.1037,
"step": 16400
},
{
"epoch": 21.0,
"eval_loss": 0.20040956139564514,
"eval_runtime": 122.3183,
"eval_samples_per_second": 12.443,
"eval_wer": 0.28505097405874635,
"step": 16400
},
{
"epoch": 21.25,
"learning_rate": 5.900131406044679e-05,
"loss": 0.1114,
"step": 16600
},
{
"epoch": 21.51,
"learning_rate": 5.847568988173456e-05,
"loss": 0.1018,
"step": 16800
},
{
"epoch": 21.51,
"eval_loss": 0.1897500604391098,
"eval_runtime": 123.2693,
"eval_samples_per_second": 12.347,
"eval_wer": 0.288785707075805,
"step": 16800
},
{
"epoch": 21.77,
"learning_rate": 5.795006570302234e-05,
"loss": 0.1116,
"step": 17000
},
{
"epoch": 22.02,
"learning_rate": 5.7424441524310126e-05,
"loss": 0.1018,
"step": 17200
},
{
"epoch": 22.02,
"eval_loss": 0.17437991499900818,
"eval_runtime": 118.5065,
"eval_samples_per_second": 12.843,
"eval_wer": 0.28616130009084484,
"step": 17200
},
{
"epoch": 22.28,
"learning_rate": 5.68988173455979e-05,
"loss": 0.1042,
"step": 17400
},
{
"epoch": 22.53,
"learning_rate": 5.637319316688568e-05,
"loss": 0.0895,
"step": 17600
},
{
"epoch": 22.53,
"eval_loss": 0.18042823672294617,
"eval_runtime": 117.6274,
"eval_samples_per_second": 12.939,
"eval_wer": 0.2839406480266478,
"step": 17600
},
{
"epoch": 22.79,
"learning_rate": 5.584756898817346e-05,
"loss": 0.1136,
"step": 17800
},
{
"epoch": 23.05,
"learning_rate": 5.532194480946123e-05,
"loss": 0.1098,
"step": 18000
},
{
"epoch": 23.05,
"eval_loss": 0.1741502583026886,
"eval_runtime": 125.9195,
"eval_samples_per_second": 12.087,
"eval_wer": 0.28071060866054304,
"step": 18000
},
{
"epoch": 23.3,
"learning_rate": 5.479632063074902e-05,
"loss": 0.1041,
"step": 18200
},
{
"epoch": 23.56,
"learning_rate": 5.4270696452036796e-05,
"loss": 0.0944,
"step": 18400
},
{
"epoch": 23.56,
"eval_loss": 0.18072044849395752,
"eval_runtime": 117.8579,
"eval_samples_per_second": 12.914,
"eval_wer": 0.27758150802462905,
"step": 18400
},
{
"epoch": 23.82,
"learning_rate": 5.374507227332457e-05,
"loss": 0.1025,
"step": 18600
},
{
"epoch": 24.07,
"learning_rate": 5.321944809461236e-05,
"loss": 0.109,
"step": 18800
},
{
"epoch": 24.07,
"eval_loss": 0.1833416223526001,
"eval_runtime": 125.3831,
"eval_samples_per_second": 12.139,
"eval_wer": 0.2742505299283335,
"step": 18800
},
{
"epoch": 24.33,
"learning_rate": 5.269382391590013e-05,
"loss": 0.0997,
"step": 19000
},
{
"epoch": 24.58,
"learning_rate": 5.2168199737187916e-05,
"loss": 0.0954,
"step": 19200
},
{
"epoch": 24.58,
"eval_loss": 0.18462379276752472,
"eval_runtime": 126.0288,
"eval_samples_per_second": 12.077,
"eval_wer": 0.27758150802462905,
"step": 19200
},
{
"epoch": 24.84,
"learning_rate": 5.1642575558475694e-05,
"loss": 0.0982,
"step": 19400
},
{
"epoch": 25.1,
"learning_rate": 5.1116951379763466e-05,
"loss": 0.1054,
"step": 19600
},
{
"epoch": 25.1,
"eval_loss": 0.17953717708587646,
"eval_runtime": 125.8704,
"eval_samples_per_second": 12.092,
"eval_wer": 0.2747552235792874,
"step": 19600
},
{
"epoch": 25.35,
"learning_rate": 5.059132720105125e-05,
"loss": 0.0965,
"step": 19800
},
{
"epoch": 25.61,
"learning_rate": 5.006570302233903e-05,
"loss": 0.097,
"step": 20000
},
{
"epoch": 25.61,
"eval_loss": 0.1935284584760666,
"eval_runtime": 117.464,
"eval_samples_per_second": 12.957,
"eval_wer": 0.27566367215100435,
"step": 20000
},
{
"epoch": 25.86,
"learning_rate": 4.954007884362681e-05,
"loss": 0.0924,
"step": 20200
},
{
"epoch": 26.12,
"learning_rate": 4.9014454664914586e-05,
"loss": 0.0832,
"step": 20400
},
{
"epoch": 26.12,
"eval_loss": 0.1959334760904312,
"eval_runtime": 125.8706,
"eval_samples_per_second": 12.092,
"eval_wer": 0.27334208135661653,
"step": 20400
},
{
"epoch": 26.38,
"learning_rate": 4.848883048620237e-05,
"loss": 0.0977,
"step": 20600
},
{
"epoch": 26.63,
"learning_rate": 4.796320630749015e-05,
"loss": 0.0936,
"step": 20800
},
{
"epoch": 26.63,
"eval_loss": 0.1751490980386734,
"eval_runtime": 126.4687,
"eval_samples_per_second": 12.035,
"eval_wer": 0.27202987786413646,
"step": 20800
},
{
"epoch": 26.89,
"learning_rate": 4.743758212877792e-05,
"loss": 0.0843,
"step": 21000
},
{
"epoch": 27.14,
"learning_rate": 4.6911957950065706e-05,
"loss": 0.1042,
"step": 21200
},
{
"epoch": 27.14,
"eval_loss": 0.18527205288410187,
"eval_runtime": 117.2693,
"eval_samples_per_second": 12.979,
"eval_wer": 0.2728373877056627,
"step": 21200
},
{
"epoch": 27.4,
"learning_rate": 4.6386333771353484e-05,
"loss": 0.0881,
"step": 21400
},
{
"epoch": 27.66,
"learning_rate": 4.586070959264126e-05,
"loss": 0.0794,
"step": 21600
},
{
"epoch": 27.66,
"eval_loss": 0.17558707296848297,
"eval_runtime": 117.2386,
"eval_samples_per_second": 12.982,
"eval_wer": 0.27192893913394567,
"step": 21600
},
{
"epoch": 27.91,
"learning_rate": 4.533508541392904e-05,
"loss": 0.0973,
"step": 21800
},
{
"epoch": 28.17,
"learning_rate": 4.480946123521682e-05,
"loss": 0.0882,
"step": 22000
},
{
"epoch": 28.17,
"eval_loss": 0.18478873372077942,
"eval_runtime": 117.58,
"eval_samples_per_second": 12.944,
"eval_wer": 0.27172706167356414,
"step": 22000
},
{
"epoch": 28.42,
"learning_rate": 4.42838370565046e-05,
"loss": 0.0894,
"step": 22200
},
{
"epoch": 28.68,
"learning_rate": 4.375821287779238e-05,
"loss": 0.0941,
"step": 22400
},
{
"epoch": 28.68,
"eval_loss": 0.1826159507036209,
"eval_runtime": 117.7022,
"eval_samples_per_second": 12.931,
"eval_wer": 0.2711214292924195,
"step": 22400
},
{
"epoch": 28.94,
"learning_rate": 4.323258869908016e-05,
"loss": 0.0804,
"step": 22600
},
{
"epoch": 29.19,
"learning_rate": 4.270696452036794e-05,
"loss": 0.0904,
"step": 22800
},
{
"epoch": 29.19,
"eval_loss": 0.1743590384721756,
"eval_runtime": 117.4139,
"eval_samples_per_second": 12.963,
"eval_wer": 0.27182800040375493,
"step": 22800
},
{
"epoch": 29.45,
"learning_rate": 4.218134034165572e-05,
"loss": 0.0869,
"step": 23000
},
{
"epoch": 29.71,
"learning_rate": 4.1655716162943496e-05,
"loss": 0.0845,
"step": 23200
},
{
"epoch": 29.71,
"eval_loss": 0.18005579710006714,
"eval_runtime": 117.9505,
"eval_samples_per_second": 12.904,
"eval_wer": 0.2710204905622287,
"step": 23200
},
{
"epoch": 29.96,
"learning_rate": 4.113009198423128e-05,
"loss": 0.0788,
"step": 23400
},
{
"epoch": 30.22,
"learning_rate": 4.060446780551905e-05,
"loss": 0.0701,
"step": 23600
},
{
"epoch": 30.22,
"eval_loss": 0.17918919026851654,
"eval_runtime": 118.3396,
"eval_samples_per_second": 12.861,
"eval_wer": 0.27051579691127486,
"step": 23600
},
{
"epoch": 30.47,
"learning_rate": 4.007884362680683e-05,
"loss": 0.0851,
"step": 23800
},
{
"epoch": 30.73,
"learning_rate": 3.9553219448094617e-05,
"loss": 0.0772,
"step": 24000
},
{
"epoch": 30.73,
"eval_loss": 0.1685931384563446,
"eval_runtime": 117.9466,
"eval_samples_per_second": 12.904,
"eval_wer": 0.2686988997678409,
"step": 24000
},
{
"epoch": 30.99,
"learning_rate": 3.9027595269382395e-05,
"loss": 0.0863,
"step": 24200
},
{
"epoch": 31.24,
"learning_rate": 3.850197109067017e-05,
"loss": 0.0772,
"step": 24400
},
{
"epoch": 31.24,
"eval_loss": 0.16450461745262146,
"eval_runtime": 118.178,
"eval_samples_per_second": 12.879,
"eval_wer": 0.26637730897345313,
"step": 24400
},
{
"epoch": 31.5,
"learning_rate": 3.797634691195795e-05,
"loss": 0.0935,
"step": 24600
},
{
"epoch": 31.75,
"learning_rate": 3.745072273324573e-05,
"loss": 0.0855,
"step": 24800
},
{
"epoch": 31.75,
"eval_loss": 0.1594998687505722,
"eval_runtime": 120.8974,
"eval_samples_per_second": 12.589,
"eval_wer": 0.26476228929040074,
"step": 24800
},
{
"epoch": 32.01,
"learning_rate": 3.692509855453351e-05,
"loss": 0.0794,
"step": 25000
},
{
"epoch": 32.27,
"learning_rate": 3.6399474375821293e-05,
"loss": 0.084,
"step": 25200
},
{
"epoch": 32.27,
"eval_loss": 0.16218672692775726,
"eval_runtime": 120.7524,
"eval_samples_per_second": 12.604,
"eval_wer": 0.2636519632583022,
"step": 25200
},
{
"epoch": 32.52,
"learning_rate": 3.587385019710907e-05,
"loss": 0.0815,
"step": 25400
},
{
"epoch": 32.78,
"learning_rate": 3.534822601839684e-05,
"loss": 0.0812,
"step": 25600
},
{
"epoch": 32.78,
"eval_loss": 0.16536009311676025,
"eval_runtime": 129.74,
"eval_samples_per_second": 11.731,
"eval_wer": 0.26466135056020995,
"step": 25600
},
{
"epoch": 33.03,
"learning_rate": 3.482260183968463e-05,
"loss": 0.0707,
"step": 25800
},
{
"epoch": 33.29,
"learning_rate": 3.429697766097241e-05,
"loss": 0.0859,
"step": 26000
},
{
"epoch": 33.29,
"eval_loss": 0.18085506558418274,
"eval_runtime": 129.8039,
"eval_samples_per_second": 11.725,
"eval_wer": 0.2648632280205915,
"step": 26000
},
{
"epoch": 33.55,
"learning_rate": 3.3771353482260185e-05,
"loss": 0.0666,
"step": 26200
},
{
"epoch": 33.8,
"learning_rate": 3.3245729303547964e-05,
"loss": 0.0796,
"step": 26400
},
{
"epoch": 33.8,
"eval_loss": 0.17295604944229126,
"eval_runtime": 125.8166,
"eval_samples_per_second": 12.097,
"eval_wer": 0.2602200464318159,
"step": 26400
},
{
"epoch": 34.06,
"learning_rate": 3.272010512483574e-05,
"loss": 0.0797,
"step": 26600
},
{
"epoch": 34.31,
"learning_rate": 3.219448094612352e-05,
"loss": 0.0738,
"step": 26800
},
{
"epoch": 34.31,
"eval_loss": 0.17386285960674286,
"eval_runtime": 129.8901,
"eval_samples_per_second": 11.718,
"eval_wer": 0.2622388210356314,
"step": 26800
},
{
"epoch": 34.57,
"learning_rate": 3.1668856767411305e-05,
"loss": 0.0805,
"step": 27000
},
{
"epoch": 34.83,
"learning_rate": 3.1143232588699084e-05,
"loss": 0.0659,
"step": 27200
},
{
"epoch": 34.83,
"eval_loss": 0.17923414707183838,
"eval_runtime": 124.9405,
"eval_samples_per_second": 12.182,
"eval_wer": 0.2634500857979207,
"step": 27200
},
{
"epoch": 35.08,
"learning_rate": 3.061760840998686e-05,
"loss": 0.0847,
"step": 27400
},
{
"epoch": 35.34,
"learning_rate": 3.009198423127464e-05,
"loss": 0.0657,
"step": 27600
},
{
"epoch": 35.34,
"eval_loss": 0.16643725335597992,
"eval_runtime": 128.2311,
"eval_samples_per_second": 11.869,
"eval_wer": 0.2618350661148683,
"step": 27600
},
{
"epoch": 35.6,
"learning_rate": 2.956636005256242e-05,
"loss": 0.0716,
"step": 27800
},
{
"epoch": 35.85,
"learning_rate": 2.90407358738502e-05,
"loss": 0.0761,
"step": 28000
},
{
"epoch": 35.85,
"eval_loss": 0.17373836040496826,
"eval_runtime": 131.1327,
"eval_samples_per_second": 11.607,
"eval_wer": 0.26284445341677604,
"step": 28000
},
{
"epoch": 36.11,
"learning_rate": 2.851511169513798e-05,
"loss": 0.0759,
"step": 28200
},
{
"epoch": 36.36,
"learning_rate": 2.7989487516425754e-05,
"loss": 0.0724,
"step": 28400
},
{
"epoch": 36.36,
"eval_loss": 0.17160625755786896,
"eval_runtime": 130.0843,
"eval_samples_per_second": 11.7,
"eval_wer": 0.2599172302412436,
"step": 28400
},
{
"epoch": 36.62,
"learning_rate": 2.7463863337713536e-05,
"loss": 0.0682,
"step": 28600
},
{
"epoch": 36.88,
"learning_rate": 2.6938239159001317e-05,
"loss": 0.0712,
"step": 28800
},
{
"epoch": 36.88,
"eval_loss": 0.18185953795909882,
"eval_runtime": 131.2291,
"eval_samples_per_second": 11.598,
"eval_wer": 0.2621378823054406,
"step": 28800
},
{
"epoch": 37.13,
"learning_rate": 2.6412614980289096e-05,
"loss": 0.0755,
"step": 29000
},
{
"epoch": 37.39,
"learning_rate": 2.588699080157687e-05,
"loss": 0.0725,
"step": 29200
},
{
"epoch": 37.39,
"eval_loss": 0.18556980788707733,
"eval_runtime": 129.7316,
"eval_samples_per_second": 11.732,
"eval_wer": 0.26082567881296054,
"step": 29200
},
{
"epoch": 37.64,
"learning_rate": 2.5361366622864652e-05,
"loss": 0.0711,
"step": 29400
},
{
"epoch": 37.9,
"learning_rate": 2.483574244415243e-05,
"loss": 0.0703,
"step": 29600
},
{
"epoch": 37.9,
"eval_loss": 0.18763265013694763,
"eval_runtime": 130.3296,
"eval_samples_per_second": 11.678,
"eval_wer": 0.2598162915110528,
"step": 29600
},
{
"epoch": 38.16,
"learning_rate": 2.4310118265440212e-05,
"loss": 0.0696,
"step": 29800
},
{
"epoch": 38.41,
"learning_rate": 2.378449408672799e-05,
"loss": 0.0656,
"step": 30000
},
{
"epoch": 38.41,
"eval_loss": 0.19020360708236694,
"eval_runtime": 120.9063,
"eval_samples_per_second": 12.588,
"eval_wer": 0.25608155849399417,
"step": 30000
},
{
"epoch": 38.67,
"learning_rate": 2.325886990801577e-05,
"loss": 0.0694,
"step": 30200
},
{
"epoch": 38.92,
"learning_rate": 2.2733245729303547e-05,
"loss": 0.0704,
"step": 30400
},
{
"epoch": 38.92,
"eval_loss": 0.1706458330154419,
"eval_runtime": 121.416,
"eval_samples_per_second": 12.535,
"eval_wer": 0.25386090642979714,
"step": 30400
},
{
"epoch": 39.18,
"learning_rate": 2.220762155059133e-05,
"loss": 0.0732,
"step": 30600
},
{
"epoch": 39.44,
"learning_rate": 2.1681997371879108e-05,
"loss": 0.0631,
"step": 30800
},
{
"epoch": 39.44,
"eval_loss": 0.18751636147499084,
"eval_runtime": 121.7301,
"eval_samples_per_second": 12.503,
"eval_wer": 0.25678812960532954,
"step": 30800
},
{
"epoch": 39.69,
"learning_rate": 2.1156373193166886e-05,
"loss": 0.0669,
"step": 31000
},
{
"epoch": 39.95,
"learning_rate": 2.0630749014454668e-05,
"loss": 0.0674,
"step": 31200
},
{
"epoch": 39.95,
"eval_loss": 0.1696523278951645,
"eval_runtime": 130.8318,
"eval_samples_per_second": 11.633,
"eval_wer": 0.25739376198647423,
"step": 31200
},
{
"epoch": 40.2,
"learning_rate": 2.0105124835742446e-05,
"loss": 0.0642,
"step": 31400
},
{
"epoch": 40.46,
"learning_rate": 1.9579500657030224e-05,
"loss": 0.0729,
"step": 31600
},
{
"epoch": 40.46,
"eval_loss": 0.17705558240413666,
"eval_runtime": 123.0592,
"eval_samples_per_second": 12.368,
"eval_wer": 0.2561824972241849,
"step": 31600
},
{
"epoch": 40.72,
"learning_rate": 1.9053876478318003e-05,
"loss": 0.0636,
"step": 31800
},
{
"epoch": 40.97,
"learning_rate": 1.8528252299605784e-05,
"loss": 0.0696,
"step": 32000
},
{
"epoch": 40.97,
"eval_loss": 0.16749870777130127,
"eval_runtime": 128.7269,
"eval_samples_per_second": 11.823,
"eval_wer": 0.25577874230342185,
"step": 32000
},
{
"epoch": 41.23,
"learning_rate": 1.800262812089356e-05,
"loss": 0.0703,
"step": 32200
},
{
"epoch": 41.48,
"learning_rate": 1.747700394218134e-05,
"loss": 0.0674,
"step": 32400
},
{
"epoch": 41.48,
"eval_loss": 0.1687408983707428,
"eval_runtime": 123.6528,
"eval_samples_per_second": 12.309,
"eval_wer": 0.25688906833552033,
"step": 32400
},
{
"epoch": 41.74,
"learning_rate": 1.695137976346912e-05,
"loss": 0.066,
"step": 32600
},
{
"epoch": 42.0,
"learning_rate": 1.6425755584756898e-05,
"loss": 0.0594,
"step": 32800
},
{
"epoch": 42.0,
"eval_loss": 0.1702132523059845,
"eval_runtime": 122.2348,
"eval_samples_per_second": 12.451,
"eval_wer": 0.25406278389017867,
"step": 32800
},
{
"epoch": 42.25,
"learning_rate": 1.590013140604468e-05,
"loss": 0.0595,
"step": 33000
},
{
"epoch": 42.51,
"learning_rate": 1.5374507227332458e-05,
"loss": 0.0653,
"step": 33200
},
{
"epoch": 42.51,
"eval_loss": 0.1668023020029068,
"eval_runtime": 121.6403,
"eval_samples_per_second": 12.512,
"eval_wer": 0.25355809023922476,
"step": 33200
},
{
"epoch": 42.77,
"learning_rate": 1.4848883048620238e-05,
"loss": 0.0671,
"step": 33400
},
{
"epoch": 43.02,
"learning_rate": 1.4323258869908016e-05,
"loss": 0.0647,
"step": 33600
},
{
"epoch": 43.02,
"eval_loss": 0.1677715927362442,
"eval_runtime": 121.5078,
"eval_samples_per_second": 12.526,
"eval_wer": 0.2561824972241849,
"step": 33600
},
{
"epoch": 43.28,
"learning_rate": 1.3797634691195796e-05,
"loss": 0.065,
"step": 33800
},
{
"epoch": 43.53,
"learning_rate": 1.3272010512483573e-05,
"loss": 0.059,
"step": 34000
},
{
"epoch": 43.53,
"eval_loss": 0.17403629422187805,
"eval_runtime": 121.8146,
"eval_samples_per_second": 12.494,
"eval_wer": 0.2570909457959019,
"step": 34000
},
{
"epoch": 43.79,
"learning_rate": 1.2746386333771355e-05,
"loss": 0.0586,
"step": 34200
},
{
"epoch": 44.05,
"learning_rate": 1.2220762155059133e-05,
"loss": 0.0597,
"step": 34400
},
{
"epoch": 44.05,
"eval_loss": 0.1736619770526886,
"eval_runtime": 122.8592,
"eval_samples_per_second": 12.388,
"eval_wer": 0.25375996769960635,
"step": 34400
},
{
"epoch": 44.3,
"learning_rate": 1.1695137976346911e-05,
"loss": 0.057,
"step": 34600
},
{
"epoch": 44.56,
"learning_rate": 1.1169513797634691e-05,
"loss": 0.0644,
"step": 34800
},
{
"epoch": 44.56,
"eval_loss": 0.17717154324054718,
"eval_runtime": 121.8694,
"eval_samples_per_second": 12.489,
"eval_wer": 0.2528515191278894,
"step": 34800
},
{
"epoch": 44.81,
"learning_rate": 1.0643889618922471e-05,
"loss": 0.0606,
"step": 35000
},
{
"epoch": 45.07,
"learning_rate": 1.011826544021025e-05,
"loss": 0.0572,
"step": 35200
},
{
"epoch": 45.07,
"eval_loss": 0.1694445163011551,
"eval_runtime": 122.3379,
"eval_samples_per_second": 12.441,
"eval_wer": 0.2547693550015141,
"step": 35200
},
{
"epoch": 45.33,
"learning_rate": 9.59264126149803e-06,
"loss": 0.0539,
"step": 35400
},
{
"epoch": 45.58,
"learning_rate": 9.067017082785808e-06,
"loss": 0.0695,
"step": 35600
},
{
"epoch": 45.58,
"eval_loss": 0.16321270167827606,
"eval_runtime": 121.7216,
"eval_samples_per_second": 12.504,
"eval_wer": 0.25214494801655396,
"step": 35600
},
{
"epoch": 45.84,
"learning_rate": 8.541392904073588e-06,
"loss": 0.0626,
"step": 35800
},
{
"epoch": 46.09,
"learning_rate": 8.015768725361367e-06,
"loss": 0.0626,
"step": 36000
},
{
"epoch": 46.09,
"eval_loss": 0.163968026638031,
"eval_runtime": 129.7157,
"eval_samples_per_second": 11.733,
"eval_wer": 0.25345715150903403,
"step": 36000
},
{
"epoch": 46.35,
"learning_rate": 7.490144546649146e-06,
"loss": 0.0577,
"step": 36200
},
{
"epoch": 46.61,
"learning_rate": 6.964520367936925e-06,
"loss": 0.0595,
"step": 36400
},
{
"epoch": 46.61,
"eval_loss": 0.16631732881069183,
"eval_runtime": 132.5032,
"eval_samples_per_second": 11.487,
"eval_wer": 0.25143837690521853,
"step": 36400
},
{
"epoch": 46.86,
"learning_rate": 6.438896189224705e-06,
"loss": 0.054,
"step": 36600
},
{
"epoch": 47.12,
"learning_rate": 5.913272010512484e-06,
"loss": 0.0625,
"step": 36800
},
{
"epoch": 47.12,
"eval_loss": 0.1676352173089981,
"eval_runtime": 131.5543,
"eval_samples_per_second": 11.569,
"eval_wer": 0.25214494801655396,
"step": 36800
},
{
"epoch": 47.37,
"learning_rate": 5.3876478318002635e-06,
"loss": 0.0599,
"step": 37000
},
{
"epoch": 47.63,
"learning_rate": 4.862023653088043e-06,
"loss": 0.0625,
"step": 37200
},
{
"epoch": 47.63,
"eval_loss": 0.16559843719005585,
"eval_runtime": 122.7608,
"eval_samples_per_second": 12.398,
"eval_wer": 0.2523468254769355,
"step": 37200
},
{
"epoch": 47.89,
"learning_rate": 4.336399474375821e-06,
"loss": 0.0612,
"step": 37400
},
{
"epoch": 48.14,
"learning_rate": 3.810775295663601e-06,
"loss": 0.0526,
"step": 37600
},
{
"epoch": 48.14,
"eval_loss": 0.1701846420764923,
"eval_runtime": 121.6682,
"eval_samples_per_second": 12.509,
"eval_wer": 0.2527505803976986,
"step": 37600
},
{
"epoch": 48.4,
"learning_rate": 3.2851511169513802e-06,
"loss": 0.0645,
"step": 37800
},
{
"epoch": 48.66,
"learning_rate": 2.759526938239159e-06,
"loss": 0.0484,
"step": 38000
},
{
"epoch": 48.66,
"eval_loss": 0.16875717043876648,
"eval_runtime": 121.6109,
"eval_samples_per_second": 12.515,
"eval_wer": 0.25335621277884324,
"step": 38000
},
{
"epoch": 48.91,
"learning_rate": 2.2339027595269386e-06,
"loss": 0.0564,
"step": 38200
},
{
"epoch": 49.17,
"learning_rate": 1.7082785808147174e-06,
"loss": 0.0674,
"step": 38400
},
{
"epoch": 49.17,
"eval_loss": 0.16684982180595398,
"eval_runtime": 121.9908,
"eval_samples_per_second": 12.476,
"eval_wer": 0.25224588674674475,
"step": 38400
},
{
"epoch": 49.42,
"learning_rate": 1.1826544021024968e-06,
"loss": 0.064,
"step": 38600
},
{
"epoch": 49.68,
"learning_rate": 6.57030223390276e-07,
"loss": 0.054,
"step": 38800
},
{
"epoch": 49.68,
"eval_loss": 0.16605187952518463,
"eval_runtime": 121.8992,
"eval_samples_per_second": 12.486,
"eval_wer": 0.2519430705561724,
"step": 38800
},
{
"epoch": 49.94,
"learning_rate": 1.314060446780552e-07,
"loss": 0.0545,
"step": 39000
}
],
"max_steps": 39050,
"num_train_epochs": 50,
"total_flos": 5.8955237568621085e+19,
"trial_name": null,
"trial_params": null
}