wav2vec2-xls-r-2b-ft-btb-ccv-cy / trainer_state.json
DewiBrynJones's picture
End of training
cafbada verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.65345170671973,
"eval_steps": 1000,
"global_step": 200000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.019133629266799325,
"grad_norm": 3.534731864929199,
"learning_rate": 0.0002982,
"loss": 2.0385,
"step": 500
},
{
"epoch": 0.03826725853359865,
"grad_norm": 3.997960090637207,
"learning_rate": 0.00029925413533834583,
"loss": 1.5221,
"step": 1000
},
{
"epoch": 0.03826725853359865,
"eval_loss": Infinity,
"eval_runtime": 316.3189,
"eval_samples_per_second": 22.199,
"eval_steps_per_second": 2.776,
"eval_wer": 0.9139914321980114,
"step": 1000
},
{
"epoch": 0.05740088780039798,
"grad_norm": 2.7378287315368652,
"learning_rate": 0.0002985022556390977,
"loss": 1.4633,
"step": 1500
},
{
"epoch": 0.0765345170671973,
"grad_norm": 3.522592782974243,
"learning_rate": 0.0002977503759398496,
"loss": 1.4078,
"step": 2000
},
{
"epoch": 0.0765345170671973,
"eval_loss": Infinity,
"eval_runtime": 316.2822,
"eval_samples_per_second": 22.202,
"eval_steps_per_second": 2.776,
"eval_wer": 0.9065607150412524,
"step": 2000
},
{
"epoch": 0.09566814633399663,
"grad_norm": 8.136958122253418,
"learning_rate": 0.0002969984962406015,
"loss": 1.3589,
"step": 2500
},
{
"epoch": 0.11480177560079596,
"grad_norm": 2.9784021377563477,
"learning_rate": 0.00029624661654135335,
"loss": 1.336,
"step": 3000
},
{
"epoch": 0.11480177560079596,
"eval_loss": Infinity,
"eval_runtime": 315.7333,
"eval_samples_per_second": 22.24,
"eval_steps_per_second": 2.781,
"eval_wer": 0.8949386503067485,
"step": 3000
},
{
"epoch": 0.13393540486759528,
"grad_norm": 3.5435404777526855,
"learning_rate": 0.0002954947368421052,
"loss": 1.3258,
"step": 3500
},
{
"epoch": 0.1530690341343946,
"grad_norm": 2.8903892040252686,
"learning_rate": 0.0002947443609022556,
"loss": 1.2887,
"step": 4000
},
{
"epoch": 0.1530690341343946,
"eval_loss": Infinity,
"eval_runtime": 316.2047,
"eval_samples_per_second": 22.207,
"eval_steps_per_second": 2.777,
"eval_wer": 0.8745901205838799,
"step": 4000
},
{
"epoch": 0.17220266340119394,
"grad_norm": 2.647843360900879,
"learning_rate": 0.000293993984962406,
"loss": 1.2713,
"step": 4500
},
{
"epoch": 0.19133629266799326,
"grad_norm": 3.8844950199127197,
"learning_rate": 0.00029324210526315787,
"loss": 1.26,
"step": 5000
},
{
"epoch": 0.19133629266799326,
"eval_loss": Infinity,
"eval_runtime": 315.2217,
"eval_samples_per_second": 22.276,
"eval_steps_per_second": 2.785,
"eval_wer": 0.8671197376771737,
"step": 5000
},
{
"epoch": 0.21046992193479258,
"grad_norm": 3.8498334884643555,
"learning_rate": 0.00029249022556390974,
"loss": 1.231,
"step": 5500
},
{
"epoch": 0.22960355120159193,
"grad_norm": 2.3221595287323,
"learning_rate": 0.0002917383458646616,
"loss": 1.2188,
"step": 6000
},
{
"epoch": 0.22960355120159193,
"eval_loss": Infinity,
"eval_runtime": 315.8377,
"eval_samples_per_second": 22.233,
"eval_steps_per_second": 2.78,
"eval_wer": 0.8699756716733658,
"step": 6000
},
{
"epoch": 0.24873718046839124,
"grad_norm": 2.9522998332977295,
"learning_rate": 0.00029098646616541353,
"loss": 1.1974,
"step": 6500
},
{
"epoch": 0.26787080973519056,
"grad_norm": 5.0502214431762695,
"learning_rate": 0.0002902345864661654,
"loss": 1.1992,
"step": 7000
},
{
"epoch": 0.26787080973519056,
"eval_loss": Infinity,
"eval_runtime": 315.6699,
"eval_samples_per_second": 22.245,
"eval_steps_per_second": 2.781,
"eval_wer": 0.8537920456949439,
"step": 7000
},
{
"epoch": 0.2870044390019899,
"grad_norm": 3.480316400527954,
"learning_rate": 0.00028948270676691727,
"loss": 1.1786,
"step": 7500
},
{
"epoch": 0.3061380682687892,
"grad_norm": 3.4583587646484375,
"learning_rate": 0.00028873082706766913,
"loss": 1.1773,
"step": 8000
},
{
"epoch": 0.3061380682687892,
"eval_loss": Infinity,
"eval_runtime": 315.6868,
"eval_samples_per_second": 22.244,
"eval_steps_per_second": 2.781,
"eval_wer": 0.8329278612227629,
"step": 8000
},
{
"epoch": 0.32527169753558854,
"grad_norm": 3.2121310234069824,
"learning_rate": 0.00028797894736842106,
"loss": 1.1475,
"step": 8500
},
{
"epoch": 0.3444053268023879,
"grad_norm": 6.1687846183776855,
"learning_rate": 0.0002872285714285714,
"loss": 1.1602,
"step": 9000
},
{
"epoch": 0.3444053268023879,
"eval_loss": Infinity,
"eval_runtime": 315.9185,
"eval_samples_per_second": 22.227,
"eval_steps_per_second": 2.779,
"eval_wer": 0.8200497144066003,
"step": 9000
},
{
"epoch": 0.3635389560691872,
"grad_norm": 2.9368505477905273,
"learning_rate": 0.0002864766917293233,
"loss": 1.1233,
"step": 9500
},
{
"epoch": 0.3826725853359865,
"grad_norm": 2.0912511348724365,
"learning_rate": 0.00028572481203007513,
"loss": 1.1128,
"step": 10000
},
{
"epoch": 0.3826725853359865,
"eval_loss": Infinity,
"eval_runtime": 316.2709,
"eval_samples_per_second": 22.202,
"eval_steps_per_second": 2.776,
"eval_wer": 0.8059287074254284,
"step": 10000
},
{
"epoch": 0.40180621460278587,
"grad_norm": 2.822795867919922,
"learning_rate": 0.00028497293233082705,
"loss": 1.096,
"step": 10500
},
{
"epoch": 0.42093984386958516,
"grad_norm": 2.4663002490997314,
"learning_rate": 0.0002842210526315789,
"loss": 1.0893,
"step": 11000
},
{
"epoch": 0.42093984386958516,
"eval_loss": Infinity,
"eval_runtime": 317.2266,
"eval_samples_per_second": 22.136,
"eval_steps_per_second": 2.768,
"eval_wer": 0.827678760313095,
"step": 11000
},
{
"epoch": 0.4400734731363845,
"grad_norm": 1.9610426425933838,
"learning_rate": 0.0002834691729323308,
"loss": 1.0702,
"step": 11500
},
{
"epoch": 0.45920710240318385,
"grad_norm": 3.893796682357788,
"learning_rate": 0.00028271729323308266,
"loss": 1.0809,
"step": 12000
},
{
"epoch": 0.45920710240318385,
"eval_loss": Infinity,
"eval_runtime": 316.2707,
"eval_samples_per_second": 22.202,
"eval_steps_per_second": 2.776,
"eval_wer": 0.8028480008462027,
"step": 12000
},
{
"epoch": 0.47834073166998314,
"grad_norm": 3.08317494392395,
"learning_rate": 0.0002819669172932331,
"loss": 1.0616,
"step": 12500
},
{
"epoch": 0.4974743609367825,
"grad_norm": 1.9941602945327759,
"learning_rate": 0.0002812150375939849,
"loss": 1.0426,
"step": 13000
},
{
"epoch": 0.4974743609367825,
"eval_loss": Infinity,
"eval_runtime": 314.9265,
"eval_samples_per_second": 22.297,
"eval_steps_per_second": 2.788,
"eval_wer": 0.8004416120160779,
"step": 13000
},
{
"epoch": 0.5166079902035818,
"grad_norm": 2.075686454772949,
"learning_rate": 0.0002804646616541353,
"loss": 1.0316,
"step": 13500
},
{
"epoch": 0.5357416194703811,
"grad_norm": 1.7053288221359253,
"learning_rate": 0.0002797127819548872,
"loss": 1.0202,
"step": 14000
},
{
"epoch": 0.5357416194703811,
"eval_loss": Infinity,
"eval_runtime": 315.6762,
"eval_samples_per_second": 22.244,
"eval_steps_per_second": 2.781,
"eval_wer": 0.7824598053733869,
"step": 14000
},
{
"epoch": 0.5548752487371804,
"grad_norm": 2.8614988327026367,
"learning_rate": 0.00027896240601503757,
"loss": 1.0398,
"step": 14500
},
{
"epoch": 0.5740088780039798,
"grad_norm": 2.7677505016326904,
"learning_rate": 0.0002782105263157895,
"loss": 1.0005,
"step": 15000
},
{
"epoch": 0.5740088780039798,
"eval_loss": Infinity,
"eval_runtime": 316.6237,
"eval_samples_per_second": 22.178,
"eval_steps_per_second": 2.773,
"eval_wer": 0.7785725618785699,
"step": 15000
},
{
"epoch": 0.5931425072707791,
"grad_norm": 1.8297598361968994,
"learning_rate": 0.0002774586466165413,
"loss": 1.0069,
"step": 15500
},
{
"epoch": 0.6122761365375784,
"grad_norm": 3.259901285171509,
"learning_rate": 0.0002767067669172932,
"loss": 0.9987,
"step": 16000
},
{
"epoch": 0.6122761365375784,
"eval_loss": Infinity,
"eval_runtime": 317.076,
"eval_samples_per_second": 22.146,
"eval_steps_per_second": 2.769,
"eval_wer": 0.752102284747197,
"step": 16000
},
{
"epoch": 0.6314097658043778,
"grad_norm": 2.1204450130462646,
"learning_rate": 0.0002759548872180451,
"loss": 0.9862,
"step": 16500
},
{
"epoch": 0.6505433950711771,
"grad_norm": 4.308948993682861,
"learning_rate": 0.00027520300751879696,
"loss": 0.9705,
"step": 17000
},
{
"epoch": 0.6505433950711771,
"eval_loss": Infinity,
"eval_runtime": 316.4489,
"eval_samples_per_second": 22.19,
"eval_steps_per_second": 2.775,
"eval_wer": 0.7592817854876243,
"step": 17000
},
{
"epoch": 0.6696770243379764,
"grad_norm": 3.809417247772217,
"learning_rate": 0.00027445112781954883,
"loss": 0.9836,
"step": 17500
},
{
"epoch": 0.6888106536047758,
"grad_norm": 8.3826904296875,
"learning_rate": 0.00027369924812030075,
"loss": 0.9884,
"step": 18000
},
{
"epoch": 0.6888106536047758,
"eval_loss": Infinity,
"eval_runtime": 317.2775,
"eval_samples_per_second": 22.132,
"eval_steps_per_second": 2.767,
"eval_wer": 0.7380606092659192,
"step": 18000
},
{
"epoch": 0.7079442828715751,
"grad_norm": 2.119809627532959,
"learning_rate": 0.0002729473684210526,
"loss": 0.9554,
"step": 18500
},
{
"epoch": 0.7270779121383744,
"grad_norm": 2.714183807373047,
"learning_rate": 0.0002721954887218045,
"loss": 0.9618,
"step": 19000
},
{
"epoch": 0.7270779121383744,
"eval_loss": Infinity,
"eval_runtime": 316.8028,
"eval_samples_per_second": 22.165,
"eval_steps_per_second": 2.771,
"eval_wer": 0.7364739792680347,
"step": 19000
},
{
"epoch": 0.7462115414051738,
"grad_norm": 4.593650817871094,
"learning_rate": 0.00027144360902255635,
"loss": 0.9389,
"step": 19500
},
{
"epoch": 0.765345170671973,
"grad_norm": 1.7419074773788452,
"learning_rate": 0.0002706917293233083,
"loss": 0.9374,
"step": 20000
},
{
"epoch": 0.765345170671973,
"eval_loss": Infinity,
"eval_runtime": 316.3558,
"eval_samples_per_second": 22.197,
"eval_steps_per_second": 2.775,
"eval_wer": 0.7415644171779141,
"step": 20000
},
{
"epoch": 0.7844787999387723,
"grad_norm": NaN,
"learning_rate": 0.0002699413533834586,
"loss": 0.9241,
"step": 20500
},
{
"epoch": 0.8036124292055717,
"grad_norm": 2.104843854904175,
"learning_rate": 0.0002691894736842105,
"loss": 0.9175,
"step": 21000
},
{
"epoch": 0.8036124292055717,
"eval_loss": Infinity,
"eval_runtime": 315.3422,
"eval_samples_per_second": 22.268,
"eval_steps_per_second": 2.784,
"eval_wer": 0.7299291305267611,
"step": 21000
},
{
"epoch": 0.822746058472371,
"grad_norm": 3.1283345222473145,
"learning_rate": 0.0002684390977443609,
"loss": 0.9193,
"step": 21500
},
{
"epoch": 0.8418796877391703,
"grad_norm": 3.1702754497528076,
"learning_rate": 0.00026768721804511274,
"loss": 0.9247,
"step": 22000
},
{
"epoch": 0.8418796877391703,
"eval_loss": Infinity,
"eval_runtime": 316.0504,
"eval_samples_per_second": 22.218,
"eval_steps_per_second": 2.778,
"eval_wer": 0.7260154431986461,
"step": 22000
},
{
"epoch": 0.8610133170059697,
"grad_norm": 4.537879943847656,
"learning_rate": 0.00026693533834586466,
"loss": 0.9154,
"step": 22500
},
{
"epoch": 0.880146946272769,
"grad_norm": 3.9466328620910645,
"learning_rate": 0.00026618345864661653,
"loss": 0.9001,
"step": 23000
},
{
"epoch": 0.880146946272769,
"eval_loss": Infinity,
"eval_runtime": 315.881,
"eval_samples_per_second": 22.23,
"eval_steps_per_second": 2.78,
"eval_wer": 0.72349005711868,
"step": 23000
},
{
"epoch": 0.8992805755395683,
"grad_norm": 5.256113052368164,
"learning_rate": 0.0002654315789473684,
"loss": 0.9027,
"step": 23500
},
{
"epoch": 0.9184142048063677,
"grad_norm": 2.1492791175842285,
"learning_rate": 0.00026467969924812027,
"loss": 0.8836,
"step": 24000
},
{
"epoch": 0.9184142048063677,
"eval_loss": Infinity,
"eval_runtime": 315.4361,
"eval_samples_per_second": 22.261,
"eval_steps_per_second": 2.783,
"eval_wer": 0.7085757351385656,
"step": 24000
},
{
"epoch": 0.937547834073167,
"grad_norm": 3.541306734085083,
"learning_rate": 0.0002639278195488722,
"loss": 0.8782,
"step": 24500
},
{
"epoch": 0.9566814633399663,
"grad_norm": 1.7392828464508057,
"learning_rate": 0.000263175939849624,
"loss": 0.8789,
"step": 25000
},
{
"epoch": 0.9566814633399663,
"eval_loss": Infinity,
"eval_runtime": 316.1779,
"eval_samples_per_second": 22.209,
"eval_steps_per_second": 2.777,
"eval_wer": 0.7144859318806854,
"step": 25000
},
{
"epoch": 0.9758150926067657,
"grad_norm": 2.769277572631836,
"learning_rate": 0.0002624240601503759,
"loss": 0.8776,
"step": 25500
},
{
"epoch": 0.994948721873565,
"grad_norm": 3.246126413345337,
"learning_rate": 0.0002616721804511278,
"loss": 0.8734,
"step": 26000
},
{
"epoch": 0.994948721873565,
"eval_loss": Infinity,
"eval_runtime": 315.5548,
"eval_samples_per_second": 22.253,
"eval_steps_per_second": 2.782,
"eval_wer": 0.7195102602073197,
"step": 26000
},
{
"epoch": 1.0140823511403643,
"grad_norm": 1.4256001710891724,
"learning_rate": 0.00026092030075187966,
"loss": 0.8471,
"step": 26500
},
{
"epoch": 1.0332159804071637,
"grad_norm": 1.2950644493103027,
"learning_rate": 0.00026016992481203005,
"loss": 0.8398,
"step": 27000
},
{
"epoch": 1.0332159804071637,
"eval_loss": Infinity,
"eval_runtime": 314.7022,
"eval_samples_per_second": 22.313,
"eval_steps_per_second": 2.79,
"eval_wer": 0.6837978633382695,
"step": 27000
},
{
"epoch": 1.0523496096739628,
"grad_norm": 0.8874345421791077,
"learning_rate": 0.0002594180451127819,
"loss": 0.8403,
"step": 27500
},
{
"epoch": 1.0714832389407623,
"grad_norm": 1.146148920059204,
"learning_rate": 0.0002586661654135338,
"loss": 0.8268,
"step": 28000
},
{
"epoch": 1.0714832389407623,
"eval_loss": Infinity,
"eval_runtime": 313.7884,
"eval_samples_per_second": 22.378,
"eval_steps_per_second": 2.798,
"eval_wer": 0.6793288555108948,
"step": 28000
},
{
"epoch": 1.0906168682075617,
"grad_norm": 0.687147319316864,
"learning_rate": 0.0002579142857142857,
"loss": 0.8228,
"step": 28500
},
{
"epoch": 1.1097504974743608,
"grad_norm": 0.614025890827179,
"learning_rate": 0.00025716390977443605,
"loss": 0.8196,
"step": 29000
},
{
"epoch": 1.1097504974743608,
"eval_loss": Infinity,
"eval_runtime": 315.3179,
"eval_samples_per_second": 22.27,
"eval_steps_per_second": 2.784,
"eval_wer": 0.66387243494817,
"step": 29000
},
{
"epoch": 1.1288841267411602,
"grad_norm": 0.7900418043136597,
"learning_rate": 0.0002564120300751879,
"loss": 0.8262,
"step": 29500
},
{
"epoch": 1.1480177560079596,
"grad_norm": 0.9414839148521423,
"learning_rate": 0.00025566015037593984,
"loss": 0.8124,
"step": 30000
},
{
"epoch": 1.1480177560079596,
"eval_loss": Infinity,
"eval_runtime": 314.1972,
"eval_samples_per_second": 22.349,
"eval_steps_per_second": 2.794,
"eval_wer": 0.6615453776179395,
"step": 30000
},
{
"epoch": 1.1671513852747588,
"grad_norm": 0.9619298577308655,
"learning_rate": 0.0002549082706766917,
"loss": 0.8052,
"step": 30500
},
{
"epoch": 1.1862850145415582,
"grad_norm": 1.3762531280517578,
"learning_rate": 0.0002541563909774436,
"loss": 0.7935,
"step": 31000
},
{
"epoch": 1.1862850145415582,
"eval_loss": Infinity,
"eval_runtime": 315.3254,
"eval_samples_per_second": 22.269,
"eval_steps_per_second": 2.784,
"eval_wer": 0.6607520626189972,
"step": 31000
},
{
"epoch": 1.2054186438083576,
"grad_norm": 1.0670289993286133,
"learning_rate": 0.00025340451127819544,
"loss": 0.799,
"step": 31500
},
{
"epoch": 1.2245522730751568,
"grad_norm": 1.5455262660980225,
"learning_rate": 0.00025265263157894736,
"loss": 0.817,
"step": 32000
},
{
"epoch": 1.2245522730751568,
"eval_loss": Infinity,
"eval_runtime": 314.3401,
"eval_samples_per_second": 22.339,
"eval_steps_per_second": 2.793,
"eval_wer": 0.6709726041887032,
"step": 32000
},
{
"epoch": 1.2436859023419562,
"grad_norm": 1.185735821723938,
"learning_rate": 0.00025190075187969923,
"loss": 0.7835,
"step": 32500
},
{
"epoch": 1.2628195316087556,
"grad_norm": 0.8058122396469116,
"learning_rate": 0.0002511503759398496,
"loss": 0.7975,
"step": 33000
},
{
"epoch": 1.2628195316087556,
"eval_loss": Infinity,
"eval_runtime": 315.7071,
"eval_samples_per_second": 22.242,
"eval_steps_per_second": 2.781,
"eval_wer": 0.66950497144066,
"step": 33000
},
{
"epoch": 1.2819531608755548,
"grad_norm": 0.6225046515464783,
"learning_rate": 0.0002503984962406015,
"loss": 0.8023,
"step": 33500
},
{
"epoch": 1.3010867901423542,
"grad_norm": 0.8266538381576538,
"learning_rate": 0.00024964661654135336,
"loss": 0.7746,
"step": 34000
},
{
"epoch": 1.3010867901423542,
"eval_loss": Infinity,
"eval_runtime": 316.3157,
"eval_samples_per_second": 22.199,
"eval_steps_per_second": 2.776,
"eval_wer": 0.6674423524434102,
"step": 34000
},
{
"epoch": 1.3202204194091536,
"grad_norm": 0.8520437479019165,
"learning_rate": 0.00024889624060150375,
"loss": 0.7828,
"step": 34500
},
{
"epoch": 1.3393540486759528,
"grad_norm": 0.9565845131874084,
"learning_rate": 0.0002481443609022556,
"loss": 0.8013,
"step": 35000
},
{
"epoch": 1.3393540486759528,
"eval_loss": Infinity,
"eval_runtime": 315.1504,
"eval_samples_per_second": 22.281,
"eval_steps_per_second": 2.786,
"eval_wer": 0.6586762217050983,
"step": 35000
},
{
"epoch": 1.3584876779427522,
"grad_norm": 1.2239874601364136,
"learning_rate": 0.0002473924812030075,
"loss": 0.7765,
"step": 35500
},
{
"epoch": 1.3776213072095516,
"grad_norm": 0.6564140319824219,
"learning_rate": 0.00024664060150375935,
"loss": 0.7703,
"step": 36000
},
{
"epoch": 1.3776213072095516,
"eval_loss": Infinity,
"eval_runtime": 316.0508,
"eval_samples_per_second": 22.218,
"eval_steps_per_second": 2.778,
"eval_wer": 0.6388301248148932,
"step": 36000
},
{
"epoch": 1.3967549364763507,
"grad_norm": 0.7060734629631042,
"learning_rate": 0.0002458887218045113,
"loss": 0.7685,
"step": 36500
},
{
"epoch": 1.4158885657431501,
"grad_norm": 1.3065494298934937,
"learning_rate": 0.00024513684210526314,
"loss": 0.7581,
"step": 37000
},
{
"epoch": 1.4158885657431501,
"eval_loss": Infinity,
"eval_runtime": 315.8618,
"eval_samples_per_second": 22.231,
"eval_steps_per_second": 2.78,
"eval_wer": 0.6461021789718637,
"step": 37000
},
{
"epoch": 1.4350221950099495,
"grad_norm": 1.282378911972046,
"learning_rate": 0.00024438646616541354,
"loss": 0.7693,
"step": 37500
},
{
"epoch": 1.4541558242767487,
"grad_norm": 1.1911076307296753,
"learning_rate": 0.00024363458646616538,
"loss": 0.7468,
"step": 38000
},
{
"epoch": 1.4541558242767487,
"eval_loss": Infinity,
"eval_runtime": 316.6111,
"eval_samples_per_second": 22.179,
"eval_steps_per_second": 2.773,
"eval_wer": 0.6333959170721388,
"step": 38000
},
{
"epoch": 1.4732894535435481,
"grad_norm": 0.7600271105766296,
"learning_rate": 0.00024288270676691727,
"loss": 0.7582,
"step": 38500
},
{
"epoch": 1.4924230828103475,
"grad_norm": 0.6814852356910706,
"learning_rate": 0.00024213082706766914,
"loss": 0.7534,
"step": 39000
},
{
"epoch": 1.4924230828103475,
"eval_loss": Infinity,
"eval_runtime": 316.0864,
"eval_samples_per_second": 22.215,
"eval_steps_per_second": 2.778,
"eval_wer": 0.6300639940765813,
"step": 39000
},
{
"epoch": 1.5115567120771467,
"grad_norm": 0.7314792275428772,
"learning_rate": 0.00024137894736842104,
"loss": 0.7336,
"step": 39500
},
{
"epoch": 1.530690341343946,
"grad_norm": 1.1860034465789795,
"learning_rate": 0.0002406270676691729,
"loss": 0.752,
"step": 40000
},
{
"epoch": 1.530690341343946,
"eval_loss": Infinity,
"eval_runtime": 318.462,
"eval_samples_per_second": 22.05,
"eval_steps_per_second": 2.757,
"eval_wer": 0.6221969536704041,
"step": 40000
},
{
"epoch": 1.5498239706107455,
"grad_norm": 1.3469467163085938,
"learning_rate": 0.0002398766917293233,
"loss": 0.7508,
"step": 40500
},
{
"epoch": 1.5689575998775447,
"grad_norm": 0.7789831757545471,
"learning_rate": 0.00023912481203007516,
"loss": 0.736,
"step": 41000
},
{
"epoch": 1.5689575998775447,
"eval_loss": Infinity,
"eval_runtime": 316.8112,
"eval_samples_per_second": 22.165,
"eval_steps_per_second": 2.771,
"eval_wer": 0.6203326634228897,
"step": 41000
},
{
"epoch": 1.588091229144344,
"grad_norm": 1.3181277513504028,
"learning_rate": 0.00023837443609022553,
"loss": 0.752,
"step": 41500
},
{
"epoch": 1.6072248584111435,
"grad_norm": 0.6017114520072937,
"learning_rate": 0.00023762255639097742,
"loss": 0.7188,
"step": 42000
},
{
"epoch": 1.6072248584111435,
"eval_loss": Infinity,
"eval_runtime": 315.1913,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 2.786,
"eval_wer": 0.620795430505606,
"step": 42000
},
{
"epoch": 1.6263584876779427,
"grad_norm": 0.7502321600914001,
"learning_rate": 0.0002368706766917293,
"loss": 0.7138,
"step": 42500
},
{
"epoch": 1.645492116944742,
"grad_norm": 0.6769944429397583,
"learning_rate": 0.00023611879699248119,
"loss": 0.7308,
"step": 43000
},
{
"epoch": 1.645492116944742,
"eval_loss": Infinity,
"eval_runtime": 316.0035,
"eval_samples_per_second": 22.221,
"eval_steps_per_second": 2.778,
"eval_wer": 0.6056695578591073,
"step": 43000
},
{
"epoch": 1.6646257462115415,
"grad_norm": 1.050374984741211,
"learning_rate": 0.00023536691729323305,
"loss": 0.7145,
"step": 43500
},
{
"epoch": 1.6837593754783406,
"grad_norm": 1.2912209033966064,
"learning_rate": 0.00023461503759398495,
"loss": 0.7179,
"step": 44000
},
{
"epoch": 1.6837593754783406,
"eval_loss": Infinity,
"eval_runtime": 315.6998,
"eval_samples_per_second": 22.243,
"eval_steps_per_second": 2.781,
"eval_wer": 0.6291649037444468,
"step": 44000
},
{
"epoch": 1.70289300474514,
"grad_norm": 0.8525875210762024,
"learning_rate": 0.00023386315789473682,
"loss": 0.7237,
"step": 44500
},
{
"epoch": 1.7220266340119394,
"grad_norm": 1.1356332302093506,
"learning_rate": 0.0002331112781954887,
"loss": 0.7341,
"step": 45000
},
{
"epoch": 1.7220266340119394,
"eval_loss": Infinity,
"eval_runtime": 316.323,
"eval_samples_per_second": 22.199,
"eval_steps_per_second": 2.776,
"eval_wer": 0.6034218320287709,
"step": 45000
},
{
"epoch": 1.7411602632787386,
"grad_norm": 0.6900098323822021,
"learning_rate": 0.00023235939849624058,
"loss": 0.7166,
"step": 45500
},
{
"epoch": 1.760293892545538,
"grad_norm": 0.7305801510810852,
"learning_rate": 0.00023160902255639097,
"loss": 0.7061,
"step": 46000
},
{
"epoch": 1.760293892545538,
"eval_loss": Infinity,
"eval_runtime": 315.9517,
"eval_samples_per_second": 22.225,
"eval_steps_per_second": 2.779,
"eval_wer": 0.6136555955151258,
"step": 46000
},
{
"epoch": 1.7794275218123374,
"grad_norm": 1.960204839706421,
"learning_rate": 0.00023085714285714284,
"loss": 0.688,
"step": 46500
},
{
"epoch": 1.7985611510791366,
"grad_norm": 1.001535415649414,
"learning_rate": 0.00023010526315789473,
"loss": 0.7081,
"step": 47000
},
{
"epoch": 1.7985611510791366,
"eval_loss": Infinity,
"eval_runtime": 316.7057,
"eval_samples_per_second": 22.172,
"eval_steps_per_second": 2.772,
"eval_wer": 0.6123334038502222,
"step": 47000
},
{
"epoch": 1.817694780345936,
"grad_norm": 0.8360883593559265,
"learning_rate": 0.0002293533834586466,
"loss": 0.704,
"step": 47500
},
{
"epoch": 1.8368284096127354,
"grad_norm": 1.349821925163269,
"learning_rate": 0.0002286015037593985,
"loss": 0.6957,
"step": 48000
},
{
"epoch": 1.8368284096127354,
"eval_loss": Infinity,
"eval_runtime": 316.3207,
"eval_samples_per_second": 22.199,
"eval_steps_per_second": 2.776,
"eval_wer": 0.6053522318595304,
"step": 48000
},
{
"epoch": 1.8559620388795346,
"grad_norm": 0.9751301407814026,
"learning_rate": 0.00022785112781954886,
"loss": 0.6974,
"step": 48500
},
{
"epoch": 1.875095668146334,
"grad_norm": 0.7246661186218262,
"learning_rate": 0.00022710075187969923,
"loss": 0.7052,
"step": 49000
},
{
"epoch": 1.875095668146334,
"eval_loss": Infinity,
"eval_runtime": 315.447,
"eval_samples_per_second": 22.26,
"eval_steps_per_second": 2.783,
"eval_wer": 0.616498307594669,
"step": 49000
},
{
"epoch": 1.8942292974131334,
"grad_norm": 0.716491162776947,
"learning_rate": 0.00022634887218045112,
"loss": 0.6898,
"step": 49500
},
{
"epoch": 1.9133629266799326,
"grad_norm": 0.7438942193984985,
"learning_rate": 0.000225596992481203,
"loss": 0.6833,
"step": 50000
},
{
"epoch": 1.9133629266799326,
"eval_loss": Infinity,
"eval_runtime": 316.1363,
"eval_samples_per_second": 22.212,
"eval_steps_per_second": 2.777,
"eval_wer": 0.5887455045483393,
"step": 50000
},
{
"epoch": 1.932496555946732,
"grad_norm": 0.8066436648368835,
"learning_rate": 0.00022484511278195488,
"loss": 0.6755,
"step": 50500
},
{
"epoch": 1.9516301852135314,
"grad_norm": 0.6446587443351746,
"learning_rate": 0.00022409323308270672,
"loss": 0.6995,
"step": 51000
},
{
"epoch": 1.9516301852135314,
"eval_loss": Infinity,
"eval_runtime": 315.7016,
"eval_samples_per_second": 22.243,
"eval_steps_per_second": 2.781,
"eval_wer": 0.5871456526338058,
"step": 51000
},
{
"epoch": 1.9707638144803306,
"grad_norm": 0.8823833465576172,
"learning_rate": 0.00022334135338345862,
"loss": 0.6909,
"step": 51500
},
{
"epoch": 1.98989744374713,
"grad_norm": 0.8129588961601257,
"learning_rate": 0.0002225894736842105,
"loss": 0.6703,
"step": 52000
},
{
"epoch": 1.98989744374713,
"eval_loss": Infinity,
"eval_runtime": 316.397,
"eval_samples_per_second": 22.194,
"eval_steps_per_second": 2.775,
"eval_wer": 0.5954357943727523,
"step": 52000
},
{
"epoch": 2.0090310730139294,
"grad_norm": 0.9364180564880371,
"learning_rate": 0.0002218390977443609,
"loss": 0.6462,
"step": 52500
},
{
"epoch": 2.0281647022807285,
"grad_norm": 0.5429893732070923,
"learning_rate": 0.00022108721804511275,
"loss": 0.6265,
"step": 53000
},
{
"epoch": 2.0281647022807285,
"eval_loss": Infinity,
"eval_runtime": 315.6741,
"eval_samples_per_second": 22.244,
"eval_steps_per_second": 2.781,
"eval_wer": 0.5791728368944362,
"step": 53000
},
{
"epoch": 2.0472983315475277,
"grad_norm": 0.7964287996292114,
"learning_rate": 0.00022033533834586464,
"loss": 0.6283,
"step": 53500
},
{
"epoch": 2.0664319608143273,
"grad_norm": 0.928997278213501,
"learning_rate": 0.0002195834586466165,
"loss": 0.633,
"step": 54000
},
{
"epoch": 2.0664319608143273,
"eval_loss": Infinity,
"eval_runtime": 316.8143,
"eval_samples_per_second": 22.164,
"eval_steps_per_second": 2.771,
"eval_wer": 0.5696001692405331,
"step": 54000
},
{
"epoch": 2.0855655900811265,
"grad_norm": 0.9358041882514954,
"learning_rate": 0.0002188315789473684,
"loss": 0.6608,
"step": 54500
},
{
"epoch": 2.1046992193479257,
"grad_norm": 0.49673086404800415,
"learning_rate": 0.00021807969924812027,
"loss": 0.6399,
"step": 55000
},
{
"epoch": 2.1046992193479257,
"eval_loss": Infinity,
"eval_runtime": 316.4793,
"eval_samples_per_second": 22.188,
"eval_steps_per_second": 2.774,
"eval_wer": 0.5717685635709753,
"step": 55000
},
{
"epoch": 2.1238328486147253,
"grad_norm": 0.824380099773407,
"learning_rate": 0.00021732932330827064,
"loss": 0.6317,
"step": 55500
},
{
"epoch": 2.1429664778815245,
"grad_norm": 0.784487783908844,
"learning_rate": 0.00021657744360902253,
"loss": 0.6165,
"step": 56000
},
{
"epoch": 2.1429664778815245,
"eval_loss": Infinity,
"eval_runtime": 314.9557,
"eval_samples_per_second": 22.295,
"eval_steps_per_second": 2.788,
"eval_wer": 0.5836550666384599,
"step": 56000
},
{
"epoch": 2.1621001071483237,
"grad_norm": 1.1931605339050293,
"learning_rate": 0.0002158270676691729,
"loss": 0.6268,
"step": 56500
},
{
"epoch": 2.1812337364151233,
"grad_norm": 1.4376397132873535,
"learning_rate": 0.0002150751879699248,
"loss": 0.6148,
"step": 57000
},
{
"epoch": 2.1812337364151233,
"eval_loss": Infinity,
"eval_runtime": 314.0058,
"eval_samples_per_second": 22.363,
"eval_steps_per_second": 2.796,
"eval_wer": 0.5597233975037021,
"step": 57000
},
{
"epoch": 2.2003673656819225,
"grad_norm": 1.6363264322280884,
"learning_rate": 0.00021432330827067666,
"loss": 0.6377,
"step": 57500
},
{
"epoch": 2.2195009949487217,
"grad_norm": 4.29092264175415,
"learning_rate": 0.00021357142857142855,
"loss": 0.6228,
"step": 58000
},
{
"epoch": 2.2195009949487217,
"eval_loss": Infinity,
"eval_runtime": 314.1699,
"eval_samples_per_second": 22.351,
"eval_steps_per_second": 2.795,
"eval_wer": 0.5706843664057542,
"step": 58000
},
{
"epoch": 2.2386346242155213,
"grad_norm": 1.1523572206497192,
"learning_rate": 0.00021281954887218042,
"loss": 0.6243,
"step": 58500
},
{
"epoch": 2.2577682534823205,
"grad_norm": 2.0131170749664307,
"learning_rate": 0.00021206766917293232,
"loss": 0.6302,
"step": 59000
},
{
"epoch": 2.2577682534823205,
"eval_loss": Infinity,
"eval_runtime": 315.1147,
"eval_samples_per_second": 22.284,
"eval_steps_per_second": 2.786,
"eval_wer": 0.5717685635709753,
"step": 59000
},
{
"epoch": 2.2769018827491196,
"grad_norm": 1.358688235282898,
"learning_rate": 0.00021131578947368419,
"loss": 0.612,
"step": 59500
},
{
"epoch": 2.2960355120159193,
"grad_norm": 0.5921105742454529,
"learning_rate": 0.00021056390977443608,
"loss": 0.6035,
"step": 60000
},
{
"epoch": 2.2960355120159193,
"eval_loss": Infinity,
"eval_runtime": 314.6822,
"eval_samples_per_second": 22.315,
"eval_steps_per_second": 2.79,
"eval_wer": 0.5638883012481489,
"step": 60000
},
{
"epoch": 2.3151691412827184,
"grad_norm": 0.600351095199585,
"learning_rate": 0.00020981203007518795,
"loss": 0.6157,
"step": 60500
},
{
"epoch": 2.3343027705495176,
"grad_norm": 1.6874371767044067,
"learning_rate": 0.00020906015037593984,
"loss": 0.602,
"step": 61000
},
{
"epoch": 2.3343027705495176,
"eval_loss": Infinity,
"eval_runtime": 316.103,
"eval_samples_per_second": 22.214,
"eval_steps_per_second": 2.778,
"eval_wer": 0.5633197588322403,
"step": 61000
},
{
"epoch": 2.3534363998163172,
"grad_norm": 0.5364500284194946,
"learning_rate": 0.0002083082706766917,
"loss": 0.6057,
"step": 61500
},
{
"epoch": 2.3725700290831164,
"grad_norm": 1.9607787132263184,
"learning_rate": 0.0002075563909774436,
"loss": 0.6023,
"step": 62000
},
{
"epoch": 2.3725700290831164,
"eval_loss": Infinity,
"eval_runtime": 314.4546,
"eval_samples_per_second": 22.331,
"eval_steps_per_second": 2.792,
"eval_wer": 0.5581103236725196,
"step": 62000
},
{
"epoch": 2.3917036583499156,
"grad_norm": 1.1173665523529053,
"learning_rate": 0.00020680451127819547,
"loss": 0.6097,
"step": 62500
},
{
"epoch": 2.4108372876167152,
"grad_norm": 1.7585097551345825,
"learning_rate": 0.00020605563909774434,
"loss": 0.5924,
"step": 63000
},
{
"epoch": 2.4108372876167152,
"eval_loss": Infinity,
"eval_runtime": 314.7721,
"eval_samples_per_second": 22.308,
"eval_steps_per_second": 2.789,
"eval_wer": 0.5511820393484239,
"step": 63000
},
{
"epoch": 2.4299709168835144,
"grad_norm": 0.9437362551689148,
"learning_rate": 0.00020530375939849623,
"loss": 0.5968,
"step": 63500
},
{
"epoch": 2.4491045461503136,
"grad_norm": 0.6077060103416443,
"learning_rate": 0.0002045518796992481,
"loss": 0.5969,
"step": 64000
},
{
"epoch": 2.4491045461503136,
"eval_loss": Infinity,
"eval_runtime": 314.4786,
"eval_samples_per_second": 22.329,
"eval_steps_per_second": 2.792,
"eval_wer": 0.5489739792680347,
"step": 64000
},
{
"epoch": 2.468238175417113,
"grad_norm": 1.9816350936889648,
"learning_rate": 0.0002038,
"loss": 0.5978,
"step": 64500
},
{
"epoch": 2.4873718046839124,
"grad_norm": 1.865081787109375,
"learning_rate": 0.00020304812030075186,
"loss": 0.6029,
"step": 65000
},
{
"epoch": 2.4873718046839124,
"eval_loss": Infinity,
"eval_runtime": 314.4282,
"eval_samples_per_second": 22.333,
"eval_steps_per_second": 2.792,
"eval_wer": 0.5444124180241168,
"step": 65000
},
{
"epoch": 2.5065054339507116,
"grad_norm": 0.9471901059150696,
"learning_rate": 0.00020229624060150376,
"loss": 0.6068,
"step": 65500
},
{
"epoch": 2.525639063217511,
"grad_norm": 1.5803519487380981,
"learning_rate": 0.0002015443609022556,
"loss": 0.6046,
"step": 66000
},
{
"epoch": 2.525639063217511,
"eval_loss": Infinity,
"eval_runtime": 316.3481,
"eval_samples_per_second": 22.197,
"eval_steps_per_second": 2.775,
"eval_wer": 0.5460519356885974,
"step": 66000
},
{
"epoch": 2.5447726924843104,
"grad_norm": 0.7890714406967163,
"learning_rate": 0.0002007924812030075,
"loss": 0.5865,
"step": 66500
},
{
"epoch": 2.5639063217511096,
"grad_norm": 0.7467088103294373,
"learning_rate": 0.00020004060150375936,
"loss": 0.6095,
"step": 67000
},
{
"epoch": 2.5639063217511096,
"eval_loss": Infinity,
"eval_runtime": 316.5673,
"eval_samples_per_second": 22.182,
"eval_steps_per_second": 2.774,
"eval_wer": 0.5476253437698329,
"step": 67000
},
{
"epoch": 2.583039951017909,
"grad_norm": 0.7762987613677979,
"learning_rate": 0.00019928872180451126,
"loss": 0.5869,
"step": 67500
},
{
"epoch": 2.6021735802847084,
"grad_norm": 0.5018890500068665,
"learning_rate": 0.00019853684210526312,
"loss": 0.598,
"step": 68000
},
{
"epoch": 2.6021735802847084,
"eval_loss": Infinity,
"eval_runtime": 316.3997,
"eval_samples_per_second": 22.193,
"eval_steps_per_second": 2.775,
"eval_wer": 0.532129257457161,
"step": 68000
},
{
"epoch": 2.6213072095515075,
"grad_norm": 1.0056216716766357,
"learning_rate": 0.00019778496240601502,
"loss": 0.5821,
"step": 68500
},
{
"epoch": 2.640440838818307,
"grad_norm": 1.3957178592681885,
"learning_rate": 0.00019703308270676689,
"loss": 0.5812,
"step": 69000
},
{
"epoch": 2.640440838818307,
"eval_loss": Infinity,
"eval_runtime": 315.6807,
"eval_samples_per_second": 22.244,
"eval_steps_per_second": 2.781,
"eval_wer": 0.5357388407023482,
"step": 69000
},
{
"epoch": 2.6595744680851063,
"grad_norm": 0.6841593384742737,
"learning_rate": 0.00019628120300751878,
"loss": 0.5802,
"step": 69500
},
{
"epoch": 2.6787080973519055,
"grad_norm": 0.8700592517852783,
"learning_rate": 0.00019552932330827065,
"loss": 0.5957,
"step": 70000
},
{
"epoch": 2.6787080973519055,
"eval_loss": Infinity,
"eval_runtime": 316.4344,
"eval_samples_per_second": 22.191,
"eval_steps_per_second": 2.775,
"eval_wer": 0.5368230378675692,
"step": 70000
},
{
"epoch": 2.697841726618705,
"grad_norm": 1.0066908597946167,
"learning_rate": 0.00019477894736842104,
"loss": 0.5817,
"step": 70500
},
{
"epoch": 2.7169753558855043,
"grad_norm": 0.8253029584884644,
"learning_rate": 0.0001940270676691729,
"loss": 0.5909,
"step": 71000
},
{
"epoch": 2.7169753558855043,
"eval_loss": Infinity,
"eval_runtime": 318.9129,
"eval_samples_per_second": 22.019,
"eval_steps_per_second": 2.753,
"eval_wer": 0.5239448910514068,
"step": 71000
},
{
"epoch": 2.7361089851523035,
"grad_norm": 0.9649154543876648,
"learning_rate": 0.0001932751879699248,
"loss": 0.5877,
"step": 71500
},
{
"epoch": 2.755242614419103,
"grad_norm": 1.6835025548934937,
"learning_rate": 0.00019252330827067667,
"loss": 0.5953,
"step": 72000
},
{
"epoch": 2.755242614419103,
"eval_loss": Infinity,
"eval_runtime": 316.62,
"eval_samples_per_second": 22.178,
"eval_steps_per_second": 2.773,
"eval_wer": 0.5421911360270785,
"step": 72000
},
{
"epoch": 2.7743762436859023,
"grad_norm": 1.4002177715301514,
"learning_rate": 0.00019177443609022553,
"loss": 0.5811,
"step": 72500
},
{
"epoch": 2.7935098729527015,
"grad_norm": 1.1865595579147339,
"learning_rate": 0.00019102255639097743,
"loss": 0.5702,
"step": 73000
},
{
"epoch": 2.7935098729527015,
"eval_loss": Infinity,
"eval_runtime": 315.9208,
"eval_samples_per_second": 22.227,
"eval_steps_per_second": 2.779,
"eval_wer": 0.5225698117199069,
"step": 73000
},
{
"epoch": 2.812643502219501,
"grad_norm": 0.8575685620307922,
"learning_rate": 0.0001902706766917293,
"loss": 0.5806,
"step": 73500
},
{
"epoch": 2.8317771314863003,
"grad_norm": 0.9127354025840759,
"learning_rate": 0.0001895187969924812,
"loss": 0.5755,
"step": 74000
},
{
"epoch": 2.8317771314863003,
"eval_loss": Infinity,
"eval_runtime": 315.601,
"eval_samples_per_second": 22.25,
"eval_steps_per_second": 2.782,
"eval_wer": 0.5319441506240745,
"step": 74000
},
{
"epoch": 2.8509107607530995,
"grad_norm": 1.893068552017212,
"learning_rate": 0.00018876691729323306,
"loss": 0.574,
"step": 74500
},
{
"epoch": 2.870044390019899,
"grad_norm": 0.7603012323379517,
"learning_rate": 0.00018801654135338345,
"loss": 0.5659,
"step": 75000
},
{
"epoch": 2.870044390019899,
"eval_loss": Infinity,
"eval_runtime": 315.7507,
"eval_samples_per_second": 22.239,
"eval_steps_per_second": 2.781,
"eval_wer": 0.5286783372117622,
"step": 75000
},
{
"epoch": 2.8891780192866983,
"grad_norm": 1.636072039604187,
"learning_rate": 0.00018726466165413532,
"loss": 0.5645,
"step": 75500
},
{
"epoch": 2.9083116485534974,
"grad_norm": 0.8112033605575562,
"learning_rate": 0.0001865127819548872,
"loss": 0.5581,
"step": 76000
},
{
"epoch": 2.9083116485534974,
"eval_loss": Infinity,
"eval_runtime": 315.6028,
"eval_samples_per_second": 22.249,
"eval_steps_per_second": 2.782,
"eval_wer": 0.5277924687962767,
"step": 76000
},
{
"epoch": 2.927445277820297,
"grad_norm": 0.8813944458961487,
"learning_rate": 0.00018576090225563908,
"loss": 0.573,
"step": 76500
},
{
"epoch": 2.9465789070870962,
"grad_norm": 0.7851129174232483,
"learning_rate": 0.00018501052631578945,
"loss": 0.5786,
"step": 77000
},
{
"epoch": 2.9465789070870962,
"eval_loss": Infinity,
"eval_runtime": 316.8095,
"eval_samples_per_second": 22.165,
"eval_steps_per_second": 2.771,
"eval_wer": 0.5194626613073832,
"step": 77000
},
{
"epoch": 2.9657125363538954,
"grad_norm": 0.9913876056671143,
"learning_rate": 0.00018425864661654134,
"loss": 0.5783,
"step": 77500
},
{
"epoch": 2.984846165620695,
"grad_norm": 0.9374109506607056,
"learning_rate": 0.0001835067669172932,
"loss": 0.5485,
"step": 78000
},
{
"epoch": 2.984846165620695,
"eval_loss": Infinity,
"eval_runtime": 316.6351,
"eval_samples_per_second": 22.177,
"eval_steps_per_second": 2.773,
"eval_wer": 0.5255976306325365,
"step": 78000
},
{
"epoch": 3.0039797948874942,
"grad_norm": 0.9412303566932678,
"learning_rate": 0.0001827548872180451,
"loss": 0.543,
"step": 78500
},
{
"epoch": 3.0231134241542934,
"grad_norm": 0.9871559739112854,
"learning_rate": 0.00018200300751879697,
"loss": 0.5113,
"step": 79000
},
{
"epoch": 3.0231134241542934,
"eval_loss": Infinity,
"eval_runtime": 316.7492,
"eval_samples_per_second": 22.169,
"eval_steps_per_second": 2.772,
"eval_wer": 0.5220277131372963,
"step": 79000
},
{
"epoch": 3.042247053421093,
"grad_norm": 1.3727389574050903,
"learning_rate": 0.00018125112781954887,
"loss": 0.5075,
"step": 79500
},
{
"epoch": 3.061380682687892,
"grad_norm": 1.3193981647491455,
"learning_rate": 0.00018049924812030073,
"loss": 0.4973,
"step": 80000
},
{
"epoch": 3.061380682687892,
"eval_loss": Infinity,
"eval_runtime": 315.9407,
"eval_samples_per_second": 22.226,
"eval_steps_per_second": 2.779,
"eval_wer": 0.5146102178971864,
"step": 80000
},
{
"epoch": 3.0805143119546914,
"grad_norm": 1.4254885911941528,
"learning_rate": 0.00017974887218045113,
"loss": 0.5063,
"step": 80500
},
{
"epoch": 3.099647941221491,
"grad_norm": 1.1925376653671265,
"learning_rate": 0.000178996992481203,
"loss": 0.5085,
"step": 81000
},
{
"epoch": 3.099647941221491,
"eval_loss": Infinity,
"eval_runtime": 315.3194,
"eval_samples_per_second": 22.269,
"eval_steps_per_second": 2.784,
"eval_wer": 0.5240903321345463,
"step": 81000
},
{
"epoch": 3.11878157048829,
"grad_norm": 0.8363128304481506,
"learning_rate": 0.00017824661654135339,
"loss": 0.5263,
"step": 81500
},
{
"epoch": 3.1379151997550894,
"grad_norm": 0.8608238697052002,
"learning_rate": 0.00017749473684210525,
"loss": 0.5111,
"step": 82000
},
{
"epoch": 3.1379151997550894,
"eval_loss": Infinity,
"eval_runtime": 316.6205,
"eval_samples_per_second": 22.178,
"eval_steps_per_second": 2.773,
"eval_wer": 0.5104585360693886,
"step": 82000
},
{
"epoch": 3.157048829021889,
"grad_norm": 0.7947099208831787,
"learning_rate": 0.00017674285714285715,
"loss": 0.496,
"step": 82500
},
{
"epoch": 3.176182458288688,
"grad_norm": 0.629405677318573,
"learning_rate": 0.00017599097744360902,
"loss": 0.5047,
"step": 83000
},
{
"epoch": 3.176182458288688,
"eval_loss": Infinity,
"eval_runtime": 315.567,
"eval_samples_per_second": 22.252,
"eval_steps_per_second": 2.782,
"eval_wer": 0.5117675058176433,
"step": 83000
},
{
"epoch": 3.1953160875554873,
"grad_norm": 0.6956018209457397,
"learning_rate": 0.0001752390977443609,
"loss": 0.519,
"step": 83500
},
{
"epoch": 3.214449716822287,
"grad_norm": 0.4928194284439087,
"learning_rate": 0.00017448721804511275,
"loss": 0.4994,
"step": 84000
},
{
"epoch": 3.214449716822287,
"eval_loss": Infinity,
"eval_runtime": 316.5623,
"eval_samples_per_second": 22.182,
"eval_steps_per_second": 2.774,
"eval_wer": 0.49931246033425003,
"step": 84000
},
{
"epoch": 3.233583346089086,
"grad_norm": 0.9308450222015381,
"learning_rate": 0.00017373533834586465,
"loss": 0.5108,
"step": 84500
},
{
"epoch": 3.2527169753558853,
"grad_norm": 1.1794687509536743,
"learning_rate": 0.00017298345864661652,
"loss": 0.5077,
"step": 85000
},
{
"epoch": 3.2527169753558853,
"eval_loss": Infinity,
"eval_runtime": 316.1672,
"eval_samples_per_second": 22.21,
"eval_steps_per_second": 2.777,
"eval_wer": 0.5099825470700232,
"step": 85000
},
{
"epoch": 3.271850604622685,
"grad_norm": 0.6575067639350891,
"learning_rate": 0.0001722315789473684,
"loss": 0.5123,
"step": 85500
},
{
"epoch": 3.290984233889484,
"grad_norm": 0.60300612449646,
"learning_rate": 0.00017148120300751877,
"loss": 0.5035,
"step": 86000
},
{
"epoch": 3.290984233889484,
"eval_loss": Infinity,
"eval_runtime": 316.8684,
"eval_samples_per_second": 22.161,
"eval_steps_per_second": 2.771,
"eval_wer": 0.49292627459276495,
"step": 86000
},
{
"epoch": 3.3101178631562833,
"grad_norm": 1.378197193145752,
"learning_rate": 0.00017072932330827064,
"loss": 0.5036,
"step": 86500
},
{
"epoch": 3.329251492423083,
"grad_norm": 0.6758792996406555,
"learning_rate": 0.00016997894736842103,
"loss": 0.5045,
"step": 87000
},
{
"epoch": 3.329251492423083,
"eval_loss": Infinity,
"eval_runtime": 315.6861,
"eval_samples_per_second": 22.244,
"eval_steps_per_second": 2.781,
"eval_wer": 0.5026311614131584,
"step": 87000
},
{
"epoch": 3.348385121689882,
"grad_norm": 0.708972692489624,
"learning_rate": 0.0001692270676691729,
"loss": 0.5057,
"step": 87500
},
{
"epoch": 3.3675187509566813,
"grad_norm": 1.0322130918502808,
"learning_rate": 0.0001684751879699248,
"loss": 0.4951,
"step": 88000
},
{
"epoch": 3.3675187509566813,
"eval_loss": Infinity,
"eval_runtime": 316.1978,
"eval_samples_per_second": 22.208,
"eval_steps_per_second": 2.777,
"eval_wer": 0.49707795642056274,
"step": 88000
},
{
"epoch": 3.386652380223481,
"grad_norm": 0.5131962299346924,
"learning_rate": 0.00016772330827067667,
"loss": 0.4925,
"step": 88500
},
{
"epoch": 3.40578600949028,
"grad_norm": 0.7158399820327759,
"learning_rate": 0.00016697142857142856,
"loss": 0.4915,
"step": 89000
},
{
"epoch": 3.40578600949028,
"eval_loss": Infinity,
"eval_runtime": 319.2292,
"eval_samples_per_second": 21.997,
"eval_steps_per_second": 2.75,
"eval_wer": 0.4984133700021155,
"step": 89000
},
{
"epoch": 3.4249196387570793,
"grad_norm": 1.1134260892868042,
"learning_rate": 0.00016621954887218043,
"loss": 0.5056,
"step": 89500
},
{
"epoch": 3.444053268023879,
"grad_norm": 0.7996990084648132,
"learning_rate": 0.00016546766917293232,
"loss": 0.4875,
"step": 90000
},
{
"epoch": 3.444053268023879,
"eval_loss": Infinity,
"eval_runtime": 316.288,
"eval_samples_per_second": 22.201,
"eval_steps_per_second": 2.776,
"eval_wer": 0.49683996192088004,
"step": 90000
},
{
"epoch": 3.463186897290678,
"grad_norm": 1.4016754627227783,
"learning_rate": 0.0001647157894736842,
"loss": 0.5019,
"step": 90500
},
{
"epoch": 3.4823205265574773,
"grad_norm": 0.6514917016029358,
"learning_rate": 0.00016396541353383458,
"loss": 0.4964,
"step": 91000
},
{
"epoch": 3.4823205265574773,
"eval_loss": Infinity,
"eval_runtime": 316.9586,
"eval_samples_per_second": 22.154,
"eval_steps_per_second": 2.77,
"eval_wer": 0.49888935900148085,
"step": 91000
},
{
"epoch": 3.501454155824277,
"grad_norm": 0.3953873813152313,
"learning_rate": 0.00016321353383458645,
"loss": 0.4878,
"step": 91500
},
{
"epoch": 3.520587785091076,
"grad_norm": 0.6485087871551514,
"learning_rate": 0.00016246165413533832,
"loss": 0.4767,
"step": 92000
},
{
"epoch": 3.520587785091076,
"eval_loss": Infinity,
"eval_runtime": 317.0491,
"eval_samples_per_second": 22.148,
"eval_steps_per_second": 2.769,
"eval_wer": 0.4921594034271208,
"step": 92000
},
{
"epoch": 3.5397214143578752,
"grad_norm": 0.649442732334137,
"learning_rate": 0.00016170977443609021,
"loss": 0.491,
"step": 92500
},
{
"epoch": 3.558855043624675,
"grad_norm": 0.7617647051811218,
"learning_rate": 0.00016095939849624058,
"loss": 0.4765,
"step": 93000
},
{
"epoch": 3.558855043624675,
"eval_loss": Infinity,
"eval_runtime": 317.7633,
"eval_samples_per_second": 22.098,
"eval_steps_per_second": 2.763,
"eval_wer": 0.48691030251745293,
"step": 93000
},
{
"epoch": 3.577988672891474,
"grad_norm": 0.5942517518997192,
"learning_rate": 0.00016020751879699247,
"loss": 0.4805,
"step": 93500
},
{
"epoch": 3.597122302158273,
"grad_norm": 0.9230866432189941,
"learning_rate": 0.00015945563909774434,
"loss": 0.4967,
"step": 94000
},
{
"epoch": 3.597122302158273,
"eval_loss": Infinity,
"eval_runtime": 317.3012,
"eval_samples_per_second": 22.13,
"eval_steps_per_second": 2.767,
"eval_wer": 0.49814893166913476,
"step": 94000
},
{
"epoch": 3.616255931425073,
"grad_norm": 0.601637065410614,
"learning_rate": 0.00015870375939849624,
"loss": 0.5055,
"step": 94500
},
{
"epoch": 3.635389560691872,
"grad_norm": 0.6463965773582458,
"learning_rate": 0.0001579518796992481,
"loss": 0.4941,
"step": 95000
},
{
"epoch": 3.635389560691872,
"eval_loss": Infinity,
"eval_runtime": 318.5729,
"eval_samples_per_second": 22.042,
"eval_steps_per_second": 2.756,
"eval_wer": 0.49616564417177916,
"step": 95000
},
{
"epoch": 3.654523189958671,
"grad_norm": 0.8088521957397461,
"learning_rate": 0.0001572,
"loss": 0.4851,
"step": 95500
},
{
"epoch": 3.673656819225471,
"grad_norm": 1.0758212804794312,
"learning_rate": 0.00015644812030075187,
"loss": 0.4808,
"step": 96000
},
{
"epoch": 3.673656819225471,
"eval_loss": Infinity,
"eval_runtime": 317.3887,
"eval_samples_per_second": 22.124,
"eval_steps_per_second": 2.766,
"eval_wer": 0.4856277766024963,
"step": 96000
},
{
"epoch": 3.69279044849227,
"grad_norm": 2.3342583179473877,
"learning_rate": 0.00015569624060150376,
"loss": 0.4682,
"step": 96500
},
{
"epoch": 3.711924077759069,
"grad_norm": 0.9281033873558044,
"learning_rate": 0.0001549443609022556,
"loss": 0.4838,
"step": 97000
},
{
"epoch": 3.711924077759069,
"eval_loss": Infinity,
"eval_runtime": 317.8326,
"eval_samples_per_second": 22.093,
"eval_steps_per_second": 2.762,
"eval_wer": 0.47491802411677597,
"step": 97000
},
{
"epoch": 3.731057707025869,
"grad_norm": 0.6656193733215332,
"learning_rate": 0.00015419248120300753,
"loss": 0.4893,
"step": 97500
},
{
"epoch": 3.750191336292668,
"grad_norm": 0.8286083936691284,
"learning_rate": 0.0001534421052631579,
"loss": 0.4644,
"step": 98000
},
{
"epoch": 3.750191336292668,
"eval_loss": Infinity,
"eval_runtime": 317.0552,
"eval_samples_per_second": 22.148,
"eval_steps_per_second": 2.769,
"eval_wer": 0.4738073831182568,
"step": 98000
},
{
"epoch": 3.769324965559467,
"grad_norm": 0.7517048716545105,
"learning_rate": 0.00015269022556390978,
"loss": 0.4828,
"step": 98500
},
{
"epoch": 3.788458594826267,
"grad_norm": 1.126383662223816,
"learning_rate": 0.00015193834586466163,
"loss": 0.4818,
"step": 99000
},
{
"epoch": 3.788458594826267,
"eval_loss": Infinity,
"eval_runtime": 317.5174,
"eval_samples_per_second": 22.115,
"eval_steps_per_second": 2.765,
"eval_wer": 0.47370160778506454,
"step": 99000
},
{
"epoch": 3.807592224093066,
"grad_norm": 1.257995843887329,
"learning_rate": 0.00015118646616541352,
"loss": 0.485,
"step": 99500
},
{
"epoch": 3.826725853359865,
"grad_norm": 1.0233116149902344,
"learning_rate": 0.0001504345864661654,
"loss": 0.4741,
"step": 100000
},
{
"epoch": 3.826725853359865,
"eval_loss": Infinity,
"eval_runtime": 317.1035,
"eval_samples_per_second": 22.144,
"eval_steps_per_second": 2.769,
"eval_wer": 0.48123810027501585,
"step": 100000
},
{
"epoch": 3.8458594826266648,
"grad_norm": 1.8550606966018677,
"learning_rate": 0.00014968270676691728,
"loss": 0.4763,
"step": 100500
},
{
"epoch": 3.864993111893464,
"grad_norm": 0.8199677467346191,
"learning_rate": 0.00014893082706766915,
"loss": 0.4734,
"step": 101000
},
{
"epoch": 3.864993111893464,
"eval_loss": Infinity,
"eval_runtime": 317.4217,
"eval_samples_per_second": 22.122,
"eval_steps_per_second": 2.766,
"eval_wer": 0.4772186376137085,
"step": 101000
},
{
"epoch": 3.884126741160263,
"grad_norm": 0.5380846261978149,
"learning_rate": 0.00014817894736842105,
"loss": 0.4602,
"step": 101500
},
{
"epoch": 3.9032603704270628,
"grad_norm": 1.026496410369873,
"learning_rate": 0.00014742706766917291,
"loss": 0.4733,
"step": 102000
},
{
"epoch": 3.9032603704270628,
"eval_loss": Infinity,
"eval_runtime": 317.6547,
"eval_samples_per_second": 22.106,
"eval_steps_per_second": 2.764,
"eval_wer": 0.47355616670192513,
"step": 102000
},
{
"epoch": 3.922393999693862,
"grad_norm": 1.4769624471664429,
"learning_rate": 0.0001466766917293233,
"loss": 0.477,
"step": 102500
},
{
"epoch": 3.941527628960661,
"grad_norm": 1.2881931066513062,
"learning_rate": 0.00014592481203007517,
"loss": 0.4937,
"step": 103000
},
{
"epoch": 3.941527628960661,
"eval_loss": Infinity,
"eval_runtime": 317.1763,
"eval_samples_per_second": 22.139,
"eval_steps_per_second": 2.768,
"eval_wer": 0.4694573725407235,
"step": 103000
},
{
"epoch": 3.9606612582274607,
"grad_norm": 0.6641072630882263,
"learning_rate": 0.00014517443609022554,
"loss": 0.4728,
"step": 103500
},
{
"epoch": 3.97979488749426,
"grad_norm": 0.7772675156593323,
"learning_rate": 0.00014442255639097743,
"loss": 0.4864,
"step": 104000
},
{
"epoch": 3.97979488749426,
"eval_loss": Infinity,
"eval_runtime": 316.8284,
"eval_samples_per_second": 22.163,
"eval_steps_per_second": 2.771,
"eval_wer": 0.47485191453353076,
"step": 104000
},
{
"epoch": 3.998928516761059,
"grad_norm": 0.4977366626262665,
"learning_rate": 0.0001436706766917293,
"loss": 0.4671,
"step": 104500
},
{
"epoch": 4.018062146027859,
"grad_norm": 1.0952422618865967,
"learning_rate": 0.00014291879699248117,
"loss": 0.4126,
"step": 105000
},
{
"epoch": 4.018062146027859,
"eval_loss": Infinity,
"eval_runtime": 318.0988,
"eval_samples_per_second": 22.075,
"eval_steps_per_second": 2.76,
"eval_wer": 0.4635207319653057,
"step": 105000
},
{
"epoch": 4.0371957752946575,
"grad_norm": 0.694837212562561,
"learning_rate": 0.00014216691729323306,
"loss": 0.4202,
"step": 105500
},
{
"epoch": 4.056329404561457,
"grad_norm": 0.7097035646438599,
"learning_rate": 0.00014141503759398493,
"loss": 0.4228,
"step": 106000
},
{
"epoch": 4.056329404561457,
"eval_loss": Infinity,
"eval_runtime": 315.7201,
"eval_samples_per_second": 22.241,
"eval_steps_per_second": 2.781,
"eval_wer": 0.47007880262322826,
"step": 106000
},
{
"epoch": 4.075463033828257,
"grad_norm": 1.1219637393951416,
"learning_rate": 0.00014066315789473683,
"loss": 0.4349,
"step": 106500
},
{
"epoch": 4.094596663095055,
"grad_norm": 0.5968381762504578,
"learning_rate": 0.0001399127819548872,
"loss": 0.4098,
"step": 107000
},
{
"epoch": 4.094596663095055,
"eval_loss": Infinity,
"eval_runtime": 316.2686,
"eval_samples_per_second": 22.203,
"eval_steps_per_second": 2.776,
"eval_wer": 0.45890628305479164,
"step": 107000
},
{
"epoch": 4.113730292361855,
"grad_norm": 0.5920900106430054,
"learning_rate": 0.0001391609022556391,
"loss": 0.4182,
"step": 107500
},
{
"epoch": 4.132863921628655,
"grad_norm": 0.45823031663894653,
"learning_rate": 0.00013840902255639095,
"loss": 0.4193,
"step": 108000
},
{
"epoch": 4.132863921628655,
"eval_loss": Infinity,
"eval_runtime": 316.9696,
"eval_samples_per_second": 22.154,
"eval_steps_per_second": 2.77,
"eval_wer": 0.46152422255130104,
"step": 108000
},
{
"epoch": 4.151997550895453,
"grad_norm": 0.6653383374214172,
"learning_rate": 0.00013765714285714285,
"loss": 0.4087,
"step": 108500
},
{
"epoch": 4.171131180162253,
"grad_norm": 0.5999200940132141,
"learning_rate": 0.00013690526315789472,
"loss": 0.4083,
"step": 109000
},
{
"epoch": 4.171131180162253,
"eval_loss": Infinity,
"eval_runtime": 318.122,
"eval_samples_per_second": 22.073,
"eval_steps_per_second": 2.76,
"eval_wer": 0.46397027713137295,
"step": 109000
},
{
"epoch": 4.190264809429053,
"grad_norm": 0.5787246823310852,
"learning_rate": 0.0001361533834586466,
"loss": 0.4075,
"step": 109500
},
{
"epoch": 4.209398438695851,
"grad_norm": 2.8409461975097656,
"learning_rate": 0.00013540150375939848,
"loss": 0.406,
"step": 110000
},
{
"epoch": 4.209398438695851,
"eval_loss": Infinity,
"eval_runtime": 316.8914,
"eval_samples_per_second": 22.159,
"eval_steps_per_second": 2.771,
"eval_wer": 0.46135233763486355,
"step": 110000
},
{
"epoch": 4.228532067962651,
"grad_norm": 0.8015612959861755,
"learning_rate": 0.00013464962406015038,
"loss": 0.4167,
"step": 110500
},
{
"epoch": 4.247665697229451,
"grad_norm": 3.532646417617798,
"learning_rate": 0.00013389774436090224,
"loss": 0.4125,
"step": 111000
},
{
"epoch": 4.247665697229451,
"eval_loss": Infinity,
"eval_runtime": 316.906,
"eval_samples_per_second": 22.158,
"eval_steps_per_second": 2.771,
"eval_wer": 0.4608366828855511,
"step": 111000
},
{
"epoch": 4.266799326496249,
"grad_norm": 0.7524324059486389,
"learning_rate": 0.00013314736842105264,
"loss": 0.4205,
"step": 111500
},
{
"epoch": 4.285932955763049,
"grad_norm": 2.5353856086730957,
"learning_rate": 0.0001323954887218045,
"loss": 0.4104,
"step": 112000
},
{
"epoch": 4.285932955763049,
"eval_loss": Infinity,
"eval_runtime": 318.4796,
"eval_samples_per_second": 22.049,
"eval_steps_per_second": 2.757,
"eval_wer": 0.44868574148508567,
"step": 112000
},
{
"epoch": 4.305066585029849,
"grad_norm": 0.9442459940910339,
"learning_rate": 0.00013164360902255637,
"loss": 0.4114,
"step": 112500
},
{
"epoch": 4.324200214296647,
"grad_norm": 1.0531048774719238,
"learning_rate": 0.00013089172932330827,
"loss": 0.3988,
"step": 113000
},
{
"epoch": 4.324200214296647,
"eval_loss": Infinity,
"eval_runtime": 317.0914,
"eval_samples_per_second": 22.145,
"eval_steps_per_second": 2.769,
"eval_wer": 0.45987148297017133,
"step": 113000
},
{
"epoch": 4.343333843563447,
"grad_norm": 0.7277682423591614,
"learning_rate": 0.00013013984962406013,
"loss": 0.4164,
"step": 113500
},
{
"epoch": 4.362467472830247,
"grad_norm": 0.48210740089416504,
"learning_rate": 0.00012938796992481203,
"loss": 0.4034,
"step": 114000
},
{
"epoch": 4.362467472830247,
"eval_loss": Infinity,
"eval_runtime": 318.084,
"eval_samples_per_second": 22.076,
"eval_steps_per_second": 2.76,
"eval_wer": 0.45389517664480644,
"step": 114000
},
{
"epoch": 4.381601102097045,
"grad_norm": 1.0025782585144043,
"learning_rate": 0.0001286375939849624,
"loss": 0.423,
"step": 114500
},
{
"epoch": 4.400734731363845,
"grad_norm": 0.5586313009262085,
"learning_rate": 0.00012788571428571426,
"loss": 0.4023,
"step": 115000
},
{
"epoch": 4.400734731363845,
"eval_loss": Infinity,
"eval_runtime": 318.3491,
"eval_samples_per_second": 22.058,
"eval_steps_per_second": 2.758,
"eval_wer": 0.4479585360693886,
"step": 115000
},
{
"epoch": 4.419868360630645,
"grad_norm": 0.6510444283485413,
"learning_rate": 0.00012713383458646616,
"loss": 0.4041,
"step": 115500
},
{
"epoch": 4.439001989897443,
"grad_norm": 0.6380518674850464,
"learning_rate": 0.00012638195488721802,
"loss": 0.4026,
"step": 116000
},
{
"epoch": 4.439001989897443,
"eval_loss": Infinity,
"eval_runtime": 318.4508,
"eval_samples_per_second": 22.051,
"eval_steps_per_second": 2.757,
"eval_wer": 0.45242754389676326,
"step": 116000
},
{
"epoch": 4.458135619164243,
"grad_norm": 0.7297781109809875,
"learning_rate": 0.00012563157894736842,
"loss": 0.4124,
"step": 116500
},
{
"epoch": 4.477269248431043,
"grad_norm": 0.9323301911354065,
"learning_rate": 0.00012487969924812028,
"loss": 0.4182,
"step": 117000
},
{
"epoch": 4.477269248431043,
"eval_loss": Infinity,
"eval_runtime": 316.9248,
"eval_samples_per_second": 22.157,
"eval_steps_per_second": 2.77,
"eval_wer": 0.44729744023693674,
"step": 117000
},
{
"epoch": 4.496402877697841,
"grad_norm": 0.7702882289886475,
"learning_rate": 0.00012412781954887218,
"loss": 0.4137,
"step": 117500
},
{
"epoch": 4.515536506964641,
"grad_norm": 0.45166295766830444,
"learning_rate": 0.00012337593984962405,
"loss": 0.4046,
"step": 118000
},
{
"epoch": 4.515536506964641,
"eval_loss": Infinity,
"eval_runtime": 313.8786,
"eval_samples_per_second": 22.372,
"eval_steps_per_second": 2.797,
"eval_wer": 0.44563147873915804,
"step": 118000
},
{
"epoch": 4.5346701362314406,
"grad_norm": 0.8825483322143555,
"learning_rate": 0.00012262406015037594,
"loss": 0.4001,
"step": 118500
},
{
"epoch": 4.553803765498239,
"grad_norm": 0.5982456207275391,
"learning_rate": 0.00012187218045112781,
"loss": 0.4126,
"step": 119000
},
{
"epoch": 4.553803765498239,
"eval_loss": Infinity,
"eval_runtime": 313.2679,
"eval_samples_per_second": 22.415,
"eval_steps_per_second": 2.803,
"eval_wer": 0.4406071504125238,
"step": 119000
},
{
"epoch": 4.572937394765039,
"grad_norm": 0.49651646614074707,
"learning_rate": 0.00012112030075187969,
"loss": 0.3899,
"step": 119500
},
{
"epoch": 4.5920710240318385,
"grad_norm": 0.6609870195388794,
"learning_rate": 0.00012036842105263157,
"loss": 0.4144,
"step": 120000
},
{
"epoch": 4.5920710240318385,
"eval_loss": Infinity,
"eval_runtime": 313.9675,
"eval_samples_per_second": 22.365,
"eval_steps_per_second": 2.796,
"eval_wer": 0.44486460757351387,
"step": 120000
},
{
"epoch": 4.611204653298637,
"grad_norm": 0.8339817523956299,
"learning_rate": 0.00011961804511278195,
"loss": 0.4067,
"step": 120500
},
{
"epoch": 4.630338282565437,
"grad_norm": 0.683476984500885,
"learning_rate": 0.00011886766917293232,
"loss": 0.4074,
"step": 121000
},
{
"epoch": 4.630338282565437,
"eval_loss": Infinity,
"eval_runtime": 314.2537,
"eval_samples_per_second": 22.345,
"eval_steps_per_second": 2.794,
"eval_wer": 0.44750899090332136,
"step": 121000
},
{
"epoch": 4.6494719118322365,
"grad_norm": 0.6157557964324951,
"learning_rate": 0.0001181172932330827,
"loss": 0.3947,
"step": 121500
},
{
"epoch": 4.668605541099035,
"grad_norm": 0.7728904485702515,
"learning_rate": 0.00011736541353383457,
"loss": 0.3922,
"step": 122000
},
{
"epoch": 4.668605541099035,
"eval_loss": Infinity,
"eval_runtime": 312.5029,
"eval_samples_per_second": 22.47,
"eval_steps_per_second": 2.81,
"eval_wer": 0.4387693039983076,
"step": 122000
},
{
"epoch": 4.687739170365835,
"grad_norm": 0.6580181121826172,
"learning_rate": 0.00011661353383458646,
"loss": 0.3969,
"step": 122500
},
{
"epoch": 4.7068727996326345,
"grad_norm": 3.9013619422912598,
"learning_rate": 0.00011586165413533834,
"loss": 0.3866,
"step": 123000
},
{
"epoch": 4.7068727996326345,
"eval_loss": Infinity,
"eval_runtime": 312.7382,
"eval_samples_per_second": 22.453,
"eval_steps_per_second": 2.807,
"eval_wer": 0.44741643748677806,
"step": 123000
},
{
"epoch": 4.726006428899433,
"grad_norm": 0.7487606406211853,
"learning_rate": 0.00011510977443609022,
"loss": 0.4057,
"step": 123500
},
{
"epoch": 4.745140058166233,
"grad_norm": 0.9207865595817566,
"learning_rate": 0.0001143578947368421,
"loss": 0.3873,
"step": 124000
},
{
"epoch": 4.745140058166233,
"eval_loss": Infinity,
"eval_runtime": 312.9132,
"eval_samples_per_second": 22.441,
"eval_steps_per_second": 2.806,
"eval_wer": 0.4344986249206685,
"step": 124000
},
{
"epoch": 4.7642736874330325,
"grad_norm": 0.6548067331314087,
"learning_rate": 0.00011360601503759398,
"loss": 0.4058,
"step": 124500
},
{
"epoch": 4.783407316699831,
"grad_norm": 0.97373366355896,
"learning_rate": 0.00011285413533834586,
"loss": 0.3917,
"step": 125000
},
{
"epoch": 4.783407316699831,
"eval_loss": Infinity,
"eval_runtime": 313.7438,
"eval_samples_per_second": 22.381,
"eval_steps_per_second": 2.798,
"eval_wer": 0.4338243071715676,
"step": 125000
},
{
"epoch": 4.802540945966631,
"grad_norm": 0.7585910558700562,
"learning_rate": 0.00011210375939849623,
"loss": 0.4095,
"step": 125500
},
{
"epoch": 4.8216745752334305,
"grad_norm": 1.3345550298690796,
"learning_rate": 0.0001113533834586466,
"loss": 0.3864,
"step": 126000
},
{
"epoch": 4.8216745752334305,
"eval_loss": Infinity,
"eval_runtime": 312.643,
"eval_samples_per_second": 22.46,
"eval_steps_per_second": 2.808,
"eval_wer": 0.4350803892532262,
"step": 126000
},
{
"epoch": 4.840808204500229,
"grad_norm": 0.6124061346054077,
"learning_rate": 0.00011060150375939849,
"loss": 0.3909,
"step": 126500
},
{
"epoch": 4.859941833767029,
"grad_norm": 0.7190678119659424,
"learning_rate": 0.00010984962406015037,
"loss": 0.3826,
"step": 127000
},
{
"epoch": 4.859941833767029,
"eval_loss": Infinity,
"eval_runtime": 312.9649,
"eval_samples_per_second": 22.437,
"eval_steps_per_second": 2.805,
"eval_wer": 0.430783266342289,
"step": 127000
},
{
"epoch": 4.879075463033828,
"grad_norm": 0.9486848711967468,
"learning_rate": 0.00010909774436090225,
"loss": 0.3974,
"step": 127500
},
{
"epoch": 4.898209092300627,
"grad_norm": 3.170478343963623,
"learning_rate": 0.00010834736842105263,
"loss": 0.391,
"step": 128000
},
{
"epoch": 4.898209092300627,
"eval_loss": Infinity,
"eval_runtime": 313.5327,
"eval_samples_per_second": 22.396,
"eval_steps_per_second": 2.8,
"eval_wer": 0.431497249841337,
"step": 128000
},
{
"epoch": 4.917342721567427,
"grad_norm": 0.462166428565979,
"learning_rate": 0.000107596992481203,
"loss": 0.3817,
"step": 128500
},
{
"epoch": 4.936476350834226,
"grad_norm": 1.1014796495437622,
"learning_rate": 0.00010684511278195487,
"loss": 0.394,
"step": 129000
},
{
"epoch": 4.936476350834226,
"eval_loss": Infinity,
"eval_runtime": 313.0162,
"eval_samples_per_second": 22.433,
"eval_steps_per_second": 2.805,
"eval_wer": 0.42787444467950075,
"step": 129000
},
{
"epoch": 4.955609980101025,
"grad_norm": 0.624528706073761,
"learning_rate": 0.00010609323308270676,
"loss": 0.3821,
"step": 129500
},
{
"epoch": 4.974743609367825,
"grad_norm": 0.5960122346878052,
"learning_rate": 0.00010534135338345864,
"loss": 0.3957,
"step": 130000
},
{
"epoch": 4.974743609367825,
"eval_loss": Infinity,
"eval_runtime": 313.106,
"eval_samples_per_second": 22.427,
"eval_steps_per_second": 2.804,
"eval_wer": 0.4235112121853184,
"step": 130000
},
{
"epoch": 4.993877238634624,
"grad_norm": 0.7740840911865234,
"learning_rate": 0.0001045894736842105,
"loss": 0.4012,
"step": 130500
},
{
"epoch": 5.013010867901423,
"grad_norm": 0.39604371786117554,
"learning_rate": 0.00010383759398496239,
"loss": 0.3515,
"step": 131000
},
{
"epoch": 5.013010867901423,
"eval_loss": Infinity,
"eval_runtime": 312.7575,
"eval_samples_per_second": 22.452,
"eval_steps_per_second": 2.807,
"eval_wer": 0.42159403427120795,
"step": 131000
},
{
"epoch": 5.032144497168223,
"grad_norm": 0.582255482673645,
"learning_rate": 0.00010308571428571427,
"loss": 0.332,
"step": 131500
},
{
"epoch": 5.051278126435022,
"grad_norm": 1.186954379081726,
"learning_rate": 0.00010233383458646615,
"loss": 0.3389,
"step": 132000
},
{
"epoch": 5.051278126435022,
"eval_loss": Infinity,
"eval_runtime": 312.5025,
"eval_samples_per_second": 22.47,
"eval_steps_per_second": 2.81,
"eval_wer": 0.42548127776602496,
"step": 132000
},
{
"epoch": 5.070411755701821,
"grad_norm": 0.7782790064811707,
"learning_rate": 0.00010158195488721803,
"loss": 0.3352,
"step": 132500
},
{
"epoch": 5.089545384968621,
"grad_norm": 0.5987495183944702,
"learning_rate": 0.00010083157894736841,
"loss": 0.333,
"step": 133000
},
{
"epoch": 5.089545384968621,
"eval_loss": Infinity,
"eval_runtime": 514.1295,
"eval_samples_per_second": 13.658,
"eval_steps_per_second": 1.708,
"eval_wer": 0.42526972709964034,
"step": 133000
},
{
"epoch": 5.10867901423542,
"grad_norm": 0.714470624923706,
"learning_rate": 0.0001000812030075188,
"loss": 0.3334,
"step": 133500
},
{
"epoch": 5.127812643502219,
"grad_norm": 0.554375171661377,
"learning_rate": 9.932932330827067e-05,
"loss": 0.3313,
"step": 134000
},
{
"epoch": 5.127812643502219,
"eval_loss": Infinity,
"eval_runtime": 313.7907,
"eval_samples_per_second": 22.378,
"eval_steps_per_second": 2.798,
"eval_wer": 0.4178390099428813,
"step": 134000
},
{
"epoch": 5.146946272769019,
"grad_norm": 0.8462457060813904,
"learning_rate": 9.857744360902255e-05,
"loss": 0.3391,
"step": 134500
},
{
"epoch": 5.166079902035818,
"grad_norm": 0.5063708424568176,
"learning_rate": 9.782556390977443e-05,
"loss": 0.3351,
"step": 135000
},
{
"epoch": 5.166079902035818,
"eval_loss": Infinity,
"eval_runtime": 314.6864,
"eval_samples_per_second": 22.314,
"eval_steps_per_second": 2.79,
"eval_wer": 0.422321239686905,
"step": 135000
},
{
"epoch": 5.185213531302617,
"grad_norm": 0.5310961008071899,
"learning_rate": 9.707368421052631e-05,
"loss": 0.3435,
"step": 135500
},
{
"epoch": 5.204347160569417,
"grad_norm": 0.7004749774932861,
"learning_rate": 9.63218045112782e-05,
"loss": 0.3262,
"step": 136000
},
{
"epoch": 5.204347160569417,
"eval_loss": Infinity,
"eval_runtime": 314.217,
"eval_samples_per_second": 22.348,
"eval_steps_per_second": 2.794,
"eval_wer": 0.416318489528242,
"step": 136000
},
{
"epoch": 5.223480789836216,
"grad_norm": 0.5730725526809692,
"learning_rate": 9.556992481203008e-05,
"loss": 0.333,
"step": 136500
},
{
"epoch": 5.242614419103015,
"grad_norm": 1.6563917398452759,
"learning_rate": 9.481804511278196e-05,
"loss": 0.3333,
"step": 137000
},
{
"epoch": 5.242614419103015,
"eval_loss": Infinity,
"eval_runtime": 315.3953,
"eval_samples_per_second": 22.264,
"eval_steps_per_second": 2.784,
"eval_wer": 0.42156759043790987,
"step": 137000
},
{
"epoch": 5.261748048369815,
"grad_norm": 0.5844420790672302,
"learning_rate": 9.406616541353384e-05,
"loss": 0.3369,
"step": 137500
},
{
"epoch": 5.280881677636614,
"grad_norm": 0.6321229934692383,
"learning_rate": 9.331428571428571e-05,
"loss": 0.3229,
"step": 138000
},
{
"epoch": 5.280881677636614,
"eval_loss": Infinity,
"eval_runtime": 315.3276,
"eval_samples_per_second": 22.269,
"eval_steps_per_second": 2.784,
"eval_wer": 0.41326422678231434,
"step": 138000
},
{
"epoch": 5.300015306903413,
"grad_norm": 0.6538165807723999,
"learning_rate": 9.256240601503759e-05,
"loss": 0.3382,
"step": 138500
},
{
"epoch": 5.319148936170213,
"grad_norm": 0.7837240099906921,
"learning_rate": 9.181052631578947e-05,
"loss": 0.3345,
"step": 139000
},
{
"epoch": 5.319148936170213,
"eval_loss": Infinity,
"eval_runtime": 316.2249,
"eval_samples_per_second": 22.206,
"eval_steps_per_second": 2.777,
"eval_wer": 0.4136079966151893,
"step": 139000
},
{
"epoch": 5.338282565437012,
"grad_norm": 0.8858345150947571,
"learning_rate": 9.106015037593984e-05,
"loss": 0.3293,
"step": 139500
},
{
"epoch": 5.357416194703811,
"grad_norm": 1.3112056255340576,
"learning_rate": 9.030827067669172e-05,
"loss": 0.3365,
"step": 140000
},
{
"epoch": 5.357416194703811,
"eval_loss": Infinity,
"eval_runtime": 316.1322,
"eval_samples_per_second": 22.212,
"eval_steps_per_second": 2.777,
"eval_wer": 0.4193330865242226,
"step": 140000
},
{
"epoch": 5.376549823970611,
"grad_norm": 0.720756471157074,
"learning_rate": 8.95563909774436e-05,
"loss": 0.3318,
"step": 140500
},
{
"epoch": 5.39568345323741,
"grad_norm": 0.46977701783180237,
"learning_rate": 8.880451127819548e-05,
"loss": 0.3165,
"step": 141000
},
{
"epoch": 5.39568345323741,
"eval_loss": Infinity,
"eval_runtime": 314.295,
"eval_samples_per_second": 22.342,
"eval_steps_per_second": 2.794,
"eval_wer": 0.4112148297017136,
"step": 141000
},
{
"epoch": 5.414817082504209,
"grad_norm": 0.7260543704032898,
"learning_rate": 8.805263157894736e-05,
"loss": 0.337,
"step": 141500
},
{
"epoch": 5.433950711771009,
"grad_norm": 1.3891347646713257,
"learning_rate": 8.730075187969924e-05,
"loss": 0.3224,
"step": 142000
},
{
"epoch": 5.433950711771009,
"eval_loss": Infinity,
"eval_runtime": 315.0255,
"eval_samples_per_second": 22.29,
"eval_steps_per_second": 2.787,
"eval_wer": 0.40745980537338694,
"step": 142000
},
{
"epoch": 5.453084341037808,
"grad_norm": 0.4979989528656006,
"learning_rate": 8.654887218045112e-05,
"loss": 0.3341,
"step": 142500
},
{
"epoch": 5.472217970304607,
"grad_norm": 0.9820772409439087,
"learning_rate": 8.579699248120299e-05,
"loss": 0.335,
"step": 143000
},
{
"epoch": 5.472217970304607,
"eval_loss": Infinity,
"eval_runtime": 314.1397,
"eval_samples_per_second": 22.353,
"eval_steps_per_second": 2.795,
"eval_wer": 0.4112941612016078,
"step": 143000
},
{
"epoch": 5.491351599571407,
"grad_norm": 1.6596304178237915,
"learning_rate": 8.504511278195487e-05,
"loss": 0.3243,
"step": 143500
},
{
"epoch": 5.510485228838206,
"grad_norm": 0.687848687171936,
"learning_rate": 8.429323308270675e-05,
"loss": 0.3377,
"step": 144000
},
{
"epoch": 5.510485228838206,
"eval_loss": Infinity,
"eval_runtime": 315.8491,
"eval_samples_per_second": 22.232,
"eval_steps_per_second": 2.78,
"eval_wer": 0.41762745927649675,
"step": 144000
},
{
"epoch": 5.529618858105005,
"grad_norm": 1.0538053512573242,
"learning_rate": 8.354135338345864e-05,
"loss": 0.3195,
"step": 144500
},
{
"epoch": 5.548752487371805,
"grad_norm": 0.6369953751564026,
"learning_rate": 8.278947368421052e-05,
"loss": 0.3411,
"step": 145000
},
{
"epoch": 5.548752487371805,
"eval_loss": Infinity,
"eval_runtime": 316.0558,
"eval_samples_per_second": 22.218,
"eval_steps_per_second": 2.778,
"eval_wer": 0.4091125449545166,
"step": 145000
},
{
"epoch": 5.567886116638604,
"grad_norm": 1.193766474723816,
"learning_rate": 8.20390977443609e-05,
"loss": 0.3385,
"step": 145500
},
{
"epoch": 5.587019745905403,
"grad_norm": 0.5476765632629395,
"learning_rate": 8.128721804511278e-05,
"loss": 0.3247,
"step": 146000
},
{
"epoch": 5.587019745905403,
"eval_loss": Infinity,
"eval_runtime": 315.7439,
"eval_samples_per_second": 22.24,
"eval_steps_per_second": 2.781,
"eval_wer": 0.4096414216204781,
"step": 146000
},
{
"epoch": 5.606153375172203,
"grad_norm": 0.9929884076118469,
"learning_rate": 8.053533834586466e-05,
"loss": 0.3305,
"step": 146500
},
{
"epoch": 5.625287004439002,
"grad_norm": 0.3942908048629761,
"learning_rate": 7.978345864661654e-05,
"loss": 0.3304,
"step": 147000
},
{
"epoch": 5.625287004439002,
"eval_loss": Infinity,
"eval_runtime": 314.4131,
"eval_samples_per_second": 22.334,
"eval_steps_per_second": 2.793,
"eval_wer": 0.40843822720541567,
"step": 147000
},
{
"epoch": 5.644420633705801,
"grad_norm": 0.7728341221809387,
"learning_rate": 7.903157894736842e-05,
"loss": 0.3301,
"step": 147500
},
{
"epoch": 5.663554262972601,
"grad_norm": 1.4606784582138062,
"learning_rate": 7.82796992481203e-05,
"loss": 0.3267,
"step": 148000
},
{
"epoch": 5.663554262972601,
"eval_loss": Infinity,
"eval_runtime": 314.4256,
"eval_samples_per_second": 22.333,
"eval_steps_per_second": 2.792,
"eval_wer": 0.40423365771102177,
"step": 148000
},
{
"epoch": 5.6826878922394,
"grad_norm": 0.6086077690124512,
"learning_rate": 7.752781954887217e-05,
"loss": 0.3195,
"step": 148500
},
{
"epoch": 5.701821521506199,
"grad_norm": 0.5943909883499146,
"learning_rate": 7.677593984962405e-05,
"loss": 0.3193,
"step": 149000
},
{
"epoch": 5.701821521506199,
"eval_loss": Infinity,
"eval_runtime": 314.9231,
"eval_samples_per_second": 22.298,
"eval_steps_per_second": 2.788,
"eval_wer": 0.4026470277131373,
"step": 149000
},
{
"epoch": 5.7209551507729985,
"grad_norm": 0.5374177098274231,
"learning_rate": 7.602556390977442e-05,
"loss": 0.3374,
"step": 149500
},
{
"epoch": 5.740088780039798,
"grad_norm": 0.675542950630188,
"learning_rate": 7.52736842105263e-05,
"loss": 0.3155,
"step": 150000
},
{
"epoch": 5.740088780039798,
"eval_loss": Infinity,
"eval_runtime": 316.593,
"eval_samples_per_second": 22.18,
"eval_steps_per_second": 2.773,
"eval_wer": 0.40481542204357945,
"step": 150000
},
{
"epoch": 5.759222409306597,
"grad_norm": 1.1648385524749756,
"learning_rate": 7.45218045112782e-05,
"loss": 0.3247,
"step": 150500
},
{
"epoch": 5.7783560385733965,
"grad_norm": 0.6115811467170715,
"learning_rate": 7.376992481203008e-05,
"loss": 0.3238,
"step": 151000
},
{
"epoch": 5.7783560385733965,
"eval_loss": Infinity,
"eval_runtime": 318.3837,
"eval_samples_per_second": 22.055,
"eval_steps_per_second": 2.758,
"eval_wer": 0.40333456737888723,
"step": 151000
},
{
"epoch": 5.797489667840196,
"grad_norm": 1.0145585536956787,
"learning_rate": 7.301804511278196e-05,
"loss": 0.3175,
"step": 151500
},
{
"epoch": 5.816623297106995,
"grad_norm": 0.7065938115119934,
"learning_rate": 7.226766917293232e-05,
"loss": 0.3172,
"step": 152000
},
{
"epoch": 5.816623297106995,
"eval_loss": Infinity,
"eval_runtime": 318.6878,
"eval_samples_per_second": 22.034,
"eval_steps_per_second": 2.755,
"eval_wer": 0.40486830971017557,
"step": 152000
},
{
"epoch": 5.8357569263737945,
"grad_norm": 0.7309425473213196,
"learning_rate": 7.15172932330827e-05,
"loss": 0.3229,
"step": 152500
},
{
"epoch": 5.854890555640594,
"grad_norm": 1.0197374820709229,
"learning_rate": 7.076541353383458e-05,
"loss": 0.3148,
"step": 153000
},
{
"epoch": 5.854890555640594,
"eval_loss": Infinity,
"eval_runtime": 323.1191,
"eval_samples_per_second": 21.732,
"eval_steps_per_second": 2.717,
"eval_wer": 0.3989052253014597,
"step": 153000
},
{
"epoch": 5.874024184907393,
"grad_norm": 0.7352388501167297,
"learning_rate": 7.001503759398496e-05,
"loss": 0.313,
"step": 153500
},
{
"epoch": 5.8931578141741925,
"grad_norm": 0.6405870914459229,
"learning_rate": 6.926315789473684e-05,
"loss": 0.3217,
"step": 154000
},
{
"epoch": 5.8931578141741925,
"eval_loss": Infinity,
"eval_runtime": 319.2909,
"eval_samples_per_second": 21.992,
"eval_steps_per_second": 2.75,
"eval_wer": 0.39782102813623865,
"step": 154000
},
{
"epoch": 5.912291443440992,
"grad_norm": 0.711800754070282,
"learning_rate": 6.851127819548872e-05,
"loss": 0.3206,
"step": 154500
},
{
"epoch": 5.931425072707791,
"grad_norm": 0.41337111592292786,
"learning_rate": 6.77593984962406e-05,
"loss": 0.3145,
"step": 155000
},
{
"epoch": 5.931425072707791,
"eval_loss": Infinity,
"eval_runtime": 318.549,
"eval_samples_per_second": 22.044,
"eval_steps_per_second": 2.756,
"eval_wer": 0.39298180664269095,
"step": 155000
},
{
"epoch": 5.9505587019745905,
"grad_norm": 0.3790297210216522,
"learning_rate": 6.700751879699248e-05,
"loss": 0.3185,
"step": 155500
},
{
"epoch": 5.96969233124139,
"grad_norm": 0.7092337012290955,
"learning_rate": 6.625563909774435e-05,
"loss": 0.3178,
"step": 156000
},
{
"epoch": 5.96969233124139,
"eval_loss": Infinity,
"eval_runtime": 318.815,
"eval_samples_per_second": 22.025,
"eval_steps_per_second": 2.754,
"eval_wer": 0.39946054580071927,
"step": 156000
},
{
"epoch": 5.988825960508189,
"grad_norm": 1.5943549871444702,
"learning_rate": 6.550375939849623e-05,
"loss": 0.3154,
"step": 156500
},
{
"epoch": 6.0079595897749885,
"grad_norm": 1.1709485054016113,
"learning_rate": 6.475187969924812e-05,
"loss": 0.2895,
"step": 157000
},
{
"epoch": 6.0079595897749885,
"eval_loss": Infinity,
"eval_runtime": 318.8329,
"eval_samples_per_second": 22.024,
"eval_steps_per_second": 2.754,
"eval_wer": 0.39980431563359425,
"step": 157000
},
{
"epoch": 6.027093219041788,
"grad_norm": 0.6642709374427795,
"learning_rate": 6.4e-05,
"loss": 0.2714,
"step": 157500
},
{
"epoch": 6.046226848308587,
"grad_norm": 0.7695789337158203,
"learning_rate": 6.324812030075188e-05,
"loss": 0.269,
"step": 158000
},
{
"epoch": 6.046226848308587,
"eval_loss": Infinity,
"eval_runtime": 318.0108,
"eval_samples_per_second": 22.081,
"eval_steps_per_second": 2.761,
"eval_wer": 0.3926248148931669,
"step": 158000
},
{
"epoch": 6.065360477575386,
"grad_norm": 0.6437819600105286,
"learning_rate": 6.249624060150375e-05,
"loss": 0.2651,
"step": 158500
},
{
"epoch": 6.084494106842186,
"grad_norm": 0.9133914113044739,
"learning_rate": 6.174436090225563e-05,
"loss": 0.2757,
"step": 159000
},
{
"epoch": 6.084494106842186,
"eval_loss": Infinity,
"eval_runtime": 317.9915,
"eval_samples_per_second": 22.082,
"eval_steps_per_second": 2.761,
"eval_wer": 0.39225460122699385,
"step": 159000
},
{
"epoch": 6.103627736108985,
"grad_norm": 0.8651337623596191,
"learning_rate": 6.099398496240601e-05,
"loss": 0.2763,
"step": 159500
},
{
"epoch": 6.122761365375784,
"grad_norm": 0.6800199151039124,
"learning_rate": 6.0243609022556384e-05,
"loss": 0.2573,
"step": 160000
},
{
"epoch": 6.122761365375784,
"eval_loss": Infinity,
"eval_runtime": 318.7524,
"eval_samples_per_second": 22.03,
"eval_steps_per_second": 2.754,
"eval_wer": 0.3906283054791623,
"step": 160000
},
{
"epoch": 6.141894994642584,
"grad_norm": 0.7264246940612793,
"learning_rate": 5.9491729323308265e-05,
"loss": 0.2632,
"step": 160500
},
{
"epoch": 6.161028623909383,
"grad_norm": 0.2874845862388611,
"learning_rate": 5.873984962406015e-05,
"loss": 0.2666,
"step": 161000
},
{
"epoch": 6.161028623909383,
"eval_loss": Infinity,
"eval_runtime": 318.0588,
"eval_samples_per_second": 22.078,
"eval_steps_per_second": 2.76,
"eval_wer": 0.38831447006558073,
"step": 161000
},
{
"epoch": 6.180162253176182,
"grad_norm": 0.36712953448295593,
"learning_rate": 5.798796992481202e-05,
"loss": 0.2719,
"step": 161500
},
{
"epoch": 6.199295882442982,
"grad_norm": 1.4700485467910767,
"learning_rate": 5.72360902255639e-05,
"loss": 0.2691,
"step": 162000
},
{
"epoch": 6.199295882442982,
"eval_loss": Infinity,
"eval_runtime": 318.6549,
"eval_samples_per_second": 22.036,
"eval_steps_per_second": 2.755,
"eval_wer": 0.391950497144066,
"step": 162000
},
{
"epoch": 6.218429511709781,
"grad_norm": 0.664314866065979,
"learning_rate": 5.6484210526315785e-05,
"loss": 0.2734,
"step": 162500
},
{
"epoch": 6.23756314097658,
"grad_norm": 0.7786546945571899,
"learning_rate": 5.5732330827067666e-05,
"loss": 0.2699,
"step": 163000
},
{
"epoch": 6.23756314097658,
"eval_loss": Infinity,
"eval_runtime": 323.3374,
"eval_samples_per_second": 21.717,
"eval_steps_per_second": 2.715,
"eval_wer": 0.39616828855510894,
"step": 163000
},
{
"epoch": 6.25669677024338,
"grad_norm": 1.2886419296264648,
"learning_rate": 5.498045112781954e-05,
"loss": 0.2702,
"step": 163500
},
{
"epoch": 6.275830399510179,
"grad_norm": 0.6407492160797119,
"learning_rate": 5.422857142857142e-05,
"loss": 0.259,
"step": 164000
},
{
"epoch": 6.275830399510179,
"eval_loss": Infinity,
"eval_runtime": 318.7872,
"eval_samples_per_second": 22.027,
"eval_steps_per_second": 2.754,
"eval_wer": 0.39016553839644597,
"step": 164000
},
{
"epoch": 6.294964028776978,
"grad_norm": 0.36012986302375793,
"learning_rate": 5.3476691729323304e-05,
"loss": 0.2591,
"step": 164500
},
{
"epoch": 6.314097658043778,
"grad_norm": 0.5582063794136047,
"learning_rate": 5.2724812030075185e-05,
"loss": 0.2707,
"step": 165000
},
{
"epoch": 6.314097658043778,
"eval_loss": Infinity,
"eval_runtime": 317.8204,
"eval_samples_per_second": 22.094,
"eval_steps_per_second": 2.763,
"eval_wer": 0.38777237148297017,
"step": 165000
},
{
"epoch": 6.333231287310577,
"grad_norm": 0.5500897765159607,
"learning_rate": 5.197293233082706e-05,
"loss": 0.2574,
"step": 165500
},
{
"epoch": 6.352364916577376,
"grad_norm": 0.5922083854675293,
"learning_rate": 5.122105263157894e-05,
"loss": 0.265,
"step": 166000
},
{
"epoch": 6.352364916577376,
"eval_loss": Infinity,
"eval_runtime": 316.5005,
"eval_samples_per_second": 22.186,
"eval_steps_per_second": 2.774,
"eval_wer": 0.38557753331922995,
"step": 166000
},
{
"epoch": 6.371498545844176,
"grad_norm": 0.5268240571022034,
"learning_rate": 5.046917293233082e-05,
"loss": 0.2693,
"step": 166500
},
{
"epoch": 6.390632175110975,
"grad_norm": 2.8765857219696045,
"learning_rate": 4.9717293233082705e-05,
"loss": 0.2657,
"step": 167000
},
{
"epoch": 6.390632175110975,
"eval_loss": Infinity,
"eval_runtime": 317.5854,
"eval_samples_per_second": 22.111,
"eval_steps_per_second": 2.765,
"eval_wer": 0.38506187856991747,
"step": 167000
},
{
"epoch": 6.409765804377774,
"grad_norm": 0.6294525265693665,
"learning_rate": 4.896541353383458e-05,
"loss": 0.2634,
"step": 167500
},
{
"epoch": 6.428899433644574,
"grad_norm": 1.270578384399414,
"learning_rate": 4.821503759398496e-05,
"loss": 0.2625,
"step": 168000
},
{
"epoch": 6.428899433644574,
"eval_loss": Infinity,
"eval_runtime": 317.2046,
"eval_samples_per_second": 22.137,
"eval_steps_per_second": 2.768,
"eval_wer": 0.3841495663211339,
"step": 168000
},
{
"epoch": 6.448033062911373,
"grad_norm": 1.4967974424362183,
"learning_rate": 4.746315789473684e-05,
"loss": 0.2679,
"step": 168500
},
{
"epoch": 6.467166692178172,
"grad_norm": 0.8087161779403687,
"learning_rate": 4.671278195488721e-05,
"loss": 0.2615,
"step": 169000
},
{
"epoch": 6.467166692178172,
"eval_loss": Infinity,
"eval_runtime": 316.9216,
"eval_samples_per_second": 22.157,
"eval_steps_per_second": 2.77,
"eval_wer": 0.3831975883224032,
"step": 169000
},
{
"epoch": 6.486300321444972,
"grad_norm": 0.5716475248336792,
"learning_rate": 4.596090225563909e-05,
"loss": 0.2507,
"step": 169500
},
{
"epoch": 6.505433950711771,
"grad_norm": 0.7699230909347534,
"learning_rate": 4.520902255639097e-05,
"loss": 0.2629,
"step": 170000
},
{
"epoch": 6.505433950711771,
"eval_loss": Infinity,
"eval_runtime": 317.644,
"eval_samples_per_second": 22.107,
"eval_steps_per_second": 2.764,
"eval_wer": 0.38342236090543685,
"step": 170000
},
{
"epoch": 6.52456757997857,
"grad_norm": 0.8144583702087402,
"learning_rate": 4.445714285714285e-05,
"loss": 0.2646,
"step": 170500
},
{
"epoch": 6.54370120924537,
"grad_norm": 0.9461275339126587,
"learning_rate": 4.370526315789473e-05,
"loss": 0.276,
"step": 171000
},
{
"epoch": 6.54370120924537,
"eval_loss": Infinity,
"eval_runtime": 316.3907,
"eval_samples_per_second": 22.194,
"eval_steps_per_second": 2.775,
"eval_wer": 0.38307859107256187,
"step": 171000
},
{
"epoch": 6.562834838512169,
"grad_norm": 0.5300208926200867,
"learning_rate": 4.295338345864661e-05,
"loss": 0.2657,
"step": 171500
},
{
"epoch": 6.581968467778968,
"grad_norm": 0.611358106136322,
"learning_rate": 4.220150375939849e-05,
"loss": 0.2623,
"step": 172000
},
{
"epoch": 6.581968467778968,
"eval_loss": Infinity,
"eval_runtime": 316.1573,
"eval_samples_per_second": 22.21,
"eval_steps_per_second": 2.777,
"eval_wer": 0.38129363232494184,
"step": 172000
},
{
"epoch": 6.601102097045768,
"grad_norm": 0.8705514073371887,
"learning_rate": 4.144962406015037e-05,
"loss": 0.2614,
"step": 172500
},
{
"epoch": 6.620235726312567,
"grad_norm": 0.4813309609889984,
"learning_rate": 4.069924812030075e-05,
"loss": 0.27,
"step": 173000
},
{
"epoch": 6.620235726312567,
"eval_loss": Infinity,
"eval_runtime": 316.2461,
"eval_samples_per_second": 22.204,
"eval_steps_per_second": 2.776,
"eval_wer": 0.3814787391580283,
"step": 173000
},
{
"epoch": 6.639369355579366,
"grad_norm": 2.4199442863464355,
"learning_rate": 3.994736842105263e-05,
"loss": 0.261,
"step": 173500
},
{
"epoch": 6.658502984846166,
"grad_norm": 0.6200481653213501,
"learning_rate": 3.919548872180451e-05,
"loss": 0.2712,
"step": 174000
},
{
"epoch": 6.658502984846166,
"eval_loss": Infinity,
"eval_runtime": 317.1791,
"eval_samples_per_second": 22.139,
"eval_steps_per_second": 2.768,
"eval_wer": 0.3812143008250476,
"step": 174000
},
{
"epoch": 6.677636614112965,
"grad_norm": 0.2569734454154968,
"learning_rate": 3.844360902255639e-05,
"loss": 0.2614,
"step": 174500
},
{
"epoch": 6.696770243379764,
"grad_norm": 0.44579431414604187,
"learning_rate": 3.769172932330827e-05,
"loss": 0.263,
"step": 175000
},
{
"epoch": 6.696770243379764,
"eval_loss": Infinity,
"eval_runtime": 316.5662,
"eval_samples_per_second": 22.182,
"eval_steps_per_second": 2.774,
"eval_wer": 0.38161095832451875,
"step": 175000
},
{
"epoch": 6.715903872646564,
"grad_norm": 0.38563570380210876,
"learning_rate": 3.6939849624060146e-05,
"loss": 0.2488,
"step": 175500
},
{
"epoch": 6.735037501913363,
"grad_norm": 0.5862724781036377,
"learning_rate": 3.6189473684210524e-05,
"loss": 0.2616,
"step": 176000
},
{
"epoch": 6.735037501913363,
"eval_loss": Infinity,
"eval_runtime": 316.2674,
"eval_samples_per_second": 22.203,
"eval_steps_per_second": 2.776,
"eval_wer": 0.379574783160567,
"step": 176000
},
{
"epoch": 6.754171131180162,
"grad_norm": 0.5157662034034729,
"learning_rate": 3.5437593984962405e-05,
"loss": 0.2525,
"step": 176500
},
{
"epoch": 6.773304760446962,
"grad_norm": 0.807600200176239,
"learning_rate": 3.4687218045112776e-05,
"loss": 0.253,
"step": 177000
},
{
"epoch": 6.773304760446962,
"eval_loss": Infinity,
"eval_runtime": 316.2322,
"eval_samples_per_second": 22.205,
"eval_steps_per_second": 2.776,
"eval_wer": 0.3794161201607785,
"step": 177000
},
{
"epoch": 6.792438389713761,
"grad_norm": 0.4601055383682251,
"learning_rate": 3.393533834586466e-05,
"loss": 0.2513,
"step": 177500
},
{
"epoch": 6.81157201898056,
"grad_norm": 0.4807584881782532,
"learning_rate": 3.3184962406015036e-05,
"loss": 0.2572,
"step": 178000
},
{
"epoch": 6.81157201898056,
"eval_loss": Infinity,
"eval_runtime": 314.7771,
"eval_samples_per_second": 22.308,
"eval_steps_per_second": 2.789,
"eval_wer": 0.38285381848952826,
"step": 178000
},
{
"epoch": 6.83070564824736,
"grad_norm": 2.668820381164551,
"learning_rate": 3.243308270676692e-05,
"loss": 0.2522,
"step": 178500
},
{
"epoch": 6.8498392775141586,
"grad_norm": 0.5270944833755493,
"learning_rate": 3.168270676691729e-05,
"loss": 0.2517,
"step": 179000
},
{
"epoch": 6.8498392775141586,
"eval_loss": Infinity,
"eval_runtime": 314.726,
"eval_samples_per_second": 22.311,
"eval_steps_per_second": 2.79,
"eval_wer": 0.37728739158028346,
"step": 179000
},
{
"epoch": 6.868972906780958,
"grad_norm": 0.5133803486824036,
"learning_rate": 3.093082706766917e-05,
"loss": 0.2479,
"step": 179500
},
{
"epoch": 6.888106536047758,
"grad_norm": 0.6608215570449829,
"learning_rate": 3.0178947368421048e-05,
"loss": 0.2471,
"step": 180000
},
{
"epoch": 6.888106536047758,
"eval_loss": Infinity,
"eval_runtime": 315.1185,
"eval_samples_per_second": 22.284,
"eval_steps_per_second": 2.786,
"eval_wer": 0.37833192299555746,
"step": 180000
},
{
"epoch": 6.9072401653145565,
"grad_norm": 0.9923522472381592,
"learning_rate": 2.942706766917293e-05,
"loss": 0.2533,
"step": 180500
},
{
"epoch": 6.926373794581356,
"grad_norm": 0.6495700478553772,
"learning_rate": 2.8675187969924808e-05,
"loss": 0.2441,
"step": 181000
},
{
"epoch": 6.926373794581356,
"eval_loss": Infinity,
"eval_runtime": 315.6279,
"eval_samples_per_second": 22.248,
"eval_steps_per_second": 2.782,
"eval_wer": 0.37630896974825473,
"step": 181000
},
{
"epoch": 6.945507423848156,
"grad_norm": 0.5995193123817444,
"learning_rate": 2.792330827067669e-05,
"loss": 0.2509,
"step": 181500
},
{
"epoch": 6.9646410531149545,
"grad_norm": 0.6942078471183777,
"learning_rate": 2.7172932330827067e-05,
"loss": 0.245,
"step": 182000
},
{
"epoch": 6.9646410531149545,
"eval_loss": Infinity,
"eval_runtime": 316.6284,
"eval_samples_per_second": 22.177,
"eval_steps_per_second": 2.773,
"eval_wer": 0.3749338904167548,
"step": 182000
},
{
"epoch": 6.983774682381754,
"grad_norm": 0.7296892404556274,
"learning_rate": 2.6421052631578945e-05,
"loss": 0.2513,
"step": 182500
},
{
"epoch": 7.002908311648554,
"grad_norm": 0.9255119562149048,
"learning_rate": 2.5669172932330827e-05,
"loss": 0.235,
"step": 183000
},
{
"epoch": 7.002908311648554,
"eval_loss": Infinity,
"eval_runtime": 315.5684,
"eval_samples_per_second": 22.252,
"eval_steps_per_second": 2.782,
"eval_wer": 0.3724085043367887,
"step": 183000
},
{
"epoch": 7.0220419409153525,
"grad_norm": 0.6719674468040466,
"learning_rate": 2.4917293233082705e-05,
"loss": 0.2154,
"step": 183500
},
{
"epoch": 7.041175570182152,
"grad_norm": 0.5619477033615112,
"learning_rate": 2.4165413533834586e-05,
"loss": 0.2281,
"step": 184000
},
{
"epoch": 7.041175570182152,
"eval_loss": Infinity,
"eval_runtime": 316.4942,
"eval_samples_per_second": 22.187,
"eval_steps_per_second": 2.774,
"eval_wer": 0.37427279458430296,
"step": 184000
},
{
"epoch": 7.060309199448952,
"grad_norm": 0.7847068309783936,
"learning_rate": 2.3413533834586465e-05,
"loss": 0.2184,
"step": 184500
},
{
"epoch": 7.0794428287157505,
"grad_norm": 0.7864698171615601,
"learning_rate": 2.2661654135338346e-05,
"loss": 0.2155,
"step": 185000
},
{
"epoch": 7.0794428287157505,
"eval_loss": Infinity,
"eval_runtime": 315.7964,
"eval_samples_per_second": 22.236,
"eval_steps_per_second": 2.78,
"eval_wer": 0.3742463507510049,
"step": 185000
},
{
"epoch": 7.09857645798255,
"grad_norm": 0.6666255593299866,
"learning_rate": 2.1909774436090224e-05,
"loss": 0.2106,
"step": 185500
},
{
"epoch": 7.11771008724935,
"grad_norm": 0.5075043439865112,
"learning_rate": 2.1157894736842106e-05,
"loss": 0.2177,
"step": 186000
},
{
"epoch": 7.11771008724935,
"eval_loss": Infinity,
"eval_runtime": 316.0968,
"eval_samples_per_second": 22.215,
"eval_steps_per_second": 2.778,
"eval_wer": 0.37367780833509623,
"step": 186000
},
{
"epoch": 7.1368437165161485,
"grad_norm": 0.7393398284912109,
"learning_rate": 2.0406015037593984e-05,
"loss": 0.2208,
"step": 186500
},
{
"epoch": 7.155977345782948,
"grad_norm": 0.3300219476222992,
"learning_rate": 1.9654135338345865e-05,
"loss": 0.2107,
"step": 187000
},
{
"epoch": 7.155977345782948,
"eval_loss": Infinity,
"eval_runtime": 315.1907,
"eval_samples_per_second": 22.279,
"eval_steps_per_second": 2.786,
"eval_wer": 0.37078220858895705,
"step": 187000
},
{
"epoch": 7.175110975049748,
"grad_norm": 3.2175910472869873,
"learning_rate": 1.8902255639097743e-05,
"loss": 0.2032,
"step": 187500
},
{
"epoch": 7.194244604316546,
"grad_norm": 0.28260278701782227,
"learning_rate": 1.8151879699248118e-05,
"loss": 0.2129,
"step": 188000
},
{
"epoch": 7.194244604316546,
"eval_loss": Infinity,
"eval_runtime": 315.4927,
"eval_samples_per_second": 22.257,
"eval_steps_per_second": 2.783,
"eval_wer": 0.37161518933784643,
"step": 188000
},
{
"epoch": 7.213378233583346,
"grad_norm": 1.4389430284500122,
"learning_rate": 1.74e-05,
"loss": 0.221,
"step": 188500
},
{
"epoch": 7.232511862850146,
"grad_norm": 0.5784205198287964,
"learning_rate": 1.6648120300751878e-05,
"loss": 0.2173,
"step": 189000
},
{
"epoch": 7.232511862850146,
"eval_loss": Infinity,
"eval_runtime": 317.5523,
"eval_samples_per_second": 22.113,
"eval_steps_per_second": 2.765,
"eval_wer": 0.36953934842394753,
"step": 189000
},
{
"epoch": 7.251645492116944,
"grad_norm": 0.5264465808868408,
"learning_rate": 1.589624060150376e-05,
"loss": 0.2102,
"step": 189500
},
{
"epoch": 7.270779121383744,
"grad_norm": 1.0469930171966553,
"learning_rate": 1.5144360902255639e-05,
"loss": 0.2145,
"step": 190000
},
{
"epoch": 7.270779121383744,
"eval_loss": Infinity,
"eval_runtime": 316.5149,
"eval_samples_per_second": 22.185,
"eval_steps_per_second": 2.774,
"eval_wer": 0.37215728792045694,
"step": 190000
},
{
"epoch": 7.289912750650544,
"grad_norm": 0.43167009949684143,
"learning_rate": 1.4392481203007517e-05,
"loss": 0.2217,
"step": 190500
},
{
"epoch": 7.309046379917342,
"grad_norm": 1.083001732826233,
"learning_rate": 1.3640601503759397e-05,
"loss": 0.2116,
"step": 191000
},
{
"epoch": 7.309046379917342,
"eval_loss": Infinity,
"eval_runtime": 315.4082,
"eval_samples_per_second": 22.263,
"eval_steps_per_second": 2.784,
"eval_wer": 0.37024011000634655,
"step": 191000
},
{
"epoch": 7.328180009184142,
"grad_norm": 0.5683468580245972,
"learning_rate": 1.2888721804511277e-05,
"loss": 0.2085,
"step": 191500
},
{
"epoch": 7.347313638450942,
"grad_norm": 0.45209017395973206,
"learning_rate": 1.2136842105263156e-05,
"loss": 0.212,
"step": 192000
},
{
"epoch": 7.347313638450942,
"eval_loss": Infinity,
"eval_runtime": 316.0151,
"eval_samples_per_second": 22.22,
"eval_steps_per_second": 2.778,
"eval_wer": 0.37038555108948595,
"step": 192000
},
{
"epoch": 7.36644726771774,
"grad_norm": 1.5557799339294434,
"learning_rate": 1.1384962406015036e-05,
"loss": 0.2091,
"step": 192500
},
{
"epoch": 7.38558089698454,
"grad_norm": 0.456394761800766,
"learning_rate": 1.0633082706766916e-05,
"loss": 0.2116,
"step": 193000
},
{
"epoch": 7.38558089698454,
"eval_loss": Infinity,
"eval_runtime": 316.9212,
"eval_samples_per_second": 22.157,
"eval_steps_per_second": 2.77,
"eval_wer": 0.37012111275650517,
"step": 193000
},
{
"epoch": 7.40471452625134,
"grad_norm": 0.34570273756980896,
"learning_rate": 9.881203007518796e-06,
"loss": 0.2034,
"step": 193500
},
{
"epoch": 7.423848155518138,
"grad_norm": 0.6514278054237366,
"learning_rate": 9.129323308270676e-06,
"loss": 0.2124,
"step": 194000
},
{
"epoch": 7.423848155518138,
"eval_loss": Infinity,
"eval_runtime": 315.4723,
"eval_samples_per_second": 22.259,
"eval_steps_per_second": 2.783,
"eval_wer": 0.36865348000846204,
"step": 194000
},
{
"epoch": 7.442981784784938,
"grad_norm": 0.3534170091152191,
"learning_rate": 8.378947368421052e-06,
"loss": 0.2146,
"step": 194500
},
{
"epoch": 7.462115414051738,
"grad_norm": 0.6505366563796997,
"learning_rate": 7.627067669172932e-06,
"loss": 0.2078,
"step": 195000
},
{
"epoch": 7.462115414051738,
"eval_loss": Infinity,
"eval_runtime": 316.1812,
"eval_samples_per_second": 22.209,
"eval_steps_per_second": 2.777,
"eval_wer": 0.3681113814258515,
"step": 195000
},
{
"epoch": 7.481249043318536,
"grad_norm": 0.5068254470825195,
"learning_rate": 6.8751879699248115e-06,
"loss": 0.2097,
"step": 195500
},
{
"epoch": 7.500382672585336,
"grad_norm": 0.32878100872039795,
"learning_rate": 6.124812030075188e-06,
"loss": 0.2158,
"step": 196000
},
{
"epoch": 7.500382672585336,
"eval_loss": Infinity,
"eval_runtime": 316.2612,
"eval_samples_per_second": 22.203,
"eval_steps_per_second": 2.776,
"eval_wer": 0.3682171567590438,
"step": 196000
},
{
"epoch": 7.519516301852136,
"grad_norm": 0.9998613595962524,
"learning_rate": 5.3729323308270675e-06,
"loss": 0.2031,
"step": 196500
},
{
"epoch": 7.538649931118934,
"grad_norm": 0.6963976621627808,
"learning_rate": 4.622556390977443e-06,
"loss": 0.2157,
"step": 197000
},
{
"epoch": 7.538649931118934,
"eval_loss": Infinity,
"eval_runtime": 316.927,
"eval_samples_per_second": 22.157,
"eval_steps_per_second": 2.77,
"eval_wer": 0.36727840067696216,
"step": 197000
},
{
"epoch": 7.557783560385734,
"grad_norm": 0.6300442218780518,
"learning_rate": 3.870676691729323e-06,
"loss": 0.2082,
"step": 197500
},
{
"epoch": 7.576917189652534,
"grad_norm": 0.3542906939983368,
"learning_rate": 3.118796992481203e-06,
"loss": 0.2045,
"step": 198000
},
{
"epoch": 7.576917189652534,
"eval_loss": Infinity,
"eval_runtime": 317.109,
"eval_samples_per_second": 22.144,
"eval_steps_per_second": 2.769,
"eval_wer": 0.3666834144277554,
"step": 198000
},
{
"epoch": 7.596050818919332,
"grad_norm": 0.9721285700798035,
"learning_rate": 2.366917293233083e-06,
"loss": 0.209,
"step": 198500
},
{
"epoch": 7.615184448186132,
"grad_norm": 0.8516126275062561,
"learning_rate": 1.6165413533834587e-06,
"loss": 0.2188,
"step": 199000
},
{
"epoch": 7.615184448186132,
"eval_loss": Infinity,
"eval_runtime": 316.4647,
"eval_samples_per_second": 22.189,
"eval_steps_per_second": 2.774,
"eval_wer": 0.36752961709329385,
"step": 199000
},
{
"epoch": 7.634318077452932,
"grad_norm": 0.7036492824554443,
"learning_rate": 8.646616541353383e-07,
"loss": 0.2072,
"step": 199500
},
{
"epoch": 7.65345170671973,
"grad_norm": 0.5840544104576111,
"learning_rate": 1.1278195488721805e-07,
"loss": 0.2041,
"step": 200000
},
{
"epoch": 7.65345170671973,
"eval_loss": Infinity,
"eval_runtime": 315.8635,
"eval_samples_per_second": 22.231,
"eval_steps_per_second": 2.78,
"eval_wer": 0.3670800719272266,
"step": 200000
},
{
"epoch": 7.65345170671973,
"step": 200000,
"total_flos": 1.6865103601997185e+21,
"train_loss": 0.5320780529403687,
"train_runtime": 333857.2134,
"train_samples_per_second": 4.792,
"train_steps_per_second": 0.599
}
],
"logging_steps": 500,
"max_steps": 200000,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6865103601997185e+21,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}