diff --git "a/idefics2/checkpoint-10000/trainer_state.json" "b/idefics2/checkpoint-10000/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/idefics2/checkpoint-10000/trainer_state.json"
@@ -0,0 +1,7483 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.5127448755767339,
+  "eval_steps": 200,
+  "global_step": 10000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.001512744875576734,
+      "grad_norm": 0.8282566070556641,
+      "learning_rate": 0.00019996,
+      "loss": 3.4576,
+      "step": 10
+    },
+    {
+      "epoch": 0.003025489751153468,
+      "grad_norm": 0.1628154069185257,
+      "learning_rate": 0.00019992000000000002,
+      "loss": 0.0992,
+      "step": 20
+    },
+    {
+      "epoch": 0.004538234626730202,
+      "grad_norm": 0.17421123385429382,
+      "learning_rate": 0.00019988,
+      "loss": 0.0666,
+      "step": 30
+    },
+    {
+      "epoch": 0.006050979502306936,
+      "grad_norm": 0.08850277960300446,
+      "learning_rate": 0.00019984,
+      "loss": 0.0661,
+      "step": 40
+    },
+    {
+      "epoch": 0.00756372437788367,
+      "grad_norm": 0.11368270963430405,
+      "learning_rate": 0.0001998,
+      "loss": 0.0639,
+      "step": 50
+    },
+    {
+      "epoch": 0.009076469253460404,
+      "grad_norm": 0.12990300357341766,
+      "learning_rate": 0.00019976000000000003,
+      "loss": 0.0617,
+      "step": 60
+    },
+    {
+      "epoch": 0.010589214129037138,
+      "grad_norm": 0.08885369449853897,
+      "learning_rate": 0.00019972000000000002,
+      "loss": 0.0643,
+      "step": 70
+    },
+    {
+      "epoch": 0.012101959004613872,
+      "grad_norm": 0.07073435187339783,
+      "learning_rate": 0.00019968,
+      "loss": 0.0629,
+      "step": 80
+    },
+    {
+      "epoch": 0.013614703880190605,
+      "grad_norm": 0.061856113374233246,
+      "learning_rate": 0.00019964,
+      "loss": 0.061,
+      "step": 90
+    },
+    {
+      "epoch": 0.01512744875576734,
+      "grad_norm": 0.06827201694250107,
+      "learning_rate": 0.0001996,
+      "loss": 0.0586,
+      "step": 100
+    },
+    {
+      "epoch": 0.016640193631344075,
+      "grad_norm": 0.07220456004142761,
+      "learning_rate": 0.00019956000000000002,
+      "loss": 0.055,
+      "step": 110
+    },
+    {
+      "epoch": 0.018152938506920808,
+      "grad_norm": 0.06632555276155472,
+      "learning_rate": 0.00019952000000000001,
+      "loss": 0.0586,
+      "step": 120
+    },
+    {
+      "epoch": 0.01966568338249754,
+      "grad_norm": 0.09966724365949631,
+      "learning_rate": 0.00019948,
+      "loss": 0.0621,
+      "step": 130
+    },
+    {
+      "epoch": 0.021178428258074276,
+      "grad_norm": 0.0833888053894043,
+      "learning_rate": 0.00019944,
+      "loss": 0.0591,
+      "step": 140
+    },
+    {
+      "epoch": 0.02269117313365101,
+      "grad_norm": 0.08170727640390396,
+      "learning_rate": 0.00019940000000000002,
+      "loss": 0.055,
+      "step": 150
+    },
+    {
+      "epoch": 0.024203918009227745,
+      "grad_norm": 0.07089231163263321,
+      "learning_rate": 0.00019936000000000002,
+      "loss": 0.0582,
+      "step": 160
+    },
+    {
+      "epoch": 0.025716662884804477,
+      "grad_norm": 0.09390200674533844,
+      "learning_rate": 0.00019932,
+      "loss": 0.0628,
+      "step": 170
+    },
+    {
+      "epoch": 0.02722940776038121,
+      "grad_norm": 0.06722863018512726,
+      "learning_rate": 0.00019928,
+      "loss": 0.0591,
+      "step": 180
+    },
+    {
+      "epoch": 0.028742152635957946,
+      "grad_norm": 0.0743609368801117,
+      "learning_rate": 0.00019924,
+      "loss": 0.0626,
+      "step": 190
+    },
+    {
+      "epoch": 0.03025489751153468,
+      "grad_norm": 0.08125407248735428,
+      "learning_rate": 0.00019920000000000002,
+      "loss": 0.0601,
+      "step": 200
+    },
+    {
+      "epoch": 0.03025489751153468,
+      "eval_cer": 0.5356160728183765,
+      "eval_loss": 0.05078176036477089,
+      "eval_runtime": 10281.8657,
+      "eval_samples_per_second": 2.047,
+      "eval_steps_per_second": 0.256,
+      "step": 200
+    },
+    {
+      "epoch": 0.03176764238711141,
+      "grad_norm": 0.07030890136957169,
+      "learning_rate": 0.00019916,
+      "loss": 0.0597,
+      "step": 210
+    },
+    {
+      "epoch": 0.03328038726268815,
+      "grad_norm": 0.05290469154715538,
+      "learning_rate": 0.00019912,
+      "loss": 0.0587,
+      "step": 220
+    },
+    {
+      "epoch": 0.03479313213826488,
+      "grad_norm": 0.07339277863502502,
+      "learning_rate": 0.00019908,
+      "loss": 0.0529,
+      "step": 230
+    },
+    {
+      "epoch": 0.036305877013841616,
+      "grad_norm": 0.0727711170911789,
+      "learning_rate": 0.00019904,
+      "loss": 0.0539,
+      "step": 240
+    },
+    {
+      "epoch": 0.03781862188941835,
+      "grad_norm": 0.07383541762828827,
+      "learning_rate": 0.000199,
+      "loss": 0.0532,
+      "step": 250
+    },
+    {
+      "epoch": 0.03933136676499508,
+      "grad_norm": 0.07042526453733444,
+      "learning_rate": 0.00019896,
+      "loss": 0.0571,
+      "step": 260
+    },
+    {
+      "epoch": 0.04084411164057182,
+      "grad_norm": 0.08188482373952866,
+      "learning_rate": 0.00019892000000000003,
+      "loss": 0.0521,
+      "step": 270
+    },
+    {
+      "epoch": 0.04235685651614855,
+      "grad_norm": 0.07334589958190918,
+      "learning_rate": 0.00019888,
+      "loss": 0.0532,
+      "step": 280
+    },
+    {
+      "epoch": 0.043869601391725285,
+      "grad_norm": 0.06326377391815186,
+      "learning_rate": 0.00019884000000000001,
+      "loss": 0.0528,
+      "step": 290
+    },
+    {
+      "epoch": 0.04538234626730202,
+      "grad_norm": 0.05303795263171196,
+      "learning_rate": 0.0001988,
+      "loss": 0.0539,
+      "step": 300
+    },
+    {
+      "epoch": 0.04689509114287875,
+      "grad_norm": 0.058723289519548416,
+      "learning_rate": 0.00019876,
+      "loss": 0.0469,
+      "step": 310
+    },
+    {
+      "epoch": 0.04840783601845549,
+      "grad_norm": 0.08683237433433533,
+      "learning_rate": 0.00019872000000000002,
+      "loss": 0.0601,
+      "step": 320
+    },
+    {
+      "epoch": 0.04992058089403222,
+      "grad_norm": 0.07650341093540192,
+      "learning_rate": 0.00019868,
+      "loss": 0.0582,
+      "step": 330
+    },
+    {
+      "epoch": 0.051433325769608955,
+      "grad_norm": 0.054965659976005554,
+      "learning_rate": 0.00019864,
+      "loss": 0.0548,
+      "step": 340
+    },
+    {
+      "epoch": 0.05294607064518569,
+      "grad_norm": 0.06949716061353683,
+      "learning_rate": 0.0001986,
+      "loss": 0.0581,
+      "step": 350
+    },
+    {
+      "epoch": 0.05445881552076242,
+      "grad_norm": 0.10514732450246811,
+      "learning_rate": 0.00019856000000000002,
+      "loss": 0.0587,
+      "step": 360
+    },
+    {
+      "epoch": 0.05597156039633916,
+      "grad_norm": 0.06586117297410965,
+      "learning_rate": 0.00019852000000000002,
+      "loss": 0.0561,
+      "step": 370
+    },
+    {
+      "epoch": 0.05748430527191589,
+      "grad_norm": 0.09821395576000214,
+      "learning_rate": 0.00019848,
+      "loss": 0.0556,
+      "step": 380
+    },
+    {
+      "epoch": 0.058997050147492625,
+      "grad_norm": 0.06488014757633209,
+      "learning_rate": 0.00019844,
+      "loss": 0.0634,
+      "step": 390
+    },
+    {
+      "epoch": 0.06050979502306936,
+      "grad_norm": 0.06910958141088486,
+      "learning_rate": 0.0001984,
+      "loss": 0.052,
+      "step": 400
+    },
+    {
+      "epoch": 0.06050979502306936,
+      "eval_cer": 0.2714758865721352,
+      "eval_loss": 0.04847713187336922,
+      "eval_runtime": 10484.76,
+      "eval_samples_per_second": 2.008,
+      "eval_steps_per_second": 0.251,
+      "step": 400
+    },
+    {
+      "epoch": 0.0620225398986461,
+      "grad_norm": 0.048563435673713684,
+      "learning_rate": 0.00019836000000000002,
+      "loss": 0.0565,
+      "step": 410
+    },
+    {
+      "epoch": 0.06353528477422282,
+      "grad_norm": 0.055841896682977676,
+      "learning_rate": 0.00019832,
+      "loss": 0.0547,
+      "step": 420
+    },
+    {
+      "epoch": 0.06504802964979955,
+      "grad_norm": 0.05644605681300163,
+      "learning_rate": 0.00019828,
+      "loss": 0.0575,
+      "step": 430
+    },
+    {
+      "epoch": 0.0665607745253763,
+      "grad_norm": 0.05617703124880791,
+      "learning_rate": 0.00019824,
+      "loss": 0.0514,
+      "step": 440
+    },
+    {
+      "epoch": 0.06807351940095303,
+      "grad_norm": 0.11480820178985596,
+      "learning_rate": 0.00019820000000000002,
+      "loss": 0.0562,
+      "step": 450
+    },
+    {
+      "epoch": 0.06958626427652977,
+      "grad_norm": 0.06004955247044563,
+      "learning_rate": 0.00019816000000000001,
+      "loss": 0.0575,
+      "step": 460
+    },
+    {
+      "epoch": 0.0710990091521065,
+      "grad_norm": 0.07830873131752014,
+      "learning_rate": 0.00019812,
+      "loss": 0.0621,
+      "step": 470
+    },
+    {
+      "epoch": 0.07261175402768323,
+      "grad_norm": 0.052650969475507736,
+      "learning_rate": 0.00019808,
+      "loss": 0.0599,
+      "step": 480
+    },
+    {
+      "epoch": 0.07412449890325996,
+      "grad_norm": 0.09298545122146606,
+      "learning_rate": 0.00019804,
+      "loss": 0.0559,
+      "step": 490
+    },
+    {
+      "epoch": 0.0756372437788367,
+      "grad_norm": 0.06198689714074135,
+      "learning_rate": 0.00019800000000000002,
+      "loss": 0.047,
+      "step": 500
+    },
+    {
+      "epoch": 0.07714998865441343,
+      "grad_norm": 0.06688915193080902,
+      "learning_rate": 0.00019796,
+      "loss": 0.0523,
+      "step": 510
+    },
+    {
+      "epoch": 0.07866273352999016,
+      "grad_norm": 0.06676903367042542,
+      "learning_rate": 0.00019792000000000003,
+      "loss": 0.0509,
+      "step": 520
+    },
+    {
+      "epoch": 0.08017547840556691,
+      "grad_norm": 0.06219707056879997,
+      "learning_rate": 0.00019788,
+      "loss": 0.0553,
+      "step": 530
+    },
+    {
+      "epoch": 0.08168822328114364,
+      "grad_norm": 0.07905440032482147,
+      "learning_rate": 0.00019784,
+      "loss": 0.0506,
+      "step": 540
+    },
+    {
+      "epoch": 0.08320096815672037,
+      "grad_norm": 0.08591905236244202,
+      "learning_rate": 0.0001978,
+      "loss": 0.0603,
+      "step": 550
+    },
+    {
+      "epoch": 0.0847137130322971,
+      "grad_norm": 0.05921874940395355,
+      "learning_rate": 0.00019776,
+      "loss": 0.0562,
+      "step": 560
+    },
+    {
+      "epoch": 0.08622645790787384,
+      "grad_norm": 0.058868613094091415,
+      "learning_rate": 0.00019772000000000002,
+      "loss": 0.0517,
+      "step": 570
+    },
+    {
+      "epoch": 0.08773920278345057,
+      "grad_norm": 0.06818246096372604,
+      "learning_rate": 0.00019768,
+      "loss": 0.0478,
+      "step": 580
+    },
+    {
+      "epoch": 0.0892519476590273,
+      "grad_norm": 0.07364825904369354,
+      "learning_rate": 0.00019764,
+      "loss": 0.0553,
+      "step": 590
+    },
+    {
+      "epoch": 0.09076469253460404,
+      "grad_norm": 0.07647281885147095,
+      "learning_rate": 0.0001976,
+      "loss": 0.0527,
+      "step": 600
+    },
+    {
+      "epoch": 0.09076469253460404,
+      "eval_cer": 0.282631389088609,
+      "eval_loss": 0.047340717166662216,
+      "eval_runtime": 10466.4392,
+      "eval_samples_per_second": 2.011,
+      "eval_steps_per_second": 0.251,
+      "step": 600
+    },
+    {
+      "epoch": 0.09227743741018077,
+      "grad_norm": 0.0819125548005104,
+      "learning_rate": 0.00019756,
+      "loss": 0.0509,
+      "step": 610
+    },
+    {
+      "epoch": 0.0937901822857575,
+      "grad_norm": 0.06566735357046127,
+      "learning_rate": 0.00019752000000000002,
+      "loss": 0.0583,
+      "step": 620
+    },
+    {
+      "epoch": 0.09530292716133425,
+      "grad_norm": 0.06856215745210648,
+      "learning_rate": 0.00019748,
+      "loss": 0.0465,
+      "step": 630
+    },
+    {
+      "epoch": 0.09681567203691098,
+      "grad_norm": 0.06130633130669594,
+      "learning_rate": 0.00019744,
+      "loss": 0.0509,
+      "step": 640
+    },
+    {
+      "epoch": 0.09832841691248771,
+      "grad_norm": 0.08208902925252914,
+      "learning_rate": 0.0001974,
+      "loss": 0.0549,
+      "step": 650
+    },
+    {
+      "epoch": 0.09984116178806444,
+      "grad_norm": 0.08106379210948944,
+      "learning_rate": 0.00019736000000000002,
+      "loss": 0.0584,
+      "step": 660
+    },
+    {
+      "epoch": 0.10135390666364118,
+      "grad_norm": 0.08364614844322205,
+      "learning_rate": 0.00019732000000000001,
+      "loss": 0.0543,
+      "step": 670
+    },
+    {
+      "epoch": 0.10286665153921791,
+      "grad_norm": 0.06432674080133438,
+      "learning_rate": 0.00019728,
+      "loss": 0.0535,
+      "step": 680
+    },
+    {
+      "epoch": 0.10437939641479464,
+      "grad_norm": 0.07217614352703094,
+      "learning_rate": 0.00019724,
+      "loss": 0.0521,
+      "step": 690
+    },
+    {
+      "epoch": 0.10589214129037137,
+      "grad_norm": 0.06074230372905731,
+      "learning_rate": 0.0001972,
+      "loss": 0.0545,
+      "step": 700
+    },
+    {
+      "epoch": 0.10740488616594811,
+      "grad_norm": 0.04888018220663071,
+      "learning_rate": 0.00019716000000000002,
+      "loss": 0.0445,
+      "step": 710
+    },
+    {
+      "epoch": 0.10891763104152484,
+      "grad_norm": 0.07705683261156082,
+      "learning_rate": 0.00019712,
+      "loss": 0.0491,
+      "step": 720
+    },
+    {
+      "epoch": 0.11043037591710159,
+      "grad_norm": 0.06741231679916382,
+      "learning_rate": 0.00019708000000000003,
+      "loss": 0.053,
+      "step": 730
+    },
+    {
+      "epoch": 0.11194312079267832,
+      "grad_norm": 0.0673738569021225,
+      "learning_rate": 0.00019704,
+      "loss": 0.0473,
+      "step": 740
+    },
+    {
+      "epoch": 0.11345586566825505,
+      "grad_norm": 0.06236235797405243,
+      "learning_rate": 0.00019700000000000002,
+      "loss": 0.0538,
+      "step": 750
+    },
+    {
+      "epoch": 0.11496861054383178,
+      "grad_norm": 0.0538531057536602,
+      "learning_rate": 0.00019696,
+      "loss": 0.0414,
+      "step": 760
+    },
+    {
+      "epoch": 0.11648135541940852,
+      "grad_norm": 0.09818791598081589,
+      "learning_rate": 0.00019692,
+      "loss": 0.0551,
+      "step": 770
+    },
+    {
+      "epoch": 0.11799410029498525,
+      "grad_norm": 0.06459952145814896,
+      "learning_rate": 0.00019688000000000003,
+      "loss": 0.0543,
+      "step": 780
+    },
+    {
+      "epoch": 0.11950684517056198,
+      "grad_norm": 0.09495878219604492,
+      "learning_rate": 0.00019684,
+      "loss": 0.0566,
+      "step": 790
+    },
+    {
+      "epoch": 0.12101959004613871,
+      "grad_norm": 0.06249309703707695,
+      "learning_rate": 0.0001968,
+      "loss": 0.0492,
+      "step": 800
+    },
+    {
+      "epoch": 0.12101959004613871,
+      "eval_cer": 0.0030890735373690806,
+      "eval_loss": 0.046879783272743225,
+      "eval_runtime": 10443.0859,
+      "eval_samples_per_second": 2.016,
+      "eval_steps_per_second": 0.252,
+      "step": 800
+    },
+    {
+      "epoch": 0.12253233492171545,
+      "grad_norm": 0.06483816355466843,
+      "learning_rate": 0.00019676,
+      "loss": 0.048,
+      "step": 810
+    },
+    {
+      "epoch": 0.1240450797972922,
+      "grad_norm": 0.05618014931678772,
+      "learning_rate": 0.00019672000000000003,
+      "loss": 0.0484,
+      "step": 820
+    },
+    {
+      "epoch": 0.1255578246728689,
+      "grad_norm": 0.07441507279872894,
+      "learning_rate": 0.00019668000000000002,
+      "loss": 0.0548,
+      "step": 830
+    },
+    {
+      "epoch": 0.12707056954844564,
+      "grad_norm": 0.05274181067943573,
+      "learning_rate": 0.00019664000000000001,
+      "loss": 0.0619,
+      "step": 840
+    },
+    {
+      "epoch": 0.12858331442402238,
+      "grad_norm": 0.06264190375804901,
+      "learning_rate": 0.0001966,
+      "loss": 0.0525,
+      "step": 850
+    },
+    {
+      "epoch": 0.1300960592995991,
+      "grad_norm": 0.07662319391965866,
+      "learning_rate": 0.00019656,
+      "loss": 0.0532,
+      "step": 860
+    },
+    {
+      "epoch": 0.13160880417517587,
+      "grad_norm": 0.06203316152095795,
+      "learning_rate": 0.00019652000000000002,
+      "loss": 0.0525,
+      "step": 870
+    },
+    {
+      "epoch": 0.1331215490507526,
+      "grad_norm": 0.1326906681060791,
+      "learning_rate": 0.00019648000000000002,
+      "loss": 0.0539,
+      "step": 880
+    },
+    {
+      "epoch": 0.13463429392632933,
+      "grad_norm": 0.10350421816110611,
+      "learning_rate": 0.00019644,
+      "loss": 0.0556,
+      "step": 890
+    },
+    {
+      "epoch": 0.13614703880190607,
+      "grad_norm": 0.049543242901563644,
+      "learning_rate": 0.0001964,
+      "loss": 0.0482,
+      "step": 900
+    },
+    {
+      "epoch": 0.1376597836774828,
+      "grad_norm": 0.11776097118854523,
+      "learning_rate": 0.00019636000000000002,
+      "loss": 0.0538,
+      "step": 910
+    },
+    {
+      "epoch": 0.13917252855305953,
+      "grad_norm": 0.05535553768277168,
+      "learning_rate": 0.00019632000000000002,
+      "loss": 0.052,
+      "step": 920
+    },
+    {
+      "epoch": 0.14068527342863626,
+      "grad_norm": 0.05945896357297897,
+      "learning_rate": 0.00019628,
+      "loss": 0.0491,
+      "step": 930
+    },
+    {
+      "epoch": 0.142198018304213,
+      "grad_norm": 0.1228972002863884,
+      "learning_rate": 0.00019624,
+      "loss": 0.0511,
+      "step": 940
+    },
+    {
+      "epoch": 0.14371076317978973,
+      "grad_norm": 0.08868791162967682,
+      "learning_rate": 0.0001962,
+      "loss": 0.057,
+      "step": 950
+    },
+    {
+      "epoch": 0.14522350805536646,
+      "grad_norm": 0.07960449159145355,
+      "learning_rate": 0.00019616000000000002,
+      "loss": 0.0514,
+      "step": 960
+    },
+    {
+      "epoch": 0.1467362529309432,
+      "grad_norm": 0.06392108649015427,
+      "learning_rate": 0.00019612,
+      "loss": 0.0558,
+      "step": 970
+    },
+    {
+      "epoch": 0.14824899780651993,
+      "grad_norm": 0.07048727571964264,
+      "learning_rate": 0.00019608,
+      "loss": 0.053,
+      "step": 980
+    },
+    {
+      "epoch": 0.14976174268209666,
+      "grad_norm": 0.10491488873958588,
+      "learning_rate": 0.00019604,
+      "loss": 0.0489,
+      "step": 990
+    },
+    {
+      "epoch": 0.1512744875576734,
+      "grad_norm": 0.059835776686668396,
+      "learning_rate": 0.000196,
+      "loss": 0.0474,
+      "step": 1000
+    },
+    {
+      "epoch": 0.1512744875576734,
+      "eval_cer": 0.4367181574025345,
+      "eval_loss": 0.04569260776042938,
+      "eval_runtime": 10457.5718,
+      "eval_samples_per_second": 2.013,
+      "eval_steps_per_second": 0.252,
+      "step": 1000
+    },
+    {
+      "epoch": 0.15278723243325013,
+      "grad_norm": 0.07570289075374603,
+      "learning_rate": 0.00019596000000000001,
+      "loss": 0.0522,
+      "step": 1010
+    },
+    {
+      "epoch": 0.15429997730882686,
+      "grad_norm": 0.09082864969968796,
+      "learning_rate": 0.00019592,
+      "loss": 0.0516,
+      "step": 1020
+    },
+    {
+      "epoch": 0.1558127221844036,
+      "grad_norm": 0.06894449889659882,
+      "learning_rate": 0.00019588000000000003,
+      "loss": 0.0489,
+      "step": 1030
+    },
+    {
+      "epoch": 0.15732546705998032,
+      "grad_norm": 0.05989064276218414,
+      "learning_rate": 0.00019584,
+      "loss": 0.0514,
+      "step": 1040
+    },
+    {
+      "epoch": 0.15883821193555706,
+      "grad_norm": 0.060047443956136703,
+      "learning_rate": 0.00019580000000000002,
+      "loss": 0.047,
+      "step": 1050
+    },
+    {
+      "epoch": 0.16035095681113382,
+      "grad_norm": 0.06459174305200577,
+      "learning_rate": 0.00019576,
+      "loss": 0.0532,
+      "step": 1060
+    },
+    {
+      "epoch": 0.16186370168671055,
+      "grad_norm": 0.061583805829286575,
+      "learning_rate": 0.00019572,
+      "loss": 0.0485,
+      "step": 1070
+    },
+    {
+      "epoch": 0.16337644656228728,
+      "grad_norm": 0.060534268617630005,
+      "learning_rate": 0.00019568000000000002,
+      "loss": 0.0468,
+      "step": 1080
+    },
+    {
+      "epoch": 0.164889191437864,
+      "grad_norm": 0.06731607764959335,
+      "learning_rate": 0.00019564,
+      "loss": 0.0481,
+      "step": 1090
+    },
+    {
+      "epoch": 0.16640193631344075,
+      "grad_norm": 0.0757998675107956,
+      "learning_rate": 0.0001956,
+      "loss": 0.056,
+      "step": 1100
+    },
+    {
+      "epoch": 0.16791468118901748,
+      "grad_norm": 0.08009450882673264,
+      "learning_rate": 0.00019556,
+      "loss": 0.0523,
+      "step": 1110
+    },
+    {
+      "epoch": 0.1694274260645942,
+      "grad_norm": 2.663090944290161,
+      "learning_rate": 0.00019552000000000003,
+      "loss": 0.1404,
+      "step": 1120
+    },
+    {
+      "epoch": 0.17094017094017094,
+      "grad_norm": 14.877944946289062,
+      "learning_rate": 0.00019548000000000002,
+      "loss": 0.1442,
+      "step": 1130
+    },
+    {
+      "epoch": 0.17245291581574768,
+      "grad_norm": 2.8173887729644775,
+      "learning_rate": 0.000195448,
+      "loss": 0.5461,
+      "step": 1140
+    },
+    {
+      "epoch": 0.1739656606913244,
+      "grad_norm": 9.367515563964844,
+      "learning_rate": 0.00019540800000000002,
+      "loss": 0.2832,
+      "step": 1150
+    },
+    {
+      "epoch": 0.17547840556690114,
+      "grad_norm": 0.34991636872291565,
+      "learning_rate": 0.00019536800000000002,
+      "loss": 0.1497,
+      "step": 1160
+    },
+    {
+      "epoch": 0.17699115044247787,
+      "grad_norm": 0.10464385151863098,
+      "learning_rate": 0.000195328,
+      "loss": 0.0686,
+      "step": 1170
+    },
+    {
+      "epoch": 0.1785038953180546,
+      "grad_norm": 0.8961012363433838,
+      "learning_rate": 0.000195288,
+      "loss": 0.0822,
+      "step": 1180
+    },
+    {
+      "epoch": 0.18001664019363134,
+      "grad_norm": 8.467473983764648,
+      "learning_rate": 0.000195248,
+      "loss": 0.0949,
+      "step": 1190
+    },
+    {
+      "epoch": 0.18152938506920807,
+      "grad_norm": 0.08059060573577881,
+      "learning_rate": 0.00019520800000000002,
+      "loss": 0.0552,
+      "step": 1200
+    },
+    {
+      "epoch": 0.18152938506920807,
+      "eval_cer": 0.0833932493767496,
+      "eval_loss": 0.04637393727898598,
+      "eval_runtime": 10459.5021,
+      "eval_samples_per_second": 2.013,
+      "eval_steps_per_second": 0.252,
+      "step": 1200
+    },
+    {
+      "epoch": 0.1830421299447848,
+      "grad_norm": 0.08795847743749619,
+      "learning_rate": 0.000195168,
+      "loss": 0.055,
+      "step": 1210
+    },
+    {
+      "epoch": 0.18455487482036154,
+      "grad_norm": 0.10272721946239471,
+      "learning_rate": 0.000195128,
+      "loss": 0.0557,
+      "step": 1220
+    },
+    {
+      "epoch": 0.18606761969593827,
+      "grad_norm": 0.23404774069786072,
+      "learning_rate": 0.000195088,
+      "loss": 0.0611,
+      "step": 1230
+    },
+    {
+      "epoch": 0.187580364571515,
+      "grad_norm": 0.2968621253967285,
+      "learning_rate": 0.00019504800000000002,
+      "loss": 0.0817,
+      "step": 1240
+    },
+    {
+      "epoch": 0.18909310944709176,
+      "grad_norm": 0.08634278923273087,
+      "learning_rate": 0.00019500800000000001,
+      "loss": 0.0685,
+      "step": 1250
+    },
+    {
+      "epoch": 0.1906058543226685,
+      "grad_norm": 0.11241244524717331,
+      "learning_rate": 0.000194968,
+      "loss": 0.0563,
+      "step": 1260
+    },
+    {
+      "epoch": 0.19211859919824523,
+      "grad_norm": 0.17380298674106598,
+      "learning_rate": 0.000194928,
+      "loss": 0.065,
+      "step": 1270
+    },
+    {
+      "epoch": 0.19363134407382196,
+      "grad_norm": 0.13615791499614716,
+      "learning_rate": 0.000194888,
+      "loss": 0.0667,
+      "step": 1280
+    },
+    {
+      "epoch": 0.1951440889493987,
+      "grad_norm": 0.0854301005601883,
+      "learning_rate": 0.00019484800000000002,
+      "loss": 0.0507,
+      "step": 1290
+    },
+    {
+      "epoch": 0.19665683382497542,
+      "grad_norm": 0.08915933966636658,
+      "learning_rate": 0.000194808,
+      "loss": 0.0561,
+      "step": 1300
+    },
+    {
+      "epoch": 0.19816957870055216,
+      "grad_norm": 0.09583040326833725,
+      "learning_rate": 0.00019476800000000003,
+      "loss": 0.0514,
+      "step": 1310
+    },
+    {
+      "epoch": 0.1996823235761289,
+      "grad_norm": 0.09624961763620377,
+      "learning_rate": 0.000194728,
+      "loss": 0.052,
+      "step": 1320
+    },
+    {
+      "epoch": 0.20119506845170562,
+      "grad_norm": 0.05612370744347572,
+      "learning_rate": 0.00019468800000000002,
+      "loss": 0.0471,
+      "step": 1330
+    },
+    {
+      "epoch": 0.20270781332728235,
+      "grad_norm": 0.0653730109333992,
+      "learning_rate": 0.000194648,
+      "loss": 0.0521,
+      "step": 1340
+    },
+    {
+      "epoch": 0.2042205582028591,
+      "grad_norm": 0.07432978600263596,
+      "learning_rate": 0.000194608,
+      "loss": 0.0577,
+      "step": 1350
+    },
+    {
+      "epoch": 0.20573330307843582,
+      "grad_norm": 0.05863150209188461,
+      "learning_rate": 0.00019456800000000003,
+      "loss": 0.0435,
+      "step": 1360
+    },
+    {
+      "epoch": 0.20724604795401255,
+      "grad_norm": 0.056969739496707916,
+      "learning_rate": 0.000194528,
+      "loss": 0.0502,
+      "step": 1370
+    },
+    {
+      "epoch": 0.20875879282958928,
+      "grad_norm": 0.10658754408359528,
+      "learning_rate": 0.000194488,
+      "loss": 0.0469,
+      "step": 1380
+    },
+    {
+      "epoch": 0.21027153770516602,
+      "grad_norm": 0.06535681337118149,
+      "learning_rate": 0.000194448,
+      "loss": 0.0519,
+      "step": 1390
+    },
+    {
+      "epoch": 0.21178428258074275,
+      "grad_norm": 0.08987314254045486,
+      "learning_rate": 0.000194408,
+      "loss": 0.0482,
+      "step": 1400
+    },
+    {
+      "epoch": 0.21178428258074275,
+      "eval_cer": 0.14607469615771385,
+      "eval_loss": 0.04351452365517616,
+      "eval_runtime": 10473.9712,
+      "eval_samples_per_second": 2.01,
+      "eval_steps_per_second": 0.251,
+      "step": 1400
+    },
+    {
+      "epoch": 0.21329702745631948,
+      "grad_norm": 0.09238473325967789,
+      "learning_rate": 0.00019436800000000002,
+      "loss": 0.0483,
+      "step": 1410
+    },
+    {
+      "epoch": 0.21480977233189621,
+      "grad_norm": 0.10443761199712753,
+      "learning_rate": 0.000194328,
+      "loss": 0.054,
+      "step": 1420
+    },
+    {
+      "epoch": 0.21632251720747295,
+      "grad_norm": 0.0742131844162941,
+      "learning_rate": 0.000194288,
+      "loss": 0.0507,
+      "step": 1430
+    },
+    {
+      "epoch": 0.21783526208304968,
+      "grad_norm": 0.09358492493629456,
+      "learning_rate": 0.000194248,
+      "loss": 0.0496,
+      "step": 1440
+    },
+    {
+      "epoch": 0.21934800695862644,
+      "grad_norm": 0.07695715129375458,
+      "learning_rate": 0.00019420800000000002,
+      "loss": 0.046,
+      "step": 1450
+    },
+    {
+      "epoch": 0.22086075183420317,
+      "grad_norm": 0.07772234827280045,
+      "learning_rate": 0.00019416800000000002,
+      "loss": 0.0468,
+      "step": 1460
+    },
+    {
+      "epoch": 0.2223734967097799,
+      "grad_norm": 0.04500894993543625,
+      "learning_rate": 0.000194128,
+      "loss": 0.0428,
+      "step": 1470
+    },
+    {
+      "epoch": 0.22388624158535664,
+      "grad_norm": 0.08258084207773209,
+      "learning_rate": 0.000194088,
+      "loss": 0.0542,
+      "step": 1480
+    },
+    {
+      "epoch": 0.22539898646093337,
+      "grad_norm": 0.06530752032995224,
+      "learning_rate": 0.000194048,
+      "loss": 0.0477,
+      "step": 1490
+    },
+    {
+      "epoch": 0.2269117313365101,
+      "grad_norm": 0.06770725548267365,
+      "learning_rate": 0.00019400800000000002,
+      "loss": 0.052,
+      "step": 1500
+    },
+    {
+      "epoch": 0.22842447621208684,
+      "grad_norm": 0.04499737173318863,
+      "learning_rate": 0.000193968,
+      "loss": 0.0392,
+      "step": 1510
+    },
+    {
+      "epoch": 0.22993722108766357,
+      "grad_norm": 0.0594199039041996,
+      "learning_rate": 0.000193928,
+      "loss": 0.0469,
+      "step": 1520
+    },
+    {
+      "epoch": 0.2314499659632403,
+      "grad_norm": 0.05143499746918678,
+      "learning_rate": 0.000193888,
+      "loss": 0.0384,
+      "step": 1530
+    },
+    {
+      "epoch": 0.23296271083881703,
+      "grad_norm": 0.05464276298880577,
+      "learning_rate": 0.00019384800000000002,
+      "loss": 0.0479,
+      "step": 1540
+    },
+    {
+      "epoch": 0.23447545571439377,
+      "grad_norm": 0.0698809027671814,
+      "learning_rate": 0.000193808,
+      "loss": 0.0493,
+      "step": 1550
+    },
+    {
+      "epoch": 0.2359882005899705,
+      "grad_norm": 0.059237249195575714,
+      "learning_rate": 0.000193768,
+      "loss": 0.0493,
+      "step": 1560
+    },
+    {
+      "epoch": 0.23750094546554723,
+      "grad_norm": 0.08654357492923737,
+      "learning_rate": 0.000193728,
+      "loss": 0.0481,
+      "step": 1570
+    },
+    {
+      "epoch": 0.23901369034112396,
+      "grad_norm": 0.19063305854797363,
+      "learning_rate": 0.000193688,
+      "loss": 0.051,
+      "step": 1580
+    },
+    {
+      "epoch": 0.2405264352167007,
+      "grad_norm": 0.08095410466194153,
+      "learning_rate": 0.000193648,
+      "loss": 0.0447,
+      "step": 1590
+    },
+    {
+      "epoch": 0.24203918009227743,
+      "grad_norm": 0.056007932871580124,
+      "learning_rate": 0.000193608,
+      "loss": 0.0431,
+      "step": 1600
+    },
+    {
+      "epoch": 0.24203918009227743,
+      "eval_cer": 0.1667197881072213,
+      "eval_loss": 0.04373455420136452,
+      "eval_runtime": 10595.1515,
+      "eval_samples_per_second": 1.987,
+      "eval_steps_per_second": 0.248,
+      "step": 1600
+    },
+    {
+      "epoch": 0.24355192496785416,
+      "grad_norm": 0.06981740891933441,
+      "learning_rate": 0.00019356800000000003,
+      "loss": 0.0442,
+      "step": 1610
+    },
+    {
+      "epoch": 0.2450646698434309,
+      "grad_norm": 0.10189545899629593,
+      "learning_rate": 0.000193528,
+      "loss": 0.0477,
+      "step": 1620
+    },
+    {
+      "epoch": 0.24657741471900763,
+      "grad_norm": 0.06565351039171219,
+      "learning_rate": 0.00019348800000000002,
+      "loss": 0.0532,
+      "step": 1630
+    },
+    {
+      "epoch": 0.2480901595945844,
+      "grad_norm": 0.06872796267271042,
+      "learning_rate": 0.000193448,
+      "loss": 0.0472,
+      "step": 1640
+    },
+    {
+      "epoch": 0.24960290447016112,
+      "grad_norm": 0.06040889397263527,
+      "learning_rate": 0.000193408,
+      "loss": 0.0463,
+      "step": 1650
+    },
+    {
+      "epoch": 0.2511156493457378,
+      "grad_norm": 0.08789139986038208,
+      "learning_rate": 0.00019336800000000002,
+      "loss": 0.0495,
+      "step": 1660
+    },
+    {
+      "epoch": 0.25262839422131456,
+      "grad_norm": 0.0869157686829567,
+      "learning_rate": 0.00019332800000000002,
+      "loss": 0.0491,
+      "step": 1670
+    },
+    {
+      "epoch": 0.2541411390968913,
+      "grad_norm": 0.06886725127696991,
+      "learning_rate": 0.000193288,
+      "loss": 0.0508,
+      "step": 1680
+    },
+    {
+      "epoch": 0.255653883972468,
+      "grad_norm": 0.06138046458363533,
+      "learning_rate": 0.000193248,
+      "loss": 0.0435,
+      "step": 1690
+    },
+    {
+      "epoch": 0.25716662884804475,
+      "grad_norm": 0.05554139241576195,
+      "learning_rate": 0.00019320800000000002,
+      "loss": 0.0483,
+      "step": 1700
+    },
+    {
+      "epoch": 0.2586793737236215,
+      "grad_norm": 0.06712419539690018,
+      "learning_rate": 0.00019316800000000002,
+      "loss": 0.0545,
+      "step": 1710
+    },
+    {
+      "epoch": 0.2601921185991982,
+      "grad_norm": 0.07289120554924011,
+      "learning_rate": 0.000193128,
+      "loss": 0.0481,
+      "step": 1720
+    },
+    {
+      "epoch": 0.261704863474775,
+      "grad_norm": 0.07003842294216156,
+      "learning_rate": 0.000193088,
+      "loss": 0.0493,
+      "step": 1730
+    },
+    {
+      "epoch": 0.26321760835035174,
+      "grad_norm": 0.06333723664283752,
+      "learning_rate": 0.000193048,
+      "loss": 0.0536,
+      "step": 1740
+    },
+    {
+      "epoch": 0.26473035322592847,
+      "grad_norm": 0.0609460324048996,
+      "learning_rate": 0.00019300800000000002,
+      "loss": 0.0516,
+      "step": 1750
+    },
+    {
+      "epoch": 0.2662430981015052,
+      "grad_norm": 0.14176234602928162,
+      "learning_rate": 0.000192968,
+      "loss": 0.0522,
+      "step": 1760
+    },
+    {
+      "epoch": 0.26775584297708194,
+      "grad_norm": 0.09526730328798294,
+      "learning_rate": 0.000192928,
+      "loss": 0.0468,
+      "step": 1770
+    },
+    {
+      "epoch": 0.26926858785265867,
+      "grad_norm": 0.05794398859143257,
+      "learning_rate": 0.000192888,
+      "loss": 0.051,
+      "step": 1780
+    },
+    {
+      "epoch": 0.2707813327282354,
+      "grad_norm": 0.07408788055181503,
+      "learning_rate": 0.000192848,
+      "loss": 0.0482,
+      "step": 1790
+    },
+    {
+      "epoch": 0.27229407760381213,
+      "grad_norm": 0.07873456180095673,
+      "learning_rate": 0.00019280800000000001,
+      "loss": 0.0576,
+      "step": 1800
+    },
+    {
+      "epoch": 0.27229407760381213,
+      "eval_cer": 0.28151275038111545,
+      "eval_loss": 0.042666129767894745,
+      "eval_runtime": 10460.0372,
+      "eval_samples_per_second": 2.013,
+      "eval_steps_per_second": 0.252,
+      "step": 1800
+    },
+    {
+      "epoch": 0.27380682247938887,
+      "grad_norm": 0.06786733120679855,
+      "learning_rate": 0.000192768,
+      "loss": 0.0505,
+      "step": 1810
+    },
+    {
+      "epoch": 0.2753195673549656,
+      "grad_norm": 0.090096116065979,
+      "learning_rate": 0.00019272800000000003,
+      "loss": 0.0458,
+      "step": 1820
+    },
+    {
+      "epoch": 0.27683231223054233,
+      "grad_norm": 0.058033574372529984,
+      "learning_rate": 0.000192688,
+      "loss": 0.0415,
+      "step": 1830
+    },
+    {
+      "epoch": 0.27834505710611906,
+      "grad_norm": 0.09522871673107147,
+      "learning_rate": 0.00019264800000000002,
+      "loss": 0.0456,
+      "step": 1840
+    },
+    {
+      "epoch": 0.2798578019816958,
+      "grad_norm": 0.06533698737621307,
+      "learning_rate": 0.000192608,
+      "loss": 0.045,
+      "step": 1850
+    },
+    {
+      "epoch": 0.28137054685727253,
+      "grad_norm": 0.07162319868803024,
+      "learning_rate": 0.000192568,
+      "loss": 0.0511,
+      "step": 1860
+    },
+    {
+      "epoch": 0.28288329173284926,
+      "grad_norm": 0.06015852093696594,
+      "learning_rate": 0.00019252800000000002,
+      "loss": 0.0453,
+      "step": 1870
+    },
+    {
+      "epoch": 0.284396036608426,
+      "grad_norm": 0.0789792612195015,
+      "learning_rate": 0.000192488,
+      "loss": 0.0498,
+      "step": 1880
+    },
+    {
+      "epoch": 0.2859087814840027,
+      "grad_norm": 0.05619093030691147,
+      "learning_rate": 0.000192448,
+      "loss": 0.0454,
+      "step": 1890
+    },
+    {
+      "epoch": 0.28742152635957946,
+      "grad_norm": 0.061943668872117996,
+      "learning_rate": 0.000192408,
+      "loss": 0.0496,
+      "step": 1900
+    },
+    {
+      "epoch": 0.2889342712351562,
+      "grad_norm": 0.07192958891391754,
+      "learning_rate": 0.00019236800000000003,
+      "loss": 0.05,
+      "step": 1910
+    },
+    {
+      "epoch": 0.2904470161107329,
+      "grad_norm": 0.07053862512111664,
+      "learning_rate": 0.00019232800000000002,
+      "loss": 0.0504,
+      "step": 1920
+    },
+    {
+      "epoch": 0.29195976098630966,
+      "grad_norm": 0.06491555273532867,
+      "learning_rate": 0.000192288,
+      "loss": 0.0478,
+      "step": 1930
+    },
+    {
+      "epoch": 0.2934725058618864,
+      "grad_norm": 0.06389233469963074,
+      "learning_rate": 0.000192248,
+      "loss": 0.0469,
+      "step": 1940
+    },
+    {
+      "epoch": 0.2949852507374631,
+      "grad_norm": 0.06336333602666855,
+      "learning_rate": 0.000192208,
+      "loss": 0.0472,
+      "step": 1950
+    },
+    {
+      "epoch": 0.29649799561303986,
+      "grad_norm": 0.06351201981306076,
+      "learning_rate": 0.00019216800000000002,
+      "loss": 0.0459,
+      "step": 1960
+    },
+    {
+      "epoch": 0.2980107404886166,
+      "grad_norm": 0.0773550271987915,
+      "learning_rate": 0.00019212800000000001,
+      "loss": 0.0435,
+      "step": 1970
+    },
+    {
+      "epoch": 0.2995234853641933,
+      "grad_norm": 0.07999245822429657,
+      "learning_rate": 0.000192088,
+      "loss": 0.051,
+      "step": 1980
+    },
+    {
+      "epoch": 0.30103623023977005,
+      "grad_norm": 0.05664638802409172,
+      "learning_rate": 0.000192048,
+      "loss": 0.0493,
+      "step": 1990
+    },
+    {
+      "epoch": 0.3025489751153468,
+      "grad_norm": 0.050149012356996536,
+      "learning_rate": 0.00019200800000000002,
+      "loss": 0.0491,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3025489751153468,
+      "eval_cer": 0.10787543886957575,
+      "eval_loss": 0.042158011347055435,
+      "eval_runtime": 10458.1763,
+      "eval_samples_per_second": 2.013,
+      "eval_steps_per_second": 0.252,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3040617199909235,
+      "grad_norm": 0.06383787840604782,
+      "learning_rate": 0.00019196800000000002,
+      "loss": 0.0421,
+      "step": 2010
+    },
+    {
+      "epoch": 0.30557446486650025,
+      "grad_norm": 0.05740641430020332,
+      "learning_rate": 0.000191928,
+      "loss": 0.0499,
+      "step": 2020
+    },
+    {
+      "epoch": 0.307087209742077,
+      "grad_norm": 0.07163075357675552,
+      "learning_rate": 0.000191888,
+      "loss": 0.0431,
+      "step": 2030
+    },
+    {
+      "epoch": 0.3085999546176537,
+      "grad_norm": 0.05976075306534767,
+      "learning_rate": 0.000191848,
+      "loss": 0.0476,
+      "step": 2040
+    },
+    {
+      "epoch": 0.31011269949323045,
+      "grad_norm": 0.0871894434094429,
+      "learning_rate": 0.00019180800000000002,
+      "loss": 0.0449,
+      "step": 2050
+    },
+    {
+      "epoch": 0.3116254443688072,
+      "grad_norm": 0.07474277913570404,
+      "learning_rate": 0.000191768,
+      "loss": 0.0422,
+      "step": 2060
+    },
+    {
+      "epoch": 0.3131381892443839,
+      "grad_norm": 0.05594407767057419,
+      "learning_rate": 0.00019172800000000003,
+      "loss": 0.0479,
+      "step": 2070
+    },
+    {
+      "epoch": 0.31465093411996065,
+      "grad_norm": 0.06565164029598236,
+      "learning_rate": 0.000191688,
+      "loss": 0.0501,
+      "step": 2080
+    },
+    {
+      "epoch": 0.3161636789955374,
+      "grad_norm": 0.07224603742361069,
+      "learning_rate": 0.000191648,
+      "loss": 0.0474,
+      "step": 2090
+    },
+    {
+      "epoch": 0.3176764238711141,
+      "grad_norm": 0.07781083881855011,
+      "learning_rate": 0.000191608,
+      "loss": 0.0401,
+      "step": 2100
+    },
+    {
+      "epoch": 0.31918916874669084,
+      "grad_norm": 0.08147955685853958,
+      "learning_rate": 0.000191568,
+      "loss": 0.0486,
+      "step": 2110
+    },
+    {
+      "epoch": 0.32070191362226763,
+      "grad_norm": 0.05572337657213211,
+      "learning_rate": 0.00019152800000000003,
+      "loss": 0.0488,
+      "step": 2120
+    },
+    {
+      "epoch": 0.32221465849784436,
+      "grad_norm": 0.06601813435554504,
+      "learning_rate": 0.000191488,
+      "loss": 0.0466,
+      "step": 2130
+    },
+    {
+      "epoch": 0.3237274033734211,
+      "grad_norm": 0.057904861867427826,
+      "learning_rate": 0.00019144800000000001,
+      "loss": 0.0479,
+      "step": 2140
+    },
+    {
+      "epoch": 0.32524014824899783,
+      "grad_norm": 0.057231709361076355,
+      "learning_rate": 0.000191408,
+      "loss": 0.0522,
+      "step": 2150
+    },
+    {
+      "epoch": 0.32675289312457456,
+      "grad_norm": 0.08306867629289627,
+      "learning_rate": 0.000191368,
+      "loss": 0.0439,
+      "step": 2160
+    },
+    {
+      "epoch": 0.3282656380001513,
+      "grad_norm": 0.0742512047290802,
+      "learning_rate": 0.00019132800000000002,
+      "loss": 0.0434,
+      "step": 2170
+    },
+    {
+      "epoch": 0.329778382875728,
+      "grad_norm": 0.07260335236787796,
+      "learning_rate": 0.000191288,
+      "loss": 0.0505,
+      "step": 2180
+    },
+    {
+      "epoch": 0.33129112775130476,
+      "grad_norm": 0.07398936152458191,
+      "learning_rate": 0.000191248,
+      "loss": 0.0519,
+      "step": 2190
+    },
+    {
+      "epoch": 0.3328038726268815,
+      "grad_norm": 0.069728784263134,
+      "learning_rate": 0.000191208,
+      "loss": 0.0501,
+      "step": 2200
+    },
+    {
+      "epoch": 0.3328038726268815,
+      "eval_cer": 0.07287520414693144,
+      "eval_loss": 0.041937489062547684,
+      "eval_runtime": 10449.7877,
+      "eval_samples_per_second": 2.015,
+      "eval_steps_per_second": 0.252,
+      "step": 2200
+    },
+    {
+      "epoch": 0.3343166175024582,
+      "grad_norm": 0.07778773456811905,
+      "learning_rate": 0.00019116800000000002,
+      "loss": 0.0485,
+      "step": 2210
+    },
+    {
+      "epoch": 0.33582936237803496,
+      "grad_norm": 0.08489017933607101,
+      "learning_rate": 0.00019112800000000002,
+      "loss": 0.047,
+      "step": 2220
+    },
+    {
+      "epoch": 0.3373421072536117,
+      "grad_norm": 0.0746629610657692,
+      "learning_rate": 0.000191088,
+      "loss": 0.0444,
+      "step": 2230
+    },
+    {
+      "epoch": 0.3388548521291884,
+      "grad_norm": 0.07858649641275406,
+      "learning_rate": 0.000191048,
+      "loss": 0.0537,
+      "step": 2240
+    },
+    {
+      "epoch": 0.34036759700476515,
+      "grad_norm": 0.08357574045658112,
+      "learning_rate": 0.000191008,
+      "loss": 0.054,
+      "step": 2250
+    },
+    {
+      "epoch": 0.3418803418803419,
+      "grad_norm": 0.05976574867963791,
+      "learning_rate": 0.00019096800000000002,
+      "loss": 0.0465,
+      "step": 2260
+    },
+    {
+      "epoch": 0.3433930867559186,
+      "grad_norm": 0.07549616694450378,
+      "learning_rate": 0.000190928,
+      "loss": 0.0479,
+      "step": 2270
+    },
+    {
+      "epoch": 0.34490583163149535,
+      "grad_norm": 0.07128783315420151,
+      "learning_rate": 0.000190888,
+      "loss": 0.0481,
+      "step": 2280
+    },
+    {
+      "epoch": 0.3464185765070721,
+      "grad_norm": 0.05093182995915413,
+      "learning_rate": 0.000190848,
+      "loss": 0.039,
+      "step": 2290
+    },
+    {
+      "epoch": 0.3479313213826488,
+      "grad_norm": 0.07213055342435837,
+      "learning_rate": 0.00019080800000000002,
+      "loss": 0.0486,
+      "step": 2300
+    },
+    {
+      "epoch": 0.34944406625822555,
+      "grad_norm": 0.08296896517276764,
+      "learning_rate": 0.00019076800000000001,
+      "loss": 0.0436,
+      "step": 2310
+    },
+    {
+      "epoch": 0.3509568111338023,
+      "grad_norm": 0.05904708430171013,
+      "learning_rate": 0.000190728,
+      "loss": 0.0457,
+      "step": 2320
+    },
+    {
+      "epoch": 0.352469556009379,
+      "grad_norm": 0.07709085941314697,
+      "learning_rate": 0.000190688,
+      "loss": 0.0456,
+      "step": 2330
+    },
+    {
+      "epoch": 0.35398230088495575,
+      "grad_norm": 0.061139535158872604,
+      "learning_rate": 0.000190648,
+      "loss": 0.0484,
+      "step": 2340
+    },
+    {
+      "epoch": 0.3554950457605325,
+      "grad_norm": 0.11013538390398026,
+      "learning_rate": 0.00019060800000000002,
+      "loss": 0.0463,
+      "step": 2350
+    },
+    {
+      "epoch": 0.3570077906361092,
+      "grad_norm": 0.04920123890042305,
+      "learning_rate": 0.000190568,
+      "loss": 0.0404,
+      "step": 2360
+    },
+    {
+      "epoch": 0.35852053551168594,
+      "grad_norm": 0.05916327238082886,
+      "learning_rate": 0.00019052800000000003,
+      "loss": 0.0506,
+      "step": 2370
+    },
+    {
+      "epoch": 0.3600332803872627,
+      "grad_norm": 0.08169171214103699,
+      "learning_rate": 0.000190488,
+      "loss": 0.0422,
+      "step": 2380
+    },
+    {
+      "epoch": 0.3615460252628394,
+      "grad_norm": 0.07195686548948288,
+      "learning_rate": 0.00019044800000000002,
+      "loss": 0.0476,
+      "step": 2390
+    },
+    {
+      "epoch": 0.36305877013841614,
+      "grad_norm": 0.06132512912154198,
+      "learning_rate": 0.000190408,
+      "loss": 0.0451,
+      "step": 2400
+    },
+    {
+      "epoch": 0.36305877013841614,
+      "eval_cer": 0.22885396051223894,
+      "eval_loss": 0.04164993762969971,
+      "eval_runtime": 10444.7845,
+      "eval_samples_per_second": 2.016,
+      "eval_steps_per_second": 0.252,
+      "step": 2400
+    },
+    {
+      "epoch": 0.3645715150139929,
+      "grad_norm": 0.06889329850673676,
+      "learning_rate": 0.000190368,
+      "loss": 0.0536,
+      "step": 2410
+    },
+    {
+      "epoch": 0.3660842598895696,
+      "grad_norm": 0.06513672322034836,
+      "learning_rate": 0.00019032800000000002,
+      "loss": 0.0472,
+      "step": 2420
+    },
+    {
+      "epoch": 0.36759700476514634,
+      "grad_norm": 0.06588304787874222,
+      "learning_rate": 0.000190288,
+      "loss": 0.046,
+      "step": 2430
+    },
+    {
+      "epoch": 0.3691097496407231,
+      "grad_norm": 0.07162468135356903,
+      "learning_rate": 0.000190248,
+      "loss": 0.0444,
+      "step": 2440
+    },
+    {
+      "epoch": 0.3706224945162998,
+      "grad_norm": 0.05831474810838699,
+      "learning_rate": 0.000190208,
+      "loss": 0.0448,
+      "step": 2450
+    },
+    {
+      "epoch": 0.37213523939187654,
+      "grad_norm": 0.11214031279087067,
+      "learning_rate": 0.000190168,
+      "loss": 0.0491,
+      "step": 2460
+    },
+    {
+      "epoch": 0.37364798426745327,
+      "grad_norm": 0.07672178000211716,
+      "learning_rate": 0.00019012800000000002,
+      "loss": 0.0489,
+      "step": 2470
+    },
+    {
+      "epoch": 0.37516072914303,
+      "grad_norm": 0.07850979268550873,
+      "learning_rate": 0.000190088,
+      "loss": 0.047,
+      "step": 2480
+    },
+    {
+      "epoch": 0.37667347401860674,
+      "grad_norm": 0.0473526194691658,
+      "learning_rate": 0.000190048,
+      "loss": 0.0436,
+      "step": 2490
+    },
+    {
+      "epoch": 0.3781862188941835,
+      "grad_norm": 0.08313214778900146,
+      "learning_rate": 0.000190008,
+      "loss": 0.0457,
+      "step": 2500
+    },
+    {
+      "epoch": 0.37969896376976026,
+      "grad_norm": 0.07851678878068924,
+      "learning_rate": 0.00018996800000000002,
+      "loss": 0.0399,
+      "step": 2510
+    },
+    {
+      "epoch": 0.381211708645337,
+      "grad_norm": 0.06067463755607605,
+      "learning_rate": 0.00018992800000000002,
+      "loss": 0.0406,
+      "step": 2520
+    },
+    {
+      "epoch": 0.3827244535209137,
+      "grad_norm": 0.07291869819164276,
+      "learning_rate": 0.000189888,
+      "loss": 0.0411,
+      "step": 2530
+    },
+    {
+      "epoch": 0.38423719839649045,
+      "grad_norm": 0.05576318874955177,
+      "learning_rate": 0.000189848,
+      "loss": 0.0412,
+      "step": 2540
+    },
+    {
+      "epoch": 0.3857499432720672,
+      "grad_norm": 0.05669853091239929,
+      "learning_rate": 0.000189808,
+      "loss": 0.0462,
+      "step": 2550
+    },
+    {
+      "epoch": 0.3872626881476439,
+      "grad_norm": 0.0653596743941307,
+      "learning_rate": 0.00018976800000000002,
+      "loss": 0.0504,
+      "step": 2560
+    },
+    {
+      "epoch": 0.38877543302322065,
+      "grad_norm": 0.07938168197870255,
+      "learning_rate": 0.000189728,
+      "loss": 0.0423,
+      "step": 2570
+    },
+    {
+      "epoch": 0.3902881778987974,
+      "grad_norm": 0.19600598514080048,
+      "learning_rate": 0.000189688,
+      "loss": 0.0422,
+      "step": 2580
+    },
+    {
+      "epoch": 0.3918009227743741,
+      "grad_norm": 0.08753781765699387,
+      "learning_rate": 0.000189648,
+      "loss": 0.0485,
+      "step": 2590
+    },
+    {
+      "epoch": 0.39331366764995085,
+      "grad_norm": 0.07059615105390549,
+      "learning_rate": 0.00018960800000000002,
+      "loss": 0.0441,
+      "step": 2600
+    },
+    {
+      "epoch": 0.39331366764995085,
+      "eval_cer": 0.12797016798729038,
+      "eval_loss": 0.040877681225538254,
+      "eval_runtime": 10426.9488,
+      "eval_samples_per_second": 2.019,
+      "eval_steps_per_second": 0.252,
+      "step": 2600
+    },
+    {
+      "epoch": 0.3948264125255276,
+      "grad_norm": 0.07426866888999939,
+      "learning_rate": 0.000189568,
+      "loss": 0.0456,
+      "step": 2610
+    },
+    {
+      "epoch": 0.3963391574011043,
+      "grad_norm": 0.05869770795106888,
+      "learning_rate": 0.000189528,
+      "loss": 0.047,
+      "step": 2620
+    },
+    {
+      "epoch": 0.39785190227668105,
+      "grad_norm": 0.09353045374155045,
+      "learning_rate": 0.000189488,
+      "loss": 0.0457,
+      "step": 2630
+    },
+    {
+      "epoch": 0.3993646471522578,
+      "grad_norm": 0.083396315574646,
+      "learning_rate": 0.000189448,
+      "loss": 0.0441,
+      "step": 2640
+    },
+    {
+      "epoch": 0.4008773920278345,
+      "grad_norm": 0.0698527917265892,
+      "learning_rate": 0.000189408,
+      "loss": 0.0469,
+      "step": 2650
+    },
+    {
+      "epoch": 0.40239013690341124,
+      "grad_norm": 0.07554033398628235,
+      "learning_rate": 0.000189368,
+      "loss": 0.0523,
+      "step": 2660
+    },
+    {
+      "epoch": 0.403902881778988,
+      "grad_norm": 0.08026187121868134,
+      "learning_rate": 0.00018932800000000003,
+      "loss": 0.0492,
+      "step": 2670
+    },
+    {
+      "epoch": 0.4054156266545647,
+      "grad_norm": 0.0758117213845253,
+      "learning_rate": 0.000189288,
+      "loss": 0.0471,
+      "step": 2680
+    },
+    {
+      "epoch": 0.40692837153014144,
+      "grad_norm": 0.0716470330953598,
+      "learning_rate": 0.00018924800000000001,
+      "loss": 0.0401,
+      "step": 2690
+    },
+    {
+      "epoch": 0.4084411164057182,
+      "grad_norm": 0.07114976644515991,
+      "learning_rate": 0.000189208,
+      "loss": 0.0483,
+      "step": 2700
+    },
+    {
+      "epoch": 0.4099538612812949,
+      "grad_norm": 0.059242133051157,
+      "learning_rate": 0.000189168,
+      "loss": 0.0416,
+      "step": 2710
+    },
+    {
+      "epoch": 0.41146660615687164,
+      "grad_norm": 0.07214327901601791,
+      "learning_rate": 0.00018912800000000002,
+      "loss": 0.0446,
+      "step": 2720
+    },
+    {
+      "epoch": 0.41297935103244837,
+      "grad_norm": 0.0404672808945179,
+      "learning_rate": 0.000189088,
+      "loss": 0.0445,
+      "step": 2730
+    },
+    {
+      "epoch": 0.4144920959080251,
+      "grad_norm": 0.06663410365581512,
+      "learning_rate": 0.000189048,
+      "loss": 0.0435,
+      "step": 2740
+    },
+    {
+      "epoch": 0.41600484078360184,
+      "grad_norm": 0.0690486952662468,
+      "learning_rate": 0.000189008,
+      "loss": 0.048,
+      "step": 2750
+    },
+    {
+      "epoch": 0.41751758565917857,
+      "grad_norm": 0.07034830003976822,
+      "learning_rate": 0.00018896800000000002,
+      "loss": 0.0423,
+      "step": 2760
+    },
+    {
+      "epoch": 0.4190303305347553,
+      "grad_norm": 0.08420894294977188,
+      "learning_rate": 0.00018892800000000002,
+      "loss": 0.0525,
+      "step": 2770
+    },
+    {
+      "epoch": 0.42054307541033203,
+      "grad_norm": 0.07617480307817459,
+      "learning_rate": 0.000188888,
+      "loss": 0.0492,
+      "step": 2780
+    },
+    {
+      "epoch": 0.42205582028590877,
+      "grad_norm": 0.06841789186000824,
+      "learning_rate": 0.000188848,
+      "loss": 0.0427,
+      "step": 2790
+    },
+    {
+      "epoch": 0.4235685651614855,
+      "grad_norm": 0.07013357430696487,
+      "learning_rate": 0.000188808,
+      "loss": 0.04,
+      "step": 2800
+    },
+    {
+      "epoch": 0.4235685651614855,
+      "eval_cer": 0.26005539454405746,
+      "eval_loss": 0.04089580848813057,
+      "eval_runtime": 10530.3682,
+      "eval_samples_per_second": 1.999,
+      "eval_steps_per_second": 0.25,
+      "step": 2800
+    },
+    {
+      "epoch": 0.42508131003706223,
+      "grad_norm": 0.06432001292705536,
+      "learning_rate": 0.00018876800000000002,
+      "loss": 0.0402,
+      "step": 2810
+    },
+    {
+      "epoch": 0.42659405491263896,
+      "grad_norm": 0.06437406688928604,
+      "learning_rate": 0.000188728,
+      "loss": 0.0397,
+      "step": 2820
+    },
+    {
+      "epoch": 0.4281067997882157,
+      "grad_norm": 0.0579422190785408,
+      "learning_rate": 0.000188688,
+      "loss": 0.0431,
+      "step": 2830
+    },
+    {
+      "epoch": 0.42961954466379243,
+      "grad_norm": 0.0628400593996048,
+      "learning_rate": 0.000188648,
+      "loss": 0.0426,
+      "step": 2840
+    },
+    {
+      "epoch": 0.43113228953936916,
+      "grad_norm": 0.04976367950439453,
+      "learning_rate": 0.000188608,
+      "loss": 0.0448,
+      "step": 2850
+    },
+    {
+      "epoch": 0.4326450344149459,
+      "grad_norm": 0.07479149103164673,
+      "learning_rate": 0.00018856800000000001,
+      "loss": 0.0458,
+      "step": 2860
+    },
+    {
+      "epoch": 0.4341577792905226,
+      "grad_norm": 0.06853318214416504,
+      "learning_rate": 0.000188528,
+      "loss": 0.045,
+      "step": 2870
+    },
+    {
+      "epoch": 0.43567052416609936,
+      "grad_norm": 0.08534535765647888,
+      "learning_rate": 0.00018848800000000003,
+      "loss": 0.044,
+      "step": 2880
+    },
+    {
+      "epoch": 0.43718326904167615,
+      "grad_norm": 0.05148012563586235,
+      "learning_rate": 0.000188448,
+      "loss": 0.0448,
+      "step": 2890
+    },
+    {
+      "epoch": 0.4386960139172529,
+      "grad_norm": 0.073714479804039,
+      "learning_rate": 0.00018840800000000002,
+      "loss": 0.0388,
+      "step": 2900
+    },
+    {
+      "epoch": 0.4402087587928296,
+      "grad_norm": 0.06875050067901611,
+      "learning_rate": 0.000188368,
+      "loss": 0.0476,
+      "step": 2910
+    },
+    {
+      "epoch": 0.44172150366840635,
+      "grad_norm": 0.07048488408327103,
+      "learning_rate": 0.000188328,
+      "loss": 0.0537,
+      "step": 2920
+    },
+    {
+      "epoch": 0.4432342485439831,
+      "grad_norm": 0.06159156188368797,
+      "learning_rate": 0.00018828800000000002,
+      "loss": 0.0523,
+      "step": 2930
+    },
+    {
+      "epoch": 0.4447469934195598,
+      "grad_norm": 0.0851297378540039,
+      "learning_rate": 0.000188248,
+      "loss": 0.0466,
+      "step": 2940
+    },
+    {
+      "epoch": 0.44625973829513654,
+      "grad_norm": 0.07920840382575989,
+      "learning_rate": 0.000188208,
+      "loss": 0.0434,
+      "step": 2950
+    },
+    {
+      "epoch": 0.4477724831707133,
+      "grad_norm": 0.06767392158508301,
+      "learning_rate": 0.000188168,
+      "loss": 0.0446,
+      "step": 2960
+    },
+    {
+      "epoch": 0.44928522804629,
+      "grad_norm": 0.0621979758143425,
+      "learning_rate": 0.00018812800000000003,
+      "loss": 0.0514,
+      "step": 2970
+    },
+    {
+      "epoch": 0.45079797292186674,
+      "grad_norm": 0.06485885381698608,
+      "learning_rate": 0.00018808800000000002,
+      "loss": 0.0403,
+      "step": 2980
+    },
+    {
+      "epoch": 0.4523107177974435,
+      "grad_norm": 0.07618974149227142,
+      "learning_rate": 0.000188048,
+      "loss": 0.046,
+      "step": 2990
+    },
+    {
+      "epoch": 0.4538234626730202,
+      "grad_norm": 0.050627488642930984,
+      "learning_rate": 0.000188008,
+      "loss": 0.04,
+      "step": 3000
+    },
+    {
+      "epoch": 0.4538234626730202,
+      "eval_cer": 0.027385337988253985,
+      "eval_loss": 0.0410909466445446,
+      "eval_runtime": 11737.0194,
+      "eval_samples_per_second": 1.794,
+      "eval_steps_per_second": 0.224,
+      "step": 3000
+    },
+    {
+      "epoch": 0.45533620754859694,
+      "grad_norm": 0.07569224387407303,
+      "learning_rate": 0.000187968,
+      "loss": 0.0453,
+      "step": 3010
+    },
+    {
+      "epoch": 0.45684895242417367,
+      "grad_norm": 0.06267885118722916,
+      "learning_rate": 0.00018792800000000002,
+      "loss": 0.0519,
+      "step": 3020
+    },
+    {
+      "epoch": 0.4583616972997504,
+      "grad_norm": 0.0801217257976532,
+      "learning_rate": 0.00018788800000000001,
+      "loss": 0.0452,
+      "step": 3030
+    },
+    {
+      "epoch": 0.45987444217532714,
+      "grad_norm": 0.06966337561607361,
+      "learning_rate": 0.000187848,
+      "loss": 0.0459,
+      "step": 3040
+    },
+    {
+      "epoch": 0.46138718705090387,
+      "grad_norm": 0.05708028003573418,
+      "learning_rate": 0.000187808,
+      "loss": 0.0462,
+      "step": 3050
+    },
+    {
+      "epoch": 0.4628999319264806,
+      "grad_norm": 0.06033516675233841,
+      "learning_rate": 0.00018776800000000002,
+      "loss": 0.0459,
+      "step": 3060
+    },
+    {
+      "epoch": 0.46441267680205733,
+      "grad_norm": 0.06908197700977325,
+      "learning_rate": 0.00018772800000000002,
+      "loss": 0.048,
+      "step": 3070
+    },
+    {
+      "epoch": 0.46592542167763407,
+      "grad_norm": 0.0723978653550148,
+      "learning_rate": 0.000187688,
+      "loss": 0.047,
+      "step": 3080
+    },
+    {
+      "epoch": 0.4674381665532108,
+      "grad_norm": 0.06268727034330368,
+      "learning_rate": 0.000187648,
+      "loss": 0.0387,
+      "step": 3090
+    },
+    {
+      "epoch": 0.46895091142878753,
+      "grad_norm": 0.06796183437108994,
+      "learning_rate": 0.000187608,
+      "loss": 0.0379,
+      "step": 3100
+    },
+    {
+      "epoch": 0.47046365630436426,
+      "grad_norm": 0.08227751404047012,
+      "learning_rate": 0.00018756800000000002,
+      "loss": 0.0497,
+      "step": 3110
+    },
+    {
+      "epoch": 0.471976401179941,
+      "grad_norm": 0.06391087174415588,
+      "learning_rate": 0.000187528,
+      "loss": 0.045,
+      "step": 3120
+    },
+    {
+      "epoch": 0.47348914605551773,
+      "grad_norm": 0.09645809978246689,
+      "learning_rate": 0.00018748800000000003,
+      "loss": 0.0479,
+      "step": 3130
+    },
+    {
+      "epoch": 0.47500189093109446,
+      "grad_norm": 0.07187838107347488,
+      "learning_rate": 0.000187448,
+      "loss": 0.0438,
+      "step": 3140
+    },
+    {
+      "epoch": 0.4765146358066712,
+      "grad_norm": 0.06578271836042404,
+      "learning_rate": 0.00018740800000000002,
+      "loss": 0.0471,
+      "step": 3150
+    },
+    {
+      "epoch": 0.4780273806822479,
+      "grad_norm": 0.06598031520843506,
+      "learning_rate": 0.000187368,
+      "loss": 0.0463,
+      "step": 3160
+    },
+    {
+      "epoch": 0.47954012555782466,
+      "grad_norm": 0.06380560249090195,
+      "learning_rate": 0.000187328,
+      "loss": 0.0439,
+      "step": 3170
+    },
+    {
+      "epoch": 0.4810528704334014,
+      "grad_norm": 0.05300907790660858,
+      "learning_rate": 0.00018728800000000003,
+      "loss": 0.0385,
+      "step": 3180
+    },
+    {
+      "epoch": 0.4825656153089781,
+      "grad_norm": 0.08515879511833191,
+      "learning_rate": 0.000187248,
+      "loss": 0.0444,
+      "step": 3190
+    },
+    {
+      "epoch": 0.48407836018455486,
+      "grad_norm": 0.0779171735048294,
+      "learning_rate": 0.00018720800000000001,
+      "loss": 0.0453,
+      "step": 3200
+    },
+    {
+      "epoch": 0.48407836018455486,
+      "eval_cer": 0.010036246117811001,
+      "eval_loss": 0.04116720333695412,
+      "eval_runtime": 10575.268,
+      "eval_samples_per_second": 1.991,
+      "eval_steps_per_second": 0.249,
+      "step": 3200
+    },
+    {
+      "epoch": 0.4855911050601316,
+      "grad_norm": 0.07719563692808151,
+      "learning_rate": 0.000187168,
+      "loss": 0.0516,
+      "step": 3210
+    },
+    {
+      "epoch": 0.4871038499357083,
+      "grad_norm": 0.0623527429997921,
+      "learning_rate": 0.000187128,
+      "loss": 0.0412,
+      "step": 3220
+    },
+    {
+      "epoch": 0.48861659481128505,
+      "grad_norm": 0.05286158621311188,
+      "learning_rate": 0.00018708800000000002,
+      "loss": 0.0433,
+      "step": 3230
+    },
+    {
+      "epoch": 0.4901293396868618,
+      "grad_norm": 0.05317120626568794,
+      "learning_rate": 0.000187048,
+      "loss": 0.0451,
+      "step": 3240
+    },
+    {
+      "epoch": 0.4916420845624385,
+      "grad_norm": 0.06447257846593857,
+      "learning_rate": 0.000187008,
+      "loss": 0.0552,
+      "step": 3250
+    },
+    {
+      "epoch": 0.49315482943801525,
+      "grad_norm": 0.05432993173599243,
+      "learning_rate": 0.000186968,
+      "loss": 0.0454,
+      "step": 3260
+    },
+    {
+      "epoch": 0.49466757431359204,
+      "grad_norm": 0.07853369414806366,
+      "learning_rate": 0.00018692800000000002,
+      "loss": 0.0513,
+      "step": 3270
+    },
+    {
+      "epoch": 0.4961803191891688,
+      "grad_norm": 0.07532196491956711,
+      "learning_rate": 0.00018688800000000002,
+      "loss": 0.0494,
+      "step": 3280
+    },
+    {
+      "epoch": 0.4976930640647455,
+      "grad_norm": 0.0591423436999321,
+      "learning_rate": 0.000186848,
+      "loss": 0.0406,
+      "step": 3290
+    },
+    {
+      "epoch": 0.49920580894032224,
+      "grad_norm": 0.05588558688759804,
+      "learning_rate": 0.000186808,
+      "loss": 0.0454,
+      "step": 3300
+    },
+    {
+      "epoch": 0.5007185538158989,
+      "grad_norm": 0.06208329647779465,
+      "learning_rate": 0.000186768,
+      "loss": 0.0379,
+      "step": 3310
+    },
+    {
+      "epoch": 0.5022312986914756,
+      "grad_norm": 0.09954684972763062,
+      "learning_rate": 0.00018672800000000002,
+      "loss": 0.0441,
+      "step": 3320
+    },
+    {
+      "epoch": 0.5037440435670524,
+      "grad_norm": 0.06522241979837418,
+      "learning_rate": 0.000186688,
+      "loss": 0.0435,
+      "step": 3330
+    },
+    {
+      "epoch": 0.5052567884426291,
+      "grad_norm": 0.06771814823150635,
+      "learning_rate": 0.000186648,
+      "loss": 0.0407,
+      "step": 3340
+    },
+    {
+      "epoch": 0.5067695333182058,
+      "grad_norm": 0.09186646342277527,
+      "learning_rate": 0.000186608,
+      "loss": 0.0468,
+      "step": 3350
+    },
+    {
+      "epoch": 0.5082822781937826,
+      "grad_norm": 0.05741488188505173,
+      "learning_rate": 0.00018656800000000002,
+      "loss": 0.0427,
+      "step": 3360
+    },
+    {
+      "epoch": 0.5097950230693593,
+      "grad_norm": 0.078957200050354,
+      "learning_rate": 0.00018652800000000001,
+      "loss": 0.0524,
+      "step": 3370
+    },
+    {
+      "epoch": 0.511307767944936,
+      "grad_norm": 0.06480754166841507,
+      "learning_rate": 0.000186488,
+      "loss": 0.0491,
+      "step": 3380
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.07016266882419586,
+      "learning_rate": 0.000186448,
+      "loss": 0.0455,
+      "step": 3390
+    },
+    {
+      "epoch": 0.5143332576960895,
+      "grad_norm": 0.09549427777528763,
+      "learning_rate": 0.000186408,
+      "loss": 0.0435,
+      "step": 3400
+    },
+    {
+      "epoch": 0.5143332576960895,
+      "eval_cer": 0.06014582453123417,
+      "eval_loss": 0.040756821632385254,
+      "eval_runtime": 10458.365,
+      "eval_samples_per_second": 2.013,
+      "eval_steps_per_second": 0.252,
+      "step": 3400
+    },
+    {
+      "epoch": 0.5158460025716662,
+      "grad_norm": 0.06771855056285858,
+      "learning_rate": 0.00018636800000000002,
+      "loss": 0.0496,
+      "step": 3410
+    },
+    {
+      "epoch": 0.517358747447243,
+      "grad_norm": 0.051270436495542526,
+      "learning_rate": 0.000186328,
+      "loss": 0.0376,
+      "step": 3420
+    },
+    {
+      "epoch": 0.5188714923228197,
+      "grad_norm": 0.05424557998776436,
+      "learning_rate": 0.00018628800000000003,
+      "loss": 0.0455,
+      "step": 3430
+    },
+    {
+      "epoch": 0.5203842371983964,
+      "grad_norm": 0.07000952959060669,
+      "learning_rate": 0.000186248,
+      "loss": 0.0494,
+      "step": 3440
+    },
+    {
+      "epoch": 0.5218969820739732,
+      "grad_norm": 0.06696450710296631,
+      "learning_rate": 0.00018620800000000002,
+      "loss": 0.0449,
+      "step": 3450
+    },
+    {
+      "epoch": 0.52340972694955,
+      "grad_norm": 0.07243742048740387,
+      "learning_rate": 0.000186168,
+      "loss": 0.0481,
+      "step": 3460
+    },
+    {
+      "epoch": 0.5249224718251267,
+      "grad_norm": 0.07457748800516129,
+      "learning_rate": 0.000186128,
+      "loss": 0.0413,
+      "step": 3470
+    },
+    {
+      "epoch": 0.5264352167007035,
+      "grad_norm": 0.05373325198888779,
+      "learning_rate": 0.00018608800000000002,
+      "loss": 0.046,
+      "step": 3480
+    },
+    {
+      "epoch": 0.5279479615762802,
+      "grad_norm": 0.07769589871168137,
+      "learning_rate": 0.000186048,
+      "loss": 0.0443,
+      "step": 3490
+    },
+    {
+      "epoch": 0.5294607064518569,
+      "grad_norm": 0.05949350818991661,
+      "learning_rate": 0.000186008,
+      "loss": 0.0426,
+      "step": 3500
+    },
+    {
+      "epoch": 0.5309734513274337,
+      "grad_norm": 0.08557622879743576,
+      "learning_rate": 0.000185968,
+      "loss": 0.0436,
+      "step": 3510
+    },
+    {
+      "epoch": 0.5324861962030104,
+      "grad_norm": 0.07504332065582275,
+      "learning_rate": 0.00018592800000000003,
+      "loss": 0.045,
+      "step": 3520
+    },
+    {
+      "epoch": 0.5339989410785871,
+      "grad_norm": 0.08510497957468033,
+      "learning_rate": 0.00018588800000000002,
+      "loss": 0.0451,
+      "step": 3530
+    },
+    {
+      "epoch": 0.5355116859541639,
+      "grad_norm": 0.06645802408456802,
+      "learning_rate": 0.000185848,
+      "loss": 0.0459,
+      "step": 3540
+    },
+    {
+      "epoch": 0.5370244308297406,
+      "grad_norm": 0.05905970185995102,
+      "learning_rate": 0.000185808,
+      "loss": 0.0431,
+      "step": 3550
+    },
+    {
+      "epoch": 0.5385371757053173,
+      "grad_norm": 0.059341125190258026,
+      "learning_rate": 0.000185768,
+      "loss": 0.0521,
+      "step": 3560
+    },
+    {
+      "epoch": 0.5400499205808941,
+      "grad_norm": 0.07676515728235245,
+      "learning_rate": 0.00018572800000000002,
+      "loss": 0.0446,
+      "step": 3570
+    },
+    {
+      "epoch": 0.5415626654564708,
+      "grad_norm": 0.05860384181141853,
+      "learning_rate": 0.00018568800000000002,
+      "loss": 0.041,
+      "step": 3580
+    },
+    {
+      "epoch": 0.5430754103320475,
+      "grad_norm": 0.07133147865533829,
+      "learning_rate": 0.000185648,
+      "loss": 0.0479,
+      "step": 3590
+    },
+    {
+      "epoch": 0.5445881552076243,
+      "grad_norm": 0.058478474617004395,
+      "learning_rate": 0.000185608,
+      "loss": 0.0447,
+      "step": 3600
+    },
+    {
+      "epoch": 0.5445881552076243,
+      "eval_cer": 0.16368877753976077,
+      "eval_loss": 0.04047335311770439,
+      "eval_runtime": 10446.0422,
+      "eval_samples_per_second": 2.015,
+      "eval_steps_per_second": 0.252,
+      "step": 3600
+    },
+    {
+      "epoch": 0.546100900083201,
+      "grad_norm": 0.06725309789180756,
+      "learning_rate": 0.000185568,
+      "loss": 0.053,
+      "step": 3610
+    },
+    {
+      "epoch": 0.5476136449587777,
+      "grad_norm": 0.06334862858057022,
+      "learning_rate": 0.00018552800000000002,
+      "loss": 0.0451,
+      "step": 3620
+    },
+    {
+      "epoch": 0.5491263898343545,
+      "grad_norm": 0.12283937633037567,
+      "learning_rate": 0.000185488,
+      "loss": 0.0437,
+      "step": 3630
+    },
+    {
+      "epoch": 0.5506391347099312,
+      "grad_norm": 0.05931037664413452,
+      "learning_rate": 0.000185448,
+      "loss": 0.0431,
+      "step": 3640
+    },
+    {
+      "epoch": 0.5521518795855079,
+      "grad_norm": 0.05501909554004669,
+      "learning_rate": 0.000185408,
+      "loss": 0.0398,
+      "step": 3650
+    },
+    {
+      "epoch": 0.5536646244610847,
+      "grad_norm": 0.06066635251045227,
+      "learning_rate": 0.00018536800000000002,
+      "loss": 0.0497,
+      "step": 3660
+    },
+    {
+      "epoch": 0.5551773693366614,
+      "grad_norm": 0.1352480947971344,
+      "learning_rate": 0.000185328,
+      "loss": 0.0445,
+      "step": 3670
+    },
+    {
+      "epoch": 0.5566901142122381,
+      "grad_norm": 0.08712221682071686,
+      "learning_rate": 0.000185288,
+      "loss": 0.0485,
+      "step": 3680
+    },
+    {
+      "epoch": 0.5582028590878149,
+      "grad_norm": 0.06511665135622025,
+      "learning_rate": 0.000185248,
+      "loss": 0.0464,
+      "step": 3690
+    },
+    {
+      "epoch": 0.5597156039633916,
+      "grad_norm": 0.052760981023311615,
+      "learning_rate": 0.000185208,
+      "loss": 0.0417,
+      "step": 3700
+    },
+    {
+      "epoch": 0.5612283488389683,
+      "grad_norm": 0.05113260820508003,
+      "learning_rate": 0.000185168,
+      "loss": 0.0426,
+      "step": 3710
+    },
+    {
+      "epoch": 0.5627410937145451,
+      "grad_norm": 0.06565012037754059,
+      "learning_rate": 0.000185128,
+      "loss": 0.0397,
+      "step": 3720
+    },
+    {
+      "epoch": 0.5642538385901218,
+      "grad_norm": 0.0608823299407959,
+      "learning_rate": 0.00018508800000000003,
+      "loss": 0.0411,
+      "step": 3730
+    },
+    {
+      "epoch": 0.5657665834656985,
+      "grad_norm": 0.0670706033706665,
+      "learning_rate": 0.000185048,
+      "loss": 0.0495,
+      "step": 3740
+    },
+    {
+      "epoch": 0.5672793283412753,
+      "grad_norm": 0.07000606507062912,
+      "learning_rate": 0.00018500800000000001,
+      "loss": 0.0457,
+      "step": 3750
+    },
+    {
+      "epoch": 0.568792073216852,
+      "grad_norm": 0.08072007447481155,
+      "learning_rate": 0.000184968,
+      "loss": 0.0484,
+      "step": 3760
+    },
+    {
+      "epoch": 0.5703048180924287,
+      "grad_norm": 0.06795356422662735,
+      "learning_rate": 0.000184928,
+      "loss": 0.0495,
+      "step": 3770
+    },
+    {
+      "epoch": 0.5718175629680055,
+      "grad_norm": 0.3031274974346161,
+      "learning_rate": 0.00018488800000000002,
+      "loss": 0.0504,
+      "step": 3780
+    },
+    {
+      "epoch": 0.5733303078435822,
+      "grad_norm": 0.05166814848780632,
+      "learning_rate": 0.000184848,
+      "loss": 0.0442,
+      "step": 3790
+    },
+    {
+      "epoch": 0.5748430527191589,
+      "grad_norm": 0.08816450089216232,
+      "learning_rate": 0.000184808,
+      "loss": 0.0525,
+      "step": 3800
+    },
+    {
+      "epoch": 0.5748430527191589,
+      "eval_cer": 0.09852050611143642,
+      "eval_loss": 0.041136305779218674,
+      "eval_runtime": 10432.1011,
+      "eval_samples_per_second": 2.018,
+      "eval_steps_per_second": 0.252,
+      "step": 3800
+    },
+    {
+      "epoch": 0.5763557975947357,
+      "grad_norm": 0.06531400233507156,
+      "learning_rate": 0.000184768,
+      "loss": 0.0459,
+      "step": 3810
+    },
+    {
+      "epoch": 0.5778685424703124,
+      "grad_norm": 0.07049426436424255,
+      "learning_rate": 0.00018472800000000002,
+      "loss": 0.0386,
+      "step": 3820
+    },
+    {
+      "epoch": 0.5793812873458891,
+      "grad_norm": 0.07954803854227066,
+      "learning_rate": 0.00018468800000000002,
+      "loss": 0.0451,
+      "step": 3830
+    },
+    {
+      "epoch": 0.5808940322214659,
+      "grad_norm": 0.07543455064296722,
+      "learning_rate": 0.000184648,
+      "loss": 0.0406,
+      "step": 3840
+    },
+    {
+      "epoch": 0.5824067770970426,
+      "grad_norm": 0.08292882144451141,
+      "learning_rate": 0.000184608,
+      "loss": 0.0544,
+      "step": 3850
+    },
+    {
+      "epoch": 0.5839195219726193,
+      "grad_norm": 0.05814971402287483,
+      "learning_rate": 0.000184568,
+      "loss": 0.0441,
+      "step": 3860
+    },
+    {
+      "epoch": 0.585432266848196,
+      "grad_norm": 0.06112606078386307,
+      "learning_rate": 0.00018452800000000002,
+      "loss": 0.0482,
+      "step": 3870
+    },
+    {
+      "epoch": 0.5869450117237728,
+      "grad_norm": 0.08487452566623688,
+      "learning_rate": 0.000184488,
+      "loss": 0.0446,
+      "step": 3880
+    },
+    {
+      "epoch": 0.5884577565993495,
+      "grad_norm": 0.05025780200958252,
+      "learning_rate": 0.000184448,
+      "loss": 0.0453,
+      "step": 3890
+    },
+    {
+      "epoch": 0.5899705014749262,
+      "grad_norm": 0.10276935994625092,
+      "learning_rate": 0.000184408,
+      "loss": 0.0427,
+      "step": 3900
+    },
+    {
+      "epoch": 0.591483246350503,
+      "grad_norm": 0.11926810443401337,
+      "learning_rate": 0.000184368,
+      "loss": 0.0472,
+      "step": 3910
+    },
+    {
+      "epoch": 0.5929959912260797,
+      "grad_norm": 0.08615875244140625,
+      "learning_rate": 0.00018432800000000001,
+      "loss": 0.0504,
+      "step": 3920
+    },
+    {
+      "epoch": 0.5945087361016564,
+      "grad_norm": 0.05418393015861511,
+      "learning_rate": 0.000184288,
+      "loss": 0.0397,
+      "step": 3930
+    },
+    {
+      "epoch": 0.5960214809772332,
+      "grad_norm": 0.06980731338262558,
+      "learning_rate": 0.000184248,
+      "loss": 0.0407,
+      "step": 3940
+    },
+    {
+      "epoch": 0.5975342258528099,
+      "grad_norm": 0.07121722400188446,
+      "learning_rate": 0.000184208,
+      "loss": 0.0441,
+      "step": 3950
+    },
+    {
+      "epoch": 0.5990469707283866,
+      "grad_norm": 0.05750627815723419,
+      "learning_rate": 0.00018416800000000002,
+      "loss": 0.049,
+      "step": 3960
+    },
+    {
+      "epoch": 0.6005597156039634,
+      "grad_norm": 0.08207126706838608,
+      "learning_rate": 0.000184128,
+      "loss": 0.0475,
+      "step": 3970
+    },
+    {
+      "epoch": 0.6020724604795401,
+      "grad_norm": 0.07319646328687668,
+      "learning_rate": 0.000184088,
+      "loss": 0.0517,
+      "step": 3980
+    },
+    {
+      "epoch": 0.6035852053551168,
+      "grad_norm": 0.06762152910232544,
+      "learning_rate": 0.000184048,
+      "loss": 0.042,
+      "step": 3990
+    },
+    {
+      "epoch": 0.6050979502306936,
+      "grad_norm": 0.05603775382041931,
+      "learning_rate": 0.000184008,
+      "loss": 0.0434,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6050979502306936,
+      "eval_cer": 0.2283245991802003,
+      "eval_loss": 0.03986261412501335,
+      "eval_runtime": 10464.7689,
+      "eval_samples_per_second": 2.012,
+      "eval_steps_per_second": 0.252,
+      "step": 4000
+    },
+    {
+      "epoch": 0.6066106951062703,
+      "grad_norm": 0.05094938725233078,
+      "learning_rate": 0.000183968,
+      "loss": 0.0493,
+      "step": 4010
+    },
+    {
+      "epoch": 0.608123439981847,
+      "grad_norm": 0.08996951580047607,
+      "learning_rate": 0.000183928,
+      "loss": 0.0475,
+      "step": 4020
+    },
+    {
+      "epoch": 0.6096361848574238,
+      "grad_norm": 0.07369961589574814,
+      "learning_rate": 0.00018388800000000003,
+      "loss": 0.0441,
+      "step": 4030
+    },
+    {
+      "epoch": 0.6111489297330005,
+      "grad_norm": 0.06135983020067215,
+      "learning_rate": 0.000183848,
+      "loss": 0.0421,
+      "step": 4040
+    },
+    {
+      "epoch": 0.6126616746085772,
+      "grad_norm": 0.04601254314184189,
+      "learning_rate": 0.000183808,
+      "loss": 0.037,
+      "step": 4050
+    },
+    {
+      "epoch": 0.614174419484154,
+      "grad_norm": 0.04949349910020828,
+      "learning_rate": 0.000183768,
+      "loss": 0.0424,
+      "step": 4060
+    },
+    {
+      "epoch": 0.6156871643597307,
+      "grad_norm": 0.08714490383863449,
+      "learning_rate": 0.000183728,
+      "loss": 0.0459,
+      "step": 4070
+    },
+    {
+      "epoch": 0.6171999092353074,
+      "grad_norm": 0.07733121514320374,
+      "learning_rate": 0.00018368800000000002,
+      "loss": 0.0423,
+      "step": 4080
+    },
+    {
+      "epoch": 0.6187126541108842,
+      "grad_norm": 0.070652537047863,
+      "learning_rate": 0.000183648,
+      "loss": 0.0417,
+      "step": 4090
+    },
+    {
+      "epoch": 0.6202253989864609,
+      "grad_norm": 0.08538975566625595,
+      "learning_rate": 0.000183608,
+      "loss": 0.045,
+      "step": 4100
+    },
+    {
+      "epoch": 0.6217381438620376,
+      "grad_norm": 0.07866961508989334,
+      "learning_rate": 0.000183568,
+      "loss": 0.0435,
+      "step": 4110
+    },
+    {
+      "epoch": 0.6232508887376144,
+      "grad_norm": 0.052214980125427246,
+      "learning_rate": 0.00018352800000000002,
+      "loss": 0.0389,
+      "step": 4120
+    },
+    {
+      "epoch": 0.6247636336131911,
+      "grad_norm": 0.07548975199460983,
+      "learning_rate": 0.00018348800000000002,
+      "loss": 0.0406,
+      "step": 4130
+    },
+    {
+      "epoch": 0.6262763784887678,
+      "grad_norm": 0.06064745783805847,
+      "learning_rate": 0.000183448,
+      "loss": 0.0405,
+      "step": 4140
+    },
+    {
+      "epoch": 0.6277891233643446,
+      "grad_norm": 0.06255548447370529,
+      "learning_rate": 0.000183408,
+      "loss": 0.0426,
+      "step": 4150
+    },
+    {
+      "epoch": 0.6293018682399213,
+      "grad_norm": 0.05550558492541313,
+      "learning_rate": 0.000183368,
+      "loss": 0.0432,
+      "step": 4160
+    },
+    {
+      "epoch": 0.630814613115498,
+      "grad_norm": 0.06224781274795532,
+      "learning_rate": 0.00018332800000000002,
+      "loss": 0.0489,
+      "step": 4170
+    },
+    {
+      "epoch": 0.6323273579910748,
+      "grad_norm": 0.04567689448595047,
+      "learning_rate": 0.000183288,
+      "loss": 0.0392,
+      "step": 4180
+    },
+    {
+      "epoch": 0.6338401028666515,
+      "grad_norm": 0.08686509728431702,
+      "learning_rate": 0.00018324800000000003,
+      "loss": 0.0503,
+      "step": 4190
+    },
+    {
+      "epoch": 0.6353528477422282,
+      "grad_norm": 0.039897847920656204,
+      "learning_rate": 0.000183208,
+      "loss": 0.0437,
+      "step": 4200
+    },
+    {
+      "epoch": 0.6353528477422282,
+      "eval_cer": 0.0028697931722888917,
+      "eval_loss": 0.03980256989598274,
+      "eval_runtime": 10439.5254,
+      "eval_samples_per_second": 2.017,
+      "eval_steps_per_second": 0.252,
+      "step": 4200
+    },
+    {
+      "epoch": 0.636865592617805,
+      "grad_norm": 0.07222657650709152,
+      "learning_rate": 0.00018316800000000002,
+      "loss": 0.0445,
+      "step": 4210
+    },
+    {
+      "epoch": 0.6383783374933817,
+      "grad_norm": 0.06796406954526901,
+      "learning_rate": 0.000183128,
+      "loss": 0.0452,
+      "step": 4220
+    },
+    {
+      "epoch": 0.6398910823689585,
+      "grad_norm": 0.07380914688110352,
+      "learning_rate": 0.000183088,
+      "loss": 0.0456,
+      "step": 4230
+    },
+    {
+      "epoch": 0.6414038272445353,
+      "grad_norm": 0.05780802294611931,
+      "learning_rate": 0.00018304800000000003,
+      "loss": 0.043,
+      "step": 4240
+    },
+    {
+      "epoch": 0.642916572120112,
+      "grad_norm": 0.07155787944793701,
+      "learning_rate": 0.000183008,
+      "loss": 0.0422,
+      "step": 4250
+    },
+    {
+      "epoch": 0.6444293169956887,
+      "grad_norm": 0.06419336050748825,
+      "learning_rate": 0.00018296800000000001,
+      "loss": 0.0453,
+      "step": 4260
+    },
+    {
+      "epoch": 0.6459420618712655,
+      "grad_norm": 0.06702402234077454,
+      "learning_rate": 0.000182928,
+      "loss": 0.0416,
+      "step": 4270
+    },
+    {
+      "epoch": 0.6474548067468422,
+      "grad_norm": 0.062247395515441895,
+      "learning_rate": 0.00018288800000000003,
+      "loss": 0.0431,
+      "step": 4280
+    },
+    {
+      "epoch": 0.6489675516224189,
+      "grad_norm": 0.05556045100092888,
+      "learning_rate": 0.00018284800000000002,
+      "loss": 0.0542,
+      "step": 4290
+    },
+    {
+      "epoch": 0.6504802964979957,
+      "grad_norm": 0.07586701959371567,
+      "learning_rate": 0.000182808,
+      "loss": 0.0476,
+      "step": 4300
+    },
+    {
+      "epoch": 0.6519930413735724,
+      "grad_norm": 0.056563302874565125,
+      "learning_rate": 0.000182768,
+      "loss": 0.0441,
+      "step": 4310
+    },
+    {
+      "epoch": 0.6535057862491491,
+      "grad_norm": 0.08210831135511398,
+      "learning_rate": 0.000182728,
+      "loss": 0.0428,
+      "step": 4320
+    },
+    {
+      "epoch": 0.6550185311247259,
+      "grad_norm": 0.06154036149382591,
+      "learning_rate": 0.00018268800000000002,
+      "loss": 0.0437,
+      "step": 4330
+    },
+    {
+      "epoch": 0.6565312760003026,
+      "grad_norm": 0.06387040764093399,
+      "learning_rate": 0.00018264800000000002,
+      "loss": 0.0503,
+      "step": 4340
+    },
+    {
+      "epoch": 0.6580440208758793,
+      "grad_norm": 0.07460694015026093,
+      "learning_rate": 0.000182608,
+      "loss": 0.0388,
+      "step": 4350
+    },
+    {
+      "epoch": 0.659556765751456,
+      "grad_norm": 0.05871427804231644,
+      "learning_rate": 0.000182568,
+      "loss": 0.0409,
+      "step": 4360
+    },
+    {
+      "epoch": 0.6610695106270328,
+      "grad_norm": 0.05525946244597435,
+      "learning_rate": 0.000182528,
+      "loss": 0.0403,
+      "step": 4370
+    },
+    {
+      "epoch": 0.6625822555026095,
+      "grad_norm": 0.07400190085172653,
+      "learning_rate": 0.00018248800000000002,
+      "loss": 0.0544,
+      "step": 4380
+    },
+    {
+      "epoch": 0.6640950003781863,
+      "grad_norm": 0.05236358568072319,
+      "learning_rate": 0.000182448,
+      "loss": 0.0424,
+      "step": 4390
+    },
+    {
+      "epoch": 0.665607745253763,
+      "grad_norm": 0.07223962247371674,
+      "learning_rate": 0.000182408,
+      "loss": 0.0427,
+      "step": 4400
+    },
+    {
+      "epoch": 0.665607745253763,
+      "eval_cer": 0.22895526186399429,
+      "eval_loss": 0.039881668984889984,
+      "eval_runtime": 10486.5948,
+      "eval_samples_per_second": 2.008,
+      "eval_steps_per_second": 0.251,
+      "step": 4400
+    },
+    {
+      "epoch": 0.6671204901293397,
+      "grad_norm": 0.04777299240231514,
+      "learning_rate": 0.000182368,
+      "loss": 0.0365,
+      "step": 4410
+    },
+    {
+      "epoch": 0.6686332350049164,
+      "grad_norm": 0.06789238750934601,
+      "learning_rate": 0.00018232800000000002,
+      "loss": 0.041,
+      "step": 4420
+    },
+    {
+      "epoch": 0.6701459798804932,
+      "grad_norm": 0.07556366920471191,
+      "learning_rate": 0.00018228800000000001,
+      "loss": 0.0454,
+      "step": 4430
+    },
+    {
+      "epoch": 0.6716587247560699,
+      "grad_norm": 0.05699057877063751,
+      "learning_rate": 0.000182248,
+      "loss": 0.0412,
+      "step": 4440
+    },
+    {
+      "epoch": 0.6731714696316466,
+      "grad_norm": 0.06115678697824478,
+      "learning_rate": 0.000182208,
+      "loss": 0.0494,
+      "step": 4450
+    },
+    {
+      "epoch": 0.6746842145072234,
+      "grad_norm": 0.16907750070095062,
+      "learning_rate": 0.000182168,
+      "loss": 0.0457,
+      "step": 4460
+    },
+    {
+      "epoch": 0.6761969593828001,
+      "grad_norm": 0.23710806667804718,
+      "learning_rate": 0.00018212800000000002,
+      "loss": 0.0491,
+      "step": 4470
+    },
+    {
+      "epoch": 0.6777097042583768,
+      "grad_norm": 0.13006287813186646,
+      "learning_rate": 0.000182088,
+      "loss": 0.0528,
+      "step": 4480
+    },
+    {
+      "epoch": 0.6792224491339536,
+      "grad_norm": 0.24661995470523834,
+      "learning_rate": 0.00018204800000000003,
+      "loss": 0.043,
+      "step": 4490
+    },
+    {
+      "epoch": 0.6807351940095303,
+      "grad_norm": 0.2757125198841095,
+      "learning_rate": 0.000182008,
+      "loss": 0.0477,
+      "step": 4500
+    },
+    {
+      "epoch": 0.682247938885107,
+      "grad_norm": 0.27585530281066895,
+      "learning_rate": 0.00018196800000000002,
+      "loss": 0.0486,
+      "step": 4510
+    },
+    {
+      "epoch": 0.6837606837606838,
+      "grad_norm": 0.10548703372478485,
+      "learning_rate": 0.000181928,
+      "loss": 0.0448,
+      "step": 4520
+    },
+    {
+      "epoch": 0.6852734286362605,
+      "grad_norm": 0.1989259272813797,
+      "learning_rate": 0.000181888,
+      "loss": 0.0508,
+      "step": 4530
+    },
+    {
+      "epoch": 0.6867861735118372,
+      "grad_norm": 0.10586623847484589,
+      "learning_rate": 0.00018184800000000002,
+      "loss": 0.0486,
+      "step": 4540
+    },
+    {
+      "epoch": 0.688298918387414,
+      "grad_norm": 0.09687965363264084,
+      "learning_rate": 0.000181808,
+      "loss": 0.0463,
+      "step": 4550
+    },
+    {
+      "epoch": 0.6898116632629907,
+      "grad_norm": 0.13362692296504974,
+      "learning_rate": 0.000181768,
+      "loss": 0.0441,
+      "step": 4560
+    },
+    {
+      "epoch": 0.6913244081385674,
+      "grad_norm": 0.07124081254005432,
+      "learning_rate": 0.000181728,
+      "loss": 0.0479,
+      "step": 4570
+    },
+    {
+      "epoch": 0.6928371530141442,
+      "grad_norm": 0.060886889696121216,
+      "learning_rate": 0.00018168800000000003,
+      "loss": 0.0425,
+      "step": 4580
+    },
+    {
+      "epoch": 0.6943498978897209,
+      "grad_norm": 0.09697773307561874,
+      "learning_rate": 0.00018164800000000002,
+      "loss": 0.0466,
+      "step": 4590
+    },
+    {
+      "epoch": 0.6958626427652976,
+      "grad_norm": 0.09655246883630753,
+      "learning_rate": 0.00018160800000000001,
+      "loss": 0.0423,
+      "step": 4600
+    },
+    {
+      "epoch": 0.6958626427652976,
+      "eval_cer": 0.3264485475609846,
+      "eval_loss": 0.04431215673685074,
+      "eval_runtime": 9966.6677,
+      "eval_samples_per_second": 2.112,
+      "eval_steps_per_second": 0.264,
+      "step": 4600
+    },
+    {
+      "epoch": 0.6973753876408744,
+      "grad_norm": 0.6920335292816162,
+      "learning_rate": 0.000181568,
+      "loss": 0.0612,
+      "step": 4610
+    },
+    {
+      "epoch": 0.6988881325164511,
+      "grad_norm": 21.773630142211914,
+      "learning_rate": 0.00018153600000000002,
+      "loss": 0.3452,
+      "step": 4620
+    },
+    {
+      "epoch": 0.7004008773920278,
+      "grad_norm": 0.6047945022583008,
+      "learning_rate": 0.0001815,
+      "loss": 0.8043,
+      "step": 4630
+    },
+    {
+      "epoch": 0.7019136222676046,
+      "grad_norm": 0.30588680505752563,
+      "learning_rate": 0.00018146000000000001,
+      "loss": 0.094,
+      "step": 4640
+    },
+    {
+      "epoch": 0.7034263671431813,
+      "grad_norm": 2.5436811447143555,
+      "learning_rate": 0.00018142,
+      "loss": 0.1421,
+      "step": 4650
+    },
+    {
+      "epoch": 0.704939112018758,
+      "grad_norm": 3.3921713829040527,
+      "learning_rate": 0.00018138000000000003,
+      "loss": 0.2285,
+      "step": 4660
+    },
+    {
+      "epoch": 0.7064518568943348,
+      "grad_norm": 6.751514434814453,
+      "learning_rate": 0.00018134,
+      "loss": 0.1609,
+      "step": 4670
+    },
+    {
+      "epoch": 0.7079646017699115,
+      "grad_norm": 0.2919982075691223,
+      "learning_rate": 0.00018130000000000002,
+      "loss": 0.0731,
+      "step": 4680
+    },
+    {
+      "epoch": 0.7094773466454882,
+      "grad_norm": 0.2757503092288971,
+      "learning_rate": 0.00018126,
+      "loss": 0.0553,
+      "step": 4690
+    },
+    {
+      "epoch": 0.710990091521065,
+      "grad_norm": 0.12121643126010895,
+      "learning_rate": 0.00018122,
+      "loss": 0.0637,
+      "step": 4700
+    },
+    {
+      "epoch": 0.7125028363966417,
+      "grad_norm": 0.6880851984024048,
+      "learning_rate": 0.00018118000000000002,
+      "loss": 0.0556,
+      "step": 4710
+    },
+    {
+      "epoch": 0.7140155812722184,
+      "grad_norm": 0.17397326231002808,
+      "learning_rate": 0.00018114,
+      "loss": 0.0619,
+      "step": 4720
+    },
+    {
+      "epoch": 0.7155283261477952,
+      "grad_norm": 0.4361652433872223,
+      "learning_rate": 0.0001811,
+      "loss": 0.052,
+      "step": 4730
+    },
+    {
+      "epoch": 0.7170410710233719,
+      "grad_norm": 0.08802498877048492,
+      "learning_rate": 0.00018106,
+      "loss": 0.0531,
+      "step": 4740
+    },
+    {
+      "epoch": 0.7185538158989486,
+      "grad_norm": 0.16508696973323822,
+      "learning_rate": 0.00018102000000000003,
+      "loss": 0.0519,
+      "step": 4750
+    },
+    {
+      "epoch": 0.7200665607745254,
+      "grad_norm": 0.1359723061323166,
+      "learning_rate": 0.00018098000000000002,
+      "loss": 0.0559,
+      "step": 4760
+    },
+    {
+      "epoch": 0.7215793056501021,
+      "grad_norm": 0.12716355919837952,
+      "learning_rate": 0.00018093999999999999,
+      "loss": 0.0478,
+      "step": 4770
+    },
+    {
+      "epoch": 0.7230920505256788,
+      "grad_norm": 0.24563723802566528,
+      "learning_rate": 0.0001809,
+      "loss": 0.0508,
+      "step": 4780
+    },
+    {
+      "epoch": 0.7246047954012556,
+      "grad_norm": 0.15526343882083893,
+      "learning_rate": 0.00018086,
+      "loss": 0.053,
+      "step": 4790
+    },
+    {
+      "epoch": 0.7261175402768323,
+      "grad_norm": 0.39961257576942444,
+      "learning_rate": 0.00018082000000000002,
+      "loss": 0.0543,
+      "step": 4800
+    },
+    {
+      "epoch": 0.7261175402768323,
+      "eval_cer": 0.8969592299120654,
+      "eval_loss": 0.04724743589758873,
+      "eval_runtime": 9508.4862,
+      "eval_samples_per_second": 2.214,
+      "eval_steps_per_second": 0.277,
+      "step": 4800
+    },
+    {
+      "epoch": 0.727630285152409,
+      "grad_norm": 0.11674599349498749,
+      "learning_rate": 0.00018078000000000001,
+      "loss": 0.045,
+      "step": 4810
+    },
+    {
+      "epoch": 0.7291430300279858,
+      "grad_norm": 0.12775878608226776,
+      "learning_rate": 0.00018074,
+      "loss": 0.0507,
+      "step": 4820
+    },
+    {
+      "epoch": 0.7306557749035625,
+      "grad_norm": 0.21720856428146362,
+      "learning_rate": 0.0001807,
+      "loss": 0.0507,
+      "step": 4830
+    },
+    {
+      "epoch": 0.7321685197791392,
+      "grad_norm": 0.09953787177801132,
+      "learning_rate": 0.00018066,
+      "loss": 0.0455,
+      "step": 4840
+    },
+    {
+      "epoch": 0.733681264654716,
+      "grad_norm": 0.1652969866991043,
+      "learning_rate": 0.00018062000000000002,
+      "loss": 0.058,
+      "step": 4850
+    },
+    {
+      "epoch": 0.7351940095302927,
+      "grad_norm": 0.15136420726776123,
+      "learning_rate": 0.00018058,
+      "loss": 0.0403,
+      "step": 4860
+    },
+    {
+      "epoch": 0.7367067544058694,
+      "grad_norm": 0.09294873476028442,
+      "learning_rate": 0.00018054,
+      "loss": 0.0454,
+      "step": 4870
+    },
+    {
+      "epoch": 0.7382194992814461,
+      "grad_norm": 0.06313528120517731,
+      "learning_rate": 0.0001805,
+      "loss": 0.0486,
+      "step": 4880
+    },
+    {
+      "epoch": 0.7397322441570229,
+      "grad_norm": 0.10854914039373398,
+      "learning_rate": 0.00018046000000000002,
+      "loss": 0.0419,
+      "step": 4890
+    },
+    {
+      "epoch": 0.7412449890325996,
+      "grad_norm": 0.08302963525056839,
+      "learning_rate": 0.00018042,
+      "loss": 0.0447,
+      "step": 4900
+    },
+    {
+      "epoch": 0.7427577339081763,
+      "grad_norm": 0.0761631429195404,
+      "learning_rate": 0.00018038,
+      "loss": 0.0446,
+      "step": 4910
+    },
+    {
+      "epoch": 0.7442704787837531,
+      "grad_norm": 0.10130470246076584,
+      "learning_rate": 0.00018034,
+      "loss": 0.045,
+      "step": 4920
+    },
+    {
+      "epoch": 0.7457832236593298,
+      "grad_norm": 0.18436622619628906,
+      "learning_rate": 0.0001803,
+      "loss": 0.0429,
+      "step": 4930
+    },
+    {
+      "epoch": 0.7472959685349065,
+      "grad_norm": 0.08756496757268906,
+      "learning_rate": 0.00018026,
+      "loss": 0.0444,
+      "step": 4940
+    },
+    {
+      "epoch": 0.7488087134104833,
+      "grad_norm": 0.0750514343380928,
+      "learning_rate": 0.00018022,
+      "loss": 0.0507,
+      "step": 4950
+    },
+    {
+      "epoch": 0.75032145828606,
+      "grad_norm": 0.07460404187440872,
+      "learning_rate": 0.00018018000000000003,
+      "loss": 0.0397,
+      "step": 4960
+    },
+    {
+      "epoch": 0.7518342031616367,
+      "grad_norm": 0.12696300446987152,
+      "learning_rate": 0.00018014,
+      "loss": 0.0412,
+      "step": 4970
+    },
+    {
+      "epoch": 0.7533469480372135,
+      "grad_norm": 0.09411120414733887,
+      "learning_rate": 0.00018010000000000001,
+      "loss": 0.0431,
+      "step": 4980
+    },
+    {
+      "epoch": 0.7548596929127902,
+      "grad_norm": 0.08611701428890228,
+      "learning_rate": 0.00018006,
+      "loss": 0.041,
+      "step": 4990
+    },
+    {
+      "epoch": 0.756372437788367,
+      "grad_norm": 0.07411106675863266,
+      "learning_rate": 0.00018002,
+      "loss": 0.0448,
+      "step": 5000
+    },
+    {
+      "epoch": 0.756372437788367,
+      "eval_cer": 0.9283299113242558,
+      "eval_loss": 0.0398402214050293,
+      "eval_runtime": 9972.2961,
+      "eval_samples_per_second": 2.111,
+      "eval_steps_per_second": 0.264,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7578851826639438,
+      "grad_norm": 0.06552145630121231,
+      "learning_rate": 0.00017998000000000002,
+      "loss": 0.0411,
+      "step": 5010
+    },
+    {
+      "epoch": 0.7593979275395205,
+      "grad_norm": 0.14544987678527832,
+      "learning_rate": 0.00017994000000000002,
+      "loss": 0.0401,
+      "step": 5020
+    },
+    {
+      "epoch": 0.7609106724150972,
+      "grad_norm": 0.06693132221698761,
+      "learning_rate": 0.0001799,
+      "loss": 0.045,
+      "step": 5030
+    },
+    {
+      "epoch": 0.762423417290674,
+      "grad_norm": 0.08100226521492004,
+      "learning_rate": 0.00017986,
+      "loss": 0.0478,
+      "step": 5040
+    },
+    {
+      "epoch": 0.7639361621662507,
+      "grad_norm": 0.10020666569471359,
+      "learning_rate": 0.00017982000000000002,
+      "loss": 0.0484,
+      "step": 5050
+    },
+    {
+      "epoch": 0.7654489070418274,
+      "grad_norm": 0.055785536766052246,
+      "learning_rate": 0.00017978000000000002,
+      "loss": 0.0423,
+      "step": 5060
+    },
+    {
+      "epoch": 0.7669616519174042,
+      "grad_norm": 0.08791428059339523,
+      "learning_rate": 0.00017974,
+      "loss": 0.0433,
+      "step": 5070
+    },
+    {
+      "epoch": 0.7684743967929809,
+      "grad_norm": 0.10156507045030594,
+      "learning_rate": 0.0001797,
+      "loss": 0.0447,
+      "step": 5080
+    },
+    {
+      "epoch": 0.7699871416685576,
+      "grad_norm": 0.1160702183842659,
+      "learning_rate": 0.00017966,
+      "loss": 0.0388,
+      "step": 5090
+    },
+    {
+      "epoch": 0.7714998865441344,
+      "grad_norm": 0.08716849237680435,
+      "learning_rate": 0.00017962000000000002,
+      "loss": 0.0492,
+      "step": 5100
+    },
+    {
+      "epoch": 0.7730126314197111,
+      "grad_norm": 0.046968474984169006,
+      "learning_rate": 0.00017958,
+      "loss": 0.0434,
+      "step": 5110
+    },
+    {
+      "epoch": 0.7745253762952878,
+      "grad_norm": 0.06234806030988693,
+      "learning_rate": 0.00017954000000000003,
+      "loss": 0.0504,
+      "step": 5120
+    },
+    {
+      "epoch": 0.7760381211708646,
+      "grad_norm": 0.102174311876297,
+      "learning_rate": 0.0001795,
+      "loss": 0.044,
+      "step": 5130
+    },
+    {
+      "epoch": 0.7775508660464413,
+      "grad_norm": 0.0620570033788681,
+      "learning_rate": 0.00017946,
+      "loss": 0.0386,
+      "step": 5140
+    },
+    {
+      "epoch": 0.779063610922018,
+      "grad_norm": 0.057656314224004745,
+      "learning_rate": 0.00017942,
+      "loss": 0.043,
+      "step": 5150
+    },
+    {
+      "epoch": 0.7805763557975948,
+      "grad_norm": 0.08451346307992935,
+      "learning_rate": 0.00017938,
+      "loss": 0.0452,
+      "step": 5160
+    },
+    {
+      "epoch": 0.7820891006731715,
+      "grad_norm": 0.09557165950536728,
+      "learning_rate": 0.00017934000000000003,
+      "loss": 0.0437,
+      "step": 5170
+    },
+    {
+      "epoch": 0.7836018455487482,
+      "grad_norm": 0.12275496870279312,
+      "learning_rate": 0.0001793,
+      "loss": 0.0427,
+      "step": 5180
+    },
+    {
+      "epoch": 0.785114590424325,
+      "grad_norm": 0.3277435600757599,
+      "learning_rate": 0.00017926000000000002,
+      "loss": 0.045,
+      "step": 5190
+    },
+    {
+      "epoch": 0.7866273352999017,
+      "grad_norm": 0.12806734442710876,
+      "learning_rate": 0.00017922,
+      "loss": 0.0383,
+      "step": 5200
+    },
+    {
+      "epoch": 0.7866273352999017,
+      "eval_cer": 0.8426215554451947,
+      "eval_loss": 0.03898792341351509,
+      "eval_runtime": 10404.4584,
+      "eval_samples_per_second": 2.023,
+      "eval_steps_per_second": 0.253,
+      "step": 5200
+    },
+    {
+      "epoch": 0.7881400801754784,
+      "grad_norm": 0.07969816774129868,
+      "learning_rate": 0.00017918,
+      "loss": 0.0474,
+      "step": 5210
+    },
+    {
+      "epoch": 0.7896528250510552,
+      "grad_norm": 0.20492368936538696,
+      "learning_rate": 0.00017914000000000002,
+      "loss": 0.0423,
+      "step": 5220
+    },
+    {
+      "epoch": 0.7911655699266319,
+      "grad_norm": 0.0960281640291214,
+      "learning_rate": 0.0001791,
+      "loss": 0.0392,
+      "step": 5230
+    },
+    {
+      "epoch": 0.7926783148022086,
+      "grad_norm": 0.16566351056098938,
+      "learning_rate": 0.00017906,
+      "loss": 0.0415,
+      "step": 5240
+    },
+    {
+      "epoch": 0.7941910596777854,
+      "grad_norm": 0.12343327701091766,
+      "learning_rate": 0.00017902,
+      "loss": 0.0439,
+      "step": 5250
+    },
+    {
+      "epoch": 0.7957038045533621,
+      "grad_norm": 0.0732201486825943,
+      "learning_rate": 0.00017898000000000002,
+      "loss": 0.0462,
+      "step": 5260
+    },
+    {
+      "epoch": 0.7972165494289388,
+      "grad_norm": 0.07991164177656174,
+      "learning_rate": 0.00017894000000000002,
+      "loss": 0.0412,
+      "step": 5270
+    },
+    {
+      "epoch": 0.7987292943045156,
+      "grad_norm": 0.07868771255016327,
+      "learning_rate": 0.0001789,
+      "loss": 0.0458,
+      "step": 5280
+    },
+    {
+      "epoch": 0.8002420391800923,
+      "grad_norm": 0.07392987608909607,
+      "learning_rate": 0.00017886,
+      "loss": 0.0489,
+      "step": 5290
+    },
+    {
+      "epoch": 0.801754784055669,
+      "grad_norm": 0.08330372720956802,
+      "learning_rate": 0.00017882,
+      "loss": 0.0448,
+      "step": 5300
+    },
+    {
+      "epoch": 0.8032675289312458,
+      "grad_norm": 0.06118497997522354,
+      "learning_rate": 0.00017878000000000002,
+      "loss": 0.0406,
+      "step": 5310
+    },
+    {
+      "epoch": 0.8047802738068225,
+      "grad_norm": 0.14288772642612457,
+      "learning_rate": 0.00017874,
+      "loss": 0.0439,
+      "step": 5320
+    },
+    {
+      "epoch": 0.8062930186823992,
+      "grad_norm": 0.06868502497673035,
+      "learning_rate": 0.0001787,
+      "loss": 0.0439,
+      "step": 5330
+    },
+    {
+      "epoch": 0.807805763557976,
+      "grad_norm": 0.08165542781352997,
+      "learning_rate": 0.00017866,
+      "loss": 0.0449,
+      "step": 5340
+    },
+    {
+      "epoch": 0.8093185084335527,
+      "grad_norm": 0.08748511224985123,
+      "learning_rate": 0.00017862000000000002,
+      "loss": 0.0455,
+      "step": 5350
+    },
+    {
+      "epoch": 0.8108312533091294,
+      "grad_norm": 0.0799604058265686,
+      "learning_rate": 0.00017858000000000001,
+      "loss": 0.0466,
+      "step": 5360
+    },
+    {
+      "epoch": 0.8123439981847062,
+      "grad_norm": 0.09606848657131195,
+      "learning_rate": 0.00017854,
+      "loss": 0.0452,
+      "step": 5370
+    },
+    {
+      "epoch": 0.8138567430602829,
+      "grad_norm": 0.07232715934515,
+      "learning_rate": 0.0001785,
+      "loss": 0.0426,
+      "step": 5380
+    },
+    {
+      "epoch": 0.8153694879358596,
+      "grad_norm": 0.07278240472078323,
+      "learning_rate": 0.00017846,
+      "loss": 0.0468,
+      "step": 5390
+    },
+    {
+      "epoch": 0.8168822328114363,
+      "grad_norm": 0.06568820029497147,
+      "learning_rate": 0.00017842000000000002,
+      "loss": 0.0407,
+      "step": 5400
+    },
+    {
+      "epoch": 0.8168822328114363,
+      "eval_cer": 0.9304918304165957,
+      "eval_loss": 0.039248276501894,
+      "eval_runtime": 10433.9841,
+      "eval_samples_per_second": 2.018,
+      "eval_steps_per_second": 0.252,
+      "step": 5400
+    },
+    {
+      "epoch": 0.8183949776870131,
+      "grad_norm": 0.08667409420013428,
+      "learning_rate": 0.00017838,
+      "loss": 0.0504,
+      "step": 5410
+    },
+    {
+      "epoch": 0.8199077225625898,
+      "grad_norm": 0.0701778307557106,
+      "learning_rate": 0.00017834000000000003,
+      "loss": 0.0425,
+      "step": 5420
+    },
+    {
+      "epoch": 0.8214204674381665,
+      "grad_norm": 0.07078663259744644,
+      "learning_rate": 0.0001783,
+      "loss": 0.0456,
+      "step": 5430
+    },
+    {
+      "epoch": 0.8229332123137433,
+      "grad_norm": 0.08540530502796173,
+      "learning_rate": 0.00017826000000000002,
+      "loss": 0.0437,
+      "step": 5440
+    },
+    {
+      "epoch": 0.82444595718932,
+      "grad_norm": 0.044258490204811096,
+      "learning_rate": 0.00017822,
+      "loss": 0.0373,
+      "step": 5450
+    },
+    {
+      "epoch": 0.8259587020648967,
+      "grad_norm": 0.08837467432022095,
+      "learning_rate": 0.00017818,
+      "loss": 0.0418,
+      "step": 5460
+    },
+    {
+      "epoch": 0.8274714469404735,
+      "grad_norm": 0.06399261206388474,
+      "learning_rate": 0.00017814000000000003,
+      "loss": 0.0461,
+      "step": 5470
+    },
+    {
+      "epoch": 0.8289841918160502,
+      "grad_norm": 0.07160426676273346,
+      "learning_rate": 0.0001781,
+      "loss": 0.0384,
+      "step": 5480
+    },
+    {
+      "epoch": 0.8304969366916269,
+      "grad_norm": 0.06335125118494034,
+      "learning_rate": 0.00017806,
+      "loss": 0.04,
+      "step": 5490
+    },
+    {
+      "epoch": 0.8320096815672037,
+      "grad_norm": 0.10239727795124054,
+      "learning_rate": 0.00017802,
+      "loss": 0.0396,
+      "step": 5500
+    },
+    {
+      "epoch": 0.8335224264427804,
+      "grad_norm": 0.06797724217176437,
+      "learning_rate": 0.00017798,
+      "loss": 0.0406,
+      "step": 5510
+    },
+    {
+      "epoch": 0.8350351713183571,
+      "grad_norm": 0.08448281139135361,
+      "learning_rate": 0.00017794000000000002,
+      "loss": 0.0489,
+      "step": 5520
+    },
+    {
+      "epoch": 0.8365479161939339,
+      "grad_norm": 0.0817868560552597,
+      "learning_rate": 0.0001779,
+      "loss": 0.0437,
+      "step": 5530
+    },
+    {
+      "epoch": 0.8380606610695106,
+      "grad_norm": 0.12232506275177002,
+      "learning_rate": 0.00017786,
+      "loss": 0.0475,
+      "step": 5540
+    },
+    {
+      "epoch": 0.8395734059450873,
+      "grad_norm": 0.0839553102850914,
+      "learning_rate": 0.00017782,
+      "loss": 0.0447,
+      "step": 5550
+    },
+    {
+      "epoch": 0.8410861508206641,
+      "grad_norm": 0.07315023243427277,
+      "learning_rate": 0.00017778000000000002,
+      "loss": 0.0441,
+      "step": 5560
+    },
+    {
+      "epoch": 0.8425988956962408,
+      "grad_norm": 0.07943390309810638,
+      "learning_rate": 0.00017774000000000002,
+      "loss": 0.0457,
+      "step": 5570
+    },
+    {
+      "epoch": 0.8441116405718175,
+      "grad_norm": 0.07185439020395279,
+      "learning_rate": 0.0001777,
+      "loss": 0.0429,
+      "step": 5580
+    },
+    {
+      "epoch": 0.8456243854473943,
+      "grad_norm": 0.06304585933685303,
+      "learning_rate": 0.00017766,
+      "loss": 0.046,
+      "step": 5590
+    },
+    {
+      "epoch": 0.847137130322971,
+      "grad_norm": 0.07005342841148376,
+      "learning_rate": 0.00017762,
+      "loss": 0.0359,
+      "step": 5600
+    },
+    {
+      "epoch": 0.847137130322971,
+      "eval_cer": 0.5003496132017898,
+      "eval_loss": 0.038213107734918594,
+      "eval_runtime": 10454.3437,
+      "eval_samples_per_second": 2.014,
+      "eval_steps_per_second": 0.252,
+      "step": 5600
+    },
+    {
+      "epoch": 0.8486498751985477,
+      "grad_norm": 0.08005109429359436,
+      "learning_rate": 0.00017758000000000002,
+      "loss": 0.0491,
+      "step": 5610
+    },
+    {
+      "epoch": 0.8501626200741245,
+      "grad_norm": 0.07554598152637482,
+      "learning_rate": 0.00017754,
+      "loss": 0.0384,
+      "step": 5620
+    },
+    {
+      "epoch": 0.8516753649497012,
+      "grad_norm": 0.08396964520215988,
+      "learning_rate": 0.0001775,
+      "loss": 0.0439,
+      "step": 5630
+    },
+    {
+      "epoch": 0.8531881098252779,
+      "grad_norm": 0.08719771355390549,
+      "learning_rate": 0.00017746,
+      "loss": 0.0417,
+      "step": 5640
+    },
+    {
+      "epoch": 0.8547008547008547,
+      "grad_norm": 0.09563528001308441,
+      "learning_rate": 0.00017742000000000002,
+      "loss": 0.0456,
+      "step": 5650
+    },
+    {
+      "epoch": 0.8562135995764314,
+      "grad_norm": 0.07019315659999847,
+      "learning_rate": 0.00017738,
+      "loss": 0.0394,
+      "step": 5660
+    },
+    {
+      "epoch": 0.8577263444520081,
+      "grad_norm": 0.06756678968667984,
+      "learning_rate": 0.00017734,
+      "loss": 0.046,
+      "step": 5670
+    },
+    {
+      "epoch": 0.8592390893275849,
+      "grad_norm": 0.06660816073417664,
+      "learning_rate": 0.0001773,
+      "loss": 0.0415,
+      "step": 5680
+    },
+    {
+      "epoch": 0.8607518342031616,
+      "grad_norm": 0.10737419873476028,
+      "learning_rate": 0.00017726,
+      "loss": 0.0402,
+      "step": 5690
+    },
+    {
+      "epoch": 0.8622645790787383,
+      "grad_norm": 0.06818167865276337,
+      "learning_rate": 0.00017722000000000001,
+      "loss": 0.039,
+      "step": 5700
+    },
+    {
+      "epoch": 0.8637773239543151,
+      "grad_norm": 0.05077315866947174,
+      "learning_rate": 0.00017718,
+      "loss": 0.0376,
+      "step": 5710
+    },
+    {
+      "epoch": 0.8652900688298918,
+      "grad_norm": 0.08248795568943024,
+      "learning_rate": 0.00017714000000000003,
+      "loss": 0.0427,
+      "step": 5720
+    },
+    {
+      "epoch": 0.8668028137054685,
+      "grad_norm": 0.06273633241653442,
+      "learning_rate": 0.0001771,
+      "loss": 0.0405,
+      "step": 5730
+    },
+    {
+      "epoch": 0.8683155585810453,
+      "grad_norm": 0.11920665949583054,
+      "learning_rate": 0.00017706000000000002,
+      "loss": 0.0416,
+      "step": 5740
+    },
+    {
+      "epoch": 0.869828303456622,
+      "grad_norm": 0.061835162341594696,
+      "learning_rate": 0.00017702,
+      "loss": 0.0456,
+      "step": 5750
+    },
+    {
+      "epoch": 0.8713410483321987,
+      "grad_norm": 0.06891065835952759,
+      "learning_rate": 0.00017698,
+      "loss": 0.0435,
+      "step": 5760
+    },
+    {
+      "epoch": 0.8728537932077756,
+      "grad_norm": 0.06323794275522232,
+      "learning_rate": 0.00017694000000000002,
+      "loss": 0.0424,
+      "step": 5770
+    },
+    {
+      "epoch": 0.8743665380833523,
+      "grad_norm": 0.08218410611152649,
+      "learning_rate": 0.0001769,
+      "loss": 0.0428,
+      "step": 5780
+    },
+    {
+      "epoch": 0.875879282958929,
+      "grad_norm": 0.05943075567483902,
+      "learning_rate": 0.00017686,
+      "loss": 0.0373,
+      "step": 5790
+    },
+    {
+      "epoch": 0.8773920278345058,
+      "grad_norm": 0.09316141158342361,
+      "learning_rate": 0.00017682,
+      "loss": 0.0436,
+      "step": 5800
+    },
+    {
+      "epoch": 0.8773920278345058,
+      "eval_cer": 0.5988355286077488,
+      "eval_loss": 0.0380551740527153,
+      "eval_runtime": 10439.6932,
+      "eval_samples_per_second": 2.017,
+      "eval_steps_per_second": 0.252,
+      "step": 5800
+    },
+    {
+      "epoch": 0.8789047727100825,
+      "grad_norm": 0.06791754812002182,
+      "learning_rate": 0.00017678000000000003,
+      "loss": 0.0424,
+      "step": 5810
+    },
+    {
+      "epoch": 0.8804175175856592,
+      "grad_norm": 0.06572896242141724,
+      "learning_rate": 0.00017674000000000002,
+      "loss": 0.0446,
+      "step": 5820
+    },
+    {
+      "epoch": 0.881930262461236,
+      "grad_norm": 0.07208286970853806,
+      "learning_rate": 0.00017669999999999999,
+      "loss": 0.0438,
+      "step": 5830
+    },
+    {
+      "epoch": 0.8834430073368127,
+      "grad_norm": 0.08518756181001663,
+      "learning_rate": 0.00017666,
+      "loss": 0.0401,
+      "step": 5840
+    },
+    {
+      "epoch": 0.8849557522123894,
+      "grad_norm": 0.060736026614904404,
+      "learning_rate": 0.00017662,
+      "loss": 0.0393,
+      "step": 5850
+    },
+    {
+      "epoch": 0.8864684970879662,
+      "grad_norm": 0.0627061128616333,
+      "learning_rate": 0.00017658000000000002,
+      "loss": 0.0358,
+      "step": 5860
+    },
+    {
+      "epoch": 0.8879812419635429,
+      "grad_norm": 0.06178157031536102,
+      "learning_rate": 0.00017654000000000001,
+      "loss": 0.0467,
+      "step": 5870
+    },
+    {
+      "epoch": 0.8894939868391196,
+      "grad_norm": 0.0688227042555809,
+      "learning_rate": 0.0001765,
+      "loss": 0.0415,
+      "step": 5880
+    },
+    {
+      "epoch": 0.8910067317146964,
+      "grad_norm": 0.06773985177278519,
+      "learning_rate": 0.00017646,
+      "loss": 0.0354,
+      "step": 5890
+    },
+    {
+      "epoch": 0.8925194765902731,
+      "grad_norm": 0.09130257368087769,
+      "learning_rate": 0.00017642,
+      "loss": 0.0414,
+      "step": 5900
+    },
+    {
+      "epoch": 0.8940322214658498,
+      "grad_norm": 0.06815651059150696,
+      "learning_rate": 0.00017638000000000002,
+      "loss": 0.0495,
+      "step": 5910
+    },
+    {
+      "epoch": 0.8955449663414266,
+      "grad_norm": 0.07239062339067459,
+      "learning_rate": 0.00017634,
+      "loss": 0.0459,
+      "step": 5920
+    },
+    {
+      "epoch": 0.8970577112170033,
+      "grad_norm": 0.08951979130506516,
+      "learning_rate": 0.0001763,
+      "loss": 0.047,
+      "step": 5930
+    },
+    {
+      "epoch": 0.89857045609258,
+      "grad_norm": 0.07267329841852188,
+      "learning_rate": 0.00017626,
+      "loss": 0.0384,
+      "step": 5940
+    },
+    {
+      "epoch": 0.9000832009681567,
+      "grad_norm": 0.06272245943546295,
+      "learning_rate": 0.00017622000000000002,
+      "loss": 0.0373,
+      "step": 5950
+    },
+    {
+      "epoch": 0.9015959458437335,
+      "grad_norm": 0.07484642416238785,
+      "learning_rate": 0.00017618,
+      "loss": 0.0445,
+      "step": 5960
+    },
+    {
+      "epoch": 0.9031086907193102,
+      "grad_norm": 0.06894571334123611,
+      "learning_rate": 0.00017614,
+      "loss": 0.0418,
+      "step": 5970
+    },
+    {
+      "epoch": 0.904621435594887,
+      "grad_norm": 0.07352825254201889,
+      "learning_rate": 0.0001761,
+      "loss": 0.0361,
+      "step": 5980
+    },
+    {
+      "epoch": 0.9061341804704637,
+      "grad_norm": 0.07955580949783325,
+      "learning_rate": 0.00017606,
+      "loss": 0.0418,
+      "step": 5990
+    },
+    {
+      "epoch": 0.9076469253460404,
+      "grad_norm": 0.057830698788166046,
+      "learning_rate": 0.00017602,
+      "loss": 0.0359,
+      "step": 6000
+    },
+    {
+      "epoch": 0.9076469253460404,
+      "eval_cer": 0.5058427407698408,
+      "eval_loss": 0.038296379148960114,
+      "eval_runtime": 10426.1739,
+      "eval_samples_per_second": 2.019,
+      "eval_steps_per_second": 0.252,
+      "step": 6000
+    },
+    {
+      "epoch": 0.9091596702216171,
+      "grad_norm": 0.08560307323932648,
+      "learning_rate": 0.00017598,
+      "loss": 0.0465,
+      "step": 6010
+    },
+    {
+      "epoch": 0.9106724150971939,
+      "grad_norm": 0.06908106803894043,
+      "learning_rate": 0.00017594000000000003,
+      "loss": 0.0469,
+      "step": 6020
+    },
+    {
+      "epoch": 0.9121851599727706,
+      "grad_norm": 0.058405641466379166,
+      "learning_rate": 0.0001759,
+      "loss": 0.0459,
+      "step": 6030
+    },
+    {
+      "epoch": 0.9136979048483473,
+      "grad_norm": 0.06696103513240814,
+      "learning_rate": 0.00017586000000000001,
+      "loss": 0.0389,
+      "step": 6040
+    },
+    {
+      "epoch": 0.9152106497239241,
+      "grad_norm": 0.06927672773599625,
+      "learning_rate": 0.00017582,
+      "loss": 0.0369,
+      "step": 6050
+    },
+    {
+      "epoch": 0.9167233945995008,
+      "grad_norm": 0.11847919970750809,
+      "learning_rate": 0.00017578,
+      "loss": 0.0379,
+      "step": 6060
+    },
+    {
+      "epoch": 0.9182361394750775,
+      "grad_norm": 0.06731213629245758,
+      "learning_rate": 0.00017574000000000002,
+      "loss": 0.0492,
+      "step": 6070
+    },
+    {
+      "epoch": 0.9197488843506543,
+      "grad_norm": 0.06238566339015961,
+      "learning_rate": 0.0001757,
+      "loss": 0.0351,
+      "step": 6080
+    },
+    {
+      "epoch": 0.921261629226231,
+      "grad_norm": 0.07023432850837708,
+      "learning_rate": 0.00017566,
+      "loss": 0.0418,
+      "step": 6090
+    },
+    {
+      "epoch": 0.9227743741018077,
+      "grad_norm": 0.07269687950611115,
+      "learning_rate": 0.00017562,
+      "loss": 0.0473,
+      "step": 6100
+    },
+    {
+      "epoch": 0.9242871189773845,
+      "grad_norm": 0.0714830756187439,
+      "learning_rate": 0.00017558000000000002,
+      "loss": 0.0419,
+      "step": 6110
+    },
+    {
+      "epoch": 0.9257998638529612,
+      "grad_norm": 0.06455916166305542,
+      "learning_rate": 0.00017554000000000002,
+      "loss": 0.0386,
+      "step": 6120
+    },
+    {
+      "epoch": 0.9273126087285379,
+      "grad_norm": 0.0797223374247551,
+      "learning_rate": 0.0001755,
+      "loss": 0.0425,
+      "step": 6130
+    },
+    {
+      "epoch": 0.9288253536041147,
+      "grad_norm": 0.08360251039266586,
+      "learning_rate": 0.00017546,
+      "loss": 0.0414,
+      "step": 6140
+    },
+    {
+      "epoch": 0.9303380984796914,
+      "grad_norm": 0.06491956114768982,
+      "learning_rate": 0.00017542,
+      "loss": 0.0367,
+      "step": 6150
+    },
+    {
+      "epoch": 0.9318508433552681,
+      "grad_norm": 0.06236764043569565,
+      "learning_rate": 0.00017538000000000002,
+      "loss": 0.0514,
+      "step": 6160
+    },
+    {
+      "epoch": 0.9333635882308449,
+      "grad_norm": 0.08555632829666138,
+      "learning_rate": 0.00017534,
+      "loss": 0.041,
+      "step": 6170
+    },
+    {
+      "epoch": 0.9348763331064216,
+      "grad_norm": 0.08949322998523712,
+      "learning_rate": 0.0001753,
+      "loss": 0.0462,
+      "step": 6180
+    },
+    {
+      "epoch": 0.9363890779819983,
+      "grad_norm": 0.07832244038581848,
+      "learning_rate": 0.00017526,
+      "loss": 0.0471,
+      "step": 6190
+    },
+    {
+      "epoch": 0.9379018228575751,
+      "grad_norm": 0.06077546253800392,
+      "learning_rate": 0.00017522000000000002,
+      "loss": 0.0457,
+      "step": 6200
+    },
+    {
+      "epoch": 0.9379018228575751,
+      "eval_cer": 0.3344013213649492,
+      "eval_loss": 0.03830147907137871,
+      "eval_runtime": 10461.8882,
+      "eval_samples_per_second": 2.012,
+      "eval_steps_per_second": 0.252,
+      "step": 6200
+    },
+    {
+      "epoch": 0.9394145677331518,
+      "grad_norm": 0.048287175595760345,
+      "learning_rate": 0.00017518,
+      "loss": 0.0393,
+      "step": 6210
+    },
+    {
+      "epoch": 0.9409273126087285,
+      "grad_norm": 0.08072841167449951,
+      "learning_rate": 0.00017514,
+      "loss": 0.0447,
+      "step": 6220
+    },
+    {
+      "epoch": 0.9424400574843053,
+      "grad_norm": 0.07255307585000992,
+      "learning_rate": 0.0001751,
+      "loss": 0.0492,
+      "step": 6230
+    },
+    {
+      "epoch": 0.943952802359882,
+      "grad_norm": 0.05136171355843544,
+      "learning_rate": 0.00017506,
+      "loss": 0.0438,
+      "step": 6240
+    },
+    {
+      "epoch": 0.9454655472354587,
+      "grad_norm": 0.079404316842556,
+      "learning_rate": 0.00017502000000000001,
+      "loss": 0.0383,
+      "step": 6250
+    },
+    {
+      "epoch": 0.9469782921110355,
+      "grad_norm": 0.10744167119264603,
+      "learning_rate": 0.00017498,
+      "loss": 0.0406,
+      "step": 6260
+    },
+    {
+      "epoch": 0.9484910369866122,
+      "grad_norm": 0.09439695626497269,
+      "learning_rate": 0.00017494,
+      "loss": 0.0448,
+      "step": 6270
+    },
+    {
+      "epoch": 0.9500037818621889,
+      "grad_norm": 0.07746788114309311,
+      "learning_rate": 0.00017490000000000002,
+      "loss": 0.0425,
+      "step": 6280
+    },
+    {
+      "epoch": 0.9515165267377657,
+      "grad_norm": 0.161416694521904,
+      "learning_rate": 0.00017486,
+      "loss": 0.04,
+      "step": 6290
+    },
+    {
+      "epoch": 0.9530292716133424,
+      "grad_norm": 0.05279407650232315,
+      "learning_rate": 0.00017482,
+      "loss": 0.0387,
+      "step": 6300
+    },
+    {
+      "epoch": 0.9545420164889191,
+      "grad_norm": 0.06324402987957001,
+      "learning_rate": 0.00017478,
+      "loss": 0.0425,
+      "step": 6310
+    },
+    {
+      "epoch": 0.9560547613644959,
+      "grad_norm": 0.08716294914484024,
+      "learning_rate": 0.00017474000000000002,
+      "loss": 0.0436,
+      "step": 6320
+    },
+    {
+      "epoch": 0.9575675062400726,
+      "grad_norm": 0.08212625980377197,
+      "learning_rate": 0.00017470000000000002,
+      "loss": 0.0445,
+      "step": 6330
+    },
+    {
+      "epoch": 0.9590802511156493,
+      "grad_norm": 0.08856002241373062,
+      "learning_rate": 0.00017466,
+      "loss": 0.0385,
+      "step": 6340
+    },
+    {
+      "epoch": 0.960592995991226,
+      "grad_norm": 0.08907803148031235,
+      "learning_rate": 0.00017462,
+      "loss": 0.0451,
+      "step": 6350
+    },
+    {
+      "epoch": 0.9621057408668028,
+      "grad_norm": 0.053175825625658035,
+      "learning_rate": 0.00017458,
+      "loss": 0.0428,
+      "step": 6360
+    },
+    {
+      "epoch": 0.9636184857423795,
+      "grad_norm": 0.055600494146347046,
+      "learning_rate": 0.00017454000000000002,
+      "loss": 0.047,
+      "step": 6370
+    },
+    {
+      "epoch": 0.9651312306179562,
+      "grad_norm": 0.10455228388309479,
+      "learning_rate": 0.0001745,
+      "loss": 0.0517,
+      "step": 6380
+    },
+    {
+      "epoch": 0.966643975493533,
+      "grad_norm": 0.11780910938978195,
+      "learning_rate": 0.00017446,
+      "loss": 0.0414,
+      "step": 6390
+    },
+    {
+      "epoch": 0.9681567203691097,
+      "grad_norm": 0.12388743460178375,
+      "learning_rate": 0.00017442,
+      "loss": 0.0438,
+      "step": 6400
+    },
+    {
+      "epoch": 0.9681567203691097,
+      "eval_cer": 0.5869913004375724,
+      "eval_loss": 0.03873522952198982,
+      "eval_runtime": 10437.6142,
+      "eval_samples_per_second": 2.017,
+      "eval_steps_per_second": 0.252,
+      "step": 6400
+    },
+    {
+      "epoch": 0.9696694652446864,
+      "grad_norm": 0.07916050404310226,
+      "learning_rate": 0.00017438000000000002,
+      "loss": 0.0402,
+      "step": 6410
+    },
+    {
+      "epoch": 0.9711822101202632,
+      "grad_norm": 0.05646761879324913,
+      "learning_rate": 0.00017434000000000001,
+      "loss": 0.0425,
+      "step": 6420
+    },
+    {
+      "epoch": 0.9726949549958399,
+      "grad_norm": 0.08374381810426712,
+      "learning_rate": 0.0001743,
+      "loss": 0.041,
+      "step": 6430
+    },
+    {
+      "epoch": 0.9742076998714166,
+      "grad_norm": 0.06789222359657288,
+      "learning_rate": 0.00017426,
+      "loss": 0.0391,
+      "step": 6440
+    },
+    {
+      "epoch": 0.9757204447469934,
+      "grad_norm": 0.0788172036409378,
+      "learning_rate": 0.00017422,
+      "loss": 0.0449,
+      "step": 6450
+    },
+    {
+      "epoch": 0.9772331896225701,
+      "grad_norm": 0.1257173717021942,
+      "learning_rate": 0.00017418000000000002,
+      "loss": 0.0484,
+      "step": 6460
+    },
+    {
+      "epoch": 0.9787459344981468,
+      "grad_norm": 0.05888710170984268,
+      "learning_rate": 0.00017414,
+      "loss": 0.0387,
+      "step": 6470
+    },
+    {
+      "epoch": 0.9802586793737236,
+      "grad_norm": 0.07102910429239273,
+      "learning_rate": 0.00017410000000000003,
+      "loss": 0.0386,
+      "step": 6480
+    },
+    {
+      "epoch": 0.9817714242493003,
+      "grad_norm": 0.058048397302627563,
+      "learning_rate": 0.00017406,
+      "loss": 0.0415,
+      "step": 6490
+    },
+    {
+      "epoch": 0.983284169124877,
+      "grad_norm": 0.07222626358270645,
+      "learning_rate": 0.00017402000000000002,
+      "loss": 0.0378,
+      "step": 6500
+    },
+    {
+      "epoch": 0.9847969140004538,
+      "grad_norm": 0.06445878744125366,
+      "learning_rate": 0.00017398,
+      "loss": 0.0409,
+      "step": 6510
+    },
+    {
+      "epoch": 0.9863096588760305,
+      "grad_norm": 0.09191201627254486,
+      "learning_rate": 0.00017394,
+      "loss": 0.0414,
+      "step": 6520
+    },
+    {
+      "epoch": 0.9878224037516072,
+      "grad_norm": 0.08073204010725021,
+      "learning_rate": 0.00017390000000000003,
+      "loss": 0.0404,
+      "step": 6530
+    },
+    {
+      "epoch": 0.9893351486271841,
+      "grad_norm": 0.08427068591117859,
+      "learning_rate": 0.00017386,
+      "loss": 0.0398,
+      "step": 6540
+    },
+    {
+      "epoch": 0.9908478935027608,
+      "grad_norm": 0.19870494306087494,
+      "learning_rate": 0.00017382,
+      "loss": 0.0388,
+      "step": 6550
+    },
+    {
+      "epoch": 0.9923606383783375,
+      "grad_norm": 0.34985288977622986,
+      "learning_rate": 0.00017378,
+      "loss": 0.051,
+      "step": 6560
+    },
+    {
+      "epoch": 0.9938733832539143,
+      "grad_norm": 0.12121633440256119,
+      "learning_rate": 0.00017374000000000003,
+      "loss": 0.0385,
+      "step": 6570
+    },
+    {
+      "epoch": 0.995386128129491,
+      "grad_norm": 0.140520840883255,
+      "learning_rate": 0.00017370000000000002,
+      "loss": 0.0417,
+      "step": 6580
+    },
+    {
+      "epoch": 0.9968988730050677,
+      "grad_norm": 0.06655796617269516,
+      "learning_rate": 0.00017366,
+      "loss": 0.0394,
+      "step": 6590
+    },
+    {
+      "epoch": 0.9984116178806445,
+      "grad_norm": 0.07498542964458466,
+      "learning_rate": 0.00017362,
+      "loss": 0.0419,
+      "step": 6600
+    },
+    {
+      "epoch": 0.9984116178806445,
+      "eval_cer": 0.25282902555511905,
+      "eval_loss": 0.038411665707826614,
+      "eval_runtime": 10433.3935,
+      "eval_samples_per_second": 2.018,
+      "eval_steps_per_second": 0.252,
+      "step": 6600
+    },
+    {
+      "epoch": 0.9999243627562212,
+      "grad_norm": 0.25646254420280457,
+      "learning_rate": 0.00017358,
+      "loss": 0.039,
+      "step": 6610
+    },
+    {
+      "epoch": 1.0014371076317978,
+      "grad_norm": 0.07744245231151581,
+      "learning_rate": 0.00017354000000000002,
+      "loss": 0.0371,
+      "step": 6620
+    },
+    {
+      "epoch": 1.0029498525073746,
+      "grad_norm": 0.11968632787466049,
+      "learning_rate": 0.00017350000000000002,
+      "loss": 0.0303,
+      "step": 6630
+    },
+    {
+      "epoch": 1.0044625973829513,
+      "grad_norm": 0.07235859334468842,
+      "learning_rate": 0.00017346,
+      "loss": 0.0387,
+      "step": 6640
+    },
+    {
+      "epoch": 1.005975342258528,
+      "grad_norm": 0.12598702311515808,
+      "learning_rate": 0.00017342,
+      "loss": 0.0355,
+      "step": 6650
+    },
+    {
+      "epoch": 1.0074880871341048,
+      "grad_norm": 0.10832694917917252,
+      "learning_rate": 0.00017338,
+      "loss": 0.0297,
+      "step": 6660
+    },
+    {
+      "epoch": 1.0090008320096815,
+      "grad_norm": 0.13988302648067474,
+      "learning_rate": 0.00017334000000000002,
+      "loss": 0.0352,
+      "step": 6670
+    },
+    {
+      "epoch": 1.0105135768852582,
+      "grad_norm": 0.09534142911434174,
+      "learning_rate": 0.0001733,
+      "loss": 0.0308,
+      "step": 6680
+    },
+    {
+      "epoch": 1.012026321760835,
+      "grad_norm": 0.05622931197285652,
+      "learning_rate": 0.00017326,
+      "loss": 0.0311,
+      "step": 6690
+    },
+    {
+      "epoch": 1.0135390666364117,
+      "grad_norm": 0.06480368971824646,
+      "learning_rate": 0.00017322,
+      "loss": 0.033,
+      "step": 6700
+    },
+    {
+      "epoch": 1.0150518115119884,
+      "grad_norm": 0.08531224727630615,
+      "learning_rate": 0.00017318000000000002,
+      "loss": 0.0345,
+      "step": 6710
+    },
+    {
+      "epoch": 1.0165645563875652,
+      "grad_norm": 0.11494185030460358,
+      "learning_rate": 0.00017314,
+      "loss": 0.0292,
+      "step": 6720
+    },
+    {
+      "epoch": 1.0180773012631419,
+      "grad_norm": 0.06993953883647919,
+      "learning_rate": 0.0001731,
+      "loss": 0.0343,
+      "step": 6730
+    },
+    {
+      "epoch": 1.0195900461387186,
+      "grad_norm": 0.09449311345815659,
+      "learning_rate": 0.00017306,
+      "loss": 0.0285,
+      "step": 6740
+    },
+    {
+      "epoch": 1.0211027910142954,
+      "grad_norm": 0.10550418496131897,
+      "learning_rate": 0.00017302,
+      "loss": 0.0337,
+      "step": 6750
+    },
+    {
+      "epoch": 1.022615535889872,
+      "grad_norm": 0.06987041234970093,
+      "learning_rate": 0.00017298000000000001,
+      "loss": 0.0273,
+      "step": 6760
+    },
+    {
+      "epoch": 1.0241282807654488,
+      "grad_norm": 0.08014168590307236,
+      "learning_rate": 0.00017294,
+      "loss": 0.0318,
+      "step": 6770
+    },
+    {
+      "epoch": 1.0256410256410255,
+      "grad_norm": 0.04886119067668915,
+      "learning_rate": 0.00017290000000000003,
+      "loss": 0.0318,
+      "step": 6780
+    },
+    {
+      "epoch": 1.0271537705166023,
+      "grad_norm": 0.07735268771648407,
+      "learning_rate": 0.00017286,
+      "loss": 0.0377,
+      "step": 6790
+    },
+    {
+      "epoch": 1.028666515392179,
+      "grad_norm": 0.07365155220031738,
+      "learning_rate": 0.00017282000000000002,
+      "loss": 0.0397,
+      "step": 6800
+    },
+    {
+      "epoch": 1.028666515392179,
+      "eval_cer": 0.5956908628651482,
+      "eval_loss": 0.03884879872202873,
+      "eval_runtime": 10443.3198,
+      "eval_samples_per_second": 2.016,
+      "eval_steps_per_second": 0.252,
+      "step": 6800
+    },
+    {
+      "epoch": 1.0301792602677557,
+      "grad_norm": 0.08235965669155121,
+      "learning_rate": 0.00017278,
+      "loss": 0.0356,
+      "step": 6810
+    },
+    {
+      "epoch": 1.0316920051433325,
+      "grad_norm": 0.1203494668006897,
+      "learning_rate": 0.00017274,
+      "loss": 0.0391,
+      "step": 6820
+    },
+    {
+      "epoch": 1.0332047500189092,
+      "grad_norm": 0.059709157794713974,
+      "learning_rate": 0.00017270000000000002,
+      "loss": 0.036,
+      "step": 6830
+    },
+    {
+      "epoch": 1.034717494894486,
+      "grad_norm": 0.08380923420190811,
+      "learning_rate": 0.00017266,
+      "loss": 0.0311,
+      "step": 6840
+    },
+    {
+      "epoch": 1.0362302397700627,
+      "grad_norm": 0.0642111599445343,
+      "learning_rate": 0.00017262,
+      "loss": 0.0296,
+      "step": 6850
+    },
+    {
+      "epoch": 1.0377429846456394,
+      "grad_norm": 0.07701337337493896,
+      "learning_rate": 0.00017258,
+      "loss": 0.0318,
+      "step": 6860
+    },
+    {
+      "epoch": 1.0392557295212161,
+      "grad_norm": 0.09674856811761856,
+      "learning_rate": 0.00017254000000000003,
+      "loss": 0.0294,
+      "step": 6870
+    },
+    {
+      "epoch": 1.0407684743967929,
+      "grad_norm": 0.08543815463781357,
+      "learning_rate": 0.00017250000000000002,
+      "loss": 0.0322,
+      "step": 6880
+    },
+    {
+      "epoch": 1.0422812192723696,
+      "grad_norm": 0.08181754499673843,
+      "learning_rate": 0.00017246,
+      "loss": 0.031,
+      "step": 6890
+    },
+    {
+      "epoch": 1.0437939641479463,
+      "grad_norm": 0.07326922565698624,
+      "learning_rate": 0.00017242,
+      "loss": 0.0298,
+      "step": 6900
+    },
+    {
+      "epoch": 1.045306709023523,
+      "grad_norm": 0.060128018260002136,
+      "learning_rate": 0.00017238,
+      "loss": 0.0351,
+      "step": 6910
+    },
+    {
+      "epoch": 1.0468194538990998,
+      "grad_norm": 0.055250383913517,
+      "learning_rate": 0.00017234000000000002,
+      "loss": 0.0322,
+      "step": 6920
+    },
+    {
+      "epoch": 1.0483321987746765,
+      "grad_norm": 0.07841707766056061,
+      "learning_rate": 0.00017230000000000001,
+      "loss": 0.0311,
+      "step": 6930
+    },
+    {
+      "epoch": 1.0498449436502533,
+      "grad_norm": 0.06094701215624809,
+      "learning_rate": 0.00017226,
+      "loss": 0.0331,
+      "step": 6940
+    },
+    {
+      "epoch": 1.0513576885258302,
+      "grad_norm": 0.0738435760140419,
+      "learning_rate": 0.00017222,
+      "loss": 0.0385,
+      "step": 6950
+    },
+    {
+      "epoch": 1.052870433401407,
+      "grad_norm": 0.0741799846291542,
+      "learning_rate": 0.00017218,
+      "loss": 0.0332,
+      "step": 6960
+    },
+    {
+      "epoch": 1.0543831782769837,
+      "grad_norm": 0.11769600957632065,
+      "learning_rate": 0.00017214000000000002,
+      "loss": 0.0288,
+      "step": 6970
+    },
+    {
+      "epoch": 1.0558959231525604,
+      "grad_norm": 0.05547551065683365,
+      "learning_rate": 0.0001721,
+      "loss": 0.0351,
+      "step": 6980
+    },
+    {
+      "epoch": 1.0574086680281372,
+      "grad_norm": 0.059602439403533936,
+      "learning_rate": 0.00017206,
+      "loss": 0.0315,
+      "step": 6990
+    },
+    {
+      "epoch": 1.0589214129037139,
+      "grad_norm": 0.07523063570261002,
+      "learning_rate": 0.00017202,
+      "loss": 0.0344,
+      "step": 7000
+    },
+    {
+      "epoch": 1.0589214129037139,
+      "eval_cer": 0.06192848124566072,
+      "eval_loss": 0.03872867301106453,
+      "eval_runtime": 10423.0915,
+      "eval_samples_per_second": 2.02,
+      "eval_steps_per_second": 0.253,
+      "step": 7000
+    },
+    {
+      "epoch": 1.0604341577792906,
+      "grad_norm": 0.07334991544485092,
+      "learning_rate": 0.00017198000000000002,
+      "loss": 0.0394,
+      "step": 7010
+    },
+    {
+      "epoch": 1.0619469026548674,
+      "grad_norm": 0.08875437080860138,
+      "learning_rate": 0.00017194,
+      "loss": 0.0316,
+      "step": 7020
+    },
+    {
+      "epoch": 1.063459647530444,
+      "grad_norm": 0.06492207199335098,
+      "learning_rate": 0.0001719,
+      "loss": 0.0375,
+      "step": 7030
+    },
+    {
+      "epoch": 1.0649723924060208,
+      "grad_norm": 0.08707519620656967,
+      "learning_rate": 0.00017186,
+      "loss": 0.0333,
+      "step": 7040
+    },
+    {
+      "epoch": 1.0664851372815976,
+      "grad_norm": 0.06477733701467514,
+      "learning_rate": 0.00017182,
+      "loss": 0.036,
+      "step": 7050
+    },
+    {
+      "epoch": 1.0679978821571743,
+      "grad_norm": 0.05914880335330963,
+      "learning_rate": 0.00017178,
+      "loss": 0.0307,
+      "step": 7060
+    },
+    {
+      "epoch": 1.069510627032751,
+      "grad_norm": 0.11167873442173004,
+      "learning_rate": 0.00017174,
+      "loss": 0.0355,
+      "step": 7070
+    },
+    {
+      "epoch": 1.0710233719083277,
+      "grad_norm": 0.08664342761039734,
+      "learning_rate": 0.00017170000000000003,
+      "loss": 0.0373,
+      "step": 7080
+    },
+    {
+      "epoch": 1.0725361167839045,
+      "grad_norm": 0.06912154704332352,
+      "learning_rate": 0.00017166,
+      "loss": 0.0283,
+      "step": 7090
+    },
+    {
+      "epoch": 1.0740488616594812,
+      "grad_norm": 0.09120757132768631,
+      "learning_rate": 0.00017162000000000001,
+      "loss": 0.0313,
+      "step": 7100
+    },
+    {
+      "epoch": 1.075561606535058,
+      "grad_norm": 0.08159112185239792,
+      "learning_rate": 0.00017158,
+      "loss": 0.0413,
+      "step": 7110
+    },
+    {
+      "epoch": 1.0770743514106347,
+      "grad_norm": 0.095944344997406,
+      "learning_rate": 0.00017154,
+      "loss": 0.0355,
+      "step": 7120
+    },
+    {
+      "epoch": 1.0785870962862114,
+      "grad_norm": 0.10682930797338486,
+      "learning_rate": 0.00017150000000000002,
+      "loss": 0.0278,
+      "step": 7130
+    },
+    {
+      "epoch": 1.0800998411617881,
+      "grad_norm": 0.06514004617929459,
+      "learning_rate": 0.00017146,
+      "loss": 0.0306,
+      "step": 7140
+    },
+    {
+      "epoch": 1.0816125860373649,
+      "grad_norm": 0.07849156856536865,
+      "learning_rate": 0.00017142,
+      "loss": 0.0379,
+      "step": 7150
+    },
+    {
+      "epoch": 1.0831253309129416,
+      "grad_norm": 0.0788741260766983,
+      "learning_rate": 0.00017138,
+      "loss": 0.032,
+      "step": 7160
+    },
+    {
+      "epoch": 1.0846380757885183,
+      "grad_norm": 0.10495191067457199,
+      "learning_rate": 0.00017134000000000002,
+      "loss": 0.0358,
+      "step": 7170
+    },
+    {
+      "epoch": 1.086150820664095,
+      "grad_norm": 0.07463409751653671,
+      "learning_rate": 0.00017130000000000002,
+      "loss": 0.0356,
+      "step": 7180
+    },
+    {
+      "epoch": 1.0876635655396718,
+      "grad_norm": 0.08425049483776093,
+      "learning_rate": 0.00017126,
+      "loss": 0.0327,
+      "step": 7190
+    },
+    {
+      "epoch": 1.0891763104152485,
+      "grad_norm": 0.07767146825790405,
+      "learning_rate": 0.00017122,
+      "loss": 0.034,
+      "step": 7200
+    },
+    {
+      "epoch": 1.0891763104152485,
+      "eval_cer": 0.09758161553419167,
+      "eval_loss": 0.037929706275463104,
+      "eval_runtime": 10420.1284,
+      "eval_samples_per_second": 2.02,
+      "eval_steps_per_second": 0.253,
+      "step": 7200
+    },
+    {
+      "epoch": 1.0906890552908253,
+      "grad_norm": 0.07770776748657227,
+      "learning_rate": 0.00017118,
+      "loss": 0.0321,
+      "step": 7210
+    },
+    {
+      "epoch": 1.092201800166402,
+      "grad_norm": 0.06977003812789917,
+      "learning_rate": 0.00017114000000000002,
+      "loss": 0.0315,
+      "step": 7220
+    },
+    {
+      "epoch": 1.0937145450419787,
+      "grad_norm": 0.077842116355896,
+      "learning_rate": 0.0001711,
+      "loss": 0.0317,
+      "step": 7230
+    },
+    {
+      "epoch": 1.0952272899175555,
+      "grad_norm": 0.11414997279644012,
+      "learning_rate": 0.00017106,
+      "loss": 0.0392,
+      "step": 7240
+    },
+    {
+      "epoch": 1.0967400347931322,
+      "grad_norm": 0.07568582892417908,
+      "learning_rate": 0.00017102,
+      "loss": 0.0369,
+      "step": 7250
+    },
+    {
+      "epoch": 1.098252779668709,
+      "grad_norm": 0.07864728569984436,
+      "learning_rate": 0.00017098000000000002,
+      "loss": 0.038,
+      "step": 7260
+    },
+    {
+      "epoch": 1.0997655245442857,
+      "grad_norm": 0.0852401927113533,
+      "learning_rate": 0.00017094,
+      "loss": 0.0323,
+      "step": 7270
+    },
+    {
+      "epoch": 1.1012782694198624,
+      "grad_norm": 0.06548303365707397,
+      "learning_rate": 0.0001709,
+      "loss": 0.0373,
+      "step": 7280
+    },
+    {
+      "epoch": 1.1027910142954391,
+      "grad_norm": 0.10153812170028687,
+      "learning_rate": 0.00017086,
+      "loss": 0.0321,
+      "step": 7290
+    },
+    {
+      "epoch": 1.1043037591710159,
+      "grad_norm": 0.09032442420721054,
+      "learning_rate": 0.00017082,
+      "loss": 0.0306,
+      "step": 7300
+    },
+    {
+      "epoch": 1.1058165040465926,
+      "grad_norm": 0.12109789252281189,
+      "learning_rate": 0.00017078000000000001,
+      "loss": 0.0355,
+      "step": 7310
+    },
+    {
+      "epoch": 1.1073292489221693,
+      "grad_norm": 0.08515240997076035,
+      "learning_rate": 0.00017074,
+      "loss": 0.0374,
+      "step": 7320
+    },
+    {
+      "epoch": 1.108841993797746,
+      "grad_norm": 0.06838446855545044,
+      "learning_rate": 0.0001707,
+      "loss": 0.0309,
+      "step": 7330
+    },
+    {
+      "epoch": 1.1103547386733228,
+      "grad_norm": 0.10029911994934082,
+      "learning_rate": 0.00017066,
+      "loss": 0.0377,
+      "step": 7340
+    },
+    {
+      "epoch": 1.1118674835488995,
+      "grad_norm": 0.08499938994646072,
+      "learning_rate": 0.00017062,
+      "loss": 0.0317,
+      "step": 7350
+    },
+    {
+      "epoch": 1.1133802284244763,
+      "grad_norm": 0.10972133278846741,
+      "learning_rate": 0.00017058,
+      "loss": 0.0344,
+      "step": 7360
+    },
+    {
+      "epoch": 1.114892973300053,
+      "grad_norm": 0.06848263740539551,
+      "learning_rate": 0.00017054,
+      "loss": 0.0356,
+      "step": 7370
+    },
+    {
+      "epoch": 1.1164057181756297,
+      "grad_norm": 0.06813491135835648,
+      "learning_rate": 0.00017050000000000002,
+      "loss": 0.0291,
+      "step": 7380
+    },
+    {
+      "epoch": 1.1179184630512065,
+      "grad_norm": 0.053215883672237396,
+      "learning_rate": 0.00017046,
+      "loss": 0.0297,
+      "step": 7390
+    },
+    {
+      "epoch": 1.1194312079267832,
+      "grad_norm": 0.08575928211212158,
+      "learning_rate": 0.00017042,
+      "loss": 0.0378,
+      "step": 7400
+    },
+    {
+      "epoch": 1.1194312079267832,
+      "eval_cer": 0.05163898174846133,
+      "eval_loss": 0.03768303617835045,
+      "eval_runtime": 10418.7834,
+      "eval_samples_per_second": 2.021,
+      "eval_steps_per_second": 0.253,
+      "step": 7400
+    },
+    {
+      "epoch": 1.12094395280236,
+      "grad_norm": 0.07621601223945618,
+      "learning_rate": 0.00017038,
+      "loss": 0.032,
+      "step": 7410
+    },
+    {
+      "epoch": 1.1224566976779367,
+      "grad_norm": 0.11499703675508499,
+      "learning_rate": 0.00017034,
+      "loss": 0.0331,
+      "step": 7420
+    },
+    {
+      "epoch": 1.1239694425535134,
+      "grad_norm": 0.08789568394422531,
+      "learning_rate": 0.00017030000000000002,
+      "loss": 0.0332,
+      "step": 7430
+    },
+    {
+      "epoch": 1.1254821874290901,
+      "grad_norm": 0.0887342318892479,
+      "learning_rate": 0.00017025999999999999,
+      "loss": 0.0374,
+      "step": 7440
+    },
+    {
+      "epoch": 1.1269949323046669,
+      "grad_norm": 0.11794856935739517,
+      "learning_rate": 0.00017022,
+      "loss": 0.0347,
+      "step": 7450
+    },
+    {
+      "epoch": 1.1285076771802436,
+      "grad_norm": 0.07593784481287003,
+      "learning_rate": 0.00017018,
+      "loss": 0.0323,
+      "step": 7460
+    },
+    {
+      "epoch": 1.1300204220558203,
+      "grad_norm": 0.06868909299373627,
+      "learning_rate": 0.00017014000000000002,
+      "loss": 0.0311,
+      "step": 7470
+    },
+    {
+      "epoch": 1.131533166931397,
+      "grad_norm": 0.1010032370686531,
+      "learning_rate": 0.00017010000000000001,
+      "loss": 0.0333,
+      "step": 7480
+    },
+    {
+      "epoch": 1.1330459118069738,
+      "grad_norm": 0.08664656430482864,
+      "learning_rate": 0.00017006,
+      "loss": 0.0358,
+      "step": 7490
+    },
+    {
+      "epoch": 1.1345586566825505,
+      "grad_norm": 0.09153386205434799,
+      "learning_rate": 0.00017002,
+      "loss": 0.0288,
+      "step": 7500
+    },
+    {
+      "epoch": 1.1360714015581272,
+      "grad_norm": 0.10042116045951843,
+      "learning_rate": 0.00016998,
+      "loss": 0.0324,
+      "step": 7510
+    },
+    {
+      "epoch": 1.137584146433704,
+      "grad_norm": 0.09703629463911057,
+      "learning_rate": 0.00016994000000000002,
+      "loss": 0.0356,
+      "step": 7520
+    },
+    {
+      "epoch": 1.1390968913092807,
+      "grad_norm": 0.07961410284042358,
+      "learning_rate": 0.0001699,
+      "loss": 0.0279,
+      "step": 7530
+    },
+    {
+      "epoch": 1.1406096361848574,
+      "grad_norm": 0.09164062142372131,
+      "learning_rate": 0.00016986000000000003,
+      "loss": 0.033,
+      "step": 7540
+    },
+    {
+      "epoch": 1.1421223810604342,
+      "grad_norm": 0.0804910659790039,
+      "learning_rate": 0.00016982,
+      "loss": 0.033,
+      "step": 7550
+    },
+    {
+      "epoch": 1.143635125936011,
+      "grad_norm": 0.07923970371484756,
+      "learning_rate": 0.00016978000000000002,
+      "loss": 0.0366,
+      "step": 7560
+    },
+    {
+      "epoch": 1.1451478708115876,
+      "grad_norm": 0.1198810487985611,
+      "learning_rate": 0.00016974,
+      "loss": 0.0361,
+      "step": 7570
+    },
+    {
+      "epoch": 1.1466606156871644,
+      "grad_norm": 0.08409520238637924,
+      "learning_rate": 0.0001697,
+      "loss": 0.0323,
+      "step": 7580
+    },
+    {
+      "epoch": 1.148173360562741,
+      "grad_norm": 0.09524326026439667,
+      "learning_rate": 0.00016966000000000003,
+      "loss": 0.0338,
+      "step": 7590
+    },
+    {
+      "epoch": 1.1496861054383178,
+      "grad_norm": 0.0670013502240181,
+      "learning_rate": 0.00016962,
+      "loss": 0.033,
+      "step": 7600
+    },
+    {
+      "epoch": 1.1496861054383178,
+      "eval_cer": 0.04317970118571997,
+      "eval_loss": 0.03775278851389885,
+      "eval_runtime": 10413.2831,
+      "eval_samples_per_second": 2.022,
+      "eval_steps_per_second": 0.253,
+      "step": 7600
+    },
+    {
+      "epoch": 1.1511988503138946,
+      "grad_norm": 0.07331959903240204,
+      "learning_rate": 0.00016958,
+      "loss": 0.0331,
+      "step": 7610
+    },
+    {
+      "epoch": 1.1527115951894713,
+      "grad_norm": 0.06851343810558319,
+      "learning_rate": 0.00016954,
+      "loss": 0.0306,
+      "step": 7620
+    },
+    {
+      "epoch": 1.154224340065048,
+      "grad_norm": 0.07627418637275696,
+      "learning_rate": 0.00016950000000000003,
+      "loss": 0.0334,
+      "step": 7630
+    },
+    {
+      "epoch": 1.1557370849406248,
+      "grad_norm": 0.08676694333553314,
+      "learning_rate": 0.00016946000000000002,
+      "loss": 0.0322,
+      "step": 7640
+    },
+    {
+      "epoch": 1.1572498298162015,
+      "grad_norm": 0.07023747265338898,
+      "learning_rate": 0.00016942000000000001,
+      "loss": 0.0358,
+      "step": 7650
+    },
+    {
+      "epoch": 1.1587625746917782,
+      "grad_norm": 0.07805462926626205,
+      "learning_rate": 0.00016938,
+      "loss": 0.0325,
+      "step": 7660
+    },
+    {
+      "epoch": 1.160275319567355,
+      "grad_norm": 0.0867529958486557,
+      "learning_rate": 0.00016934,
+      "loss": 0.0318,
+      "step": 7670
+    },
+    {
+      "epoch": 1.1617880644429317,
+      "grad_norm": 0.08449842035770416,
+      "learning_rate": 0.00016930000000000002,
+      "loss": 0.0408,
+      "step": 7680
+    },
+    {
+      "epoch": 1.1633008093185084,
+      "grad_norm": 0.08054087311029434,
+      "learning_rate": 0.00016926000000000002,
+      "loss": 0.0306,
+      "step": 7690
+    },
+    {
+      "epoch": 1.1648135541940852,
+      "grad_norm": 0.08645962178707123,
+      "learning_rate": 0.00016922,
+      "loss": 0.0299,
+      "step": 7700
+    },
+    {
+      "epoch": 1.166326299069662,
+      "grad_norm": 0.0892554521560669,
+      "learning_rate": 0.00016918,
+      "loss": 0.0352,
+      "step": 7710
+    },
+    {
+      "epoch": 1.1678390439452386,
+      "grad_norm": 0.06643500924110413,
+      "learning_rate": 0.00016914,
+      "loss": 0.0284,
+      "step": 7720
+    },
+    {
+      "epoch": 1.1693517888208154,
+      "grad_norm": 0.06918591260910034,
+      "learning_rate": 0.00016910000000000002,
+      "loss": 0.0278,
+      "step": 7730
+    },
+    {
+      "epoch": 1.170864533696392,
+      "grad_norm": 0.08370740711688995,
+      "learning_rate": 0.00016906,
+      "loss": 0.0316,
+      "step": 7740
+    },
+    {
+      "epoch": 1.1723772785719688,
+      "grad_norm": 0.053777385503053665,
+      "learning_rate": 0.00016902,
+      "loss": 0.036,
+      "step": 7750
+    },
+    {
+      "epoch": 1.1738900234475456,
+      "grad_norm": 0.0665329247713089,
+      "learning_rate": 0.00016898,
+      "loss": 0.0333,
+      "step": 7760
+    },
+    {
+      "epoch": 1.1754027683231223,
+      "grad_norm": 0.07484222948551178,
+      "learning_rate": 0.00016894000000000002,
+      "loss": 0.0319,
+      "step": 7770
+    },
+    {
+      "epoch": 1.176915513198699,
+      "grad_norm": 0.08218715339899063,
+      "learning_rate": 0.0001689,
+      "loss": 0.0308,
+      "step": 7780
+    },
+    {
+      "epoch": 1.1784282580742758,
+      "grad_norm": 0.06873024255037308,
+      "learning_rate": 0.00016886,
+      "loss": 0.0349,
+      "step": 7790
+    },
+    {
+      "epoch": 1.1799410029498525,
+      "grad_norm": 0.07846609503030777,
+      "learning_rate": 0.00016882,
+      "loss": 0.0359,
+      "step": 7800
+    },
+    {
+      "epoch": 1.1799410029498525,
+      "eval_cer": 0.1078840865459451,
+      "eval_loss": 0.03878456726670265,
+      "eval_runtime": 10398.1972,
+      "eval_samples_per_second": 2.025,
+      "eval_steps_per_second": 0.253,
+      "step": 7800
+    },
+    {
+      "epoch": 1.1814537478254292,
+      "grad_norm": 0.06112883612513542,
+      "learning_rate": 0.00016878,
+      "loss": 0.0324,
+      "step": 7810
+    },
+    {
+      "epoch": 1.182966492701006,
+      "grad_norm": 0.07065495103597641,
+      "learning_rate": 0.00016874000000000001,
+      "loss": 0.0333,
+      "step": 7820
+    },
+    {
+      "epoch": 1.1844792375765827,
+      "grad_norm": 0.10944267362356186,
+      "learning_rate": 0.0001687,
+      "loss": 0.0322,
+      "step": 7830
+    },
+    {
+      "epoch": 1.1859919824521594,
+      "grad_norm": 0.08741329610347748,
+      "learning_rate": 0.00016866000000000003,
+      "loss": 0.0339,
+      "step": 7840
+    },
+    {
+      "epoch": 1.1875047273277362,
+      "grad_norm": 0.06457091867923737,
+      "learning_rate": 0.00016862,
+      "loss": 0.0345,
+      "step": 7850
+    },
+    {
+      "epoch": 1.1890174722033129,
+      "grad_norm": 0.0570165179669857,
+      "learning_rate": 0.00016858000000000002,
+      "loss": 0.032,
+      "step": 7860
+    },
+    {
+      "epoch": 1.1905302170788896,
+      "grad_norm": 0.07944530248641968,
+      "learning_rate": 0.00016854,
+      "loss": 0.0347,
+      "step": 7870
+    },
+    {
+      "epoch": 1.1920429619544664,
+      "grad_norm": 0.06981216371059418,
+      "learning_rate": 0.0001685,
+      "loss": 0.0329,
+      "step": 7880
+    },
+    {
+      "epoch": 1.193555706830043,
+      "grad_norm": 0.052252449095249176,
+      "learning_rate": 0.00016846000000000002,
+      "loss": 0.0327,
+      "step": 7890
+    },
+    {
+      "epoch": 1.1950684517056198,
+      "grad_norm": 0.05333190783858299,
+      "learning_rate": 0.00016842,
+      "loss": 0.0269,
+      "step": 7900
+    },
+    {
+      "epoch": 1.1965811965811965,
+      "grad_norm": 0.18012838065624237,
+      "learning_rate": 0.00016838,
+      "loss": 0.0324,
+      "step": 7910
+    },
+    {
+      "epoch": 1.1980939414567733,
+      "grad_norm": 0.06892676651477814,
+      "learning_rate": 0.00016834,
+      "loss": 0.0294,
+      "step": 7920
+    },
+    {
+      "epoch": 1.19960668633235,
+      "grad_norm": 0.07558593899011612,
+      "learning_rate": 0.00016830000000000003,
+      "loss": 0.0371,
+      "step": 7930
+    },
+    {
+      "epoch": 1.2011194312079267,
+      "grad_norm": 0.08046507835388184,
+      "learning_rate": 0.00016826000000000002,
+      "loss": 0.0311,
+      "step": 7940
+    },
+    {
+      "epoch": 1.2026321760835035,
+      "grad_norm": 0.07986424118280411,
+      "learning_rate": 0.00016822,
+      "loss": 0.0357,
+      "step": 7950
+    },
+    {
+      "epoch": 1.2041449209590802,
+      "grad_norm": 0.07394195348024368,
+      "learning_rate": 0.00016818,
+      "loss": 0.0341,
+      "step": 7960
+    },
+    {
+      "epoch": 1.205657665834657,
+      "grad_norm": 0.06269822269678116,
+      "learning_rate": 0.00016814,
+      "loss": 0.0329,
+      "step": 7970
+    },
+    {
+      "epoch": 1.2071704107102337,
+      "grad_norm": 0.07179784774780273,
+      "learning_rate": 0.00016810000000000002,
+      "loss": 0.0329,
+      "step": 7980
+    },
+    {
+      "epoch": 1.2086831555858104,
+      "grad_norm": 0.10174887627363205,
+      "learning_rate": 0.00016806000000000001,
+      "loss": 0.0262,
+      "step": 7990
+    },
+    {
+      "epoch": 1.2101959004613871,
+      "grad_norm": 0.06536643952131271,
+      "learning_rate": 0.00016802,
+      "loss": 0.034,
+      "step": 8000
+    },
+    {
+      "epoch": 1.2101959004613871,
+      "eval_cer": 0.15941559003095868,
+      "eval_loss": 0.03837862238287926,
+      "eval_runtime": 10390.1541,
+      "eval_samples_per_second": 2.026,
+      "eval_steps_per_second": 0.253,
+      "step": 8000
+    },
+    {
+      "epoch": 1.2117086453369639,
+      "grad_norm": 0.13079065084457397,
+      "learning_rate": 0.00016798,
+      "loss": 0.037,
+      "step": 8010
+    },
+    {
+      "epoch": 1.2132213902125406,
+      "grad_norm": 0.07293607294559479,
+      "learning_rate": 0.00016794000000000002,
+      "loss": 0.0295,
+      "step": 8020
+    },
+    {
+      "epoch": 1.2147341350881173,
+      "grad_norm": 0.07390507310628891,
+      "learning_rate": 0.00016790000000000002,
+      "loss": 0.0309,
+      "step": 8030
+    },
+    {
+      "epoch": 1.216246879963694,
+      "grad_norm": 0.22675780951976776,
+      "learning_rate": 0.00016786,
+      "loss": 0.0341,
+      "step": 8040
+    },
+    {
+      "epoch": 1.2177596248392708,
+      "grad_norm": 0.06630139797925949,
+      "learning_rate": 0.00016782,
+      "loss": 0.0359,
+      "step": 8050
+    },
+    {
+      "epoch": 1.2192723697148475,
+      "grad_norm": 0.09231210500001907,
+      "learning_rate": 0.00016778,
+      "loss": 0.0325,
+      "step": 8060
+    },
+    {
+      "epoch": 1.2207851145904243,
+      "grad_norm": 0.067893847823143,
+      "learning_rate": 0.00016774000000000002,
+      "loss": 0.0338,
+      "step": 8070
+    },
+    {
+      "epoch": 1.222297859466001,
+      "grad_norm": 0.16284491121768951,
+      "learning_rate": 0.0001677,
+      "loss": 0.0362,
+      "step": 8080
+    },
+    {
+      "epoch": 1.2238106043415777,
+      "grad_norm": 0.07695828378200531,
+      "learning_rate": 0.00016766,
+      "loss": 0.0367,
+      "step": 8090
+    },
+    {
+      "epoch": 1.2253233492171545,
+      "grad_norm": 0.07685229927301407,
+      "learning_rate": 0.00016762,
+      "loss": 0.0383,
+      "step": 8100
+    },
+    {
+      "epoch": 1.2268360940927312,
+      "grad_norm": 0.08510534465312958,
+      "learning_rate": 0.00016758,
+      "loss": 0.0346,
+      "step": 8110
+    },
+    {
+      "epoch": 1.228348838968308,
+      "grad_norm": 0.16018977761268616,
+      "learning_rate": 0.00016754,
+      "loss": 0.0314,
+      "step": 8120
+    },
+    {
+      "epoch": 1.2298615838438847,
+      "grad_norm": 0.10644716769456863,
+      "learning_rate": 0.0001675,
+      "loss": 0.0427,
+      "step": 8130
+    },
+    {
+      "epoch": 1.2313743287194614,
+      "grad_norm": 0.06390608847141266,
+      "learning_rate": 0.00016746000000000003,
+      "loss": 0.0333,
+      "step": 8140
+    },
+    {
+      "epoch": 1.2328870735950381,
+      "grad_norm": 0.1173742264509201,
+      "learning_rate": 0.00016742,
+      "loss": 0.0335,
+      "step": 8150
+    },
+    {
+      "epoch": 1.2343998184706149,
+      "grad_norm": 0.08506636321544647,
+      "learning_rate": 0.00016738000000000001,
+      "loss": 0.0393,
+      "step": 8160
+    },
+    {
+      "epoch": 1.2359125633461916,
+      "grad_norm": 0.08176897466182709,
+      "learning_rate": 0.00016734,
+      "loss": 0.0306,
+      "step": 8170
+    },
+    {
+      "epoch": 1.2374253082217683,
+      "grad_norm": 0.11272590607404709,
+      "learning_rate": 0.0001673,
+      "loss": 0.0368,
+      "step": 8180
+    },
+    {
+      "epoch": 1.238938053097345,
+      "grad_norm": 0.10923430323600769,
+      "learning_rate": 0.00016726000000000002,
+      "loss": 0.0389,
+      "step": 8190
+    },
+    {
+      "epoch": 1.2404507979729218,
+      "grad_norm": 0.05665091425180435,
+      "learning_rate": 0.00016722,
+      "loss": 0.0352,
+      "step": 8200
+    },
+    {
+      "epoch": 1.2404507979729218,
+      "eval_cer": 0.195939668868118,
+      "eval_loss": 0.03837649151682854,
+      "eval_runtime": 10379.5895,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.254,
+      "step": 8200
+    },
+    {
+      "epoch": 1.2419635428484985,
+      "grad_norm": 0.08927123993635178,
+      "learning_rate": 0.00016718,
+      "loss": 0.0356,
+      "step": 8210
+    },
+    {
+      "epoch": 1.2434762877240753,
+      "grad_norm": 0.09398534893989563,
+      "learning_rate": 0.00016714,
+      "loss": 0.0365,
+      "step": 8220
+    },
+    {
+      "epoch": 1.244989032599652,
+      "grad_norm": 0.0905461311340332,
+      "learning_rate": 0.00016710000000000002,
+      "loss": 0.0335,
+      "step": 8230
+    },
+    {
+      "epoch": 1.2465017774752287,
+      "grad_norm": 0.09033455699682236,
+      "learning_rate": 0.00016706000000000002,
+      "loss": 0.0376,
+      "step": 8240
+    },
+    {
+      "epoch": 1.2480145223508055,
+      "grad_norm": 0.08217161148786545,
+      "learning_rate": 0.00016702,
+      "loss": 0.032,
+      "step": 8250
+    },
+    {
+      "epoch": 1.2495272672263822,
+      "grad_norm": 0.0694824755191803,
+      "learning_rate": 0.00016698,
+      "loss": 0.0354,
+      "step": 8260
+    },
+    {
+      "epoch": 1.2510400121019591,
+      "grad_norm": 0.08535374701023102,
+      "learning_rate": 0.00016694,
+      "loss": 0.0288,
+      "step": 8270
+    },
+    {
+      "epoch": 1.2525527569775359,
+      "grad_norm": 0.10267391055822372,
+      "learning_rate": 0.00016690000000000002,
+      "loss": 0.0331,
+      "step": 8280
+    },
+    {
+      "epoch": 1.2540655018531126,
+      "grad_norm": 0.0720328763127327,
+      "learning_rate": 0.00016686,
+      "loss": 0.0324,
+      "step": 8290
+    },
+    {
+      "epoch": 1.2555782467286893,
+      "grad_norm": 0.15617039799690247,
+      "learning_rate": 0.00016682,
+      "loss": 0.0374,
+      "step": 8300
+    },
+    {
+      "epoch": 1.257090991604266,
+      "grad_norm": 0.09863468259572983,
+      "learning_rate": 0.00016678,
+      "loss": 0.0363,
+      "step": 8310
+    },
+    {
+      "epoch": 1.2586037364798428,
+      "grad_norm": 0.08562877029180527,
+      "learning_rate": 0.00016674000000000002,
+      "loss": 0.0347,
+      "step": 8320
+    },
+    {
+      "epoch": 1.2601164813554195,
+      "grad_norm": 0.09868349879980087,
+      "learning_rate": 0.0001667,
+      "loss": 0.0362,
+      "step": 8330
+    },
+    {
+      "epoch": 1.2616292262309963,
+      "grad_norm": 0.09744835644960403,
+      "learning_rate": 0.00016666,
+      "loss": 0.0364,
+      "step": 8340
+    },
+    {
+      "epoch": 1.263141971106573,
+      "grad_norm": 0.19243358075618744,
+      "learning_rate": 0.00016662,
+      "loss": 0.0378,
+      "step": 8350
+    },
+    {
+      "epoch": 1.2646547159821497,
+      "grad_norm": 0.06478457897901535,
+      "learning_rate": 0.00016658,
+      "loss": 0.033,
+      "step": 8360
+    },
+    {
+      "epoch": 1.2661674608577265,
+      "grad_norm": 0.09313791990280151,
+      "learning_rate": 0.00016654000000000001,
+      "loss": 0.04,
+      "step": 8370
+    },
+    {
+      "epoch": 1.2676802057333032,
+      "grad_norm": 0.0906825065612793,
+      "learning_rate": 0.0001665,
+      "loss": 0.0341,
+      "step": 8380
+    },
+    {
+      "epoch": 1.26919295060888,
+      "grad_norm": 0.08549359440803528,
+      "learning_rate": 0.00016646000000000003,
+      "loss": 0.0376,
+      "step": 8390
+    },
+    {
+      "epoch": 1.2707056954844567,
+      "grad_norm": 0.0915452241897583,
+      "learning_rate": 0.00016642,
+      "loss": 0.029,
+      "step": 8400
+    },
+    {
+      "epoch": 1.2707056954844567,
+      "eval_cer": 0.19141261028875828,
+      "eval_loss": 0.03777679055929184,
+      "eval_runtime": 10360.722,
+      "eval_samples_per_second": 2.032,
+      "eval_steps_per_second": 0.254,
+      "step": 8400
+    },
+    {
+      "epoch": 1.2722184403600334,
+      "grad_norm": 0.07039971649646759,
+      "learning_rate": 0.00016638,
+      "loss": 0.0355,
+      "step": 8410
+    },
+    {
+      "epoch": 1.2737311852356101,
+      "grad_norm": 0.08890164643526077,
+      "learning_rate": 0.00016634,
+      "loss": 0.03,
+      "step": 8420
+    },
+    {
+      "epoch": 1.2752439301111869,
+      "grad_norm": 0.07611805945634842,
+      "learning_rate": 0.0001663,
+      "loss": 0.037,
+      "step": 8430
+    },
+    {
+      "epoch": 1.2767566749867636,
+      "grad_norm": 0.10268427431583405,
+      "learning_rate": 0.00016626000000000002,
+      "loss": 0.0346,
+      "step": 8440
+    },
+    {
+      "epoch": 1.2782694198623403,
+      "grad_norm": 0.07185817509889603,
+      "learning_rate": 0.00016622,
+      "loss": 0.0334,
+      "step": 8450
+    },
+    {
+      "epoch": 1.279782164737917,
+      "grad_norm": 0.09720634669065475,
+      "learning_rate": 0.00016618,
+      "loss": 0.0328,
+      "step": 8460
+    },
+    {
+      "epoch": 1.2812949096134938,
+      "grad_norm": 0.08373324573040009,
+      "learning_rate": 0.00016614,
+      "loss": 0.0342,
+      "step": 8470
+    },
+    {
+      "epoch": 1.2828076544890705,
+      "grad_norm": 0.05525701493024826,
+      "learning_rate": 0.0001661,
+      "loss": 0.0295,
+      "step": 8480
+    },
+    {
+      "epoch": 1.2843203993646473,
+      "grad_norm": 0.08398504555225372,
+      "learning_rate": 0.00016606000000000002,
+      "loss": 0.0336,
+      "step": 8490
+    },
+    {
+      "epoch": 1.285833144240224,
+      "grad_norm": 0.11384329944849014,
+      "learning_rate": 0.00016601999999999999,
+      "loss": 0.0335,
+      "step": 8500
+    },
+    {
+      "epoch": 1.2873458891158007,
+      "grad_norm": 0.05366117134690285,
+      "learning_rate": 0.00016598,
+      "loss": 0.0303,
+      "step": 8510
+    },
+    {
+      "epoch": 1.2888586339913775,
+      "grad_norm": 0.09270923584699631,
+      "learning_rate": 0.00016594,
+      "loss": 0.0309,
+      "step": 8520
+    },
+    {
+      "epoch": 1.2903713788669542,
+      "grad_norm": 0.09621911495923996,
+      "learning_rate": 0.00016590000000000002,
+      "loss": 0.0326,
+      "step": 8530
+    },
+    {
+      "epoch": 1.291884123742531,
+      "grad_norm": 0.09750113636255264,
+      "learning_rate": 0.00016586000000000001,
+      "loss": 0.032,
+      "step": 8540
+    },
+    {
+      "epoch": 1.2933968686181077,
+      "grad_norm": 0.08557499945163727,
+      "learning_rate": 0.00016582,
+      "loss": 0.0331,
+      "step": 8550
+    },
+    {
+      "epoch": 1.2949096134936844,
+      "grad_norm": 0.0842200294137001,
+      "learning_rate": 0.00016578,
+      "loss": 0.0339,
+      "step": 8560
+    },
+    {
+      "epoch": 1.2964223583692611,
+      "grad_norm": 0.06341574341058731,
+      "learning_rate": 0.00016574,
+      "loss": 0.0369,
+      "step": 8570
+    },
+    {
+      "epoch": 1.2979351032448379,
+      "grad_norm": 0.07687686383724213,
+      "learning_rate": 0.00016570000000000002,
+      "loss": 0.0291,
+      "step": 8580
+    },
+    {
+      "epoch": 1.2994478481204146,
+      "grad_norm": 0.07118263840675354,
+      "learning_rate": 0.00016566,
+      "loss": 0.0331,
+      "step": 8590
+    },
+    {
+      "epoch": 1.3009605929959913,
+      "grad_norm": 0.10967772454023361,
+      "learning_rate": 0.00016562,
+      "loss": 0.04,
+      "step": 8600
+    },
+    {
+      "epoch": 1.3009605929959913,
+      "eval_cer": 0.15955704130871465,
+      "eval_loss": 0.03786647692322731,
+      "eval_runtime": 10383.8112,
+      "eval_samples_per_second": 2.027,
+      "eval_steps_per_second": 0.253,
+      "step": 8600
+    },
+    {
+      "epoch": 1.302473337871568,
+      "grad_norm": 0.09102348983287811,
+      "learning_rate": 0.00016558,
+      "loss": 0.0337,
+      "step": 8610
+    },
+    {
+      "epoch": 1.3039860827471448,
+      "grad_norm": 0.0596625916659832,
+      "learning_rate": 0.00016554000000000002,
+      "loss": 0.0341,
+      "step": 8620
+    },
+    {
+      "epoch": 1.3054988276227215,
+      "grad_norm": 0.0790410116314888,
+      "learning_rate": 0.0001655,
+      "loss": 0.0348,
+      "step": 8630
+    },
+    {
+      "epoch": 1.3070115724982982,
+      "grad_norm": 0.08243832737207413,
+      "learning_rate": 0.00016546,
+      "loss": 0.0351,
+      "step": 8640
+    },
+    {
+      "epoch": 1.308524317373875,
+      "grad_norm": 0.07890262454748154,
+      "learning_rate": 0.00016542,
+      "loss": 0.0331,
+      "step": 8650
+    },
+    {
+      "epoch": 1.3100370622494517,
+      "grad_norm": 0.06424404680728912,
+      "learning_rate": 0.00016538,
+      "loss": 0.032,
+      "step": 8660
+    },
+    {
+      "epoch": 1.3115498071250284,
+      "grad_norm": 0.08828658610582352,
+      "learning_rate": 0.00016534,
+      "loss": 0.0351,
+      "step": 8670
+    },
+    {
+      "epoch": 1.3130625520006052,
+      "grad_norm": 0.07190482318401337,
+      "learning_rate": 0.0001653,
+      "loss": 0.0334,
+      "step": 8680
+    },
+    {
+      "epoch": 1.314575296876182,
+      "grad_norm": 0.1207108125090599,
+      "learning_rate": 0.00016526000000000003,
+      "loss": 0.0333,
+      "step": 8690
+    },
+    {
+      "epoch": 1.3160880417517586,
+      "grad_norm": 0.057197410613298416,
+      "learning_rate": 0.00016522,
+      "loss": 0.0273,
+      "step": 8700
+    },
+    {
+      "epoch": 1.3176007866273354,
+      "grad_norm": 0.0845530703663826,
+      "learning_rate": 0.00016518000000000001,
+      "loss": 0.0398,
+      "step": 8710
+    },
+    {
+      "epoch": 1.319113531502912,
+      "grad_norm": 0.07357069104909897,
+      "learning_rate": 0.00016514,
+      "loss": 0.0334,
+      "step": 8720
+    },
+    {
+      "epoch": 1.3206262763784888,
+      "grad_norm": 0.07419273257255554,
+      "learning_rate": 0.0001651,
+      "loss": 0.0267,
+      "step": 8730
+    },
+    {
+      "epoch": 1.3221390212540656,
+      "grad_norm": 0.08293847739696503,
+      "learning_rate": 0.00016506000000000002,
+      "loss": 0.0286,
+      "step": 8740
+    },
+    {
+      "epoch": 1.3236517661296423,
+      "grad_norm": 0.09437254071235657,
+      "learning_rate": 0.00016502,
+      "loss": 0.0411,
+      "step": 8750
+    },
+    {
+      "epoch": 1.325164511005219,
+      "grad_norm": 0.06988554447889328,
+      "learning_rate": 0.00016498,
+      "loss": 0.0288,
+      "step": 8760
+    },
+    {
+      "epoch": 1.3266772558807958,
+      "grad_norm": 0.11081293970346451,
+      "learning_rate": 0.00016494,
+      "loss": 0.0342,
+      "step": 8770
+    },
+    {
+      "epoch": 1.3281900007563725,
+      "grad_norm": 0.0911073237657547,
+      "learning_rate": 0.0001649,
+      "loss": 0.0324,
+      "step": 8780
+    },
+    {
+      "epoch": 1.3297027456319492,
+      "grad_norm": 0.08337673544883728,
+      "learning_rate": 0.00016486000000000002,
+      "loss": 0.0297,
+      "step": 8790
+    },
+    {
+      "epoch": 1.331215490507526,
+      "grad_norm": 0.09077824652194977,
+      "learning_rate": 0.00016482,
+      "loss": 0.0319,
+      "step": 8800
+    },
+    {
+      "epoch": 1.331215490507526,
+      "eval_cer": 0.050760007214632856,
+      "eval_loss": 0.03842457756400108,
+      "eval_runtime": 10378.6583,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.254,
+      "step": 8800
+    },
+    {
+      "epoch": 1.3327282353831027,
+      "grad_norm": 0.12336084991693497,
+      "learning_rate": 0.00016478,
+      "loss": 0.0371,
+      "step": 8810
+    },
+    {
+      "epoch": 1.3342409802586794,
+      "grad_norm": 0.07978357374668121,
+      "learning_rate": 0.00016474,
+      "loss": 0.0349,
+      "step": 8820
+    },
+    {
+      "epoch": 1.3357537251342562,
+      "grad_norm": 0.1073361411690712,
+      "learning_rate": 0.00016470000000000002,
+      "loss": 0.0417,
+      "step": 8830
+    },
+    {
+      "epoch": 1.337266470009833,
+      "grad_norm": 0.05822708085179329,
+      "learning_rate": 0.00016466,
+      "loss": 0.0302,
+      "step": 8840
+    },
+    {
+      "epoch": 1.3387792148854096,
+      "grad_norm": 0.06241593137383461,
+      "learning_rate": 0.00016462,
+      "loss": 0.0365,
+      "step": 8850
+    },
+    {
+      "epoch": 1.3402919597609864,
+      "grad_norm": 0.10107123106718063,
+      "learning_rate": 0.00016458,
+      "loss": 0.0345,
+      "step": 8860
+    },
+    {
+      "epoch": 1.341804704636563,
+      "grad_norm": 0.09659604728221893,
+      "learning_rate": 0.00016454,
+      "loss": 0.0324,
+      "step": 8870
+    },
+    {
+      "epoch": 1.3433174495121398,
+      "grad_norm": 0.07501540333032608,
+      "learning_rate": 0.00016450000000000001,
+      "loss": 0.0317,
+      "step": 8880
+    },
+    {
+      "epoch": 1.3448301943877166,
+      "grad_norm": 0.071120485663414,
+      "learning_rate": 0.00016446,
+      "loss": 0.0299,
+      "step": 8890
+    },
+    {
+      "epoch": 1.3463429392632933,
+      "grad_norm": 0.07235920429229736,
+      "learning_rate": 0.00016442000000000003,
+      "loss": 0.0337,
+      "step": 8900
+    },
+    {
+      "epoch": 1.34785568413887,
+      "grad_norm": 0.08588097244501114,
+      "learning_rate": 0.00016438,
+      "loss": 0.0302,
+      "step": 8910
+    },
+    {
+      "epoch": 1.3493684290144468,
+      "grad_norm": 0.052244190126657486,
+      "learning_rate": 0.00016434000000000002,
+      "loss": 0.0326,
+      "step": 8920
+    },
+    {
+      "epoch": 1.3508811738900235,
+      "grad_norm": 0.0702931210398674,
+      "learning_rate": 0.0001643,
+      "loss": 0.0372,
+      "step": 8930
+    },
+    {
+      "epoch": 1.3523939187656002,
+      "grad_norm": 0.10441485792398453,
+      "learning_rate": 0.00016426,
+      "loss": 0.037,
+      "step": 8940
+    },
+    {
+      "epoch": 1.353906663641177,
+      "grad_norm": 0.10514800250530243,
+      "learning_rate": 0.00016422000000000002,
+      "loss": 0.037,
+      "step": 8950
+    },
+    {
+      "epoch": 1.3554194085167537,
+      "grad_norm": 0.07011867314577103,
+      "learning_rate": 0.00016418,
+      "loss": 0.0314,
+      "step": 8960
+    },
+    {
+      "epoch": 1.3569321533923304,
+      "grad_norm": 0.06335943937301636,
+      "learning_rate": 0.00016414,
+      "loss": 0.0311,
+      "step": 8970
+    },
+    {
+      "epoch": 1.3584448982679072,
+      "grad_norm": 0.07194424420595169,
+      "learning_rate": 0.0001641,
+      "loss": 0.0336,
+      "step": 8980
+    },
+    {
+      "epoch": 1.3599576431434839,
+      "grad_norm": 0.07171431183815002,
+      "learning_rate": 0.00016406000000000003,
+      "loss": 0.0312,
+      "step": 8990
+    },
+    {
+      "epoch": 1.3614703880190606,
+      "grad_norm": 0.14893119037151337,
+      "learning_rate": 0.00016402000000000002,
+      "loss": 0.0348,
+      "step": 9000
+    },
+    {
+      "epoch": 1.3614703880190606,
+      "eval_cer": 0.23852391576669063,
+      "eval_loss": 0.03737874701619148,
+      "eval_runtime": 10378.6671,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.254,
+      "step": 9000
+    },
+    {
+      "epoch": 1.3629831328946374,
+      "grad_norm": 0.09854207932949066,
+      "learning_rate": 0.00016398,
+      "loss": 0.0334,
+      "step": 9010
+    },
+    {
+      "epoch": 1.364495877770214,
+      "grad_norm": 0.0829731673002243,
+      "learning_rate": 0.00016394,
+      "loss": 0.0367,
+      "step": 9020
+    },
+    {
+      "epoch": 1.3660086226457908,
+      "grad_norm": 0.05378841981291771,
+      "learning_rate": 0.0001639,
+      "loss": 0.0328,
+      "step": 9030
+    },
+    {
+      "epoch": 1.3675213675213675,
+      "grad_norm": 0.08590775728225708,
+      "learning_rate": 0.00016386000000000002,
+      "loss": 0.0337,
+      "step": 9040
+    },
+    {
+      "epoch": 1.3690341123969443,
+      "grad_norm": 0.06473217159509659,
+      "learning_rate": 0.00016382000000000001,
+      "loss": 0.0309,
+      "step": 9050
+    },
+    {
+      "epoch": 1.370546857272521,
+      "grad_norm": 0.14496292173862457,
+      "learning_rate": 0.00016378,
+      "loss": 0.0362,
+      "step": 9060
+    },
+    {
+      "epoch": 1.3720596021480977,
+      "grad_norm": 0.0658840760588646,
+      "learning_rate": 0.00016374,
+      "loss": 0.0316,
+      "step": 9070
+    },
+    {
+      "epoch": 1.3735723470236745,
+      "grad_norm": 0.0722692534327507,
+      "learning_rate": 0.00016370000000000002,
+      "loss": 0.0321,
+      "step": 9080
+    },
+    {
+      "epoch": 1.3750850918992512,
+      "grad_norm": 0.0751873180270195,
+      "learning_rate": 0.00016366000000000002,
+      "loss": 0.0357,
+      "step": 9090
+    },
+    {
+      "epoch": 1.376597836774828,
+      "grad_norm": 0.07309116423130035,
+      "learning_rate": 0.00016362,
+      "loss": 0.0329,
+      "step": 9100
+    },
+    {
+      "epoch": 1.3781105816504047,
+      "grad_norm": 0.09205902367830276,
+      "learning_rate": 0.00016358,
+      "loss": 0.0311,
+      "step": 9110
+    },
+    {
+      "epoch": 1.3796233265259814,
+      "grad_norm": 0.06787604093551636,
+      "learning_rate": 0.00016354,
+      "loss": 0.0308,
+      "step": 9120
+    },
+    {
+      "epoch": 1.3811360714015581,
+      "grad_norm": 0.08365906029939651,
+      "learning_rate": 0.00016350000000000002,
+      "loss": 0.0344,
+      "step": 9130
+    },
+    {
+      "epoch": 1.3826488162771349,
+      "grad_norm": 0.07461418211460114,
+      "learning_rate": 0.00016346,
+      "loss": 0.0286,
+      "step": 9140
+    },
+    {
+      "epoch": 1.3841615611527116,
+      "grad_norm": 0.11862760782241821,
+      "learning_rate": 0.00016342,
+      "loss": 0.0361,
+      "step": 9150
+    },
+    {
+      "epoch": 1.3856743060282883,
+      "grad_norm": 0.07170487195253372,
+      "learning_rate": 0.00016338,
+      "loss": 0.0335,
+      "step": 9160
+    },
+    {
+      "epoch": 1.387187050903865,
+      "grad_norm": 0.05578533932566643,
+      "learning_rate": 0.00016334,
+      "loss": 0.0311,
+      "step": 9170
+    },
+    {
+      "epoch": 1.3886997957794418,
+      "grad_norm": 0.08838359266519547,
+      "learning_rate": 0.0001633,
+      "loss": 0.0341,
+      "step": 9180
+    },
+    {
+      "epoch": 1.3902125406550185,
+      "grad_norm": 0.09284081310033798,
+      "learning_rate": 0.00016326,
+      "loss": 0.0322,
+      "step": 9190
+    },
+    {
+      "epoch": 1.3917252855305953,
+      "grad_norm": 0.07425800710916519,
+      "learning_rate": 0.00016322000000000003,
+      "loss": 0.0319,
+      "step": 9200
+    },
+    {
+      "epoch": 1.3917252855305953,
+      "eval_cer": 0.11210909414354649,
+      "eval_loss": 0.036687206476926804,
+      "eval_runtime": 10439.2076,
+      "eval_samples_per_second": 2.017,
+      "eval_steps_per_second": 0.252,
+      "step": 9200
+    },
+    {
+      "epoch": 1.393238030406172,
+      "grad_norm": 0.0754477009177208,
+      "learning_rate": 0.00016318,
+      "loss": 0.0355,
+      "step": 9210
+    },
+    {
+      "epoch": 1.3947507752817487,
+      "grad_norm": 0.06408898532390594,
+      "learning_rate": 0.00016314,
+      "loss": 0.0345,
+      "step": 9220
+    },
+    {
+      "epoch": 1.3962635201573255,
+      "grad_norm": 0.06003674492239952,
+      "learning_rate": 0.0001631,
+      "loss": 0.0316,
+      "step": 9230
+    },
+    {
+      "epoch": 1.3977762650329022,
+      "grad_norm": 0.07409165799617767,
+      "learning_rate": 0.00016306,
+      "loss": 0.03,
+      "step": 9240
+    },
+    {
+      "epoch": 1.399289009908479,
+      "grad_norm": 0.07411226630210876,
+      "learning_rate": 0.00016302000000000002,
+      "loss": 0.0325,
+      "step": 9250
+    },
+    {
+      "epoch": 1.4008017547840557,
+      "grad_norm": 0.09041300415992737,
+      "learning_rate": 0.00016298,
+      "loss": 0.034,
+      "step": 9260
+    },
+    {
+      "epoch": 1.4023144996596324,
+      "grad_norm": 0.0684356689453125,
+      "learning_rate": 0.00016294,
+      "loss": 0.0345,
+      "step": 9270
+    },
+    {
+      "epoch": 1.4038272445352091,
+      "grad_norm": 0.08621818572282791,
+      "learning_rate": 0.0001629,
+      "loss": 0.0287,
+      "step": 9280
+    },
+    {
+      "epoch": 1.4053399894107859,
+      "grad_norm": 0.09592179954051971,
+      "learning_rate": 0.00016286000000000002,
+      "loss": 0.0371,
+      "step": 9290
+    },
+    {
+      "epoch": 1.4068527342863626,
+      "grad_norm": 0.061489395797252655,
+      "learning_rate": 0.00016282000000000002,
+      "loss": 0.0297,
+      "step": 9300
+    },
+    {
+      "epoch": 1.4083654791619393,
+      "grad_norm": 0.08933687955141068,
+      "learning_rate": 0.00016278,
+      "loss": 0.0329,
+      "step": 9310
+    },
+    {
+      "epoch": 1.409878224037516,
+      "grad_norm": 0.06542832404375076,
+      "learning_rate": 0.00016274,
+      "loss": 0.0359,
+      "step": 9320
+    },
+    {
+      "epoch": 1.4113909689130928,
+      "grad_norm": 0.10515543818473816,
+      "learning_rate": 0.0001627,
+      "loss": 0.0282,
+      "step": 9330
+    },
+    {
+      "epoch": 1.4129037137886695,
+      "grad_norm": 0.11535684019327164,
+      "learning_rate": 0.00016266000000000002,
+      "loss": 0.0346,
+      "step": 9340
+    },
+    {
+      "epoch": 1.4144164586642463,
+      "grad_norm": 0.10359009355306625,
+      "learning_rate": 0.00016262,
+      "loss": 0.0326,
+      "step": 9350
+    },
+    {
+      "epoch": 1.415929203539823,
+      "grad_norm": 0.08905740082263947,
+      "learning_rate": 0.00016258,
+      "loss": 0.0353,
+      "step": 9360
+    },
+    {
+      "epoch": 1.4174419484153997,
+      "grad_norm": 0.0570446141064167,
+      "learning_rate": 0.00016254,
+      "loss": 0.0282,
+      "step": 9370
+    },
+    {
+      "epoch": 1.4189546932909765,
+      "grad_norm": 0.0748140960931778,
+      "learning_rate": 0.00016250000000000002,
+      "loss": 0.0304,
+      "step": 9380
+    },
+    {
+      "epoch": 1.4204674381665532,
+      "grad_norm": 0.07355400919914246,
+      "learning_rate": 0.00016246,
+      "loss": 0.031,
+      "step": 9390
+    },
+    {
+      "epoch": 1.42198018304213,
+      "grad_norm": 0.09431416541337967,
+      "learning_rate": 0.00016242,
+      "loss": 0.0355,
+      "step": 9400
+    },
+    {
+      "epoch": 1.42198018304213,
+      "eval_cer": 0.09460805024547048,
+      "eval_loss": 0.03653513640165329,
+      "eval_runtime": 10519.6629,
+      "eval_samples_per_second": 2.001,
+      "eval_steps_per_second": 0.25,
+      "step": 9400
+    },
+    {
+      "epoch": 1.4234929279177067,
+      "grad_norm": 0.10641132295131683,
+      "learning_rate": 0.00016238,
+      "loss": 0.0299,
+      "step": 9410
+    },
+    {
+      "epoch": 1.4250056727932834,
+      "grad_norm": 0.051270656287670135,
+      "learning_rate": 0.00016234,
+      "loss": 0.0317,
+      "step": 9420
+    },
+    {
+      "epoch": 1.4265184176688601,
+      "grad_norm": 0.07362283766269684,
+      "learning_rate": 0.00016230000000000001,
+      "loss": 0.0269,
+      "step": 9430
+    },
+    {
+      "epoch": 1.4280311625444368,
+      "grad_norm": 0.060159552842378616,
+      "learning_rate": 0.00016226,
+      "loss": 0.0335,
+      "step": 9440
+    },
+    {
+      "epoch": 1.4295439074200136,
+      "grad_norm": 0.08667318522930145,
+      "learning_rate": 0.00016222000000000003,
+      "loss": 0.0361,
+      "step": 9450
+    },
+    {
+      "epoch": 1.4310566522955903,
+      "grad_norm": 0.06154588237404823,
+      "learning_rate": 0.00016218,
+      "loss": 0.0334,
+      "step": 9460
+    },
+    {
+      "epoch": 1.432569397171167,
+      "grad_norm": 0.10563425719738007,
+      "learning_rate": 0.00016214000000000002,
+      "loss": 0.0362,
+      "step": 9470
+    },
+    {
+      "epoch": 1.4340821420467438,
+      "grad_norm": 0.10325556248426437,
+      "learning_rate": 0.0001621,
+      "loss": 0.0343,
+      "step": 9480
+    },
+    {
+      "epoch": 1.4355948869223205,
+      "grad_norm": 0.08902329206466675,
+      "learning_rate": 0.00016206,
+      "loss": 0.032,
+      "step": 9490
+    },
+    {
+      "epoch": 1.4371076317978972,
+      "grad_norm": 0.07280543446540833,
+      "learning_rate": 0.00016202000000000002,
+      "loss": 0.0366,
+      "step": 9500
+    },
+    {
+      "epoch": 1.438620376673474,
+      "grad_norm": 0.09071139991283417,
+      "learning_rate": 0.00016198,
+      "loss": 0.0299,
+      "step": 9510
+    },
+    {
+      "epoch": 1.4401331215490507,
+      "grad_norm": 0.06658421456813812,
+      "learning_rate": 0.00016194,
+      "loss": 0.0281,
+      "step": 9520
+    },
+    {
+      "epoch": 1.4416458664246274,
+      "grad_norm": 0.0793207511305809,
+      "learning_rate": 0.0001619,
+      "loss": 0.0292,
+      "step": 9530
+    },
+    {
+      "epoch": 1.4431586113002042,
+      "grad_norm": 0.0829392522573471,
+      "learning_rate": 0.00016186,
+      "loss": 0.0337,
+      "step": 9540
+    },
+    {
+      "epoch": 1.444671356175781,
+      "grad_norm": 0.061817191541194916,
+      "learning_rate": 0.00016182000000000002,
+      "loss": 0.0298,
+      "step": 9550
+    },
+    {
+      "epoch": 1.4461841010513576,
+      "grad_norm": 0.09837779402732849,
+      "learning_rate": 0.00016177999999999999,
+      "loss": 0.037,
+      "step": 9560
+    },
+    {
+      "epoch": 1.4476968459269344,
+      "grad_norm": 0.05777046084403992,
+      "learning_rate": 0.00016174,
+      "loss": 0.0339,
+      "step": 9570
+    },
+    {
+      "epoch": 1.449209590802511,
+      "grad_norm": 0.07731931656599045,
+      "learning_rate": 0.0001617,
+      "loss": 0.0338,
+      "step": 9580
+    },
+    {
+      "epoch": 1.4507223356780878,
+      "grad_norm": 0.08898504078388214,
+      "learning_rate": 0.00016166000000000002,
+      "loss": 0.0358,
+      "step": 9590
+    },
+    {
+      "epoch": 1.4522350805536646,
+      "grad_norm": 0.0696534812450409,
+      "learning_rate": 0.00016162000000000001,
+      "loss": 0.0318,
+      "step": 9600
+    },
+    {
+      "epoch": 1.4522350805536646,
+      "eval_cer": 0.08453906649568975,
+      "eval_loss": 0.036363635212183,
+      "eval_runtime": 10514.0599,
+      "eval_samples_per_second": 2.002,
+      "eval_steps_per_second": 0.25,
+      "step": 9600
+    },
+    {
+      "epoch": 1.4537478254292413,
+      "grad_norm": 0.059242941439151764,
+      "learning_rate": 0.00016158,
+      "loss": 0.0313,
+      "step": 9610
+    },
+    {
+      "epoch": 1.455260570304818,
+      "grad_norm": 0.0844852551817894,
+      "learning_rate": 0.00016154,
+      "loss": 0.034,
+      "step": 9620
+    },
+    {
+      "epoch": 1.4567733151803948,
+      "grad_norm": 0.08737514168024063,
+      "learning_rate": 0.0001615,
+      "loss": 0.0314,
+      "step": 9630
+    },
+    {
+      "epoch": 1.4582860600559715,
+      "grad_norm": 0.08028477430343628,
+      "learning_rate": 0.00016146000000000002,
+      "loss": 0.028,
+      "step": 9640
+    },
+    {
+      "epoch": 1.4597988049315482,
+      "grad_norm": 0.08293917775154114,
+      "learning_rate": 0.00016142,
+      "loss": 0.0344,
+      "step": 9650
+    },
+    {
+      "epoch": 1.461311549807125,
+      "grad_norm": 0.07055462896823883,
+      "learning_rate": 0.00016138,
+      "loss": 0.0329,
+      "step": 9660
+    },
+    {
+      "epoch": 1.4628242946827017,
+      "grad_norm": 0.08431320637464523,
+      "learning_rate": 0.00016134,
+      "loss": 0.0313,
+      "step": 9670
+    },
+    {
+      "epoch": 1.4643370395582784,
+      "grad_norm": 0.09756868332624435,
+      "learning_rate": 0.00016130000000000002,
+      "loss": 0.0305,
+      "step": 9680
+    },
+    {
+      "epoch": 1.4658497844338552,
+      "grad_norm": 0.07265082001686096,
+      "learning_rate": 0.00016126,
+      "loss": 0.0333,
+      "step": 9690
+    },
+    {
+      "epoch": 1.467362529309432,
+      "grad_norm": 0.09156455099582672,
+      "learning_rate": 0.00016122,
+      "loss": 0.0356,
+      "step": 9700
+    },
+    {
+      "epoch": 1.4688752741850086,
+      "grad_norm": 0.06957582384347916,
+      "learning_rate": 0.00016118,
+      "loss": 0.0313,
+      "step": 9710
+    },
+    {
+      "epoch": 1.4703880190605854,
+      "grad_norm": 0.06783420592546463,
+      "learning_rate": 0.00016114,
+      "loss": 0.0297,
+      "step": 9720
+    },
+    {
+      "epoch": 1.471900763936162,
+      "grad_norm": 0.07193417102098465,
+      "learning_rate": 0.0001611,
+      "loss": 0.0302,
+      "step": 9730
+    },
+    {
+      "epoch": 1.4734135088117388,
+      "grad_norm": 0.08238872140645981,
+      "learning_rate": 0.00016106,
+      "loss": 0.0335,
+      "step": 9740
+    },
+    {
+      "epoch": 1.4749262536873156,
+      "grad_norm": 0.07197025418281555,
+      "learning_rate": 0.00016102000000000003,
+      "loss": 0.0369,
+      "step": 9750
+    },
+    {
+      "epoch": 1.4764389985628923,
+      "grad_norm": 0.08109525591135025,
+      "learning_rate": 0.00016098,
+      "loss": 0.0327,
+      "step": 9760
+    },
+    {
+      "epoch": 1.477951743438469,
+      "grad_norm": 0.12331151217222214,
+      "learning_rate": 0.00016094000000000001,
+      "loss": 0.0372,
+      "step": 9770
+    },
+    {
+      "epoch": 1.4794644883140458,
+      "grad_norm": 0.08190298080444336,
+      "learning_rate": 0.0001609,
+      "loss": 0.0293,
+      "step": 9780
+    },
+    {
+      "epoch": 1.4809772331896225,
+      "grad_norm": 0.05840008333325386,
+      "learning_rate": 0.00016086,
+      "loss": 0.0349,
+      "step": 9790
+    },
+    {
+      "epoch": 1.4824899780651992,
+      "grad_norm": 0.07874023169279099,
+      "learning_rate": 0.00016082000000000002,
+      "loss": 0.0322,
+      "step": 9800
+    },
+    {
+      "epoch": 1.4824899780651992,
+      "eval_cer": 0.24973192203254985,
+      "eval_loss": 0.036100711673498154,
+      "eval_runtime": 10381.657,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.254,
+      "step": 9800
+    },
+    {
+      "epoch": 1.484002722940776,
+      "grad_norm": 0.0776941329240799,
+      "learning_rate": 0.00016078,
+      "loss": 0.0358,
+      "step": 9810
+    },
+    {
+      "epoch": 1.4855154678163527,
+      "grad_norm": 0.12248267233371735,
+      "learning_rate": 0.00016074,
+      "loss": 0.0356,
+      "step": 9820
+    },
+    {
+      "epoch": 1.4870282126919294,
+      "grad_norm": 0.08847146481275558,
+      "learning_rate": 0.0001607,
+      "loss": 0.0274,
+      "step": 9830
+    },
+    {
+      "epoch": 1.4885409575675062,
+      "grad_norm": 0.0689850002527237,
+      "learning_rate": 0.00016066000000000002,
+      "loss": 0.0266,
+      "step": 9840
+    },
+    {
+      "epoch": 1.4900537024430829,
+      "grad_norm": 0.06342552602291107,
+      "learning_rate": 0.00016062000000000002,
+      "loss": 0.031,
+      "step": 9850
+    },
+    {
+      "epoch": 1.4915664473186596,
+      "grad_norm": 0.11846140772104263,
+      "learning_rate": 0.00016057999999999998,
+      "loss": 0.0348,
+      "step": 9860
+    },
+    {
+      "epoch": 1.4930791921942363,
+      "grad_norm": 0.07698410004377365,
+      "learning_rate": 0.00016054,
+      "loss": 0.0259,
+      "step": 9870
+    },
+    {
+      "epoch": 1.494591937069813,
+      "grad_norm": 0.11177106946706772,
+      "learning_rate": 0.0001605,
+      "loss": 0.0301,
+      "step": 9880
+    },
+    {
+      "epoch": 1.4961046819453898,
+      "grad_norm": 0.09459209442138672,
+      "learning_rate": 0.00016046000000000002,
+      "loss": 0.0349,
+      "step": 9890
+    },
+    {
+      "epoch": 1.4976174268209665,
+      "grad_norm": 0.08800119906663895,
+      "learning_rate": 0.00016042,
+      "loss": 0.0335,
+      "step": 9900
+    },
+    {
+      "epoch": 1.4991301716965433,
+      "grad_norm": 0.09330447763204575,
+      "learning_rate": 0.00016038,
+      "loss": 0.0326,
+      "step": 9910
+    },
+    {
+      "epoch": 1.50064291657212,
+      "grad_norm": 0.10210063308477402,
+      "learning_rate": 0.00016034,
+      "loss": 0.035,
+      "step": 9920
+    },
+    {
+      "epoch": 1.5021556614476967,
+      "grad_norm": 0.11886809766292572,
+      "learning_rate": 0.0001603,
+      "loss": 0.036,
+      "step": 9930
+    },
+    {
+      "epoch": 1.5036684063232735,
+      "grad_norm": 0.07646410167217255,
+      "learning_rate": 0.00016026000000000001,
+      "loss": 0.0269,
+      "step": 9940
+    },
+    {
+      "epoch": 1.5051811511988502,
+      "grad_norm": 0.09994587302207947,
+      "learning_rate": 0.00016022,
+      "loss": 0.0298,
+      "step": 9950
+    },
+    {
+      "epoch": 1.506693896074427,
+      "grad_norm": 0.0781632736325264,
+      "learning_rate": 0.00016018,
+      "loss": 0.0299,
+      "step": 9960
+    },
+    {
+      "epoch": 1.5082066409500037,
+      "grad_norm": 0.09286709874868393,
+      "learning_rate": 0.00016014,
+      "loss": 0.0334,
+      "step": 9970
+    },
+    {
+      "epoch": 1.5097193858255804,
+      "grad_norm": 0.08658807724714279,
+      "learning_rate": 0.00016010000000000002,
+      "loss": 0.032,
+      "step": 9980
+    },
+    {
+      "epoch": 1.5112321307011571,
+      "grad_norm": 0.09535326808691025,
+      "learning_rate": 0.00016006,
+      "loss": 0.032,
+      "step": 9990
+    },
+    {
+      "epoch": 1.5127448755767339,
+      "grad_norm": 0.056372299790382385,
+      "learning_rate": 0.00016002,
+      "loss": 0.033,
+      "step": 10000
+    },
+    {
+      "epoch": 1.5127448755767339,
+      "eval_cer": 0.1808933296766016,
+      "eval_loss": 0.03580623120069504,
+      "eval_runtime": 10388.4948,
+      "eval_samples_per_second": 2.026,
+      "eval_steps_per_second": 0.253,
+      "step": 10000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 50000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 10000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.622822387689695e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}