{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6899028305872412, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0008449514152936206, "grad_norm": 1.3914175033569336, "learning_rate": 4.99991550485847e-05, "loss": 0.0856, "step": 2 }, { "epoch": 0.0016899028305872412, "grad_norm": 1.3959804773330688, "learning_rate": 4.999831009716942e-05, "loss": 0.0982, "step": 4 }, { "epoch": 0.0025348542458808617, "grad_norm": 1.4670644998550415, "learning_rate": 4.999746514575412e-05, "loss": 0.0918, "step": 6 }, { "epoch": 0.0033798056611744824, "grad_norm": 2.241302728652954, "learning_rate": 4.999662019433883e-05, "loss": 0.1214, "step": 8 }, { "epoch": 0.004224757076468103, "grad_norm": 2.0347797870635986, "learning_rate": 4.999577524292353e-05, "loss": 0.1111, "step": 10 }, { "epoch": 0.005069708491761723, "grad_norm": 1.1951134204864502, "learning_rate": 4.999493029150824e-05, "loss": 0.0817, "step": 12 }, { "epoch": 0.005914659907055344, "grad_norm": 1.2443560361862183, "learning_rate": 4.9994085340092946e-05, "loss": 0.0903, "step": 14 }, { "epoch": 0.006759611322348965, "grad_norm": 1.4616291522979736, "learning_rate": 4.999324038867765e-05, "loss": 0.0968, "step": 16 }, { "epoch": 0.0076045627376425855, "grad_norm": 1.999237298965454, "learning_rate": 4.999239543726236e-05, "loss": 0.1221, "step": 18 }, { "epoch": 0.008449514152936205, "grad_norm": 1.4877630472183228, "learning_rate": 4.999155048584707e-05, "loss": 0.1137, "step": 20 }, { "epoch": 0.009294465568229826, "grad_norm": 1.4436724185943604, "learning_rate": 4.999070553443177e-05, "loss": 0.1129, "step": 22 }, { "epoch": 0.010139416983523447, "grad_norm": 2.1934313774108887, "learning_rate": 4.998986058301648e-05, "loss": 0.1052, "step": 24 }, { "epoch": 0.010984368398817067, "grad_norm": 1.8377941846847534, "learning_rate": 4.998901563160118e-05, "loss": 0.1381, "step": 26 }, { "epoch": 0.011829319814110688, "grad_norm": 2.0095016956329346, "learning_rate": 4.998817068018589e-05, "loss": 0.0852, "step": 28 }, { "epoch": 0.012674271229404309, "grad_norm": 3.2389862537384033, "learning_rate": 4.99873257287706e-05, "loss": 0.1161, "step": 30 }, { "epoch": 0.01351922264469793, "grad_norm": 2.737915277481079, "learning_rate": 4.9986480777355304e-05, "loss": 0.1713, "step": 32 }, { "epoch": 0.01436417405999155, "grad_norm": 1.6804777383804321, "learning_rate": 4.998563582594001e-05, "loss": 0.1234, "step": 34 }, { "epoch": 0.015209125475285171, "grad_norm": 1.8924976587295532, "learning_rate": 4.998479087452472e-05, "loss": 0.0826, "step": 36 }, { "epoch": 0.01605407689057879, "grad_norm": 1.8899160623550415, "learning_rate": 4.998394592310942e-05, "loss": 0.1116, "step": 38 }, { "epoch": 0.01689902830587241, "grad_norm": 1.7245818376541138, "learning_rate": 4.998310097169413e-05, "loss": 0.0863, "step": 40 }, { "epoch": 0.017743979721166033, "grad_norm": 1.1223626136779785, "learning_rate": 4.998225602027883e-05, "loss": 0.0823, "step": 42 }, { "epoch": 0.018588931136459652, "grad_norm": 2.4463398456573486, "learning_rate": 4.998141106886354e-05, "loss": 0.1465, "step": 44 }, { "epoch": 0.019433882551753275, "grad_norm": 1.4499152898788452, "learning_rate": 4.998056611744825e-05, "loss": 0.1043, "step": 46 }, { "epoch": 0.020278833967046894, "grad_norm": 2.1130013465881348, "learning_rate": 4.9979721166032955e-05, "loss": 0.153, "step": 48 }, { "epoch": 0.021123785382340516, "grad_norm": 1.4567383527755737, "learning_rate": 4.997887621461766e-05, "loss": 0.1481, "step": 50 }, { "epoch": 0.021968736797634135, "grad_norm": 1.4559706449508667, "learning_rate": 4.997803126320237e-05, "loss": 0.0989, "step": 52 }, { "epoch": 0.022813688212927757, "grad_norm": 1.9101276397705078, "learning_rate": 4.997718631178707e-05, "loss": 0.1458, "step": 54 }, { "epoch": 0.023658639628221376, "grad_norm": 1.50411856174469, "learning_rate": 4.9976341360371784e-05, "loss": 0.0991, "step": 56 }, { "epoch": 0.024503591043515, "grad_norm": 1.0242832899093628, "learning_rate": 4.9975496408956484e-05, "loss": 0.0766, "step": 58 }, { "epoch": 0.025348542458808618, "grad_norm": 2.2057089805603027, "learning_rate": 4.997465145754119e-05, "loss": 0.1176, "step": 60 }, { "epoch": 0.02619349387410224, "grad_norm": 1.3259694576263428, "learning_rate": 4.99738065061259e-05, "loss": 0.1631, "step": 62 }, { "epoch": 0.02703844528939586, "grad_norm": 1.2836776971817017, "learning_rate": 4.9972961554710606e-05, "loss": 0.08, "step": 64 }, { "epoch": 0.02788339670468948, "grad_norm": 1.250314712524414, "learning_rate": 4.997211660329531e-05, "loss": 0.0944, "step": 66 }, { "epoch": 0.0287283481199831, "grad_norm": 1.753688097000122, "learning_rate": 4.997127165188002e-05, "loss": 0.133, "step": 68 }, { "epoch": 0.029573299535276723, "grad_norm": 1.5159249305725098, "learning_rate": 4.997042670046472e-05, "loss": 0.0975, "step": 70 }, { "epoch": 0.030418250950570342, "grad_norm": 1.78278386592865, "learning_rate": 4.9969581749049434e-05, "loss": 0.1093, "step": 72 }, { "epoch": 0.03126320236586396, "grad_norm": 1.725652813911438, "learning_rate": 4.9968736797634135e-05, "loss": 0.1074, "step": 74 }, { "epoch": 0.03210815378115758, "grad_norm": 1.6478575468063354, "learning_rate": 4.996789184621884e-05, "loss": 0.1279, "step": 76 }, { "epoch": 0.032953105196451206, "grad_norm": 1.2018022537231445, "learning_rate": 4.996704689480355e-05, "loss": 0.0749, "step": 78 }, { "epoch": 0.03379805661174482, "grad_norm": 0.8992096185684204, "learning_rate": 4.9966201943388256e-05, "loss": 0.0656, "step": 80 }, { "epoch": 0.034643008027038444, "grad_norm": 2.0089211463928223, "learning_rate": 4.9965356991972964e-05, "loss": 0.1403, "step": 82 }, { "epoch": 0.035487959442332066, "grad_norm": 1.9890964031219482, "learning_rate": 4.996451204055767e-05, "loss": 0.0917, "step": 84 }, { "epoch": 0.03633291085762569, "grad_norm": 1.6564279794692993, "learning_rate": 4.996366708914237e-05, "loss": 0.127, "step": 86 }, { "epoch": 0.037177862272919304, "grad_norm": 2.2462258338928223, "learning_rate": 4.9962822137727085e-05, "loss": 0.1866, "step": 88 }, { "epoch": 0.03802281368821293, "grad_norm": 2.442894697189331, "learning_rate": 4.9961977186311786e-05, "loss": 0.096, "step": 90 }, { "epoch": 0.03886776510350655, "grad_norm": 1.7473067045211792, "learning_rate": 4.996113223489649e-05, "loss": 0.1065, "step": 92 }, { "epoch": 0.03971271651880017, "grad_norm": 1.4837480783462524, "learning_rate": 4.99602872834812e-05, "loss": 0.0878, "step": 94 }, { "epoch": 0.04055766793409379, "grad_norm": 0.9949487447738647, "learning_rate": 4.995944233206591e-05, "loss": 0.0795, "step": 96 }, { "epoch": 0.04140261934938741, "grad_norm": 1.6431764364242554, "learning_rate": 4.9958597380650614e-05, "loss": 0.1184, "step": 98 }, { "epoch": 0.04224757076468103, "grad_norm": 1.0905040502548218, "learning_rate": 4.995775242923532e-05, "loss": 0.086, "step": 100 }, { "epoch": 0.04224757076468103, "eval_accuracy": 0.6362303845490602, "eval_cer": 0.11421438464320686, "eval_loss": 0.22910405695438385, "eval_runtime": 887.866, "eval_samples_per_second": 13.063, "eval_steps_per_second": 0.409, "step": 100 }, { "epoch": 0.043092522179974654, "grad_norm": 1.3749598264694214, "learning_rate": 4.995690747782002e-05, "loss": 0.1197, "step": 102 }, { "epoch": 0.04393747359526827, "grad_norm": 1.572817325592041, "learning_rate": 4.9956062526404736e-05, "loss": 0.0924, "step": 104 }, { "epoch": 0.04478242501056189, "grad_norm": 1.5053339004516602, "learning_rate": 4.9955217574989436e-05, "loss": 0.0926, "step": 106 }, { "epoch": 0.045627376425855515, "grad_norm": 1.816696047782898, "learning_rate": 4.995437262357415e-05, "loss": 0.143, "step": 108 }, { "epoch": 0.04647232784114914, "grad_norm": 1.8459056615829468, "learning_rate": 4.995352767215885e-05, "loss": 0.1229, "step": 110 }, { "epoch": 0.04731727925644275, "grad_norm": 1.3267372846603394, "learning_rate": 4.995268272074356e-05, "loss": 0.0874, "step": 112 }, { "epoch": 0.048162230671736375, "grad_norm": 1.7917542457580566, "learning_rate": 4.9951837769328265e-05, "loss": 0.1485, "step": 114 }, { "epoch": 0.04900718208703, "grad_norm": 1.7345539331436157, "learning_rate": 4.995099281791297e-05, "loss": 0.09, "step": 116 }, { "epoch": 0.04985213350232361, "grad_norm": 1.5779085159301758, "learning_rate": 4.995014786649768e-05, "loss": 0.1262, "step": 118 }, { "epoch": 0.050697084917617236, "grad_norm": 1.391013503074646, "learning_rate": 4.994930291508239e-05, "loss": 0.1145, "step": 120 }, { "epoch": 0.05154203633291086, "grad_norm": 1.6034536361694336, "learning_rate": 4.994845796366709e-05, "loss": 0.0881, "step": 122 }, { "epoch": 0.05238698774820448, "grad_norm": 1.5294889211654663, "learning_rate": 4.99476130122518e-05, "loss": 0.0793, "step": 124 }, { "epoch": 0.053231939163498096, "grad_norm": 1.197351336479187, "learning_rate": 4.99467680608365e-05, "loss": 0.091, "step": 126 }, { "epoch": 0.05407689057879172, "grad_norm": 1.323849081993103, "learning_rate": 4.994592310942121e-05, "loss": 0.0888, "step": 128 }, { "epoch": 0.05492184199408534, "grad_norm": 1.7171475887298584, "learning_rate": 4.9945078158005916e-05, "loss": 0.1436, "step": 130 }, { "epoch": 0.05576679340937896, "grad_norm": 1.7138563394546509, "learning_rate": 4.994423320659062e-05, "loss": 0.09, "step": 132 }, { "epoch": 0.05661174482467258, "grad_norm": 1.6496665477752686, "learning_rate": 4.994338825517533e-05, "loss": 0.1415, "step": 134 }, { "epoch": 0.0574566962399662, "grad_norm": 1.3270729780197144, "learning_rate": 4.994254330376004e-05, "loss": 0.1073, "step": 136 }, { "epoch": 0.058301647655259824, "grad_norm": 1.2084906101226807, "learning_rate": 4.994169835234474e-05, "loss": 0.0861, "step": 138 }, { "epoch": 0.059146599070553446, "grad_norm": 1.885323405265808, "learning_rate": 4.994085340092945e-05, "loss": 0.138, "step": 140 }, { "epoch": 0.05999155048584706, "grad_norm": 1.3070194721221924, "learning_rate": 4.994000844951415e-05, "loss": 0.0725, "step": 142 }, { "epoch": 0.060836501901140684, "grad_norm": 1.494089961051941, "learning_rate": 4.993916349809886e-05, "loss": 0.1076, "step": 144 }, { "epoch": 0.061681453316434307, "grad_norm": 1.5598512887954712, "learning_rate": 4.993831854668357e-05, "loss": 0.1068, "step": 146 }, { "epoch": 0.06252640473172792, "grad_norm": 2.4986348152160645, "learning_rate": 4.9937473595268274e-05, "loss": 0.1439, "step": 148 }, { "epoch": 0.06337135614702155, "grad_norm": 1.6942882537841797, "learning_rate": 4.993662864385298e-05, "loss": 0.0694, "step": 150 }, { "epoch": 0.06421630756231517, "grad_norm": 1.8440594673156738, "learning_rate": 4.993578369243769e-05, "loss": 0.1387, "step": 152 }, { "epoch": 0.06506125897760878, "grad_norm": 1.647815227508545, "learning_rate": 4.993493874102239e-05, "loss": 0.0997, "step": 154 }, { "epoch": 0.06590621039290241, "grad_norm": 1.5283992290496826, "learning_rate": 4.99340937896071e-05, "loss": 0.0803, "step": 156 }, { "epoch": 0.06675116180819603, "grad_norm": 1.7466899156570435, "learning_rate": 4.99332488381918e-05, "loss": 0.1309, "step": 158 }, { "epoch": 0.06759611322348964, "grad_norm": 1.4426119327545166, "learning_rate": 4.993240388677651e-05, "loss": 0.107, "step": 160 }, { "epoch": 0.06844106463878327, "grad_norm": 1.6664427518844604, "learning_rate": 4.993155893536122e-05, "loss": 0.1763, "step": 162 }, { "epoch": 0.06928601605407689, "grad_norm": 1.8783625364303589, "learning_rate": 4.9930713983945925e-05, "loss": 0.0942, "step": 164 }, { "epoch": 0.07013096746937052, "grad_norm": 1.3738420009613037, "learning_rate": 4.992986903253063e-05, "loss": 0.1128, "step": 166 }, { "epoch": 0.07097591888466413, "grad_norm": 1.2888435125350952, "learning_rate": 4.992902408111534e-05, "loss": 0.08, "step": 168 }, { "epoch": 0.07182087029995775, "grad_norm": 0.921699047088623, "learning_rate": 4.992817912970004e-05, "loss": 0.1028, "step": 170 }, { "epoch": 0.07266582171525138, "grad_norm": 1.8012775182724, "learning_rate": 4.9927334178284754e-05, "loss": 0.1012, "step": 172 }, { "epoch": 0.07351077313054499, "grad_norm": 1.061102271080017, "learning_rate": 4.9926489226869454e-05, "loss": 0.1341, "step": 174 }, { "epoch": 0.07435572454583861, "grad_norm": 1.1755284070968628, "learning_rate": 4.992564427545416e-05, "loss": 0.0944, "step": 176 }, { "epoch": 0.07520067596113224, "grad_norm": 1.4010897874832153, "learning_rate": 4.992479932403887e-05, "loss": 0.0736, "step": 178 }, { "epoch": 0.07604562737642585, "grad_norm": 1.391518235206604, "learning_rate": 4.9923954372623576e-05, "loss": 0.0622, "step": 180 }, { "epoch": 0.07689057879171948, "grad_norm": 1.2992569208145142, "learning_rate": 4.992310942120828e-05, "loss": 0.0982, "step": 182 }, { "epoch": 0.0777355302070131, "grad_norm": 1.4223698377609253, "learning_rate": 4.992226446979299e-05, "loss": 0.098, "step": 184 }, { "epoch": 0.07858048162230671, "grad_norm": 1.5545382499694824, "learning_rate": 4.992141951837769e-05, "loss": 0.171, "step": 186 }, { "epoch": 0.07942543303760034, "grad_norm": 2.143627882003784, "learning_rate": 4.9920574566962404e-05, "loss": 0.1158, "step": 188 }, { "epoch": 0.08027038445289396, "grad_norm": 1.5633560419082642, "learning_rate": 4.9919729615547105e-05, "loss": 0.0865, "step": 190 }, { "epoch": 0.08111533586818757, "grad_norm": 1.6810214519500732, "learning_rate": 4.991888466413181e-05, "loss": 0.0905, "step": 192 }, { "epoch": 0.0819602872834812, "grad_norm": 1.0320245027542114, "learning_rate": 4.991803971271652e-05, "loss": 0.0748, "step": 194 }, { "epoch": 0.08280523869877482, "grad_norm": 1.9010602235794067, "learning_rate": 4.9917194761301226e-05, "loss": 0.1433, "step": 196 }, { "epoch": 0.08365019011406843, "grad_norm": 2.0801427364349365, "learning_rate": 4.9916349809885934e-05, "loss": 0.1286, "step": 198 }, { "epoch": 0.08449514152936206, "grad_norm": 1.6263489723205566, "learning_rate": 4.991550485847064e-05, "loss": 0.097, "step": 200 }, { "epoch": 0.08449514152936206, "eval_accuracy": 0.6549405069839628, "eval_cer": 0.10543680584348562, "eval_loss": 0.21902646124362946, "eval_runtime": 857.1192, "eval_samples_per_second": 13.531, "eval_steps_per_second": 0.424, "step": 200 }, { "epoch": 0.08534009294465568, "grad_norm": 1.3326414823532104, "learning_rate": 4.991465990705534e-05, "loss": 0.1085, "step": 202 }, { "epoch": 0.08618504435994931, "grad_norm": 1.6693871021270752, "learning_rate": 4.9913814955640055e-05, "loss": 0.1202, "step": 204 }, { "epoch": 0.08702999577524292, "grad_norm": 1.5326752662658691, "learning_rate": 4.9912970004224756e-05, "loss": 0.1062, "step": 206 }, { "epoch": 0.08787494719053654, "grad_norm": 2.1729543209075928, "learning_rate": 4.991212505280947e-05, "loss": 0.1443, "step": 208 }, { "epoch": 0.08871989860583017, "grad_norm": 1.343558669090271, "learning_rate": 4.991128010139417e-05, "loss": 0.1014, "step": 210 }, { "epoch": 0.08956485002112378, "grad_norm": 1.2149333953857422, "learning_rate": 4.991043514997888e-05, "loss": 0.09, "step": 212 }, { "epoch": 0.0904098014364174, "grad_norm": 1.063955307006836, "learning_rate": 4.9909590198563584e-05, "loss": 0.1024, "step": 214 }, { "epoch": 0.09125475285171103, "grad_norm": 1.5514568090438843, "learning_rate": 4.990874524714829e-05, "loss": 0.0901, "step": 216 }, { "epoch": 0.09209970426700465, "grad_norm": 1.8420741558074951, "learning_rate": 4.9907900295733e-05, "loss": 0.1084, "step": 218 }, { "epoch": 0.09294465568229827, "grad_norm": 1.7071248292922974, "learning_rate": 4.9907055344317706e-05, "loss": 0.0838, "step": 220 }, { "epoch": 0.09378960709759189, "grad_norm": 1.5390372276306152, "learning_rate": 4.9906210392902406e-05, "loss": 0.1133, "step": 222 }, { "epoch": 0.0946345585128855, "grad_norm": 1.734567403793335, "learning_rate": 4.990536544148712e-05, "loss": 0.0862, "step": 224 }, { "epoch": 0.09547950992817913, "grad_norm": 0.9405034780502319, "learning_rate": 4.990452049007182e-05, "loss": 0.0941, "step": 226 }, { "epoch": 0.09632446134347275, "grad_norm": 1.2817813158035278, "learning_rate": 4.990367553865653e-05, "loss": 0.073, "step": 228 }, { "epoch": 0.09716941275876637, "grad_norm": 2.075343132019043, "learning_rate": 4.9902830587241235e-05, "loss": 0.132, "step": 230 }, { "epoch": 0.09801436417406, "grad_norm": 1.2112518548965454, "learning_rate": 4.990198563582594e-05, "loss": 0.0918, "step": 232 }, { "epoch": 0.09885931558935361, "grad_norm": 0.846361517906189, "learning_rate": 4.990114068441065e-05, "loss": 0.063, "step": 234 }, { "epoch": 0.09970426700464723, "grad_norm": 1.6504607200622559, "learning_rate": 4.990029573299536e-05, "loss": 0.0915, "step": 236 }, { "epoch": 0.10054921841994086, "grad_norm": 1.5142329931259155, "learning_rate": 4.989945078158006e-05, "loss": 0.1099, "step": 238 }, { "epoch": 0.10139416983523447, "grad_norm": 2.2613418102264404, "learning_rate": 4.989860583016477e-05, "loss": 0.1245, "step": 240 }, { "epoch": 0.1022391212505281, "grad_norm": 2.134218454360962, "learning_rate": 4.989776087874947e-05, "loss": 0.1116, "step": 242 }, { "epoch": 0.10308407266582172, "grad_norm": 1.6032145023345947, "learning_rate": 4.989691592733418e-05, "loss": 0.085, "step": 244 }, { "epoch": 0.10392902408111533, "grad_norm": 1.2227692604064941, "learning_rate": 4.9896070975918886e-05, "loss": 0.0886, "step": 246 }, { "epoch": 0.10477397549640896, "grad_norm": 2.2352206707000732, "learning_rate": 4.989522602450359e-05, "loss": 0.1255, "step": 248 }, { "epoch": 0.10561892691170258, "grad_norm": 0.9914106726646423, "learning_rate": 4.98943810730883e-05, "loss": 0.0814, "step": 250 }, { "epoch": 0.10646387832699619, "grad_norm": 2.0763142108917236, "learning_rate": 4.989353612167301e-05, "loss": 0.0949, "step": 252 }, { "epoch": 0.10730882974228982, "grad_norm": 1.0693895816802979, "learning_rate": 4.989269117025771e-05, "loss": 0.0659, "step": 254 }, { "epoch": 0.10815378115758344, "grad_norm": 1.8804770708084106, "learning_rate": 4.989184621884242e-05, "loss": 0.1019, "step": 256 }, { "epoch": 0.10899873257287707, "grad_norm": 1.8545231819152832, "learning_rate": 4.989100126742712e-05, "loss": 0.1008, "step": 258 }, { "epoch": 0.10984368398817068, "grad_norm": 1.1952356100082397, "learning_rate": 4.989015631601183e-05, "loss": 0.1363, "step": 260 }, { "epoch": 0.1106886354034643, "grad_norm": 1.715725064277649, "learning_rate": 4.988931136459654e-05, "loss": 0.1249, "step": 262 }, { "epoch": 0.11153358681875793, "grad_norm": 1.5759917497634888, "learning_rate": 4.9888466413181244e-05, "loss": 0.0784, "step": 264 }, { "epoch": 0.11237853823405154, "grad_norm": 1.252929925918579, "learning_rate": 4.988762146176595e-05, "loss": 0.073, "step": 266 }, { "epoch": 0.11322348964934516, "grad_norm": 1.0508784055709839, "learning_rate": 4.988677651035066e-05, "loss": 0.0835, "step": 268 }, { "epoch": 0.11406844106463879, "grad_norm": 1.2772610187530518, "learning_rate": 4.988593155893536e-05, "loss": 0.0727, "step": 270 }, { "epoch": 0.1149133924799324, "grad_norm": 1.5705053806304932, "learning_rate": 4.988508660752007e-05, "loss": 0.1377, "step": 272 }, { "epoch": 0.11575834389522602, "grad_norm": 2.1583268642425537, "learning_rate": 4.988424165610477e-05, "loss": 0.1126, "step": 274 }, { "epoch": 0.11660329531051965, "grad_norm": 1.3954452276229858, "learning_rate": 4.988339670468948e-05, "loss": 0.0855, "step": 276 }, { "epoch": 0.11744824672581326, "grad_norm": 2.0817723274230957, "learning_rate": 4.988255175327419e-05, "loss": 0.1132, "step": 278 }, { "epoch": 0.11829319814110689, "grad_norm": 1.833322525024414, "learning_rate": 4.9881706801858895e-05, "loss": 0.1117, "step": 280 }, { "epoch": 0.11913814955640051, "grad_norm": 1.5989530086517334, "learning_rate": 4.98808618504436e-05, "loss": 0.1265, "step": 282 }, { "epoch": 0.11998310097169412, "grad_norm": 1.8421043157577515, "learning_rate": 4.988001689902831e-05, "loss": 0.1372, "step": 284 }, { "epoch": 0.12082805238698775, "grad_norm": 1.897548794746399, "learning_rate": 4.987917194761301e-05, "loss": 0.1103, "step": 286 }, { "epoch": 0.12167300380228137, "grad_norm": 1.4291915893554688, "learning_rate": 4.9878326996197724e-05, "loss": 0.0847, "step": 288 }, { "epoch": 0.12251795521757498, "grad_norm": 1.1558469533920288, "learning_rate": 4.9877482044782424e-05, "loss": 0.0889, "step": 290 }, { "epoch": 0.12336290663286861, "grad_norm": 1.7630038261413574, "learning_rate": 4.987663709336713e-05, "loss": 0.126, "step": 292 }, { "epoch": 0.12420785804816223, "grad_norm": 1.0064209699630737, "learning_rate": 4.987579214195184e-05, "loss": 0.086, "step": 294 }, { "epoch": 0.12505280946345584, "grad_norm": 1.59539794921875, "learning_rate": 4.9874947190536546e-05, "loss": 0.1184, "step": 296 }, { "epoch": 0.12589776087874946, "grad_norm": 1.7932195663452148, "learning_rate": 4.987410223912125e-05, "loss": 0.1376, "step": 298 }, { "epoch": 0.1267427122940431, "grad_norm": 1.8148212432861328, "learning_rate": 4.987325728770596e-05, "loss": 0.081, "step": 300 }, { "epoch": 0.1267427122940431, "eval_accuracy": 0.6697706501120883, "eval_cer": 0.096782730665067, "eval_loss": 0.2164023518562317, "eval_runtime": 849.0391, "eval_samples_per_second": 13.66, "eval_steps_per_second": 0.428, "step": 300 }, { "epoch": 0.12758766370933672, "grad_norm": 1.354212760925293, "learning_rate": 4.987241233629066e-05, "loss": 0.1006, "step": 302 }, { "epoch": 0.12843261512463033, "grad_norm": 2.113490104675293, "learning_rate": 4.9871567384875374e-05, "loss": 0.1404, "step": 304 }, { "epoch": 0.12927756653992395, "grad_norm": 1.6982043981552124, "learning_rate": 4.9870722433460075e-05, "loss": 0.2001, "step": 306 }, { "epoch": 0.13012251795521756, "grad_norm": 1.6756244897842407, "learning_rate": 4.986987748204479e-05, "loss": 0.085, "step": 308 }, { "epoch": 0.1309674693705112, "grad_norm": 1.2098358869552612, "learning_rate": 4.986903253062949e-05, "loss": 0.0603, "step": 310 }, { "epoch": 0.13181242078580482, "grad_norm": 1.7302310466766357, "learning_rate": 4.9868187579214196e-05, "loss": 0.0816, "step": 312 }, { "epoch": 0.13265737220109844, "grad_norm": 1.3803563117980957, "learning_rate": 4.9867342627798904e-05, "loss": 0.0795, "step": 314 }, { "epoch": 0.13350232361639205, "grad_norm": 1.4253759384155273, "learning_rate": 4.986649767638361e-05, "loss": 0.1219, "step": 316 }, { "epoch": 0.13434727503168567, "grad_norm": 1.8579727411270142, "learning_rate": 4.986565272496832e-05, "loss": 0.117, "step": 318 }, { "epoch": 0.13519222644697929, "grad_norm": 1.3731677532196045, "learning_rate": 4.9864807773553025e-05, "loss": 0.0697, "step": 320 }, { "epoch": 0.13603717786227293, "grad_norm": 1.7941423654556274, "learning_rate": 4.9863962822137726e-05, "loss": 0.1499, "step": 322 }, { "epoch": 0.13688212927756654, "grad_norm": 1.8242301940917969, "learning_rate": 4.986311787072244e-05, "loss": 0.1148, "step": 324 }, { "epoch": 0.13772708069286016, "grad_norm": 1.8088881969451904, "learning_rate": 4.986227291930714e-05, "loss": 0.1117, "step": 326 }, { "epoch": 0.13857203210815378, "grad_norm": 1.3591686487197876, "learning_rate": 4.986142796789185e-05, "loss": 0.1057, "step": 328 }, { "epoch": 0.1394169835234474, "grad_norm": 1.6867038011550903, "learning_rate": 4.9860583016476554e-05, "loss": 0.1065, "step": 330 }, { "epoch": 0.14026193493874103, "grad_norm": 1.2499358654022217, "learning_rate": 4.985973806506126e-05, "loss": 0.1251, "step": 332 }, { "epoch": 0.14110688635403465, "grad_norm": 1.7259184122085571, "learning_rate": 4.985889311364597e-05, "loss": 0.1066, "step": 334 }, { "epoch": 0.14195183776932827, "grad_norm": 2.0314226150512695, "learning_rate": 4.9858048162230676e-05, "loss": 0.117, "step": 336 }, { "epoch": 0.14279678918462188, "grad_norm": 1.4193428754806519, "learning_rate": 4.9857203210815376e-05, "loss": 0.129, "step": 338 }, { "epoch": 0.1436417405999155, "grad_norm": 1.639891266822815, "learning_rate": 4.985635825940009e-05, "loss": 0.1269, "step": 340 }, { "epoch": 0.1444866920152091, "grad_norm": 1.1235016584396362, "learning_rate": 4.985551330798479e-05, "loss": 0.0722, "step": 342 }, { "epoch": 0.14533164343050275, "grad_norm": 1.941998839378357, "learning_rate": 4.98546683565695e-05, "loss": 0.085, "step": 344 }, { "epoch": 0.14617659484579637, "grad_norm": 1.3687087297439575, "learning_rate": 4.9853823405154205e-05, "loss": 0.0776, "step": 346 }, { "epoch": 0.14702154626108999, "grad_norm": 1.7349680662155151, "learning_rate": 4.985297845373891e-05, "loss": 0.1329, "step": 348 }, { "epoch": 0.1478664976763836, "grad_norm": 1.4035015106201172, "learning_rate": 4.985213350232362e-05, "loss": 0.1047, "step": 350 }, { "epoch": 0.14871144909167722, "grad_norm": 1.2989908456802368, "learning_rate": 4.985128855090833e-05, "loss": 0.0821, "step": 352 }, { "epoch": 0.14955640050697086, "grad_norm": 1.9474072456359863, "learning_rate": 4.985044359949303e-05, "loss": 0.1334, "step": 354 }, { "epoch": 0.15040135192226448, "grad_norm": 1.348775863647461, "learning_rate": 4.984959864807774e-05, "loss": 0.0856, "step": 356 }, { "epoch": 0.1512463033375581, "grad_norm": 1.348597764968872, "learning_rate": 4.984875369666244e-05, "loss": 0.093, "step": 358 }, { "epoch": 0.1520912547528517, "grad_norm": 1.7595020532608032, "learning_rate": 4.984790874524715e-05, "loss": 0.1192, "step": 360 }, { "epoch": 0.15293620616814532, "grad_norm": 1.1924686431884766, "learning_rate": 4.9847063793831856e-05, "loss": 0.0749, "step": 362 }, { "epoch": 0.15378115758343897, "grad_norm": 1.7564600706100464, "learning_rate": 4.984621884241656e-05, "loss": 0.0952, "step": 364 }, { "epoch": 0.15462610899873258, "grad_norm": 1.3915026187896729, "learning_rate": 4.984537389100127e-05, "loss": 0.1268, "step": 366 }, { "epoch": 0.1554710604140262, "grad_norm": 1.8819456100463867, "learning_rate": 4.984452893958598e-05, "loss": 0.0854, "step": 368 }, { "epoch": 0.1563160118293198, "grad_norm": 1.6254719495773315, "learning_rate": 4.984368398817068e-05, "loss": 0.084, "step": 370 }, { "epoch": 0.15716096324461343, "grad_norm": 2.196483850479126, "learning_rate": 4.984283903675539e-05, "loss": 0.1089, "step": 372 }, { "epoch": 0.15800591465990704, "grad_norm": 1.471126675605774, "learning_rate": 4.984199408534009e-05, "loss": 0.0832, "step": 374 }, { "epoch": 0.1588508660752007, "grad_norm": 1.1505743265151978, "learning_rate": 4.98411491339248e-05, "loss": 0.076, "step": 376 }, { "epoch": 0.1596958174904943, "grad_norm": 1.4914796352386475, "learning_rate": 4.984030418250951e-05, "loss": 0.1304, "step": 378 }, { "epoch": 0.16054076890578792, "grad_norm": 1.9942247867584229, "learning_rate": 4.9839459231094214e-05, "loss": 0.1227, "step": 380 }, { "epoch": 0.16138572032108153, "grad_norm": 1.6885210275650024, "learning_rate": 4.983861427967892e-05, "loss": 0.0926, "step": 382 }, { "epoch": 0.16223067173637515, "grad_norm": 1.2002671957015991, "learning_rate": 4.983776932826363e-05, "loss": 0.1011, "step": 384 }, { "epoch": 0.1630756231516688, "grad_norm": 1.6890459060668945, "learning_rate": 4.983692437684833e-05, "loss": 0.1243, "step": 386 }, { "epoch": 0.1639205745669624, "grad_norm": 1.4184560775756836, "learning_rate": 4.983607942543304e-05, "loss": 0.0848, "step": 388 }, { "epoch": 0.16476552598225602, "grad_norm": 2.200415849685669, "learning_rate": 4.983523447401774e-05, "loss": 0.0779, "step": 390 }, { "epoch": 0.16561047739754964, "grad_norm": 1.5030500888824463, "learning_rate": 4.983438952260245e-05, "loss": 0.0828, "step": 392 }, { "epoch": 0.16645542881284325, "grad_norm": 1.2970515489578247, "learning_rate": 4.983354457118716e-05, "loss": 0.0889, "step": 394 }, { "epoch": 0.16730038022813687, "grad_norm": 1.576575517654419, "learning_rate": 4.9832699619771865e-05, "loss": 0.0942, "step": 396 }, { "epoch": 0.1681453316434305, "grad_norm": 1.6719400882720947, "learning_rate": 4.983185466835657e-05, "loss": 0.1166, "step": 398 }, { "epoch": 0.16899028305872413, "grad_norm": 1.6408090591430664, "learning_rate": 4.983100971694128e-05, "loss": 0.1293, "step": 400 }, { "epoch": 0.16899028305872413, "eval_accuracy": 0.6733919641317468, "eval_cer": 0.09762961263971347, "eval_loss": 0.20648193359375, "eval_runtime": 861.8594, "eval_samples_per_second": 13.457, "eval_steps_per_second": 0.421, "step": 400 }, { "epoch": 0.16983523447401774, "grad_norm": 2.1278653144836426, "learning_rate": 4.983016476552598e-05, "loss": 0.1321, "step": 402 }, { "epoch": 0.17068018588931136, "grad_norm": 1.8531532287597656, "learning_rate": 4.9829319814110694e-05, "loss": 0.1042, "step": 404 }, { "epoch": 0.17152513730460497, "grad_norm": 1.587566614151001, "learning_rate": 4.9828474862695394e-05, "loss": 0.1147, "step": 406 }, { "epoch": 0.17237008871989862, "grad_norm": 2.2086760997772217, "learning_rate": 4.982762991128011e-05, "loss": 0.1246, "step": 408 }, { "epoch": 0.17321504013519223, "grad_norm": 1.2952370643615723, "learning_rate": 4.982678495986481e-05, "loss": 0.1073, "step": 410 }, { "epoch": 0.17405999155048585, "grad_norm": 1.7142947912216187, "learning_rate": 4.9825940008449516e-05, "loss": 0.0869, "step": 412 }, { "epoch": 0.17490494296577946, "grad_norm": 1.9402838945388794, "learning_rate": 4.982509505703422e-05, "loss": 0.1126, "step": 414 }, { "epoch": 0.17574989438107308, "grad_norm": 2.2303125858306885, "learning_rate": 4.982425010561893e-05, "loss": 0.1515, "step": 416 }, { "epoch": 0.1765948457963667, "grad_norm": 1.8753235340118408, "learning_rate": 4.982340515420364e-05, "loss": 0.1059, "step": 418 }, { "epoch": 0.17743979721166034, "grad_norm": 1.825201392173767, "learning_rate": 4.9822560202788344e-05, "loss": 0.1604, "step": 420 }, { "epoch": 0.17828474862695395, "grad_norm": 2.0175940990448, "learning_rate": 4.9821715251373045e-05, "loss": 0.1078, "step": 422 }, { "epoch": 0.17912970004224757, "grad_norm": 1.2101908922195435, "learning_rate": 4.982087029995776e-05, "loss": 0.1017, "step": 424 }, { "epoch": 0.17997465145754118, "grad_norm": 1.1782002449035645, "learning_rate": 4.982002534854246e-05, "loss": 0.0722, "step": 426 }, { "epoch": 0.1808196028728348, "grad_norm": 1.684949517250061, "learning_rate": 4.9819180397127167e-05, "loss": 0.1096, "step": 428 }, { "epoch": 0.18166455428812844, "grad_norm": 1.9654364585876465, "learning_rate": 4.9818335445711874e-05, "loss": 0.0896, "step": 430 }, { "epoch": 0.18250950570342206, "grad_norm": 1.5413470268249512, "learning_rate": 4.981749049429658e-05, "loss": 0.0781, "step": 432 }, { "epoch": 0.18335445711871567, "grad_norm": 2.4538686275482178, "learning_rate": 4.981664554288129e-05, "loss": 0.1153, "step": 434 }, { "epoch": 0.1841994085340093, "grad_norm": 1.3073904514312744, "learning_rate": 4.9815800591465995e-05, "loss": 0.0847, "step": 436 }, { "epoch": 0.1850443599493029, "grad_norm": 1.651445746421814, "learning_rate": 4.9814955640050696e-05, "loss": 0.1313, "step": 438 }, { "epoch": 0.18588931136459655, "grad_norm": 1.7214906215667725, "learning_rate": 4.981411068863541e-05, "loss": 0.1008, "step": 440 }, { "epoch": 0.18673426277989016, "grad_norm": 1.0988144874572754, "learning_rate": 4.981326573722011e-05, "loss": 0.1162, "step": 442 }, { "epoch": 0.18757921419518378, "grad_norm": 2.5338993072509766, "learning_rate": 4.981242078580482e-05, "loss": 0.1522, "step": 444 }, { "epoch": 0.1884241656104774, "grad_norm": 1.2699946165084839, "learning_rate": 4.9811575834389525e-05, "loss": 0.0943, "step": 446 }, { "epoch": 0.189269117025771, "grad_norm": 2.2291219234466553, "learning_rate": 4.981073088297423e-05, "loss": 0.0993, "step": 448 }, { "epoch": 0.19011406844106463, "grad_norm": 1.102462649345398, "learning_rate": 4.980988593155894e-05, "loss": 0.0959, "step": 450 }, { "epoch": 0.19095901985635827, "grad_norm": 1.1297458410263062, "learning_rate": 4.9809040980143646e-05, "loss": 0.0748, "step": 452 }, { "epoch": 0.19180397127165189, "grad_norm": 1.316767930984497, "learning_rate": 4.9808196028728347e-05, "loss": 0.0729, "step": 454 }, { "epoch": 0.1926489226869455, "grad_norm": 1.3369216918945312, "learning_rate": 4.980735107731306e-05, "loss": 0.0896, "step": 456 }, { "epoch": 0.19349387410223912, "grad_norm": 1.6894314289093018, "learning_rate": 4.980650612589776e-05, "loss": 0.1607, "step": 458 }, { "epoch": 0.19433882551753273, "grad_norm": 1.727819800376892, "learning_rate": 4.980566117448247e-05, "loss": 0.1024, "step": 460 }, { "epoch": 0.19518377693282637, "grad_norm": 2.435275077819824, "learning_rate": 4.9804816223067175e-05, "loss": 0.1779, "step": 462 }, { "epoch": 0.19602872834812, "grad_norm": 1.8471691608428955, "learning_rate": 4.980397127165188e-05, "loss": 0.0943, "step": 464 }, { "epoch": 0.1968736797634136, "grad_norm": 1.471346139907837, "learning_rate": 4.980312632023659e-05, "loss": 0.1148, "step": 466 }, { "epoch": 0.19771863117870722, "grad_norm": 2.621778726577759, "learning_rate": 4.98022813688213e-05, "loss": 0.1145, "step": 468 }, { "epoch": 0.19856358259400084, "grad_norm": 1.4826730489730835, "learning_rate": 4.9801436417406e-05, "loss": 0.1011, "step": 470 }, { "epoch": 0.19940853400929445, "grad_norm": 1.8840138912200928, "learning_rate": 4.980059146599071e-05, "loss": 0.0823, "step": 472 }, { "epoch": 0.2002534854245881, "grad_norm": 1.329613208770752, "learning_rate": 4.979974651457541e-05, "loss": 0.0794, "step": 474 }, { "epoch": 0.2010984368398817, "grad_norm": 1.6798653602600098, "learning_rate": 4.979890156316012e-05, "loss": 0.068, "step": 476 }, { "epoch": 0.20194338825517533, "grad_norm": 2.3119256496429443, "learning_rate": 4.9798056611744826e-05, "loss": 0.1738, "step": 478 }, { "epoch": 0.20278833967046894, "grad_norm": 1.5852372646331787, "learning_rate": 4.979721166032953e-05, "loss": 0.1466, "step": 480 }, { "epoch": 0.20363329108576256, "grad_norm": 1.3242956399917603, "learning_rate": 4.979636670891424e-05, "loss": 0.0747, "step": 482 }, { "epoch": 0.2044782425010562, "grad_norm": 1.8334121704101562, "learning_rate": 4.979552175749895e-05, "loss": 0.0832, "step": 484 }, { "epoch": 0.20532319391634982, "grad_norm": 1.5589213371276855, "learning_rate": 4.979467680608365e-05, "loss": 0.0939, "step": 486 }, { "epoch": 0.20616814533164343, "grad_norm": 0.6316360831260681, "learning_rate": 4.979383185466836e-05, "loss": 0.0623, "step": 488 }, { "epoch": 0.20701309674693705, "grad_norm": 1.5444834232330322, "learning_rate": 4.979298690325306e-05, "loss": 0.0954, "step": 490 }, { "epoch": 0.20785804816223066, "grad_norm": 2.2183780670166016, "learning_rate": 4.979214195183777e-05, "loss": 0.1144, "step": 492 }, { "epoch": 0.20870299957752428, "grad_norm": 1.4162744283676147, "learning_rate": 4.979129700042248e-05, "loss": 0.0713, "step": 494 }, { "epoch": 0.20954795099281792, "grad_norm": 1.2704912424087524, "learning_rate": 4.9790452049007184e-05, "loss": 0.0556, "step": 496 }, { "epoch": 0.21039290240811154, "grad_norm": 1.9707940816879272, "learning_rate": 4.978960709759189e-05, "loss": 0.0829, "step": 498 }, { "epoch": 0.21123785382340515, "grad_norm": 1.1645714044570923, "learning_rate": 4.97887621461766e-05, "loss": 0.0997, "step": 500 }, { "epoch": 0.21123785382340515, "eval_accuracy": 0.6679599931022591, "eval_cer": 0.10002028988064257, "eval_loss": 0.21607132256031036, "eval_runtime": 889.5025, "eval_samples_per_second": 13.039, "eval_steps_per_second": 0.408, "step": 500 }, { "epoch": 0.21208280523869877, "grad_norm": 1.2636017799377441, "learning_rate": 4.97879171947613e-05, "loss": 0.0853, "step": 502 }, { "epoch": 0.21292775665399238, "grad_norm": 1.3952999114990234, "learning_rate": 4.978707224334601e-05, "loss": 0.0827, "step": 504 }, { "epoch": 0.21377270806928603, "grad_norm": 1.4890190362930298, "learning_rate": 4.978622729193071e-05, "loss": 0.0793, "step": 506 }, { "epoch": 0.21461765948457964, "grad_norm": 3.3428499698638916, "learning_rate": 4.978538234051543e-05, "loss": 0.0804, "step": 508 }, { "epoch": 0.21546261089987326, "grad_norm": 1.8637378215789795, "learning_rate": 4.978453738910013e-05, "loss": 0.1163, "step": 510 }, { "epoch": 0.21630756231516687, "grad_norm": 1.4218493700027466, "learning_rate": 4.9783692437684835e-05, "loss": 0.0926, "step": 512 }, { "epoch": 0.2171525137304605, "grad_norm": 1.2058361768722534, "learning_rate": 4.978284748626954e-05, "loss": 0.0877, "step": 514 }, { "epoch": 0.21799746514575413, "grad_norm": 1.6888997554779053, "learning_rate": 4.978200253485425e-05, "loss": 0.1208, "step": 516 }, { "epoch": 0.21884241656104775, "grad_norm": 1.4230191707611084, "learning_rate": 4.9781157583438957e-05, "loss": 0.1062, "step": 518 }, { "epoch": 0.21968736797634136, "grad_norm": 1.6900297403335571, "learning_rate": 4.9780312632023664e-05, "loss": 0.1051, "step": 520 }, { "epoch": 0.22053231939163498, "grad_norm": 1.9866880178451538, "learning_rate": 4.9779467680608364e-05, "loss": 0.144, "step": 522 }, { "epoch": 0.2213772708069286, "grad_norm": 1.5090681314468384, "learning_rate": 4.977862272919308e-05, "loss": 0.1688, "step": 524 }, { "epoch": 0.2222222222222222, "grad_norm": 1.5939511060714722, "learning_rate": 4.977777777777778e-05, "loss": 0.1095, "step": 526 }, { "epoch": 0.22306717363751585, "grad_norm": 1.6898002624511719, "learning_rate": 4.9776932826362486e-05, "loss": 0.0933, "step": 528 }, { "epoch": 0.22391212505280947, "grad_norm": 1.6217966079711914, "learning_rate": 4.977608787494719e-05, "loss": 0.0906, "step": 530 }, { "epoch": 0.22475707646810308, "grad_norm": 1.187659740447998, "learning_rate": 4.97752429235319e-05, "loss": 0.1091, "step": 532 }, { "epoch": 0.2256020278833967, "grad_norm": 1.255968689918518, "learning_rate": 4.977439797211661e-05, "loss": 0.0659, "step": 534 }, { "epoch": 0.22644697929869032, "grad_norm": 1.5511211156845093, "learning_rate": 4.9773553020701315e-05, "loss": 0.0802, "step": 536 }, { "epoch": 0.22729193071398396, "grad_norm": 1.7793858051300049, "learning_rate": 4.9772708069286015e-05, "loss": 0.1129, "step": 538 }, { "epoch": 0.22813688212927757, "grad_norm": 1.4437144994735718, "learning_rate": 4.977186311787073e-05, "loss": 0.1, "step": 540 }, { "epoch": 0.2289818335445712, "grad_norm": 1.4379544258117676, "learning_rate": 4.977101816645543e-05, "loss": 0.0864, "step": 542 }, { "epoch": 0.2298267849598648, "grad_norm": 0.8294060826301575, "learning_rate": 4.9770173215040137e-05, "loss": 0.0763, "step": 544 }, { "epoch": 0.23067173637515842, "grad_norm": 2.189450740814209, "learning_rate": 4.9769328263624844e-05, "loss": 0.123, "step": 546 }, { "epoch": 0.23151668779045204, "grad_norm": 1.3774739503860474, "learning_rate": 4.976848331220955e-05, "loss": 0.0839, "step": 548 }, { "epoch": 0.23236163920574568, "grad_norm": 1.9243252277374268, "learning_rate": 4.976763836079426e-05, "loss": 0.1049, "step": 550 }, { "epoch": 0.2332065906210393, "grad_norm": 1.1769413948059082, "learning_rate": 4.9766793409378965e-05, "loss": 0.0834, "step": 552 }, { "epoch": 0.2340515420363329, "grad_norm": 1.6378949880599976, "learning_rate": 4.9765948457963666e-05, "loss": 0.1071, "step": 554 }, { "epoch": 0.23489649345162653, "grad_norm": 1.901498556137085, "learning_rate": 4.976510350654838e-05, "loss": 0.1007, "step": 556 }, { "epoch": 0.23574144486692014, "grad_norm": 1.9260848760604858, "learning_rate": 4.976425855513308e-05, "loss": 0.2061, "step": 558 }, { "epoch": 0.23658639628221378, "grad_norm": 3.253631353378296, "learning_rate": 4.976341360371779e-05, "loss": 0.1726, "step": 560 }, { "epoch": 0.2374313476975074, "grad_norm": 1.5378361940383911, "learning_rate": 4.9762568652302495e-05, "loss": 0.0962, "step": 562 }, { "epoch": 0.23827629911280102, "grad_norm": 1.3367713689804077, "learning_rate": 4.97617237008872e-05, "loss": 0.1047, "step": 564 }, { "epoch": 0.23912125052809463, "grad_norm": 1.2774410247802734, "learning_rate": 4.976087874947191e-05, "loss": 0.0745, "step": 566 }, { "epoch": 0.23996620194338825, "grad_norm": 1.8468585014343262, "learning_rate": 4.9760033798056616e-05, "loss": 0.1207, "step": 568 }, { "epoch": 0.24081115335868186, "grad_norm": 2.125532388687134, "learning_rate": 4.9759188846641317e-05, "loss": 0.1321, "step": 570 }, { "epoch": 0.2416561047739755, "grad_norm": 1.7013835906982422, "learning_rate": 4.975834389522603e-05, "loss": 0.0941, "step": 572 }, { "epoch": 0.24250105618926912, "grad_norm": 1.4416042566299438, "learning_rate": 4.975749894381073e-05, "loss": 0.1195, "step": 574 }, { "epoch": 0.24334600760456274, "grad_norm": 1.7456398010253906, "learning_rate": 4.975665399239544e-05, "loss": 0.1035, "step": 576 }, { "epoch": 0.24419095901985635, "grad_norm": 1.6715686321258545, "learning_rate": 4.9755809040980145e-05, "loss": 0.083, "step": 578 }, { "epoch": 0.24503591043514997, "grad_norm": 1.738721251487732, "learning_rate": 4.975496408956485e-05, "loss": 0.0842, "step": 580 }, { "epoch": 0.2458808618504436, "grad_norm": 1.4637415409088135, "learning_rate": 4.975411913814956e-05, "loss": 0.1192, "step": 582 }, { "epoch": 0.24672581326573723, "grad_norm": 1.5448874235153198, "learning_rate": 4.975327418673427e-05, "loss": 0.1092, "step": 584 }, { "epoch": 0.24757076468103084, "grad_norm": 1.877724051475525, "learning_rate": 4.975242923531897e-05, "loss": 0.1203, "step": 586 }, { "epoch": 0.24841571609632446, "grad_norm": 1.81662917137146, "learning_rate": 4.975158428390368e-05, "loss": 0.1101, "step": 588 }, { "epoch": 0.24926066751161807, "grad_norm": 1.8778841495513916, "learning_rate": 4.975073933248838e-05, "loss": 0.105, "step": 590 }, { "epoch": 0.2501056189269117, "grad_norm": 1.7268378734588623, "learning_rate": 4.974989438107309e-05, "loss": 0.1164, "step": 592 }, { "epoch": 0.2509505703422053, "grad_norm": 1.972676396369934, "learning_rate": 4.9749049429657796e-05, "loss": 0.1177, "step": 594 }, { "epoch": 0.2517955217574989, "grad_norm": 1.2417385578155518, "learning_rate": 4.97482044782425e-05, "loss": 0.0752, "step": 596 }, { "epoch": 0.2526404731727926, "grad_norm": 1.4219918251037598, "learning_rate": 4.974735952682721e-05, "loss": 0.0786, "step": 598 }, { "epoch": 0.2534854245880862, "grad_norm": 1.9878331422805786, "learning_rate": 4.974651457541192e-05, "loss": 0.1277, "step": 600 }, { "epoch": 0.2534854245880862, "eval_accuracy": 0.6739092947059838, "eval_cer": 0.09566237638610761, "eval_loss": 0.20857658982276917, "eval_runtime": 850.0665, "eval_samples_per_second": 13.644, "eval_steps_per_second": 0.427, "step": 600 }, { "epoch": 0.2543303760033798, "grad_norm": 2.0730292797088623, "learning_rate": 4.974566962399662e-05, "loss": 0.1183, "step": 602 }, { "epoch": 0.25517532741867344, "grad_norm": 1.7861765623092651, "learning_rate": 4.974482467258133e-05, "loss": 0.1055, "step": 604 }, { "epoch": 0.25602027883396705, "grad_norm": 1.1870955228805542, "learning_rate": 4.974397972116603e-05, "loss": 0.1028, "step": 606 }, { "epoch": 0.25686523024926067, "grad_norm": 1.5035452842712402, "learning_rate": 4.9743134769750747e-05, "loss": 0.0945, "step": 608 }, { "epoch": 0.2577101816645543, "grad_norm": 1.4320887327194214, "learning_rate": 4.974228981833545e-05, "loss": 0.0896, "step": 610 }, { "epoch": 0.2585551330798479, "grad_norm": 1.3751976490020752, "learning_rate": 4.9741444866920154e-05, "loss": 0.0968, "step": 612 }, { "epoch": 0.2594000844951415, "grad_norm": 1.5161865949630737, "learning_rate": 4.974059991550486e-05, "loss": 0.1359, "step": 614 }, { "epoch": 0.26024503591043513, "grad_norm": 1.5288583040237427, "learning_rate": 4.973975496408957e-05, "loss": 0.0897, "step": 616 }, { "epoch": 0.26108998732572875, "grad_norm": 2.264397382736206, "learning_rate": 4.9738910012674276e-05, "loss": 0.1312, "step": 618 }, { "epoch": 0.2619349387410224, "grad_norm": 1.3001281023025513, "learning_rate": 4.973806506125898e-05, "loss": 0.0848, "step": 620 }, { "epoch": 0.26277989015631603, "grad_norm": 2.159705638885498, "learning_rate": 4.973722010984368e-05, "loss": 0.0851, "step": 622 }, { "epoch": 0.26362484157160965, "grad_norm": 1.870424747467041, "learning_rate": 4.97363751584284e-05, "loss": 0.0959, "step": 624 }, { "epoch": 0.26446979298690326, "grad_norm": 1.2810477018356323, "learning_rate": 4.97355302070131e-05, "loss": 0.0888, "step": 626 }, { "epoch": 0.2653147444021969, "grad_norm": 2.051835775375366, "learning_rate": 4.9734685255597805e-05, "loss": 0.0992, "step": 628 }, { "epoch": 0.2661596958174905, "grad_norm": 2.29409122467041, "learning_rate": 4.973384030418251e-05, "loss": 0.1117, "step": 630 }, { "epoch": 0.2670046472327841, "grad_norm": 1.814590573310852, "learning_rate": 4.973299535276722e-05, "loss": 0.0997, "step": 632 }, { "epoch": 0.2678495986480777, "grad_norm": 1.616007685661316, "learning_rate": 4.9732150401351927e-05, "loss": 0.1106, "step": 634 }, { "epoch": 0.26869455006337134, "grad_norm": 1.9944722652435303, "learning_rate": 4.9731305449936634e-05, "loss": 0.1214, "step": 636 }, { "epoch": 0.26953950147866496, "grad_norm": 1.2416284084320068, "learning_rate": 4.9730460498521334e-05, "loss": 0.1131, "step": 638 }, { "epoch": 0.27038445289395857, "grad_norm": 0.6368843913078308, "learning_rate": 4.972961554710605e-05, "loss": 0.043, "step": 640 }, { "epoch": 0.27122940430925224, "grad_norm": 1.266077995300293, "learning_rate": 4.972877059569075e-05, "loss": 0.1076, "step": 642 }, { "epoch": 0.27207435572454586, "grad_norm": 1.4216665029525757, "learning_rate": 4.9727925644275456e-05, "loss": 0.1004, "step": 644 }, { "epoch": 0.2729193071398395, "grad_norm": 1.3999009132385254, "learning_rate": 4.972708069286016e-05, "loss": 0.0958, "step": 646 }, { "epoch": 0.2737642585551331, "grad_norm": 0.8566420078277588, "learning_rate": 4.972623574144487e-05, "loss": 0.1033, "step": 648 }, { "epoch": 0.2746092099704267, "grad_norm": 1.770908236503601, "learning_rate": 4.972539079002958e-05, "loss": 0.1356, "step": 650 }, { "epoch": 0.2754541613857203, "grad_norm": 1.3873233795166016, "learning_rate": 4.9724545838614285e-05, "loss": 0.1131, "step": 652 }, { "epoch": 0.27629911280101394, "grad_norm": 1.4224236011505127, "learning_rate": 4.9723700887198985e-05, "loss": 0.0794, "step": 654 }, { "epoch": 0.27714406421630755, "grad_norm": 1.4874037504196167, "learning_rate": 4.97228559357837e-05, "loss": 0.0949, "step": 656 }, { "epoch": 0.27798901563160117, "grad_norm": 1.5964959859848022, "learning_rate": 4.97220109843684e-05, "loss": 0.0936, "step": 658 }, { "epoch": 0.2788339670468948, "grad_norm": 1.5574475526809692, "learning_rate": 4.9721166032953107e-05, "loss": 0.0992, "step": 660 }, { "epoch": 0.2796789184621884, "grad_norm": 3.1248722076416016, "learning_rate": 4.9720321081537814e-05, "loss": 0.0757, "step": 662 }, { "epoch": 0.28052386987748207, "grad_norm": 2.060107707977295, "learning_rate": 4.971947613012252e-05, "loss": 0.0923, "step": 664 }, { "epoch": 0.2813688212927757, "grad_norm": 1.112054467201233, "learning_rate": 4.971863117870723e-05, "loss": 0.0541, "step": 666 }, { "epoch": 0.2822137727080693, "grad_norm": 1.5512161254882812, "learning_rate": 4.9717786227291935e-05, "loss": 0.0921, "step": 668 }, { "epoch": 0.2830587241233629, "grad_norm": 1.544339895248413, "learning_rate": 4.9716941275876636e-05, "loss": 0.0877, "step": 670 }, { "epoch": 0.28390367553865653, "grad_norm": 1.6371303796768188, "learning_rate": 4.971609632446135e-05, "loss": 0.1396, "step": 672 }, { "epoch": 0.28474862695395015, "grad_norm": 1.3973644971847534, "learning_rate": 4.971525137304605e-05, "loss": 0.1194, "step": 674 }, { "epoch": 0.28559357836924376, "grad_norm": 1.4875513315200806, "learning_rate": 4.971440642163076e-05, "loss": 0.0841, "step": 676 }, { "epoch": 0.2864385297845374, "grad_norm": 1.3536832332611084, "learning_rate": 4.9713561470215465e-05, "loss": 0.0736, "step": 678 }, { "epoch": 0.287283481199831, "grad_norm": 1.8549797534942627, "learning_rate": 4.971271651880017e-05, "loss": 0.1192, "step": 680 }, { "epoch": 0.2881284326151246, "grad_norm": 2.0601296424865723, "learning_rate": 4.971187156738488e-05, "loss": 0.1393, "step": 682 }, { "epoch": 0.2889733840304182, "grad_norm": 1.577346920967102, "learning_rate": 4.9711026615969586e-05, "loss": 0.072, "step": 684 }, { "epoch": 0.2898183354457119, "grad_norm": 1.3023242950439453, "learning_rate": 4.9710181664554287e-05, "loss": 0.0936, "step": 686 }, { "epoch": 0.2906632868610055, "grad_norm": 1.5268125534057617, "learning_rate": 4.9709336713139e-05, "loss": 0.1015, "step": 688 }, { "epoch": 0.2915082382762991, "grad_norm": 1.6719399690628052, "learning_rate": 4.97084917617237e-05, "loss": 0.0853, "step": 690 }, { "epoch": 0.29235318969159274, "grad_norm": 1.622419834136963, "learning_rate": 4.970764681030841e-05, "loss": 0.0808, "step": 692 }, { "epoch": 0.29319814110688636, "grad_norm": 1.5559837818145752, "learning_rate": 4.9706801858893115e-05, "loss": 0.1144, "step": 694 }, { "epoch": 0.29404309252217997, "grad_norm": 1.5186535120010376, "learning_rate": 4.970595690747782e-05, "loss": 0.1343, "step": 696 }, { "epoch": 0.2948880439374736, "grad_norm": 1.011915683746338, "learning_rate": 4.970511195606253e-05, "loss": 0.064, "step": 698 }, { "epoch": 0.2957329953527672, "grad_norm": 1.7633938789367676, "learning_rate": 4.970426700464724e-05, "loss": 0.1294, "step": 700 }, { "epoch": 0.2957329953527672, "eval_accuracy": 0.6749439558544577, "eval_cer": 0.09162204363206507, "eval_loss": 0.20422282814979553, "eval_runtime": 857.9136, "eval_samples_per_second": 13.519, "eval_steps_per_second": 0.423, "step": 700 }, { "epoch": 0.2965779467680608, "grad_norm": 1.6819132566452026, "learning_rate": 4.970342205323194e-05, "loss": 0.0934, "step": 702 }, { "epoch": 0.29742289818335443, "grad_norm": 1.0647895336151123, "learning_rate": 4.970257710181665e-05, "loss": 0.1007, "step": 704 }, { "epoch": 0.2982678495986481, "grad_norm": 2.1595728397369385, "learning_rate": 4.970173215040135e-05, "loss": 0.1345, "step": 706 }, { "epoch": 0.2991128010139417, "grad_norm": 1.0478488206863403, "learning_rate": 4.9700887198986066e-05, "loss": 0.0608, "step": 708 }, { "epoch": 0.29995775242923534, "grad_norm": 1.2893446683883667, "learning_rate": 4.9700042247570766e-05, "loss": 0.1021, "step": 710 }, { "epoch": 0.30080270384452895, "grad_norm": 2.40004563331604, "learning_rate": 4.969919729615547e-05, "loss": 0.1251, "step": 712 }, { "epoch": 0.30164765525982257, "grad_norm": 1.4895504713058472, "learning_rate": 4.969835234474018e-05, "loss": 0.101, "step": 714 }, { "epoch": 0.3024926066751162, "grad_norm": 1.2066045999526978, "learning_rate": 4.969750739332489e-05, "loss": 0.1069, "step": 716 }, { "epoch": 0.3033375580904098, "grad_norm": 2.0504655838012695, "learning_rate": 4.9696662441909595e-05, "loss": 0.1287, "step": 718 }, { "epoch": 0.3041825095057034, "grad_norm": 2.006098508834839, "learning_rate": 4.96958174904943e-05, "loss": 0.1442, "step": 720 }, { "epoch": 0.30502746092099703, "grad_norm": 1.9753419160842896, "learning_rate": 4.9694972539079e-05, "loss": 0.1023, "step": 722 }, { "epoch": 0.30587241233629064, "grad_norm": 1.803053617477417, "learning_rate": 4.9694127587663717e-05, "loss": 0.1168, "step": 724 }, { "epoch": 0.30671736375158426, "grad_norm": 1.8871780633926392, "learning_rate": 4.969328263624842e-05, "loss": 0.12, "step": 726 }, { "epoch": 0.30756231516687793, "grad_norm": 1.7384098768234253, "learning_rate": 4.9692437684833124e-05, "loss": 0.1312, "step": 728 }, { "epoch": 0.30840726658217155, "grad_norm": 1.6403692960739136, "learning_rate": 4.969159273341783e-05, "loss": 0.0897, "step": 730 }, { "epoch": 0.30925221799746516, "grad_norm": 1.7307875156402588, "learning_rate": 4.969074778200254e-05, "loss": 0.0944, "step": 732 }, { "epoch": 0.3100971694127588, "grad_norm": 1.8397547006607056, "learning_rate": 4.9689902830587246e-05, "loss": 0.0745, "step": 734 }, { "epoch": 0.3109421208280524, "grad_norm": 1.6901930570602417, "learning_rate": 4.968905787917195e-05, "loss": 0.1161, "step": 736 }, { "epoch": 0.311787072243346, "grad_norm": 1.9397594928741455, "learning_rate": 4.9688212927756653e-05, "loss": 0.0964, "step": 738 }, { "epoch": 0.3126320236586396, "grad_norm": 1.4291322231292725, "learning_rate": 4.968736797634137e-05, "loss": 0.0987, "step": 740 }, { "epoch": 0.31347697507393324, "grad_norm": 1.0510236024856567, "learning_rate": 4.968652302492607e-05, "loss": 0.0831, "step": 742 }, { "epoch": 0.31432192648922685, "grad_norm": 1.2731682062149048, "learning_rate": 4.9685678073510775e-05, "loss": 0.0792, "step": 744 }, { "epoch": 0.31516687790452047, "grad_norm": 1.3949739933013916, "learning_rate": 4.968483312209548e-05, "loss": 0.0904, "step": 746 }, { "epoch": 0.3160118293198141, "grad_norm": 1.6889134645462036, "learning_rate": 4.968398817068019e-05, "loss": 0.0752, "step": 748 }, { "epoch": 0.31685678073510776, "grad_norm": 1.578761339187622, "learning_rate": 4.9683143219264897e-05, "loss": 0.0781, "step": 750 }, { "epoch": 0.3177017321504014, "grad_norm": 1.338944435119629, "learning_rate": 4.9682298267849604e-05, "loss": 0.0753, "step": 752 }, { "epoch": 0.318546683565695, "grad_norm": 1.8360531330108643, "learning_rate": 4.9681453316434304e-05, "loss": 0.1355, "step": 754 }, { "epoch": 0.3193916349809886, "grad_norm": 1.851449966430664, "learning_rate": 4.968060836501902e-05, "loss": 0.1004, "step": 756 }, { "epoch": 0.3202365863962822, "grad_norm": 1.6657130718231201, "learning_rate": 4.967976341360372e-05, "loss": 0.1208, "step": 758 }, { "epoch": 0.32108153781157583, "grad_norm": 1.2685651779174805, "learning_rate": 4.9678918462188426e-05, "loss": 0.0878, "step": 760 }, { "epoch": 0.32192648922686945, "grad_norm": 1.2503762245178223, "learning_rate": 4.967807351077313e-05, "loss": 0.084, "step": 762 }, { "epoch": 0.32277144064216307, "grad_norm": 1.9573127031326294, "learning_rate": 4.967722855935784e-05, "loss": 0.1035, "step": 764 }, { "epoch": 0.3236163920574567, "grad_norm": 1.7501864433288574, "learning_rate": 4.967638360794255e-05, "loss": 0.1081, "step": 766 }, { "epoch": 0.3244613434727503, "grad_norm": 1.4179309606552124, "learning_rate": 4.9675538656527255e-05, "loss": 0.0809, "step": 768 }, { "epoch": 0.3253062948880439, "grad_norm": 1.814603328704834, "learning_rate": 4.9674693705111955e-05, "loss": 0.1207, "step": 770 }, { "epoch": 0.3261512463033376, "grad_norm": 2.027031660079956, "learning_rate": 4.967384875369667e-05, "loss": 0.1055, "step": 772 }, { "epoch": 0.3269961977186312, "grad_norm": 1.419382095336914, "learning_rate": 4.967300380228137e-05, "loss": 0.1301, "step": 774 }, { "epoch": 0.3278411491339248, "grad_norm": 1.9750804901123047, "learning_rate": 4.9672158850866077e-05, "loss": 0.0979, "step": 776 }, { "epoch": 0.32868610054921843, "grad_norm": 1.274034857749939, "learning_rate": 4.9671313899450784e-05, "loss": 0.1249, "step": 778 }, { "epoch": 0.32953105196451205, "grad_norm": 1.4384434223175049, "learning_rate": 4.967046894803549e-05, "loss": 0.0958, "step": 780 }, { "epoch": 0.33037600337980566, "grad_norm": 1.5350993871688843, "learning_rate": 4.96696239966202e-05, "loss": 0.104, "step": 782 }, { "epoch": 0.3312209547950993, "grad_norm": 1.434937596321106, "learning_rate": 4.9668779045204905e-05, "loss": 0.1108, "step": 784 }, { "epoch": 0.3320659062103929, "grad_norm": 1.7014148235321045, "learning_rate": 4.9667934093789606e-05, "loss": 0.0979, "step": 786 }, { "epoch": 0.3329108576256865, "grad_norm": 2.0817980766296387, "learning_rate": 4.966708914237432e-05, "loss": 0.1755, "step": 788 }, { "epoch": 0.3337558090409801, "grad_norm": 1.4785884618759155, "learning_rate": 4.966624419095902e-05, "loss": 0.1047, "step": 790 }, { "epoch": 0.33460076045627374, "grad_norm": 1.2489787340164185, "learning_rate": 4.966539923954373e-05, "loss": 0.0788, "step": 792 }, { "epoch": 0.3354457118715674, "grad_norm": 1.1976639032363892, "learning_rate": 4.9664554288128435e-05, "loss": 0.0901, "step": 794 }, { "epoch": 0.336290663286861, "grad_norm": 1.2119868993759155, "learning_rate": 4.966370933671314e-05, "loss": 0.0854, "step": 796 }, { "epoch": 0.33713561470215464, "grad_norm": 1.3082143068313599, "learning_rate": 4.966286438529785e-05, "loss": 0.0958, "step": 798 }, { "epoch": 0.33798056611744826, "grad_norm": 1.2046393156051636, "learning_rate": 4.9662019433882556e-05, "loss": 0.1144, "step": 800 }, { "epoch": 0.33798056611744826, "eval_accuracy": 0.6734781858941197, "eval_cer": 0.09568884144781531, "eval_loss": 0.20647239685058594, "eval_runtime": 849.7943, "eval_samples_per_second": 13.648, "eval_steps_per_second": 0.427, "step": 800 }, { "epoch": 0.33882551753274187, "grad_norm": 1.3277101516723633, "learning_rate": 4.966117448246726e-05, "loss": 0.1023, "step": 802 }, { "epoch": 0.3396704689480355, "grad_norm": 2.174595832824707, "learning_rate": 4.966032953105197e-05, "loss": 0.1265, "step": 804 }, { "epoch": 0.3405154203633291, "grad_norm": 1.4048463106155396, "learning_rate": 4.965948457963667e-05, "loss": 0.0777, "step": 806 }, { "epoch": 0.3413603717786227, "grad_norm": 1.428268313407898, "learning_rate": 4.9658639628221385e-05, "loss": 0.0767, "step": 808 }, { "epoch": 0.34220532319391633, "grad_norm": 1.6813422441482544, "learning_rate": 4.9657794676806085e-05, "loss": 0.088, "step": 810 }, { "epoch": 0.34305027460920995, "grad_norm": 1.722402811050415, "learning_rate": 4.965694972539079e-05, "loss": 0.135, "step": 812 }, { "epoch": 0.34389522602450356, "grad_norm": 1.913205862045288, "learning_rate": 4.96561047739755e-05, "loss": 0.084, "step": 814 }, { "epoch": 0.34474017743979724, "grad_norm": 2.381237506866455, "learning_rate": 4.965525982256021e-05, "loss": 0.0957, "step": 816 }, { "epoch": 0.34558512885509085, "grad_norm": 1.3818658590316772, "learning_rate": 4.9654414871144914e-05, "loss": 0.0889, "step": 818 }, { "epoch": 0.34643008027038447, "grad_norm": 2.095109701156616, "learning_rate": 4.965356991972962e-05, "loss": 0.1397, "step": 820 }, { "epoch": 0.3472750316856781, "grad_norm": 1.6586017608642578, "learning_rate": 4.965272496831432e-05, "loss": 0.105, "step": 822 }, { "epoch": 0.3481199831009717, "grad_norm": 0.987585723400116, "learning_rate": 4.9651880016899036e-05, "loss": 0.0574, "step": 824 }, { "epoch": 0.3489649345162653, "grad_norm": 1.1450375318527222, "learning_rate": 4.9651035065483736e-05, "loss": 0.1137, "step": 826 }, { "epoch": 0.34980988593155893, "grad_norm": 1.1953158378601074, "learning_rate": 4.9650190114068443e-05, "loss": 0.0888, "step": 828 }, { "epoch": 0.35065483734685254, "grad_norm": 1.4406365156173706, "learning_rate": 4.964934516265315e-05, "loss": 0.0587, "step": 830 }, { "epoch": 0.35149978876214616, "grad_norm": 2.095817804336548, "learning_rate": 4.964850021123786e-05, "loss": 0.0986, "step": 832 }, { "epoch": 0.3523447401774398, "grad_norm": 1.560104489326477, "learning_rate": 4.9647655259822565e-05, "loss": 0.0834, "step": 834 }, { "epoch": 0.3531896915927334, "grad_norm": 1.1767041683197021, "learning_rate": 4.964681030840727e-05, "loss": 0.0685, "step": 836 }, { "epoch": 0.35403464300802706, "grad_norm": 1.3894157409667969, "learning_rate": 4.964596535699197e-05, "loss": 0.075, "step": 838 }, { "epoch": 0.3548795944233207, "grad_norm": 2.41341233253479, "learning_rate": 4.9645120405576687e-05, "loss": 0.1638, "step": 840 }, { "epoch": 0.3557245458386143, "grad_norm": 1.4480561017990112, "learning_rate": 4.964427545416139e-05, "loss": 0.0849, "step": 842 }, { "epoch": 0.3565694972539079, "grad_norm": 1.3235077857971191, "learning_rate": 4.9643430502746094e-05, "loss": 0.0826, "step": 844 }, { "epoch": 0.3574144486692015, "grad_norm": 1.5331876277923584, "learning_rate": 4.96425855513308e-05, "loss": 0.1023, "step": 846 }, { "epoch": 0.35825940008449514, "grad_norm": 1.0759912729263306, "learning_rate": 4.964174059991551e-05, "loss": 0.1031, "step": 848 }, { "epoch": 0.35910435149978875, "grad_norm": 2.00351881980896, "learning_rate": 4.9640895648500216e-05, "loss": 0.1153, "step": 850 }, { "epoch": 0.35994930291508237, "grad_norm": 1.0817115306854248, "learning_rate": 4.964005069708492e-05, "loss": 0.0852, "step": 852 }, { "epoch": 0.360794254330376, "grad_norm": 2.143852472305298, "learning_rate": 4.9639205745669623e-05, "loss": 0.1136, "step": 854 }, { "epoch": 0.3616392057456696, "grad_norm": 1.8244491815567017, "learning_rate": 4.963836079425434e-05, "loss": 0.0983, "step": 856 }, { "epoch": 0.36248415716096327, "grad_norm": 1.4860317707061768, "learning_rate": 4.963751584283904e-05, "loss": 0.0958, "step": 858 }, { "epoch": 0.3633291085762569, "grad_norm": 2.0593981742858887, "learning_rate": 4.9636670891423745e-05, "loss": 0.101, "step": 860 }, { "epoch": 0.3641740599915505, "grad_norm": 1.366678237915039, "learning_rate": 4.963582594000845e-05, "loss": 0.0713, "step": 862 }, { "epoch": 0.3650190114068441, "grad_norm": 1.5878137350082397, "learning_rate": 4.963498098859316e-05, "loss": 0.0652, "step": 864 }, { "epoch": 0.36586396282213773, "grad_norm": 1.733601689338684, "learning_rate": 4.9634136037177867e-05, "loss": 0.0886, "step": 866 }, { "epoch": 0.36670891423743135, "grad_norm": 1.2537791728973389, "learning_rate": 4.9633291085762574e-05, "loss": 0.0853, "step": 868 }, { "epoch": 0.36755386565272496, "grad_norm": 1.5744574069976807, "learning_rate": 4.9632446134347274e-05, "loss": 0.0706, "step": 870 }, { "epoch": 0.3683988170680186, "grad_norm": 2.3552136421203613, "learning_rate": 4.963160118293199e-05, "loss": 0.1153, "step": 872 }, { "epoch": 0.3692437684833122, "grad_norm": 1.0820305347442627, "learning_rate": 4.963075623151669e-05, "loss": 0.0671, "step": 874 }, { "epoch": 0.3700887198986058, "grad_norm": 1.6050610542297363, "learning_rate": 4.9629911280101396e-05, "loss": 0.0934, "step": 876 }, { "epoch": 0.3709336713138994, "grad_norm": 1.5215171575546265, "learning_rate": 4.96290663286861e-05, "loss": 0.1586, "step": 878 }, { "epoch": 0.3717786227291931, "grad_norm": 1.4043138027191162, "learning_rate": 4.962822137727081e-05, "loss": 0.0879, "step": 880 }, { "epoch": 0.3726235741444867, "grad_norm": 0.8804981112480164, "learning_rate": 4.962737642585552e-05, "loss": 0.0489, "step": 882 }, { "epoch": 0.37346852555978033, "grad_norm": 1.3882102966308594, "learning_rate": 4.9626531474440225e-05, "loss": 0.0761, "step": 884 }, { "epoch": 0.37431347697507394, "grad_norm": 1.4841866493225098, "learning_rate": 4.9625686523024925e-05, "loss": 0.0844, "step": 886 }, { "epoch": 0.37515842839036756, "grad_norm": 1.3954799175262451, "learning_rate": 4.962484157160964e-05, "loss": 0.0709, "step": 888 }, { "epoch": 0.3760033798056612, "grad_norm": 1.3372573852539062, "learning_rate": 4.962399662019434e-05, "loss": 0.096, "step": 890 }, { "epoch": 0.3768483312209548, "grad_norm": 1.2923667430877686, "learning_rate": 4.962315166877905e-05, "loss": 0.1023, "step": 892 }, { "epoch": 0.3776932826362484, "grad_norm": 1.5196459293365479, "learning_rate": 4.9622306717363754e-05, "loss": 0.115, "step": 894 }, { "epoch": 0.378538234051542, "grad_norm": 1.83379065990448, "learning_rate": 4.962146176594846e-05, "loss": 0.0993, "step": 896 }, { "epoch": 0.37938318546683564, "grad_norm": 1.5020735263824463, "learning_rate": 4.962061681453317e-05, "loss": 0.0853, "step": 898 }, { "epoch": 0.38022813688212925, "grad_norm": 1.7137893438339233, "learning_rate": 4.9619771863117875e-05, "loss": 0.1109, "step": 900 }, { "epoch": 0.38022813688212925, "eval_accuracy": 0.6778754957751336, "eval_cer": 0.09363338832185043, "eval_loss": 0.20783965289592743, "eval_runtime": 856.7083, "eval_samples_per_second": 13.538, "eval_steps_per_second": 0.424, "step": 900 }, { "epoch": 0.3810730882974229, "grad_norm": 1.7347830533981323, "learning_rate": 4.9618926911702576e-05, "loss": 0.1199, "step": 902 }, { "epoch": 0.38191803971271654, "grad_norm": 1.9052618741989136, "learning_rate": 4.961808196028729e-05, "loss": 0.1172, "step": 904 }, { "epoch": 0.38276299112801015, "grad_norm": 1.5322483777999878, "learning_rate": 4.961723700887199e-05, "loss": 0.0881, "step": 906 }, { "epoch": 0.38360794254330377, "grad_norm": 0.9071521759033203, "learning_rate": 4.9616392057456704e-05, "loss": 0.0793, "step": 908 }, { "epoch": 0.3844528939585974, "grad_norm": 1.6952403783798218, "learning_rate": 4.9615547106041405e-05, "loss": 0.0917, "step": 910 }, { "epoch": 0.385297845373891, "grad_norm": 1.9736824035644531, "learning_rate": 4.961470215462611e-05, "loss": 0.09, "step": 912 }, { "epoch": 0.3861427967891846, "grad_norm": 1.999121904373169, "learning_rate": 4.961385720321082e-05, "loss": 0.1251, "step": 914 }, { "epoch": 0.38698774820447823, "grad_norm": 1.406522512435913, "learning_rate": 4.9613012251795526e-05, "loss": 0.0969, "step": 916 }, { "epoch": 0.38783269961977185, "grad_norm": 1.3701584339141846, "learning_rate": 4.9612167300380233e-05, "loss": 0.0932, "step": 918 }, { "epoch": 0.38867765103506546, "grad_norm": 1.0016272068023682, "learning_rate": 4.961132234896494e-05, "loss": 0.0597, "step": 920 }, { "epoch": 0.3895226024503591, "grad_norm": 1.3503810167312622, "learning_rate": 4.961047739754964e-05, "loss": 0.071, "step": 922 }, { "epoch": 0.39036755386565275, "grad_norm": 1.599910855293274, "learning_rate": 4.9609632446134355e-05, "loss": 0.099, "step": 924 }, { "epoch": 0.39121250528094637, "grad_norm": 1.4374080896377563, "learning_rate": 4.9608787494719055e-05, "loss": 0.0792, "step": 926 }, { "epoch": 0.39205745669624, "grad_norm": 2.009080648422241, "learning_rate": 4.960794254330376e-05, "loss": 0.1313, "step": 928 }, { "epoch": 0.3929024081115336, "grad_norm": 1.5988420248031616, "learning_rate": 4.960709759188847e-05, "loss": 0.1013, "step": 930 }, { "epoch": 0.3937473595268272, "grad_norm": 1.9358876943588257, "learning_rate": 4.960625264047318e-05, "loss": 0.1015, "step": 932 }, { "epoch": 0.3945923109421208, "grad_norm": 1.4540034532546997, "learning_rate": 4.9605407689057884e-05, "loss": 0.0727, "step": 934 }, { "epoch": 0.39543726235741444, "grad_norm": 1.400652289390564, "learning_rate": 4.960456273764259e-05, "loss": 0.0867, "step": 936 }, { "epoch": 0.39628221377270806, "grad_norm": 1.7808191776275635, "learning_rate": 4.960371778622729e-05, "loss": 0.1269, "step": 938 }, { "epoch": 0.3971271651880017, "grad_norm": 1.5522500276565552, "learning_rate": 4.9602872834812006e-05, "loss": 0.1115, "step": 940 }, { "epoch": 0.3979721166032953, "grad_norm": 1.5305380821228027, "learning_rate": 4.9602027883396706e-05, "loss": 0.0993, "step": 942 }, { "epoch": 0.3988170680185889, "grad_norm": 1.0456280708312988, "learning_rate": 4.9601182931981413e-05, "loss": 0.0775, "step": 944 }, { "epoch": 0.3996620194338826, "grad_norm": 1.5103769302368164, "learning_rate": 4.960033798056612e-05, "loss": 0.0801, "step": 946 }, { "epoch": 0.4005069708491762, "grad_norm": 1.217235803604126, "learning_rate": 4.959949302915083e-05, "loss": 0.0951, "step": 948 }, { "epoch": 0.4013519222644698, "grad_norm": 1.5151118040084839, "learning_rate": 4.9598648077735535e-05, "loss": 0.1016, "step": 950 }, { "epoch": 0.4021968736797634, "grad_norm": 1.443966269493103, "learning_rate": 4.959780312632024e-05, "loss": 0.1283, "step": 952 }, { "epoch": 0.40304182509505704, "grad_norm": 1.539581537246704, "learning_rate": 4.959695817490494e-05, "loss": 0.115, "step": 954 }, { "epoch": 0.40388677651035065, "grad_norm": 1.4787349700927734, "learning_rate": 4.959611322348966e-05, "loss": 0.1022, "step": 956 }, { "epoch": 0.40473172792564427, "grad_norm": 2.0702366828918457, "learning_rate": 4.959526827207436e-05, "loss": 0.1072, "step": 958 }, { "epoch": 0.4055766793409379, "grad_norm": 1.1393963098526, "learning_rate": 4.9594423320659064e-05, "loss": 0.1219, "step": 960 }, { "epoch": 0.4064216307562315, "grad_norm": 0.837650716304779, "learning_rate": 4.959357836924377e-05, "loss": 0.0657, "step": 962 }, { "epoch": 0.4072665821715251, "grad_norm": 1.5785096883773804, "learning_rate": 4.959273341782848e-05, "loss": 0.0883, "step": 964 }, { "epoch": 0.40811153358681873, "grad_norm": 1.83567214012146, "learning_rate": 4.9591888466413186e-05, "loss": 0.0854, "step": 966 }, { "epoch": 0.4089564850021124, "grad_norm": 1.9943451881408691, "learning_rate": 4.959104351499789e-05, "loss": 0.133, "step": 968 }, { "epoch": 0.409801436417406, "grad_norm": 1.8926469087600708, "learning_rate": 4.9590198563582593e-05, "loss": 0.1113, "step": 970 }, { "epoch": 0.41064638783269963, "grad_norm": 1.2133026123046875, "learning_rate": 4.958935361216731e-05, "loss": 0.0637, "step": 972 }, { "epoch": 0.41149133924799325, "grad_norm": 1.2940943241119385, "learning_rate": 4.958850866075201e-05, "loss": 0.0829, "step": 974 }, { "epoch": 0.41233629066328686, "grad_norm": 1.3455214500427246, "learning_rate": 4.9587663709336715e-05, "loss": 0.0953, "step": 976 }, { "epoch": 0.4131812420785805, "grad_norm": 1.8440299034118652, "learning_rate": 4.958681875792142e-05, "loss": 0.1046, "step": 978 }, { "epoch": 0.4140261934938741, "grad_norm": 1.57143235206604, "learning_rate": 4.958597380650613e-05, "loss": 0.1112, "step": 980 }, { "epoch": 0.4148711449091677, "grad_norm": 1.4068124294281006, "learning_rate": 4.958512885509084e-05, "loss": 0.0852, "step": 982 }, { "epoch": 0.4157160963244613, "grad_norm": 1.538854718208313, "learning_rate": 4.9584283903675544e-05, "loss": 0.0878, "step": 984 }, { "epoch": 0.41656104773975494, "grad_norm": 0.881242573261261, "learning_rate": 4.9583438952260244e-05, "loss": 0.0786, "step": 986 }, { "epoch": 0.41740599915504856, "grad_norm": 1.5759536027908325, "learning_rate": 4.958259400084496e-05, "loss": 0.0756, "step": 988 }, { "epoch": 0.41825095057034223, "grad_norm": 1.311062216758728, "learning_rate": 4.958174904942966e-05, "loss": 0.0629, "step": 990 }, { "epoch": 0.41909590198563584, "grad_norm": 1.1126432418823242, "learning_rate": 4.9580904098014366e-05, "loss": 0.0562, "step": 992 }, { "epoch": 0.41994085340092946, "grad_norm": 1.553884744644165, "learning_rate": 4.958005914659907e-05, "loss": 0.1244, "step": 994 }, { "epoch": 0.4207858048162231, "grad_norm": 2.2669692039489746, "learning_rate": 4.957921419518378e-05, "loss": 0.098, "step": 996 }, { "epoch": 0.4216307562315167, "grad_norm": 1.335403323173523, "learning_rate": 4.957836924376849e-05, "loss": 0.0727, "step": 998 }, { "epoch": 0.4224757076468103, "grad_norm": 3.8670544624328613, "learning_rate": 4.9577524292353195e-05, "loss": 0.1018, "step": 1000 }, { "epoch": 0.4224757076468103, "eval_accuracy": 0.6790826004483531, "eval_cer": 0.09158675688312147, "eval_loss": 0.20456762611865997, "eval_runtime": 853.1585, "eval_samples_per_second": 13.594, "eval_steps_per_second": 0.425, "step": 1000 }, { "epoch": 0.4233206590621039, "grad_norm": 1.5914243459701538, "learning_rate": 4.9576679340937895e-05, "loss": 0.0926, "step": 1002 }, { "epoch": 0.42416561047739754, "grad_norm": 2.1932218074798584, "learning_rate": 4.957583438952261e-05, "loss": 0.1381, "step": 1004 }, { "epoch": 0.42501056189269115, "grad_norm": 1.216361165046692, "learning_rate": 4.957498943810731e-05, "loss": 0.0675, "step": 1006 }, { "epoch": 0.42585551330798477, "grad_norm": 1.8609379529953003, "learning_rate": 4.9574144486692023e-05, "loss": 0.0891, "step": 1008 }, { "epoch": 0.42670046472327844, "grad_norm": 1.5109552145004272, "learning_rate": 4.9573299535276724e-05, "loss": 0.0534, "step": 1010 }, { "epoch": 0.42754541613857205, "grad_norm": 2.247575521469116, "learning_rate": 4.957245458386143e-05, "loss": 0.1475, "step": 1012 }, { "epoch": 0.42839036755386567, "grad_norm": 1.9921953678131104, "learning_rate": 4.957160963244614e-05, "loss": 0.1092, "step": 1014 }, { "epoch": 0.4292353189691593, "grad_norm": 1.308531641960144, "learning_rate": 4.9570764681030845e-05, "loss": 0.048, "step": 1016 }, { "epoch": 0.4300802703844529, "grad_norm": 1.459812045097351, "learning_rate": 4.956991972961555e-05, "loss": 0.0829, "step": 1018 }, { "epoch": 0.4309252217997465, "grad_norm": 1.1062469482421875, "learning_rate": 4.956907477820026e-05, "loss": 0.0761, "step": 1020 }, { "epoch": 0.43177017321504013, "grad_norm": 0.9959312081336975, "learning_rate": 4.956822982678496e-05, "loss": 0.0776, "step": 1022 }, { "epoch": 0.43261512463033375, "grad_norm": 2.045827627182007, "learning_rate": 4.9567384875369674e-05, "loss": 0.0805, "step": 1024 }, { "epoch": 0.43346007604562736, "grad_norm": 1.678009271621704, "learning_rate": 4.9566539923954375e-05, "loss": 0.129, "step": 1026 }, { "epoch": 0.434305027460921, "grad_norm": 1.8472874164581299, "learning_rate": 4.956569497253908e-05, "loss": 0.0689, "step": 1028 }, { "epoch": 0.4351499788762146, "grad_norm": 1.6969095468521118, "learning_rate": 4.956485002112379e-05, "loss": 0.104, "step": 1030 }, { "epoch": 0.43599493029150826, "grad_norm": 1.5743629932403564, "learning_rate": 4.9564005069708496e-05, "loss": 0.0823, "step": 1032 }, { "epoch": 0.4368398817068019, "grad_norm": 1.9706130027770996, "learning_rate": 4.9563160118293203e-05, "loss": 0.1057, "step": 1034 }, { "epoch": 0.4376848331220955, "grad_norm": 1.8118597269058228, "learning_rate": 4.956231516687791e-05, "loss": 0.0937, "step": 1036 }, { "epoch": 0.4385297845373891, "grad_norm": 1.5070606470108032, "learning_rate": 4.956147021546261e-05, "loss": 0.0813, "step": 1038 }, { "epoch": 0.4393747359526827, "grad_norm": 2.170083999633789, "learning_rate": 4.9560625264047325e-05, "loss": 0.1135, "step": 1040 }, { "epoch": 0.44021968736797634, "grad_norm": 1.5932613611221313, "learning_rate": 4.9559780312632025e-05, "loss": 0.1101, "step": 1042 }, { "epoch": 0.44106463878326996, "grad_norm": 1.2873493432998657, "learning_rate": 4.955893536121673e-05, "loss": 0.0622, "step": 1044 }, { "epoch": 0.4419095901985636, "grad_norm": 1.2306898832321167, "learning_rate": 4.955809040980144e-05, "loss": 0.0947, "step": 1046 }, { "epoch": 0.4427545416138572, "grad_norm": 1.1455901861190796, "learning_rate": 4.955724545838615e-05, "loss": 0.0694, "step": 1048 }, { "epoch": 0.4435994930291508, "grad_norm": 1.363356351852417, "learning_rate": 4.9556400506970854e-05, "loss": 0.0776, "step": 1050 }, { "epoch": 0.4444444444444444, "grad_norm": 2.466238260269165, "learning_rate": 4.955555555555556e-05, "loss": 0.1503, "step": 1052 }, { "epoch": 0.4452893958597381, "grad_norm": 1.633049488067627, "learning_rate": 4.955471060414026e-05, "loss": 0.0902, "step": 1054 }, { "epoch": 0.4461343472750317, "grad_norm": 1.3308897018432617, "learning_rate": 4.9553865652724976e-05, "loss": 0.1079, "step": 1056 }, { "epoch": 0.4469792986903253, "grad_norm": 1.2919471263885498, "learning_rate": 4.9553020701309676e-05, "loss": 0.0943, "step": 1058 }, { "epoch": 0.44782425010561894, "grad_norm": 2.11273193359375, "learning_rate": 4.9552175749894383e-05, "loss": 0.1257, "step": 1060 }, { "epoch": 0.44866920152091255, "grad_norm": 1.6662590503692627, "learning_rate": 4.955133079847909e-05, "loss": 0.1103, "step": 1062 }, { "epoch": 0.44951415293620617, "grad_norm": 1.2448116540908813, "learning_rate": 4.95504858470638e-05, "loss": 0.1195, "step": 1064 }, { "epoch": 0.4503591043514998, "grad_norm": 1.083648920059204, "learning_rate": 4.9549640895648505e-05, "loss": 0.0719, "step": 1066 }, { "epoch": 0.4512040557667934, "grad_norm": 1.6844433546066284, "learning_rate": 4.954879594423321e-05, "loss": 0.1019, "step": 1068 }, { "epoch": 0.452049007182087, "grad_norm": 1.9984877109527588, "learning_rate": 4.954795099281791e-05, "loss": 0.1353, "step": 1070 }, { "epoch": 0.45289395859738063, "grad_norm": 1.462343692779541, "learning_rate": 4.954710604140263e-05, "loss": 0.1035, "step": 1072 }, { "epoch": 0.45373891001267425, "grad_norm": 2.0531160831451416, "learning_rate": 4.954626108998733e-05, "loss": 0.1279, "step": 1074 }, { "epoch": 0.4545838614279679, "grad_norm": 3.241274833679199, "learning_rate": 4.9545416138572034e-05, "loss": 0.0799, "step": 1076 }, { "epoch": 0.45542881284326153, "grad_norm": 1.5598374605178833, "learning_rate": 4.954457118715674e-05, "loss": 0.0958, "step": 1078 }, { "epoch": 0.45627376425855515, "grad_norm": 0.751557469367981, "learning_rate": 4.954372623574145e-05, "loss": 0.0542, "step": 1080 }, { "epoch": 0.45711871567384876, "grad_norm": 1.6809083223342896, "learning_rate": 4.9542881284326156e-05, "loss": 0.0907, "step": 1082 }, { "epoch": 0.4579636670891424, "grad_norm": 1.2016775608062744, "learning_rate": 4.954203633291086e-05, "loss": 0.0635, "step": 1084 }, { "epoch": 0.458808618504436, "grad_norm": 1.912942886352539, "learning_rate": 4.9541191381495563e-05, "loss": 0.1076, "step": 1086 }, { "epoch": 0.4596535699197296, "grad_norm": 1.3507989645004272, "learning_rate": 4.954034643008027e-05, "loss": 0.0906, "step": 1088 }, { "epoch": 0.4604985213350232, "grad_norm": 1.4633934497833252, "learning_rate": 4.953950147866498e-05, "loss": 0.1085, "step": 1090 }, { "epoch": 0.46134347275031684, "grad_norm": 1.7697800397872925, "learning_rate": 4.9538656527249685e-05, "loss": 0.1049, "step": 1092 }, { "epoch": 0.46218842416561046, "grad_norm": 1.7392349243164062, "learning_rate": 4.953781157583439e-05, "loss": 0.1521, "step": 1094 }, { "epoch": 0.46303337558090407, "grad_norm": 1.0418444871902466, "learning_rate": 4.953696662441909e-05, "loss": 0.0685, "step": 1096 }, { "epoch": 0.46387832699619774, "grad_norm": 1.2857134342193604, "learning_rate": 4.953612167300381e-05, "loss": 0.1004, "step": 1098 }, { "epoch": 0.46472327841149136, "grad_norm": 1.4575908184051514, "learning_rate": 4.953527672158851e-05, "loss": 0.0841, "step": 1100 }, { "epoch": 0.46472327841149136, "eval_accuracy": 0.6908087601310571, "eval_cer": 0.0895048386954489, "eval_loss": 0.19633953273296356, "eval_runtime": 863.5349, "eval_samples_per_second": 13.431, "eval_steps_per_second": 0.42, "step": 1100 }, { "epoch": 0.465568229826785, "grad_norm": 1.182370901107788, "learning_rate": 4.9534431770173214e-05, "loss": 0.085, "step": 1102 }, { "epoch": 0.4664131812420786, "grad_norm": 1.535293698310852, "learning_rate": 4.953358681875792e-05, "loss": 0.077, "step": 1104 }, { "epoch": 0.4672581326573722, "grad_norm": 1.1662472486495972, "learning_rate": 4.953274186734263e-05, "loss": 0.0968, "step": 1106 }, { "epoch": 0.4681030840726658, "grad_norm": 3.085517406463623, "learning_rate": 4.9531896915927336e-05, "loss": 0.1435, "step": 1108 }, { "epoch": 0.46894803548795944, "grad_norm": 1.6203645467758179, "learning_rate": 4.953105196451204e-05, "loss": 0.0944, "step": 1110 }, { "epoch": 0.46979298690325305, "grad_norm": 1.4693485498428345, "learning_rate": 4.9530207013096744e-05, "loss": 0.0955, "step": 1112 }, { "epoch": 0.47063793831854667, "grad_norm": 1.4041072130203247, "learning_rate": 4.952936206168146e-05, "loss": 0.0675, "step": 1114 }, { "epoch": 0.4714828897338403, "grad_norm": 2.4356982707977295, "learning_rate": 4.952851711026616e-05, "loss": 0.0835, "step": 1116 }, { "epoch": 0.4723278411491339, "grad_norm": 1.7264307737350464, "learning_rate": 4.952767215885087e-05, "loss": 0.1218, "step": 1118 }, { "epoch": 0.47317279256442757, "grad_norm": 1.329499363899231, "learning_rate": 4.952682720743557e-05, "loss": 0.0959, "step": 1120 }, { "epoch": 0.4740177439797212, "grad_norm": 1.0061687231063843, "learning_rate": 4.952598225602028e-05, "loss": 0.052, "step": 1122 }, { "epoch": 0.4748626953950148, "grad_norm": 1.739823818206787, "learning_rate": 4.952513730460499e-05, "loss": 0.0817, "step": 1124 }, { "epoch": 0.4757076468103084, "grad_norm": 1.7504117488861084, "learning_rate": 4.9524292353189694e-05, "loss": 0.0709, "step": 1126 }, { "epoch": 0.47655259822560203, "grad_norm": 2.0645530223846436, "learning_rate": 4.9523447401774394e-05, "loss": 0.1052, "step": 1128 }, { "epoch": 0.47739754964089565, "grad_norm": 1.7341753244400024, "learning_rate": 4.952260245035911e-05, "loss": 0.0761, "step": 1130 }, { "epoch": 0.47824250105618926, "grad_norm": 1.0064074993133545, "learning_rate": 4.952175749894381e-05, "loss": 0.0867, "step": 1132 }, { "epoch": 0.4790874524714829, "grad_norm": 2.0180881023406982, "learning_rate": 4.952091254752852e-05, "loss": 0.0961, "step": 1134 }, { "epoch": 0.4799324038867765, "grad_norm": 1.345908761024475, "learning_rate": 4.952006759611322e-05, "loss": 0.1018, "step": 1136 }, { "epoch": 0.4807773553020701, "grad_norm": 1.6427843570709229, "learning_rate": 4.951922264469793e-05, "loss": 0.1179, "step": 1138 }, { "epoch": 0.4816223067173637, "grad_norm": 1.294453501701355, "learning_rate": 4.951837769328264e-05, "loss": 0.0795, "step": 1140 }, { "epoch": 0.4824672581326574, "grad_norm": 1.0479216575622559, "learning_rate": 4.9517532741867345e-05, "loss": 0.075, "step": 1142 }, { "epoch": 0.483312209547951, "grad_norm": 1.1413781642913818, "learning_rate": 4.951668779045205e-05, "loss": 0.0705, "step": 1144 }, { "epoch": 0.4841571609632446, "grad_norm": 1.2139525413513184, "learning_rate": 4.951584283903676e-05, "loss": 0.075, "step": 1146 }, { "epoch": 0.48500211237853824, "grad_norm": 1.3668715953826904, "learning_rate": 4.951499788762146e-05, "loss": 0.1006, "step": 1148 }, { "epoch": 0.48584706379383186, "grad_norm": 1.5356974601745605, "learning_rate": 4.9514152936206173e-05, "loss": 0.0996, "step": 1150 }, { "epoch": 0.4866920152091255, "grad_norm": 1.1368151903152466, "learning_rate": 4.9513307984790874e-05, "loss": 0.1125, "step": 1152 }, { "epoch": 0.4875369666244191, "grad_norm": 1.661220908164978, "learning_rate": 4.951246303337558e-05, "loss": 0.0705, "step": 1154 }, { "epoch": 0.4883819180397127, "grad_norm": 1.7183247804641724, "learning_rate": 4.951161808196029e-05, "loss": 0.086, "step": 1156 }, { "epoch": 0.4892268694550063, "grad_norm": 1.1112664937973022, "learning_rate": 4.9510773130544995e-05, "loss": 0.0401, "step": 1158 }, { "epoch": 0.49007182087029993, "grad_norm": 2.460999011993408, "learning_rate": 4.95099281791297e-05, "loss": 0.1188, "step": 1160 }, { "epoch": 0.4909167722855936, "grad_norm": 1.5830085277557373, "learning_rate": 4.950908322771441e-05, "loss": 0.0991, "step": 1162 }, { "epoch": 0.4917617237008872, "grad_norm": 1.55359947681427, "learning_rate": 4.950823827629911e-05, "loss": 0.0971, "step": 1164 }, { "epoch": 0.49260667511618084, "grad_norm": 1.919572114944458, "learning_rate": 4.9507393324883824e-05, "loss": 0.0943, "step": 1166 }, { "epoch": 0.49345162653147445, "grad_norm": 1.5410270690917969, "learning_rate": 4.9506548373468525e-05, "loss": 0.1009, "step": 1168 }, { "epoch": 0.49429657794676807, "grad_norm": 1.9407888650894165, "learning_rate": 4.950570342205323e-05, "loss": 0.1136, "step": 1170 }, { "epoch": 0.4951415293620617, "grad_norm": 1.451253056526184, "learning_rate": 4.950485847063794e-05, "loss": 0.0933, "step": 1172 }, { "epoch": 0.4959864807773553, "grad_norm": 1.2166279554367065, "learning_rate": 4.9504013519222646e-05, "loss": 0.0745, "step": 1174 }, { "epoch": 0.4968314321926489, "grad_norm": 1.455151081085205, "learning_rate": 4.9503168567807353e-05, "loss": 0.0863, "step": 1176 }, { "epoch": 0.49767638360794253, "grad_norm": 1.4487229585647583, "learning_rate": 4.950232361639206e-05, "loss": 0.1104, "step": 1178 }, { "epoch": 0.49852133502323615, "grad_norm": 1.898914098739624, "learning_rate": 4.950147866497676e-05, "loss": 0.0923, "step": 1180 }, { "epoch": 0.49936628643852976, "grad_norm": 1.0237014293670654, "learning_rate": 4.9500633713561475e-05, "loss": 0.0685, "step": 1182 }, { "epoch": 0.5002112378538234, "grad_norm": 2.124537944793701, "learning_rate": 4.9499788762146176e-05, "loss": 0.0698, "step": 1184 }, { "epoch": 0.501056189269117, "grad_norm": 1.1420694589614868, "learning_rate": 4.949894381073088e-05, "loss": 0.0652, "step": 1186 }, { "epoch": 0.5019011406844106, "grad_norm": 1.4151241779327393, "learning_rate": 4.949809885931559e-05, "loss": 0.0701, "step": 1188 }, { "epoch": 0.5027460920997042, "grad_norm": 1.3990213871002197, "learning_rate": 4.94972539079003e-05, "loss": 0.0737, "step": 1190 }, { "epoch": 0.5035910435149978, "grad_norm": 1.4167402982711792, "learning_rate": 4.9496408956485004e-05, "loss": 0.0812, "step": 1192 }, { "epoch": 0.5044359949302915, "grad_norm": 1.370712399482727, "learning_rate": 4.949556400506971e-05, "loss": 0.0744, "step": 1194 }, { "epoch": 0.5052809463455852, "grad_norm": 1.3453139066696167, "learning_rate": 4.949471905365441e-05, "loss": 0.0754, "step": 1196 }, { "epoch": 0.5061258977608788, "grad_norm": 1.5076264142990112, "learning_rate": 4.9493874102239126e-05, "loss": 0.1056, "step": 1198 }, { "epoch": 0.5069708491761724, "grad_norm": 1.5745254755020142, "learning_rate": 4.9493029150823826e-05, "loss": 0.1156, "step": 1200 }, { "epoch": 0.5069708491761724, "eval_accuracy": 0.6875323331608898, "eval_cer": 0.0944185184858456, "eval_loss": 0.20057915151119232, "eval_runtime": 866.1613, "eval_samples_per_second": 13.39, "eval_steps_per_second": 0.419, "step": 1200 }, { "epoch": 0.507815800591466, "grad_norm": 1.8199948072433472, "learning_rate": 4.9492184199408534e-05, "loss": 0.0937, "step": 1202 }, { "epoch": 0.5086607520067596, "grad_norm": 1.4204751253128052, "learning_rate": 4.949133924799324e-05, "loss": 0.0713, "step": 1204 }, { "epoch": 0.5095057034220533, "grad_norm": 1.4452489614486694, "learning_rate": 4.949049429657795e-05, "loss": 0.0746, "step": 1206 }, { "epoch": 0.5103506548373469, "grad_norm": 1.4586948156356812, "learning_rate": 4.9489649345162655e-05, "loss": 0.0855, "step": 1208 }, { "epoch": 0.5111956062526405, "grad_norm": 1.7247511148452759, "learning_rate": 4.948880439374736e-05, "loss": 0.116, "step": 1210 }, { "epoch": 0.5120405576679341, "grad_norm": 1.3063023090362549, "learning_rate": 4.948795944233206e-05, "loss": 0.1308, "step": 1212 }, { "epoch": 0.5128855090832277, "grad_norm": 1.9612244367599487, "learning_rate": 4.948711449091678e-05, "loss": 0.1132, "step": 1214 }, { "epoch": 0.5137304604985213, "grad_norm": 1.2515413761138916, "learning_rate": 4.948626953950148e-05, "loss": 0.0853, "step": 1216 }, { "epoch": 0.514575411913815, "grad_norm": 1.5823932886123657, "learning_rate": 4.948542458808619e-05, "loss": 0.0938, "step": 1218 }, { "epoch": 0.5154203633291086, "grad_norm": 1.1870125532150269, "learning_rate": 4.948457963667089e-05, "loss": 0.0729, "step": 1220 }, { "epoch": 0.5162653147444022, "grad_norm": 1.5410425662994385, "learning_rate": 4.94837346852556e-05, "loss": 0.1073, "step": 1222 }, { "epoch": 0.5171102661596958, "grad_norm": 1.2500526905059814, "learning_rate": 4.9482889733840306e-05, "loss": 0.0997, "step": 1224 }, { "epoch": 0.5179552175749894, "grad_norm": 1.1845496892929077, "learning_rate": 4.948204478242501e-05, "loss": 0.0758, "step": 1226 }, { "epoch": 0.518800168990283, "grad_norm": 1.2934046983718872, "learning_rate": 4.9481199831009714e-05, "loss": 0.0725, "step": 1228 }, { "epoch": 0.5196451204055766, "grad_norm": 1.3043361902236938, "learning_rate": 4.948035487959443e-05, "loss": 0.0793, "step": 1230 }, { "epoch": 0.5204900718208703, "grad_norm": 1.6191684007644653, "learning_rate": 4.947950992817913e-05, "loss": 0.1012, "step": 1232 }, { "epoch": 0.5213350232361639, "grad_norm": 1.4432382583618164, "learning_rate": 4.947866497676384e-05, "loss": 0.0988, "step": 1234 }, { "epoch": 0.5221799746514575, "grad_norm": 2.0699760913848877, "learning_rate": 4.947782002534854e-05, "loss": 0.1041, "step": 1236 }, { "epoch": 0.5230249260667512, "grad_norm": 1.1539101600646973, "learning_rate": 4.947697507393325e-05, "loss": 0.0571, "step": 1238 }, { "epoch": 0.5238698774820448, "grad_norm": 1.6627494096755981, "learning_rate": 4.947613012251796e-05, "loss": 0.1198, "step": 1240 }, { "epoch": 0.5247148288973384, "grad_norm": 1.9634501934051514, "learning_rate": 4.9475285171102664e-05, "loss": 0.0775, "step": 1242 }, { "epoch": 0.5255597803126321, "grad_norm": 2.4786336421966553, "learning_rate": 4.947444021968737e-05, "loss": 0.1651, "step": 1244 }, { "epoch": 0.5264047317279257, "grad_norm": 1.4655958414077759, "learning_rate": 4.947359526827208e-05, "loss": 0.1079, "step": 1246 }, { "epoch": 0.5272496831432193, "grad_norm": 1.3418774604797363, "learning_rate": 4.947275031685678e-05, "loss": 0.0967, "step": 1248 }, { "epoch": 0.5280946345585129, "grad_norm": 0.9843947887420654, "learning_rate": 4.947190536544149e-05, "loss": 0.0791, "step": 1250 }, { "epoch": 0.5289395859738065, "grad_norm": 1.4920053482055664, "learning_rate": 4.947106041402619e-05, "loss": 0.1008, "step": 1252 }, { "epoch": 0.5297845373891001, "grad_norm": 1.4727956056594849, "learning_rate": 4.94702154626109e-05, "loss": 0.0876, "step": 1254 }, { "epoch": 0.5306294888043938, "grad_norm": 1.8977808952331543, "learning_rate": 4.946937051119561e-05, "loss": 0.1361, "step": 1256 }, { "epoch": 0.5314744402196874, "grad_norm": 1.456107258796692, "learning_rate": 4.9468525559780315e-05, "loss": 0.0878, "step": 1258 }, { "epoch": 0.532319391634981, "grad_norm": 1.4788284301757812, "learning_rate": 4.946768060836502e-05, "loss": 0.0822, "step": 1260 }, { "epoch": 0.5331643430502746, "grad_norm": 1.1848583221435547, "learning_rate": 4.946683565694973e-05, "loss": 0.0886, "step": 1262 }, { "epoch": 0.5340092944655682, "grad_norm": 0.9302415251731873, "learning_rate": 4.946599070553443e-05, "loss": 0.0481, "step": 1264 }, { "epoch": 0.5348542458808618, "grad_norm": 1.38499915599823, "learning_rate": 4.9465145754119144e-05, "loss": 0.0893, "step": 1266 }, { "epoch": 0.5356991972961554, "grad_norm": 1.4074949026107788, "learning_rate": 4.9464300802703844e-05, "loss": 0.1123, "step": 1268 }, { "epoch": 0.5365441487114491, "grad_norm": 1.7731876373291016, "learning_rate": 4.946345585128855e-05, "loss": 0.1557, "step": 1270 }, { "epoch": 0.5373891001267427, "grad_norm": 1.3012690544128418, "learning_rate": 4.946261089987326e-05, "loss": 0.0454, "step": 1272 }, { "epoch": 0.5382340515420363, "grad_norm": 1.2259783744812012, "learning_rate": 4.9461765948457966e-05, "loss": 0.0802, "step": 1274 }, { "epoch": 0.5390790029573299, "grad_norm": 1.516466736793518, "learning_rate": 4.946092099704267e-05, "loss": 0.0518, "step": 1276 }, { "epoch": 0.5399239543726235, "grad_norm": 1.8392707109451294, "learning_rate": 4.946007604562738e-05, "loss": 0.1264, "step": 1278 }, { "epoch": 0.5407689057879171, "grad_norm": 1.0790072679519653, "learning_rate": 4.945923109421208e-05, "loss": 0.0821, "step": 1280 }, { "epoch": 0.5416138572032109, "grad_norm": 1.1289983987808228, "learning_rate": 4.9458386142796794e-05, "loss": 0.1059, "step": 1282 }, { "epoch": 0.5424588086185045, "grad_norm": 1.5571544170379639, "learning_rate": 4.9457541191381495e-05, "loss": 0.0883, "step": 1284 }, { "epoch": 0.5433037600337981, "grad_norm": 1.0887531042099, "learning_rate": 4.94566962399662e-05, "loss": 0.0886, "step": 1286 }, { "epoch": 0.5441487114490917, "grad_norm": 1.6409960985183716, "learning_rate": 4.945585128855091e-05, "loss": 0.1336, "step": 1288 }, { "epoch": 0.5449936628643853, "grad_norm": 1.6622674465179443, "learning_rate": 4.9455006337135616e-05, "loss": 0.114, "step": 1290 }, { "epoch": 0.545838614279679, "grad_norm": 1.7212952375411987, "learning_rate": 4.9454161385720324e-05, "loss": 0.1141, "step": 1292 }, { "epoch": 0.5466835656949726, "grad_norm": 1.6510391235351562, "learning_rate": 4.945331643430503e-05, "loss": 0.1123, "step": 1294 }, { "epoch": 0.5475285171102662, "grad_norm": 2.1044700145721436, "learning_rate": 4.945247148288973e-05, "loss": 0.0899, "step": 1296 }, { "epoch": 0.5483734685255598, "grad_norm": 1.7959705591201782, "learning_rate": 4.9451626531474445e-05, "loss": 0.1213, "step": 1298 }, { "epoch": 0.5492184199408534, "grad_norm": 0.6972851753234863, "learning_rate": 4.9450781580059146e-05, "loss": 0.0517, "step": 1300 }, { "epoch": 0.5492184199408534, "eval_accuracy": 0.7086566649422315, "eval_cer": 0.08514692520091392, "eval_loss": 0.189493328332901, "eval_runtime": 866.8131, "eval_samples_per_second": 13.38, "eval_steps_per_second": 0.419, "step": 1300 }, { "epoch": 0.550063371356147, "grad_norm": 1.3948755264282227, "learning_rate": 4.944993662864385e-05, "loss": 0.058, "step": 1302 }, { "epoch": 0.5509083227714406, "grad_norm": 1.276227593421936, "learning_rate": 4.944909167722856e-05, "loss": 0.0629, "step": 1304 }, { "epoch": 0.5517532741867343, "grad_norm": 1.310956597328186, "learning_rate": 4.944824672581327e-05, "loss": 0.1241, "step": 1306 }, { "epoch": 0.5525982256020279, "grad_norm": 1.3557322025299072, "learning_rate": 4.9447401774397974e-05, "loss": 0.081, "step": 1308 }, { "epoch": 0.5534431770173215, "grad_norm": 1.0258203744888306, "learning_rate": 4.944655682298268e-05, "loss": 0.0719, "step": 1310 }, { "epoch": 0.5542881284326151, "grad_norm": 1.1905845403671265, "learning_rate": 4.944571187156738e-05, "loss": 0.0712, "step": 1312 }, { "epoch": 0.5551330798479087, "grad_norm": 1.176316499710083, "learning_rate": 4.9444866920152096e-05, "loss": 0.0587, "step": 1314 }, { "epoch": 0.5559780312632023, "grad_norm": 9.898603439331055, "learning_rate": 4.9444021968736796e-05, "loss": 0.1039, "step": 1316 }, { "epoch": 0.556822982678496, "grad_norm": 1.3090765476226807, "learning_rate": 4.944317701732151e-05, "loss": 0.0823, "step": 1318 }, { "epoch": 0.5576679340937896, "grad_norm": 1.292490839958191, "learning_rate": 4.944233206590621e-05, "loss": 0.0615, "step": 1320 }, { "epoch": 0.5585128855090832, "grad_norm": 1.8449876308441162, "learning_rate": 4.944148711449092e-05, "loss": 0.0872, "step": 1322 }, { "epoch": 0.5593578369243768, "grad_norm": 1.1056883335113525, "learning_rate": 4.9440642163075625e-05, "loss": 0.059, "step": 1324 }, { "epoch": 0.5602027883396705, "grad_norm": 1.3612993955612183, "learning_rate": 4.943979721166033e-05, "loss": 0.0717, "step": 1326 }, { "epoch": 0.5610477397549641, "grad_norm": 1.218751311302185, "learning_rate": 4.943895226024503e-05, "loss": 0.0785, "step": 1328 }, { "epoch": 0.5618926911702578, "grad_norm": 1.318162441253662, "learning_rate": 4.943810730882975e-05, "loss": 0.0926, "step": 1330 }, { "epoch": 0.5627376425855514, "grad_norm": 1.3321706056594849, "learning_rate": 4.943726235741445e-05, "loss": 0.0892, "step": 1332 }, { "epoch": 0.563582594000845, "grad_norm": 1.7704854011535645, "learning_rate": 4.943641740599916e-05, "loss": 0.1265, "step": 1334 }, { "epoch": 0.5644275454161386, "grad_norm": 1.5705130100250244, "learning_rate": 4.943557245458386e-05, "loss": 0.1006, "step": 1336 }, { "epoch": 0.5652724968314322, "grad_norm": 1.3921126127243042, "learning_rate": 4.943472750316857e-05, "loss": 0.0755, "step": 1338 }, { "epoch": 0.5661174482467258, "grad_norm": 1.9573496580123901, "learning_rate": 4.9433882551753276e-05, "loss": 0.1235, "step": 1340 }, { "epoch": 0.5669623996620194, "grad_norm": 1.466693639755249, "learning_rate": 4.943303760033798e-05, "loss": 0.0701, "step": 1342 }, { "epoch": 0.5678073510773131, "grad_norm": 0.9446965456008911, "learning_rate": 4.943219264892269e-05, "loss": 0.1052, "step": 1344 }, { "epoch": 0.5686523024926067, "grad_norm": 1.8884165287017822, "learning_rate": 4.94313476975074e-05, "loss": 0.0726, "step": 1346 }, { "epoch": 0.5694972539079003, "grad_norm": 2.2107183933258057, "learning_rate": 4.94305027460921e-05, "loss": 0.1353, "step": 1348 }, { "epoch": 0.5703422053231939, "grad_norm": 1.5481027364730835, "learning_rate": 4.942965779467681e-05, "loss": 0.0856, "step": 1350 }, { "epoch": 0.5711871567384875, "grad_norm": 1.549285650253296, "learning_rate": 4.942881284326151e-05, "loss": 0.1037, "step": 1352 }, { "epoch": 0.5720321081537811, "grad_norm": 1.2403219938278198, "learning_rate": 4.942796789184622e-05, "loss": 0.0832, "step": 1354 }, { "epoch": 0.5728770595690748, "grad_norm": 0.9897046089172363, "learning_rate": 4.942712294043093e-05, "loss": 0.0664, "step": 1356 }, { "epoch": 0.5737220109843684, "grad_norm": 1.9358055591583252, "learning_rate": 4.9426277989015634e-05, "loss": 0.0824, "step": 1358 }, { "epoch": 0.574566962399662, "grad_norm": 1.8482145071029663, "learning_rate": 4.942543303760034e-05, "loss": 0.1046, "step": 1360 }, { "epoch": 0.5754119138149556, "grad_norm": 1.4269449710845947, "learning_rate": 4.942458808618505e-05, "loss": 0.0983, "step": 1362 }, { "epoch": 0.5762568652302492, "grad_norm": 1.4967262744903564, "learning_rate": 4.942374313476975e-05, "loss": 0.0965, "step": 1364 }, { "epoch": 0.5771018166455428, "grad_norm": 1.8186310529708862, "learning_rate": 4.942289818335446e-05, "loss": 0.092, "step": 1366 }, { "epoch": 0.5779467680608364, "grad_norm": 1.330729603767395, "learning_rate": 4.942205323193916e-05, "loss": 0.0979, "step": 1368 }, { "epoch": 0.5787917194761302, "grad_norm": 1.4221312999725342, "learning_rate": 4.942120828052387e-05, "loss": 0.0516, "step": 1370 }, { "epoch": 0.5796366708914238, "grad_norm": 1.670209288597107, "learning_rate": 4.942036332910858e-05, "loss": 0.0888, "step": 1372 }, { "epoch": 0.5804816223067174, "grad_norm": 1.2513757944107056, "learning_rate": 4.9419518377693285e-05, "loss": 0.106, "step": 1374 }, { "epoch": 0.581326573722011, "grad_norm": 0.760837197303772, "learning_rate": 4.941867342627799e-05, "loss": 0.0722, "step": 1376 }, { "epoch": 0.5821715251373046, "grad_norm": 1.0159239768981934, "learning_rate": 4.94178284748627e-05, "loss": 0.0757, "step": 1378 }, { "epoch": 0.5830164765525983, "grad_norm": 1.9194375276565552, "learning_rate": 4.94169835234474e-05, "loss": 0.0796, "step": 1380 }, { "epoch": 0.5838614279678919, "grad_norm": 1.4433456659317017, "learning_rate": 4.9416138572032114e-05, "loss": 0.076, "step": 1382 }, { "epoch": 0.5847063793831855, "grad_norm": 1.4545176029205322, "learning_rate": 4.9415293620616814e-05, "loss": 0.0777, "step": 1384 }, { "epoch": 0.5855513307984791, "grad_norm": 0.9777308106422424, "learning_rate": 4.941444866920152e-05, "loss": 0.0548, "step": 1386 }, { "epoch": 0.5863962822137727, "grad_norm": 1.5005024671554565, "learning_rate": 4.941360371778623e-05, "loss": 0.1143, "step": 1388 }, { "epoch": 0.5872412336290663, "grad_norm": 1.571602463722229, "learning_rate": 4.9412758766370936e-05, "loss": 0.0929, "step": 1390 }, { "epoch": 0.5880861850443599, "grad_norm": 1.8419545888900757, "learning_rate": 4.941191381495564e-05, "loss": 0.1039, "step": 1392 }, { "epoch": 0.5889311364596536, "grad_norm": 1.9891055822372437, "learning_rate": 4.941106886354035e-05, "loss": 0.0944, "step": 1394 }, { "epoch": 0.5897760878749472, "grad_norm": 1.0187172889709473, "learning_rate": 4.941022391212505e-05, "loss": 0.1026, "step": 1396 }, { "epoch": 0.5906210392902408, "grad_norm": 1.8273286819458008, "learning_rate": 4.9409378960709764e-05, "loss": 0.1627, "step": 1398 }, { "epoch": 0.5914659907055344, "grad_norm": 1.9127320051193237, "learning_rate": 4.9408534009294465e-05, "loss": 0.0871, "step": 1400 }, { "epoch": 0.5914659907055344, "eval_accuracy": 0.6934816347646146, "eval_cer": 0.08890496396340763, "eval_loss": 0.19768354296684265, "eval_runtime": 860.1969, "eval_samples_per_second": 13.483, "eval_steps_per_second": 0.422, "step": 1400 }, { "epoch": 0.592310942120828, "grad_norm": 1.432771921157837, "learning_rate": 4.940768905787917e-05, "loss": 0.12, "step": 1402 }, { "epoch": 0.5931558935361216, "grad_norm": 1.443217158317566, "learning_rate": 4.940684410646388e-05, "loss": 0.0944, "step": 1404 }, { "epoch": 0.5940008449514153, "grad_norm": 1.263504981994629, "learning_rate": 4.9405999155048586e-05, "loss": 0.0651, "step": 1406 }, { "epoch": 0.5948457963667089, "grad_norm": 1.656178593635559, "learning_rate": 4.9405154203633294e-05, "loss": 0.1205, "step": 1408 }, { "epoch": 0.5956907477820025, "grad_norm": 1.8223587274551392, "learning_rate": 4.9404309252218e-05, "loss": 0.0985, "step": 1410 }, { "epoch": 0.5965356991972962, "grad_norm": 1.6137700080871582, "learning_rate": 4.94034643008027e-05, "loss": 0.1026, "step": 1412 }, { "epoch": 0.5973806506125898, "grad_norm": 1.2118284702301025, "learning_rate": 4.9402619349387415e-05, "loss": 0.1008, "step": 1414 }, { "epoch": 0.5982256020278834, "grad_norm": 1.541785717010498, "learning_rate": 4.9401774397972116e-05, "loss": 0.1128, "step": 1416 }, { "epoch": 0.5990705534431771, "grad_norm": 1.5790534019470215, "learning_rate": 4.940092944655683e-05, "loss": 0.1197, "step": 1418 }, { "epoch": 0.5999155048584707, "grad_norm": 2.461165189743042, "learning_rate": 4.940008449514153e-05, "loss": 0.1317, "step": 1420 }, { "epoch": 0.6007604562737643, "grad_norm": 1.9079132080078125, "learning_rate": 4.939923954372624e-05, "loss": 0.0902, "step": 1422 }, { "epoch": 0.6016054076890579, "grad_norm": 1.5427287817001343, "learning_rate": 4.9398394592310944e-05, "loss": 0.0782, "step": 1424 }, { "epoch": 0.6024503591043515, "grad_norm": 1.3272346258163452, "learning_rate": 4.939754964089565e-05, "loss": 0.0631, "step": 1426 }, { "epoch": 0.6032953105196451, "grad_norm": 1.1318906545639038, "learning_rate": 4.939670468948035e-05, "loss": 0.0485, "step": 1428 }, { "epoch": 0.6041402619349387, "grad_norm": 2.0333144664764404, "learning_rate": 4.9395859738065066e-05, "loss": 0.1254, "step": 1430 }, { "epoch": 0.6049852133502324, "grad_norm": 1.9606693983078003, "learning_rate": 4.9395014786649766e-05, "loss": 0.1414, "step": 1432 }, { "epoch": 0.605830164765526, "grad_norm": 1.3295642137527466, "learning_rate": 4.939416983523448e-05, "loss": 0.0697, "step": 1434 }, { "epoch": 0.6066751161808196, "grad_norm": 1.227980613708496, "learning_rate": 4.939332488381918e-05, "loss": 0.0723, "step": 1436 }, { "epoch": 0.6075200675961132, "grad_norm": 1.926112413406372, "learning_rate": 4.939247993240389e-05, "loss": 0.136, "step": 1438 }, { "epoch": 0.6083650190114068, "grad_norm": 1.1189000606536865, "learning_rate": 4.9391634980988595e-05, "loss": 0.0709, "step": 1440 }, { "epoch": 0.6092099704267004, "grad_norm": 1.3692779541015625, "learning_rate": 4.93907900295733e-05, "loss": 0.0687, "step": 1442 }, { "epoch": 0.6100549218419941, "grad_norm": 1.1951595544815063, "learning_rate": 4.938994507815801e-05, "loss": 0.0743, "step": 1444 }, { "epoch": 0.6108998732572877, "grad_norm": 1.0123379230499268, "learning_rate": 4.938910012674272e-05, "loss": 0.0779, "step": 1446 }, { "epoch": 0.6117448246725813, "grad_norm": 1.1633543968200684, "learning_rate": 4.938825517532742e-05, "loss": 0.0938, "step": 1448 }, { "epoch": 0.6125897760878749, "grad_norm": 1.457838773727417, "learning_rate": 4.938741022391213e-05, "loss": 0.0932, "step": 1450 }, { "epoch": 0.6134347275031685, "grad_norm": 1.9920662641525269, "learning_rate": 4.938656527249683e-05, "loss": 0.1435, "step": 1452 }, { "epoch": 0.6142796789184621, "grad_norm": 1.591558814048767, "learning_rate": 4.938572032108154e-05, "loss": 0.0985, "step": 1454 }, { "epoch": 0.6151246303337559, "grad_norm": 1.4729034900665283, "learning_rate": 4.9384875369666246e-05, "loss": 0.1336, "step": 1456 }, { "epoch": 0.6159695817490495, "grad_norm": 1.2558763027191162, "learning_rate": 4.938403041825095e-05, "loss": 0.0699, "step": 1458 }, { "epoch": 0.6168145331643431, "grad_norm": 1.9105608463287354, "learning_rate": 4.938318546683566e-05, "loss": 0.1193, "step": 1460 }, { "epoch": 0.6176594845796367, "grad_norm": 1.7269306182861328, "learning_rate": 4.938234051542037e-05, "loss": 0.0777, "step": 1462 }, { "epoch": 0.6185044359949303, "grad_norm": 1.195877194404602, "learning_rate": 4.938149556400507e-05, "loss": 0.0568, "step": 1464 }, { "epoch": 0.6193493874102239, "grad_norm": 1.5723657608032227, "learning_rate": 4.938065061258978e-05, "loss": 0.1203, "step": 1466 }, { "epoch": 0.6201943388255176, "grad_norm": 0.8827709555625916, "learning_rate": 4.937980566117448e-05, "loss": 0.0517, "step": 1468 }, { "epoch": 0.6210392902408112, "grad_norm": 1.3201031684875488, "learning_rate": 4.937896070975919e-05, "loss": 0.0948, "step": 1470 }, { "epoch": 0.6218842416561048, "grad_norm": 0.9939825534820557, "learning_rate": 4.93781157583439e-05, "loss": 0.0537, "step": 1472 }, { "epoch": 0.6227291930713984, "grad_norm": 1.3091474771499634, "learning_rate": 4.9377270806928604e-05, "loss": 0.0888, "step": 1474 }, { "epoch": 0.623574144486692, "grad_norm": 1.2489527463912964, "learning_rate": 4.937642585551331e-05, "loss": 0.1017, "step": 1476 }, { "epoch": 0.6244190959019856, "grad_norm": 1.2982828617095947, "learning_rate": 4.937558090409802e-05, "loss": 0.0805, "step": 1478 }, { "epoch": 0.6252640473172792, "grad_norm": 1.9336445331573486, "learning_rate": 4.937473595268272e-05, "loss": 0.1048, "step": 1480 }, { "epoch": 0.6261089987325729, "grad_norm": 1.5748566389083862, "learning_rate": 4.937389100126743e-05, "loss": 0.0779, "step": 1482 }, { "epoch": 0.6269539501478665, "grad_norm": 1.3378995656967163, "learning_rate": 4.937304604985213e-05, "loss": 0.082, "step": 1484 }, { "epoch": 0.6277989015631601, "grad_norm": 1.0639171600341797, "learning_rate": 4.937220109843684e-05, "loss": 0.051, "step": 1486 }, { "epoch": 0.6286438529784537, "grad_norm": 1.6450079679489136, "learning_rate": 4.937135614702155e-05, "loss": 0.1282, "step": 1488 }, { "epoch": 0.6294888043937473, "grad_norm": 1.433846354484558, "learning_rate": 4.9370511195606255e-05, "loss": 0.091, "step": 1490 }, { "epoch": 0.6303337558090409, "grad_norm": 1.7269262075424194, "learning_rate": 4.936966624419096e-05, "loss": 0.0804, "step": 1492 }, { "epoch": 0.6311787072243346, "grad_norm": 1.0868664979934692, "learning_rate": 4.936882129277567e-05, "loss": 0.0454, "step": 1494 }, { "epoch": 0.6320236586396282, "grad_norm": 1.883876085281372, "learning_rate": 4.936797634136037e-05, "loss": 0.0747, "step": 1496 }, { "epoch": 0.6328686100549218, "grad_norm": 1.3019837141036987, "learning_rate": 4.9367131389945084e-05, "loss": 0.0904, "step": 1498 }, { "epoch": 0.6337135614702155, "grad_norm": 1.3057466745376587, "learning_rate": 4.9366286438529784e-05, "loss": 0.0998, "step": 1500 }, { "epoch": 0.6337135614702155, "eval_accuracy": 0.6930505259527505, "eval_cer": 0.08929311820178727, "eval_loss": 0.19271579384803772, "eval_runtime": 852.7052, "eval_samples_per_second": 13.601, "eval_steps_per_second": 0.426, "step": 1500 }, { "epoch": 0.6345585128855091, "grad_norm": 1.0389184951782227, "learning_rate": 4.936544148711449e-05, "loss": 0.063, "step": 1502 }, { "epoch": 0.6354034643008027, "grad_norm": 1.1744555234909058, "learning_rate": 4.93645965356992e-05, "loss": 0.0855, "step": 1504 }, { "epoch": 0.6362484157160964, "grad_norm": 1.6268651485443115, "learning_rate": 4.9363751584283906e-05, "loss": 0.0518, "step": 1506 }, { "epoch": 0.63709336713139, "grad_norm": 1.0523124933242798, "learning_rate": 4.936290663286861e-05, "loss": 0.0617, "step": 1508 }, { "epoch": 0.6379383185466836, "grad_norm": 1.024778962135315, "learning_rate": 4.936206168145332e-05, "loss": 0.1021, "step": 1510 }, { "epoch": 0.6387832699619772, "grad_norm": 1.15168297290802, "learning_rate": 4.936121673003802e-05, "loss": 0.076, "step": 1512 }, { "epoch": 0.6396282213772708, "grad_norm": 1.4460582733154297, "learning_rate": 4.9360371778622734e-05, "loss": 0.0866, "step": 1514 }, { "epoch": 0.6404731727925644, "grad_norm": 1.1238740682601929, "learning_rate": 4.9359526827207435e-05, "loss": 0.0708, "step": 1516 }, { "epoch": 0.641318124207858, "grad_norm": 1.175075650215149, "learning_rate": 4.935868187579215e-05, "loss": 0.0893, "step": 1518 }, { "epoch": 0.6421630756231517, "grad_norm": 1.2177211046218872, "learning_rate": 4.935783692437685e-05, "loss": 0.0703, "step": 1520 }, { "epoch": 0.6430080270384453, "grad_norm": 1.4435173273086548, "learning_rate": 4.9356991972961556e-05, "loss": 0.0947, "step": 1522 }, { "epoch": 0.6438529784537389, "grad_norm": 2.4668965339660645, "learning_rate": 4.9356147021546264e-05, "loss": 0.0882, "step": 1524 }, { "epoch": 0.6446979298690325, "grad_norm": 1.096189260482788, "learning_rate": 4.935530207013097e-05, "loss": 0.0671, "step": 1526 }, { "epoch": 0.6455428812843261, "grad_norm": 1.430290937423706, "learning_rate": 4.935445711871567e-05, "loss": 0.1009, "step": 1528 }, { "epoch": 0.6463878326996197, "grad_norm": 1.5221188068389893, "learning_rate": 4.9353612167300385e-05, "loss": 0.0941, "step": 1530 }, { "epoch": 0.6472327841149134, "grad_norm": 1.0545361042022705, "learning_rate": 4.9352767215885086e-05, "loss": 0.0741, "step": 1532 }, { "epoch": 0.648077735530207, "grad_norm": 1.3876293897628784, "learning_rate": 4.93519222644698e-05, "loss": 0.0963, "step": 1534 }, { "epoch": 0.6489226869455006, "grad_norm": 1.425647497177124, "learning_rate": 4.93510773130545e-05, "loss": 0.0768, "step": 1536 }, { "epoch": 0.6497676383607942, "grad_norm": 2.1672873497009277, "learning_rate": 4.935023236163921e-05, "loss": 0.0723, "step": 1538 }, { "epoch": 0.6506125897760878, "grad_norm": 1.7907317876815796, "learning_rate": 4.9349387410223914e-05, "loss": 0.0776, "step": 1540 }, { "epoch": 0.6514575411913816, "grad_norm": 1.7650822401046753, "learning_rate": 4.934854245880862e-05, "loss": 0.0895, "step": 1542 }, { "epoch": 0.6523024926066752, "grad_norm": 1.2962666749954224, "learning_rate": 4.934769750739333e-05, "loss": 0.0661, "step": 1544 }, { "epoch": 0.6531474440219688, "grad_norm": 1.4323803186416626, "learning_rate": 4.9346852555978036e-05, "loss": 0.1123, "step": 1546 }, { "epoch": 0.6539923954372624, "grad_norm": 1.3358235359191895, "learning_rate": 4.9346007604562736e-05, "loss": 0.0559, "step": 1548 }, { "epoch": 0.654837346852556, "grad_norm": 1.4420747756958008, "learning_rate": 4.934516265314745e-05, "loss": 0.0939, "step": 1550 }, { "epoch": 0.6556822982678496, "grad_norm": 1.780863881111145, "learning_rate": 4.934431770173215e-05, "loss": 0.1005, "step": 1552 }, { "epoch": 0.6565272496831432, "grad_norm": 1.2848925590515137, "learning_rate": 4.934347275031686e-05, "loss": 0.0594, "step": 1554 }, { "epoch": 0.6573722010984369, "grad_norm": 2.379051923751831, "learning_rate": 4.9342627798901565e-05, "loss": 0.1367, "step": 1556 }, { "epoch": 0.6582171525137305, "grad_norm": 1.4313839673995972, "learning_rate": 4.934178284748627e-05, "loss": 0.065, "step": 1558 }, { "epoch": 0.6590621039290241, "grad_norm": 0.9839615821838379, "learning_rate": 4.934093789607098e-05, "loss": 0.0803, "step": 1560 }, { "epoch": 0.6599070553443177, "grad_norm": 2.2462387084960938, "learning_rate": 4.934009294465569e-05, "loss": 0.1078, "step": 1562 }, { "epoch": 0.6607520067596113, "grad_norm": 1.4986047744750977, "learning_rate": 4.933924799324039e-05, "loss": 0.0967, "step": 1564 }, { "epoch": 0.6615969581749049, "grad_norm": 1.3797781467437744, "learning_rate": 4.93384030418251e-05, "loss": 0.0631, "step": 1566 }, { "epoch": 0.6624419095901986, "grad_norm": 2.0769073963165283, "learning_rate": 4.93375580904098e-05, "loss": 0.1283, "step": 1568 }, { "epoch": 0.6632868610054922, "grad_norm": 0.8822681307792664, "learning_rate": 4.933671313899451e-05, "loss": 0.0781, "step": 1570 }, { "epoch": 0.6641318124207858, "grad_norm": 1.346742033958435, "learning_rate": 4.9335868187579216e-05, "loss": 0.0983, "step": 1572 }, { "epoch": 0.6649767638360794, "grad_norm": 1.2557731866836548, "learning_rate": 4.933502323616392e-05, "loss": 0.0548, "step": 1574 }, { "epoch": 0.665821715251373, "grad_norm": 1.5680410861968994, "learning_rate": 4.933417828474863e-05, "loss": 0.0855, "step": 1576 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2320771217346191, "learning_rate": 4.933333333333334e-05, "loss": 0.0845, "step": 1578 }, { "epoch": 0.6675116180819602, "grad_norm": 1.3170132637023926, "learning_rate": 4.933248838191804e-05, "loss": 0.067, "step": 1580 }, { "epoch": 0.6683565694972539, "grad_norm": 0.8890075087547302, "learning_rate": 4.933164343050275e-05, "loss": 0.055, "step": 1582 }, { "epoch": 0.6692015209125475, "grad_norm": 1.6186269521713257, "learning_rate": 4.933079847908745e-05, "loss": 0.0718, "step": 1584 }, { "epoch": 0.6700464723278412, "grad_norm": 1.8913078308105469, "learning_rate": 4.932995352767216e-05, "loss": 0.1134, "step": 1586 }, { "epoch": 0.6708914237431348, "grad_norm": 1.2571996450424194, "learning_rate": 4.932910857625687e-05, "loss": 0.0947, "step": 1588 }, { "epoch": 0.6717363751584284, "grad_norm": 1.6301320791244507, "learning_rate": 4.9328263624841574e-05, "loss": 0.0976, "step": 1590 }, { "epoch": 0.672581326573722, "grad_norm": 1.32038414478302, "learning_rate": 4.932741867342628e-05, "loss": 0.0967, "step": 1592 }, { "epoch": 0.6734262779890157, "grad_norm": 0.9501339197158813, "learning_rate": 4.932657372201099e-05, "loss": 0.0651, "step": 1594 }, { "epoch": 0.6742712294043093, "grad_norm": 1.1755475997924805, "learning_rate": 4.932572877059569e-05, "loss": 0.0383, "step": 1596 }, { "epoch": 0.6751161808196029, "grad_norm": 1.402435302734375, "learning_rate": 4.93248838191804e-05, "loss": 0.0969, "step": 1598 }, { "epoch": 0.6759611322348965, "grad_norm": 1.6371697187423706, "learning_rate": 4.93240388677651e-05, "loss": 0.1062, "step": 1600 }, { "epoch": 0.6759611322348965, "eval_accuracy": 0.6960682876357993, "eval_cer": 0.08430886491350335, "eval_loss": 0.1908927857875824, "eval_runtime": 855.2921, "eval_samples_per_second": 13.56, "eval_steps_per_second": 0.424, "step": 1600 }, { "epoch": 0.6768060836501901, "grad_norm": 1.6198582649230957, "learning_rate": 4.932319391634981e-05, "loss": 0.0832, "step": 1602 }, { "epoch": 0.6776510350654837, "grad_norm": 1.237261414527893, "learning_rate": 4.932234896493452e-05, "loss": 0.0604, "step": 1604 }, { "epoch": 0.6784959864807774, "grad_norm": 1.5254098176956177, "learning_rate": 4.9321504013519225e-05, "loss": 0.0925, "step": 1606 }, { "epoch": 0.679340937896071, "grad_norm": 1.858588457107544, "learning_rate": 4.932065906210393e-05, "loss": 0.1063, "step": 1608 }, { "epoch": 0.6801858893113646, "grad_norm": 1.6307566165924072, "learning_rate": 4.931981411068864e-05, "loss": 0.1097, "step": 1610 }, { "epoch": 0.6810308407266582, "grad_norm": 1.245123267173767, "learning_rate": 4.931896915927334e-05, "loss": 0.0468, "step": 1612 }, { "epoch": 0.6818757921419518, "grad_norm": 1.2134346961975098, "learning_rate": 4.9318124207858054e-05, "loss": 0.0527, "step": 1614 }, { "epoch": 0.6827207435572454, "grad_norm": 1.724666953086853, "learning_rate": 4.9317279256442754e-05, "loss": 0.067, "step": 1616 }, { "epoch": 0.683565694972539, "grad_norm": 1.303871750831604, "learning_rate": 4.931643430502747e-05, "loss": 0.089, "step": 1618 }, { "epoch": 0.6844106463878327, "grad_norm": 1.2575643062591553, "learning_rate": 4.931558935361217e-05, "loss": 0.0684, "step": 1620 }, { "epoch": 0.6852555978031263, "grad_norm": 0.8384171724319458, "learning_rate": 4.9314744402196876e-05, "loss": 0.0726, "step": 1622 }, { "epoch": 0.6861005492184199, "grad_norm": 1.2416189908981323, "learning_rate": 4.931389945078158e-05, "loss": 0.0519, "step": 1624 }, { "epoch": 0.6869455006337135, "grad_norm": 1.111448884010315, "learning_rate": 4.931305449936629e-05, "loss": 0.0773, "step": 1626 }, { "epoch": 0.6877904520490071, "grad_norm": 1.8492799997329712, "learning_rate": 4.931220954795099e-05, "loss": 0.0861, "step": 1628 }, { "epoch": 0.6886354034643009, "grad_norm": 1.129862666130066, "learning_rate": 4.9311364596535704e-05, "loss": 0.063, "step": 1630 }, { "epoch": 0.6894803548795945, "grad_norm": 1.5507149696350098, "learning_rate": 4.9310519645120405e-05, "loss": 0.0924, "step": 1632 }, { "epoch": 0.6903253062948881, "grad_norm": 1.3672925233840942, "learning_rate": 4.930967469370512e-05, "loss": 0.098, "step": 1634 }, { "epoch": 0.6911702577101817, "grad_norm": 1.8510890007019043, "learning_rate": 4.930882974228982e-05, "loss": 0.0784, "step": 1636 }, { "epoch": 0.6920152091254753, "grad_norm": 1.4934600591659546, "learning_rate": 4.9307984790874526e-05, "loss": 0.1049, "step": 1638 }, { "epoch": 0.6928601605407689, "grad_norm": 1.453052282333374, "learning_rate": 4.9307139839459234e-05, "loss": 0.0774, "step": 1640 }, { "epoch": 0.6937051119560625, "grad_norm": 2.2687666416168213, "learning_rate": 4.930629488804394e-05, "loss": 0.1204, "step": 1642 }, { "epoch": 0.6945500633713562, "grad_norm": 1.256612777709961, "learning_rate": 4.930544993662865e-05, "loss": 0.0879, "step": 1644 }, { "epoch": 0.6953950147866498, "grad_norm": 1.2979470491409302, "learning_rate": 4.9304604985213355e-05, "loss": 0.0789, "step": 1646 }, { "epoch": 0.6962399662019434, "grad_norm": 1.1799136400222778, "learning_rate": 4.9303760033798056e-05, "loss": 0.0836, "step": 1648 }, { "epoch": 0.697084917617237, "grad_norm": 1.8143103122711182, "learning_rate": 4.930291508238277e-05, "loss": 0.0853, "step": 1650 }, { "epoch": 0.6979298690325306, "grad_norm": 1.3416088819503784, "learning_rate": 4.930207013096747e-05, "loss": 0.0847, "step": 1652 }, { "epoch": 0.6987748204478242, "grad_norm": 1.4958585500717163, "learning_rate": 4.930122517955218e-05, "loss": 0.0858, "step": 1654 }, { "epoch": 0.6996197718631179, "grad_norm": 1.0670164823532104, "learning_rate": 4.9300380228136884e-05, "loss": 0.069, "step": 1656 }, { "epoch": 0.7004647232784115, "grad_norm": 1.2606033086776733, "learning_rate": 4.929953527672159e-05, "loss": 0.0869, "step": 1658 }, { "epoch": 0.7013096746937051, "grad_norm": 1.3816112279891968, "learning_rate": 4.92986903253063e-05, "loss": 0.0723, "step": 1660 }, { "epoch": 0.7021546261089987, "grad_norm": 1.619861125946045, "learning_rate": 4.9297845373891006e-05, "loss": 0.1112, "step": 1662 }, { "epoch": 0.7029995775242923, "grad_norm": 1.5590943098068237, "learning_rate": 4.9297000422475706e-05, "loss": 0.1013, "step": 1664 }, { "epoch": 0.7038445289395859, "grad_norm": 1.2259608507156372, "learning_rate": 4.929615547106042e-05, "loss": 0.0855, "step": 1666 }, { "epoch": 0.7046894803548795, "grad_norm": 1.231600284576416, "learning_rate": 4.929531051964512e-05, "loss": 0.0624, "step": 1668 }, { "epoch": 0.7055344317701732, "grad_norm": 1.3199141025543213, "learning_rate": 4.929446556822983e-05, "loss": 0.0742, "step": 1670 }, { "epoch": 0.7063793831854668, "grad_norm": 1.259050726890564, "learning_rate": 4.9293620616814535e-05, "loss": 0.0468, "step": 1672 }, { "epoch": 0.7072243346007605, "grad_norm": 1.5264647006988525, "learning_rate": 4.929277566539924e-05, "loss": 0.0859, "step": 1674 }, { "epoch": 0.7080692860160541, "grad_norm": 1.6180810928344727, "learning_rate": 4.929193071398395e-05, "loss": 0.0636, "step": 1676 }, { "epoch": 0.7089142374313477, "grad_norm": 0.8615092635154724, "learning_rate": 4.929108576256866e-05, "loss": 0.0533, "step": 1678 }, { "epoch": 0.7097591888466414, "grad_norm": 1.568564534187317, "learning_rate": 4.929024081115336e-05, "loss": 0.0794, "step": 1680 }, { "epoch": 0.710604140261935, "grad_norm": 1.2899255752563477, "learning_rate": 4.928939585973807e-05, "loss": 0.0938, "step": 1682 }, { "epoch": 0.7114490916772286, "grad_norm": 1.375060796737671, "learning_rate": 4.928855090832277e-05, "loss": 0.0741, "step": 1684 }, { "epoch": 0.7122940430925222, "grad_norm": 1.205748200416565, "learning_rate": 4.928770595690748e-05, "loss": 0.0844, "step": 1686 }, { "epoch": 0.7131389945078158, "grad_norm": 1.0106422901153564, "learning_rate": 4.9286861005492186e-05, "loss": 0.0594, "step": 1688 }, { "epoch": 0.7139839459231094, "grad_norm": 1.668855905532837, "learning_rate": 4.928601605407689e-05, "loss": 0.1166, "step": 1690 }, { "epoch": 0.714828897338403, "grad_norm": 1.094852089881897, "learning_rate": 4.92851711026616e-05, "loss": 0.0687, "step": 1692 }, { "epoch": 0.7156738487536967, "grad_norm": 1.2307512760162354, "learning_rate": 4.928432615124631e-05, "loss": 0.0645, "step": 1694 }, { "epoch": 0.7165188001689903, "grad_norm": 1.497880220413208, "learning_rate": 4.928348119983101e-05, "loss": 0.0998, "step": 1696 }, { "epoch": 0.7173637515842839, "grad_norm": 1.546085238456726, "learning_rate": 4.928263624841572e-05, "loss": 0.0825, "step": 1698 }, { "epoch": 0.7182087029995775, "grad_norm": 1.2614907026290894, "learning_rate": 4.928179129700042e-05, "loss": 0.0985, "step": 1700 }, { "epoch": 0.7182087029995775, "eval_accuracy": 0.7092602172788411, "eval_cer": 0.08464408902846758, "eval_loss": 0.18686163425445557, "eval_runtime": 849.9029, "eval_samples_per_second": 13.646, "eval_steps_per_second": 0.427, "step": 1700 }, { "epoch": 0.7190536544148711, "grad_norm": 1.5614362955093384, "learning_rate": 4.928094634558513e-05, "loss": 0.0727, "step": 1702 }, { "epoch": 0.7198986058301647, "grad_norm": 1.0478379726409912, "learning_rate": 4.928010139416984e-05, "loss": 0.0629, "step": 1704 }, { "epoch": 0.7207435572454584, "grad_norm": 1.7387198209762573, "learning_rate": 4.9279256442754544e-05, "loss": 0.097, "step": 1706 }, { "epoch": 0.721588508660752, "grad_norm": 1.8205493688583374, "learning_rate": 4.927841149133925e-05, "loss": 0.1223, "step": 1708 }, { "epoch": 0.7224334600760456, "grad_norm": 1.3796732425689697, "learning_rate": 4.927756653992396e-05, "loss": 0.0864, "step": 1710 }, { "epoch": 0.7232784114913392, "grad_norm": 1.7345277070999146, "learning_rate": 4.927672158850866e-05, "loss": 0.0995, "step": 1712 }, { "epoch": 0.7241233629066328, "grad_norm": 0.9681912660598755, "learning_rate": 4.927587663709337e-05, "loss": 0.05, "step": 1714 }, { "epoch": 0.7249683143219265, "grad_norm": 1.7644003629684448, "learning_rate": 4.927503168567807e-05, "loss": 0.1208, "step": 1716 }, { "epoch": 0.7258132657372202, "grad_norm": 1.760344386100769, "learning_rate": 4.927418673426279e-05, "loss": 0.08, "step": 1718 }, { "epoch": 0.7266582171525138, "grad_norm": 1.5169976949691772, "learning_rate": 4.927334178284749e-05, "loss": 0.0945, "step": 1720 }, { "epoch": 0.7275031685678074, "grad_norm": 1.2515987157821655, "learning_rate": 4.9272496831432195e-05, "loss": 0.0964, "step": 1722 }, { "epoch": 0.728348119983101, "grad_norm": 1.1804972887039185, "learning_rate": 4.92716518800169e-05, "loss": 0.0945, "step": 1724 }, { "epoch": 0.7291930713983946, "grad_norm": 1.7442156076431274, "learning_rate": 4.927080692860161e-05, "loss": 0.1006, "step": 1726 }, { "epoch": 0.7300380228136882, "grad_norm": 1.4811662435531616, "learning_rate": 4.926996197718631e-05, "loss": 0.1162, "step": 1728 }, { "epoch": 0.7308829742289819, "grad_norm": 1.6214262247085571, "learning_rate": 4.9269117025771024e-05, "loss": 0.0813, "step": 1730 }, { "epoch": 0.7317279256442755, "grad_norm": 1.4832850694656372, "learning_rate": 4.9268272074355724e-05, "loss": 0.1104, "step": 1732 }, { "epoch": 0.7325728770595691, "grad_norm": 1.1491117477416992, "learning_rate": 4.926742712294044e-05, "loss": 0.0714, "step": 1734 }, { "epoch": 0.7334178284748627, "grad_norm": 1.9180891513824463, "learning_rate": 4.926658217152514e-05, "loss": 0.0781, "step": 1736 }, { "epoch": 0.7342627798901563, "grad_norm": 1.390456199645996, "learning_rate": 4.9265737220109846e-05, "loss": 0.1352, "step": 1738 }, { "epoch": 0.7351077313054499, "grad_norm": 1.0331450700759888, "learning_rate": 4.926489226869455e-05, "loss": 0.0808, "step": 1740 }, { "epoch": 0.7359526827207435, "grad_norm": 1.2622566223144531, "learning_rate": 4.926404731727926e-05, "loss": 0.055, "step": 1742 }, { "epoch": 0.7367976341360372, "grad_norm": 1.1740683317184448, "learning_rate": 4.926320236586397e-05, "loss": 0.0493, "step": 1744 }, { "epoch": 0.7376425855513308, "grad_norm": 1.8595513105392456, "learning_rate": 4.9262357414448674e-05, "loss": 0.1211, "step": 1746 }, { "epoch": 0.7384875369666244, "grad_norm": 1.0683369636535645, "learning_rate": 4.9261512463033375e-05, "loss": 0.0583, "step": 1748 }, { "epoch": 0.739332488381918, "grad_norm": 1.4157065153121948, "learning_rate": 4.926066751161809e-05, "loss": 0.0554, "step": 1750 }, { "epoch": 0.7401774397972116, "grad_norm": 1.5190364122390747, "learning_rate": 4.925982256020279e-05, "loss": 0.0631, "step": 1752 }, { "epoch": 0.7410223912125052, "grad_norm": 2.315302848815918, "learning_rate": 4.9258977608787496e-05, "loss": 0.1047, "step": 1754 }, { "epoch": 0.7418673426277989, "grad_norm": 0.8665867447853088, "learning_rate": 4.9258132657372204e-05, "loss": 0.0612, "step": 1756 }, { "epoch": 0.7427122940430925, "grad_norm": 2.6190311908721924, "learning_rate": 4.925728770595691e-05, "loss": 0.1227, "step": 1758 }, { "epoch": 0.7435572454583862, "grad_norm": 1.6784873008728027, "learning_rate": 4.925644275454162e-05, "loss": 0.0606, "step": 1760 }, { "epoch": 0.7444021968736798, "grad_norm": 1.4590693712234497, "learning_rate": 4.9255597803126325e-05, "loss": 0.0942, "step": 1762 }, { "epoch": 0.7452471482889734, "grad_norm": 1.7136425971984863, "learning_rate": 4.9254752851711026e-05, "loss": 0.1141, "step": 1764 }, { "epoch": 0.746092099704267, "grad_norm": 0.9946573376655579, "learning_rate": 4.925390790029574e-05, "loss": 0.0359, "step": 1766 }, { "epoch": 0.7469370511195607, "grad_norm": 1.1567507982254028, "learning_rate": 4.925306294888044e-05, "loss": 0.055, "step": 1768 }, { "epoch": 0.7477820025348543, "grad_norm": 1.3129973411560059, "learning_rate": 4.925221799746515e-05, "loss": 0.0654, "step": 1770 }, { "epoch": 0.7486269539501479, "grad_norm": 2.021648645401001, "learning_rate": 4.9251373046049854e-05, "loss": 0.0965, "step": 1772 }, { "epoch": 0.7494719053654415, "grad_norm": 1.6443380117416382, "learning_rate": 4.925052809463456e-05, "loss": 0.1043, "step": 1774 }, { "epoch": 0.7503168567807351, "grad_norm": 1.7316606044769287, "learning_rate": 4.924968314321927e-05, "loss": 0.0941, "step": 1776 }, { "epoch": 0.7511618081960287, "grad_norm": 1.0968530178070068, "learning_rate": 4.9248838191803976e-05, "loss": 0.0676, "step": 1778 }, { "epoch": 0.7520067596113224, "grad_norm": 0.9221205115318298, "learning_rate": 4.9247993240388676e-05, "loss": 0.0763, "step": 1780 }, { "epoch": 0.752851711026616, "grad_norm": 0.9875777363777161, "learning_rate": 4.924714828897339e-05, "loss": 0.0691, "step": 1782 }, { "epoch": 0.7536966624419096, "grad_norm": 1.3123608827590942, "learning_rate": 4.924630333755809e-05, "loss": 0.0727, "step": 1784 }, { "epoch": 0.7545416138572032, "grad_norm": 1.5449692010879517, "learning_rate": 4.92454583861428e-05, "loss": 0.0764, "step": 1786 }, { "epoch": 0.7553865652724968, "grad_norm": 1.8921825885772705, "learning_rate": 4.9244613434727505e-05, "loss": 0.1129, "step": 1788 }, { "epoch": 0.7562315166877904, "grad_norm": 1.119535207748413, "learning_rate": 4.924376848331221e-05, "loss": 0.0859, "step": 1790 }, { "epoch": 0.757076468103084, "grad_norm": 1.4250285625457764, "learning_rate": 4.924292353189692e-05, "loss": 0.085, "step": 1792 }, { "epoch": 0.7579214195183777, "grad_norm": 1.9496543407440186, "learning_rate": 4.924207858048163e-05, "loss": 0.0963, "step": 1794 }, { "epoch": 0.7587663709336713, "grad_norm": 1.6314023733139038, "learning_rate": 4.924123362906633e-05, "loss": 0.079, "step": 1796 }, { "epoch": 0.7596113223489649, "grad_norm": 1.3706735372543335, "learning_rate": 4.924038867765104e-05, "loss": 0.0813, "step": 1798 }, { "epoch": 0.7604562737642585, "grad_norm": 1.6105067729949951, "learning_rate": 4.923954372623574e-05, "loss": 0.0793, "step": 1800 }, { "epoch": 0.7604562737642585, "eval_accuracy": 0.7114157613381618, "eval_cer": 0.08386778055170832, "eval_loss": 0.18643230199813843, "eval_runtime": 864.6046, "eval_samples_per_second": 13.414, "eval_steps_per_second": 0.42, "step": 1800 }, { "epoch": 0.7613012251795521, "grad_norm": 1.3744542598724365, "learning_rate": 4.923869877482045e-05, "loss": 0.1043, "step": 1802 }, { "epoch": 0.7621461765948458, "grad_norm": 1.2851905822753906, "learning_rate": 4.9237853823405156e-05, "loss": 0.0903, "step": 1804 }, { "epoch": 0.7629911280101395, "grad_norm": 1.7337363958358765, "learning_rate": 4.923700887198986e-05, "loss": 0.0664, "step": 1806 }, { "epoch": 0.7638360794254331, "grad_norm": 1.3106553554534912, "learning_rate": 4.923616392057457e-05, "loss": 0.0589, "step": 1808 }, { "epoch": 0.7646810308407267, "grad_norm": 1.8125030994415283, "learning_rate": 4.923531896915928e-05, "loss": 0.1044, "step": 1810 }, { "epoch": 0.7655259822560203, "grad_norm": 0.759692907333374, "learning_rate": 4.923447401774398e-05, "loss": 0.0423, "step": 1812 }, { "epoch": 0.7663709336713139, "grad_norm": 1.4425475597381592, "learning_rate": 4.923362906632869e-05, "loss": 0.0742, "step": 1814 }, { "epoch": 0.7672158850866075, "grad_norm": 1.7777689695358276, "learning_rate": 4.923278411491339e-05, "loss": 0.1033, "step": 1816 }, { "epoch": 0.7680608365019012, "grad_norm": 1.4362133741378784, "learning_rate": 4.9231939163498106e-05, "loss": 0.0826, "step": 1818 }, { "epoch": 0.7689057879171948, "grad_norm": 1.4595433473587036, "learning_rate": 4.923109421208281e-05, "loss": 0.0929, "step": 1820 }, { "epoch": 0.7697507393324884, "grad_norm": 1.1807221174240112, "learning_rate": 4.9230249260667514e-05, "loss": 0.0803, "step": 1822 }, { "epoch": 0.770595690747782, "grad_norm": 1.3756366968154907, "learning_rate": 4.922940430925222e-05, "loss": 0.0933, "step": 1824 }, { "epoch": 0.7714406421630756, "grad_norm": 1.903717041015625, "learning_rate": 4.922855935783693e-05, "loss": 0.1241, "step": 1826 }, { "epoch": 0.7722855935783692, "grad_norm": 1.8950796127319336, "learning_rate": 4.922771440642163e-05, "loss": 0.113, "step": 1828 }, { "epoch": 0.7731305449936628, "grad_norm": 1.7587209939956665, "learning_rate": 4.922686945500634e-05, "loss": 0.0748, "step": 1830 }, { "epoch": 0.7739754964089565, "grad_norm": 1.414786458015442, "learning_rate": 4.922602450359104e-05, "loss": 0.0958, "step": 1832 }, { "epoch": 0.7748204478242501, "grad_norm": 1.7570750713348389, "learning_rate": 4.922517955217576e-05, "loss": 0.0989, "step": 1834 }, { "epoch": 0.7756653992395437, "grad_norm": 0.9731429815292358, "learning_rate": 4.922433460076046e-05, "loss": 0.0659, "step": 1836 }, { "epoch": 0.7765103506548373, "grad_norm": 1.2279034852981567, "learning_rate": 4.9223489649345165e-05, "loss": 0.0802, "step": 1838 }, { "epoch": 0.7773553020701309, "grad_norm": 1.8844351768493652, "learning_rate": 4.922264469792987e-05, "loss": 0.1129, "step": 1840 }, { "epoch": 0.7782002534854245, "grad_norm": 1.544244408607483, "learning_rate": 4.922179974651458e-05, "loss": 0.0857, "step": 1842 }, { "epoch": 0.7790452049007182, "grad_norm": 1.417275071144104, "learning_rate": 4.9220954795099286e-05, "loss": 0.0735, "step": 1844 }, { "epoch": 0.7798901563160119, "grad_norm": 1.0909690856933594, "learning_rate": 4.9220109843683994e-05, "loss": 0.0635, "step": 1846 }, { "epoch": 0.7807351077313055, "grad_norm": 1.1615931987762451, "learning_rate": 4.9219264892268694e-05, "loss": 0.0632, "step": 1848 }, { "epoch": 0.7815800591465991, "grad_norm": 1.4405494928359985, "learning_rate": 4.921841994085341e-05, "loss": 0.0848, "step": 1850 }, { "epoch": 0.7824250105618927, "grad_norm": 0.6920841932296753, "learning_rate": 4.921757498943811e-05, "loss": 0.0404, "step": 1852 }, { "epoch": 0.7832699619771863, "grad_norm": 0.9411987662315369, "learning_rate": 4.9216730038022816e-05, "loss": 0.0662, "step": 1854 }, { "epoch": 0.78411491339248, "grad_norm": 1.6385960578918457, "learning_rate": 4.921588508660752e-05, "loss": 0.0484, "step": 1856 }, { "epoch": 0.7849598648077736, "grad_norm": 1.655723214149475, "learning_rate": 4.921504013519223e-05, "loss": 0.1107, "step": 1858 }, { "epoch": 0.7858048162230672, "grad_norm": 1.5613646507263184, "learning_rate": 4.921419518377694e-05, "loss": 0.0997, "step": 1860 }, { "epoch": 0.7866497676383608, "grad_norm": 1.729367971420288, "learning_rate": 4.9213350232361644e-05, "loss": 0.0823, "step": 1862 }, { "epoch": 0.7874947190536544, "grad_norm": 1.5075145959854126, "learning_rate": 4.9212505280946345e-05, "loss": 0.0687, "step": 1864 }, { "epoch": 0.788339670468948, "grad_norm": 1.4084213972091675, "learning_rate": 4.921166032953106e-05, "loss": 0.0671, "step": 1866 }, { "epoch": 0.7891846218842417, "grad_norm": 1.0190798044204712, "learning_rate": 4.921081537811576e-05, "loss": 0.0727, "step": 1868 }, { "epoch": 0.7900295732995353, "grad_norm": 0.8679783940315247, "learning_rate": 4.9209970426700466e-05, "loss": 0.0804, "step": 1870 }, { "epoch": 0.7908745247148289, "grad_norm": 1.3874335289001465, "learning_rate": 4.9209125475285174e-05, "loss": 0.0753, "step": 1872 }, { "epoch": 0.7917194761301225, "grad_norm": 1.33711576461792, "learning_rate": 4.920828052386988e-05, "loss": 0.0826, "step": 1874 }, { "epoch": 0.7925644275454161, "grad_norm": 1.362648606300354, "learning_rate": 4.920743557245459e-05, "loss": 0.0591, "step": 1876 }, { "epoch": 0.7934093789607097, "grad_norm": 1.0619179010391235, "learning_rate": 4.9206590621039295e-05, "loss": 0.0495, "step": 1878 }, { "epoch": 0.7942543303760033, "grad_norm": 2.31254506111145, "learning_rate": 4.9205745669623996e-05, "loss": 0.0985, "step": 1880 }, { "epoch": 0.795099281791297, "grad_norm": 1.3235636949539185, "learning_rate": 4.920490071820871e-05, "loss": 0.1215, "step": 1882 }, { "epoch": 0.7959442332065906, "grad_norm": 1.0667047500610352, "learning_rate": 4.920405576679341e-05, "loss": 0.0401, "step": 1884 }, { "epoch": 0.7967891846218842, "grad_norm": 1.7155953645706177, "learning_rate": 4.920321081537812e-05, "loss": 0.087, "step": 1886 }, { "epoch": 0.7976341360371778, "grad_norm": 1.1526762247085571, "learning_rate": 4.9202365863962824e-05, "loss": 0.0752, "step": 1888 }, { "epoch": 0.7984790874524715, "grad_norm": 1.039081335067749, "learning_rate": 4.920152091254753e-05, "loss": 0.0882, "step": 1890 }, { "epoch": 0.7993240388677652, "grad_norm": 1.4192873239517212, "learning_rate": 4.920067596113224e-05, "loss": 0.0692, "step": 1892 }, { "epoch": 0.8001689902830588, "grad_norm": 0.9547431468963623, "learning_rate": 4.9199831009716946e-05, "loss": 0.0972, "step": 1894 }, { "epoch": 0.8010139416983524, "grad_norm": 1.382888674736023, "learning_rate": 4.9198986058301646e-05, "loss": 0.1152, "step": 1896 }, { "epoch": 0.801858893113646, "grad_norm": 1.5938488245010376, "learning_rate": 4.919814110688636e-05, "loss": 0.0875, "step": 1898 }, { "epoch": 0.8027038445289396, "grad_norm": 1.211908221244812, "learning_rate": 4.919729615547106e-05, "loss": 0.0808, "step": 1900 }, { "epoch": 0.8027038445289396, "eval_accuracy": 0.7140886359717192, "eval_cer": 0.08334730100479018, "eval_loss": 0.18288320302963257, "eval_runtime": 852.7471, "eval_samples_per_second": 13.601, "eval_steps_per_second": 0.426, "step": 1900 }, { "epoch": 0.8035487959442332, "grad_norm": 1.273587703704834, "learning_rate": 4.919645120405577e-05, "loss": 0.0567, "step": 1902 }, { "epoch": 0.8043937473595268, "grad_norm": 1.4771486520767212, "learning_rate": 4.9195606252640475e-05, "loss": 0.0687, "step": 1904 }, { "epoch": 0.8052386987748205, "grad_norm": 1.6391814947128296, "learning_rate": 4.919476130122518e-05, "loss": 0.0936, "step": 1906 }, { "epoch": 0.8060836501901141, "grad_norm": 1.199585199356079, "learning_rate": 4.919391634980989e-05, "loss": 0.0855, "step": 1908 }, { "epoch": 0.8069286016054077, "grad_norm": 1.0377155542373657, "learning_rate": 4.91930713983946e-05, "loss": 0.0669, "step": 1910 }, { "epoch": 0.8077735530207013, "grad_norm": 1.5404986143112183, "learning_rate": 4.91922264469793e-05, "loss": 0.1058, "step": 1912 }, { "epoch": 0.8086185044359949, "grad_norm": 1.612430214881897, "learning_rate": 4.919138149556401e-05, "loss": 0.074, "step": 1914 }, { "epoch": 0.8094634558512885, "grad_norm": 1.8857512474060059, "learning_rate": 4.919053654414871e-05, "loss": 0.0916, "step": 1916 }, { "epoch": 0.8103084072665822, "grad_norm": 1.3807179927825928, "learning_rate": 4.9189691592733426e-05, "loss": 0.0728, "step": 1918 }, { "epoch": 0.8111533586818758, "grad_norm": 1.6425131559371948, "learning_rate": 4.9188846641318126e-05, "loss": 0.1021, "step": 1920 }, { "epoch": 0.8119983100971694, "grad_norm": 1.344103217124939, "learning_rate": 4.918800168990283e-05, "loss": 0.0515, "step": 1922 }, { "epoch": 0.812843261512463, "grad_norm": 1.3962315320968628, "learning_rate": 4.918715673848754e-05, "loss": 0.0619, "step": 1924 }, { "epoch": 0.8136882129277566, "grad_norm": 1.2917231321334839, "learning_rate": 4.918631178707225e-05, "loss": 0.0795, "step": 1926 }, { "epoch": 0.8145331643430502, "grad_norm": 2.180860996246338, "learning_rate": 4.918546683565695e-05, "loss": 0.1103, "step": 1928 }, { "epoch": 0.8153781157583438, "grad_norm": 2.4335546493530273, "learning_rate": 4.918462188424166e-05, "loss": 0.1115, "step": 1930 }, { "epoch": 0.8162230671736375, "grad_norm": 1.399709939956665, "learning_rate": 4.918377693282636e-05, "loss": 0.0957, "step": 1932 }, { "epoch": 0.8170680185889312, "grad_norm": 1.5264992713928223, "learning_rate": 4.9182931981411076e-05, "loss": 0.0728, "step": 1934 }, { "epoch": 0.8179129700042248, "grad_norm": 1.3386390209197998, "learning_rate": 4.918208702999578e-05, "loss": 0.0642, "step": 1936 }, { "epoch": 0.8187579214195184, "grad_norm": 2.438541889190674, "learning_rate": 4.9181242078580484e-05, "loss": 0.0949, "step": 1938 }, { "epoch": 0.819602872834812, "grad_norm": 0.7417229413986206, "learning_rate": 4.918039712716519e-05, "loss": 0.029, "step": 1940 }, { "epoch": 0.8204478242501057, "grad_norm": 1.8493363857269287, "learning_rate": 4.91795521757499e-05, "loss": 0.096, "step": 1942 }, { "epoch": 0.8212927756653993, "grad_norm": 0.9756290912628174, "learning_rate": 4.9178707224334606e-05, "loss": 0.064, "step": 1944 }, { "epoch": 0.8221377270806929, "grad_norm": 1.0960304737091064, "learning_rate": 4.917786227291931e-05, "loss": 0.0671, "step": 1946 }, { "epoch": 0.8229826784959865, "grad_norm": 1.3161073923110962, "learning_rate": 4.917701732150401e-05, "loss": 0.0671, "step": 1948 }, { "epoch": 0.8238276299112801, "grad_norm": 0.829484224319458, "learning_rate": 4.917617237008873e-05, "loss": 0.0633, "step": 1950 }, { "epoch": 0.8246725813265737, "grad_norm": 1.1128253936767578, "learning_rate": 4.917532741867343e-05, "loss": 0.0658, "step": 1952 }, { "epoch": 0.8255175327418673, "grad_norm": 1.2087801694869995, "learning_rate": 4.9174482467258135e-05, "loss": 0.0688, "step": 1954 }, { "epoch": 0.826362484157161, "grad_norm": 1.6095219850540161, "learning_rate": 4.917363751584284e-05, "loss": 0.1243, "step": 1956 }, { "epoch": 0.8272074355724546, "grad_norm": 1.4487024545669556, "learning_rate": 4.917279256442755e-05, "loss": 0.0848, "step": 1958 }, { "epoch": 0.8280523869877482, "grad_norm": 1.430047869682312, "learning_rate": 4.9171947613012256e-05, "loss": 0.0731, "step": 1960 }, { "epoch": 0.8288973384030418, "grad_norm": 1.377947449684143, "learning_rate": 4.9171102661596964e-05, "loss": 0.0692, "step": 1962 }, { "epoch": 0.8297422898183354, "grad_norm": 1.4219231605529785, "learning_rate": 4.9170257710181664e-05, "loss": 0.0585, "step": 1964 }, { "epoch": 0.830587241233629, "grad_norm": 1.647648811340332, "learning_rate": 4.916941275876638e-05, "loss": 0.0719, "step": 1966 }, { "epoch": 0.8314321926489227, "grad_norm": 1.743295669555664, "learning_rate": 4.916856780735108e-05, "loss": 0.0811, "step": 1968 }, { "epoch": 0.8322771440642163, "grad_norm": 1.3732768297195435, "learning_rate": 4.9167722855935786e-05, "loss": 0.0942, "step": 1970 }, { "epoch": 0.8331220954795099, "grad_norm": 2.2335240840911865, "learning_rate": 4.916687790452049e-05, "loss": 0.1215, "step": 1972 }, { "epoch": 0.8339670468948035, "grad_norm": 1.5069928169250488, "learning_rate": 4.91660329531052e-05, "loss": 0.0673, "step": 1974 }, { "epoch": 0.8348119983100971, "grad_norm": 1.2649790048599243, "learning_rate": 4.916518800168991e-05, "loss": 0.0851, "step": 1976 }, { "epoch": 0.8356569497253908, "grad_norm": 1.7199008464813232, "learning_rate": 4.9164343050274614e-05, "loss": 0.0936, "step": 1978 }, { "epoch": 0.8365019011406845, "grad_norm": 0.6023948192596436, "learning_rate": 4.9163498098859315e-05, "loss": 0.0624, "step": 1980 }, { "epoch": 0.8373468525559781, "grad_norm": 1.3942101001739502, "learning_rate": 4.916265314744403e-05, "loss": 0.0832, "step": 1982 }, { "epoch": 0.8381918039712717, "grad_norm": 0.856122612953186, "learning_rate": 4.916180819602873e-05, "loss": 0.0485, "step": 1984 }, { "epoch": 0.8390367553865653, "grad_norm": 1.3273183107376099, "learning_rate": 4.9160963244613437e-05, "loss": 0.0634, "step": 1986 }, { "epoch": 0.8398817068018589, "grad_norm": 1.4347068071365356, "learning_rate": 4.9160118293198144e-05, "loss": 0.0599, "step": 1988 }, { "epoch": 0.8407266582171525, "grad_norm": 1.5340876579284668, "learning_rate": 4.915927334178285e-05, "loss": 0.0607, "step": 1990 }, { "epoch": 0.8415716096324461, "grad_norm": 1.6133233308792114, "learning_rate": 4.915842839036756e-05, "loss": 0.076, "step": 1992 }, { "epoch": 0.8424165610477398, "grad_norm": 2.004894971847534, "learning_rate": 4.9157583438952265e-05, "loss": 0.1015, "step": 1994 }, { "epoch": 0.8432615124630334, "grad_norm": 1.0740406513214111, "learning_rate": 4.9156738487536966e-05, "loss": 0.0576, "step": 1996 }, { "epoch": 0.844106463878327, "grad_norm": 1.0599428415298462, "learning_rate": 4.915589353612168e-05, "loss": 0.0561, "step": 1998 }, { "epoch": 0.8449514152936206, "grad_norm": 1.134386420249939, "learning_rate": 4.915504858470638e-05, "loss": 0.0546, "step": 2000 }, { "epoch": 0.8449514152936206, "eval_accuracy": 0.7053802379720642, "eval_cer": 0.0823592720343693, "eval_loss": 0.18670396506786346, "eval_runtime": 843.4731, "eval_samples_per_second": 13.75, "eval_steps_per_second": 0.43, "step": 2000 }, { "epoch": 0.8457963667089142, "grad_norm": 1.8255170583724976, "learning_rate": 4.915420363329109e-05, "loss": 0.0661, "step": 2002 }, { "epoch": 0.8466413181242078, "grad_norm": 1.5995157957077026, "learning_rate": 4.9153358681875795e-05, "loss": 0.0905, "step": 2004 }, { "epoch": 0.8474862695395015, "grad_norm": 1.5706533193588257, "learning_rate": 4.91525137304605e-05, "loss": 0.1004, "step": 2006 }, { "epoch": 0.8483312209547951, "grad_norm": 1.2460238933563232, "learning_rate": 4.915166877904521e-05, "loss": 0.0841, "step": 2008 }, { "epoch": 0.8491761723700887, "grad_norm": 1.2849513292312622, "learning_rate": 4.9150823827629916e-05, "loss": 0.1101, "step": 2010 }, { "epoch": 0.8500211237853823, "grad_norm": 1.5081242322921753, "learning_rate": 4.9149978876214617e-05, "loss": 0.0944, "step": 2012 }, { "epoch": 0.8508660752006759, "grad_norm": 0.9104534387588501, "learning_rate": 4.914913392479933e-05, "loss": 0.0442, "step": 2014 }, { "epoch": 0.8517110266159695, "grad_norm": 1.4931339025497437, "learning_rate": 4.914828897338403e-05, "loss": 0.0814, "step": 2016 }, { "epoch": 0.8525559780312632, "grad_norm": 1.0569278001785278, "learning_rate": 4.9147444021968745e-05, "loss": 0.0601, "step": 2018 }, { "epoch": 0.8534009294465569, "grad_norm": 1.3178728818893433, "learning_rate": 4.9146599070553445e-05, "loss": 0.0734, "step": 2020 }, { "epoch": 0.8542458808618505, "grad_norm": 1.1889725923538208, "learning_rate": 4.914575411913815e-05, "loss": 0.0756, "step": 2022 }, { "epoch": 0.8550908322771441, "grad_norm": 1.3995158672332764, "learning_rate": 4.914490916772286e-05, "loss": 0.0758, "step": 2024 }, { "epoch": 0.8559357836924377, "grad_norm": 1.3304693698883057, "learning_rate": 4.914406421630757e-05, "loss": 0.1291, "step": 2026 }, { "epoch": 0.8567807351077313, "grad_norm": 1.1206573247909546, "learning_rate": 4.914321926489227e-05, "loss": 0.0801, "step": 2028 }, { "epoch": 0.857625686523025, "grad_norm": 1.374193787574768, "learning_rate": 4.914237431347698e-05, "loss": 0.0698, "step": 2030 }, { "epoch": 0.8584706379383186, "grad_norm": 1.6789133548736572, "learning_rate": 4.914152936206168e-05, "loss": 0.0965, "step": 2032 }, { "epoch": 0.8593155893536122, "grad_norm": 1.7472199201583862, "learning_rate": 4.9140684410646396e-05, "loss": 0.0747, "step": 2034 }, { "epoch": 0.8601605407689058, "grad_norm": 1.2843263149261475, "learning_rate": 4.9139839459231096e-05, "loss": 0.13, "step": 2036 }, { "epoch": 0.8610054921841994, "grad_norm": 1.7041819095611572, "learning_rate": 4.91389945078158e-05, "loss": 0.0828, "step": 2038 }, { "epoch": 0.861850443599493, "grad_norm": 2.2484312057495117, "learning_rate": 4.913814955640051e-05, "loss": 0.0595, "step": 2040 }, { "epoch": 0.8626953950147866, "grad_norm": 1.134190559387207, "learning_rate": 4.913730460498522e-05, "loss": 0.0963, "step": 2042 }, { "epoch": 0.8635403464300803, "grad_norm": 1.1661795377731323, "learning_rate": 4.9136459653569925e-05, "loss": 0.0601, "step": 2044 }, { "epoch": 0.8643852978453739, "grad_norm": 1.5845274925231934, "learning_rate": 4.913561470215463e-05, "loss": 0.0888, "step": 2046 }, { "epoch": 0.8652302492606675, "grad_norm": 0.9580950140953064, "learning_rate": 4.913476975073933e-05, "loss": 0.0604, "step": 2048 }, { "epoch": 0.8660752006759611, "grad_norm": 0.8986382484436035, "learning_rate": 4.9133924799324047e-05, "loss": 0.0651, "step": 2050 }, { "epoch": 0.8669201520912547, "grad_norm": 1.660507321357727, "learning_rate": 4.913307984790875e-05, "loss": 0.098, "step": 2052 }, { "epoch": 0.8677651035065483, "grad_norm": 1.333807349205017, "learning_rate": 4.9132234896493454e-05, "loss": 0.0846, "step": 2054 }, { "epoch": 0.868610054921842, "grad_norm": 1.2735717296600342, "learning_rate": 4.913138994507816e-05, "loss": 0.0843, "step": 2056 }, { "epoch": 0.8694550063371356, "grad_norm": 1.303557276725769, "learning_rate": 4.913054499366287e-05, "loss": 0.0682, "step": 2058 }, { "epoch": 0.8702999577524292, "grad_norm": 0.9738208651542664, "learning_rate": 4.9129700042247576e-05, "loss": 0.0428, "step": 2060 }, { "epoch": 0.8711449091677228, "grad_norm": 1.4468063116073608, "learning_rate": 4.912885509083228e-05, "loss": 0.0753, "step": 2062 }, { "epoch": 0.8719898605830165, "grad_norm": 1.511555790901184, "learning_rate": 4.912801013941698e-05, "loss": 0.0601, "step": 2064 }, { "epoch": 0.8728348119983101, "grad_norm": 1.3249778747558594, "learning_rate": 4.91271651880017e-05, "loss": 0.0742, "step": 2066 }, { "epoch": 0.8736797634136038, "grad_norm": 1.4642529487609863, "learning_rate": 4.91263202365864e-05, "loss": 0.0613, "step": 2068 }, { "epoch": 0.8745247148288974, "grad_norm": 1.8487999439239502, "learning_rate": 4.9125475285171105e-05, "loss": 0.097, "step": 2070 }, { "epoch": 0.875369666244191, "grad_norm": 1.1964668035507202, "learning_rate": 4.912463033375581e-05, "loss": 0.0535, "step": 2072 }, { "epoch": 0.8762146176594846, "grad_norm": 1.6669561862945557, "learning_rate": 4.912378538234052e-05, "loss": 0.0901, "step": 2074 }, { "epoch": 0.8770595690747782, "grad_norm": 1.4297568798065186, "learning_rate": 4.9122940430925227e-05, "loss": 0.0731, "step": 2076 }, { "epoch": 0.8779045204900718, "grad_norm": 1.4387907981872559, "learning_rate": 4.9122095479509934e-05, "loss": 0.0998, "step": 2078 }, { "epoch": 0.8787494719053655, "grad_norm": 1.2306314706802368, "learning_rate": 4.9121250528094634e-05, "loss": 0.0688, "step": 2080 }, { "epoch": 0.8795944233206591, "grad_norm": 1.894845724105835, "learning_rate": 4.912040557667935e-05, "loss": 0.0986, "step": 2082 }, { "epoch": 0.8804393747359527, "grad_norm": 2.315152645111084, "learning_rate": 4.911956062526405e-05, "loss": 0.0945, "step": 2084 }, { "epoch": 0.8812843261512463, "grad_norm": 2.2627370357513428, "learning_rate": 4.9118715673848756e-05, "loss": 0.0726, "step": 2086 }, { "epoch": 0.8821292775665399, "grad_norm": 1.7095221281051636, "learning_rate": 4.911787072243346e-05, "loss": 0.1009, "step": 2088 }, { "epoch": 0.8829742289818335, "grad_norm": 1.60263991355896, "learning_rate": 4.911702577101817e-05, "loss": 0.0422, "step": 2090 }, { "epoch": 0.8838191803971271, "grad_norm": 0.9724740982055664, "learning_rate": 4.911618081960288e-05, "loss": 0.0434, "step": 2092 }, { "epoch": 0.8846641318124208, "grad_norm": 1.3910263776779175, "learning_rate": 4.9115335868187585e-05, "loss": 0.0725, "step": 2094 }, { "epoch": 0.8855090832277144, "grad_norm": 1.592474341392517, "learning_rate": 4.9114490916772285e-05, "loss": 0.0549, "step": 2096 }, { "epoch": 0.886354034643008, "grad_norm": 0.9358974099159241, "learning_rate": 4.9113645965357e-05, "loss": 0.0485, "step": 2098 }, { "epoch": 0.8871989860583016, "grad_norm": 1.6186065673828125, "learning_rate": 4.91128010139417e-05, "loss": 0.0803, "step": 2100 }, { "epoch": 0.8871989860583016, "eval_accuracy": 0.7154681841696844, "eval_cer": 0.08182997080021524, "eval_loss": 0.18388408422470093, "eval_runtime": 862.7282, "eval_samples_per_second": 13.443, "eval_steps_per_second": 0.421, "step": 2100 }, { "epoch": 0.8880439374735952, "grad_norm": 1.2380942106246948, "learning_rate": 4.9111956062526407e-05, "loss": 0.0668, "step": 2102 }, { "epoch": 0.8888888888888888, "grad_norm": 1.4649783372879028, "learning_rate": 4.9111111111111114e-05, "loss": 0.0652, "step": 2104 }, { "epoch": 0.8897338403041825, "grad_norm": 1.243154525756836, "learning_rate": 4.911026615969582e-05, "loss": 0.0686, "step": 2106 }, { "epoch": 0.8905787917194762, "grad_norm": 1.6419312953948975, "learning_rate": 4.910942120828053e-05, "loss": 0.0678, "step": 2108 }, { "epoch": 0.8914237431347698, "grad_norm": 1.3396897315979004, "learning_rate": 4.9108576256865235e-05, "loss": 0.0713, "step": 2110 }, { "epoch": 0.8922686945500634, "grad_norm": 1.093079686164856, "learning_rate": 4.9107731305449936e-05, "loss": 0.071, "step": 2112 }, { "epoch": 0.893113645965357, "grad_norm": 1.6767911911010742, "learning_rate": 4.910688635403465e-05, "loss": 0.0732, "step": 2114 }, { "epoch": 0.8939585973806506, "grad_norm": 1.8932477235794067, "learning_rate": 4.910604140261935e-05, "loss": 0.0712, "step": 2116 }, { "epoch": 0.8948035487959443, "grad_norm": 1.5414156913757324, "learning_rate": 4.9105196451204064e-05, "loss": 0.0815, "step": 2118 }, { "epoch": 0.8956485002112379, "grad_norm": 1.5718928575515747, "learning_rate": 4.9104351499788765e-05, "loss": 0.0805, "step": 2120 }, { "epoch": 0.8964934516265315, "grad_norm": 1.5843585729599, "learning_rate": 4.910350654837347e-05, "loss": 0.0796, "step": 2122 }, { "epoch": 0.8973384030418251, "grad_norm": 1.3187426328659058, "learning_rate": 4.910266159695818e-05, "loss": 0.0629, "step": 2124 }, { "epoch": 0.8981833544571187, "grad_norm": 1.3708245754241943, "learning_rate": 4.9101816645542886e-05, "loss": 0.0706, "step": 2126 }, { "epoch": 0.8990283058724123, "grad_norm": 1.081969976425171, "learning_rate": 4.9100971694127587e-05, "loss": 0.0749, "step": 2128 }, { "epoch": 0.899873257287706, "grad_norm": 1.7093164920806885, "learning_rate": 4.91001267427123e-05, "loss": 0.1136, "step": 2130 }, { "epoch": 0.9007182087029996, "grad_norm": 1.232445240020752, "learning_rate": 4.9099281791297e-05, "loss": 0.054, "step": 2132 }, { "epoch": 0.9015631601182932, "grad_norm": 0.976844847202301, "learning_rate": 4.9098436839881715e-05, "loss": 0.0713, "step": 2134 }, { "epoch": 0.9024081115335868, "grad_norm": 1.0007492303848267, "learning_rate": 4.9097591888466415e-05, "loss": 0.0991, "step": 2136 }, { "epoch": 0.9032530629488804, "grad_norm": 1.6573784351348877, "learning_rate": 4.909674693705112e-05, "loss": 0.0958, "step": 2138 }, { "epoch": 0.904098014364174, "grad_norm": 0.9490051865577698, "learning_rate": 4.909590198563583e-05, "loss": 0.0394, "step": 2140 }, { "epoch": 0.9049429657794676, "grad_norm": 1.0081024169921875, "learning_rate": 4.909505703422054e-05, "loss": 0.0573, "step": 2142 }, { "epoch": 0.9057879171947613, "grad_norm": 1.080897569656372, "learning_rate": 4.9094212082805244e-05, "loss": 0.0745, "step": 2144 }, { "epoch": 0.9066328686100549, "grad_norm": 1.7212154865264893, "learning_rate": 4.909336713138995e-05, "loss": 0.0968, "step": 2146 }, { "epoch": 0.9074778200253485, "grad_norm": 1.7466483116149902, "learning_rate": 4.909252217997465e-05, "loss": 0.101, "step": 2148 }, { "epoch": 0.9083227714406422, "grad_norm": 1.1636947393417358, "learning_rate": 4.9091677228559366e-05, "loss": 0.0816, "step": 2150 }, { "epoch": 0.9091677228559358, "grad_norm": 1.3398698568344116, "learning_rate": 4.9090832277144066e-05, "loss": 0.0766, "step": 2152 }, { "epoch": 0.9100126742712294, "grad_norm": 1.0851556062698364, "learning_rate": 4.908998732572877e-05, "loss": 0.0523, "step": 2154 }, { "epoch": 0.9108576256865231, "grad_norm": 1.4844582080841064, "learning_rate": 4.908914237431348e-05, "loss": 0.0495, "step": 2156 }, { "epoch": 0.9117025771018167, "grad_norm": 1.3978465795516968, "learning_rate": 4.908829742289819e-05, "loss": 0.0908, "step": 2158 }, { "epoch": 0.9125475285171103, "grad_norm": 1.431995153427124, "learning_rate": 4.9087452471482895e-05, "loss": 0.1416, "step": 2160 }, { "epoch": 0.9133924799324039, "grad_norm": 0.8778975009918213, "learning_rate": 4.90866075200676e-05, "loss": 0.0601, "step": 2162 }, { "epoch": 0.9142374313476975, "grad_norm": 1.388907551765442, "learning_rate": 4.90857625686523e-05, "loss": 0.0787, "step": 2164 }, { "epoch": 0.9150823827629911, "grad_norm": 1.7531378269195557, "learning_rate": 4.9084917617237017e-05, "loss": 0.0747, "step": 2166 }, { "epoch": 0.9159273341782848, "grad_norm": 1.6861788034439087, "learning_rate": 4.908407266582172e-05, "loss": 0.0863, "step": 2168 }, { "epoch": 0.9167722855935784, "grad_norm": 1.4938262701034546, "learning_rate": 4.9083227714406424e-05, "loss": 0.0833, "step": 2170 }, { "epoch": 0.917617237008872, "grad_norm": 1.2034574747085571, "learning_rate": 4.908238276299113e-05, "loss": 0.0638, "step": 2172 }, { "epoch": 0.9184621884241656, "grad_norm": 1.2034447193145752, "learning_rate": 4.908153781157583e-05, "loss": 0.0532, "step": 2174 }, { "epoch": 0.9193071398394592, "grad_norm": 1.0579421520233154, "learning_rate": 4.9080692860160546e-05, "loss": 0.0811, "step": 2176 }, { "epoch": 0.9201520912547528, "grad_norm": 0.9848757386207581, "learning_rate": 4.9079847908745246e-05, "loss": 0.031, "step": 2178 }, { "epoch": 0.9209970426700465, "grad_norm": 1.3882626295089722, "learning_rate": 4.907900295732995e-05, "loss": 0.0778, "step": 2180 }, { "epoch": 0.9218419940853401, "grad_norm": 1.6235911846160889, "learning_rate": 4.907815800591466e-05, "loss": 0.0945, "step": 2182 }, { "epoch": 0.9226869455006337, "grad_norm": 1.860224723815918, "learning_rate": 4.907731305449937e-05, "loss": 0.0859, "step": 2184 }, { "epoch": 0.9235318969159273, "grad_norm": 1.091496229171753, "learning_rate": 4.9076468103084075e-05, "loss": 0.0622, "step": 2186 }, { "epoch": 0.9243768483312209, "grad_norm": 1.7291980981826782, "learning_rate": 4.907562315166878e-05, "loss": 0.0749, "step": 2188 }, { "epoch": 0.9252217997465145, "grad_norm": 1.0687997341156006, "learning_rate": 4.907477820025348e-05, "loss": 0.0507, "step": 2190 }, { "epoch": 0.9260667511618081, "grad_norm": 0.9708640575408936, "learning_rate": 4.9073933248838197e-05, "loss": 0.0558, "step": 2192 }, { "epoch": 0.9269117025771019, "grad_norm": 1.3645669221878052, "learning_rate": 4.90730882974229e-05, "loss": 0.0906, "step": 2194 }, { "epoch": 0.9277566539923955, "grad_norm": 1.1811169385910034, "learning_rate": 4.9072243346007604e-05, "loss": 0.0693, "step": 2196 }, { "epoch": 0.9286016054076891, "grad_norm": 1.0970830917358398, "learning_rate": 4.907139839459231e-05, "loss": 0.0704, "step": 2198 }, { "epoch": 0.9294465568229827, "grad_norm": 1.5071057081222534, "learning_rate": 4.907055344317702e-05, "loss": 0.0932, "step": 2200 }, { "epoch": 0.9294465568229827, "eval_accuracy": 0.7148646318330747, "eval_cer": 0.0812124526937022, "eval_loss": 0.1800081431865692, "eval_runtime": 846.8242, "eval_samples_per_second": 13.696, "eval_steps_per_second": 0.429, "step": 2200 }, { "epoch": 0.9302915082382763, "grad_norm": 1.3840326070785522, "learning_rate": 4.9069708491761726e-05, "loss": 0.0484, "step": 2202 }, { "epoch": 0.93113645965357, "grad_norm": 1.4783692359924316, "learning_rate": 4.906886354034643e-05, "loss": 0.1473, "step": 2204 }, { "epoch": 0.9319814110688636, "grad_norm": 1.3358536958694458, "learning_rate": 4.906801858893113e-05, "loss": 0.0753, "step": 2206 }, { "epoch": 0.9328263624841572, "grad_norm": 0.773380696773529, "learning_rate": 4.906717363751585e-05, "loss": 0.0335, "step": 2208 }, { "epoch": 0.9336713138994508, "grad_norm": 1.2706904411315918, "learning_rate": 4.906632868610055e-05, "loss": 0.0471, "step": 2210 }, { "epoch": 0.9345162653147444, "grad_norm": 0.99266517162323, "learning_rate": 4.9065483734685255e-05, "loss": 0.067, "step": 2212 }, { "epoch": 0.935361216730038, "grad_norm": 0.7559247612953186, "learning_rate": 4.906463878326996e-05, "loss": 0.0748, "step": 2214 }, { "epoch": 0.9362061681453316, "grad_norm": 1.2688716650009155, "learning_rate": 4.906379383185467e-05, "loss": 0.0792, "step": 2216 }, { "epoch": 0.9370511195606253, "grad_norm": 1.3983566761016846, "learning_rate": 4.9062948880439377e-05, "loss": 0.0737, "step": 2218 }, { "epoch": 0.9378960709759189, "grad_norm": 1.3656744956970215, "learning_rate": 4.9062103929024084e-05, "loss": 0.0564, "step": 2220 }, { "epoch": 0.9387410223912125, "grad_norm": 1.1833630800247192, "learning_rate": 4.9061258977608784e-05, "loss": 0.0567, "step": 2222 }, { "epoch": 0.9395859738065061, "grad_norm": 1.6361331939697266, "learning_rate": 4.90604140261935e-05, "loss": 0.0693, "step": 2224 }, { "epoch": 0.9404309252217997, "grad_norm": 1.5020231008529663, "learning_rate": 4.90595690747782e-05, "loss": 0.0655, "step": 2226 }, { "epoch": 0.9412758766370933, "grad_norm": 1.5959280729293823, "learning_rate": 4.9058724123362906e-05, "loss": 0.1087, "step": 2228 }, { "epoch": 0.942120828052387, "grad_norm": 1.0358680486679077, "learning_rate": 4.905787917194761e-05, "loss": 0.0493, "step": 2230 }, { "epoch": 0.9429657794676806, "grad_norm": 1.8778531551361084, "learning_rate": 4.905703422053232e-05, "loss": 0.0577, "step": 2232 }, { "epoch": 0.9438107308829742, "grad_norm": 2.4077320098876953, "learning_rate": 4.905618926911703e-05, "loss": 0.0651, "step": 2234 }, { "epoch": 0.9446556822982678, "grad_norm": 1.2215629816055298, "learning_rate": 4.9055344317701735e-05, "loss": 0.0704, "step": 2236 }, { "epoch": 0.9455006337135615, "grad_norm": 0.6339534521102905, "learning_rate": 4.9054499366286435e-05, "loss": 0.0603, "step": 2238 }, { "epoch": 0.9463455851288551, "grad_norm": 1.2658920288085938, "learning_rate": 4.905365441487115e-05, "loss": 0.0615, "step": 2240 }, { "epoch": 0.9471905365441488, "grad_norm": 0.8724871277809143, "learning_rate": 4.905280946345585e-05, "loss": 0.0728, "step": 2242 }, { "epoch": 0.9480354879594424, "grad_norm": 1.5649269819259644, "learning_rate": 4.905196451204056e-05, "loss": 0.1156, "step": 2244 }, { "epoch": 0.948880439374736, "grad_norm": 1.4007797241210938, "learning_rate": 4.9051119560625264e-05, "loss": 0.0734, "step": 2246 }, { "epoch": 0.9497253907900296, "grad_norm": 0.7925835251808167, "learning_rate": 4.905027460920997e-05, "loss": 0.1002, "step": 2248 }, { "epoch": 0.9505703422053232, "grad_norm": 0.9535337090492249, "learning_rate": 4.904942965779468e-05, "loss": 0.0518, "step": 2250 }, { "epoch": 0.9514152936206168, "grad_norm": 0.983545184135437, "learning_rate": 4.9048584706379385e-05, "loss": 0.0521, "step": 2252 }, { "epoch": 0.9522602450359104, "grad_norm": 1.7866032123565674, "learning_rate": 4.904773975496409e-05, "loss": 0.0574, "step": 2254 }, { "epoch": 0.9531051964512041, "grad_norm": 1.5236202478408813, "learning_rate": 4.90468948035488e-05, "loss": 0.0679, "step": 2256 }, { "epoch": 0.9539501478664977, "grad_norm": 0.9741945266723633, "learning_rate": 4.90460498521335e-05, "loss": 0.0574, "step": 2258 }, { "epoch": 0.9547950992817913, "grad_norm": 1.6713435649871826, "learning_rate": 4.9045204900718214e-05, "loss": 0.0955, "step": 2260 }, { "epoch": 0.9556400506970849, "grad_norm": 1.0833595991134644, "learning_rate": 4.9044359949302915e-05, "loss": 0.0803, "step": 2262 }, { "epoch": 0.9564850021123785, "grad_norm": 1.2216488122940063, "learning_rate": 4.904351499788762e-05, "loss": 0.0859, "step": 2264 }, { "epoch": 0.9573299535276721, "grad_norm": 1.2390130758285522, "learning_rate": 4.904267004647233e-05, "loss": 0.0506, "step": 2266 }, { "epoch": 0.9581749049429658, "grad_norm": 1.330955982208252, "learning_rate": 4.9041825095057036e-05, "loss": 0.0526, "step": 2268 }, { "epoch": 0.9590198563582594, "grad_norm": 1.5434483289718628, "learning_rate": 4.904098014364174e-05, "loss": 0.0539, "step": 2270 }, { "epoch": 0.959864807773553, "grad_norm": 0.4926339387893677, "learning_rate": 4.904013519222645e-05, "loss": 0.051, "step": 2272 }, { "epoch": 0.9607097591888466, "grad_norm": 1.3270699977874756, "learning_rate": 4.903929024081115e-05, "loss": 0.0641, "step": 2274 }, { "epoch": 0.9615547106041402, "grad_norm": 1.2787952423095703, "learning_rate": 4.9038445289395865e-05, "loss": 0.0713, "step": 2276 }, { "epoch": 0.9623996620194338, "grad_norm": 2.050325632095337, "learning_rate": 4.9037600337980565e-05, "loss": 0.0765, "step": 2278 }, { "epoch": 0.9632446134347274, "grad_norm": 1.6342370510101318, "learning_rate": 4.903675538656527e-05, "loss": 0.1075, "step": 2280 }, { "epoch": 0.9640895648500212, "grad_norm": 1.5692452192306519, "learning_rate": 4.903591043514998e-05, "loss": 0.0795, "step": 2282 }, { "epoch": 0.9649345162653148, "grad_norm": 0.9670857787132263, "learning_rate": 4.903506548373469e-05, "loss": 0.0611, "step": 2284 }, { "epoch": 0.9657794676806084, "grad_norm": 1.2690353393554688, "learning_rate": 4.9034220532319394e-05, "loss": 0.0779, "step": 2286 }, { "epoch": 0.966624419095902, "grad_norm": 1.3158725500106812, "learning_rate": 4.90333755809041e-05, "loss": 0.0876, "step": 2288 }, { "epoch": 0.9674693705111956, "grad_norm": 1.4484848976135254, "learning_rate": 4.90325306294888e-05, "loss": 0.0851, "step": 2290 }, { "epoch": 0.9683143219264893, "grad_norm": 0.6505631804466248, "learning_rate": 4.9031685678073516e-05, "loss": 0.026, "step": 2292 }, { "epoch": 0.9691592733417829, "grad_norm": 1.601660132408142, "learning_rate": 4.9030840726658216e-05, "loss": 0.0897, "step": 2294 }, { "epoch": 0.9700042247570765, "grad_norm": 1.488582730293274, "learning_rate": 4.9029995775242923e-05, "loss": 0.1167, "step": 2296 }, { "epoch": 0.9708491761723701, "grad_norm": 2.107656955718994, "learning_rate": 4.902915082382763e-05, "loss": 0.1119, "step": 2298 }, { "epoch": 0.9716941275876637, "grad_norm": 0.8586990237236023, "learning_rate": 4.902830587241234e-05, "loss": 0.0527, "step": 2300 }, { "epoch": 0.9716941275876637, "eval_accuracy": 0.7121917571995171, "eval_cer": 0.08185643586192295, "eval_loss": 0.1810568869113922, "eval_runtime": 867.447, "eval_samples_per_second": 13.37, "eval_steps_per_second": 0.418, "step": 2300 }, { "epoch": 0.9725390790029573, "grad_norm": 1.5183982849121094, "learning_rate": 4.9027460920997045e-05, "loss": 0.0819, "step": 2302 }, { "epoch": 0.973384030418251, "grad_norm": 0.7407907247543335, "learning_rate": 4.902661596958175e-05, "loss": 0.0496, "step": 2304 }, { "epoch": 0.9742289818335446, "grad_norm": 1.159289002418518, "learning_rate": 4.902577101816645e-05, "loss": 0.0925, "step": 2306 }, { "epoch": 0.9750739332488382, "grad_norm": 1.0706584453582764, "learning_rate": 4.9024926066751167e-05, "loss": 0.0459, "step": 2308 }, { "epoch": 0.9759188846641318, "grad_norm": 1.2856115102767944, "learning_rate": 4.902408111533587e-05, "loss": 0.1012, "step": 2310 }, { "epoch": 0.9767638360794254, "grad_norm": 1.437662124633789, "learning_rate": 4.9023236163920574e-05, "loss": 0.0896, "step": 2312 }, { "epoch": 0.977608787494719, "grad_norm": 1.2910943031311035, "learning_rate": 4.902239121250528e-05, "loss": 0.0656, "step": 2314 }, { "epoch": 0.9784537389100126, "grad_norm": 1.636964201927185, "learning_rate": 4.902154626108999e-05, "loss": 0.0864, "step": 2316 }, { "epoch": 0.9792986903253063, "grad_norm": 1.5599114894866943, "learning_rate": 4.9020701309674696e-05, "loss": 0.0813, "step": 2318 }, { "epoch": 0.9801436417405999, "grad_norm": 1.6414740085601807, "learning_rate": 4.90198563582594e-05, "loss": 0.066, "step": 2320 }, { "epoch": 0.9809885931558935, "grad_norm": 0.8238911032676697, "learning_rate": 4.9019011406844103e-05, "loss": 0.0533, "step": 2322 }, { "epoch": 0.9818335445711872, "grad_norm": 1.3339018821716309, "learning_rate": 4.901816645542882e-05, "loss": 0.0765, "step": 2324 }, { "epoch": 0.9826784959864808, "grad_norm": 1.6035016775131226, "learning_rate": 4.901732150401352e-05, "loss": 0.0885, "step": 2326 }, { "epoch": 0.9835234474017744, "grad_norm": 0.8856274485588074, "learning_rate": 4.9016476552598225e-05, "loss": 0.0627, "step": 2328 }, { "epoch": 0.9843683988170681, "grad_norm": 1.3794585466384888, "learning_rate": 4.901563160118293e-05, "loss": 0.072, "step": 2330 }, { "epoch": 0.9852133502323617, "grad_norm": 1.3091775178909302, "learning_rate": 4.901478664976764e-05, "loss": 0.1265, "step": 2332 }, { "epoch": 0.9860583016476553, "grad_norm": 1.0958503484725952, "learning_rate": 4.9013941698352347e-05, "loss": 0.0409, "step": 2334 }, { "epoch": 0.9869032530629489, "grad_norm": 1.701104760169983, "learning_rate": 4.9013096746937054e-05, "loss": 0.0665, "step": 2336 }, { "epoch": 0.9877482044782425, "grad_norm": 1.8106247186660767, "learning_rate": 4.9012251795521754e-05, "loss": 0.0842, "step": 2338 }, { "epoch": 0.9885931558935361, "grad_norm": 1.0502506494522095, "learning_rate": 4.901140684410647e-05, "loss": 0.0682, "step": 2340 }, { "epoch": 0.9894381073088298, "grad_norm": 1.2314811944961548, "learning_rate": 4.901056189269117e-05, "loss": 0.0712, "step": 2342 }, { "epoch": 0.9902830587241234, "grad_norm": 1.5358272790908813, "learning_rate": 4.900971694127588e-05, "loss": 0.0675, "step": 2344 }, { "epoch": 0.991128010139417, "grad_norm": 1.1753113269805908, "learning_rate": 4.900887198986058e-05, "loss": 0.07, "step": 2346 }, { "epoch": 0.9919729615547106, "grad_norm": 1.636322021484375, "learning_rate": 4.900802703844529e-05, "loss": 0.0772, "step": 2348 }, { "epoch": 0.9928179129700042, "grad_norm": 1.526485562324524, "learning_rate": 4.900718208703e-05, "loss": 0.0975, "step": 2350 }, { "epoch": 0.9936628643852978, "grad_norm": 1.3800063133239746, "learning_rate": 4.9006337135614705e-05, "loss": 0.0984, "step": 2352 }, { "epoch": 0.9945078158005914, "grad_norm": 1.46196711063385, "learning_rate": 4.900549218419941e-05, "loss": 0.0758, "step": 2354 }, { "epoch": 0.9953527672158851, "grad_norm": 0.7219085693359375, "learning_rate": 4.900464723278412e-05, "loss": 0.0641, "step": 2356 }, { "epoch": 0.9961977186311787, "grad_norm": 1.4330068826675415, "learning_rate": 4.900380228136882e-05, "loss": 0.1131, "step": 2358 }, { "epoch": 0.9970426700464723, "grad_norm": 0.9048241972923279, "learning_rate": 4.9002957329953533e-05, "loss": 0.0586, "step": 2360 }, { "epoch": 0.9978876214617659, "grad_norm": 1.220953106880188, "learning_rate": 4.9002112378538234e-05, "loss": 0.0737, "step": 2362 }, { "epoch": 0.9987325728770595, "grad_norm": 1.8127233982086182, "learning_rate": 4.900126742712294e-05, "loss": 0.0779, "step": 2364 }, { "epoch": 0.9995775242923531, "grad_norm": 0.9549395442008972, "learning_rate": 4.900042247570765e-05, "loss": 0.0507, "step": 2366 }, { "epoch": 1.0004224757076468, "grad_norm": 1.0994274616241455, "learning_rate": 4.8999577524292355e-05, "loss": 0.0667, "step": 2368 }, { "epoch": 1.0012674271229405, "grad_norm": 1.3673089742660522, "learning_rate": 4.899873257287706e-05, "loss": 0.0409, "step": 2370 }, { "epoch": 1.002112378538234, "grad_norm": 1.2117745876312256, "learning_rate": 4.899788762146177e-05, "loss": 0.0487, "step": 2372 }, { "epoch": 1.0029573299535277, "grad_norm": 1.5104575157165527, "learning_rate": 4.899704267004647e-05, "loss": 0.0739, "step": 2374 }, { "epoch": 1.0038022813688212, "grad_norm": 1.4151710271835327, "learning_rate": 4.8996197718631184e-05, "loss": 0.0622, "step": 2376 }, { "epoch": 1.004647232784115, "grad_norm": 0.9456071853637695, "learning_rate": 4.8995352767215885e-05, "loss": 0.0614, "step": 2378 }, { "epoch": 1.0054921841994084, "grad_norm": 0.9996126890182495, "learning_rate": 4.899450781580059e-05, "loss": 0.0477, "step": 2380 }, { "epoch": 1.0063371356147022, "grad_norm": 0.665332019329071, "learning_rate": 4.89936628643853e-05, "loss": 0.033, "step": 2382 }, { "epoch": 1.0071820870299957, "grad_norm": 0.9107492566108704, "learning_rate": 4.8992817912970006e-05, "loss": 0.0541, "step": 2384 }, { "epoch": 1.0080270384452894, "grad_norm": 1.0256426334381104, "learning_rate": 4.8991972961554713e-05, "loss": 0.0438, "step": 2386 }, { "epoch": 1.008871989860583, "grad_norm": 1.5273648500442505, "learning_rate": 4.899112801013942e-05, "loss": 0.0486, "step": 2388 }, { "epoch": 1.0097169412758766, "grad_norm": 0.8203524351119995, "learning_rate": 4.899028305872412e-05, "loss": 0.0556, "step": 2390 }, { "epoch": 1.0105618926911704, "grad_norm": 1.0912214517593384, "learning_rate": 4.8989438107308835e-05, "loss": 0.0579, "step": 2392 }, { "epoch": 1.0114068441064639, "grad_norm": 1.2179914712905884, "learning_rate": 4.8988593155893535e-05, "loss": 0.0423, "step": 2394 }, { "epoch": 1.0122517955217576, "grad_norm": 0.6758914589881897, "learning_rate": 4.898774820447824e-05, "loss": 0.0311, "step": 2396 }, { "epoch": 1.013096746937051, "grad_norm": 1.940477967262268, "learning_rate": 4.898690325306295e-05, "loss": 0.0979, "step": 2398 }, { "epoch": 1.0139416983523448, "grad_norm": 1.4944899082183838, "learning_rate": 4.898605830164766e-05, "loss": 0.0532, "step": 2400 }, { "epoch": 1.0139416983523448, "eval_accuracy": 0.7180548370408691, "eval_cer": 0.07823954409520364, "eval_loss": 0.17912158370018005, "eval_runtime": 844.0986, "eval_samples_per_second": 13.74, "eval_steps_per_second": 0.43, "step": 2400 }, { "epoch": 1.0147866497676383, "grad_norm": 1.0510189533233643, "learning_rate": 4.8985213350232364e-05, "loss": 0.0689, "step": 2402 }, { "epoch": 1.015631601182932, "grad_norm": 0.9086995720863342, "learning_rate": 4.898436839881707e-05, "loss": 0.0534, "step": 2404 }, { "epoch": 1.0164765525982256, "grad_norm": 1.3393218517303467, "learning_rate": 4.898352344740177e-05, "loss": 0.0418, "step": 2406 }, { "epoch": 1.0173215040135193, "grad_norm": 0.9823423027992249, "learning_rate": 4.8982678495986486e-05, "loss": 0.0455, "step": 2408 }, { "epoch": 1.0181664554288128, "grad_norm": 0.6783949136734009, "learning_rate": 4.8981833544571186e-05, "loss": 0.0495, "step": 2410 }, { "epoch": 1.0190114068441065, "grad_norm": 1.0267900228500366, "learning_rate": 4.8980988593155893e-05, "loss": 0.0452, "step": 2412 }, { "epoch": 1.0198563582594, "grad_norm": 0.7721602320671082, "learning_rate": 4.89801436417406e-05, "loss": 0.0577, "step": 2414 }, { "epoch": 1.0207013096746937, "grad_norm": 1.2067053318023682, "learning_rate": 4.897929869032531e-05, "loss": 0.0488, "step": 2416 }, { "epoch": 1.0215462610899873, "grad_norm": 1.3156932592391968, "learning_rate": 4.8978453738910015e-05, "loss": 0.0601, "step": 2418 }, { "epoch": 1.022391212505281, "grad_norm": 1.6176562309265137, "learning_rate": 4.897760878749472e-05, "loss": 0.0538, "step": 2420 }, { "epoch": 1.0232361639205745, "grad_norm": 1.0514973402023315, "learning_rate": 4.897676383607942e-05, "loss": 0.0583, "step": 2422 }, { "epoch": 1.0240811153358682, "grad_norm": 1.4830445051193237, "learning_rate": 4.8975918884664137e-05, "loss": 0.0643, "step": 2424 }, { "epoch": 1.0249260667511617, "grad_norm": 2.010371208190918, "learning_rate": 4.897507393324884e-05, "loss": 0.0437, "step": 2426 }, { "epoch": 1.0257710181664554, "grad_norm": 0.8634136319160461, "learning_rate": 4.8974228981833544e-05, "loss": 0.0405, "step": 2428 }, { "epoch": 1.026615969581749, "grad_norm": 1.5516853332519531, "learning_rate": 4.897338403041825e-05, "loss": 0.0441, "step": 2430 }, { "epoch": 1.0274609209970427, "grad_norm": 1.314122200012207, "learning_rate": 4.897253907900296e-05, "loss": 0.0669, "step": 2432 }, { "epoch": 1.0283058724123364, "grad_norm": 1.0754412412643433, "learning_rate": 4.8971694127587666e-05, "loss": 0.0333, "step": 2434 }, { "epoch": 1.02915082382763, "grad_norm": 0.8331787586212158, "learning_rate": 4.897084917617237e-05, "loss": 0.066, "step": 2436 }, { "epoch": 1.0299957752429236, "grad_norm": 1.0247435569763184, "learning_rate": 4.8970004224757073e-05, "loss": 0.0465, "step": 2438 }, { "epoch": 1.0308407266582171, "grad_norm": 1.3126165866851807, "learning_rate": 4.896915927334179e-05, "loss": 0.0551, "step": 2440 }, { "epoch": 1.0316856780735109, "grad_norm": 1.8739526271820068, "learning_rate": 4.896831432192649e-05, "loss": 0.0519, "step": 2442 }, { "epoch": 1.0325306294888044, "grad_norm": 1.7847691774368286, "learning_rate": 4.89674693705112e-05, "loss": 0.057, "step": 2444 }, { "epoch": 1.033375580904098, "grad_norm": 1.296621322631836, "learning_rate": 4.89666244190959e-05, "loss": 0.0829, "step": 2446 }, { "epoch": 1.0342205323193916, "grad_norm": 1.0262562036514282, "learning_rate": 4.896577946768061e-05, "loss": 0.0656, "step": 2448 }, { "epoch": 1.0350654837346853, "grad_norm": 1.4114569425582886, "learning_rate": 4.896493451626532e-05, "loss": 0.063, "step": 2450 }, { "epoch": 1.0359104351499788, "grad_norm": 1.8110418319702148, "learning_rate": 4.8964089564850024e-05, "loss": 0.0508, "step": 2452 }, { "epoch": 1.0367553865652726, "grad_norm": 1.9191076755523682, "learning_rate": 4.896324461343473e-05, "loss": 0.0761, "step": 2454 }, { "epoch": 1.037600337980566, "grad_norm": 1.3961220979690552, "learning_rate": 4.896239966201944e-05, "loss": 0.061, "step": 2456 }, { "epoch": 1.0384452893958598, "grad_norm": 0.8220006823539734, "learning_rate": 4.896155471060414e-05, "loss": 0.0524, "step": 2458 }, { "epoch": 1.0392902408111533, "grad_norm": 1.0066295862197876, "learning_rate": 4.896070975918885e-05, "loss": 0.0468, "step": 2460 }, { "epoch": 1.040135192226447, "grad_norm": 1.067622184753418, "learning_rate": 4.895986480777355e-05, "loss": 0.0476, "step": 2462 }, { "epoch": 1.0409801436417405, "grad_norm": 2.0356531143188477, "learning_rate": 4.895901985635826e-05, "loss": 0.0969, "step": 2464 }, { "epoch": 1.0418250950570342, "grad_norm": 2.8178772926330566, "learning_rate": 4.895817490494297e-05, "loss": 0.0532, "step": 2466 }, { "epoch": 1.0426700464723277, "grad_norm": 2.3388311862945557, "learning_rate": 4.8957329953527675e-05, "loss": 0.0768, "step": 2468 }, { "epoch": 1.0435149978876215, "grad_norm": 0.9694822430610657, "learning_rate": 4.895648500211238e-05, "loss": 0.0359, "step": 2470 }, { "epoch": 1.044359949302915, "grad_norm": 1.434303879737854, "learning_rate": 4.895564005069709e-05, "loss": 0.0581, "step": 2472 }, { "epoch": 1.0452049007182087, "grad_norm": 1.204118013381958, "learning_rate": 4.895479509928179e-05, "loss": 0.0881, "step": 2474 }, { "epoch": 1.0460498521335024, "grad_norm": 1.4074668884277344, "learning_rate": 4.8953950147866503e-05, "loss": 0.0411, "step": 2476 }, { "epoch": 1.046894803548796, "grad_norm": 1.492323637008667, "learning_rate": 4.8953105196451204e-05, "loss": 0.0401, "step": 2478 }, { "epoch": 1.0477397549640897, "grad_norm": 1.0911036729812622, "learning_rate": 4.895226024503591e-05, "loss": 0.0441, "step": 2480 }, { "epoch": 1.0485847063793832, "grad_norm": 1.6560875177383423, "learning_rate": 4.895141529362062e-05, "loss": 0.0521, "step": 2482 }, { "epoch": 1.049429657794677, "grad_norm": 1.2704576253890991, "learning_rate": 4.8950570342205325e-05, "loss": 0.064, "step": 2484 }, { "epoch": 1.0502746092099704, "grad_norm": 0.9503681063652039, "learning_rate": 4.894972539079003e-05, "loss": 0.0467, "step": 2486 }, { "epoch": 1.0511195606252641, "grad_norm": 0.7836410403251648, "learning_rate": 4.894888043937474e-05, "loss": 0.0411, "step": 2488 }, { "epoch": 1.0519645120405576, "grad_norm": 1.5865596532821655, "learning_rate": 4.894803548795944e-05, "loss": 0.0583, "step": 2490 }, { "epoch": 1.0528094634558514, "grad_norm": 1.2086788415908813, "learning_rate": 4.8947190536544154e-05, "loss": 0.0408, "step": 2492 }, { "epoch": 1.0536544148711449, "grad_norm": 0.8142328262329102, "learning_rate": 4.8946345585128855e-05, "loss": 0.0322, "step": 2494 }, { "epoch": 1.0544993662864386, "grad_norm": 0.7533618211746216, "learning_rate": 4.894550063371356e-05, "loss": 0.0454, "step": 2496 }, { "epoch": 1.055344317701732, "grad_norm": 0.9661786556243896, "learning_rate": 4.894465568229827e-05, "loss": 0.0479, "step": 2498 }, { "epoch": 1.0561892691170258, "grad_norm": 1.2609660625457764, "learning_rate": 4.8943810730882976e-05, "loss": 0.0653, "step": 2500 }, { "epoch": 1.0561892691170258, "eval_accuracy": 0.7173650629418865, "eval_cer": 0.08063904302336865, "eval_loss": 0.17944836616516113, "eval_runtime": 861.1084, "eval_samples_per_second": 13.469, "eval_steps_per_second": 0.422, "step": 2500 }, { "epoch": 1.0570342205323193, "grad_norm": 0.6538660526275635, "learning_rate": 4.8942965779467683e-05, "loss": 0.0483, "step": 2502 }, { "epoch": 1.057879171947613, "grad_norm": 0.7036990523338318, "learning_rate": 4.894212082805239e-05, "loss": 0.0314, "step": 2504 }, { "epoch": 1.0587241233629066, "grad_norm": 1.2298040390014648, "learning_rate": 4.894127587663709e-05, "loss": 0.0485, "step": 2506 }, { "epoch": 1.0595690747782003, "grad_norm": 0.9623536467552185, "learning_rate": 4.8940430925221805e-05, "loss": 0.0521, "step": 2508 }, { "epoch": 1.0604140261934938, "grad_norm": 1.2405459880828857, "learning_rate": 4.8939585973806505e-05, "loss": 0.0592, "step": 2510 }, { "epoch": 1.0612589776087875, "grad_norm": 0.9672164916992188, "learning_rate": 4.893874102239121e-05, "loss": 0.0688, "step": 2512 }, { "epoch": 1.062103929024081, "grad_norm": 1.3323535919189453, "learning_rate": 4.893789607097592e-05, "loss": 0.0475, "step": 2514 }, { "epoch": 1.0629488804393747, "grad_norm": 0.9052666425704956, "learning_rate": 4.893705111956063e-05, "loss": 0.0275, "step": 2516 }, { "epoch": 1.0637938318546682, "grad_norm": 0.8641655445098877, "learning_rate": 4.8936206168145334e-05, "loss": 0.0344, "step": 2518 }, { "epoch": 1.064638783269962, "grad_norm": 1.5869804620742798, "learning_rate": 4.893536121673004e-05, "loss": 0.0674, "step": 2520 }, { "epoch": 1.0654837346852557, "grad_norm": 1.136039137840271, "learning_rate": 4.893451626531474e-05, "loss": 0.0507, "step": 2522 }, { "epoch": 1.0663286861005492, "grad_norm": 2.1299498081207275, "learning_rate": 4.8933671313899456e-05, "loss": 0.0856, "step": 2524 }, { "epoch": 1.067173637515843, "grad_norm": 1.477407693862915, "learning_rate": 4.8932826362484156e-05, "loss": 0.0644, "step": 2526 }, { "epoch": 1.0680185889311364, "grad_norm": 1.1373028755187988, "learning_rate": 4.8931981411068863e-05, "loss": 0.0342, "step": 2528 }, { "epoch": 1.0688635403464302, "grad_norm": 1.0594611167907715, "learning_rate": 4.893113645965357e-05, "loss": 0.0743, "step": 2530 }, { "epoch": 1.0697084917617237, "grad_norm": 1.1284602880477905, "learning_rate": 4.893029150823828e-05, "loss": 0.0621, "step": 2532 }, { "epoch": 1.0705534431770174, "grad_norm": 1.341365933418274, "learning_rate": 4.8929446556822985e-05, "loss": 0.0596, "step": 2534 }, { "epoch": 1.071398394592311, "grad_norm": 1.3680822849273682, "learning_rate": 4.892860160540769e-05, "loss": 0.0457, "step": 2536 }, { "epoch": 1.0722433460076046, "grad_norm": 1.4528318643569946, "learning_rate": 4.892775665399239e-05, "loss": 0.0551, "step": 2538 }, { "epoch": 1.0730882974228981, "grad_norm": 1.788142442703247, "learning_rate": 4.892691170257711e-05, "loss": 0.0783, "step": 2540 }, { "epoch": 1.0739332488381919, "grad_norm": 1.1063508987426758, "learning_rate": 4.892606675116181e-05, "loss": 0.0448, "step": 2542 }, { "epoch": 1.0747782002534854, "grad_norm": 1.275145173072815, "learning_rate": 4.892522179974652e-05, "loss": 0.0513, "step": 2544 }, { "epoch": 1.075623151668779, "grad_norm": 1.1520663499832153, "learning_rate": 4.892437684833122e-05, "loss": 0.0384, "step": 2546 }, { "epoch": 1.0764681030840726, "grad_norm": 1.0242836475372314, "learning_rate": 4.892353189691593e-05, "loss": 0.0444, "step": 2548 }, { "epoch": 1.0773130544993663, "grad_norm": 1.0930004119873047, "learning_rate": 4.8922686945500636e-05, "loss": 0.0381, "step": 2550 }, { "epoch": 1.0781580059146598, "grad_norm": 1.1523323059082031, "learning_rate": 4.892184199408534e-05, "loss": 0.0565, "step": 2552 }, { "epoch": 1.0790029573299535, "grad_norm": 1.0468764305114746, "learning_rate": 4.892099704267005e-05, "loss": 0.0436, "step": 2554 }, { "epoch": 1.079847908745247, "grad_norm": 1.1597340106964111, "learning_rate": 4.892015209125476e-05, "loss": 0.0477, "step": 2556 }, { "epoch": 1.0806928601605408, "grad_norm": 1.3584051132202148, "learning_rate": 4.891930713983946e-05, "loss": 0.0652, "step": 2558 }, { "epoch": 1.0815378115758345, "grad_norm": 0.9354948401451111, "learning_rate": 4.891846218842417e-05, "loss": 0.0463, "step": 2560 }, { "epoch": 1.082382762991128, "grad_norm": 1.0756053924560547, "learning_rate": 4.891761723700887e-05, "loss": 0.0427, "step": 2562 }, { "epoch": 1.0832277144064215, "grad_norm": 1.3494528532028198, "learning_rate": 4.891677228559358e-05, "loss": 0.0559, "step": 2564 }, { "epoch": 1.0840726658217152, "grad_norm": 3.9756555557250977, "learning_rate": 4.891592733417829e-05, "loss": 0.0671, "step": 2566 }, { "epoch": 1.084917617237009, "grad_norm": 1.4587855339050293, "learning_rate": 4.8915082382762994e-05, "loss": 0.042, "step": 2568 }, { "epoch": 1.0857625686523025, "grad_norm": 1.1794617176055908, "learning_rate": 4.89142374313477e-05, "loss": 0.0722, "step": 2570 }, { "epoch": 1.0866075200675962, "grad_norm": 1.3769999742507935, "learning_rate": 4.891339247993241e-05, "loss": 0.0846, "step": 2572 }, { "epoch": 1.0874524714828897, "grad_norm": 0.7593065500259399, "learning_rate": 4.891254752851711e-05, "loss": 0.0436, "step": 2574 }, { "epoch": 1.0882974228981834, "grad_norm": 1.1376982927322388, "learning_rate": 4.891170257710182e-05, "loss": 0.048, "step": 2576 }, { "epoch": 1.089142374313477, "grad_norm": 0.9180907011032104, "learning_rate": 4.891085762568652e-05, "loss": 0.0582, "step": 2578 }, { "epoch": 1.0899873257287707, "grad_norm": 1.5679020881652832, "learning_rate": 4.891001267427123e-05, "loss": 0.0565, "step": 2580 }, { "epoch": 1.0908322771440642, "grad_norm": 1.0620578527450562, "learning_rate": 4.890916772285594e-05, "loss": 0.0823, "step": 2582 }, { "epoch": 1.091677228559358, "grad_norm": 0.9624361395835876, "learning_rate": 4.8908322771440645e-05, "loss": 0.0332, "step": 2584 }, { "epoch": 1.0925221799746514, "grad_norm": 1.1361215114593506, "learning_rate": 4.890747782002535e-05, "loss": 0.0579, "step": 2586 }, { "epoch": 1.0933671313899451, "grad_norm": 1.0327388048171997, "learning_rate": 4.890663286861006e-05, "loss": 0.0408, "step": 2588 }, { "epoch": 1.0942120828052386, "grad_norm": 1.140649676322937, "learning_rate": 4.890578791719476e-05, "loss": 0.0414, "step": 2590 }, { "epoch": 1.0950570342205324, "grad_norm": 1.0419838428497314, "learning_rate": 4.8904942965779473e-05, "loss": 0.0517, "step": 2592 }, { "epoch": 1.0959019856358259, "grad_norm": 0.7673367261886597, "learning_rate": 4.8904098014364174e-05, "loss": 0.0279, "step": 2594 }, { "epoch": 1.0967469370511196, "grad_norm": 1.4603475332260132, "learning_rate": 4.890325306294888e-05, "loss": 0.0661, "step": 2596 }, { "epoch": 1.097591888466413, "grad_norm": 0.6877373456954956, "learning_rate": 4.890240811153359e-05, "loss": 0.0309, "step": 2598 }, { "epoch": 1.0984368398817068, "grad_norm": 0.6919780969619751, "learning_rate": 4.8901563160118295e-05, "loss": 0.045, "step": 2600 }, { "epoch": 1.0984368398817068, "eval_accuracy": 0.7184859458527332, "eval_cer": 0.07980098273595808, "eval_loss": 0.1835366040468216, "eval_runtime": 857.7507, "eval_samples_per_second": 13.521, "eval_steps_per_second": 0.423, "step": 2600 }, { "epoch": 1.0992817912970003, "grad_norm": 1.1517709493637085, "learning_rate": 4.8900718208703e-05, "loss": 0.0561, "step": 2602 }, { "epoch": 1.100126742712294, "grad_norm": 1.0358161926269531, "learning_rate": 4.889987325728771e-05, "loss": 0.046, "step": 2604 }, { "epoch": 1.1009716941275878, "grad_norm": 1.4217817783355713, "learning_rate": 4.889902830587241e-05, "loss": 0.073, "step": 2606 }, { "epoch": 1.1018166455428813, "grad_norm": 1.2074991464614868, "learning_rate": 4.8898183354457124e-05, "loss": 0.0871, "step": 2608 }, { "epoch": 1.102661596958175, "grad_norm": 2.3143765926361084, "learning_rate": 4.8897338403041825e-05, "loss": 0.0497, "step": 2610 }, { "epoch": 1.1035065483734685, "grad_norm": 1.1069965362548828, "learning_rate": 4.889649345162653e-05, "loss": 0.0511, "step": 2612 }, { "epoch": 1.1043514997887622, "grad_norm": 0.9317350387573242, "learning_rate": 4.889564850021124e-05, "loss": 0.0506, "step": 2614 }, { "epoch": 1.1051964512040557, "grad_norm": 0.9004279971122742, "learning_rate": 4.8894803548795946e-05, "loss": 0.0338, "step": 2616 }, { "epoch": 1.1060414026193495, "grad_norm": 2.0052106380462646, "learning_rate": 4.8893958597380653e-05, "loss": 0.0945, "step": 2618 }, { "epoch": 1.106886354034643, "grad_norm": 1.1178474426269531, "learning_rate": 4.889311364596536e-05, "loss": 0.0474, "step": 2620 }, { "epoch": 1.1077313054499367, "grad_norm": 0.9016062617301941, "learning_rate": 4.889226869455006e-05, "loss": 0.0391, "step": 2622 }, { "epoch": 1.1085762568652302, "grad_norm": 1.6335241794586182, "learning_rate": 4.8891423743134775e-05, "loss": 0.0487, "step": 2624 }, { "epoch": 1.109421208280524, "grad_norm": 1.228885293006897, "learning_rate": 4.8890578791719475e-05, "loss": 0.0529, "step": 2626 }, { "epoch": 1.1102661596958174, "grad_norm": 0.7745740413665771, "learning_rate": 4.888973384030418e-05, "loss": 0.0231, "step": 2628 }, { "epoch": 1.1111111111111112, "grad_norm": 1.8813176155090332, "learning_rate": 4.888888888888889e-05, "loss": 0.054, "step": 2630 }, { "epoch": 1.1119560625264047, "grad_norm": 1.0765502452850342, "learning_rate": 4.88880439374736e-05, "loss": 0.0518, "step": 2632 }, { "epoch": 1.1128010139416984, "grad_norm": 1.6047254800796509, "learning_rate": 4.8887198986058304e-05, "loss": 0.0632, "step": 2634 }, { "epoch": 1.113645965356992, "grad_norm": 0.8298768997192383, "learning_rate": 4.888635403464301e-05, "loss": 0.031, "step": 2636 }, { "epoch": 1.1144909167722856, "grad_norm": 0.9682705402374268, "learning_rate": 4.888550908322771e-05, "loss": 0.0421, "step": 2638 }, { "epoch": 1.1153358681875791, "grad_norm": 0.9020190834999084, "learning_rate": 4.8884664131812426e-05, "loss": 0.0587, "step": 2640 }, { "epoch": 1.1161808196028729, "grad_norm": 1.5844128131866455, "learning_rate": 4.8883819180397126e-05, "loss": 0.0633, "step": 2642 }, { "epoch": 1.1170257710181664, "grad_norm": 1.0159263610839844, "learning_rate": 4.888297422898184e-05, "loss": 0.0635, "step": 2644 }, { "epoch": 1.11787072243346, "grad_norm": 1.2040449380874634, "learning_rate": 4.888212927756654e-05, "loss": 0.0546, "step": 2646 }, { "epoch": 1.1187156738487536, "grad_norm": 1.333282232284546, "learning_rate": 4.888128432615125e-05, "loss": 0.0415, "step": 2648 }, { "epoch": 1.1195606252640473, "grad_norm": 0.9611084461212158, "learning_rate": 4.8880439374735955e-05, "loss": 0.0467, "step": 2650 }, { "epoch": 1.120405576679341, "grad_norm": 1.6842930316925049, "learning_rate": 4.887959442332066e-05, "loss": 0.0573, "step": 2652 }, { "epoch": 1.1212505280946345, "grad_norm": 1.4707005023956299, "learning_rate": 4.887874947190537e-05, "loss": 0.0691, "step": 2654 }, { "epoch": 1.1220954795099283, "grad_norm": 1.7009764909744263, "learning_rate": 4.887790452049008e-05, "loss": 0.0526, "step": 2656 }, { "epoch": 1.1229404309252218, "grad_norm": 0.4141417145729065, "learning_rate": 4.887705956907478e-05, "loss": 0.0406, "step": 2658 }, { "epoch": 1.1237853823405155, "grad_norm": 1.2767736911773682, "learning_rate": 4.887621461765949e-05, "loss": 0.0599, "step": 2660 }, { "epoch": 1.124630333755809, "grad_norm": 1.0069574117660522, "learning_rate": 4.887536966624419e-05, "loss": 0.0733, "step": 2662 }, { "epoch": 1.1254752851711027, "grad_norm": 1.3213729858398438, "learning_rate": 4.88745247148289e-05, "loss": 0.0641, "step": 2664 }, { "epoch": 1.1263202365863962, "grad_norm": 0.9671632051467896, "learning_rate": 4.8873679763413606e-05, "loss": 0.065, "step": 2666 }, { "epoch": 1.12716518800169, "grad_norm": 1.066148042678833, "learning_rate": 4.887283481199831e-05, "loss": 0.0689, "step": 2668 }, { "epoch": 1.1280101394169835, "grad_norm": 1.6266218423843384, "learning_rate": 4.887198986058302e-05, "loss": 0.0444, "step": 2670 }, { "epoch": 1.1288550908322772, "grad_norm": 0.7963671088218689, "learning_rate": 4.887114490916773e-05, "loss": 0.0423, "step": 2672 }, { "epoch": 1.1297000422475707, "grad_norm": 5.003674507141113, "learning_rate": 4.887029995775243e-05, "loss": 0.0406, "step": 2674 }, { "epoch": 1.1305449936628644, "grad_norm": 1.4064865112304688, "learning_rate": 4.886945500633714e-05, "loss": 0.0567, "step": 2676 }, { "epoch": 1.131389945078158, "grad_norm": 1.2468315362930298, "learning_rate": 4.886861005492184e-05, "loss": 0.0517, "step": 2678 }, { "epoch": 1.1322348964934517, "grad_norm": 1.068264365196228, "learning_rate": 4.886776510350655e-05, "loss": 0.0442, "step": 2680 }, { "epoch": 1.1330798479087452, "grad_norm": 1.4150062799453735, "learning_rate": 4.886692015209126e-05, "loss": 0.0397, "step": 2682 }, { "epoch": 1.133924799324039, "grad_norm": 1.4678819179534912, "learning_rate": 4.8866075200675964e-05, "loss": 0.061, "step": 2684 }, { "epoch": 1.1347697507393324, "grad_norm": 1.3416491746902466, "learning_rate": 4.886523024926067e-05, "loss": 0.0644, "step": 2686 }, { "epoch": 1.1356147021546261, "grad_norm": 1.5595964193344116, "learning_rate": 4.886438529784538e-05, "loss": 0.051, "step": 2688 }, { "epoch": 1.1364596535699198, "grad_norm": 0.7982243895530701, "learning_rate": 4.886354034643008e-05, "loss": 0.056, "step": 2690 }, { "epoch": 1.1373046049852134, "grad_norm": 1.517142415046692, "learning_rate": 4.886269539501479e-05, "loss": 0.0659, "step": 2692 }, { "epoch": 1.1381495564005069, "grad_norm": 0.7119234800338745, "learning_rate": 4.886185044359949e-05, "loss": 0.0539, "step": 2694 }, { "epoch": 1.1389945078158006, "grad_norm": 1.0239163637161255, "learning_rate": 4.88610054921842e-05, "loss": 0.0475, "step": 2696 }, { "epoch": 1.1398394592310943, "grad_norm": 1.3597328662872314, "learning_rate": 4.886016054076891e-05, "loss": 0.0515, "step": 2698 }, { "epoch": 1.1406844106463878, "grad_norm": 0.914358377456665, "learning_rate": 4.8859315589353615e-05, "loss": 0.0496, "step": 2700 }, { "epoch": 1.1406844106463878, "eval_accuracy": 0.7282290050008622, "eval_cer": 0.07597237047557716, "eval_loss": 0.1734546571969986, "eval_runtime": 866.2492, "eval_samples_per_second": 13.389, "eval_steps_per_second": 0.419, "step": 2700 }, { "epoch": 1.1415293620616815, "grad_norm": 1.1831682920455933, "learning_rate": 4.885847063793832e-05, "loss": 0.0741, "step": 2702 }, { "epoch": 1.142374313476975, "grad_norm": 0.8696686029434204, "learning_rate": 4.885762568652303e-05, "loss": 0.0446, "step": 2704 }, { "epoch": 1.1432192648922688, "grad_norm": 0.9323683977127075, "learning_rate": 4.885678073510773e-05, "loss": 0.0605, "step": 2706 }, { "epoch": 1.1440642163075623, "grad_norm": 1.124110460281372, "learning_rate": 4.8855935783692443e-05, "loss": 0.036, "step": 2708 }, { "epoch": 1.144909167722856, "grad_norm": 1.1443071365356445, "learning_rate": 4.8855090832277144e-05, "loss": 0.035, "step": 2710 }, { "epoch": 1.1457541191381495, "grad_norm": 0.8278305530548096, "learning_rate": 4.885424588086185e-05, "loss": 0.0369, "step": 2712 }, { "epoch": 1.1465990705534432, "grad_norm": 1.1934250593185425, "learning_rate": 4.885340092944656e-05, "loss": 0.0539, "step": 2714 }, { "epoch": 1.1474440219687367, "grad_norm": 1.858482003211975, "learning_rate": 4.8852555978031265e-05, "loss": 0.0523, "step": 2716 }, { "epoch": 1.1482889733840305, "grad_norm": 1.45504891872406, "learning_rate": 4.885171102661597e-05, "loss": 0.0532, "step": 2718 }, { "epoch": 1.149133924799324, "grad_norm": 1.0459725856781006, "learning_rate": 4.885086607520068e-05, "loss": 0.0487, "step": 2720 }, { "epoch": 1.1499788762146177, "grad_norm": 2.038820743560791, "learning_rate": 4.885002112378538e-05, "loss": 0.0513, "step": 2722 }, { "epoch": 1.1508238276299112, "grad_norm": 0.9835671186447144, "learning_rate": 4.8849176172370094e-05, "loss": 0.0536, "step": 2724 }, { "epoch": 1.151668779045205, "grad_norm": 1.1258163452148438, "learning_rate": 4.8848331220954795e-05, "loss": 0.0583, "step": 2726 }, { "epoch": 1.1525137304604984, "grad_norm": 0.7174440026283264, "learning_rate": 4.88474862695395e-05, "loss": 0.0283, "step": 2728 }, { "epoch": 1.1533586818757922, "grad_norm": 1.195196509361267, "learning_rate": 4.884664131812421e-05, "loss": 0.0518, "step": 2730 }, { "epoch": 1.1542036332910857, "grad_norm": 0.956576943397522, "learning_rate": 4.8845796366708916e-05, "loss": 0.0562, "step": 2732 }, { "epoch": 1.1550485847063794, "grad_norm": 1.3628367185592651, "learning_rate": 4.8844951415293623e-05, "loss": 0.0548, "step": 2734 }, { "epoch": 1.1558935361216731, "grad_norm": 0.6316198110580444, "learning_rate": 4.884410646387833e-05, "loss": 0.0293, "step": 2736 }, { "epoch": 1.1567384875369666, "grad_norm": 0.6486905217170715, "learning_rate": 4.884326151246303e-05, "loss": 0.043, "step": 2738 }, { "epoch": 1.1575834389522601, "grad_norm": 1.5592443943023682, "learning_rate": 4.8842416561047745e-05, "loss": 0.0589, "step": 2740 }, { "epoch": 1.1584283903675539, "grad_norm": 1.3737517595291138, "learning_rate": 4.8841571609632446e-05, "loss": 0.0638, "step": 2742 }, { "epoch": 1.1592733417828476, "grad_norm": 0.8782877922058105, "learning_rate": 4.884072665821716e-05, "loss": 0.0582, "step": 2744 }, { "epoch": 1.160118293198141, "grad_norm": 0.960125207901001, "learning_rate": 4.883988170680186e-05, "loss": 0.0655, "step": 2746 }, { "epoch": 1.1609632446134348, "grad_norm": 1.4179805517196655, "learning_rate": 4.883903675538657e-05, "loss": 0.0614, "step": 2748 }, { "epoch": 1.1618081960287283, "grad_norm": 0.8707264065742493, "learning_rate": 4.8838191803971274e-05, "loss": 0.0385, "step": 2750 }, { "epoch": 1.162653147444022, "grad_norm": 1.0426108837127686, "learning_rate": 4.883734685255598e-05, "loss": 0.07, "step": 2752 }, { "epoch": 1.1634980988593155, "grad_norm": 1.4466257095336914, "learning_rate": 4.883650190114069e-05, "loss": 0.0584, "step": 2754 }, { "epoch": 1.1643430502746093, "grad_norm": 1.018989086151123, "learning_rate": 4.8835656949725396e-05, "loss": 0.0623, "step": 2756 }, { "epoch": 1.1651880016899028, "grad_norm": 1.6744930744171143, "learning_rate": 4.8834811998310096e-05, "loss": 0.0807, "step": 2758 }, { "epoch": 1.1660329531051965, "grad_norm": 1.2810227870941162, "learning_rate": 4.883396704689481e-05, "loss": 0.0462, "step": 2760 }, { "epoch": 1.16687790452049, "grad_norm": 0.7232649326324463, "learning_rate": 4.883312209547951e-05, "loss": 0.0473, "step": 2762 }, { "epoch": 1.1677228559357837, "grad_norm": 1.4324157238006592, "learning_rate": 4.883227714406422e-05, "loss": 0.0736, "step": 2764 }, { "epoch": 1.1685678073510772, "grad_norm": 0.6245022416114807, "learning_rate": 4.8831432192648925e-05, "loss": 0.0281, "step": 2766 }, { "epoch": 1.169412758766371, "grad_norm": 1.720564842224121, "learning_rate": 4.883058724123363e-05, "loss": 0.0831, "step": 2768 }, { "epoch": 1.1702577101816645, "grad_norm": 1.7815865278244019, "learning_rate": 4.882974228981834e-05, "loss": 0.0448, "step": 2770 }, { "epoch": 1.1711026615969582, "grad_norm": 1.1956509351730347, "learning_rate": 4.882889733840305e-05, "loss": 0.0696, "step": 2772 }, { "epoch": 1.1719476130122517, "grad_norm": 0.9493592381477356, "learning_rate": 4.882805238698775e-05, "loss": 0.0671, "step": 2774 }, { "epoch": 1.1727925644275454, "grad_norm": 0.7172707319259644, "learning_rate": 4.882720743557246e-05, "loss": 0.0495, "step": 2776 }, { "epoch": 1.173637515842839, "grad_norm": 1.3545876741409302, "learning_rate": 4.882636248415716e-05, "loss": 0.0609, "step": 2778 }, { "epoch": 1.1744824672581327, "grad_norm": 0.9758241772651672, "learning_rate": 4.882551753274187e-05, "loss": 0.0454, "step": 2780 }, { "epoch": 1.1753274186734264, "grad_norm": 1.241162657737732, "learning_rate": 4.8824672581326576e-05, "loss": 0.0447, "step": 2782 }, { "epoch": 1.1761723700887199, "grad_norm": 1.0415271520614624, "learning_rate": 4.882382762991128e-05, "loss": 0.0389, "step": 2784 }, { "epoch": 1.1770173215040136, "grad_norm": 1.916858196258545, "learning_rate": 4.882298267849599e-05, "loss": 0.0658, "step": 2786 }, { "epoch": 1.1778622729193071, "grad_norm": 1.2773157358169556, "learning_rate": 4.88221377270807e-05, "loss": 0.0654, "step": 2788 }, { "epoch": 1.1787072243346008, "grad_norm": 1.5616599321365356, "learning_rate": 4.88212927756654e-05, "loss": 0.0643, "step": 2790 }, { "epoch": 1.1795521757498943, "grad_norm": 1.3588258028030396, "learning_rate": 4.882044782425011e-05, "loss": 0.0546, "step": 2792 }, { "epoch": 1.180397127165188, "grad_norm": 0.8794555068016052, "learning_rate": 4.881960287283481e-05, "loss": 0.0537, "step": 2794 }, { "epoch": 1.1812420785804816, "grad_norm": 1.2843623161315918, "learning_rate": 4.881875792141952e-05, "loss": 0.0349, "step": 2796 }, { "epoch": 1.1820870299957753, "grad_norm": 1.0971851348876953, "learning_rate": 4.881791297000423e-05, "loss": 0.0529, "step": 2798 }, { "epoch": 1.1829319814110688, "grad_norm": 1.3400644063949585, "learning_rate": 4.8817068018588934e-05, "loss": 0.0555, "step": 2800 }, { "epoch": 1.1829319814110688, "eval_accuracy": 0.7257285738920504, "eval_cer": 0.07709272475453656, "eval_loss": 0.1766710728406906, "eval_runtime": 853.1559, "eval_samples_per_second": 13.594, "eval_steps_per_second": 0.425, "step": 2800 }, { "epoch": 1.1837769328263625, "grad_norm": 0.753764271736145, "learning_rate": 4.881622306717364e-05, "loss": 0.0612, "step": 2802 }, { "epoch": 1.184621884241656, "grad_norm": 3.2383697032928467, "learning_rate": 4.881537811575835e-05, "loss": 0.0604, "step": 2804 }, { "epoch": 1.1854668356569498, "grad_norm": 0.9122393727302551, "learning_rate": 4.881453316434305e-05, "loss": 0.0318, "step": 2806 }, { "epoch": 1.1863117870722433, "grad_norm": 1.2476575374603271, "learning_rate": 4.881368821292776e-05, "loss": 0.0407, "step": 2808 }, { "epoch": 1.187156738487537, "grad_norm": 1.0261362791061401, "learning_rate": 4.881284326151246e-05, "loss": 0.0529, "step": 2810 }, { "epoch": 1.1880016899028305, "grad_norm": 1.278996467590332, "learning_rate": 4.881199831009717e-05, "loss": 0.0606, "step": 2812 }, { "epoch": 1.1888466413181242, "grad_norm": 1.2511422634124756, "learning_rate": 4.881115335868188e-05, "loss": 0.0555, "step": 2814 }, { "epoch": 1.1896915927334177, "grad_norm": 0.9766137599945068, "learning_rate": 4.8810308407266585e-05, "loss": 0.0435, "step": 2816 }, { "epoch": 1.1905365441487115, "grad_norm": 1.5147414207458496, "learning_rate": 4.880946345585129e-05, "loss": 0.0418, "step": 2818 }, { "epoch": 1.1913814955640052, "grad_norm": 1.3996652364730835, "learning_rate": 4.8808618504436e-05, "loss": 0.0914, "step": 2820 }, { "epoch": 1.1922264469792987, "grad_norm": 1.1199573278427124, "learning_rate": 4.88077735530207e-05, "loss": 0.0549, "step": 2822 }, { "epoch": 1.1930713983945922, "grad_norm": 0.946550726890564, "learning_rate": 4.8806928601605414e-05, "loss": 0.0356, "step": 2824 }, { "epoch": 1.193916349809886, "grad_norm": 0.9064375162124634, "learning_rate": 4.8806083650190114e-05, "loss": 0.0669, "step": 2826 }, { "epoch": 1.1947613012251797, "grad_norm": 1.4223860502243042, "learning_rate": 4.880523869877482e-05, "loss": 0.0581, "step": 2828 }, { "epoch": 1.1956062526404732, "grad_norm": 1.123694658279419, "learning_rate": 4.880439374735953e-05, "loss": 0.0437, "step": 2830 }, { "epoch": 1.1964512040557669, "grad_norm": 1.1511688232421875, "learning_rate": 4.8803548795944236e-05, "loss": 0.056, "step": 2832 }, { "epoch": 1.1972961554710604, "grad_norm": 1.4350991249084473, "learning_rate": 4.880270384452894e-05, "loss": 0.044, "step": 2834 }, { "epoch": 1.1981411068863541, "grad_norm": 1.211211919784546, "learning_rate": 4.880185889311365e-05, "loss": 0.0378, "step": 2836 }, { "epoch": 1.1989860583016476, "grad_norm": 1.3658467531204224, "learning_rate": 4.880101394169835e-05, "loss": 0.0848, "step": 2838 }, { "epoch": 1.1998310097169413, "grad_norm": 1.6708577871322632, "learning_rate": 4.8800168990283064e-05, "loss": 0.0461, "step": 2840 }, { "epoch": 1.2006759611322348, "grad_norm": 1.1294584274291992, "learning_rate": 4.8799324038867765e-05, "loss": 0.0485, "step": 2842 }, { "epoch": 1.2015209125475286, "grad_norm": 1.5034394264221191, "learning_rate": 4.879847908745248e-05, "loss": 0.0611, "step": 2844 }, { "epoch": 1.202365863962822, "grad_norm": 1.4616371393203735, "learning_rate": 4.879763413603718e-05, "loss": 0.0576, "step": 2846 }, { "epoch": 1.2032108153781158, "grad_norm": 1.160904049873352, "learning_rate": 4.8796789184621886e-05, "loss": 0.0682, "step": 2848 }, { "epoch": 1.2040557667934093, "grad_norm": 0.753709614276886, "learning_rate": 4.8795944233206594e-05, "loss": 0.0333, "step": 2850 }, { "epoch": 1.204900718208703, "grad_norm": 1.6820114850997925, "learning_rate": 4.87950992817913e-05, "loss": 0.0587, "step": 2852 }, { "epoch": 1.2057456696239965, "grad_norm": 1.2073854207992554, "learning_rate": 4.879425433037601e-05, "loss": 0.0678, "step": 2854 }, { "epoch": 1.2065906210392903, "grad_norm": 2.104133129119873, "learning_rate": 4.8793409378960715e-05, "loss": 0.0868, "step": 2856 }, { "epoch": 1.2074355724545838, "grad_norm": 1.4604604244232178, "learning_rate": 4.8792564427545416e-05, "loss": 0.0587, "step": 2858 }, { "epoch": 1.2082805238698775, "grad_norm": 1.0882490873336792, "learning_rate": 4.879171947613013e-05, "loss": 0.0493, "step": 2860 }, { "epoch": 1.209125475285171, "grad_norm": 1.402060866355896, "learning_rate": 4.879087452471483e-05, "loss": 0.0895, "step": 2862 }, { "epoch": 1.2099704267004647, "grad_norm": 1.4716256856918335, "learning_rate": 4.879002957329954e-05, "loss": 0.0625, "step": 2864 }, { "epoch": 1.2108153781157585, "grad_norm": 1.0345948934555054, "learning_rate": 4.8789184621884244e-05, "loss": 0.0621, "step": 2866 }, { "epoch": 1.211660329531052, "grad_norm": 1.3120019435882568, "learning_rate": 4.878833967046895e-05, "loss": 0.0606, "step": 2868 }, { "epoch": 1.2125052809463455, "grad_norm": 1.2341971397399902, "learning_rate": 4.878749471905366e-05, "loss": 0.0595, "step": 2870 }, { "epoch": 1.2133502323616392, "grad_norm": 1.1355195045471191, "learning_rate": 4.8786649767638366e-05, "loss": 0.0392, "step": 2872 }, { "epoch": 1.214195183776933, "grad_norm": 1.4073996543884277, "learning_rate": 4.8785804816223066e-05, "loss": 0.048, "step": 2874 }, { "epoch": 1.2150401351922264, "grad_norm": 0.8575471043586731, "learning_rate": 4.878495986480778e-05, "loss": 0.0436, "step": 2876 }, { "epoch": 1.2158850866075201, "grad_norm": 1.1738005876541138, "learning_rate": 4.878411491339248e-05, "loss": 0.0434, "step": 2878 }, { "epoch": 1.2167300380228137, "grad_norm": 1.412050724029541, "learning_rate": 4.878326996197719e-05, "loss": 0.0475, "step": 2880 }, { "epoch": 1.2175749894381074, "grad_norm": 1.4315383434295654, "learning_rate": 4.8782425010561895e-05, "loss": 0.063, "step": 2882 }, { "epoch": 1.2184199408534009, "grad_norm": 1.2847777605056763, "learning_rate": 4.87815800591466e-05, "loss": 0.0495, "step": 2884 }, { "epoch": 1.2192648922686946, "grad_norm": 1.1097843647003174, "learning_rate": 4.878073510773131e-05, "loss": 0.0836, "step": 2886 }, { "epoch": 1.2201098436839881, "grad_norm": 1.2086609601974487, "learning_rate": 4.877989015631602e-05, "loss": 0.069, "step": 2888 }, { "epoch": 1.2209547950992818, "grad_norm": 1.2778347730636597, "learning_rate": 4.877904520490072e-05, "loss": 0.0595, "step": 2890 }, { "epoch": 1.2217997465145753, "grad_norm": 1.0457631349563599, "learning_rate": 4.877820025348543e-05, "loss": 0.0322, "step": 2892 }, { "epoch": 1.222644697929869, "grad_norm": 1.6547378301620483, "learning_rate": 4.877735530207013e-05, "loss": 0.0622, "step": 2894 }, { "epoch": 1.2234896493451626, "grad_norm": 1.1878632307052612, "learning_rate": 4.877651035065484e-05, "loss": 0.0549, "step": 2896 }, { "epoch": 1.2243346007604563, "grad_norm": 1.374286413192749, "learning_rate": 4.8775665399239546e-05, "loss": 0.0628, "step": 2898 }, { "epoch": 1.2251795521757498, "grad_norm": 0.7572351098060608, "learning_rate": 4.877482044782425e-05, "loss": 0.0645, "step": 2900 }, { "epoch": 1.2251795521757498, "eval_accuracy": 0.7215899292981549, "eval_cer": 0.07695157775876214, "eval_loss": 0.17909274995326996, "eval_runtime": 861.6493, "eval_samples_per_second": 13.46, "eval_steps_per_second": 0.421, "step": 2900 }, { "epoch": 1.2260245035910435, "grad_norm": 1.633081316947937, "learning_rate": 4.877397549640896e-05, "loss": 0.0579, "step": 2902 }, { "epoch": 1.226869455006337, "grad_norm": 1.0226025581359863, "learning_rate": 4.877313054499367e-05, "loss": 0.0822, "step": 2904 }, { "epoch": 1.2277144064216308, "grad_norm": 1.0272724628448486, "learning_rate": 4.877228559357837e-05, "loss": 0.0706, "step": 2906 }, { "epoch": 1.2285593578369243, "grad_norm": 0.925186812877655, "learning_rate": 4.877144064216308e-05, "loss": 0.0578, "step": 2908 }, { "epoch": 1.229404309252218, "grad_norm": 0.5941814184188843, "learning_rate": 4.877059569074778e-05, "loss": 0.0352, "step": 2910 }, { "epoch": 1.2302492606675117, "grad_norm": 1.436691164970398, "learning_rate": 4.876975073933249e-05, "loss": 0.0637, "step": 2912 }, { "epoch": 1.2310942120828052, "grad_norm": 0.9653565883636475, "learning_rate": 4.87689057879172e-05, "loss": 0.0374, "step": 2914 }, { "epoch": 1.231939163498099, "grad_norm": 0.8369848132133484, "learning_rate": 4.8768060836501904e-05, "loss": 0.0431, "step": 2916 }, { "epoch": 1.2327841149133925, "grad_norm": 1.2851033210754395, "learning_rate": 4.876721588508661e-05, "loss": 0.0618, "step": 2918 }, { "epoch": 1.2336290663286862, "grad_norm": 1.213028073310852, "learning_rate": 4.876637093367132e-05, "loss": 0.0585, "step": 2920 }, { "epoch": 1.2344740177439797, "grad_norm": 1.9548083543777466, "learning_rate": 4.876552598225602e-05, "loss": 0.0659, "step": 2922 }, { "epoch": 1.2353189691592734, "grad_norm": 1.0520684719085693, "learning_rate": 4.876468103084073e-05, "loss": 0.0374, "step": 2924 }, { "epoch": 1.236163920574567, "grad_norm": 0.8738290667533875, "learning_rate": 4.876383607942543e-05, "loss": 0.0727, "step": 2926 }, { "epoch": 1.2370088719898606, "grad_norm": 1.4663364887237549, "learning_rate": 4.876299112801014e-05, "loss": 0.0471, "step": 2928 }, { "epoch": 1.2378538234051542, "grad_norm": 1.1130000352859497, "learning_rate": 4.876214617659485e-05, "loss": 0.053, "step": 2930 }, { "epoch": 1.2386987748204479, "grad_norm": 0.9483621120452881, "learning_rate": 4.8761301225179555e-05, "loss": 0.058, "step": 2932 }, { "epoch": 1.2395437262357414, "grad_norm": 0.7923450469970703, "learning_rate": 4.876045627376426e-05, "loss": 0.0384, "step": 2934 }, { "epoch": 1.240388677651035, "grad_norm": 1.8122419118881226, "learning_rate": 4.875961132234897e-05, "loss": 0.076, "step": 2936 }, { "epoch": 1.2412336290663286, "grad_norm": 1.1781667470932007, "learning_rate": 4.875876637093367e-05, "loss": 0.053, "step": 2938 }, { "epoch": 1.2420785804816223, "grad_norm": 1.2569353580474854, "learning_rate": 4.8757921419518384e-05, "loss": 0.0514, "step": 2940 }, { "epoch": 1.2429235318969158, "grad_norm": 1.32847261428833, "learning_rate": 4.8757076468103084e-05, "loss": 0.0648, "step": 2942 }, { "epoch": 1.2437684833122096, "grad_norm": 0.7314063310623169, "learning_rate": 4.87562315166878e-05, "loss": 0.0582, "step": 2944 }, { "epoch": 1.244613434727503, "grad_norm": 1.6381207704544067, "learning_rate": 4.87553865652725e-05, "loss": 0.0582, "step": 2946 }, { "epoch": 1.2454583861427968, "grad_norm": 1.02126145362854, "learning_rate": 4.8754541613857206e-05, "loss": 0.0705, "step": 2948 }, { "epoch": 1.2463033375580905, "grad_norm": 1.4831101894378662, "learning_rate": 4.875369666244191e-05, "loss": 0.0784, "step": 2950 }, { "epoch": 1.247148288973384, "grad_norm": 1.5562660694122314, "learning_rate": 4.875285171102662e-05, "loss": 0.064, "step": 2952 }, { "epoch": 1.2479932403886775, "grad_norm": 0.9851226210594177, "learning_rate": 4.875200675961133e-05, "loss": 0.0513, "step": 2954 }, { "epoch": 1.2488381918039713, "grad_norm": 1.7277909517288208, "learning_rate": 4.8751161808196034e-05, "loss": 0.0535, "step": 2956 }, { "epoch": 1.249683143219265, "grad_norm": 1.9152166843414307, "learning_rate": 4.8750316856780735e-05, "loss": 0.0508, "step": 2958 }, { "epoch": 1.2505280946345585, "grad_norm": 0.7767104506492615, "learning_rate": 4.874947190536545e-05, "loss": 0.0317, "step": 2960 }, { "epoch": 1.251373046049852, "grad_norm": 1.0719561576843262, "learning_rate": 4.874862695395015e-05, "loss": 0.0338, "step": 2962 }, { "epoch": 1.2522179974651457, "grad_norm": 0.9941785931587219, "learning_rate": 4.8747782002534856e-05, "loss": 0.0297, "step": 2964 }, { "epoch": 1.2530629488804395, "grad_norm": 1.413793683052063, "learning_rate": 4.8746937051119564e-05, "loss": 0.0414, "step": 2966 }, { "epoch": 1.253907900295733, "grad_norm": 0.8977073431015015, "learning_rate": 4.874609209970427e-05, "loss": 0.0697, "step": 2968 }, { "epoch": 1.2547528517110267, "grad_norm": 1.1440623998641968, "learning_rate": 4.874524714828898e-05, "loss": 0.0727, "step": 2970 }, { "epoch": 1.2555978031263202, "grad_norm": 0.648844301700592, "learning_rate": 4.8744402196873685e-05, "loss": 0.0411, "step": 2972 }, { "epoch": 1.256442754541614, "grad_norm": 1.1983152627944946, "learning_rate": 4.8743557245458386e-05, "loss": 0.0414, "step": 2974 }, { "epoch": 1.2572877059569074, "grad_norm": 1.0279157161712646, "learning_rate": 4.87427122940431e-05, "loss": 0.0489, "step": 2976 }, { "epoch": 1.2581326573722011, "grad_norm": 1.3486169576644897, "learning_rate": 4.87418673426278e-05, "loss": 0.0751, "step": 2978 }, { "epoch": 1.2589776087874947, "grad_norm": 0.9830628037452698, "learning_rate": 4.874102239121251e-05, "loss": 0.0492, "step": 2980 }, { "epoch": 1.2598225602027884, "grad_norm": 1.5126888751983643, "learning_rate": 4.8740177439797214e-05, "loss": 0.0404, "step": 2982 }, { "epoch": 1.2606675116180819, "grad_norm": 0.9845932126045227, "learning_rate": 4.873933248838192e-05, "loss": 0.077, "step": 2984 }, { "epoch": 1.2615124630333756, "grad_norm": 0.8719518780708313, "learning_rate": 4.873848753696663e-05, "loss": 0.0355, "step": 2986 }, { "epoch": 1.2623574144486693, "grad_norm": 1.3625831604003906, "learning_rate": 4.8737642585551336e-05, "loss": 0.0571, "step": 2988 }, { "epoch": 1.2632023658639628, "grad_norm": 1.1835618019104004, "learning_rate": 4.8736797634136036e-05, "loss": 0.0423, "step": 2990 }, { "epoch": 1.2640473172792563, "grad_norm": 1.0713012218475342, "learning_rate": 4.873595268272075e-05, "loss": 0.0571, "step": 2992 }, { "epoch": 1.26489226869455, "grad_norm": 1.7218633890151978, "learning_rate": 4.873510773130545e-05, "loss": 0.0462, "step": 2994 }, { "epoch": 1.2657372201098438, "grad_norm": 1.8238509893417358, "learning_rate": 4.873426277989016e-05, "loss": 0.0559, "step": 2996 }, { "epoch": 1.2665821715251373, "grad_norm": 1.0371171236038208, "learning_rate": 4.8733417828474865e-05, "loss": 0.0469, "step": 2998 }, { "epoch": 1.2674271229404308, "grad_norm": 1.3527776002883911, "learning_rate": 4.873257287705957e-05, "loss": 0.0402, "step": 3000 }, { "epoch": 1.2674271229404308, "eval_accuracy": 0.7070184514571478, "eval_cer": 0.0812742045043535, "eval_loss": 0.18232469260692596, "eval_runtime": 842.602, "eval_samples_per_second": 13.765, "eval_steps_per_second": 0.431, "step": 3000 }, { "epoch": 1.2682720743557245, "grad_norm": 0.8487452268600464, "learning_rate": 4.873172792564428e-05, "loss": 0.0171, "step": 3002 }, { "epoch": 1.2691170257710183, "grad_norm": 1.096564531326294, "learning_rate": 4.873088297422899e-05, "loss": 0.0425, "step": 3004 }, { "epoch": 1.2699619771863118, "grad_norm": 1.2362899780273438, "learning_rate": 4.873003802281369e-05, "loss": 0.0334, "step": 3006 }, { "epoch": 1.2708069286016055, "grad_norm": 1.4478521347045898, "learning_rate": 4.87291930713984e-05, "loss": 0.0666, "step": 3008 }, { "epoch": 1.271651880016899, "grad_norm": 0.9289385080337524, "learning_rate": 4.87283481199831e-05, "loss": 0.0588, "step": 3010 }, { "epoch": 1.2724968314321927, "grad_norm": 0.5512641668319702, "learning_rate": 4.872750316856781e-05, "loss": 0.0457, "step": 3012 }, { "epoch": 1.2733417828474862, "grad_norm": 1.2245798110961914, "learning_rate": 4.8726658217152516e-05, "loss": 0.06, "step": 3014 }, { "epoch": 1.27418673426278, "grad_norm": 1.5422766208648682, "learning_rate": 4.872581326573722e-05, "loss": 0.0587, "step": 3016 }, { "epoch": 1.2750316856780735, "grad_norm": 1.526551604270935, "learning_rate": 4.872496831432193e-05, "loss": 0.0808, "step": 3018 }, { "epoch": 1.2758766370933672, "grad_norm": 1.5560036897659302, "learning_rate": 4.872412336290664e-05, "loss": 0.0531, "step": 3020 }, { "epoch": 1.2767215885086607, "grad_norm": 1.0509698390960693, "learning_rate": 4.872327841149134e-05, "loss": 0.0342, "step": 3022 }, { "epoch": 1.2775665399239544, "grad_norm": 1.421242356300354, "learning_rate": 4.872243346007605e-05, "loss": 0.0536, "step": 3024 }, { "epoch": 1.278411491339248, "grad_norm": 0.5734511613845825, "learning_rate": 4.872158850866075e-05, "loss": 0.0293, "step": 3026 }, { "epoch": 1.2792564427545416, "grad_norm": 1.554356336593628, "learning_rate": 4.872074355724546e-05, "loss": 0.1014, "step": 3028 }, { "epoch": 1.2801013941698351, "grad_norm": 0.9434201717376709, "learning_rate": 4.871989860583017e-05, "loss": 0.0332, "step": 3030 }, { "epoch": 1.2809463455851289, "grad_norm": 1.089858055114746, "learning_rate": 4.8719053654414874e-05, "loss": 0.0408, "step": 3032 }, { "epoch": 1.2817912970004226, "grad_norm": 1.2021937370300293, "learning_rate": 4.871820870299958e-05, "loss": 0.0588, "step": 3034 }, { "epoch": 1.282636248415716, "grad_norm": 0.9895578622817993, "learning_rate": 4.871736375158429e-05, "loss": 0.0568, "step": 3036 }, { "epoch": 1.2834811998310096, "grad_norm": 1.1666065454483032, "learning_rate": 4.871651880016899e-05, "loss": 0.0351, "step": 3038 }, { "epoch": 1.2843261512463033, "grad_norm": 0.9048338532447815, "learning_rate": 4.87156738487537e-05, "loss": 0.0455, "step": 3040 }, { "epoch": 1.285171102661597, "grad_norm": 1.392256259918213, "learning_rate": 4.87148288973384e-05, "loss": 0.0706, "step": 3042 }, { "epoch": 1.2860160540768906, "grad_norm": 1.8354949951171875, "learning_rate": 4.871398394592312e-05, "loss": 0.0558, "step": 3044 }, { "epoch": 1.286861005492184, "grad_norm": 1.7575472593307495, "learning_rate": 4.871313899450782e-05, "loss": 0.0918, "step": 3046 }, { "epoch": 1.2877059569074778, "grad_norm": 1.5376323461532593, "learning_rate": 4.8712294043092525e-05, "loss": 0.0524, "step": 3048 }, { "epoch": 1.2885509083227715, "grad_norm": 1.8223731517791748, "learning_rate": 4.871144909167723e-05, "loss": 0.0825, "step": 3050 }, { "epoch": 1.289395859738065, "grad_norm": 0.7369791269302368, "learning_rate": 4.871060414026194e-05, "loss": 0.0392, "step": 3052 }, { "epoch": 1.2902408111533588, "grad_norm": 1.4611024856567383, "learning_rate": 4.8709759188846646e-05, "loss": 0.0358, "step": 3054 }, { "epoch": 1.2910857625686523, "grad_norm": 0.8670369386672974, "learning_rate": 4.8708914237431354e-05, "loss": 0.0487, "step": 3056 }, { "epoch": 1.291930713983946, "grad_norm": 1.0213311910629272, "learning_rate": 4.8708069286016054e-05, "loss": 0.0559, "step": 3058 }, { "epoch": 1.2927756653992395, "grad_norm": 1.72591233253479, "learning_rate": 4.870722433460077e-05, "loss": 0.0665, "step": 3060 }, { "epoch": 1.2936206168145332, "grad_norm": 0.8167611360549927, "learning_rate": 4.870637938318547e-05, "loss": 0.0359, "step": 3062 }, { "epoch": 1.2944655682298267, "grad_norm": 0.7950469255447388, "learning_rate": 4.8705534431770176e-05, "loss": 0.0254, "step": 3064 }, { "epoch": 1.2953105196451205, "grad_norm": 0.8578920364379883, "learning_rate": 4.870468948035488e-05, "loss": 0.037, "step": 3066 }, { "epoch": 1.296155471060414, "grad_norm": 1.4960883855819702, "learning_rate": 4.870384452893959e-05, "loss": 0.0682, "step": 3068 }, { "epoch": 1.2970004224757077, "grad_norm": 1.0831998586654663, "learning_rate": 4.87029995775243e-05, "loss": 0.0592, "step": 3070 }, { "epoch": 1.2978453738910012, "grad_norm": 0.7494056820869446, "learning_rate": 4.8702154626109004e-05, "loss": 0.0346, "step": 3072 }, { "epoch": 1.298690325306295, "grad_norm": 1.0414024591445923, "learning_rate": 4.8701309674693705e-05, "loss": 0.0519, "step": 3074 }, { "epoch": 1.2995352767215884, "grad_norm": 1.9897801876068115, "learning_rate": 4.870046472327842e-05, "loss": 0.0363, "step": 3076 }, { "epoch": 1.3003802281368821, "grad_norm": 1.645512580871582, "learning_rate": 4.869961977186312e-05, "loss": 0.0679, "step": 3078 }, { "epoch": 1.3012251795521759, "grad_norm": 0.9590452909469604, "learning_rate": 4.8698774820447826e-05, "loss": 0.0389, "step": 3080 }, { "epoch": 1.3020701309674694, "grad_norm": 1.0363378524780273, "learning_rate": 4.8697929869032534e-05, "loss": 0.0662, "step": 3082 }, { "epoch": 1.3029150823827629, "grad_norm": 0.8959032297134399, "learning_rate": 4.869708491761724e-05, "loss": 0.0451, "step": 3084 }, { "epoch": 1.3037600337980566, "grad_norm": 1.110154628753662, "learning_rate": 4.869623996620195e-05, "loss": 0.0425, "step": 3086 }, { "epoch": 1.3046049852133503, "grad_norm": 1.2252897024154663, "learning_rate": 4.8695395014786655e-05, "loss": 0.0492, "step": 3088 }, { "epoch": 1.3054499366286438, "grad_norm": 0.8930359482765198, "learning_rate": 4.8694550063371356e-05, "loss": 0.0376, "step": 3090 }, { "epoch": 1.3062948880439373, "grad_norm": 1.2637935876846313, "learning_rate": 4.869370511195607e-05, "loss": 0.0387, "step": 3092 }, { "epoch": 1.307139839459231, "grad_norm": 1.6533139944076538, "learning_rate": 4.869286016054077e-05, "loss": 0.068, "step": 3094 }, { "epoch": 1.3079847908745248, "grad_norm": 1.4910434484481812, "learning_rate": 4.869201520912548e-05, "loss": 0.0341, "step": 3096 }, { "epoch": 1.3088297422898183, "grad_norm": 1.8592034578323364, "learning_rate": 4.8691170257710184e-05, "loss": 0.0765, "step": 3098 }, { "epoch": 1.309674693705112, "grad_norm": 1.5364049673080444, "learning_rate": 4.869032530629489e-05, "loss": 0.0805, "step": 3100 }, { "epoch": 1.309674693705112, "eval_accuracy": 0.7233143645456113, "eval_cer": 0.07973040923807087, "eval_loss": 0.183185413479805, "eval_runtime": 858.546, "eval_samples_per_second": 13.509, "eval_steps_per_second": 0.423, "step": 3100 }, { "epoch": 1.3105196451204055, "grad_norm": 0.9121136665344238, "learning_rate": 4.86894803548796e-05, "loss": 0.0553, "step": 3102 }, { "epoch": 1.3113645965356993, "grad_norm": 0.8206782341003418, "learning_rate": 4.8688635403464306e-05, "loss": 0.0856, "step": 3104 }, { "epoch": 1.3122095479509928, "grad_norm": 1.5402354001998901, "learning_rate": 4.8687790452049006e-05, "loss": 0.0776, "step": 3106 }, { "epoch": 1.3130544993662865, "grad_norm": 1.8080055713653564, "learning_rate": 4.868694550063372e-05, "loss": 0.0838, "step": 3108 }, { "epoch": 1.31389945078158, "grad_norm": 1.5559817552566528, "learning_rate": 4.868610054921842e-05, "loss": 0.0727, "step": 3110 }, { "epoch": 1.3147444021968737, "grad_norm": 1.1048095226287842, "learning_rate": 4.868525559780313e-05, "loss": 0.042, "step": 3112 }, { "epoch": 1.3155893536121672, "grad_norm": 1.1088711023330688, "learning_rate": 4.8684410646387835e-05, "loss": 0.0363, "step": 3114 }, { "epoch": 1.316434305027461, "grad_norm": 0.7765578031539917, "learning_rate": 4.868356569497254e-05, "loss": 0.0496, "step": 3116 }, { "epoch": 1.3172792564427547, "grad_norm": 1.4490267038345337, "learning_rate": 4.868272074355725e-05, "loss": 0.0701, "step": 3118 }, { "epoch": 1.3181242078580482, "grad_norm": 1.2103753089904785, "learning_rate": 4.868187579214196e-05, "loss": 0.0698, "step": 3120 }, { "epoch": 1.3189691592733417, "grad_norm": 0.9798583984375, "learning_rate": 4.868103084072666e-05, "loss": 0.0581, "step": 3122 }, { "epoch": 1.3198141106886354, "grad_norm": 1.6193875074386597, "learning_rate": 4.868018588931137e-05, "loss": 0.0594, "step": 3124 }, { "epoch": 1.3206590621039291, "grad_norm": 1.3513240814208984, "learning_rate": 4.867934093789607e-05, "loss": 0.0716, "step": 3126 }, { "epoch": 1.3215040135192226, "grad_norm": 2.1795995235443115, "learning_rate": 4.867849598648078e-05, "loss": 0.0645, "step": 3128 }, { "epoch": 1.3223489649345161, "grad_norm": 1.0579677820205688, "learning_rate": 4.8677651035065486e-05, "loss": 0.0576, "step": 3130 }, { "epoch": 1.3231939163498099, "grad_norm": 1.3420937061309814, "learning_rate": 4.867680608365019e-05, "loss": 0.0651, "step": 3132 }, { "epoch": 1.3240388677651036, "grad_norm": 1.300595760345459, "learning_rate": 4.86759611322349e-05, "loss": 0.0684, "step": 3134 }, { "epoch": 1.324883819180397, "grad_norm": 1.1893776655197144, "learning_rate": 4.867511618081961e-05, "loss": 0.0447, "step": 3136 }, { "epoch": 1.3257287705956908, "grad_norm": 0.800032913684845, "learning_rate": 4.867427122940431e-05, "loss": 0.0312, "step": 3138 }, { "epoch": 1.3265737220109843, "grad_norm": 0.8352811932563782, "learning_rate": 4.867342627798902e-05, "loss": 0.0304, "step": 3140 }, { "epoch": 1.327418673426278, "grad_norm": 1.032954454421997, "learning_rate": 4.867258132657372e-05, "loss": 0.05, "step": 3142 }, { "epoch": 1.3282636248415716, "grad_norm": 1.1263511180877686, "learning_rate": 4.8671736375158436e-05, "loss": 0.0373, "step": 3144 }, { "epoch": 1.3291085762568653, "grad_norm": 0.6103000044822693, "learning_rate": 4.867089142374314e-05, "loss": 0.0483, "step": 3146 }, { "epoch": 1.3299535276721588, "grad_norm": 0.8234397768974304, "learning_rate": 4.8670046472327844e-05, "loss": 0.0406, "step": 3148 }, { "epoch": 1.3307984790874525, "grad_norm": 1.4005287885665894, "learning_rate": 4.866920152091255e-05, "loss": 0.0543, "step": 3150 }, { "epoch": 1.331643430502746, "grad_norm": 0.7845334410667419, "learning_rate": 4.866835656949726e-05, "loss": 0.0483, "step": 3152 }, { "epoch": 1.3324883819180398, "grad_norm": 1.4690190553665161, "learning_rate": 4.8667511618081966e-05, "loss": 0.031, "step": 3154 }, { "epoch": 1.3333333333333333, "grad_norm": 1.3612943887710571, "learning_rate": 4.866666666666667e-05, "loss": 0.0605, "step": 3156 }, { "epoch": 1.334178284748627, "grad_norm": 1.120639681816101, "learning_rate": 4.866582171525137e-05, "loss": 0.0481, "step": 3158 }, { "epoch": 1.3350232361639205, "grad_norm": 1.0624009370803833, "learning_rate": 4.866497676383609e-05, "loss": 0.0894, "step": 3160 }, { "epoch": 1.3358681875792142, "grad_norm": 0.8806793689727783, "learning_rate": 4.866413181242079e-05, "loss": 0.0623, "step": 3162 }, { "epoch": 1.336713138994508, "grad_norm": 1.1015374660491943, "learning_rate": 4.8663286861005495e-05, "loss": 0.0596, "step": 3164 }, { "epoch": 1.3375580904098014, "grad_norm": 1.0393707752227783, "learning_rate": 4.86624419095902e-05, "loss": 0.0635, "step": 3166 }, { "epoch": 1.338403041825095, "grad_norm": 1.5667744874954224, "learning_rate": 4.866159695817491e-05, "loss": 0.0555, "step": 3168 }, { "epoch": 1.3392479932403887, "grad_norm": 0.5672454237937927, "learning_rate": 4.8660752006759616e-05, "loss": 0.0206, "step": 3170 }, { "epoch": 1.3400929446556824, "grad_norm": 1.0931203365325928, "learning_rate": 4.8659907055344324e-05, "loss": 0.0463, "step": 3172 }, { "epoch": 1.340937896070976, "grad_norm": 1.1247515678405762, "learning_rate": 4.8659062103929024e-05, "loss": 0.0466, "step": 3174 }, { "epoch": 1.3417828474862694, "grad_norm": 1.6329139471054077, "learning_rate": 4.865821715251374e-05, "loss": 0.0686, "step": 3176 }, { "epoch": 1.3426277989015631, "grad_norm": 0.6330051422119141, "learning_rate": 4.865737220109844e-05, "loss": 0.0517, "step": 3178 }, { "epoch": 1.3434727503168569, "grad_norm": 1.0388518571853638, "learning_rate": 4.8656527249683146e-05, "loss": 0.0455, "step": 3180 }, { "epoch": 1.3443177017321504, "grad_norm": 1.2583259344100952, "learning_rate": 4.865568229826785e-05, "loss": 0.0837, "step": 3182 }, { "epoch": 1.345162653147444, "grad_norm": 1.0448977947235107, "learning_rate": 4.865483734685256e-05, "loss": 0.0481, "step": 3184 }, { "epoch": 1.3460076045627376, "grad_norm": 1.3680537939071655, "learning_rate": 4.865399239543727e-05, "loss": 0.0615, "step": 3186 }, { "epoch": 1.3468525559780313, "grad_norm": 1.131136417388916, "learning_rate": 4.8653147444021974e-05, "loss": 0.0402, "step": 3188 }, { "epoch": 1.3476975073933248, "grad_norm": 1.0717076063156128, "learning_rate": 4.8652302492606675e-05, "loss": 0.0569, "step": 3190 }, { "epoch": 1.3485424588086186, "grad_norm": 0.7371670603752136, "learning_rate": 4.865145754119139e-05, "loss": 0.0498, "step": 3192 }, { "epoch": 1.349387410223912, "grad_norm": 1.0945547819137573, "learning_rate": 4.865061258977609e-05, "loss": 0.0543, "step": 3194 }, { "epoch": 1.3502323616392058, "grad_norm": 1.342444658279419, "learning_rate": 4.8649767638360796e-05, "loss": 0.0763, "step": 3196 }, { "epoch": 1.3510773130544993, "grad_norm": 0.6870439648628235, "learning_rate": 4.8648922686945504e-05, "loss": 0.0454, "step": 3198 }, { "epoch": 1.351922264469793, "grad_norm": 1.172759771347046, "learning_rate": 4.864807773553021e-05, "loss": 0.055, "step": 3200 }, { "epoch": 1.351922264469793, "eval_accuracy": 0.7313329884462838, "eval_cer": 0.07825718746967544, "eval_loss": 0.1792062371969223, "eval_runtime": 861.651, "eval_samples_per_second": 13.46, "eval_steps_per_second": 0.421, "step": 3200 }, { "epoch": 1.3527672158850865, "grad_norm": 1.0497750043869019, "learning_rate": 4.864723278411492e-05, "loss": 0.0348, "step": 3202 }, { "epoch": 1.3536121673003803, "grad_norm": 0.7206516265869141, "learning_rate": 4.8646387832699625e-05, "loss": 0.0368, "step": 3204 }, { "epoch": 1.3544571187156738, "grad_norm": 1.5473790168762207, "learning_rate": 4.8645542881284326e-05, "loss": 0.053, "step": 3206 }, { "epoch": 1.3553020701309675, "grad_norm": 1.5096811056137085, "learning_rate": 4.864469792986904e-05, "loss": 0.0786, "step": 3208 }, { "epoch": 1.3561470215462612, "grad_norm": 1.3632938861846924, "learning_rate": 4.864385297845374e-05, "loss": 0.08, "step": 3210 }, { "epoch": 1.3569919729615547, "grad_norm": 1.0166387557983398, "learning_rate": 4.864300802703845e-05, "loss": 0.0559, "step": 3212 }, { "epoch": 1.3578369243768482, "grad_norm": 1.3810648918151855, "learning_rate": 4.8642163075623154e-05, "loss": 0.0533, "step": 3214 }, { "epoch": 1.358681875792142, "grad_norm": 1.6063209772109985, "learning_rate": 4.864131812420786e-05, "loss": 0.0802, "step": 3216 }, { "epoch": 1.3595268272074357, "grad_norm": 0.754176676273346, "learning_rate": 4.864047317279257e-05, "loss": 0.056, "step": 3218 }, { "epoch": 1.3603717786227292, "grad_norm": 0.9545078873634338, "learning_rate": 4.8639628221377276e-05, "loss": 0.0346, "step": 3220 }, { "epoch": 1.3612167300380227, "grad_norm": 1.2821192741394043, "learning_rate": 4.8638783269961976e-05, "loss": 0.0522, "step": 3222 }, { "epoch": 1.3620616814533164, "grad_norm": 1.6496917009353638, "learning_rate": 4.863793831854669e-05, "loss": 0.0552, "step": 3224 }, { "epoch": 1.3629066328686101, "grad_norm": 0.9524357318878174, "learning_rate": 4.863709336713139e-05, "loss": 0.0443, "step": 3226 }, { "epoch": 1.3637515842839036, "grad_norm": 2.012040138244629, "learning_rate": 4.86362484157161e-05, "loss": 0.106, "step": 3228 }, { "epoch": 1.3645965356991974, "grad_norm": 1.7522271871566772, "learning_rate": 4.8635403464300805e-05, "loss": 0.0511, "step": 3230 }, { "epoch": 1.3654414871144909, "grad_norm": 1.4558488130569458, "learning_rate": 4.863455851288551e-05, "loss": 0.0682, "step": 3232 }, { "epoch": 1.3662864385297846, "grad_norm": 1.263836145401001, "learning_rate": 4.863371356147022e-05, "loss": 0.0678, "step": 3234 }, { "epoch": 1.367131389945078, "grad_norm": 1.5037480592727661, "learning_rate": 4.863286861005493e-05, "loss": 0.0764, "step": 3236 }, { "epoch": 1.3679763413603718, "grad_norm": 0.8802792429924011, "learning_rate": 4.863202365863963e-05, "loss": 0.0575, "step": 3238 }, { "epoch": 1.3688212927756653, "grad_norm": 1.4338186979293823, "learning_rate": 4.863117870722434e-05, "loss": 0.0469, "step": 3240 }, { "epoch": 1.369666244190959, "grad_norm": 1.3018105030059814, "learning_rate": 4.863033375580904e-05, "loss": 0.0542, "step": 3242 }, { "epoch": 1.3705111956062526, "grad_norm": 0.8074570894241333, "learning_rate": 4.8629488804393756e-05, "loss": 0.0392, "step": 3244 }, { "epoch": 1.3713561470215463, "grad_norm": 1.513663411140442, "learning_rate": 4.8628643852978456e-05, "loss": 0.0563, "step": 3246 }, { "epoch": 1.37220109843684, "grad_norm": 1.381496787071228, "learning_rate": 4.862779890156316e-05, "loss": 0.0526, "step": 3248 }, { "epoch": 1.3730460498521335, "grad_norm": 1.0638294219970703, "learning_rate": 4.862695395014787e-05, "loss": 0.0497, "step": 3250 }, { "epoch": 1.373891001267427, "grad_norm": 1.7604972124099731, "learning_rate": 4.862610899873258e-05, "loss": 0.0656, "step": 3252 }, { "epoch": 1.3747359526827208, "grad_norm": 0.8120830059051514, "learning_rate": 4.8625264047317285e-05, "loss": 0.033, "step": 3254 }, { "epoch": 1.3755809040980145, "grad_norm": 0.7674900889396667, "learning_rate": 4.862441909590199e-05, "loss": 0.0427, "step": 3256 }, { "epoch": 1.376425855513308, "grad_norm": 0.8798816800117493, "learning_rate": 4.862357414448669e-05, "loss": 0.0474, "step": 3258 }, { "epoch": 1.3772708069286015, "grad_norm": 0.8720738887786865, "learning_rate": 4.86227291930714e-05, "loss": 0.053, "step": 3260 }, { "epoch": 1.3781157583438952, "grad_norm": 1.4881258010864258, "learning_rate": 4.862188424165611e-05, "loss": 0.0565, "step": 3262 }, { "epoch": 1.378960709759189, "grad_norm": 1.2955182790756226, "learning_rate": 4.8621039290240814e-05, "loss": 0.0409, "step": 3264 }, { "epoch": 1.3798056611744824, "grad_norm": 0.8371695876121521, "learning_rate": 4.862019433882552e-05, "loss": 0.0453, "step": 3266 }, { "epoch": 1.380650612589776, "grad_norm": 0.7356275916099548, "learning_rate": 4.861934938741022e-05, "loss": 0.0385, "step": 3268 }, { "epoch": 1.3814955640050697, "grad_norm": 0.7049744725227356, "learning_rate": 4.8618504435994936e-05, "loss": 0.0311, "step": 3270 }, { "epoch": 1.3823405154203634, "grad_norm": 1.2064480781555176, "learning_rate": 4.8617659484579636e-05, "loss": 0.0689, "step": 3272 }, { "epoch": 1.383185466835657, "grad_norm": 1.173024296760559, "learning_rate": 4.861681453316434e-05, "loss": 0.0644, "step": 3274 }, { "epoch": 1.3840304182509506, "grad_norm": 1.1009023189544678, "learning_rate": 4.861596958174905e-05, "loss": 0.0343, "step": 3276 }, { "epoch": 1.3848753696662441, "grad_norm": 1.1866307258605957, "learning_rate": 4.861512463033376e-05, "loss": 0.0398, "step": 3278 }, { "epoch": 1.3857203210815379, "grad_norm": 0.8218021988868713, "learning_rate": 4.8614279678918465e-05, "loss": 0.0483, "step": 3280 }, { "epoch": 1.3865652724968314, "grad_norm": 1.1046007871627808, "learning_rate": 4.861343472750317e-05, "loss": 0.0676, "step": 3282 }, { "epoch": 1.387410223912125, "grad_norm": 0.6497601866722107, "learning_rate": 4.861258977608787e-05, "loss": 0.0293, "step": 3284 }, { "epoch": 1.3882551753274186, "grad_norm": 1.103437066078186, "learning_rate": 4.8611744824672586e-05, "loss": 0.0379, "step": 3286 }, { "epoch": 1.3891001267427123, "grad_norm": 0.9892705678939819, "learning_rate": 4.861089987325729e-05, "loss": 0.0557, "step": 3288 }, { "epoch": 1.3899450781580058, "grad_norm": 0.8333448767662048, "learning_rate": 4.8610054921841994e-05, "loss": 0.0275, "step": 3290 }, { "epoch": 1.3907900295732996, "grad_norm": 1.19562828540802, "learning_rate": 4.86092099704267e-05, "loss": 0.0497, "step": 3292 }, { "epoch": 1.3916349809885933, "grad_norm": 1.448594093322754, "learning_rate": 4.860836501901141e-05, "loss": 0.0694, "step": 3294 }, { "epoch": 1.3924799324038868, "grad_norm": 1.5646843910217285, "learning_rate": 4.8607520067596116e-05, "loss": 0.0426, "step": 3296 }, { "epoch": 1.3933248838191803, "grad_norm": 1.2052522897720337, "learning_rate": 4.860667511618082e-05, "loss": 0.0654, "step": 3298 }, { "epoch": 1.394169835234474, "grad_norm": 0.7969152331352234, "learning_rate": 4.860583016476552e-05, "loss": 0.0585, "step": 3300 }, { "epoch": 1.394169835234474, "eval_accuracy": 0.74098982583204, "eval_cer": 0.07193203772153461, "eval_loss": 0.17303836345672607, "eval_runtime": 852.7734, "eval_samples_per_second": 13.6, "eval_steps_per_second": 0.426, "step": 3300 }, { "epoch": 1.3950147866497677, "grad_norm": 1.1091243028640747, "learning_rate": 4.860498521335024e-05, "loss": 0.0598, "step": 3302 }, { "epoch": 1.3958597380650613, "grad_norm": 1.6562272310256958, "learning_rate": 4.860414026193494e-05, "loss": 0.0402, "step": 3304 }, { "epoch": 1.3967046894803548, "grad_norm": 0.8330667018890381, "learning_rate": 4.8603295310519645e-05, "loss": 0.0563, "step": 3306 }, { "epoch": 1.3975496408956485, "grad_norm": 1.4728366136550903, "learning_rate": 4.860245035910435e-05, "loss": 0.0635, "step": 3308 }, { "epoch": 1.3983945923109422, "grad_norm": 1.3313997983932495, "learning_rate": 4.860160540768906e-05, "loss": 0.0547, "step": 3310 }, { "epoch": 1.3992395437262357, "grad_norm": 1.9861879348754883, "learning_rate": 4.8600760456273766e-05, "loss": 0.0567, "step": 3312 }, { "epoch": 1.4000844951415294, "grad_norm": 0.7557132244110107, "learning_rate": 4.8599915504858474e-05, "loss": 0.0478, "step": 3314 }, { "epoch": 1.400929446556823, "grad_norm": 0.5707297325134277, "learning_rate": 4.8599070553443174e-05, "loss": 0.0377, "step": 3316 }, { "epoch": 1.4017743979721167, "grad_norm": 1.3128173351287842, "learning_rate": 4.859822560202789e-05, "loss": 0.0579, "step": 3318 }, { "epoch": 1.4026193493874102, "grad_norm": 1.7858682870864868, "learning_rate": 4.859738065061259e-05, "loss": 0.0718, "step": 3320 }, { "epoch": 1.403464300802704, "grad_norm": 1.2796276807785034, "learning_rate": 4.8596535699197296e-05, "loss": 0.0408, "step": 3322 }, { "epoch": 1.4043092522179974, "grad_norm": 0.8803476095199585, "learning_rate": 4.8595690747782e-05, "loss": 0.0522, "step": 3324 }, { "epoch": 1.4051542036332911, "grad_norm": 0.9975403547286987, "learning_rate": 4.859484579636671e-05, "loss": 0.0305, "step": 3326 }, { "epoch": 1.4059991550485846, "grad_norm": 0.4852333962917328, "learning_rate": 4.859400084495142e-05, "loss": 0.021, "step": 3328 }, { "epoch": 1.4068441064638784, "grad_norm": 1.1154181957244873, "learning_rate": 4.8593155893536124e-05, "loss": 0.0351, "step": 3330 }, { "epoch": 1.4076890578791719, "grad_norm": 0.9479047656059265, "learning_rate": 4.8592310942120825e-05, "loss": 0.0219, "step": 3332 }, { "epoch": 1.4085340092944656, "grad_norm": 0.5760020017623901, "learning_rate": 4.859146599070554e-05, "loss": 0.0527, "step": 3334 }, { "epoch": 1.409378960709759, "grad_norm": 1.5132248401641846, "learning_rate": 4.859062103929024e-05, "loss": 0.05, "step": 3336 }, { "epoch": 1.4102239121250528, "grad_norm": 1.0323017835617065, "learning_rate": 4.8589776087874946e-05, "loss": 0.044, "step": 3338 }, { "epoch": 1.4110688635403466, "grad_norm": 1.1576844453811646, "learning_rate": 4.8588931136459654e-05, "loss": 0.0751, "step": 3340 }, { "epoch": 1.41191381495564, "grad_norm": 1.1319127082824707, "learning_rate": 4.858808618504436e-05, "loss": 0.0598, "step": 3342 }, { "epoch": 1.4127587663709336, "grad_norm": 1.139543056488037, "learning_rate": 4.858724123362907e-05, "loss": 0.0542, "step": 3344 }, { "epoch": 1.4136037177862273, "grad_norm": 1.0183779001235962, "learning_rate": 4.8586396282213775e-05, "loss": 0.0385, "step": 3346 }, { "epoch": 1.414448669201521, "grad_norm": 0.6774150729179382, "learning_rate": 4.8585551330798476e-05, "loss": 0.0225, "step": 3348 }, { "epoch": 1.4152936206168145, "grad_norm": 1.525310754776001, "learning_rate": 4.858470637938319e-05, "loss": 0.071, "step": 3350 }, { "epoch": 1.416138572032108, "grad_norm": 1.1040525436401367, "learning_rate": 4.858386142796789e-05, "loss": 0.0322, "step": 3352 }, { "epoch": 1.4169835234474017, "grad_norm": 1.2062792778015137, "learning_rate": 4.8583016476552604e-05, "loss": 0.0679, "step": 3354 }, { "epoch": 1.4178284748626955, "grad_norm": 1.0307128429412842, "learning_rate": 4.8582171525137304e-05, "loss": 0.0512, "step": 3356 }, { "epoch": 1.418673426277989, "grad_norm": 1.3051033020019531, "learning_rate": 4.858132657372201e-05, "loss": 0.045, "step": 3358 }, { "epoch": 1.4195183776932827, "grad_norm": 0.960086464881897, "learning_rate": 4.858048162230672e-05, "loss": 0.065, "step": 3360 }, { "epoch": 1.4203633291085762, "grad_norm": 1.8996869325637817, "learning_rate": 4.8579636670891426e-05, "loss": 0.0925, "step": 3362 }, { "epoch": 1.42120828052387, "grad_norm": 1.6697046756744385, "learning_rate": 4.857879171947613e-05, "loss": 0.0473, "step": 3364 }, { "epoch": 1.4220532319391634, "grad_norm": 1.2368358373641968, "learning_rate": 4.857794676806084e-05, "loss": 0.0495, "step": 3366 }, { "epoch": 1.4228981833544572, "grad_norm": 1.3032385110855103, "learning_rate": 4.857710181664554e-05, "loss": 0.042, "step": 3368 }, { "epoch": 1.4237431347697507, "grad_norm": 1.1829075813293457, "learning_rate": 4.8576256865230255e-05, "loss": 0.0909, "step": 3370 }, { "epoch": 1.4245880861850444, "grad_norm": 1.2332669496536255, "learning_rate": 4.8575411913814955e-05, "loss": 0.0592, "step": 3372 }, { "epoch": 1.425433037600338, "grad_norm": 1.6120294332504272, "learning_rate": 4.857456696239966e-05, "loss": 0.0711, "step": 3374 }, { "epoch": 1.4262779890156316, "grad_norm": 1.1956902742385864, "learning_rate": 4.857372201098437e-05, "loss": 0.04, "step": 3376 }, { "epoch": 1.4271229404309254, "grad_norm": 1.5087355375289917, "learning_rate": 4.857287705956908e-05, "loss": 0.0488, "step": 3378 }, { "epoch": 1.4279678918462189, "grad_norm": 1.6760591268539429, "learning_rate": 4.8572032108153784e-05, "loss": 0.0851, "step": 3380 }, { "epoch": 1.4288128432615124, "grad_norm": 0.8411288857460022, "learning_rate": 4.857118715673849e-05, "loss": 0.023, "step": 3382 }, { "epoch": 1.429657794676806, "grad_norm": 1.0701520442962646, "learning_rate": 4.857034220532319e-05, "loss": 0.0479, "step": 3384 }, { "epoch": 1.4305027460920998, "grad_norm": 0.9319422841072083, "learning_rate": 4.8569497253907906e-05, "loss": 0.0686, "step": 3386 }, { "epoch": 1.4313476975073933, "grad_norm": 0.8737354278564453, "learning_rate": 4.8568652302492606e-05, "loss": 0.0662, "step": 3388 }, { "epoch": 1.4321926489226868, "grad_norm": 0.5453359484672546, "learning_rate": 4.856780735107731e-05, "loss": 0.0324, "step": 3390 }, { "epoch": 1.4330376003379806, "grad_norm": 0.6740847229957581, "learning_rate": 4.856696239966202e-05, "loss": 0.0392, "step": 3392 }, { "epoch": 1.4338825517532743, "grad_norm": 1.1385046243667603, "learning_rate": 4.856611744824673e-05, "loss": 0.0392, "step": 3394 }, { "epoch": 1.4347275031685678, "grad_norm": 0.7803374528884888, "learning_rate": 4.8565272496831435e-05, "loss": 0.0372, "step": 3396 }, { "epoch": 1.4355724545838613, "grad_norm": 0.8832899928092957, "learning_rate": 4.856442754541614e-05, "loss": 0.0406, "step": 3398 }, { "epoch": 1.436417405999155, "grad_norm": 1.3527898788452148, "learning_rate": 4.856358259400084e-05, "loss": 0.048, "step": 3400 }, { "epoch": 1.436417405999155, "eval_accuracy": 0.7294361096740818, "eval_cer": 0.07786903323129582, "eval_loss": 0.1757935732603073, "eval_runtime": 862.8655, "eval_samples_per_second": 13.441, "eval_steps_per_second": 0.421, "step": 3400 }, { "epoch": 1.4372623574144487, "grad_norm": 0.9552874565124512, "learning_rate": 4.8562737642585556e-05, "loss": 0.0644, "step": 3402 }, { "epoch": 1.4381073088297422, "grad_norm": 1.3589330911636353, "learning_rate": 4.856189269117026e-05, "loss": 0.0489, "step": 3404 }, { "epoch": 1.438952260245036, "grad_norm": 0.9378911852836609, "learning_rate": 4.8561047739754964e-05, "loss": 0.0302, "step": 3406 }, { "epoch": 1.4397972116603295, "grad_norm": 1.4078298807144165, "learning_rate": 4.856020278833967e-05, "loss": 0.0621, "step": 3408 }, { "epoch": 1.4406421630756232, "grad_norm": 0.6866748929023743, "learning_rate": 4.855935783692438e-05, "loss": 0.0289, "step": 3410 }, { "epoch": 1.4414871144909167, "grad_norm": 1.534067988395691, "learning_rate": 4.8558512885509086e-05, "loss": 0.0658, "step": 3412 }, { "epoch": 1.4423320659062104, "grad_norm": 1.2581876516342163, "learning_rate": 4.855766793409379e-05, "loss": 0.0338, "step": 3414 }, { "epoch": 1.443177017321504, "grad_norm": 0.8533525466918945, "learning_rate": 4.855682298267849e-05, "loss": 0.0269, "step": 3416 }, { "epoch": 1.4440219687367977, "grad_norm": 1.2948267459869385, "learning_rate": 4.855597803126321e-05, "loss": 0.0476, "step": 3418 }, { "epoch": 1.4448669201520912, "grad_norm": 1.1799198389053345, "learning_rate": 4.855513307984791e-05, "loss": 0.0315, "step": 3420 }, { "epoch": 1.445711871567385, "grad_norm": 1.5496565103530884, "learning_rate": 4.8554288128432615e-05, "loss": 0.0595, "step": 3422 }, { "epoch": 1.4465568229826786, "grad_norm": 1.3309407234191895, "learning_rate": 4.855344317701732e-05, "loss": 0.0335, "step": 3424 }, { "epoch": 1.4474017743979721, "grad_norm": 0.614343523979187, "learning_rate": 4.855259822560203e-05, "loss": 0.0232, "step": 3426 }, { "epoch": 1.4482467258132656, "grad_norm": 1.4628639221191406, "learning_rate": 4.8551753274186736e-05, "loss": 0.0552, "step": 3428 }, { "epoch": 1.4490916772285594, "grad_norm": 1.2969770431518555, "learning_rate": 4.8550908322771444e-05, "loss": 0.058, "step": 3430 }, { "epoch": 1.449936628643853, "grad_norm": 1.3628487586975098, "learning_rate": 4.8550063371356144e-05, "loss": 0.0558, "step": 3432 }, { "epoch": 1.4507815800591466, "grad_norm": 1.4558742046356201, "learning_rate": 4.854921841994086e-05, "loss": 0.0619, "step": 3434 }, { "epoch": 1.45162653147444, "grad_norm": 1.390631079673767, "learning_rate": 4.854837346852556e-05, "loss": 0.0686, "step": 3436 }, { "epoch": 1.4524714828897338, "grad_norm": 1.0281144380569458, "learning_rate": 4.8547528517110266e-05, "loss": 0.039, "step": 3438 }, { "epoch": 1.4533164343050275, "grad_norm": 1.603379726409912, "learning_rate": 4.854668356569497e-05, "loss": 0.0877, "step": 3440 }, { "epoch": 1.454161385720321, "grad_norm": 0.7130106091499329, "learning_rate": 4.854583861427968e-05, "loss": 0.0424, "step": 3442 }, { "epoch": 1.4550063371356148, "grad_norm": 1.1394532918930054, "learning_rate": 4.854499366286439e-05, "loss": 0.0464, "step": 3444 }, { "epoch": 1.4558512885509083, "grad_norm": 1.2217916250228882, "learning_rate": 4.8544148711449094e-05, "loss": 0.0582, "step": 3446 }, { "epoch": 1.456696239966202, "grad_norm": 1.2456247806549072, "learning_rate": 4.8543303760033795e-05, "loss": 0.0477, "step": 3448 }, { "epoch": 1.4575411913814955, "grad_norm": 0.7696735858917236, "learning_rate": 4.854245880861851e-05, "loss": 0.0342, "step": 3450 }, { "epoch": 1.4583861427967892, "grad_norm": 1.327634334564209, "learning_rate": 4.854161385720321e-05, "loss": 0.0875, "step": 3452 }, { "epoch": 1.4592310942120827, "grad_norm": 1.5110286474227905, "learning_rate": 4.854076890578792e-05, "loss": 0.057, "step": 3454 }, { "epoch": 1.4600760456273765, "grad_norm": 1.0184332132339478, "learning_rate": 4.8539923954372624e-05, "loss": 0.0526, "step": 3456 }, { "epoch": 1.46092099704267, "grad_norm": 1.3125816583633423, "learning_rate": 4.853907900295733e-05, "loss": 0.0311, "step": 3458 }, { "epoch": 1.4617659484579637, "grad_norm": 1.00360906124115, "learning_rate": 4.853823405154204e-05, "loss": 0.0511, "step": 3460 }, { "epoch": 1.4626108998732572, "grad_norm": 0.9407820701599121, "learning_rate": 4.8537389100126745e-05, "loss": 0.041, "step": 3462 }, { "epoch": 1.463455851288551, "grad_norm": 1.2548154592514038, "learning_rate": 4.853654414871145e-05, "loss": 0.0588, "step": 3464 }, { "epoch": 1.4643008027038444, "grad_norm": 1.1931437253952026, "learning_rate": 4.853569919729616e-05, "loss": 0.0372, "step": 3466 }, { "epoch": 1.4651457541191382, "grad_norm": 1.3026175498962402, "learning_rate": 4.853485424588086e-05, "loss": 0.0482, "step": 3468 }, { "epoch": 1.465990705534432, "grad_norm": 1.1066519021987915, "learning_rate": 4.8534009294465574e-05, "loss": 0.0604, "step": 3470 }, { "epoch": 1.4668356569497254, "grad_norm": 1.1285970211029053, "learning_rate": 4.8533164343050274e-05, "loss": 0.0729, "step": 3472 }, { "epoch": 1.467680608365019, "grad_norm": 1.0288052558898926, "learning_rate": 4.853231939163498e-05, "loss": 0.0488, "step": 3474 }, { "epoch": 1.4685255597803126, "grad_norm": 1.7681716680526733, "learning_rate": 4.853147444021969e-05, "loss": 0.0572, "step": 3476 }, { "epoch": 1.4693705111956064, "grad_norm": 1.4394867420196533, "learning_rate": 4.8530629488804396e-05, "loss": 0.0669, "step": 3478 }, { "epoch": 1.4702154626108999, "grad_norm": 0.686235249042511, "learning_rate": 4.85297845373891e-05, "loss": 0.0246, "step": 3480 }, { "epoch": 1.4710604140261934, "grad_norm": 2.047348976135254, "learning_rate": 4.852893958597381e-05, "loss": 0.0798, "step": 3482 }, { "epoch": 1.471905365441487, "grad_norm": 1.1849197149276733, "learning_rate": 4.852809463455851e-05, "loss": 0.0487, "step": 3484 }, { "epoch": 1.4727503168567808, "grad_norm": 0.973675012588501, "learning_rate": 4.8527249683143225e-05, "loss": 0.0483, "step": 3486 }, { "epoch": 1.4735952682720743, "grad_norm": 1.173438549041748, "learning_rate": 4.8526404731727925e-05, "loss": 0.0497, "step": 3488 }, { "epoch": 1.474440219687368, "grad_norm": 0.9411981701850891, "learning_rate": 4.852555978031263e-05, "loss": 0.0599, "step": 3490 }, { "epoch": 1.4752851711026616, "grad_norm": 0.963691771030426, "learning_rate": 4.852471482889734e-05, "loss": 0.0372, "step": 3492 }, { "epoch": 1.4761301225179553, "grad_norm": 1.085005283355713, "learning_rate": 4.852386987748205e-05, "loss": 0.0612, "step": 3494 }, { "epoch": 1.4769750739332488, "grad_norm": 1.5302870273590088, "learning_rate": 4.8523024926066754e-05, "loss": 0.0519, "step": 3496 }, { "epoch": 1.4778200253485425, "grad_norm": 1.8927478790283203, "learning_rate": 4.852217997465146e-05, "loss": 0.0782, "step": 3498 }, { "epoch": 1.478664976763836, "grad_norm": 1.595861554145813, "learning_rate": 4.852133502323616e-05, "loss": 0.0766, "step": 3500 }, { "epoch": 1.478664976763836, "eval_accuracy": 0.7130539748232454, "eval_cer": 0.08314440219836446, "eval_loss": 0.18499918282032013, "eval_runtime": 847.7155, "eval_samples_per_second": 13.681, "eval_steps_per_second": 0.428, "step": 3500 }, { "epoch": 1.4795099281791297, "grad_norm": 2.2448182106018066, "learning_rate": 4.8520490071820876e-05, "loss": 0.0531, "step": 3502 }, { "epoch": 1.4803548795944232, "grad_norm": 1.2173892259597778, "learning_rate": 4.8519645120405576e-05, "loss": 0.0576, "step": 3504 }, { "epoch": 1.481199831009717, "grad_norm": 0.8814436793327332, "learning_rate": 4.851880016899028e-05, "loss": 0.0401, "step": 3506 }, { "epoch": 1.4820447824250107, "grad_norm": 1.2326481342315674, "learning_rate": 4.851795521757499e-05, "loss": 0.0289, "step": 3508 }, { "epoch": 1.4828897338403042, "grad_norm": 1.1329704523086548, "learning_rate": 4.85171102661597e-05, "loss": 0.0597, "step": 3510 }, { "epoch": 1.4837346852555977, "grad_norm": 1.6266759634017944, "learning_rate": 4.8516265314744405e-05, "loss": 0.0549, "step": 3512 }, { "epoch": 1.4845796366708914, "grad_norm": 0.7869406342506409, "learning_rate": 4.851542036332911e-05, "loss": 0.0411, "step": 3514 }, { "epoch": 1.4854245880861852, "grad_norm": 1.4602829217910767, "learning_rate": 4.851457541191381e-05, "loss": 0.0635, "step": 3516 }, { "epoch": 1.4862695395014787, "grad_norm": 1.53984534740448, "learning_rate": 4.8513730460498526e-05, "loss": 0.062, "step": 3518 }, { "epoch": 1.4871144909167722, "grad_norm": 1.5901530981063843, "learning_rate": 4.851288550908323e-05, "loss": 0.0504, "step": 3520 }, { "epoch": 1.487959442332066, "grad_norm": 1.6808077096939087, "learning_rate": 4.8512040557667934e-05, "loss": 0.0728, "step": 3522 }, { "epoch": 1.4888043937473596, "grad_norm": 1.1791267395019531, "learning_rate": 4.851119560625264e-05, "loss": 0.0568, "step": 3524 }, { "epoch": 1.4896493451626531, "grad_norm": 0.7604468464851379, "learning_rate": 4.851035065483735e-05, "loss": 0.0348, "step": 3526 }, { "epoch": 1.4904942965779466, "grad_norm": 0.5152177214622498, "learning_rate": 4.8509505703422056e-05, "loss": 0.0358, "step": 3528 }, { "epoch": 1.4913392479932404, "grad_norm": 1.253456473350525, "learning_rate": 4.850866075200676e-05, "loss": 0.0637, "step": 3530 }, { "epoch": 1.492184199408534, "grad_norm": 1.5630178451538086, "learning_rate": 4.850781580059146e-05, "loss": 0.0478, "step": 3532 }, { "epoch": 1.4930291508238276, "grad_norm": 0.7693706154823303, "learning_rate": 4.850697084917618e-05, "loss": 0.0295, "step": 3534 }, { "epoch": 1.4938741022391213, "grad_norm": 1.4552922248840332, "learning_rate": 4.850612589776088e-05, "loss": 0.0498, "step": 3536 }, { "epoch": 1.4947190536544148, "grad_norm": 1.1829502582550049, "learning_rate": 4.8505280946345585e-05, "loss": 0.036, "step": 3538 }, { "epoch": 1.4955640050697085, "grad_norm": 1.4774078130722046, "learning_rate": 4.850443599493029e-05, "loss": 0.0378, "step": 3540 }, { "epoch": 1.496408956485002, "grad_norm": 1.580325961112976, "learning_rate": 4.8503591043515e-05, "loss": 0.0629, "step": 3542 }, { "epoch": 1.4972539079002958, "grad_norm": 0.8769638538360596, "learning_rate": 4.8502746092099707e-05, "loss": 0.0229, "step": 3544 }, { "epoch": 1.4980988593155893, "grad_norm": 0.45959189534187317, "learning_rate": 4.8501901140684414e-05, "loss": 0.035, "step": 3546 }, { "epoch": 1.498943810730883, "grad_norm": 1.3305035829544067, "learning_rate": 4.8501056189269114e-05, "loss": 0.0617, "step": 3548 }, { "epoch": 1.4997887621461765, "grad_norm": 0.7923805713653564, "learning_rate": 4.850021123785383e-05, "loss": 0.0263, "step": 3550 }, { "epoch": 1.5006337135614702, "grad_norm": 1.4536417722702026, "learning_rate": 4.849936628643853e-05, "loss": 0.0513, "step": 3552 }, { "epoch": 1.501478664976764, "grad_norm": 1.329981803894043, "learning_rate": 4.849852133502324e-05, "loss": 0.0615, "step": 3554 }, { "epoch": 1.5023236163920575, "grad_norm": 0.9799890518188477, "learning_rate": 4.849767638360794e-05, "loss": 0.0483, "step": 3556 }, { "epoch": 1.503168567807351, "grad_norm": 0.7838258743286133, "learning_rate": 4.849683143219265e-05, "loss": 0.0378, "step": 3558 }, { "epoch": 1.5040135192226447, "grad_norm": 1.4174835681915283, "learning_rate": 4.849598648077736e-05, "loss": 0.057, "step": 3560 }, { "epoch": 1.5048584706379384, "grad_norm": 1.955910325050354, "learning_rate": 4.8495141529362065e-05, "loss": 0.0573, "step": 3562 }, { "epoch": 1.505703422053232, "grad_norm": 1.1473673582077026, "learning_rate": 4.849429657794677e-05, "loss": 0.0664, "step": 3564 }, { "epoch": 1.5065483734685254, "grad_norm": 1.1155494451522827, "learning_rate": 4.849345162653148e-05, "loss": 0.0519, "step": 3566 }, { "epoch": 1.5073933248838192, "grad_norm": 1.3899874687194824, "learning_rate": 4.849260667511618e-05, "loss": 0.0513, "step": 3568 }, { "epoch": 1.508238276299113, "grad_norm": 1.4755295515060425, "learning_rate": 4.849176172370089e-05, "loss": 0.0808, "step": 3570 }, { "epoch": 1.5090832277144064, "grad_norm": 2.025038242340088, "learning_rate": 4.8490916772285594e-05, "loss": 0.07, "step": 3572 }, { "epoch": 1.5099281791297, "grad_norm": 1.5562915802001953, "learning_rate": 4.84900718208703e-05, "loss": 0.0648, "step": 3574 }, { "epoch": 1.5107731305449936, "grad_norm": 1.214245319366455, "learning_rate": 4.848922686945501e-05, "loss": 0.031, "step": 3576 }, { "epoch": 1.5116180819602874, "grad_norm": 1.378103256225586, "learning_rate": 4.8488381918039715e-05, "loss": 0.0728, "step": 3578 }, { "epoch": 1.5124630333755809, "grad_norm": 1.2112326622009277, "learning_rate": 4.848753696662442e-05, "loss": 0.0583, "step": 3580 }, { "epoch": 1.5133079847908744, "grad_norm": 0.9792726039886475, "learning_rate": 4.848669201520913e-05, "loss": 0.0547, "step": 3582 }, { "epoch": 1.5141529362061683, "grad_norm": 0.9429163336753845, "learning_rate": 4.848584706379383e-05, "loss": 0.0405, "step": 3584 }, { "epoch": 1.5149978876214618, "grad_norm": 1.9372706413269043, "learning_rate": 4.8485002112378544e-05, "loss": 0.0825, "step": 3586 }, { "epoch": 1.5158428390367553, "grad_norm": 0.8863945007324219, "learning_rate": 4.8484157160963245e-05, "loss": 0.0645, "step": 3588 }, { "epoch": 1.516687790452049, "grad_norm": 0.756432831287384, "learning_rate": 4.848331220954795e-05, "loss": 0.0399, "step": 3590 }, { "epoch": 1.5175327418673428, "grad_norm": 0.6821669936180115, "learning_rate": 4.848246725813266e-05, "loss": 0.0479, "step": 3592 }, { "epoch": 1.5183776932826363, "grad_norm": 0.9661495089530945, "learning_rate": 4.8481622306717366e-05, "loss": 0.0469, "step": 3594 }, { "epoch": 1.5192226446979298, "grad_norm": 0.7377157807350159, "learning_rate": 4.848077735530207e-05, "loss": 0.0441, "step": 3596 }, { "epoch": 1.5200675961132235, "grad_norm": 1.2411776781082153, "learning_rate": 4.847993240388678e-05, "loss": 0.0629, "step": 3598 }, { "epoch": 1.5209125475285172, "grad_norm": 0.49103307723999023, "learning_rate": 4.847908745247148e-05, "loss": 0.0325, "step": 3600 }, { "epoch": 1.5209125475285172, "eval_accuracy": 0.7358165200896706, "eval_cer": 0.07696039944599804, "eval_loss": 0.17656931281089783, "eval_runtime": 856.6087, "eval_samples_per_second": 13.539, "eval_steps_per_second": 0.424, "step": 3600 }, { "epoch": 1.5217574989438107, "grad_norm": 1.3446134328842163, "learning_rate": 4.8478242501056195e-05, "loss": 0.0377, "step": 3602 }, { "epoch": 1.5226024503591042, "grad_norm": 1.5772300958633423, "learning_rate": 4.8477397549640895e-05, "loss": 0.0764, "step": 3604 }, { "epoch": 1.523447401774398, "grad_norm": 1.2345671653747559, "learning_rate": 4.84765525982256e-05, "loss": 0.0713, "step": 3606 }, { "epoch": 1.5242923531896917, "grad_norm": 0.7605233192443848, "learning_rate": 4.847570764681031e-05, "loss": 0.0466, "step": 3608 }, { "epoch": 1.5251373046049852, "grad_norm": 1.6644865274429321, "learning_rate": 4.847486269539502e-05, "loss": 0.0703, "step": 3610 }, { "epoch": 1.5259822560202787, "grad_norm": 1.4693437814712524, "learning_rate": 4.8474017743979724e-05, "loss": 0.041, "step": 3612 }, { "epoch": 1.5268272074355724, "grad_norm": 0.45182543992996216, "learning_rate": 4.847317279256443e-05, "loss": 0.0364, "step": 3614 }, { "epoch": 1.5276721588508662, "grad_norm": 0.8842322826385498, "learning_rate": 4.847232784114913e-05, "loss": 0.0299, "step": 3616 }, { "epoch": 1.5285171102661597, "grad_norm": 1.3502392768859863, "learning_rate": 4.8471482889733846e-05, "loss": 0.0492, "step": 3618 }, { "epoch": 1.5293620616814532, "grad_norm": 0.7896035313606262, "learning_rate": 4.8470637938318546e-05, "loss": 0.0429, "step": 3620 }, { "epoch": 1.530207013096747, "grad_norm": 0.8707165122032166, "learning_rate": 4.846979298690325e-05, "loss": 0.0199, "step": 3622 }, { "epoch": 1.5310519645120406, "grad_norm": 1.013792634010315, "learning_rate": 4.846894803548796e-05, "loss": 0.0599, "step": 3624 }, { "epoch": 1.5318969159273341, "grad_norm": 1.2326784133911133, "learning_rate": 4.846810308407267e-05, "loss": 0.05, "step": 3626 }, { "epoch": 1.5327418673426279, "grad_norm": 1.147342562675476, "learning_rate": 4.8467258132657375e-05, "loss": 0.0525, "step": 3628 }, { "epoch": 1.5335868187579216, "grad_norm": 1.6748418807983398, "learning_rate": 4.846641318124208e-05, "loss": 0.0703, "step": 3630 }, { "epoch": 1.534431770173215, "grad_norm": 0.6268454790115356, "learning_rate": 4.846556822982678e-05, "loss": 0.0372, "step": 3632 }, { "epoch": 1.5352767215885086, "grad_norm": 0.7244894504547119, "learning_rate": 4.8464723278411497e-05, "loss": 0.0441, "step": 3634 }, { "epoch": 1.5361216730038023, "grad_norm": 1.510006070137024, "learning_rate": 4.84638783269962e-05, "loss": 0.0542, "step": 3636 }, { "epoch": 1.536966624419096, "grad_norm": 1.2709475755691528, "learning_rate": 4.8463033375580904e-05, "loss": 0.0383, "step": 3638 }, { "epoch": 1.5378115758343895, "grad_norm": 1.0092389583587646, "learning_rate": 4.846218842416561e-05, "loss": 0.0452, "step": 3640 }, { "epoch": 1.538656527249683, "grad_norm": 1.1898479461669922, "learning_rate": 4.846134347275032e-05, "loss": 0.0454, "step": 3642 }, { "epoch": 1.5395014786649768, "grad_norm": 1.168468952178955, "learning_rate": 4.8460498521335026e-05, "loss": 0.0367, "step": 3644 }, { "epoch": 1.5403464300802705, "grad_norm": 0.954703152179718, "learning_rate": 4.845965356991973e-05, "loss": 0.0181, "step": 3646 }, { "epoch": 1.541191381495564, "grad_norm": 1.3118579387664795, "learning_rate": 4.845880861850443e-05, "loss": 0.0656, "step": 3648 }, { "epoch": 1.5420363329108575, "grad_norm": 0.525249719619751, "learning_rate": 4.845796366708915e-05, "loss": 0.0246, "step": 3650 }, { "epoch": 1.5428812843261512, "grad_norm": 1.0019251108169556, "learning_rate": 4.845711871567385e-05, "loss": 0.0469, "step": 3652 }, { "epoch": 1.543726235741445, "grad_norm": 1.4187920093536377, "learning_rate": 4.845627376425856e-05, "loss": 0.0472, "step": 3654 }, { "epoch": 1.5445711871567385, "grad_norm": 1.3827303647994995, "learning_rate": 4.845542881284326e-05, "loss": 0.0635, "step": 3656 }, { "epoch": 1.545416138572032, "grad_norm": 1.0358022451400757, "learning_rate": 4.845458386142797e-05, "loss": 0.0414, "step": 3658 }, { "epoch": 1.5462610899873257, "grad_norm": 1.110398530960083, "learning_rate": 4.8453738910012677e-05, "loss": 0.036, "step": 3660 }, { "epoch": 1.5471060414026194, "grad_norm": 0.48275184631347656, "learning_rate": 4.8452893958597384e-05, "loss": 0.0356, "step": 3662 }, { "epoch": 1.547950992817913, "grad_norm": 0.5242049694061279, "learning_rate": 4.845204900718209e-05, "loss": 0.0235, "step": 3664 }, { "epoch": 1.5487959442332064, "grad_norm": 1.1783398389816284, "learning_rate": 4.84512040557668e-05, "loss": 0.0572, "step": 3666 }, { "epoch": 1.5496408956485002, "grad_norm": 1.400747299194336, "learning_rate": 4.84503591043515e-05, "loss": 0.0575, "step": 3668 }, { "epoch": 1.5504858470637939, "grad_norm": 1.717510461807251, "learning_rate": 4.844951415293621e-05, "loss": 0.0668, "step": 3670 }, { "epoch": 1.5513307984790874, "grad_norm": 1.1266288757324219, "learning_rate": 4.844866920152091e-05, "loss": 0.0462, "step": 3672 }, { "epoch": 1.5521757498943811, "grad_norm": 0.7779021263122559, "learning_rate": 4.844782425010562e-05, "loss": 0.046, "step": 3674 }, { "epoch": 1.5530207013096748, "grad_norm": 1.299617886543274, "learning_rate": 4.844697929869033e-05, "loss": 0.0495, "step": 3676 }, { "epoch": 1.5538656527249683, "grad_norm": 0.6191155910491943, "learning_rate": 4.8446134347275035e-05, "loss": 0.0419, "step": 3678 }, { "epoch": 1.5547106041402619, "grad_norm": 1.5401053428649902, "learning_rate": 4.844528939585974e-05, "loss": 0.0492, "step": 3680 }, { "epoch": 1.5555555555555556, "grad_norm": 1.5327844619750977, "learning_rate": 4.844444444444445e-05, "loss": 0.0692, "step": 3682 }, { "epoch": 1.5564005069708493, "grad_norm": 0.7170135378837585, "learning_rate": 4.844359949302915e-05, "loss": 0.0327, "step": 3684 }, { "epoch": 1.5572454583861428, "grad_norm": 1.2383910417556763, "learning_rate": 4.844275454161386e-05, "loss": 0.0454, "step": 3686 }, { "epoch": 1.5580904098014363, "grad_norm": 1.0864990949630737, "learning_rate": 4.8441909590198564e-05, "loss": 0.0492, "step": 3688 }, { "epoch": 1.55893536121673, "grad_norm": 1.1290335655212402, "learning_rate": 4.844106463878327e-05, "loss": 0.0443, "step": 3690 }, { "epoch": 1.5597803126320238, "grad_norm": 1.9262590408325195, "learning_rate": 4.844021968736798e-05, "loss": 0.06, "step": 3692 }, { "epoch": 1.5606252640473173, "grad_norm": 1.2128050327301025, "learning_rate": 4.8439374735952685e-05, "loss": 0.0707, "step": 3694 }, { "epoch": 1.5614702154626108, "grad_norm": 0.8806398510932922, "learning_rate": 4.843852978453739e-05, "loss": 0.0451, "step": 3696 }, { "epoch": 1.5623151668779045, "grad_norm": 1.6122541427612305, "learning_rate": 4.84376848331221e-05, "loss": 0.0485, "step": 3698 }, { "epoch": 1.5631601182931982, "grad_norm": 1.153076171875, "learning_rate": 4.84368398817068e-05, "loss": 0.0568, "step": 3700 }, { "epoch": 1.5631601182931982, "eval_accuracy": 0.7448698051388171, "eval_cer": 0.07185264253641152, "eval_loss": 0.16689425706863403, "eval_runtime": 859.966, "eval_samples_per_second": 13.487, "eval_steps_per_second": 0.422, "step": 3700 }, { "epoch": 1.5640050697084917, "grad_norm": 1.530321478843689, "learning_rate": 4.8435994930291514e-05, "loss": 0.0513, "step": 3702 }, { "epoch": 1.5648500211237852, "grad_norm": 0.5067934989929199, "learning_rate": 4.8435149978876215e-05, "loss": 0.0333, "step": 3704 }, { "epoch": 1.565694972539079, "grad_norm": 0.6863754987716675, "learning_rate": 4.843430502746092e-05, "loss": 0.0331, "step": 3706 }, { "epoch": 1.5665399239543727, "grad_norm": 0.6762244701385498, "learning_rate": 4.843346007604563e-05, "loss": 0.0281, "step": 3708 }, { "epoch": 1.5673848753696662, "grad_norm": 0.8880215287208557, "learning_rate": 4.8432615124630336e-05, "loss": 0.029, "step": 3710 }, { "epoch": 1.5682298267849597, "grad_norm": 1.9256477355957031, "learning_rate": 4.843177017321504e-05, "loss": 0.0542, "step": 3712 }, { "epoch": 1.5690747782002537, "grad_norm": 1.291383147239685, "learning_rate": 4.843092522179975e-05, "loss": 0.0429, "step": 3714 }, { "epoch": 1.5699197296155472, "grad_norm": 0.8845472931861877, "learning_rate": 4.843008027038445e-05, "loss": 0.0295, "step": 3716 }, { "epoch": 1.5707646810308407, "grad_norm": 1.2028437852859497, "learning_rate": 4.8429235318969165e-05, "loss": 0.0485, "step": 3718 }, { "epoch": 1.5716096324461344, "grad_norm": 1.249176025390625, "learning_rate": 4.8428390367553865e-05, "loss": 0.0261, "step": 3720 }, { "epoch": 1.5724545838614281, "grad_norm": 1.1442838907241821, "learning_rate": 4.842754541613857e-05, "loss": 0.0597, "step": 3722 }, { "epoch": 1.5732995352767216, "grad_norm": 1.2954820394515991, "learning_rate": 4.842670046472328e-05, "loss": 0.047, "step": 3724 }, { "epoch": 1.5741444866920151, "grad_norm": 1.1261757612228394, "learning_rate": 4.842585551330799e-05, "loss": 0.0375, "step": 3726 }, { "epoch": 1.5749894381073088, "grad_norm": 0.8863781094551086, "learning_rate": 4.8425010561892694e-05, "loss": 0.0332, "step": 3728 }, { "epoch": 1.5758343895226026, "grad_norm": 0.660490095615387, "learning_rate": 4.84241656104774e-05, "loss": 0.0293, "step": 3730 }, { "epoch": 1.576679340937896, "grad_norm": 0.3680121600627899, "learning_rate": 4.84233206590621e-05, "loss": 0.0454, "step": 3732 }, { "epoch": 1.5775242923531896, "grad_norm": 1.0620887279510498, "learning_rate": 4.8422475707646816e-05, "loss": 0.0517, "step": 3734 }, { "epoch": 1.5783692437684833, "grad_norm": 0.967285692691803, "learning_rate": 4.8421630756231516e-05, "loss": 0.0304, "step": 3736 }, { "epoch": 1.579214195183777, "grad_norm": 1.02311372756958, "learning_rate": 4.842078580481622e-05, "loss": 0.0482, "step": 3738 }, { "epoch": 1.5800591465990705, "grad_norm": 1.0686031579971313, "learning_rate": 4.841994085340093e-05, "loss": 0.0298, "step": 3740 }, { "epoch": 1.580904098014364, "grad_norm": 1.1448440551757812, "learning_rate": 4.841909590198564e-05, "loss": 0.0596, "step": 3742 }, { "epoch": 1.5817490494296578, "grad_norm": 0.899435818195343, "learning_rate": 4.8418250950570345e-05, "loss": 0.0393, "step": 3744 }, { "epoch": 1.5825940008449515, "grad_norm": 0.6797332167625427, "learning_rate": 4.841740599915505e-05, "loss": 0.0286, "step": 3746 }, { "epoch": 1.583438952260245, "grad_norm": 1.0120704174041748, "learning_rate": 4.841656104773975e-05, "loss": 0.0597, "step": 3748 }, { "epoch": 1.5842839036755385, "grad_norm": 1.1370409727096558, "learning_rate": 4.8415716096324467e-05, "loss": 0.0551, "step": 3750 }, { "epoch": 1.5851288550908322, "grad_norm": 1.4799180030822754, "learning_rate": 4.841487114490917e-05, "loss": 0.03, "step": 3752 }, { "epoch": 1.585973806506126, "grad_norm": 1.0635912418365479, "learning_rate": 4.841402619349388e-05, "loss": 0.0513, "step": 3754 }, { "epoch": 1.5868187579214195, "grad_norm": 0.6243870854377747, "learning_rate": 4.841318124207858e-05, "loss": 0.0308, "step": 3756 }, { "epoch": 1.5876637093367132, "grad_norm": 1.172440528869629, "learning_rate": 4.841233629066329e-05, "loss": 0.0428, "step": 3758 }, { "epoch": 1.588508660752007, "grad_norm": 1.209961175918579, "learning_rate": 4.8411491339247996e-05, "loss": 0.0458, "step": 3760 }, { "epoch": 1.5893536121673004, "grad_norm": 1.1312841176986694, "learning_rate": 4.84106463878327e-05, "loss": 0.054, "step": 3762 }, { "epoch": 1.590198563582594, "grad_norm": 0.6913443207740784, "learning_rate": 4.840980143641741e-05, "loss": 0.0403, "step": 3764 }, { "epoch": 1.5910435149978877, "grad_norm": 1.1605889797210693, "learning_rate": 4.840895648500212e-05, "loss": 0.0479, "step": 3766 }, { "epoch": 1.5918884664131814, "grad_norm": 0.7447473406791687, "learning_rate": 4.840811153358682e-05, "loss": 0.0268, "step": 3768 }, { "epoch": 1.5927334178284749, "grad_norm": 0.9783474206924438, "learning_rate": 4.840726658217153e-05, "loss": 0.0465, "step": 3770 }, { "epoch": 1.5935783692437684, "grad_norm": 1.1488279104232788, "learning_rate": 4.840642163075623e-05, "loss": 0.0517, "step": 3772 }, { "epoch": 1.5944233206590621, "grad_norm": 0.8444589376449585, "learning_rate": 4.840557667934094e-05, "loss": 0.0339, "step": 3774 }, { "epoch": 1.5952682720743558, "grad_norm": 1.1645581722259521, "learning_rate": 4.8404731727925647e-05, "loss": 0.041, "step": 3776 }, { "epoch": 1.5961132234896493, "grad_norm": 1.6231311559677124, "learning_rate": 4.8403886776510354e-05, "loss": 0.054, "step": 3778 }, { "epoch": 1.5969581749049429, "grad_norm": 0.9748203754425049, "learning_rate": 4.840304182509506e-05, "loss": 0.0428, "step": 3780 }, { "epoch": 1.5978031263202366, "grad_norm": 1.2018306255340576, "learning_rate": 4.840219687367977e-05, "loss": 0.0527, "step": 3782 }, { "epoch": 1.5986480777355303, "grad_norm": 1.094373345375061, "learning_rate": 4.840135192226447e-05, "loss": 0.0628, "step": 3784 }, { "epoch": 1.5994930291508238, "grad_norm": 0.814469575881958, "learning_rate": 4.840050697084918e-05, "loss": 0.0224, "step": 3786 }, { "epoch": 1.6003379805661173, "grad_norm": 0.8022437691688538, "learning_rate": 4.839966201943388e-05, "loss": 0.0482, "step": 3788 }, { "epoch": 1.601182931981411, "grad_norm": 1.218186855316162, "learning_rate": 4.839881706801859e-05, "loss": 0.0675, "step": 3790 }, { "epoch": 1.6020278833967048, "grad_norm": 1.626511573791504, "learning_rate": 4.83979721166033e-05, "loss": 0.0712, "step": 3792 }, { "epoch": 1.6028728348119983, "grad_norm": 1.63881516456604, "learning_rate": 4.8397127165188005e-05, "loss": 0.0836, "step": 3794 }, { "epoch": 1.6037177862272918, "grad_norm": 1.0243483781814575, "learning_rate": 4.839628221377271e-05, "loss": 0.0255, "step": 3796 }, { "epoch": 1.6045627376425855, "grad_norm": 1.4431846141815186, "learning_rate": 4.839543726235742e-05, "loss": 0.0573, "step": 3798 }, { "epoch": 1.6054076890578792, "grad_norm": 1.132359266281128, "learning_rate": 4.839459231094212e-05, "loss": 0.0403, "step": 3800 }, { "epoch": 1.6054076890578792, "eval_accuracy": 0.7347818589411967, "eval_cer": 0.075275457183941, "eval_loss": 0.17756511270999908, "eval_runtime": 841.6089, "eval_samples_per_second": 13.781, "eval_steps_per_second": 0.431, "step": 3800 }, { "epoch": 1.6062526404731727, "grad_norm": 1.0357937812805176, "learning_rate": 4.839374735952683e-05, "loss": 0.0542, "step": 3802 }, { "epoch": 1.6070975918884665, "grad_norm": 0.7196958661079407, "learning_rate": 4.8392902408111534e-05, "loss": 0.0331, "step": 3804 }, { "epoch": 1.6079425433037602, "grad_norm": 0.6513099670410156, "learning_rate": 4.839205745669624e-05, "loss": 0.0424, "step": 3806 }, { "epoch": 1.6087874947190537, "grad_norm": 1.5637171268463135, "learning_rate": 4.839121250528095e-05, "loss": 0.0593, "step": 3808 }, { "epoch": 1.6096324461343472, "grad_norm": 0.7417770028114319, "learning_rate": 4.8390367553865655e-05, "loss": 0.0378, "step": 3810 }, { "epoch": 1.610477397549641, "grad_norm": 0.97124183177948, "learning_rate": 4.838952260245036e-05, "loss": 0.0405, "step": 3812 }, { "epoch": 1.6113223489649346, "grad_norm": 1.1514919996261597, "learning_rate": 4.838867765103507e-05, "loss": 0.0416, "step": 3814 }, { "epoch": 1.6121673003802282, "grad_norm": 1.0334582328796387, "learning_rate": 4.838783269961977e-05, "loss": 0.0359, "step": 3816 }, { "epoch": 1.6130122517955217, "grad_norm": 0.6415423154830933, "learning_rate": 4.8386987748204484e-05, "loss": 0.0269, "step": 3818 }, { "epoch": 1.6138572032108154, "grad_norm": 0.9257076382637024, "learning_rate": 4.8386142796789185e-05, "loss": 0.0433, "step": 3820 }, { "epoch": 1.614702154626109, "grad_norm": 1.0483789443969727, "learning_rate": 4.838529784537389e-05, "loss": 0.0402, "step": 3822 }, { "epoch": 1.6155471060414026, "grad_norm": 1.224219799041748, "learning_rate": 4.83844528939586e-05, "loss": 0.0515, "step": 3824 }, { "epoch": 1.6163920574566961, "grad_norm": 2.190443992614746, "learning_rate": 4.8383607942543306e-05, "loss": 0.0708, "step": 3826 }, { "epoch": 1.6172370088719898, "grad_norm": 0.5145219564437866, "learning_rate": 4.838276299112801e-05, "loss": 0.0281, "step": 3828 }, { "epoch": 1.6180819602872836, "grad_norm": 0.6071678400039673, "learning_rate": 4.838191803971272e-05, "loss": 0.0281, "step": 3830 }, { "epoch": 1.618926911702577, "grad_norm": 1.6799170970916748, "learning_rate": 4.838107308829742e-05, "loss": 0.0694, "step": 3832 }, { "epoch": 1.6197718631178706, "grad_norm": 1.0800015926361084, "learning_rate": 4.8380228136882135e-05, "loss": 0.0589, "step": 3834 }, { "epoch": 1.6206168145331643, "grad_norm": 0.9297267198562622, "learning_rate": 4.8379383185466835e-05, "loss": 0.0276, "step": 3836 }, { "epoch": 1.621461765948458, "grad_norm": 0.6874012351036072, "learning_rate": 4.837853823405154e-05, "loss": 0.0321, "step": 3838 }, { "epoch": 1.6223067173637515, "grad_norm": 0.8664073944091797, "learning_rate": 4.837769328263625e-05, "loss": 0.0322, "step": 3840 }, { "epoch": 1.623151668779045, "grad_norm": 1.3340883255004883, "learning_rate": 4.837684833122096e-05, "loss": 0.0442, "step": 3842 }, { "epoch": 1.623996620194339, "grad_norm": 1.2647576332092285, "learning_rate": 4.8376003379805664e-05, "loss": 0.0795, "step": 3844 }, { "epoch": 1.6248415716096325, "grad_norm": 1.0526044368743896, "learning_rate": 4.837515842839037e-05, "loss": 0.0379, "step": 3846 }, { "epoch": 1.625686523024926, "grad_norm": 0.8544779419898987, "learning_rate": 4.837431347697507e-05, "loss": 0.0446, "step": 3848 }, { "epoch": 1.6265314744402197, "grad_norm": 1.369625449180603, "learning_rate": 4.8373468525559786e-05, "loss": 0.0624, "step": 3850 }, { "epoch": 1.6273764258555135, "grad_norm": 0.6502795219421387, "learning_rate": 4.8372623574144486e-05, "loss": 0.0625, "step": 3852 }, { "epoch": 1.628221377270807, "grad_norm": 1.5579420328140259, "learning_rate": 4.83717786227292e-05, "loss": 0.0634, "step": 3854 }, { "epoch": 1.6290663286861005, "grad_norm": 1.384822130203247, "learning_rate": 4.83709336713139e-05, "loss": 0.0428, "step": 3856 }, { "epoch": 1.6299112801013942, "grad_norm": 0.95893394947052, "learning_rate": 4.837008871989861e-05, "loss": 0.0487, "step": 3858 }, { "epoch": 1.630756231516688, "grad_norm": 1.7926594018936157, "learning_rate": 4.8369243768483315e-05, "loss": 0.056, "step": 3860 }, { "epoch": 1.6316011829319814, "grad_norm": 2.259760856628418, "learning_rate": 4.836839881706802e-05, "loss": 0.0356, "step": 3862 }, { "epoch": 1.632446134347275, "grad_norm": 0.7830055356025696, "learning_rate": 4.836755386565273e-05, "loss": 0.0371, "step": 3864 }, { "epoch": 1.6332910857625687, "grad_norm": 0.8463422060012817, "learning_rate": 4.8366708914237437e-05, "loss": 0.0532, "step": 3866 }, { "epoch": 1.6341360371778624, "grad_norm": 1.0259829759597778, "learning_rate": 4.836586396282214e-05, "loss": 0.0461, "step": 3868 }, { "epoch": 1.6349809885931559, "grad_norm": 0.7621318697929382, "learning_rate": 4.836501901140685e-05, "loss": 0.0515, "step": 3870 }, { "epoch": 1.6358259400084494, "grad_norm": 1.094347596168518, "learning_rate": 4.836417405999155e-05, "loss": 0.0504, "step": 3872 }, { "epoch": 1.6366708914237431, "grad_norm": 1.4083515405654907, "learning_rate": 4.836332910857626e-05, "loss": 0.0464, "step": 3874 }, { "epoch": 1.6375158428390368, "grad_norm": 1.221833348274231, "learning_rate": 4.8362484157160966e-05, "loss": 0.0393, "step": 3876 }, { "epoch": 1.6383607942543303, "grad_norm": 1.5514699220657349, "learning_rate": 4.836163920574567e-05, "loss": 0.0591, "step": 3878 }, { "epoch": 1.6392057456696238, "grad_norm": 1.4877634048461914, "learning_rate": 4.836079425433038e-05, "loss": 0.0546, "step": 3880 }, { "epoch": 1.6400506970849176, "grad_norm": 1.098713994026184, "learning_rate": 4.835994930291509e-05, "loss": 0.0385, "step": 3882 }, { "epoch": 1.6408956485002113, "grad_norm": 1.0514299869537354, "learning_rate": 4.835910435149979e-05, "loss": 0.0556, "step": 3884 }, { "epoch": 1.6417405999155048, "grad_norm": 0.7681018710136414, "learning_rate": 4.83582594000845e-05, "loss": 0.0355, "step": 3886 }, { "epoch": 1.6425855513307985, "grad_norm": 1.322800636291504, "learning_rate": 4.83574144486692e-05, "loss": 0.0453, "step": 3888 }, { "epoch": 1.6434305027460923, "grad_norm": 1.2197022438049316, "learning_rate": 4.835656949725391e-05, "loss": 0.0542, "step": 3890 }, { "epoch": 1.6442754541613858, "grad_norm": 1.9387176036834717, "learning_rate": 4.8355724545838617e-05, "loss": 0.06, "step": 3892 }, { "epoch": 1.6451204055766793, "grad_norm": 0.9585774540901184, "learning_rate": 4.8354879594423324e-05, "loss": 0.0847, "step": 3894 }, { "epoch": 1.645965356991973, "grad_norm": 0.7485412359237671, "learning_rate": 4.835403464300803e-05, "loss": 0.0278, "step": 3896 }, { "epoch": 1.6468103084072667, "grad_norm": 0.7363729476928711, "learning_rate": 4.835318969159274e-05, "loss": 0.0548, "step": 3898 }, { "epoch": 1.6476552598225602, "grad_norm": 2.1429085731506348, "learning_rate": 4.835234474017744e-05, "loss": 0.044, "step": 3900 }, { "epoch": 1.6476552598225602, "eval_accuracy": 0.7290050008622176, "eval_cer": 0.07787785491853172, "eval_loss": 0.17509505152702332, "eval_runtime": 862.0291, "eval_samples_per_second": 13.454, "eval_steps_per_second": 0.421, "step": 3900 }, { "epoch": 1.6485002112378537, "grad_norm": 1.3191771507263184, "learning_rate": 4.835149978876215e-05, "loss": 0.05, "step": 3902 }, { "epoch": 1.6493451626531475, "grad_norm": 1.1920831203460693, "learning_rate": 4.835065483734685e-05, "loss": 0.045, "step": 3904 }, { "epoch": 1.6501901140684412, "grad_norm": 0.7942538261413574, "learning_rate": 4.834980988593156e-05, "loss": 0.0335, "step": 3906 }, { "epoch": 1.6510350654837347, "grad_norm": 1.081873893737793, "learning_rate": 4.834896493451627e-05, "loss": 0.0959, "step": 3908 }, { "epoch": 1.6518800168990282, "grad_norm": 0.9521547555923462, "learning_rate": 4.8348119983100975e-05, "loss": 0.0277, "step": 3910 }, { "epoch": 1.652724968314322, "grad_norm": 1.2026457786560059, "learning_rate": 4.834727503168568e-05, "loss": 0.0523, "step": 3912 }, { "epoch": 1.6535699197296156, "grad_norm": 0.7098683714866638, "learning_rate": 4.834643008027039e-05, "loss": 0.0256, "step": 3914 }, { "epoch": 1.6544148711449091, "grad_norm": 1.0528465509414673, "learning_rate": 4.834558512885509e-05, "loss": 0.064, "step": 3916 }, { "epoch": 1.6552598225602027, "grad_norm": 1.4617663621902466, "learning_rate": 4.8344740177439803e-05, "loss": 0.0489, "step": 3918 }, { "epoch": 1.6561047739754964, "grad_norm": 1.6092969179153442, "learning_rate": 4.8343895226024504e-05, "loss": 0.0505, "step": 3920 }, { "epoch": 1.65694972539079, "grad_norm": 1.4073854684829712, "learning_rate": 4.834305027460921e-05, "loss": 0.0411, "step": 3922 }, { "epoch": 1.6577946768060836, "grad_norm": 1.2704623937606812, "learning_rate": 4.834220532319392e-05, "loss": 0.0713, "step": 3924 }, { "epoch": 1.6586396282213771, "grad_norm": 1.0156190395355225, "learning_rate": 4.8341360371778625e-05, "loss": 0.0335, "step": 3926 }, { "epoch": 1.6594845796366708, "grad_norm": 1.200533151626587, "learning_rate": 4.834051542036333e-05, "loss": 0.0465, "step": 3928 }, { "epoch": 1.6603295310519646, "grad_norm": 0.7413454651832581, "learning_rate": 4.833967046894804e-05, "loss": 0.044, "step": 3930 }, { "epoch": 1.661174482467258, "grad_norm": 1.4523422718048096, "learning_rate": 4.833882551753274e-05, "loss": 0.0612, "step": 3932 }, { "epoch": 1.6620194338825518, "grad_norm": 1.259691596031189, "learning_rate": 4.8337980566117454e-05, "loss": 0.061, "step": 3934 }, { "epoch": 1.6628643852978455, "grad_norm": 1.7233320474624634, "learning_rate": 4.8337135614702155e-05, "loss": 0.0653, "step": 3936 }, { "epoch": 1.663709336713139, "grad_norm": 1.2285038232803345, "learning_rate": 4.833629066328686e-05, "loss": 0.0703, "step": 3938 }, { "epoch": 1.6645542881284325, "grad_norm": 1.288918375968933, "learning_rate": 4.833544571187157e-05, "loss": 0.0618, "step": 3940 }, { "epoch": 1.6653992395437263, "grad_norm": 0.7824044823646545, "learning_rate": 4.8334600760456276e-05, "loss": 0.048, "step": 3942 }, { "epoch": 1.66624419095902, "grad_norm": 1.211120843887329, "learning_rate": 4.8333755809040983e-05, "loss": 0.0669, "step": 3944 }, { "epoch": 1.6670891423743135, "grad_norm": 2.0919220447540283, "learning_rate": 4.833291085762569e-05, "loss": 0.0803, "step": 3946 }, { "epoch": 1.667934093789607, "grad_norm": 0.8903474807739258, "learning_rate": 4.833206590621039e-05, "loss": 0.063, "step": 3948 }, { "epoch": 1.6687790452049007, "grad_norm": 1.258203148841858, "learning_rate": 4.8331220954795105e-05, "loss": 0.0331, "step": 3950 }, { "epoch": 1.6696239966201945, "grad_norm": 1.2711974382400513, "learning_rate": 4.8330376003379805e-05, "loss": 0.0414, "step": 3952 }, { "epoch": 1.670468948035488, "grad_norm": 0.8380125164985657, "learning_rate": 4.832953105196452e-05, "loss": 0.0445, "step": 3954 }, { "epoch": 1.6713138994507815, "grad_norm": 1.392994999885559, "learning_rate": 4.832868610054922e-05, "loss": 0.0605, "step": 3956 }, { "epoch": 1.6721588508660752, "grad_norm": 1.1180285215377808, "learning_rate": 4.832784114913393e-05, "loss": 0.0493, "step": 3958 }, { "epoch": 1.673003802281369, "grad_norm": 1.3766900300979614, "learning_rate": 4.8326996197718634e-05, "loss": 0.057, "step": 3960 }, { "epoch": 1.6738487536966624, "grad_norm": 1.6987857818603516, "learning_rate": 4.832615124630334e-05, "loss": 0.0476, "step": 3962 }, { "epoch": 1.674693705111956, "grad_norm": 1.4040523767471313, "learning_rate": 4.832530629488805e-05, "loss": 0.0397, "step": 3964 }, { "epoch": 1.6755386565272496, "grad_norm": 0.45623424649238586, "learning_rate": 4.8324461343472756e-05, "loss": 0.0455, "step": 3966 }, { "epoch": 1.6763836079425434, "grad_norm": 0.9928030967712402, "learning_rate": 4.8323616392057456e-05, "loss": 0.0448, "step": 3968 }, { "epoch": 1.6772285593578369, "grad_norm": 0.9238046407699585, "learning_rate": 4.832277144064217e-05, "loss": 0.0295, "step": 3970 }, { "epoch": 1.6780735107731304, "grad_norm": 0.2931238114833832, "learning_rate": 4.832192648922687e-05, "loss": 0.0247, "step": 3972 }, { "epoch": 1.6789184621884243, "grad_norm": 0.8757814168930054, "learning_rate": 4.832108153781158e-05, "loss": 0.0358, "step": 3974 }, { "epoch": 1.6797634136037178, "grad_norm": 0.8881898522377014, "learning_rate": 4.8320236586396285e-05, "loss": 0.0331, "step": 3976 }, { "epoch": 1.6806083650190113, "grad_norm": 0.7834998965263367, "learning_rate": 4.831939163498099e-05, "loss": 0.0407, "step": 3978 }, { "epoch": 1.681453316434305, "grad_norm": 1.1790554523468018, "learning_rate": 4.83185466835657e-05, "loss": 0.0513, "step": 3980 }, { "epoch": 1.6822982678495988, "grad_norm": 0.5270421504974365, "learning_rate": 4.8317701732150407e-05, "loss": 0.0395, "step": 3982 }, { "epoch": 1.6831432192648923, "grad_norm": 1.120709776878357, "learning_rate": 4.831685678073511e-05, "loss": 0.0394, "step": 3984 }, { "epoch": 1.6839881706801858, "grad_norm": 0.8393281102180481, "learning_rate": 4.831601182931982e-05, "loss": 0.0342, "step": 3986 }, { "epoch": 1.6848331220954795, "grad_norm": 0.6471826434135437, "learning_rate": 4.831516687790452e-05, "loss": 0.0257, "step": 3988 }, { "epoch": 1.6856780735107733, "grad_norm": 1.7182902097702026, "learning_rate": 4.831432192648923e-05, "loss": 0.0506, "step": 3990 }, { "epoch": 1.6865230249260668, "grad_norm": 0.7699361443519592, "learning_rate": 4.8313476975073936e-05, "loss": 0.0408, "step": 3992 }, { "epoch": 1.6873679763413603, "grad_norm": 0.8868207335472107, "learning_rate": 4.831263202365864e-05, "loss": 0.0359, "step": 3994 }, { "epoch": 1.688212927756654, "grad_norm": 1.3517881631851196, "learning_rate": 4.831178707224335e-05, "loss": 0.0439, "step": 3996 }, { "epoch": 1.6890578791719477, "grad_norm": 1.8134877681732178, "learning_rate": 4.831094212082806e-05, "loss": 0.0508, "step": 3998 }, { "epoch": 1.6899028305872412, "grad_norm": 0.8728620409965515, "learning_rate": 4.831009716941276e-05, "loss": 0.0638, "step": 4000 }, { "epoch": 1.6899028305872412, "eval_accuracy": 0.7381445076737368, "eval_cer": 0.07271716788552979, "eval_loss": 0.17253856360912323, "eval_runtime": 854.712, "eval_samples_per_second": 13.569, "eval_steps_per_second": 0.425, "step": 4000 } ], "logging_steps": 2, "max_steps": 118350, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6614257405400023e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }