{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.65345170671973, "eval_steps": 1000, "global_step": 200000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019133629266799325, "grad_norm": 3.534731864929199, "learning_rate": 0.0002982, "loss": 2.0385, "step": 500 }, { "epoch": 0.03826725853359865, "grad_norm": 3.997960090637207, "learning_rate": 0.00029925413533834583, "loss": 1.5221, "step": 1000 }, { "epoch": 0.03826725853359865, "eval_loss": Infinity, "eval_runtime": 316.3189, "eval_samples_per_second": 22.199, "eval_steps_per_second": 2.776, "eval_wer": 0.9139914321980114, "step": 1000 }, { "epoch": 0.05740088780039798, "grad_norm": 2.7378287315368652, "learning_rate": 0.0002985022556390977, "loss": 1.4633, "step": 1500 }, { "epoch": 0.0765345170671973, "grad_norm": 3.522592782974243, "learning_rate": 0.0002977503759398496, "loss": 1.4078, "step": 2000 }, { "epoch": 0.0765345170671973, "eval_loss": Infinity, "eval_runtime": 316.2822, "eval_samples_per_second": 22.202, "eval_steps_per_second": 2.776, "eval_wer": 0.9065607150412524, "step": 2000 }, { "epoch": 0.09566814633399663, "grad_norm": 8.136958122253418, "learning_rate": 0.0002969984962406015, "loss": 1.3589, "step": 2500 }, { "epoch": 0.11480177560079596, "grad_norm": 2.9784021377563477, "learning_rate": 0.00029624661654135335, "loss": 1.336, "step": 3000 }, { "epoch": 0.11480177560079596, "eval_loss": Infinity, "eval_runtime": 315.7333, "eval_samples_per_second": 22.24, "eval_steps_per_second": 2.781, "eval_wer": 0.8949386503067485, "step": 3000 }, { "epoch": 0.13393540486759528, "grad_norm": 3.5435404777526855, "learning_rate": 0.0002954947368421052, "loss": 1.3258, "step": 3500 }, { "epoch": 0.1530690341343946, "grad_norm": 2.8903892040252686, "learning_rate": 0.0002947443609022556, "loss": 1.2887, "step": 4000 }, { "epoch": 0.1530690341343946, "eval_loss": Infinity, "eval_runtime": 316.2047, "eval_samples_per_second": 22.207, "eval_steps_per_second": 2.777, "eval_wer": 0.8745901205838799, "step": 4000 }, { "epoch": 0.17220266340119394, "grad_norm": 2.647843360900879, "learning_rate": 0.000293993984962406, "loss": 1.2713, "step": 4500 }, { "epoch": 0.19133629266799326, "grad_norm": 3.8844950199127197, "learning_rate": 0.00029324210526315787, "loss": 1.26, "step": 5000 }, { "epoch": 0.19133629266799326, "eval_loss": Infinity, "eval_runtime": 315.2217, "eval_samples_per_second": 22.276, "eval_steps_per_second": 2.785, "eval_wer": 0.8671197376771737, "step": 5000 }, { "epoch": 0.21046992193479258, "grad_norm": 3.8498334884643555, "learning_rate": 0.00029249022556390974, "loss": 1.231, "step": 5500 }, { "epoch": 0.22960355120159193, "grad_norm": 2.3221595287323, "learning_rate": 0.0002917383458646616, "loss": 1.2188, "step": 6000 }, { "epoch": 0.22960355120159193, "eval_loss": Infinity, "eval_runtime": 315.8377, "eval_samples_per_second": 22.233, "eval_steps_per_second": 2.78, "eval_wer": 0.8699756716733658, "step": 6000 }, { "epoch": 0.24873718046839124, "grad_norm": 2.9522998332977295, "learning_rate": 0.00029098646616541353, "loss": 1.1974, "step": 6500 }, { "epoch": 0.26787080973519056, "grad_norm": 5.0502214431762695, "learning_rate": 0.0002902345864661654, "loss": 1.1992, "step": 7000 }, { "epoch": 0.26787080973519056, "eval_loss": Infinity, "eval_runtime": 315.6699, "eval_samples_per_second": 22.245, "eval_steps_per_second": 2.781, "eval_wer": 0.8537920456949439, "step": 7000 }, { "epoch": 0.2870044390019899, "grad_norm": 3.480316400527954, "learning_rate": 0.00028948270676691727, "loss": 1.1786, "step": 7500 }, { "epoch": 0.3061380682687892, "grad_norm": 3.4583587646484375, "learning_rate": 0.00028873082706766913, "loss": 1.1773, "step": 8000 }, { "epoch": 0.3061380682687892, "eval_loss": Infinity, "eval_runtime": 315.6868, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.781, "eval_wer": 0.8329278612227629, "step": 8000 }, { "epoch": 0.32527169753558854, "grad_norm": 3.2121310234069824, "learning_rate": 0.00028797894736842106, "loss": 1.1475, "step": 8500 }, { "epoch": 0.3444053268023879, "grad_norm": 6.1687846183776855, "learning_rate": 0.0002872285714285714, "loss": 1.1602, "step": 9000 }, { "epoch": 0.3444053268023879, "eval_loss": Infinity, "eval_runtime": 315.9185, "eval_samples_per_second": 22.227, "eval_steps_per_second": 2.779, "eval_wer": 0.8200497144066003, "step": 9000 }, { "epoch": 0.3635389560691872, "grad_norm": 2.9368505477905273, "learning_rate": 0.0002864766917293233, "loss": 1.1233, "step": 9500 }, { "epoch": 0.3826725853359865, "grad_norm": 2.0912511348724365, "learning_rate": 0.00028572481203007513, "loss": 1.1128, "step": 10000 }, { "epoch": 0.3826725853359865, "eval_loss": Infinity, "eval_runtime": 316.2709, "eval_samples_per_second": 22.202, "eval_steps_per_second": 2.776, "eval_wer": 0.8059287074254284, "step": 10000 }, { "epoch": 0.40180621460278587, "grad_norm": 2.822795867919922, "learning_rate": 0.00028497293233082705, "loss": 1.096, "step": 10500 }, { "epoch": 0.42093984386958516, "grad_norm": 2.4663002490997314, "learning_rate": 0.0002842210526315789, "loss": 1.0893, "step": 11000 }, { "epoch": 0.42093984386958516, "eval_loss": Infinity, "eval_runtime": 317.2266, "eval_samples_per_second": 22.136, "eval_steps_per_second": 2.768, "eval_wer": 0.827678760313095, "step": 11000 }, { "epoch": 0.4400734731363845, "grad_norm": 1.9610426425933838, "learning_rate": 0.0002834691729323308, "loss": 1.0702, "step": 11500 }, { "epoch": 0.45920710240318385, "grad_norm": 3.893796682357788, "learning_rate": 0.00028271729323308266, "loss": 1.0809, "step": 12000 }, { "epoch": 0.45920710240318385, "eval_loss": Infinity, "eval_runtime": 316.2707, "eval_samples_per_second": 22.202, "eval_steps_per_second": 2.776, "eval_wer": 0.8028480008462027, "step": 12000 }, { "epoch": 0.47834073166998314, "grad_norm": 3.08317494392395, "learning_rate": 0.0002819669172932331, "loss": 1.0616, "step": 12500 }, { "epoch": 0.4974743609367825, "grad_norm": 1.9941602945327759, "learning_rate": 0.0002812150375939849, "loss": 1.0426, "step": 13000 }, { "epoch": 0.4974743609367825, "eval_loss": Infinity, "eval_runtime": 314.9265, "eval_samples_per_second": 22.297, "eval_steps_per_second": 2.788, "eval_wer": 0.8004416120160779, "step": 13000 }, { "epoch": 0.5166079902035818, "grad_norm": 2.075686454772949, "learning_rate": 0.0002804646616541353, "loss": 1.0316, "step": 13500 }, { "epoch": 0.5357416194703811, "grad_norm": 1.7053288221359253, "learning_rate": 0.0002797127819548872, "loss": 1.0202, "step": 14000 }, { "epoch": 0.5357416194703811, "eval_loss": Infinity, "eval_runtime": 315.6762, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.781, "eval_wer": 0.7824598053733869, "step": 14000 }, { "epoch": 0.5548752487371804, "grad_norm": 2.8614988327026367, "learning_rate": 0.00027896240601503757, "loss": 1.0398, "step": 14500 }, { "epoch": 0.5740088780039798, "grad_norm": 2.7677505016326904, "learning_rate": 0.0002782105263157895, "loss": 1.0005, "step": 15000 }, { "epoch": 0.5740088780039798, "eval_loss": Infinity, "eval_runtime": 316.6237, "eval_samples_per_second": 22.178, "eval_steps_per_second": 2.773, "eval_wer": 0.7785725618785699, "step": 15000 }, { "epoch": 0.5931425072707791, "grad_norm": 1.8297598361968994, "learning_rate": 0.0002774586466165413, "loss": 1.0069, "step": 15500 }, { "epoch": 0.6122761365375784, "grad_norm": 3.259901285171509, "learning_rate": 0.0002767067669172932, "loss": 0.9987, "step": 16000 }, { "epoch": 0.6122761365375784, "eval_loss": Infinity, "eval_runtime": 317.076, "eval_samples_per_second": 22.146, "eval_steps_per_second": 2.769, "eval_wer": 0.752102284747197, "step": 16000 }, { "epoch": 0.6314097658043778, "grad_norm": 2.1204450130462646, "learning_rate": 0.0002759548872180451, "loss": 0.9862, "step": 16500 }, { "epoch": 0.6505433950711771, "grad_norm": 4.308948993682861, "learning_rate": 0.00027520300751879696, "loss": 0.9705, "step": 17000 }, { "epoch": 0.6505433950711771, "eval_loss": Infinity, "eval_runtime": 316.4489, "eval_samples_per_second": 22.19, "eval_steps_per_second": 2.775, "eval_wer": 0.7592817854876243, "step": 17000 }, { "epoch": 0.6696770243379764, "grad_norm": 3.809417247772217, "learning_rate": 0.00027445112781954883, "loss": 0.9836, "step": 17500 }, { "epoch": 0.6888106536047758, "grad_norm": 8.3826904296875, "learning_rate": 0.00027369924812030075, "loss": 0.9884, "step": 18000 }, { "epoch": 0.6888106536047758, "eval_loss": Infinity, "eval_runtime": 317.2775, "eval_samples_per_second": 22.132, "eval_steps_per_second": 2.767, "eval_wer": 0.7380606092659192, "step": 18000 }, { "epoch": 0.7079442828715751, "grad_norm": 2.119809627532959, "learning_rate": 0.0002729473684210526, "loss": 0.9554, "step": 18500 }, { "epoch": 0.7270779121383744, "grad_norm": 2.714183807373047, "learning_rate": 0.0002721954887218045, "loss": 0.9618, "step": 19000 }, { "epoch": 0.7270779121383744, "eval_loss": Infinity, "eval_runtime": 316.8028, "eval_samples_per_second": 22.165, "eval_steps_per_second": 2.771, "eval_wer": 0.7364739792680347, "step": 19000 }, { "epoch": 0.7462115414051738, "grad_norm": 4.593650817871094, "learning_rate": 0.00027144360902255635, "loss": 0.9389, "step": 19500 }, { "epoch": 0.765345170671973, "grad_norm": 1.7419074773788452, "learning_rate": 0.0002706917293233083, "loss": 0.9374, "step": 20000 }, { "epoch": 0.765345170671973, "eval_loss": Infinity, "eval_runtime": 316.3558, "eval_samples_per_second": 22.197, "eval_steps_per_second": 2.775, "eval_wer": 0.7415644171779141, "step": 20000 }, { "epoch": 0.7844787999387723, "grad_norm": NaN, "learning_rate": 0.0002699413533834586, "loss": 0.9241, "step": 20500 }, { "epoch": 0.8036124292055717, "grad_norm": 2.104843854904175, "learning_rate": 0.0002691894736842105, "loss": 0.9175, "step": 21000 }, { "epoch": 0.8036124292055717, "eval_loss": Infinity, "eval_runtime": 315.3422, "eval_samples_per_second": 22.268, "eval_steps_per_second": 2.784, "eval_wer": 0.7299291305267611, "step": 21000 }, { "epoch": 0.822746058472371, "grad_norm": 3.1283345222473145, "learning_rate": 0.0002684390977443609, "loss": 0.9193, "step": 21500 }, { "epoch": 0.8418796877391703, "grad_norm": 3.1702754497528076, "learning_rate": 0.00026768721804511274, "loss": 0.9247, "step": 22000 }, { "epoch": 0.8418796877391703, "eval_loss": Infinity, "eval_runtime": 316.0504, "eval_samples_per_second": 22.218, "eval_steps_per_second": 2.778, "eval_wer": 0.7260154431986461, "step": 22000 }, { "epoch": 0.8610133170059697, "grad_norm": 4.537879943847656, "learning_rate": 0.00026693533834586466, "loss": 0.9154, "step": 22500 }, { "epoch": 0.880146946272769, "grad_norm": 3.9466328620910645, "learning_rate": 0.00026618345864661653, "loss": 0.9001, "step": 23000 }, { "epoch": 0.880146946272769, "eval_loss": Infinity, "eval_runtime": 315.881, "eval_samples_per_second": 22.23, "eval_steps_per_second": 2.78, "eval_wer": 0.72349005711868, "step": 23000 }, { "epoch": 0.8992805755395683, "grad_norm": 5.256113052368164, "learning_rate": 0.0002654315789473684, "loss": 0.9027, "step": 23500 }, { "epoch": 0.9184142048063677, "grad_norm": 2.1492791175842285, "learning_rate": 0.00026467969924812027, "loss": 0.8836, "step": 24000 }, { "epoch": 0.9184142048063677, "eval_loss": Infinity, "eval_runtime": 315.4361, "eval_samples_per_second": 22.261, "eval_steps_per_second": 2.783, "eval_wer": 0.7085757351385656, "step": 24000 }, { "epoch": 0.937547834073167, "grad_norm": 3.541306734085083, "learning_rate": 0.0002639278195488722, "loss": 0.8782, "step": 24500 }, { "epoch": 0.9566814633399663, "grad_norm": 1.7392828464508057, "learning_rate": 0.000263175939849624, "loss": 0.8789, "step": 25000 }, { "epoch": 0.9566814633399663, "eval_loss": Infinity, "eval_runtime": 316.1779, "eval_samples_per_second": 22.209, "eval_steps_per_second": 2.777, "eval_wer": 0.7144859318806854, "step": 25000 }, { "epoch": 0.9758150926067657, "grad_norm": 2.769277572631836, "learning_rate": 0.0002624240601503759, "loss": 0.8776, "step": 25500 }, { "epoch": 0.994948721873565, "grad_norm": 3.246126413345337, "learning_rate": 0.0002616721804511278, "loss": 0.8734, "step": 26000 }, { "epoch": 0.994948721873565, "eval_loss": Infinity, "eval_runtime": 315.5548, "eval_samples_per_second": 22.253, "eval_steps_per_second": 2.782, "eval_wer": 0.7195102602073197, "step": 26000 }, { "epoch": 1.0140823511403643, "grad_norm": 1.4256001710891724, "learning_rate": 0.00026092030075187966, "loss": 0.8471, "step": 26500 }, { "epoch": 1.0332159804071637, "grad_norm": 1.2950644493103027, "learning_rate": 0.00026016992481203005, "loss": 0.8398, "step": 27000 }, { "epoch": 1.0332159804071637, "eval_loss": Infinity, "eval_runtime": 314.7022, "eval_samples_per_second": 22.313, "eval_steps_per_second": 2.79, "eval_wer": 0.6837978633382695, "step": 27000 }, { "epoch": 1.0523496096739628, "grad_norm": 0.8874345421791077, "learning_rate": 0.0002594180451127819, "loss": 0.8403, "step": 27500 }, { "epoch": 1.0714832389407623, "grad_norm": 1.146148920059204, "learning_rate": 0.0002586661654135338, "loss": 0.8268, "step": 28000 }, { "epoch": 1.0714832389407623, "eval_loss": Infinity, "eval_runtime": 313.7884, "eval_samples_per_second": 22.378, "eval_steps_per_second": 2.798, "eval_wer": 0.6793288555108948, "step": 28000 }, { "epoch": 1.0906168682075617, "grad_norm": 0.687147319316864, "learning_rate": 0.0002579142857142857, "loss": 0.8228, "step": 28500 }, { "epoch": 1.1097504974743608, "grad_norm": 0.614025890827179, "learning_rate": 0.00025716390977443605, "loss": 0.8196, "step": 29000 }, { "epoch": 1.1097504974743608, "eval_loss": Infinity, "eval_runtime": 315.3179, "eval_samples_per_second": 22.27, "eval_steps_per_second": 2.784, "eval_wer": 0.66387243494817, "step": 29000 }, { "epoch": 1.1288841267411602, "grad_norm": 0.7900418043136597, "learning_rate": 0.0002564120300751879, "loss": 0.8262, "step": 29500 }, { "epoch": 1.1480177560079596, "grad_norm": 0.9414839148521423, "learning_rate": 0.00025566015037593984, "loss": 0.8124, "step": 30000 }, { "epoch": 1.1480177560079596, "eval_loss": Infinity, "eval_runtime": 314.1972, "eval_samples_per_second": 22.349, "eval_steps_per_second": 2.794, "eval_wer": 0.6615453776179395, "step": 30000 }, { "epoch": 1.1671513852747588, "grad_norm": 0.9619298577308655, "learning_rate": 0.0002549082706766917, "loss": 0.8052, "step": 30500 }, { "epoch": 1.1862850145415582, "grad_norm": 1.3762531280517578, "learning_rate": 0.0002541563909774436, "loss": 0.7935, "step": 31000 }, { "epoch": 1.1862850145415582, "eval_loss": Infinity, "eval_runtime": 315.3254, "eval_samples_per_second": 22.269, "eval_steps_per_second": 2.784, "eval_wer": 0.6607520626189972, "step": 31000 }, { "epoch": 1.2054186438083576, "grad_norm": 1.0670289993286133, "learning_rate": 0.00025340451127819544, "loss": 0.799, "step": 31500 }, { "epoch": 1.2245522730751568, "grad_norm": 1.5455262660980225, "learning_rate": 0.00025265263157894736, "loss": 0.817, "step": 32000 }, { "epoch": 1.2245522730751568, "eval_loss": Infinity, "eval_runtime": 314.3401, "eval_samples_per_second": 22.339, "eval_steps_per_second": 2.793, "eval_wer": 0.6709726041887032, "step": 32000 }, { "epoch": 1.2436859023419562, "grad_norm": 1.185735821723938, "learning_rate": 0.00025190075187969923, "loss": 0.7835, "step": 32500 }, { "epoch": 1.2628195316087556, "grad_norm": 0.8058122396469116, "learning_rate": 0.0002511503759398496, "loss": 0.7975, "step": 33000 }, { "epoch": 1.2628195316087556, "eval_loss": Infinity, "eval_runtime": 315.7071, "eval_samples_per_second": 22.242, "eval_steps_per_second": 2.781, "eval_wer": 0.66950497144066, "step": 33000 }, { "epoch": 1.2819531608755548, "grad_norm": 0.6225046515464783, "learning_rate": 0.0002503984962406015, "loss": 0.8023, "step": 33500 }, { "epoch": 1.3010867901423542, "grad_norm": 0.8266538381576538, "learning_rate": 0.00024964661654135336, "loss": 0.7746, "step": 34000 }, { "epoch": 1.3010867901423542, "eval_loss": Infinity, "eval_runtime": 316.3157, "eval_samples_per_second": 22.199, "eval_steps_per_second": 2.776, "eval_wer": 0.6674423524434102, "step": 34000 }, { "epoch": 1.3202204194091536, "grad_norm": 0.8520437479019165, "learning_rate": 0.00024889624060150375, "loss": 0.7828, "step": 34500 }, { "epoch": 1.3393540486759528, "grad_norm": 0.9565845131874084, "learning_rate": 0.0002481443609022556, "loss": 0.8013, "step": 35000 }, { "epoch": 1.3393540486759528, "eval_loss": Infinity, "eval_runtime": 315.1504, "eval_samples_per_second": 22.281, "eval_steps_per_second": 2.786, "eval_wer": 0.6586762217050983, "step": 35000 }, { "epoch": 1.3584876779427522, "grad_norm": 1.2239874601364136, "learning_rate": 0.0002473924812030075, "loss": 0.7765, "step": 35500 }, { "epoch": 1.3776213072095516, "grad_norm": 0.6564140319824219, "learning_rate": 0.00024664060150375935, "loss": 0.7703, "step": 36000 }, { "epoch": 1.3776213072095516, "eval_loss": Infinity, "eval_runtime": 316.0508, "eval_samples_per_second": 22.218, "eval_steps_per_second": 2.778, "eval_wer": 0.6388301248148932, "step": 36000 }, { "epoch": 1.3967549364763507, "grad_norm": 0.7060734629631042, "learning_rate": 0.0002458887218045113, "loss": 0.7685, "step": 36500 }, { "epoch": 1.4158885657431501, "grad_norm": 1.3065494298934937, "learning_rate": 0.00024513684210526314, "loss": 0.7581, "step": 37000 }, { "epoch": 1.4158885657431501, "eval_loss": Infinity, "eval_runtime": 315.8618, "eval_samples_per_second": 22.231, "eval_steps_per_second": 2.78, "eval_wer": 0.6461021789718637, "step": 37000 }, { "epoch": 1.4350221950099495, "grad_norm": 1.282378911972046, "learning_rate": 0.00024438646616541354, "loss": 0.7693, "step": 37500 }, { "epoch": 1.4541558242767487, "grad_norm": 1.1911076307296753, "learning_rate": 0.00024363458646616538, "loss": 0.7468, "step": 38000 }, { "epoch": 1.4541558242767487, "eval_loss": Infinity, "eval_runtime": 316.6111, "eval_samples_per_second": 22.179, "eval_steps_per_second": 2.773, "eval_wer": 0.6333959170721388, "step": 38000 }, { "epoch": 1.4732894535435481, "grad_norm": 0.7600271105766296, "learning_rate": 0.00024288270676691727, "loss": 0.7582, "step": 38500 }, { "epoch": 1.4924230828103475, "grad_norm": 0.6814852356910706, "learning_rate": 0.00024213082706766914, "loss": 0.7534, "step": 39000 }, { "epoch": 1.4924230828103475, "eval_loss": Infinity, "eval_runtime": 316.0864, "eval_samples_per_second": 22.215, "eval_steps_per_second": 2.778, "eval_wer": 0.6300639940765813, "step": 39000 }, { "epoch": 1.5115567120771467, "grad_norm": 0.7314792275428772, "learning_rate": 0.00024137894736842104, "loss": 0.7336, "step": 39500 }, { "epoch": 1.530690341343946, "grad_norm": 1.1860034465789795, "learning_rate": 0.0002406270676691729, "loss": 0.752, "step": 40000 }, { "epoch": 1.530690341343946, "eval_loss": Infinity, "eval_runtime": 318.462, "eval_samples_per_second": 22.05, "eval_steps_per_second": 2.757, "eval_wer": 0.6221969536704041, "step": 40000 }, { "epoch": 1.5498239706107455, "grad_norm": 1.3469467163085938, "learning_rate": 0.0002398766917293233, "loss": 0.7508, "step": 40500 }, { "epoch": 1.5689575998775447, "grad_norm": 0.7789831757545471, "learning_rate": 0.00023912481203007516, "loss": 0.736, "step": 41000 }, { "epoch": 1.5689575998775447, "eval_loss": Infinity, "eval_runtime": 316.8112, "eval_samples_per_second": 22.165, "eval_steps_per_second": 2.771, "eval_wer": 0.6203326634228897, "step": 41000 }, { "epoch": 1.588091229144344, "grad_norm": 1.3181277513504028, "learning_rate": 0.00023837443609022553, "loss": 0.752, "step": 41500 }, { "epoch": 1.6072248584111435, "grad_norm": 0.6017114520072937, "learning_rate": 0.00023762255639097742, "loss": 0.7188, "step": 42000 }, { "epoch": 1.6072248584111435, "eval_loss": Infinity, "eval_runtime": 315.1913, "eval_samples_per_second": 22.279, "eval_steps_per_second": 2.786, "eval_wer": 0.620795430505606, "step": 42000 }, { "epoch": 1.6263584876779427, "grad_norm": 0.7502321600914001, "learning_rate": 0.0002368706766917293, "loss": 0.7138, "step": 42500 }, { "epoch": 1.645492116944742, "grad_norm": 0.6769944429397583, "learning_rate": 0.00023611879699248119, "loss": 0.7308, "step": 43000 }, { "epoch": 1.645492116944742, "eval_loss": Infinity, "eval_runtime": 316.0035, "eval_samples_per_second": 22.221, "eval_steps_per_second": 2.778, "eval_wer": 0.6056695578591073, "step": 43000 }, { "epoch": 1.6646257462115415, "grad_norm": 1.050374984741211, "learning_rate": 0.00023536691729323305, "loss": 0.7145, "step": 43500 }, { "epoch": 1.6837593754783406, "grad_norm": 1.2912209033966064, "learning_rate": 0.00023461503759398495, "loss": 0.7179, "step": 44000 }, { "epoch": 1.6837593754783406, "eval_loss": Infinity, "eval_runtime": 315.6998, "eval_samples_per_second": 22.243, "eval_steps_per_second": 2.781, "eval_wer": 0.6291649037444468, "step": 44000 }, { "epoch": 1.70289300474514, "grad_norm": 0.8525875210762024, "learning_rate": 0.00023386315789473682, "loss": 0.7237, "step": 44500 }, { "epoch": 1.7220266340119394, "grad_norm": 1.1356332302093506, "learning_rate": 0.0002331112781954887, "loss": 0.7341, "step": 45000 }, { "epoch": 1.7220266340119394, "eval_loss": Infinity, "eval_runtime": 316.323, "eval_samples_per_second": 22.199, "eval_steps_per_second": 2.776, "eval_wer": 0.6034218320287709, "step": 45000 }, { "epoch": 1.7411602632787386, "grad_norm": 0.6900098323822021, "learning_rate": 0.00023235939849624058, "loss": 0.7166, "step": 45500 }, { "epoch": 1.760293892545538, "grad_norm": 0.7305801510810852, "learning_rate": 0.00023160902255639097, "loss": 0.7061, "step": 46000 }, { "epoch": 1.760293892545538, "eval_loss": Infinity, "eval_runtime": 315.9517, "eval_samples_per_second": 22.225, "eval_steps_per_second": 2.779, "eval_wer": 0.6136555955151258, "step": 46000 }, { "epoch": 1.7794275218123374, "grad_norm": 1.960204839706421, "learning_rate": 0.00023085714285714284, "loss": 0.688, "step": 46500 }, { "epoch": 1.7985611510791366, "grad_norm": 1.001535415649414, "learning_rate": 0.00023010526315789473, "loss": 0.7081, "step": 47000 }, { "epoch": 1.7985611510791366, "eval_loss": Infinity, "eval_runtime": 316.7057, "eval_samples_per_second": 22.172, "eval_steps_per_second": 2.772, "eval_wer": 0.6123334038502222, "step": 47000 }, { "epoch": 1.817694780345936, "grad_norm": 0.8360883593559265, "learning_rate": 0.0002293533834586466, "loss": 0.704, "step": 47500 }, { "epoch": 1.8368284096127354, "grad_norm": 1.349821925163269, "learning_rate": 0.0002286015037593985, "loss": 0.6957, "step": 48000 }, { "epoch": 1.8368284096127354, "eval_loss": Infinity, "eval_runtime": 316.3207, "eval_samples_per_second": 22.199, "eval_steps_per_second": 2.776, "eval_wer": 0.6053522318595304, "step": 48000 }, { "epoch": 1.8559620388795346, "grad_norm": 0.9751301407814026, "learning_rate": 0.00022785112781954886, "loss": 0.6974, "step": 48500 }, { "epoch": 1.875095668146334, "grad_norm": 0.7246661186218262, "learning_rate": 0.00022710075187969923, "loss": 0.7052, "step": 49000 }, { "epoch": 1.875095668146334, "eval_loss": Infinity, "eval_runtime": 315.447, "eval_samples_per_second": 22.26, "eval_steps_per_second": 2.783, "eval_wer": 0.616498307594669, "step": 49000 }, { "epoch": 1.8942292974131334, "grad_norm": 0.716491162776947, "learning_rate": 0.00022634887218045112, "loss": 0.6898, "step": 49500 }, { "epoch": 1.9133629266799326, "grad_norm": 0.7438942193984985, "learning_rate": 0.000225596992481203, "loss": 0.6833, "step": 50000 }, { "epoch": 1.9133629266799326, "eval_loss": Infinity, "eval_runtime": 316.1363, "eval_samples_per_second": 22.212, "eval_steps_per_second": 2.777, "eval_wer": 0.5887455045483393, "step": 50000 }, { "epoch": 1.932496555946732, "grad_norm": 0.8066436648368835, "learning_rate": 0.00022484511278195488, "loss": 0.6755, "step": 50500 }, { "epoch": 1.9516301852135314, "grad_norm": 0.6446587443351746, "learning_rate": 0.00022409323308270672, "loss": 0.6995, "step": 51000 }, { "epoch": 1.9516301852135314, "eval_loss": Infinity, "eval_runtime": 315.7016, "eval_samples_per_second": 22.243, "eval_steps_per_second": 2.781, "eval_wer": 0.5871456526338058, "step": 51000 }, { "epoch": 1.9707638144803306, "grad_norm": 0.8823833465576172, "learning_rate": 0.00022334135338345862, "loss": 0.6909, "step": 51500 }, { "epoch": 1.98989744374713, "grad_norm": 0.8129588961601257, "learning_rate": 0.0002225894736842105, "loss": 0.6703, "step": 52000 }, { "epoch": 1.98989744374713, "eval_loss": Infinity, "eval_runtime": 316.397, "eval_samples_per_second": 22.194, "eval_steps_per_second": 2.775, "eval_wer": 0.5954357943727523, "step": 52000 }, { "epoch": 2.0090310730139294, "grad_norm": 0.9364180564880371, "learning_rate": 0.0002218390977443609, "loss": 0.6462, "step": 52500 }, { "epoch": 2.0281647022807285, "grad_norm": 0.5429893732070923, "learning_rate": 0.00022108721804511275, "loss": 0.6265, "step": 53000 }, { "epoch": 2.0281647022807285, "eval_loss": Infinity, "eval_runtime": 315.6741, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.781, "eval_wer": 0.5791728368944362, "step": 53000 }, { "epoch": 2.0472983315475277, "grad_norm": 0.7964287996292114, "learning_rate": 0.00022033533834586464, "loss": 0.6283, "step": 53500 }, { "epoch": 2.0664319608143273, "grad_norm": 0.928997278213501, "learning_rate": 0.0002195834586466165, "loss": 0.633, "step": 54000 }, { "epoch": 2.0664319608143273, "eval_loss": Infinity, "eval_runtime": 316.8143, "eval_samples_per_second": 22.164, "eval_steps_per_second": 2.771, "eval_wer": 0.5696001692405331, "step": 54000 }, { "epoch": 2.0855655900811265, "grad_norm": 0.9358041882514954, "learning_rate": 0.0002188315789473684, "loss": 0.6608, "step": 54500 }, { "epoch": 2.1046992193479257, "grad_norm": 0.49673086404800415, "learning_rate": 0.00021807969924812027, "loss": 0.6399, "step": 55000 }, { "epoch": 2.1046992193479257, "eval_loss": Infinity, "eval_runtime": 316.4793, "eval_samples_per_second": 22.188, "eval_steps_per_second": 2.774, "eval_wer": 0.5717685635709753, "step": 55000 }, { "epoch": 2.1238328486147253, "grad_norm": 0.824380099773407, "learning_rate": 0.00021732932330827064, "loss": 0.6317, "step": 55500 }, { "epoch": 2.1429664778815245, "grad_norm": 0.784487783908844, "learning_rate": 0.00021657744360902253, "loss": 0.6165, "step": 56000 }, { "epoch": 2.1429664778815245, "eval_loss": Infinity, "eval_runtime": 314.9557, "eval_samples_per_second": 22.295, "eval_steps_per_second": 2.788, "eval_wer": 0.5836550666384599, "step": 56000 }, { "epoch": 2.1621001071483237, "grad_norm": 1.1931605339050293, "learning_rate": 0.0002158270676691729, "loss": 0.6268, "step": 56500 }, { "epoch": 2.1812337364151233, "grad_norm": 1.4376397132873535, "learning_rate": 0.0002150751879699248, "loss": 0.6148, "step": 57000 }, { "epoch": 2.1812337364151233, "eval_loss": Infinity, "eval_runtime": 314.0058, "eval_samples_per_second": 22.363, "eval_steps_per_second": 2.796, "eval_wer": 0.5597233975037021, "step": 57000 }, { "epoch": 2.2003673656819225, "grad_norm": 1.6363264322280884, "learning_rate": 0.00021432330827067666, "loss": 0.6377, "step": 57500 }, { "epoch": 2.2195009949487217, "grad_norm": 4.29092264175415, "learning_rate": 0.00021357142857142855, "loss": 0.6228, "step": 58000 }, { "epoch": 2.2195009949487217, "eval_loss": Infinity, "eval_runtime": 314.1699, "eval_samples_per_second": 22.351, "eval_steps_per_second": 2.795, "eval_wer": 0.5706843664057542, "step": 58000 }, { "epoch": 2.2386346242155213, "grad_norm": 1.1523572206497192, "learning_rate": 0.00021281954887218042, "loss": 0.6243, "step": 58500 }, { "epoch": 2.2577682534823205, "grad_norm": 2.0131170749664307, "learning_rate": 0.00021206766917293232, "loss": 0.6302, "step": 59000 }, { "epoch": 2.2577682534823205, "eval_loss": Infinity, "eval_runtime": 315.1147, "eval_samples_per_second": 22.284, "eval_steps_per_second": 2.786, "eval_wer": 0.5717685635709753, "step": 59000 }, { "epoch": 2.2769018827491196, "grad_norm": 1.358688235282898, "learning_rate": 0.00021131578947368419, "loss": 0.612, "step": 59500 }, { "epoch": 2.2960355120159193, "grad_norm": 0.5921105742454529, "learning_rate": 0.00021056390977443608, "loss": 0.6035, "step": 60000 }, { "epoch": 2.2960355120159193, "eval_loss": Infinity, "eval_runtime": 314.6822, "eval_samples_per_second": 22.315, "eval_steps_per_second": 2.79, "eval_wer": 0.5638883012481489, "step": 60000 }, { "epoch": 2.3151691412827184, "grad_norm": 0.600351095199585, "learning_rate": 0.00020981203007518795, "loss": 0.6157, "step": 60500 }, { "epoch": 2.3343027705495176, "grad_norm": 1.6874371767044067, "learning_rate": 0.00020906015037593984, "loss": 0.602, "step": 61000 }, { "epoch": 2.3343027705495176, "eval_loss": Infinity, "eval_runtime": 316.103, "eval_samples_per_second": 22.214, "eval_steps_per_second": 2.778, "eval_wer": 0.5633197588322403, "step": 61000 }, { "epoch": 2.3534363998163172, "grad_norm": 0.5364500284194946, "learning_rate": 0.0002083082706766917, "loss": 0.6057, "step": 61500 }, { "epoch": 2.3725700290831164, "grad_norm": 1.9607787132263184, "learning_rate": 0.0002075563909774436, "loss": 0.6023, "step": 62000 }, { "epoch": 2.3725700290831164, "eval_loss": Infinity, "eval_runtime": 314.4546, "eval_samples_per_second": 22.331, "eval_steps_per_second": 2.792, "eval_wer": 0.5581103236725196, "step": 62000 }, { "epoch": 2.3917036583499156, "grad_norm": 1.1173665523529053, "learning_rate": 0.00020680451127819547, "loss": 0.6097, "step": 62500 }, { "epoch": 2.4108372876167152, "grad_norm": 1.7585097551345825, "learning_rate": 0.00020605563909774434, "loss": 0.5924, "step": 63000 }, { "epoch": 2.4108372876167152, "eval_loss": Infinity, "eval_runtime": 314.7721, "eval_samples_per_second": 22.308, "eval_steps_per_second": 2.789, "eval_wer": 0.5511820393484239, "step": 63000 }, { "epoch": 2.4299709168835144, "grad_norm": 0.9437362551689148, "learning_rate": 0.00020530375939849623, "loss": 0.5968, "step": 63500 }, { "epoch": 2.4491045461503136, "grad_norm": 0.6077060103416443, "learning_rate": 0.0002045518796992481, "loss": 0.5969, "step": 64000 }, { "epoch": 2.4491045461503136, "eval_loss": Infinity, "eval_runtime": 314.4786, "eval_samples_per_second": 22.329, "eval_steps_per_second": 2.792, "eval_wer": 0.5489739792680347, "step": 64000 }, { "epoch": 2.468238175417113, "grad_norm": 1.9816350936889648, "learning_rate": 0.0002038, "loss": 0.5978, "step": 64500 }, { "epoch": 2.4873718046839124, "grad_norm": 1.865081787109375, "learning_rate": 0.00020304812030075186, "loss": 0.6029, "step": 65000 }, { "epoch": 2.4873718046839124, "eval_loss": Infinity, "eval_runtime": 314.4282, "eval_samples_per_second": 22.333, "eval_steps_per_second": 2.792, "eval_wer": 0.5444124180241168, "step": 65000 }, { "epoch": 2.5065054339507116, "grad_norm": 0.9471901059150696, "learning_rate": 0.00020229624060150376, "loss": 0.6068, "step": 65500 }, { "epoch": 2.525639063217511, "grad_norm": 1.5803519487380981, "learning_rate": 0.0002015443609022556, "loss": 0.6046, "step": 66000 }, { "epoch": 2.525639063217511, "eval_loss": Infinity, "eval_runtime": 316.3481, "eval_samples_per_second": 22.197, "eval_steps_per_second": 2.775, "eval_wer": 0.5460519356885974, "step": 66000 }, { "epoch": 2.5447726924843104, "grad_norm": 0.7890714406967163, "learning_rate": 0.0002007924812030075, "loss": 0.5865, "step": 66500 }, { "epoch": 2.5639063217511096, "grad_norm": 0.7467088103294373, "learning_rate": 0.00020004060150375936, "loss": 0.6095, "step": 67000 }, { "epoch": 2.5639063217511096, "eval_loss": Infinity, "eval_runtime": 316.5673, "eval_samples_per_second": 22.182, "eval_steps_per_second": 2.774, "eval_wer": 0.5476253437698329, "step": 67000 }, { "epoch": 2.583039951017909, "grad_norm": 0.7762987613677979, "learning_rate": 0.00019928872180451126, "loss": 0.5869, "step": 67500 }, { "epoch": 2.6021735802847084, "grad_norm": 0.5018890500068665, "learning_rate": 0.00019853684210526312, "loss": 0.598, "step": 68000 }, { "epoch": 2.6021735802847084, "eval_loss": Infinity, "eval_runtime": 316.3997, "eval_samples_per_second": 22.193, "eval_steps_per_second": 2.775, "eval_wer": 0.532129257457161, "step": 68000 }, { "epoch": 2.6213072095515075, "grad_norm": 1.0056216716766357, "learning_rate": 0.00019778496240601502, "loss": 0.5821, "step": 68500 }, { "epoch": 2.640440838818307, "grad_norm": 1.3957178592681885, "learning_rate": 0.00019703308270676689, "loss": 0.5812, "step": 69000 }, { "epoch": 2.640440838818307, "eval_loss": Infinity, "eval_runtime": 315.6807, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.781, "eval_wer": 0.5357388407023482, "step": 69000 }, { "epoch": 2.6595744680851063, "grad_norm": 0.6841593384742737, "learning_rate": 0.00019628120300751878, "loss": 0.5802, "step": 69500 }, { "epoch": 2.6787080973519055, "grad_norm": 0.8700592517852783, "learning_rate": 0.00019552932330827065, "loss": 0.5957, "step": 70000 }, { "epoch": 2.6787080973519055, "eval_loss": Infinity, "eval_runtime": 316.4344, "eval_samples_per_second": 22.191, "eval_steps_per_second": 2.775, "eval_wer": 0.5368230378675692, "step": 70000 }, { "epoch": 2.697841726618705, "grad_norm": 1.0066908597946167, "learning_rate": 0.00019477894736842104, "loss": 0.5817, "step": 70500 }, { "epoch": 2.7169753558855043, "grad_norm": 0.8253029584884644, "learning_rate": 0.0001940270676691729, "loss": 0.5909, "step": 71000 }, { "epoch": 2.7169753558855043, "eval_loss": Infinity, "eval_runtime": 318.9129, "eval_samples_per_second": 22.019, "eval_steps_per_second": 2.753, "eval_wer": 0.5239448910514068, "step": 71000 }, { "epoch": 2.7361089851523035, "grad_norm": 0.9649154543876648, "learning_rate": 0.0001932751879699248, "loss": 0.5877, "step": 71500 }, { "epoch": 2.755242614419103, "grad_norm": 1.6835025548934937, "learning_rate": 0.00019252330827067667, "loss": 0.5953, "step": 72000 }, { "epoch": 2.755242614419103, "eval_loss": Infinity, "eval_runtime": 316.62, "eval_samples_per_second": 22.178, "eval_steps_per_second": 2.773, "eval_wer": 0.5421911360270785, "step": 72000 }, { "epoch": 2.7743762436859023, "grad_norm": 1.4002177715301514, "learning_rate": 0.00019177443609022553, "loss": 0.5811, "step": 72500 }, { "epoch": 2.7935098729527015, "grad_norm": 1.1865595579147339, "learning_rate": 0.00019102255639097743, "loss": 0.5702, "step": 73000 }, { "epoch": 2.7935098729527015, "eval_loss": Infinity, "eval_runtime": 315.9208, "eval_samples_per_second": 22.227, "eval_steps_per_second": 2.779, "eval_wer": 0.5225698117199069, "step": 73000 }, { "epoch": 2.812643502219501, "grad_norm": 0.8575685620307922, "learning_rate": 0.0001902706766917293, "loss": 0.5806, "step": 73500 }, { "epoch": 2.8317771314863003, "grad_norm": 0.9127354025840759, "learning_rate": 0.0001895187969924812, "loss": 0.5755, "step": 74000 }, { "epoch": 2.8317771314863003, "eval_loss": Infinity, "eval_runtime": 315.601, "eval_samples_per_second": 22.25, "eval_steps_per_second": 2.782, "eval_wer": 0.5319441506240745, "step": 74000 }, { "epoch": 2.8509107607530995, "grad_norm": 1.893068552017212, "learning_rate": 0.00018876691729323306, "loss": 0.574, "step": 74500 }, { "epoch": 2.870044390019899, "grad_norm": 0.7603012323379517, "learning_rate": 0.00018801654135338345, "loss": 0.5659, "step": 75000 }, { "epoch": 2.870044390019899, "eval_loss": Infinity, "eval_runtime": 315.7507, "eval_samples_per_second": 22.239, "eval_steps_per_second": 2.781, "eval_wer": 0.5286783372117622, "step": 75000 }, { "epoch": 2.8891780192866983, "grad_norm": 1.636072039604187, "learning_rate": 0.00018726466165413532, "loss": 0.5645, "step": 75500 }, { "epoch": 2.9083116485534974, "grad_norm": 0.8112033605575562, "learning_rate": 0.0001865127819548872, "loss": 0.5581, "step": 76000 }, { "epoch": 2.9083116485534974, "eval_loss": Infinity, "eval_runtime": 315.6028, "eval_samples_per_second": 22.249, "eval_steps_per_second": 2.782, "eval_wer": 0.5277924687962767, "step": 76000 }, { "epoch": 2.927445277820297, "grad_norm": 0.8813944458961487, "learning_rate": 0.00018576090225563908, "loss": 0.573, "step": 76500 }, { "epoch": 2.9465789070870962, "grad_norm": 0.7851129174232483, "learning_rate": 0.00018501052631578945, "loss": 0.5786, "step": 77000 }, { "epoch": 2.9465789070870962, "eval_loss": Infinity, "eval_runtime": 316.8095, "eval_samples_per_second": 22.165, "eval_steps_per_second": 2.771, "eval_wer": 0.5194626613073832, "step": 77000 }, { "epoch": 2.9657125363538954, "grad_norm": 0.9913876056671143, "learning_rate": 0.00018425864661654134, "loss": 0.5783, "step": 77500 }, { "epoch": 2.984846165620695, "grad_norm": 0.9374109506607056, "learning_rate": 0.0001835067669172932, "loss": 0.5485, "step": 78000 }, { "epoch": 2.984846165620695, "eval_loss": Infinity, "eval_runtime": 316.6351, "eval_samples_per_second": 22.177, "eval_steps_per_second": 2.773, "eval_wer": 0.5255976306325365, "step": 78000 }, { "epoch": 3.0039797948874942, "grad_norm": 0.9412303566932678, "learning_rate": 0.0001827548872180451, "loss": 0.543, "step": 78500 }, { "epoch": 3.0231134241542934, "grad_norm": 0.9871559739112854, "learning_rate": 0.00018200300751879697, "loss": 0.5113, "step": 79000 }, { "epoch": 3.0231134241542934, "eval_loss": Infinity, "eval_runtime": 316.7492, "eval_samples_per_second": 22.169, "eval_steps_per_second": 2.772, "eval_wer": 0.5220277131372963, "step": 79000 }, { "epoch": 3.042247053421093, "grad_norm": 1.3727389574050903, "learning_rate": 0.00018125112781954887, "loss": 0.5075, "step": 79500 }, { "epoch": 3.061380682687892, "grad_norm": 1.3193981647491455, "learning_rate": 0.00018049924812030073, "loss": 0.4973, "step": 80000 }, { "epoch": 3.061380682687892, "eval_loss": Infinity, "eval_runtime": 315.9407, "eval_samples_per_second": 22.226, "eval_steps_per_second": 2.779, "eval_wer": 0.5146102178971864, "step": 80000 }, { "epoch": 3.0805143119546914, "grad_norm": 1.4254885911941528, "learning_rate": 0.00017974887218045113, "loss": 0.5063, "step": 80500 }, { "epoch": 3.099647941221491, "grad_norm": 1.1925376653671265, "learning_rate": 0.000178996992481203, "loss": 0.5085, "step": 81000 }, { "epoch": 3.099647941221491, "eval_loss": Infinity, "eval_runtime": 315.3194, "eval_samples_per_second": 22.269, "eval_steps_per_second": 2.784, "eval_wer": 0.5240903321345463, "step": 81000 }, { "epoch": 3.11878157048829, "grad_norm": 0.8363128304481506, "learning_rate": 0.00017824661654135339, "loss": 0.5263, "step": 81500 }, { "epoch": 3.1379151997550894, "grad_norm": 0.8608238697052002, "learning_rate": 0.00017749473684210525, "loss": 0.5111, "step": 82000 }, { "epoch": 3.1379151997550894, "eval_loss": Infinity, "eval_runtime": 316.6205, "eval_samples_per_second": 22.178, "eval_steps_per_second": 2.773, "eval_wer": 0.5104585360693886, "step": 82000 }, { "epoch": 3.157048829021889, "grad_norm": 0.7947099208831787, "learning_rate": 0.00017674285714285715, "loss": 0.496, "step": 82500 }, { "epoch": 3.176182458288688, "grad_norm": 0.629405677318573, "learning_rate": 0.00017599097744360902, "loss": 0.5047, "step": 83000 }, { "epoch": 3.176182458288688, "eval_loss": Infinity, "eval_runtime": 315.567, "eval_samples_per_second": 22.252, "eval_steps_per_second": 2.782, "eval_wer": 0.5117675058176433, "step": 83000 }, { "epoch": 3.1953160875554873, "grad_norm": 0.6956018209457397, "learning_rate": 0.0001752390977443609, "loss": 0.519, "step": 83500 }, { "epoch": 3.214449716822287, "grad_norm": 0.4928194284439087, "learning_rate": 0.00017448721804511275, "loss": 0.4994, "step": 84000 }, { "epoch": 3.214449716822287, "eval_loss": Infinity, "eval_runtime": 316.5623, "eval_samples_per_second": 22.182, "eval_steps_per_second": 2.774, "eval_wer": 0.49931246033425003, "step": 84000 }, { "epoch": 3.233583346089086, "grad_norm": 0.9308450222015381, "learning_rate": 0.00017373533834586465, "loss": 0.5108, "step": 84500 }, { "epoch": 3.2527169753558853, "grad_norm": 1.1794687509536743, "learning_rate": 0.00017298345864661652, "loss": 0.5077, "step": 85000 }, { "epoch": 3.2527169753558853, "eval_loss": Infinity, "eval_runtime": 316.1672, "eval_samples_per_second": 22.21, "eval_steps_per_second": 2.777, "eval_wer": 0.5099825470700232, "step": 85000 }, { "epoch": 3.271850604622685, "grad_norm": 0.6575067639350891, "learning_rate": 0.0001722315789473684, "loss": 0.5123, "step": 85500 }, { "epoch": 3.290984233889484, "grad_norm": 0.60300612449646, "learning_rate": 0.00017148120300751877, "loss": 0.5035, "step": 86000 }, { "epoch": 3.290984233889484, "eval_loss": Infinity, "eval_runtime": 316.8684, "eval_samples_per_second": 22.161, "eval_steps_per_second": 2.771, "eval_wer": 0.49292627459276495, "step": 86000 }, { "epoch": 3.3101178631562833, "grad_norm": 1.378197193145752, "learning_rate": 0.00017072932330827064, "loss": 0.5036, "step": 86500 }, { "epoch": 3.329251492423083, "grad_norm": 0.6758792996406555, "learning_rate": 0.00016997894736842103, "loss": 0.5045, "step": 87000 }, { "epoch": 3.329251492423083, "eval_loss": Infinity, "eval_runtime": 315.6861, "eval_samples_per_second": 22.244, "eval_steps_per_second": 2.781, "eval_wer": 0.5026311614131584, "step": 87000 }, { "epoch": 3.348385121689882, "grad_norm": 0.708972692489624, "learning_rate": 0.0001692270676691729, "loss": 0.5057, "step": 87500 }, { "epoch": 3.3675187509566813, "grad_norm": 1.0322130918502808, "learning_rate": 0.0001684751879699248, "loss": 0.4951, "step": 88000 }, { "epoch": 3.3675187509566813, "eval_loss": Infinity, "eval_runtime": 316.1978, "eval_samples_per_second": 22.208, "eval_steps_per_second": 2.777, "eval_wer": 0.49707795642056274, "step": 88000 }, { "epoch": 3.386652380223481, "grad_norm": 0.5131962299346924, "learning_rate": 0.00016772330827067667, "loss": 0.4925, "step": 88500 }, { "epoch": 3.40578600949028, "grad_norm": 0.7158399820327759, "learning_rate": 0.00016697142857142856, "loss": 0.4915, "step": 89000 }, { "epoch": 3.40578600949028, "eval_loss": Infinity, "eval_runtime": 319.2292, "eval_samples_per_second": 21.997, "eval_steps_per_second": 2.75, "eval_wer": 0.4984133700021155, "step": 89000 }, { "epoch": 3.4249196387570793, "grad_norm": 1.1134260892868042, "learning_rate": 0.00016621954887218043, "loss": 0.5056, "step": 89500 }, { "epoch": 3.444053268023879, "grad_norm": 0.7996990084648132, "learning_rate": 0.00016546766917293232, "loss": 0.4875, "step": 90000 }, { "epoch": 3.444053268023879, "eval_loss": Infinity, "eval_runtime": 316.288, "eval_samples_per_second": 22.201, "eval_steps_per_second": 2.776, "eval_wer": 0.49683996192088004, "step": 90000 }, { "epoch": 3.463186897290678, "grad_norm": 1.4016754627227783, "learning_rate": 0.0001647157894736842, "loss": 0.5019, "step": 90500 }, { "epoch": 3.4823205265574773, "grad_norm": 0.6514917016029358, "learning_rate": 0.00016396541353383458, "loss": 0.4964, "step": 91000 }, { "epoch": 3.4823205265574773, "eval_loss": Infinity, "eval_runtime": 316.9586, "eval_samples_per_second": 22.154, "eval_steps_per_second": 2.77, "eval_wer": 0.49888935900148085, "step": 91000 }, { "epoch": 3.501454155824277, "grad_norm": 0.3953873813152313, "learning_rate": 0.00016321353383458645, "loss": 0.4878, "step": 91500 }, { "epoch": 3.520587785091076, "grad_norm": 0.6485087871551514, "learning_rate": 0.00016246165413533832, "loss": 0.4767, "step": 92000 }, { "epoch": 3.520587785091076, "eval_loss": Infinity, "eval_runtime": 317.0491, "eval_samples_per_second": 22.148, "eval_steps_per_second": 2.769, "eval_wer": 0.4921594034271208, "step": 92000 }, { "epoch": 3.5397214143578752, "grad_norm": 0.649442732334137, "learning_rate": 0.00016170977443609021, "loss": 0.491, "step": 92500 }, { "epoch": 3.558855043624675, "grad_norm": 0.7617647051811218, "learning_rate": 0.00016095939849624058, "loss": 0.4765, "step": 93000 }, { "epoch": 3.558855043624675, "eval_loss": Infinity, "eval_runtime": 317.7633, "eval_samples_per_second": 22.098, "eval_steps_per_second": 2.763, "eval_wer": 0.48691030251745293, "step": 93000 }, { "epoch": 3.577988672891474, "grad_norm": 0.5942517518997192, "learning_rate": 0.00016020751879699247, "loss": 0.4805, "step": 93500 }, { "epoch": 3.597122302158273, "grad_norm": 0.9230866432189941, "learning_rate": 0.00015945563909774434, "loss": 0.4967, "step": 94000 }, { "epoch": 3.597122302158273, "eval_loss": Infinity, "eval_runtime": 317.3012, "eval_samples_per_second": 22.13, "eval_steps_per_second": 2.767, "eval_wer": 0.49814893166913476, "step": 94000 }, { "epoch": 3.616255931425073, "grad_norm": 0.601637065410614, "learning_rate": 0.00015870375939849624, "loss": 0.5055, "step": 94500 }, { "epoch": 3.635389560691872, "grad_norm": 0.6463965773582458, "learning_rate": 0.0001579518796992481, "loss": 0.4941, "step": 95000 }, { "epoch": 3.635389560691872, "eval_loss": Infinity, "eval_runtime": 318.5729, "eval_samples_per_second": 22.042, "eval_steps_per_second": 2.756, "eval_wer": 0.49616564417177916, "step": 95000 }, { "epoch": 3.654523189958671, "grad_norm": 0.8088521957397461, "learning_rate": 0.0001572, "loss": 0.4851, "step": 95500 }, { "epoch": 3.673656819225471, "grad_norm": 1.0758212804794312, "learning_rate": 0.00015644812030075187, "loss": 0.4808, "step": 96000 }, { "epoch": 3.673656819225471, "eval_loss": Infinity, "eval_runtime": 317.3887, "eval_samples_per_second": 22.124, "eval_steps_per_second": 2.766, "eval_wer": 0.4856277766024963, "step": 96000 }, { "epoch": 3.69279044849227, "grad_norm": 2.3342583179473877, "learning_rate": 0.00015569624060150376, "loss": 0.4682, "step": 96500 }, { "epoch": 3.711924077759069, "grad_norm": 0.9281033873558044, "learning_rate": 0.0001549443609022556, "loss": 0.4838, "step": 97000 }, { "epoch": 3.711924077759069, "eval_loss": Infinity, "eval_runtime": 317.8326, "eval_samples_per_second": 22.093, "eval_steps_per_second": 2.762, "eval_wer": 0.47491802411677597, "step": 97000 }, { "epoch": 3.731057707025869, "grad_norm": 0.6656193733215332, "learning_rate": 0.00015419248120300753, "loss": 0.4893, "step": 97500 }, { "epoch": 3.750191336292668, "grad_norm": 0.8286083936691284, "learning_rate": 0.0001534421052631579, "loss": 0.4644, "step": 98000 }, { "epoch": 3.750191336292668, "eval_loss": Infinity, "eval_runtime": 317.0552, "eval_samples_per_second": 22.148, "eval_steps_per_second": 2.769, "eval_wer": 0.4738073831182568, "step": 98000 }, { "epoch": 3.769324965559467, "grad_norm": 0.7517048716545105, "learning_rate": 0.00015269022556390978, "loss": 0.4828, "step": 98500 }, { "epoch": 3.788458594826267, "grad_norm": 1.126383662223816, "learning_rate": 0.00015193834586466163, "loss": 0.4818, "step": 99000 }, { "epoch": 3.788458594826267, "eval_loss": Infinity, "eval_runtime": 317.5174, "eval_samples_per_second": 22.115, "eval_steps_per_second": 2.765, "eval_wer": 0.47370160778506454, "step": 99000 }, { "epoch": 3.807592224093066, "grad_norm": 1.257995843887329, "learning_rate": 0.00015118646616541352, "loss": 0.485, "step": 99500 }, { "epoch": 3.826725853359865, "grad_norm": 1.0233116149902344, "learning_rate": 0.0001504345864661654, "loss": 0.4741, "step": 100000 }, { "epoch": 3.826725853359865, "eval_loss": Infinity, "eval_runtime": 317.1035, "eval_samples_per_second": 22.144, "eval_steps_per_second": 2.769, "eval_wer": 0.48123810027501585, "step": 100000 }, { "epoch": 3.8458594826266648, "grad_norm": 1.8550606966018677, "learning_rate": 0.00014968270676691728, "loss": 0.4763, "step": 100500 }, { "epoch": 3.864993111893464, "grad_norm": 0.8199677467346191, "learning_rate": 0.00014893082706766915, "loss": 0.4734, "step": 101000 }, { "epoch": 3.864993111893464, "eval_loss": Infinity, "eval_runtime": 317.4217, "eval_samples_per_second": 22.122, "eval_steps_per_second": 2.766, "eval_wer": 0.4772186376137085, "step": 101000 }, { "epoch": 3.884126741160263, "grad_norm": 0.5380846261978149, "learning_rate": 0.00014817894736842105, "loss": 0.4602, "step": 101500 }, { "epoch": 3.9032603704270628, "grad_norm": 1.026496410369873, "learning_rate": 0.00014742706766917291, "loss": 0.4733, "step": 102000 }, { "epoch": 3.9032603704270628, "eval_loss": Infinity, "eval_runtime": 317.6547, "eval_samples_per_second": 22.106, "eval_steps_per_second": 2.764, "eval_wer": 0.47355616670192513, "step": 102000 }, { "epoch": 3.922393999693862, "grad_norm": 1.4769624471664429, "learning_rate": 0.0001466766917293233, "loss": 0.477, "step": 102500 }, { "epoch": 3.941527628960661, "grad_norm": 1.2881931066513062, "learning_rate": 0.00014592481203007517, "loss": 0.4937, "step": 103000 }, { "epoch": 3.941527628960661, "eval_loss": Infinity, "eval_runtime": 317.1763, "eval_samples_per_second": 22.139, "eval_steps_per_second": 2.768, "eval_wer": 0.4694573725407235, "step": 103000 }, { "epoch": 3.9606612582274607, "grad_norm": 0.6641072630882263, "learning_rate": 0.00014517443609022554, "loss": 0.4728, "step": 103500 }, { "epoch": 3.97979488749426, "grad_norm": 0.7772675156593323, "learning_rate": 0.00014442255639097743, "loss": 0.4864, "step": 104000 }, { "epoch": 3.97979488749426, "eval_loss": Infinity, "eval_runtime": 316.8284, "eval_samples_per_second": 22.163, "eval_steps_per_second": 2.771, "eval_wer": 0.47485191453353076, "step": 104000 }, { "epoch": 3.998928516761059, "grad_norm": 0.4977366626262665, "learning_rate": 0.0001436706766917293, "loss": 0.4671, "step": 104500 }, { "epoch": 4.018062146027859, "grad_norm": 1.0952422618865967, "learning_rate": 0.00014291879699248117, "loss": 0.4126, "step": 105000 }, { "epoch": 4.018062146027859, "eval_loss": Infinity, "eval_runtime": 318.0988, "eval_samples_per_second": 22.075, "eval_steps_per_second": 2.76, "eval_wer": 0.4635207319653057, "step": 105000 }, { "epoch": 4.0371957752946575, "grad_norm": 0.694837212562561, "learning_rate": 0.00014216691729323306, "loss": 0.4202, "step": 105500 }, { "epoch": 4.056329404561457, "grad_norm": 0.7097035646438599, "learning_rate": 0.00014141503759398493, "loss": 0.4228, "step": 106000 }, { "epoch": 4.056329404561457, "eval_loss": Infinity, "eval_runtime": 315.7201, "eval_samples_per_second": 22.241, "eval_steps_per_second": 2.781, "eval_wer": 0.47007880262322826, "step": 106000 }, { "epoch": 4.075463033828257, "grad_norm": 1.1219637393951416, "learning_rate": 0.00014066315789473683, "loss": 0.4349, "step": 106500 }, { "epoch": 4.094596663095055, "grad_norm": 0.5968381762504578, "learning_rate": 0.0001399127819548872, "loss": 0.4098, "step": 107000 }, { "epoch": 4.094596663095055, "eval_loss": Infinity, "eval_runtime": 316.2686, "eval_samples_per_second": 22.203, "eval_steps_per_second": 2.776, "eval_wer": 0.45890628305479164, "step": 107000 }, { "epoch": 4.113730292361855, "grad_norm": 0.5920900106430054, "learning_rate": 0.0001391609022556391, "loss": 0.4182, "step": 107500 }, { "epoch": 4.132863921628655, "grad_norm": 0.45823031663894653, "learning_rate": 0.00013840902255639095, "loss": 0.4193, "step": 108000 }, { "epoch": 4.132863921628655, "eval_loss": Infinity, "eval_runtime": 316.9696, "eval_samples_per_second": 22.154, "eval_steps_per_second": 2.77, "eval_wer": 0.46152422255130104, "step": 108000 }, { "epoch": 4.151997550895453, "grad_norm": 0.6653383374214172, "learning_rate": 0.00013765714285714285, "loss": 0.4087, "step": 108500 }, { "epoch": 4.171131180162253, "grad_norm": 0.5999200940132141, "learning_rate": 0.00013690526315789472, "loss": 0.4083, "step": 109000 }, { "epoch": 4.171131180162253, "eval_loss": Infinity, "eval_runtime": 318.122, "eval_samples_per_second": 22.073, "eval_steps_per_second": 2.76, "eval_wer": 0.46397027713137295, "step": 109000 }, { "epoch": 4.190264809429053, "grad_norm": 0.5787246823310852, "learning_rate": 0.0001361533834586466, "loss": 0.4075, "step": 109500 }, { "epoch": 4.209398438695851, "grad_norm": 2.8409461975097656, "learning_rate": 0.00013540150375939848, "loss": 0.406, "step": 110000 }, { "epoch": 4.209398438695851, "eval_loss": Infinity, "eval_runtime": 316.8914, "eval_samples_per_second": 22.159, "eval_steps_per_second": 2.771, "eval_wer": 0.46135233763486355, "step": 110000 }, { "epoch": 4.228532067962651, "grad_norm": 0.8015612959861755, "learning_rate": 0.00013464962406015038, "loss": 0.4167, "step": 110500 }, { "epoch": 4.247665697229451, "grad_norm": 3.532646417617798, "learning_rate": 0.00013389774436090224, "loss": 0.4125, "step": 111000 }, { "epoch": 4.247665697229451, "eval_loss": Infinity, "eval_runtime": 316.906, "eval_samples_per_second": 22.158, "eval_steps_per_second": 2.771, "eval_wer": 0.4608366828855511, "step": 111000 }, { "epoch": 4.266799326496249, "grad_norm": 0.7524324059486389, "learning_rate": 0.00013314736842105264, "loss": 0.4205, "step": 111500 }, { "epoch": 4.285932955763049, "grad_norm": 2.5353856086730957, "learning_rate": 0.0001323954887218045, "loss": 0.4104, "step": 112000 }, { "epoch": 4.285932955763049, "eval_loss": Infinity, "eval_runtime": 318.4796, "eval_samples_per_second": 22.049, "eval_steps_per_second": 2.757, "eval_wer": 0.44868574148508567, "step": 112000 }, { "epoch": 4.305066585029849, "grad_norm": 0.9442459940910339, "learning_rate": 0.00013164360902255637, "loss": 0.4114, "step": 112500 }, { "epoch": 4.324200214296647, "grad_norm": 1.0531048774719238, "learning_rate": 0.00013089172932330827, "loss": 0.3988, "step": 113000 }, { "epoch": 4.324200214296647, "eval_loss": Infinity, "eval_runtime": 317.0914, "eval_samples_per_second": 22.145, "eval_steps_per_second": 2.769, "eval_wer": 0.45987148297017133, "step": 113000 }, { "epoch": 4.343333843563447, "grad_norm": 0.7277682423591614, "learning_rate": 0.00013013984962406013, "loss": 0.4164, "step": 113500 }, { "epoch": 4.362467472830247, "grad_norm": 0.48210740089416504, "learning_rate": 0.00012938796992481203, "loss": 0.4034, "step": 114000 }, { "epoch": 4.362467472830247, "eval_loss": Infinity, "eval_runtime": 318.084, "eval_samples_per_second": 22.076, "eval_steps_per_second": 2.76, "eval_wer": 0.45389517664480644, "step": 114000 }, { "epoch": 4.381601102097045, "grad_norm": 1.0025782585144043, "learning_rate": 0.0001286375939849624, "loss": 0.423, "step": 114500 }, { "epoch": 4.400734731363845, "grad_norm": 0.5586313009262085, "learning_rate": 0.00012788571428571426, "loss": 0.4023, "step": 115000 }, { "epoch": 4.400734731363845, "eval_loss": Infinity, "eval_runtime": 318.3491, "eval_samples_per_second": 22.058, "eval_steps_per_second": 2.758, "eval_wer": 0.4479585360693886, "step": 115000 }, { "epoch": 4.419868360630645, "grad_norm": 0.6510444283485413, "learning_rate": 0.00012713383458646616, "loss": 0.4041, "step": 115500 }, { "epoch": 4.439001989897443, "grad_norm": 0.6380518674850464, "learning_rate": 0.00012638195488721802, "loss": 0.4026, "step": 116000 }, { "epoch": 4.439001989897443, "eval_loss": Infinity, "eval_runtime": 318.4508, "eval_samples_per_second": 22.051, "eval_steps_per_second": 2.757, "eval_wer": 0.45242754389676326, "step": 116000 }, { "epoch": 4.458135619164243, "grad_norm": 0.7297781109809875, "learning_rate": 0.00012563157894736842, "loss": 0.4124, "step": 116500 }, { "epoch": 4.477269248431043, "grad_norm": 0.9323301911354065, "learning_rate": 0.00012487969924812028, "loss": 0.4182, "step": 117000 }, { "epoch": 4.477269248431043, "eval_loss": Infinity, "eval_runtime": 316.9248, "eval_samples_per_second": 22.157, "eval_steps_per_second": 2.77, "eval_wer": 0.44729744023693674, "step": 117000 }, { "epoch": 4.496402877697841, "grad_norm": 0.7702882289886475, "learning_rate": 0.00012412781954887218, "loss": 0.4137, "step": 117500 }, { "epoch": 4.515536506964641, "grad_norm": 0.45166295766830444, "learning_rate": 0.00012337593984962405, "loss": 0.4046, "step": 118000 }, { "epoch": 4.515536506964641, "eval_loss": Infinity, "eval_runtime": 313.8786, "eval_samples_per_second": 22.372, "eval_steps_per_second": 2.797, "eval_wer": 0.44563147873915804, "step": 118000 }, { "epoch": 4.5346701362314406, "grad_norm": 0.8825483322143555, "learning_rate": 0.00012262406015037594, "loss": 0.4001, "step": 118500 }, { "epoch": 4.553803765498239, "grad_norm": 0.5982456207275391, "learning_rate": 0.00012187218045112781, "loss": 0.4126, "step": 119000 }, { "epoch": 4.553803765498239, "eval_loss": Infinity, "eval_runtime": 313.2679, "eval_samples_per_second": 22.415, "eval_steps_per_second": 2.803, "eval_wer": 0.4406071504125238, "step": 119000 }, { "epoch": 4.572937394765039, "grad_norm": 0.49651646614074707, "learning_rate": 0.00012112030075187969, "loss": 0.3899, "step": 119500 }, { "epoch": 4.5920710240318385, "grad_norm": 0.6609870195388794, "learning_rate": 0.00012036842105263157, "loss": 0.4144, "step": 120000 }, { "epoch": 4.5920710240318385, "eval_loss": Infinity, "eval_runtime": 313.9675, "eval_samples_per_second": 22.365, "eval_steps_per_second": 2.796, "eval_wer": 0.44486460757351387, "step": 120000 }, { "epoch": 4.611204653298637, "grad_norm": 0.8339817523956299, "learning_rate": 0.00011961804511278195, "loss": 0.4067, "step": 120500 }, { "epoch": 4.630338282565437, "grad_norm": 0.683476984500885, "learning_rate": 0.00011886766917293232, "loss": 0.4074, "step": 121000 }, { "epoch": 4.630338282565437, "eval_loss": Infinity, "eval_runtime": 314.2537, "eval_samples_per_second": 22.345, "eval_steps_per_second": 2.794, "eval_wer": 0.44750899090332136, "step": 121000 }, { "epoch": 4.6494719118322365, "grad_norm": 0.6157557964324951, "learning_rate": 0.0001181172932330827, "loss": 0.3947, "step": 121500 }, { "epoch": 4.668605541099035, "grad_norm": 0.7728904485702515, "learning_rate": 0.00011736541353383457, "loss": 0.3922, "step": 122000 }, { "epoch": 4.668605541099035, "eval_loss": Infinity, "eval_runtime": 312.5029, "eval_samples_per_second": 22.47, "eval_steps_per_second": 2.81, "eval_wer": 0.4387693039983076, "step": 122000 }, { "epoch": 4.687739170365835, "grad_norm": 0.6580181121826172, "learning_rate": 0.00011661353383458646, "loss": 0.3969, "step": 122500 }, { "epoch": 4.7068727996326345, "grad_norm": 3.9013619422912598, "learning_rate": 0.00011586165413533834, "loss": 0.3866, "step": 123000 }, { "epoch": 4.7068727996326345, "eval_loss": Infinity, "eval_runtime": 312.7382, "eval_samples_per_second": 22.453, "eval_steps_per_second": 2.807, "eval_wer": 0.44741643748677806, "step": 123000 }, { "epoch": 4.726006428899433, "grad_norm": 0.7487606406211853, "learning_rate": 0.00011510977443609022, "loss": 0.4057, "step": 123500 }, { "epoch": 4.745140058166233, "grad_norm": 0.9207865595817566, "learning_rate": 0.0001143578947368421, "loss": 0.3873, "step": 124000 }, { "epoch": 4.745140058166233, "eval_loss": Infinity, "eval_runtime": 312.9132, "eval_samples_per_second": 22.441, "eval_steps_per_second": 2.806, "eval_wer": 0.4344986249206685, "step": 124000 }, { "epoch": 4.7642736874330325, "grad_norm": 0.6548067331314087, "learning_rate": 0.00011360601503759398, "loss": 0.4058, "step": 124500 }, { "epoch": 4.783407316699831, "grad_norm": 0.97373366355896, "learning_rate": 0.00011285413533834586, "loss": 0.3917, "step": 125000 }, { "epoch": 4.783407316699831, "eval_loss": Infinity, "eval_runtime": 313.7438, "eval_samples_per_second": 22.381, "eval_steps_per_second": 2.798, "eval_wer": 0.4338243071715676, "step": 125000 }, { "epoch": 4.802540945966631, "grad_norm": 0.7585910558700562, "learning_rate": 0.00011210375939849623, "loss": 0.4095, "step": 125500 }, { "epoch": 4.8216745752334305, "grad_norm": 1.3345550298690796, "learning_rate": 0.0001113533834586466, "loss": 0.3864, "step": 126000 }, { "epoch": 4.8216745752334305, "eval_loss": Infinity, "eval_runtime": 312.643, "eval_samples_per_second": 22.46, "eval_steps_per_second": 2.808, "eval_wer": 0.4350803892532262, "step": 126000 }, { "epoch": 4.840808204500229, "grad_norm": 0.6124061346054077, "learning_rate": 0.00011060150375939849, "loss": 0.3909, "step": 126500 }, { "epoch": 4.859941833767029, "grad_norm": 0.7190678119659424, "learning_rate": 0.00010984962406015037, "loss": 0.3826, "step": 127000 }, { "epoch": 4.859941833767029, "eval_loss": Infinity, "eval_runtime": 312.9649, "eval_samples_per_second": 22.437, "eval_steps_per_second": 2.805, "eval_wer": 0.430783266342289, "step": 127000 }, { "epoch": 4.879075463033828, "grad_norm": 0.9486848711967468, "learning_rate": 0.00010909774436090225, "loss": 0.3974, "step": 127500 }, { "epoch": 4.898209092300627, "grad_norm": 3.170478343963623, "learning_rate": 0.00010834736842105263, "loss": 0.391, "step": 128000 }, { "epoch": 4.898209092300627, "eval_loss": Infinity, "eval_runtime": 313.5327, "eval_samples_per_second": 22.396, "eval_steps_per_second": 2.8, "eval_wer": 0.431497249841337, "step": 128000 }, { "epoch": 4.917342721567427, "grad_norm": 0.462166428565979, "learning_rate": 0.000107596992481203, "loss": 0.3817, "step": 128500 }, { "epoch": 4.936476350834226, "grad_norm": 1.1014796495437622, "learning_rate": 0.00010684511278195487, "loss": 0.394, "step": 129000 }, { "epoch": 4.936476350834226, "eval_loss": Infinity, "eval_runtime": 313.0162, "eval_samples_per_second": 22.433, "eval_steps_per_second": 2.805, "eval_wer": 0.42787444467950075, "step": 129000 }, { "epoch": 4.955609980101025, "grad_norm": 0.624528706073761, "learning_rate": 0.00010609323308270676, "loss": 0.3821, "step": 129500 }, { "epoch": 4.974743609367825, "grad_norm": 0.5960122346878052, "learning_rate": 0.00010534135338345864, "loss": 0.3957, "step": 130000 }, { "epoch": 4.974743609367825, "eval_loss": Infinity, "eval_runtime": 313.106, "eval_samples_per_second": 22.427, "eval_steps_per_second": 2.804, "eval_wer": 0.4235112121853184, "step": 130000 }, { "epoch": 4.993877238634624, "grad_norm": 0.7740840911865234, "learning_rate": 0.0001045894736842105, "loss": 0.4012, "step": 130500 }, { "epoch": 5.013010867901423, "grad_norm": 0.39604371786117554, "learning_rate": 0.00010383759398496239, "loss": 0.3515, "step": 131000 }, { "epoch": 5.013010867901423, "eval_loss": Infinity, "eval_runtime": 312.7575, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.807, "eval_wer": 0.42159403427120795, "step": 131000 }, { "epoch": 5.032144497168223, "grad_norm": 0.582255482673645, "learning_rate": 0.00010308571428571427, "loss": 0.332, "step": 131500 }, { "epoch": 5.051278126435022, "grad_norm": 1.186954379081726, "learning_rate": 0.00010233383458646615, "loss": 0.3389, "step": 132000 }, { "epoch": 5.051278126435022, "eval_loss": Infinity, "eval_runtime": 312.5025, "eval_samples_per_second": 22.47, "eval_steps_per_second": 2.81, "eval_wer": 0.42548127776602496, "step": 132000 }, { "epoch": 5.070411755701821, "grad_norm": 0.7782790064811707, "learning_rate": 0.00010158195488721803, "loss": 0.3352, "step": 132500 }, { "epoch": 5.089545384968621, "grad_norm": 0.5987495183944702, "learning_rate": 0.00010083157894736841, "loss": 0.333, "step": 133000 }, { "epoch": 5.089545384968621, "eval_loss": Infinity, "eval_runtime": 514.1295, "eval_samples_per_second": 13.658, "eval_steps_per_second": 1.708, "eval_wer": 0.42526972709964034, "step": 133000 }, { "epoch": 5.10867901423542, "grad_norm": 0.714470624923706, "learning_rate": 0.0001000812030075188, "loss": 0.3334, "step": 133500 }, { "epoch": 5.127812643502219, "grad_norm": 0.554375171661377, "learning_rate": 9.932932330827067e-05, "loss": 0.3313, "step": 134000 }, { "epoch": 5.127812643502219, "eval_loss": Infinity, "eval_runtime": 313.7907, "eval_samples_per_second": 22.378, "eval_steps_per_second": 2.798, "eval_wer": 0.4178390099428813, "step": 134000 }, { "epoch": 5.146946272769019, "grad_norm": 0.8462457060813904, "learning_rate": 9.857744360902255e-05, "loss": 0.3391, "step": 134500 }, { "epoch": 5.166079902035818, "grad_norm": 0.5063708424568176, "learning_rate": 9.782556390977443e-05, "loss": 0.3351, "step": 135000 }, { "epoch": 5.166079902035818, "eval_loss": Infinity, "eval_runtime": 314.6864, "eval_samples_per_second": 22.314, "eval_steps_per_second": 2.79, "eval_wer": 0.422321239686905, "step": 135000 }, { "epoch": 5.185213531302617, "grad_norm": 0.5310961008071899, "learning_rate": 9.707368421052631e-05, "loss": 0.3435, "step": 135500 }, { "epoch": 5.204347160569417, "grad_norm": 0.7004749774932861, "learning_rate": 9.63218045112782e-05, "loss": 0.3262, "step": 136000 }, { "epoch": 5.204347160569417, "eval_loss": Infinity, "eval_runtime": 314.217, "eval_samples_per_second": 22.348, "eval_steps_per_second": 2.794, "eval_wer": 0.416318489528242, "step": 136000 }, { "epoch": 5.223480789836216, "grad_norm": 0.5730725526809692, "learning_rate": 9.556992481203008e-05, "loss": 0.333, "step": 136500 }, { "epoch": 5.242614419103015, "grad_norm": 1.6563917398452759, "learning_rate": 9.481804511278196e-05, "loss": 0.3333, "step": 137000 }, { "epoch": 5.242614419103015, "eval_loss": Infinity, "eval_runtime": 315.3953, "eval_samples_per_second": 22.264, "eval_steps_per_second": 2.784, "eval_wer": 0.42156759043790987, "step": 137000 }, { "epoch": 5.261748048369815, "grad_norm": 0.5844420790672302, "learning_rate": 9.406616541353384e-05, "loss": 0.3369, "step": 137500 }, { "epoch": 5.280881677636614, "grad_norm": 0.6321229934692383, "learning_rate": 9.331428571428571e-05, "loss": 0.3229, "step": 138000 }, { "epoch": 5.280881677636614, "eval_loss": Infinity, "eval_runtime": 315.3276, "eval_samples_per_second": 22.269, "eval_steps_per_second": 2.784, "eval_wer": 0.41326422678231434, "step": 138000 }, { "epoch": 5.300015306903413, "grad_norm": 0.6538165807723999, "learning_rate": 9.256240601503759e-05, "loss": 0.3382, "step": 138500 }, { "epoch": 5.319148936170213, "grad_norm": 0.7837240099906921, "learning_rate": 9.181052631578947e-05, "loss": 0.3345, "step": 139000 }, { "epoch": 5.319148936170213, "eval_loss": Infinity, "eval_runtime": 316.2249, "eval_samples_per_second": 22.206, "eval_steps_per_second": 2.777, "eval_wer": 0.4136079966151893, "step": 139000 }, { "epoch": 5.338282565437012, "grad_norm": 0.8858345150947571, "learning_rate": 9.106015037593984e-05, "loss": 0.3293, "step": 139500 }, { "epoch": 5.357416194703811, "grad_norm": 1.3112056255340576, "learning_rate": 9.030827067669172e-05, "loss": 0.3365, "step": 140000 }, { "epoch": 5.357416194703811, "eval_loss": Infinity, "eval_runtime": 316.1322, "eval_samples_per_second": 22.212, "eval_steps_per_second": 2.777, "eval_wer": 0.4193330865242226, "step": 140000 }, { "epoch": 5.376549823970611, "grad_norm": 0.720756471157074, "learning_rate": 8.95563909774436e-05, "loss": 0.3318, "step": 140500 }, { "epoch": 5.39568345323741, "grad_norm": 0.46977701783180237, "learning_rate": 8.880451127819548e-05, "loss": 0.3165, "step": 141000 }, { "epoch": 5.39568345323741, "eval_loss": Infinity, "eval_runtime": 314.295, "eval_samples_per_second": 22.342, "eval_steps_per_second": 2.794, "eval_wer": 0.4112148297017136, "step": 141000 }, { "epoch": 5.414817082504209, "grad_norm": 0.7260543704032898, "learning_rate": 8.805263157894736e-05, "loss": 0.337, "step": 141500 }, { "epoch": 5.433950711771009, "grad_norm": 1.3891347646713257, "learning_rate": 8.730075187969924e-05, "loss": 0.3224, "step": 142000 }, { "epoch": 5.433950711771009, "eval_loss": Infinity, "eval_runtime": 315.0255, "eval_samples_per_second": 22.29, "eval_steps_per_second": 2.787, "eval_wer": 0.40745980537338694, "step": 142000 }, { "epoch": 5.453084341037808, "grad_norm": 0.4979989528656006, "learning_rate": 8.654887218045112e-05, "loss": 0.3341, "step": 142500 }, { "epoch": 5.472217970304607, "grad_norm": 0.9820772409439087, "learning_rate": 8.579699248120299e-05, "loss": 0.335, "step": 143000 }, { "epoch": 5.472217970304607, "eval_loss": Infinity, "eval_runtime": 314.1397, "eval_samples_per_second": 22.353, "eval_steps_per_second": 2.795, "eval_wer": 0.4112941612016078, "step": 143000 }, { "epoch": 5.491351599571407, "grad_norm": 1.6596304178237915, "learning_rate": 8.504511278195487e-05, "loss": 0.3243, "step": 143500 }, { "epoch": 5.510485228838206, "grad_norm": 0.687848687171936, "learning_rate": 8.429323308270675e-05, "loss": 0.3377, "step": 144000 }, { "epoch": 5.510485228838206, "eval_loss": Infinity, "eval_runtime": 315.8491, "eval_samples_per_second": 22.232, "eval_steps_per_second": 2.78, "eval_wer": 0.41762745927649675, "step": 144000 }, { "epoch": 5.529618858105005, "grad_norm": 1.0538053512573242, "learning_rate": 8.354135338345864e-05, "loss": 0.3195, "step": 144500 }, { "epoch": 5.548752487371805, "grad_norm": 0.6369953751564026, "learning_rate": 8.278947368421052e-05, "loss": 0.3411, "step": 145000 }, { "epoch": 5.548752487371805, "eval_loss": Infinity, "eval_runtime": 316.0558, "eval_samples_per_second": 22.218, "eval_steps_per_second": 2.778, "eval_wer": 0.4091125449545166, "step": 145000 }, { "epoch": 5.567886116638604, "grad_norm": 1.193766474723816, "learning_rate": 8.20390977443609e-05, "loss": 0.3385, "step": 145500 }, { "epoch": 5.587019745905403, "grad_norm": 0.5476765632629395, "learning_rate": 8.128721804511278e-05, "loss": 0.3247, "step": 146000 }, { "epoch": 5.587019745905403, "eval_loss": Infinity, "eval_runtime": 315.7439, "eval_samples_per_second": 22.24, "eval_steps_per_second": 2.781, "eval_wer": 0.4096414216204781, "step": 146000 }, { "epoch": 5.606153375172203, "grad_norm": 0.9929884076118469, "learning_rate": 8.053533834586466e-05, "loss": 0.3305, "step": 146500 }, { "epoch": 5.625287004439002, "grad_norm": 0.3942908048629761, "learning_rate": 7.978345864661654e-05, "loss": 0.3304, "step": 147000 }, { "epoch": 5.625287004439002, "eval_loss": Infinity, "eval_runtime": 314.4131, "eval_samples_per_second": 22.334, "eval_steps_per_second": 2.793, "eval_wer": 0.40843822720541567, "step": 147000 }, { "epoch": 5.644420633705801, "grad_norm": 0.7728341221809387, "learning_rate": 7.903157894736842e-05, "loss": 0.3301, "step": 147500 }, { "epoch": 5.663554262972601, "grad_norm": 1.4606784582138062, "learning_rate": 7.82796992481203e-05, "loss": 0.3267, "step": 148000 }, { "epoch": 5.663554262972601, "eval_loss": Infinity, "eval_runtime": 314.4256, "eval_samples_per_second": 22.333, "eval_steps_per_second": 2.792, "eval_wer": 0.40423365771102177, "step": 148000 }, { "epoch": 5.6826878922394, "grad_norm": 0.6086077690124512, "learning_rate": 7.752781954887217e-05, "loss": 0.3195, "step": 148500 }, { "epoch": 5.701821521506199, "grad_norm": 0.5943909883499146, "learning_rate": 7.677593984962405e-05, "loss": 0.3193, "step": 149000 }, { "epoch": 5.701821521506199, "eval_loss": Infinity, "eval_runtime": 314.9231, "eval_samples_per_second": 22.298, "eval_steps_per_second": 2.788, "eval_wer": 0.4026470277131373, "step": 149000 }, { "epoch": 5.7209551507729985, "grad_norm": 0.5374177098274231, "learning_rate": 7.602556390977442e-05, "loss": 0.3374, "step": 149500 }, { "epoch": 5.740088780039798, "grad_norm": 0.675542950630188, "learning_rate": 7.52736842105263e-05, "loss": 0.3155, "step": 150000 }, { "epoch": 5.740088780039798, "eval_loss": Infinity, "eval_runtime": 316.593, "eval_samples_per_second": 22.18, "eval_steps_per_second": 2.773, "eval_wer": 0.40481542204357945, "step": 150000 }, { "epoch": 5.759222409306597, "grad_norm": 1.1648385524749756, "learning_rate": 7.45218045112782e-05, "loss": 0.3247, "step": 150500 }, { "epoch": 5.7783560385733965, "grad_norm": 0.6115811467170715, "learning_rate": 7.376992481203008e-05, "loss": 0.3238, "step": 151000 }, { "epoch": 5.7783560385733965, "eval_loss": Infinity, "eval_runtime": 318.3837, "eval_samples_per_second": 22.055, "eval_steps_per_second": 2.758, "eval_wer": 0.40333456737888723, "step": 151000 }, { "epoch": 5.797489667840196, "grad_norm": 1.0145585536956787, "learning_rate": 7.301804511278196e-05, "loss": 0.3175, "step": 151500 }, { "epoch": 5.816623297106995, "grad_norm": 0.7065938115119934, "learning_rate": 7.226766917293232e-05, "loss": 0.3172, "step": 152000 }, { "epoch": 5.816623297106995, "eval_loss": Infinity, "eval_runtime": 318.6878, "eval_samples_per_second": 22.034, "eval_steps_per_second": 2.755, "eval_wer": 0.40486830971017557, "step": 152000 }, { "epoch": 5.8357569263737945, "grad_norm": 0.7309425473213196, "learning_rate": 7.15172932330827e-05, "loss": 0.3229, "step": 152500 }, { "epoch": 5.854890555640594, "grad_norm": 1.0197374820709229, "learning_rate": 7.076541353383458e-05, "loss": 0.3148, "step": 153000 }, { "epoch": 5.854890555640594, "eval_loss": Infinity, "eval_runtime": 323.1191, "eval_samples_per_second": 21.732, "eval_steps_per_second": 2.717, "eval_wer": 0.3989052253014597, "step": 153000 }, { "epoch": 5.874024184907393, "grad_norm": 0.7352388501167297, "learning_rate": 7.001503759398496e-05, "loss": 0.313, "step": 153500 }, { "epoch": 5.8931578141741925, "grad_norm": 0.6405870914459229, "learning_rate": 6.926315789473684e-05, "loss": 0.3217, "step": 154000 }, { "epoch": 5.8931578141741925, "eval_loss": Infinity, "eval_runtime": 319.2909, "eval_samples_per_second": 21.992, "eval_steps_per_second": 2.75, "eval_wer": 0.39782102813623865, "step": 154000 }, { "epoch": 5.912291443440992, "grad_norm": 0.711800754070282, "learning_rate": 6.851127819548872e-05, "loss": 0.3206, "step": 154500 }, { "epoch": 5.931425072707791, "grad_norm": 0.41337111592292786, "learning_rate": 6.77593984962406e-05, "loss": 0.3145, "step": 155000 }, { "epoch": 5.931425072707791, "eval_loss": Infinity, "eval_runtime": 318.549, "eval_samples_per_second": 22.044, "eval_steps_per_second": 2.756, "eval_wer": 0.39298180664269095, "step": 155000 }, { "epoch": 5.9505587019745905, "grad_norm": 0.3790297210216522, "learning_rate": 6.700751879699248e-05, "loss": 0.3185, "step": 155500 }, { "epoch": 5.96969233124139, "grad_norm": 0.7092337012290955, "learning_rate": 6.625563909774435e-05, "loss": 0.3178, "step": 156000 }, { "epoch": 5.96969233124139, "eval_loss": Infinity, "eval_runtime": 318.815, "eval_samples_per_second": 22.025, "eval_steps_per_second": 2.754, "eval_wer": 0.39946054580071927, "step": 156000 }, { "epoch": 5.988825960508189, "grad_norm": 1.5943549871444702, "learning_rate": 6.550375939849623e-05, "loss": 0.3154, "step": 156500 }, { "epoch": 6.0079595897749885, "grad_norm": 1.1709485054016113, "learning_rate": 6.475187969924812e-05, "loss": 0.2895, "step": 157000 }, { "epoch": 6.0079595897749885, "eval_loss": Infinity, "eval_runtime": 318.8329, "eval_samples_per_second": 22.024, "eval_steps_per_second": 2.754, "eval_wer": 0.39980431563359425, "step": 157000 }, { "epoch": 6.027093219041788, "grad_norm": 0.6642709374427795, "learning_rate": 6.4e-05, "loss": 0.2714, "step": 157500 }, { "epoch": 6.046226848308587, "grad_norm": 0.7695789337158203, "learning_rate": 6.324812030075188e-05, "loss": 0.269, "step": 158000 }, { "epoch": 6.046226848308587, "eval_loss": Infinity, "eval_runtime": 318.0108, "eval_samples_per_second": 22.081, "eval_steps_per_second": 2.761, "eval_wer": 0.3926248148931669, "step": 158000 }, { "epoch": 6.065360477575386, "grad_norm": 0.6437819600105286, "learning_rate": 6.249624060150375e-05, "loss": 0.2651, "step": 158500 }, { "epoch": 6.084494106842186, "grad_norm": 0.9133914113044739, "learning_rate": 6.174436090225563e-05, "loss": 0.2757, "step": 159000 }, { "epoch": 6.084494106842186, "eval_loss": Infinity, "eval_runtime": 317.9915, "eval_samples_per_second": 22.082, "eval_steps_per_second": 2.761, "eval_wer": 0.39225460122699385, "step": 159000 }, { "epoch": 6.103627736108985, "grad_norm": 0.8651337623596191, "learning_rate": 6.099398496240601e-05, "loss": 0.2763, "step": 159500 }, { "epoch": 6.122761365375784, "grad_norm": 0.6800199151039124, "learning_rate": 6.0243609022556384e-05, "loss": 0.2573, "step": 160000 }, { "epoch": 6.122761365375784, "eval_loss": Infinity, "eval_runtime": 318.7524, "eval_samples_per_second": 22.03, "eval_steps_per_second": 2.754, "eval_wer": 0.3906283054791623, "step": 160000 }, { "epoch": 6.141894994642584, "grad_norm": 0.7264246940612793, "learning_rate": 5.9491729323308265e-05, "loss": 0.2632, "step": 160500 }, { "epoch": 6.161028623909383, "grad_norm": 0.2874845862388611, "learning_rate": 5.873984962406015e-05, "loss": 0.2666, "step": 161000 }, { "epoch": 6.161028623909383, "eval_loss": Infinity, "eval_runtime": 318.0588, "eval_samples_per_second": 22.078, "eval_steps_per_second": 2.76, "eval_wer": 0.38831447006558073, "step": 161000 }, { "epoch": 6.180162253176182, "grad_norm": 0.36712953448295593, "learning_rate": 5.798796992481202e-05, "loss": 0.2719, "step": 161500 }, { "epoch": 6.199295882442982, "grad_norm": 1.4700485467910767, "learning_rate": 5.72360902255639e-05, "loss": 0.2691, "step": 162000 }, { "epoch": 6.199295882442982, "eval_loss": Infinity, "eval_runtime": 318.6549, "eval_samples_per_second": 22.036, "eval_steps_per_second": 2.755, "eval_wer": 0.391950497144066, "step": 162000 }, { "epoch": 6.218429511709781, "grad_norm": 0.664314866065979, "learning_rate": 5.6484210526315785e-05, "loss": 0.2734, "step": 162500 }, { "epoch": 6.23756314097658, "grad_norm": 0.7786546945571899, "learning_rate": 5.5732330827067666e-05, "loss": 0.2699, "step": 163000 }, { "epoch": 6.23756314097658, "eval_loss": Infinity, "eval_runtime": 323.3374, "eval_samples_per_second": 21.717, "eval_steps_per_second": 2.715, "eval_wer": 0.39616828855510894, "step": 163000 }, { "epoch": 6.25669677024338, "grad_norm": 1.2886419296264648, "learning_rate": 5.498045112781954e-05, "loss": 0.2702, "step": 163500 }, { "epoch": 6.275830399510179, "grad_norm": 0.6407492160797119, "learning_rate": 5.422857142857142e-05, "loss": 0.259, "step": 164000 }, { "epoch": 6.275830399510179, "eval_loss": Infinity, "eval_runtime": 318.7872, "eval_samples_per_second": 22.027, "eval_steps_per_second": 2.754, "eval_wer": 0.39016553839644597, "step": 164000 }, { "epoch": 6.294964028776978, "grad_norm": 0.36012986302375793, "learning_rate": 5.3476691729323304e-05, "loss": 0.2591, "step": 164500 }, { "epoch": 6.314097658043778, "grad_norm": 0.5582063794136047, "learning_rate": 5.2724812030075185e-05, "loss": 0.2707, "step": 165000 }, { "epoch": 6.314097658043778, "eval_loss": Infinity, "eval_runtime": 317.8204, "eval_samples_per_second": 22.094, "eval_steps_per_second": 2.763, "eval_wer": 0.38777237148297017, "step": 165000 }, { "epoch": 6.333231287310577, "grad_norm": 0.5500897765159607, "learning_rate": 5.197293233082706e-05, "loss": 0.2574, "step": 165500 }, { "epoch": 6.352364916577376, "grad_norm": 0.5922083854675293, "learning_rate": 5.122105263157894e-05, "loss": 0.265, "step": 166000 }, { "epoch": 6.352364916577376, "eval_loss": Infinity, "eval_runtime": 316.5005, "eval_samples_per_second": 22.186, "eval_steps_per_second": 2.774, "eval_wer": 0.38557753331922995, "step": 166000 }, { "epoch": 6.371498545844176, "grad_norm": 0.5268240571022034, "learning_rate": 5.046917293233082e-05, "loss": 0.2693, "step": 166500 }, { "epoch": 6.390632175110975, "grad_norm": 2.8765857219696045, "learning_rate": 4.9717293233082705e-05, "loss": 0.2657, "step": 167000 }, { "epoch": 6.390632175110975, "eval_loss": Infinity, "eval_runtime": 317.5854, "eval_samples_per_second": 22.111, "eval_steps_per_second": 2.765, "eval_wer": 0.38506187856991747, "step": 167000 }, { "epoch": 6.409765804377774, "grad_norm": 0.6294525265693665, "learning_rate": 4.896541353383458e-05, "loss": 0.2634, "step": 167500 }, { "epoch": 6.428899433644574, "grad_norm": 1.270578384399414, "learning_rate": 4.821503759398496e-05, "loss": 0.2625, "step": 168000 }, { "epoch": 6.428899433644574, "eval_loss": Infinity, "eval_runtime": 317.2046, "eval_samples_per_second": 22.137, "eval_steps_per_second": 2.768, "eval_wer": 0.3841495663211339, "step": 168000 }, { "epoch": 6.448033062911373, "grad_norm": 1.4967974424362183, "learning_rate": 4.746315789473684e-05, "loss": 0.2679, "step": 168500 }, { "epoch": 6.467166692178172, "grad_norm": 0.8087161779403687, "learning_rate": 4.671278195488721e-05, "loss": 0.2615, "step": 169000 }, { "epoch": 6.467166692178172, "eval_loss": Infinity, "eval_runtime": 316.9216, "eval_samples_per_second": 22.157, "eval_steps_per_second": 2.77, "eval_wer": 0.3831975883224032, "step": 169000 }, { "epoch": 6.486300321444972, "grad_norm": 0.5716475248336792, "learning_rate": 4.596090225563909e-05, "loss": 0.2507, "step": 169500 }, { "epoch": 6.505433950711771, "grad_norm": 0.7699230909347534, "learning_rate": 4.520902255639097e-05, "loss": 0.2629, "step": 170000 }, { "epoch": 6.505433950711771, "eval_loss": Infinity, "eval_runtime": 317.644, "eval_samples_per_second": 22.107, "eval_steps_per_second": 2.764, "eval_wer": 0.38342236090543685, "step": 170000 }, { "epoch": 6.52456757997857, "grad_norm": 0.8144583702087402, "learning_rate": 4.445714285714285e-05, "loss": 0.2646, "step": 170500 }, { "epoch": 6.54370120924537, "grad_norm": 0.9461275339126587, "learning_rate": 4.370526315789473e-05, "loss": 0.276, "step": 171000 }, { "epoch": 6.54370120924537, "eval_loss": Infinity, "eval_runtime": 316.3907, "eval_samples_per_second": 22.194, "eval_steps_per_second": 2.775, "eval_wer": 0.38307859107256187, "step": 171000 }, { "epoch": 6.562834838512169, "grad_norm": 0.5300208926200867, "learning_rate": 4.295338345864661e-05, "loss": 0.2657, "step": 171500 }, { "epoch": 6.581968467778968, "grad_norm": 0.611358106136322, "learning_rate": 4.220150375939849e-05, "loss": 0.2623, "step": 172000 }, { "epoch": 6.581968467778968, "eval_loss": Infinity, "eval_runtime": 316.1573, "eval_samples_per_second": 22.21, "eval_steps_per_second": 2.777, "eval_wer": 0.38129363232494184, "step": 172000 }, { "epoch": 6.601102097045768, "grad_norm": 0.8705514073371887, "learning_rate": 4.144962406015037e-05, "loss": 0.2614, "step": 172500 }, { "epoch": 6.620235726312567, "grad_norm": 0.4813309609889984, "learning_rate": 4.069924812030075e-05, "loss": 0.27, "step": 173000 }, { "epoch": 6.620235726312567, "eval_loss": Infinity, "eval_runtime": 316.2461, "eval_samples_per_second": 22.204, "eval_steps_per_second": 2.776, "eval_wer": 0.3814787391580283, "step": 173000 }, { "epoch": 6.639369355579366, "grad_norm": 2.4199442863464355, "learning_rate": 3.994736842105263e-05, "loss": 0.261, "step": 173500 }, { "epoch": 6.658502984846166, "grad_norm": 0.6200481653213501, "learning_rate": 3.919548872180451e-05, "loss": 0.2712, "step": 174000 }, { "epoch": 6.658502984846166, "eval_loss": Infinity, "eval_runtime": 317.1791, "eval_samples_per_second": 22.139, "eval_steps_per_second": 2.768, "eval_wer": 0.3812143008250476, "step": 174000 }, { "epoch": 6.677636614112965, "grad_norm": 0.2569734454154968, "learning_rate": 3.844360902255639e-05, "loss": 0.2614, "step": 174500 }, { "epoch": 6.696770243379764, "grad_norm": 0.44579431414604187, "learning_rate": 3.769172932330827e-05, "loss": 0.263, "step": 175000 }, { "epoch": 6.696770243379764, "eval_loss": Infinity, "eval_runtime": 316.5662, "eval_samples_per_second": 22.182, "eval_steps_per_second": 2.774, "eval_wer": 0.38161095832451875, "step": 175000 }, { "epoch": 6.715903872646564, "grad_norm": 0.38563570380210876, "learning_rate": 3.6939849624060146e-05, "loss": 0.2488, "step": 175500 }, { "epoch": 6.735037501913363, "grad_norm": 0.5862724781036377, "learning_rate": 3.6189473684210524e-05, "loss": 0.2616, "step": 176000 }, { "epoch": 6.735037501913363, "eval_loss": Infinity, "eval_runtime": 316.2674, "eval_samples_per_second": 22.203, "eval_steps_per_second": 2.776, "eval_wer": 0.379574783160567, "step": 176000 }, { "epoch": 6.754171131180162, "grad_norm": 0.5157662034034729, "learning_rate": 3.5437593984962405e-05, "loss": 0.2525, "step": 176500 }, { "epoch": 6.773304760446962, "grad_norm": 0.807600200176239, "learning_rate": 3.4687218045112776e-05, "loss": 0.253, "step": 177000 }, { "epoch": 6.773304760446962, "eval_loss": Infinity, "eval_runtime": 316.2322, "eval_samples_per_second": 22.205, "eval_steps_per_second": 2.776, "eval_wer": 0.3794161201607785, "step": 177000 }, { "epoch": 6.792438389713761, "grad_norm": 0.4601055383682251, "learning_rate": 3.393533834586466e-05, "loss": 0.2513, "step": 177500 }, { "epoch": 6.81157201898056, "grad_norm": 0.4807584881782532, "learning_rate": 3.3184962406015036e-05, "loss": 0.2572, "step": 178000 }, { "epoch": 6.81157201898056, "eval_loss": Infinity, "eval_runtime": 314.7771, "eval_samples_per_second": 22.308, "eval_steps_per_second": 2.789, "eval_wer": 0.38285381848952826, "step": 178000 }, { "epoch": 6.83070564824736, "grad_norm": 2.668820381164551, "learning_rate": 3.243308270676692e-05, "loss": 0.2522, "step": 178500 }, { "epoch": 6.8498392775141586, "grad_norm": 0.5270944833755493, "learning_rate": 3.168270676691729e-05, "loss": 0.2517, "step": 179000 }, { "epoch": 6.8498392775141586, "eval_loss": Infinity, "eval_runtime": 314.726, "eval_samples_per_second": 22.311, "eval_steps_per_second": 2.79, "eval_wer": 0.37728739158028346, "step": 179000 }, { "epoch": 6.868972906780958, "grad_norm": 0.5133803486824036, "learning_rate": 3.093082706766917e-05, "loss": 0.2479, "step": 179500 }, { "epoch": 6.888106536047758, "grad_norm": 0.6608215570449829, "learning_rate": 3.0178947368421048e-05, "loss": 0.2471, "step": 180000 }, { "epoch": 6.888106536047758, "eval_loss": Infinity, "eval_runtime": 315.1185, "eval_samples_per_second": 22.284, "eval_steps_per_second": 2.786, "eval_wer": 0.37833192299555746, "step": 180000 }, { "epoch": 6.9072401653145565, "grad_norm": 0.9923522472381592, "learning_rate": 2.942706766917293e-05, "loss": 0.2533, "step": 180500 }, { "epoch": 6.926373794581356, "grad_norm": 0.6495700478553772, "learning_rate": 2.8675187969924808e-05, "loss": 0.2441, "step": 181000 }, { "epoch": 6.926373794581356, "eval_loss": Infinity, "eval_runtime": 315.6279, "eval_samples_per_second": 22.248, "eval_steps_per_second": 2.782, "eval_wer": 0.37630896974825473, "step": 181000 }, { "epoch": 6.945507423848156, "grad_norm": 0.5995193123817444, "learning_rate": 2.792330827067669e-05, "loss": 0.2509, "step": 181500 }, { "epoch": 6.9646410531149545, "grad_norm": 0.6942078471183777, "learning_rate": 2.7172932330827067e-05, "loss": 0.245, "step": 182000 }, { "epoch": 6.9646410531149545, "eval_loss": Infinity, "eval_runtime": 316.6284, "eval_samples_per_second": 22.177, "eval_steps_per_second": 2.773, "eval_wer": 0.3749338904167548, "step": 182000 }, { "epoch": 6.983774682381754, "grad_norm": 0.7296892404556274, "learning_rate": 2.6421052631578945e-05, "loss": 0.2513, "step": 182500 }, { "epoch": 7.002908311648554, "grad_norm": 0.9255119562149048, "learning_rate": 2.5669172932330827e-05, "loss": 0.235, "step": 183000 }, { "epoch": 7.002908311648554, "eval_loss": Infinity, "eval_runtime": 315.5684, "eval_samples_per_second": 22.252, "eval_steps_per_second": 2.782, "eval_wer": 0.3724085043367887, "step": 183000 }, { "epoch": 7.0220419409153525, "grad_norm": 0.6719674468040466, "learning_rate": 2.4917293233082705e-05, "loss": 0.2154, "step": 183500 }, { "epoch": 7.041175570182152, "grad_norm": 0.5619477033615112, "learning_rate": 2.4165413533834586e-05, "loss": 0.2281, "step": 184000 }, { "epoch": 7.041175570182152, "eval_loss": Infinity, "eval_runtime": 316.4942, "eval_samples_per_second": 22.187, "eval_steps_per_second": 2.774, "eval_wer": 0.37427279458430296, "step": 184000 }, { "epoch": 7.060309199448952, "grad_norm": 0.7847068309783936, "learning_rate": 2.3413533834586465e-05, "loss": 0.2184, "step": 184500 }, { "epoch": 7.0794428287157505, "grad_norm": 0.7864698171615601, "learning_rate": 2.2661654135338346e-05, "loss": 0.2155, "step": 185000 }, { "epoch": 7.0794428287157505, "eval_loss": Infinity, "eval_runtime": 315.7964, "eval_samples_per_second": 22.236, "eval_steps_per_second": 2.78, "eval_wer": 0.3742463507510049, "step": 185000 }, { "epoch": 7.09857645798255, "grad_norm": 0.6666255593299866, "learning_rate": 2.1909774436090224e-05, "loss": 0.2106, "step": 185500 }, { "epoch": 7.11771008724935, "grad_norm": 0.5075043439865112, "learning_rate": 2.1157894736842106e-05, "loss": 0.2177, "step": 186000 }, { "epoch": 7.11771008724935, "eval_loss": Infinity, "eval_runtime": 316.0968, "eval_samples_per_second": 22.215, "eval_steps_per_second": 2.778, "eval_wer": 0.37367780833509623, "step": 186000 }, { "epoch": 7.1368437165161485, "grad_norm": 0.7393398284912109, "learning_rate": 2.0406015037593984e-05, "loss": 0.2208, "step": 186500 }, { "epoch": 7.155977345782948, "grad_norm": 0.3300219476222992, "learning_rate": 1.9654135338345865e-05, "loss": 0.2107, "step": 187000 }, { "epoch": 7.155977345782948, "eval_loss": Infinity, "eval_runtime": 315.1907, "eval_samples_per_second": 22.279, "eval_steps_per_second": 2.786, "eval_wer": 0.37078220858895705, "step": 187000 }, { "epoch": 7.175110975049748, "grad_norm": 3.2175910472869873, "learning_rate": 1.8902255639097743e-05, "loss": 0.2032, "step": 187500 }, { "epoch": 7.194244604316546, "grad_norm": 0.28260278701782227, "learning_rate": 1.8151879699248118e-05, "loss": 0.2129, "step": 188000 }, { "epoch": 7.194244604316546, "eval_loss": Infinity, "eval_runtime": 315.4927, "eval_samples_per_second": 22.257, "eval_steps_per_second": 2.783, "eval_wer": 0.37161518933784643, "step": 188000 }, { "epoch": 7.213378233583346, "grad_norm": 1.4389430284500122, "learning_rate": 1.74e-05, "loss": 0.221, "step": 188500 }, { "epoch": 7.232511862850146, "grad_norm": 0.5784205198287964, "learning_rate": 1.6648120300751878e-05, "loss": 0.2173, "step": 189000 }, { "epoch": 7.232511862850146, "eval_loss": Infinity, "eval_runtime": 317.5523, "eval_samples_per_second": 22.113, "eval_steps_per_second": 2.765, "eval_wer": 0.36953934842394753, "step": 189000 }, { "epoch": 7.251645492116944, "grad_norm": 0.5264465808868408, "learning_rate": 1.589624060150376e-05, "loss": 0.2102, "step": 189500 }, { "epoch": 7.270779121383744, "grad_norm": 1.0469930171966553, "learning_rate": 1.5144360902255639e-05, "loss": 0.2145, "step": 190000 }, { "epoch": 7.270779121383744, "eval_loss": Infinity, "eval_runtime": 316.5149, "eval_samples_per_second": 22.185, "eval_steps_per_second": 2.774, "eval_wer": 0.37215728792045694, "step": 190000 }, { "epoch": 7.289912750650544, "grad_norm": 0.43167009949684143, "learning_rate": 1.4392481203007517e-05, "loss": 0.2217, "step": 190500 }, { "epoch": 7.309046379917342, "grad_norm": 1.083001732826233, "learning_rate": 1.3640601503759397e-05, "loss": 0.2116, "step": 191000 }, { "epoch": 7.309046379917342, "eval_loss": Infinity, "eval_runtime": 315.4082, "eval_samples_per_second": 22.263, "eval_steps_per_second": 2.784, "eval_wer": 0.37024011000634655, "step": 191000 }, { "epoch": 7.328180009184142, "grad_norm": 0.5683468580245972, "learning_rate": 1.2888721804511277e-05, "loss": 0.2085, "step": 191500 }, { "epoch": 7.347313638450942, "grad_norm": 0.45209017395973206, "learning_rate": 1.2136842105263156e-05, "loss": 0.212, "step": 192000 }, { "epoch": 7.347313638450942, "eval_loss": Infinity, "eval_runtime": 316.0151, "eval_samples_per_second": 22.22, "eval_steps_per_second": 2.778, "eval_wer": 0.37038555108948595, "step": 192000 }, { "epoch": 7.36644726771774, "grad_norm": 1.5557799339294434, "learning_rate": 1.1384962406015036e-05, "loss": 0.2091, "step": 192500 }, { "epoch": 7.38558089698454, "grad_norm": 0.456394761800766, "learning_rate": 1.0633082706766916e-05, "loss": 0.2116, "step": 193000 }, { "epoch": 7.38558089698454, "eval_loss": Infinity, "eval_runtime": 316.9212, "eval_samples_per_second": 22.157, "eval_steps_per_second": 2.77, "eval_wer": 0.37012111275650517, "step": 193000 }, { "epoch": 7.40471452625134, "grad_norm": 0.34570273756980896, "learning_rate": 9.881203007518796e-06, "loss": 0.2034, "step": 193500 }, { "epoch": 7.423848155518138, "grad_norm": 0.6514278054237366, "learning_rate": 9.129323308270676e-06, "loss": 0.2124, "step": 194000 }, { "epoch": 7.423848155518138, "eval_loss": Infinity, "eval_runtime": 315.4723, "eval_samples_per_second": 22.259, "eval_steps_per_second": 2.783, "eval_wer": 0.36865348000846204, "step": 194000 }, { "epoch": 7.442981784784938, "grad_norm": 0.3534170091152191, "learning_rate": 8.378947368421052e-06, "loss": 0.2146, "step": 194500 }, { "epoch": 7.462115414051738, "grad_norm": 0.6505366563796997, "learning_rate": 7.627067669172932e-06, "loss": 0.2078, "step": 195000 }, { "epoch": 7.462115414051738, "eval_loss": Infinity, "eval_runtime": 316.1812, "eval_samples_per_second": 22.209, "eval_steps_per_second": 2.777, "eval_wer": 0.3681113814258515, "step": 195000 }, { "epoch": 7.481249043318536, "grad_norm": 0.5068254470825195, "learning_rate": 6.8751879699248115e-06, "loss": 0.2097, "step": 195500 }, { "epoch": 7.500382672585336, "grad_norm": 0.32878100872039795, "learning_rate": 6.124812030075188e-06, "loss": 0.2158, "step": 196000 }, { "epoch": 7.500382672585336, "eval_loss": Infinity, "eval_runtime": 316.2612, "eval_samples_per_second": 22.203, "eval_steps_per_second": 2.776, "eval_wer": 0.3682171567590438, "step": 196000 }, { "epoch": 7.519516301852136, "grad_norm": 0.9998613595962524, "learning_rate": 5.3729323308270675e-06, "loss": 0.2031, "step": 196500 }, { "epoch": 7.538649931118934, "grad_norm": 0.6963976621627808, "learning_rate": 4.622556390977443e-06, "loss": 0.2157, "step": 197000 }, { "epoch": 7.538649931118934, "eval_loss": Infinity, "eval_runtime": 316.927, "eval_samples_per_second": 22.157, "eval_steps_per_second": 2.77, "eval_wer": 0.36727840067696216, "step": 197000 }, { "epoch": 7.557783560385734, "grad_norm": 0.6300442218780518, "learning_rate": 3.870676691729323e-06, "loss": 0.2082, "step": 197500 }, { "epoch": 7.576917189652534, "grad_norm": 0.3542906939983368, "learning_rate": 3.118796992481203e-06, "loss": 0.2045, "step": 198000 }, { "epoch": 7.576917189652534, "eval_loss": Infinity, "eval_runtime": 317.109, "eval_samples_per_second": 22.144, "eval_steps_per_second": 2.769, "eval_wer": 0.3666834144277554, "step": 198000 }, { "epoch": 7.596050818919332, "grad_norm": 0.9721285700798035, "learning_rate": 2.366917293233083e-06, "loss": 0.209, "step": 198500 }, { "epoch": 7.615184448186132, "grad_norm": 0.8516126275062561, "learning_rate": 1.6165413533834587e-06, "loss": 0.2188, "step": 199000 }, { "epoch": 7.615184448186132, "eval_loss": Infinity, "eval_runtime": 316.4647, "eval_samples_per_second": 22.189, "eval_steps_per_second": 2.774, "eval_wer": 0.36752961709329385, "step": 199000 }, { "epoch": 7.634318077452932, "grad_norm": 0.7036492824554443, "learning_rate": 8.646616541353383e-07, "loss": 0.2072, "step": 199500 }, { "epoch": 7.65345170671973, "grad_norm": 0.5840544104576111, "learning_rate": 1.1278195488721805e-07, "loss": 0.2041, "step": 200000 }, { "epoch": 7.65345170671973, "eval_loss": Infinity, "eval_runtime": 315.8635, "eval_samples_per_second": 22.231, "eval_steps_per_second": 2.78, "eval_wer": 0.3670800719272266, "step": 200000 }, { "epoch": 7.65345170671973, "step": 200000, "total_flos": 1.6865103601997185e+21, "train_loss": 0.5320780529403687, "train_runtime": 333857.2134, "train_samples_per_second": 4.792, "train_steps_per_second": 0.599 } ], "logging_steps": 500, "max_steps": 200000, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 4000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6865103601997185e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }