{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9830537820582188, "eval_steps": 2888, "global_step": 11550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0001731414349096418, "grad_norm": 6.625, "learning_rate": 2.0000000000000002e-07, "loss": 0.7545, "step": 1 }, { "epoch": 0.0001731414349096418, "eval_loss": 0.7016668915748596, "eval_runtime": 2676.1717, "eval_samples_per_second": 18.713, "eval_steps_per_second": 18.713, "step": 1 }, { "epoch": 0.0003462828698192836, "grad_norm": 6.0625, "learning_rate": 4.0000000000000003e-07, "loss": 0.8693, "step": 2 }, { "epoch": 0.0005194243047289255, "grad_norm": 5.875, "learning_rate": 6.000000000000001e-07, "loss": 0.84, "step": 3 }, { "epoch": 0.0006925657396385672, "grad_norm": 6.40625, "learning_rate": 8.000000000000001e-07, "loss": 0.8406, "step": 4 }, { "epoch": 0.0008657071745482091, "grad_norm": 6.34375, "learning_rate": 1.0000000000000002e-06, "loss": 0.8453, "step": 5 }, { "epoch": 0.001038848609457851, "grad_norm": 5.75, "learning_rate": 1.2000000000000002e-06, "loss": 0.7799, "step": 6 }, { "epoch": 0.0012119900443674928, "grad_norm": 6.03125, "learning_rate": 1.4000000000000001e-06, "loss": 0.8114, "step": 7 }, { "epoch": 0.0013851314792771344, "grad_norm": 5.59375, "learning_rate": 1.6000000000000001e-06, "loss": 0.8591, "step": 8 }, { "epoch": 0.0015582729141867763, "grad_norm": 5.46875, "learning_rate": 1.8000000000000001e-06, "loss": 0.8106, "step": 9 }, { "epoch": 0.0017314143490964181, "grad_norm": 5.53125, "learning_rate": 2.0000000000000003e-06, "loss": 0.8192, "step": 10 }, { "epoch": 0.00190455578400606, "grad_norm": 4.25, "learning_rate": 2.2e-06, "loss": 0.7202, "step": 11 }, { "epoch": 0.002077697218915702, "grad_norm": 4.84375, "learning_rate": 2.4000000000000003e-06, "loss": 0.806, "step": 12 }, { "epoch": 0.0022508386538253435, "grad_norm": 3.546875, "learning_rate": 2.6e-06, "loss": 0.7688, "step": 13 }, { "epoch": 0.0024239800887349856, "grad_norm": 3.5, "learning_rate": 2.8000000000000003e-06, "loss": 0.7491, "step": 14 }, { "epoch": 0.002597121523644627, "grad_norm": 3.109375, "learning_rate": 3e-06, "loss": 0.7972, "step": 15 }, { "epoch": 0.002770262958554269, "grad_norm": 2.625, "learning_rate": 3.2000000000000003e-06, "loss": 0.739, "step": 16 }, { "epoch": 0.002943404393463911, "grad_norm": 2.359375, "learning_rate": 3.4000000000000005e-06, "loss": 0.6991, "step": 17 }, { "epoch": 0.0031165458283735526, "grad_norm": 2.171875, "learning_rate": 3.6000000000000003e-06, "loss": 0.6382, "step": 18 }, { "epoch": 0.0032896872632831946, "grad_norm": 2.1875, "learning_rate": 3.8000000000000005e-06, "loss": 0.7094, "step": 19 }, { "epoch": 0.0034628286981928363, "grad_norm": 2.1875, "learning_rate": 4.000000000000001e-06, "loss": 0.7313, "step": 20 }, { "epoch": 0.003635970133102478, "grad_norm": 2.25, "learning_rate": 4.2000000000000004e-06, "loss": 0.6856, "step": 21 }, { "epoch": 0.00380911156801212, "grad_norm": 2.109375, "learning_rate": 4.4e-06, "loss": 0.7294, "step": 22 }, { "epoch": 0.003982253002921762, "grad_norm": 1.9296875, "learning_rate": 4.600000000000001e-06, "loss": 0.6787, "step": 23 }, { "epoch": 0.004155394437831404, "grad_norm": 2.203125, "learning_rate": 4.800000000000001e-06, "loss": 0.7318, "step": 24 }, { "epoch": 0.004328535872741046, "grad_norm": 2.28125, "learning_rate": 5e-06, "loss": 0.7237, "step": 25 }, { "epoch": 0.004501677307650687, "grad_norm": 2.03125, "learning_rate": 5.2e-06, "loss": 0.7419, "step": 26 }, { "epoch": 0.004674818742560329, "grad_norm": 1.875, "learning_rate": 5.400000000000001e-06, "loss": 0.7034, "step": 27 }, { "epoch": 0.004847960177469971, "grad_norm": 1.7890625, "learning_rate": 5.600000000000001e-06, "loss": 0.6711, "step": 28 }, { "epoch": 0.005021101612379612, "grad_norm": 1.828125, "learning_rate": 5.8e-06, "loss": 0.6674, "step": 29 }, { "epoch": 0.005194243047289254, "grad_norm": 2.015625, "learning_rate": 6e-06, "loss": 0.7177, "step": 30 }, { "epoch": 0.0053673844821988965, "grad_norm": 1.953125, "learning_rate": 6.200000000000001e-06, "loss": 0.7268, "step": 31 }, { "epoch": 0.005540525917108538, "grad_norm": 1.859375, "learning_rate": 6.4000000000000006e-06, "loss": 0.6421, "step": 32 }, { "epoch": 0.00571366735201818, "grad_norm": 2.015625, "learning_rate": 6.600000000000001e-06, "loss": 0.7475, "step": 33 }, { "epoch": 0.005886808786927822, "grad_norm": 1.9921875, "learning_rate": 6.800000000000001e-06, "loss": 0.7586, "step": 34 }, { "epoch": 0.006059950221837464, "grad_norm": 1.96875, "learning_rate": 7e-06, "loss": 0.7012, "step": 35 }, { "epoch": 0.006233091656747105, "grad_norm": 2.09375, "learning_rate": 7.2000000000000005e-06, "loss": 0.7215, "step": 36 }, { "epoch": 0.006406233091656747, "grad_norm": 1.9375, "learning_rate": 7.4e-06, "loss": 0.6933, "step": 37 }, { "epoch": 0.006579374526566389, "grad_norm": 1.7734375, "learning_rate": 7.600000000000001e-06, "loss": 0.7408, "step": 38 }, { "epoch": 0.0067525159614760305, "grad_norm": 1.6640625, "learning_rate": 7.800000000000002e-06, "loss": 0.6541, "step": 39 }, { "epoch": 0.0069256573963856725, "grad_norm": 1.8359375, "learning_rate": 8.000000000000001e-06, "loss": 0.7495, "step": 40 }, { "epoch": 0.007098798831295315, "grad_norm": 1.6875, "learning_rate": 8.2e-06, "loss": 0.6494, "step": 41 }, { "epoch": 0.007271940266204956, "grad_norm": 1.640625, "learning_rate": 8.400000000000001e-06, "loss": 0.6657, "step": 42 }, { "epoch": 0.007445081701114598, "grad_norm": 1.828125, "learning_rate": 8.6e-06, "loss": 0.6713, "step": 43 }, { "epoch": 0.00761822313602424, "grad_norm": 2.140625, "learning_rate": 8.8e-06, "loss": 0.7265, "step": 44 }, { "epoch": 0.007791364570933882, "grad_norm": 1.578125, "learning_rate": 9e-06, "loss": 0.6876, "step": 45 }, { "epoch": 0.007964506005843523, "grad_norm": 1.6015625, "learning_rate": 9.200000000000002e-06, "loss": 0.6251, "step": 46 }, { "epoch": 0.008137647440753164, "grad_norm": 1.6328125, "learning_rate": 9.4e-06, "loss": 0.6718, "step": 47 }, { "epoch": 0.008310788875662807, "grad_norm": 1.75, "learning_rate": 9.600000000000001e-06, "loss": 0.6638, "step": 48 }, { "epoch": 0.008483930310572449, "grad_norm": 1.75, "learning_rate": 9.800000000000001e-06, "loss": 0.7076, "step": 49 }, { "epoch": 0.008657071745482092, "grad_norm": 1.78125, "learning_rate": 1e-05, "loss": 0.6477, "step": 50 }, { "epoch": 0.008830213180391733, "grad_norm": 1.8203125, "learning_rate": 1.02e-05, "loss": 0.7154, "step": 51 }, { "epoch": 0.009003354615301374, "grad_norm": 1.8046875, "learning_rate": 1.04e-05, "loss": 0.6737, "step": 52 }, { "epoch": 0.009176496050211017, "grad_norm": 2.234375, "learning_rate": 1.0600000000000002e-05, "loss": 0.6486, "step": 53 }, { "epoch": 0.009349637485120658, "grad_norm": 1.7578125, "learning_rate": 1.0800000000000002e-05, "loss": 0.6501, "step": 54 }, { "epoch": 0.0095227789200303, "grad_norm": 1.890625, "learning_rate": 1.1000000000000001e-05, "loss": 0.6693, "step": 55 }, { "epoch": 0.009695920354939942, "grad_norm": 1.9921875, "learning_rate": 1.1200000000000001e-05, "loss": 0.6949, "step": 56 }, { "epoch": 0.009869061789849583, "grad_norm": 1.7578125, "learning_rate": 1.14e-05, "loss": 0.6332, "step": 57 }, { "epoch": 0.010042203224759225, "grad_norm": 1.796875, "learning_rate": 1.16e-05, "loss": 0.6347, "step": 58 }, { "epoch": 0.010215344659668868, "grad_norm": 1.6640625, "learning_rate": 1.18e-05, "loss": 0.6271, "step": 59 }, { "epoch": 0.010388486094578509, "grad_norm": 1.6875, "learning_rate": 1.2e-05, "loss": 0.633, "step": 60 }, { "epoch": 0.01056162752948815, "grad_norm": 1.78125, "learning_rate": 1.22e-05, "loss": 0.6825, "step": 61 }, { "epoch": 0.010734768964397793, "grad_norm": 1.6484375, "learning_rate": 1.2400000000000002e-05, "loss": 0.6528, "step": 62 }, { "epoch": 0.010907910399307434, "grad_norm": 1.640625, "learning_rate": 1.2600000000000001e-05, "loss": 0.6575, "step": 63 }, { "epoch": 0.011081051834217075, "grad_norm": 1.7578125, "learning_rate": 1.2800000000000001e-05, "loss": 0.6672, "step": 64 }, { "epoch": 0.011254193269126718, "grad_norm": 1.71875, "learning_rate": 1.3000000000000001e-05, "loss": 0.7839, "step": 65 }, { "epoch": 0.01142733470403636, "grad_norm": 1.6953125, "learning_rate": 1.3200000000000002e-05, "loss": 0.6609, "step": 66 }, { "epoch": 0.011600476138946, "grad_norm": 1.734375, "learning_rate": 1.3400000000000002e-05, "loss": 0.5698, "step": 67 }, { "epoch": 0.011773617573855644, "grad_norm": 1.953125, "learning_rate": 1.3600000000000002e-05, "loss": 0.6409, "step": 68 }, { "epoch": 0.011946759008765285, "grad_norm": 1.8046875, "learning_rate": 1.38e-05, "loss": 0.6236, "step": 69 }, { "epoch": 0.012119900443674928, "grad_norm": 2.109375, "learning_rate": 1.4e-05, "loss": 0.701, "step": 70 }, { "epoch": 0.012293041878584569, "grad_norm": 2.0625, "learning_rate": 1.4200000000000001e-05, "loss": 0.6458, "step": 71 }, { "epoch": 0.01246618331349421, "grad_norm": 1.828125, "learning_rate": 1.4400000000000001e-05, "loss": 0.676, "step": 72 }, { "epoch": 0.012639324748403853, "grad_norm": 1.8046875, "learning_rate": 1.46e-05, "loss": 0.7682, "step": 73 }, { "epoch": 0.012812466183313494, "grad_norm": 2.15625, "learning_rate": 1.48e-05, "loss": 0.6824, "step": 74 }, { "epoch": 0.012985607618223136, "grad_norm": 2.046875, "learning_rate": 1.5000000000000002e-05, "loss": 0.6144, "step": 75 }, { "epoch": 0.013158749053132779, "grad_norm": 1.796875, "learning_rate": 1.5200000000000002e-05, "loss": 0.6651, "step": 76 }, { "epoch": 0.01333189048804242, "grad_norm": 1.625, "learning_rate": 1.54e-05, "loss": 0.6501, "step": 77 }, { "epoch": 0.013505031922952061, "grad_norm": 1.90625, "learning_rate": 1.5600000000000003e-05, "loss": 0.6763, "step": 78 }, { "epoch": 0.013678173357861704, "grad_norm": 2.203125, "learning_rate": 1.58e-05, "loss": 0.715, "step": 79 }, { "epoch": 0.013851314792771345, "grad_norm": 2.328125, "learning_rate": 1.6000000000000003e-05, "loss": 0.7039, "step": 80 }, { "epoch": 0.014024456227680986, "grad_norm": 1.6875, "learning_rate": 1.62e-05, "loss": 0.6868, "step": 81 }, { "epoch": 0.01419759766259063, "grad_norm": 2.09375, "learning_rate": 1.64e-05, "loss": 0.5994, "step": 82 }, { "epoch": 0.01437073909750027, "grad_norm": 2.015625, "learning_rate": 1.66e-05, "loss": 0.6721, "step": 83 }, { "epoch": 0.014543880532409912, "grad_norm": 1.7890625, "learning_rate": 1.6800000000000002e-05, "loss": 0.6377, "step": 84 }, { "epoch": 0.014717021967319555, "grad_norm": 1.8515625, "learning_rate": 1.7e-05, "loss": 0.6836, "step": 85 }, { "epoch": 0.014890163402229196, "grad_norm": 1.71875, "learning_rate": 1.72e-05, "loss": 0.5657, "step": 86 }, { "epoch": 0.015063304837138837, "grad_norm": 2.21875, "learning_rate": 1.7400000000000003e-05, "loss": 0.7264, "step": 87 }, { "epoch": 0.01523644627204848, "grad_norm": 1.8203125, "learning_rate": 1.76e-05, "loss": 0.7024, "step": 88 }, { "epoch": 0.015409587706958121, "grad_norm": 1.7578125, "learning_rate": 1.7800000000000002e-05, "loss": 0.7023, "step": 89 }, { "epoch": 0.015582729141867764, "grad_norm": 1.84375, "learning_rate": 1.8e-05, "loss": 0.6273, "step": 90 }, { "epoch": 0.015755870576777405, "grad_norm": 1.6953125, "learning_rate": 1.8200000000000002e-05, "loss": 0.6805, "step": 91 }, { "epoch": 0.015929012011687047, "grad_norm": 1.65625, "learning_rate": 1.8400000000000003e-05, "loss": 0.6129, "step": 92 }, { "epoch": 0.016102153446596688, "grad_norm": 1.8046875, "learning_rate": 1.86e-05, "loss": 0.6254, "step": 93 }, { "epoch": 0.01627529488150633, "grad_norm": 1.6328125, "learning_rate": 1.88e-05, "loss": 0.5854, "step": 94 }, { "epoch": 0.016448436316415974, "grad_norm": 1.8046875, "learning_rate": 1.9e-05, "loss": 0.6321, "step": 95 }, { "epoch": 0.016621577751325615, "grad_norm": 1.7265625, "learning_rate": 1.9200000000000003e-05, "loss": 0.696, "step": 96 }, { "epoch": 0.016794719186235256, "grad_norm": 1.7265625, "learning_rate": 1.94e-05, "loss": 0.6804, "step": 97 }, { "epoch": 0.016967860621144897, "grad_norm": 1.7421875, "learning_rate": 1.9600000000000002e-05, "loss": 0.6123, "step": 98 }, { "epoch": 0.01714100205605454, "grad_norm": 1.5859375, "learning_rate": 1.98e-05, "loss": 0.6206, "step": 99 }, { "epoch": 0.017314143490964183, "grad_norm": 1.84375, "learning_rate": 2e-05, "loss": 0.7063, "step": 100 }, { "epoch": 0.017487284925873824, "grad_norm": 1.796875, "learning_rate": 1.999999983367739e-05, "loss": 0.5535, "step": 101 }, { "epoch": 0.017660426360783466, "grad_norm": 1.8125, "learning_rate": 1.9999999334709574e-05, "loss": 0.6926, "step": 102 }, { "epoch": 0.017833567795693107, "grad_norm": 1.7421875, "learning_rate": 1.999999850309656e-05, "loss": 0.6102, "step": 103 }, { "epoch": 0.018006709230602748, "grad_norm": 2.03125, "learning_rate": 1.9999997338838377e-05, "loss": 0.6583, "step": 104 }, { "epoch": 0.01817985066551239, "grad_norm": 1.7109375, "learning_rate": 1.999999584193507e-05, "loss": 0.6786, "step": 105 }, { "epoch": 0.018352992100422034, "grad_norm": 1.90625, "learning_rate": 1.999999401238668e-05, "loss": 0.6418, "step": 106 }, { "epoch": 0.018526133535331675, "grad_norm": 1.6640625, "learning_rate": 1.9999991850193275e-05, "loss": 0.5692, "step": 107 }, { "epoch": 0.018699274970241316, "grad_norm": 1.6328125, "learning_rate": 1.999998935535492e-05, "loss": 0.6631, "step": 108 }, { "epoch": 0.018872416405150957, "grad_norm": 1.8203125, "learning_rate": 1.999998652787171e-05, "loss": 0.6732, "step": 109 }, { "epoch": 0.0190455578400606, "grad_norm": 1.5859375, "learning_rate": 1.9999983367743727e-05, "loss": 0.61, "step": 110 }, { "epoch": 0.01921869927497024, "grad_norm": 1.7265625, "learning_rate": 1.999997987497108e-05, "loss": 0.6419, "step": 111 }, { "epoch": 0.019391840709879885, "grad_norm": 1.765625, "learning_rate": 1.9999976049553883e-05, "loss": 0.6422, "step": 112 }, { "epoch": 0.019564982144789526, "grad_norm": 1.84375, "learning_rate": 1.999997189149227e-05, "loss": 0.6596, "step": 113 }, { "epoch": 0.019738123579699167, "grad_norm": 1.6640625, "learning_rate": 1.9999967400786373e-05, "loss": 0.6332, "step": 114 }, { "epoch": 0.019911265014608808, "grad_norm": 1.7265625, "learning_rate": 1.9999962577436344e-05, "loss": 0.6274, "step": 115 }, { "epoch": 0.02008440644951845, "grad_norm": 2.171875, "learning_rate": 1.9999957421442345e-05, "loss": 0.6282, "step": 116 }, { "epoch": 0.02025754788442809, "grad_norm": 1.6953125, "learning_rate": 1.999995193280454e-05, "loss": 0.6431, "step": 117 }, { "epoch": 0.020430689319337735, "grad_norm": 1.609375, "learning_rate": 1.9999946111523127e-05, "loss": 0.6341, "step": 118 }, { "epoch": 0.020603830754247376, "grad_norm": 1.6015625, "learning_rate": 1.9999939957598284e-05, "loss": 0.5931, "step": 119 }, { "epoch": 0.020776972189157018, "grad_norm": 1.8359375, "learning_rate": 1.999993347103022e-05, "loss": 0.6817, "step": 120 }, { "epoch": 0.02095011362406666, "grad_norm": 1.703125, "learning_rate": 1.9999926651819154e-05, "loss": 0.6795, "step": 121 }, { "epoch": 0.0211232550589763, "grad_norm": 1.6953125, "learning_rate": 1.999991949996531e-05, "loss": 0.6506, "step": 122 }, { "epoch": 0.021296396493885945, "grad_norm": 1.7578125, "learning_rate": 1.9999912015468927e-05, "loss": 0.6239, "step": 123 }, { "epoch": 0.021469537928795586, "grad_norm": 1.7890625, "learning_rate": 1.9999904198330253e-05, "loss": 0.744, "step": 124 }, { "epoch": 0.021642679363705227, "grad_norm": 1.703125, "learning_rate": 1.999989604854955e-05, "loss": 0.6475, "step": 125 }, { "epoch": 0.02181582079861487, "grad_norm": 1.7109375, "learning_rate": 1.999988756612709e-05, "loss": 0.5957, "step": 126 }, { "epoch": 0.02198896223352451, "grad_norm": 1.8046875, "learning_rate": 1.9999878751063153e-05, "loss": 0.7941, "step": 127 }, { "epoch": 0.02216210366843415, "grad_norm": 1.8515625, "learning_rate": 1.999986960335803e-05, "loss": 0.6683, "step": 128 }, { "epoch": 0.022335245103343795, "grad_norm": 1.7265625, "learning_rate": 1.9999860123012032e-05, "loss": 0.6549, "step": 129 }, { "epoch": 0.022508386538253437, "grad_norm": 1.921875, "learning_rate": 1.9999850310025464e-05, "loss": 0.6899, "step": 130 }, { "epoch": 0.022681527973163078, "grad_norm": 1.6796875, "learning_rate": 1.9999840164398664e-05, "loss": 0.698, "step": 131 }, { "epoch": 0.02285466940807272, "grad_norm": 1.75, "learning_rate": 1.999982968613196e-05, "loss": 0.6138, "step": 132 }, { "epoch": 0.02302781084298236, "grad_norm": 1.6640625, "learning_rate": 1.9999818875225707e-05, "loss": 0.625, "step": 133 }, { "epoch": 0.023200952277892, "grad_norm": 1.734375, "learning_rate": 1.999980773168026e-05, "loss": 0.7574, "step": 134 }, { "epoch": 0.023374093712801646, "grad_norm": 1.859375, "learning_rate": 1.9999796255495996e-05, "loss": 0.6534, "step": 135 }, { "epoch": 0.023547235147711287, "grad_norm": 1.703125, "learning_rate": 1.9999784446673288e-05, "loss": 0.5763, "step": 136 }, { "epoch": 0.02372037658262093, "grad_norm": 1.6171875, "learning_rate": 1.999977230521254e-05, "loss": 0.6638, "step": 137 }, { "epoch": 0.02389351801753057, "grad_norm": 1.796875, "learning_rate": 1.999975983111414e-05, "loss": 0.6397, "step": 138 }, { "epoch": 0.02406665945244021, "grad_norm": 1.78125, "learning_rate": 1.9999747024378516e-05, "loss": 0.6052, "step": 139 }, { "epoch": 0.024239800887349856, "grad_norm": 1.5546875, "learning_rate": 1.9999733885006093e-05, "loss": 0.6981, "step": 140 }, { "epoch": 0.024412942322259497, "grad_norm": 1.5625, "learning_rate": 1.9999720412997306e-05, "loss": 0.6907, "step": 141 }, { "epoch": 0.024586083757169138, "grad_norm": 1.703125, "learning_rate": 1.99997066083526e-05, "loss": 0.5899, "step": 142 }, { "epoch": 0.02475922519207878, "grad_norm": 1.7265625, "learning_rate": 1.9999692471072433e-05, "loss": 0.661, "step": 143 }, { "epoch": 0.02493236662698842, "grad_norm": 2.015625, "learning_rate": 1.9999678001157285e-05, "loss": 0.649, "step": 144 }, { "epoch": 0.02510550806189806, "grad_norm": 1.890625, "learning_rate": 1.999966319860763e-05, "loss": 0.6599, "step": 145 }, { "epoch": 0.025278649496807706, "grad_norm": 1.671875, "learning_rate": 1.999964806342396e-05, "loss": 0.5843, "step": 146 }, { "epoch": 0.025451790931717348, "grad_norm": 1.625, "learning_rate": 1.999963259560678e-05, "loss": 0.6026, "step": 147 }, { "epoch": 0.02562493236662699, "grad_norm": 1.6328125, "learning_rate": 1.9999616795156605e-05, "loss": 0.6551, "step": 148 }, { "epoch": 0.02579807380153663, "grad_norm": 1.6875, "learning_rate": 1.9999600662073963e-05, "loss": 0.5603, "step": 149 }, { "epoch": 0.02597121523644627, "grad_norm": 1.5625, "learning_rate": 1.9999584196359385e-05, "loss": 0.5993, "step": 150 }, { "epoch": 0.026144356671355912, "grad_norm": 1.6640625, "learning_rate": 1.9999567398013423e-05, "loss": 0.8228, "step": 151 }, { "epoch": 0.026317498106265557, "grad_norm": 1.7265625, "learning_rate": 1.9999550267036634e-05, "loss": 0.7508, "step": 152 }, { "epoch": 0.026490639541175198, "grad_norm": 1.578125, "learning_rate": 1.999953280342959e-05, "loss": 0.6037, "step": 153 }, { "epoch": 0.02666378097608484, "grad_norm": 1.8515625, "learning_rate": 1.9999515007192866e-05, "loss": 0.6771, "step": 154 }, { "epoch": 0.02683692241099448, "grad_norm": 1.671875, "learning_rate": 1.9999496878327064e-05, "loss": 0.7258, "step": 155 }, { "epoch": 0.027010063845904122, "grad_norm": 1.8046875, "learning_rate": 1.9999478416832778e-05, "loss": 0.7064, "step": 156 }, { "epoch": 0.027183205280813763, "grad_norm": 1.578125, "learning_rate": 1.9999459622710625e-05, "loss": 0.6245, "step": 157 }, { "epoch": 0.027356346715723408, "grad_norm": 1.75, "learning_rate": 1.999944049596123e-05, "loss": 0.679, "step": 158 }, { "epoch": 0.02752948815063305, "grad_norm": 1.8515625, "learning_rate": 1.999942103658523e-05, "loss": 0.6755, "step": 159 }, { "epoch": 0.02770262958554269, "grad_norm": 2.03125, "learning_rate": 1.9999401244583274e-05, "loss": 0.7129, "step": 160 }, { "epoch": 0.02787577102045233, "grad_norm": 1.53125, "learning_rate": 1.9999381119956015e-05, "loss": 0.6356, "step": 161 }, { "epoch": 0.028048912455361973, "grad_norm": 1.6953125, "learning_rate": 1.9999360662704132e-05, "loss": 0.6605, "step": 162 }, { "epoch": 0.028222053890271617, "grad_norm": 1.7578125, "learning_rate": 1.9999339872828292e-05, "loss": 0.6119, "step": 163 }, { "epoch": 0.02839519532518126, "grad_norm": 1.734375, "learning_rate": 1.99993187503292e-05, "loss": 0.6565, "step": 164 }, { "epoch": 0.0285683367600909, "grad_norm": 1.6015625, "learning_rate": 1.999929729520755e-05, "loss": 0.6351, "step": 165 }, { "epoch": 0.02874147819500054, "grad_norm": 1.71875, "learning_rate": 1.999927550746406e-05, "loss": 0.6832, "step": 166 }, { "epoch": 0.028914619629910182, "grad_norm": 1.640625, "learning_rate": 1.9999253387099452e-05, "loss": 0.6671, "step": 167 }, { "epoch": 0.029087761064819823, "grad_norm": 1.6484375, "learning_rate": 1.9999230934114462e-05, "loss": 0.6566, "step": 168 }, { "epoch": 0.029260902499729468, "grad_norm": 1.65625, "learning_rate": 1.9999208148509838e-05, "loss": 0.7451, "step": 169 }, { "epoch": 0.02943404393463911, "grad_norm": 1.578125, "learning_rate": 1.999918503028634e-05, "loss": 0.6389, "step": 170 }, { "epoch": 0.02960718536954875, "grad_norm": 1.546875, "learning_rate": 1.999916157944473e-05, "loss": 0.6776, "step": 171 }, { "epoch": 0.02978032680445839, "grad_norm": 1.8828125, "learning_rate": 1.9999137795985796e-05, "loss": 0.7287, "step": 172 }, { "epoch": 0.029953468239368033, "grad_norm": 1.71875, "learning_rate": 1.9999113679910326e-05, "loss": 0.6642, "step": 173 }, { "epoch": 0.030126609674277674, "grad_norm": 1.59375, "learning_rate": 1.9999089231219123e-05, "loss": 0.656, "step": 174 }, { "epoch": 0.03029975110918732, "grad_norm": 1.59375, "learning_rate": 1.9999064449912996e-05, "loss": 0.7082, "step": 175 }, { "epoch": 0.03047289254409696, "grad_norm": 1.734375, "learning_rate": 1.999903933599278e-05, "loss": 0.5939, "step": 176 }, { "epoch": 0.0306460339790066, "grad_norm": 1.5859375, "learning_rate": 1.9999013889459294e-05, "loss": 0.7187, "step": 177 }, { "epoch": 0.030819175413916242, "grad_norm": 1.6796875, "learning_rate": 1.99989881103134e-05, "loss": 0.6606, "step": 178 }, { "epoch": 0.030992316848825884, "grad_norm": 1.9609375, "learning_rate": 1.9998961998555947e-05, "loss": 0.7929, "step": 179 }, { "epoch": 0.031165458283735528, "grad_norm": 1.65625, "learning_rate": 1.9998935554187808e-05, "loss": 0.5974, "step": 180 }, { "epoch": 0.031338599718645166, "grad_norm": 1.46875, "learning_rate": 1.9998908777209854e-05, "loss": 0.6009, "step": 181 }, { "epoch": 0.03151174115355481, "grad_norm": 1.578125, "learning_rate": 1.999888166762299e-05, "loss": 0.5849, "step": 182 }, { "epoch": 0.031684882588464455, "grad_norm": 1.71875, "learning_rate": 1.9998854225428105e-05, "loss": 0.6923, "step": 183 }, { "epoch": 0.03185802402337409, "grad_norm": 1.640625, "learning_rate": 1.999882645062612e-05, "loss": 0.6815, "step": 184 }, { "epoch": 0.03203116545828374, "grad_norm": 1.609375, "learning_rate": 1.9998798343217954e-05, "loss": 0.6081, "step": 185 }, { "epoch": 0.032204306893193375, "grad_norm": 1.59375, "learning_rate": 1.999876990320454e-05, "loss": 0.6306, "step": 186 }, { "epoch": 0.03237744832810302, "grad_norm": 1.5078125, "learning_rate": 1.9998741130586837e-05, "loss": 0.5962, "step": 187 }, { "epoch": 0.03255058976301266, "grad_norm": 2.015625, "learning_rate": 1.9998712025365786e-05, "loss": 0.6628, "step": 188 }, { "epoch": 0.0327237311979223, "grad_norm": 1.8515625, "learning_rate": 1.999868258754236e-05, "loss": 0.6227, "step": 189 }, { "epoch": 0.03289687263283195, "grad_norm": 1.6875, "learning_rate": 1.9998652817117547e-05, "loss": 0.6843, "step": 190 }, { "epoch": 0.033070014067741585, "grad_norm": 1.625, "learning_rate": 1.9998622714092328e-05, "loss": 0.608, "step": 191 }, { "epoch": 0.03324315550265123, "grad_norm": 1.59375, "learning_rate": 1.9998592278467702e-05, "loss": 0.7089, "step": 192 }, { "epoch": 0.03341629693756087, "grad_norm": 1.5859375, "learning_rate": 1.9998561510244694e-05, "loss": 0.6328, "step": 193 }, { "epoch": 0.03358943837247051, "grad_norm": 1.71875, "learning_rate": 1.9998530409424316e-05, "loss": 0.6686, "step": 194 }, { "epoch": 0.03376257980738016, "grad_norm": 1.625, "learning_rate": 1.9998498976007608e-05, "loss": 0.6553, "step": 195 }, { "epoch": 0.033935721242289794, "grad_norm": 1.671875, "learning_rate": 1.999846720999561e-05, "loss": 0.6402, "step": 196 }, { "epoch": 0.03410886267719944, "grad_norm": 1.7890625, "learning_rate": 1.9998435111389387e-05, "loss": 0.643, "step": 197 }, { "epoch": 0.03428200411210908, "grad_norm": 1.6640625, "learning_rate": 1.999840268019e-05, "loss": 0.6818, "step": 198 }, { "epoch": 0.03445514554701872, "grad_norm": 1.609375, "learning_rate": 1.9998369916398532e-05, "loss": 0.6872, "step": 199 }, { "epoch": 0.034628286981928366, "grad_norm": 1.7421875, "learning_rate": 1.9998336820016072e-05, "loss": 0.6137, "step": 200 }, { "epoch": 0.034801428416838004, "grad_norm": 1.5546875, "learning_rate": 1.9998303391043717e-05, "loss": 0.6047, "step": 201 }, { "epoch": 0.03497456985174765, "grad_norm": 1.578125, "learning_rate": 1.9998269629482584e-05, "loss": 0.699, "step": 202 }, { "epoch": 0.035147711286657286, "grad_norm": 1.7421875, "learning_rate": 1.9998235535333795e-05, "loss": 0.672, "step": 203 }, { "epoch": 0.03532085272156693, "grad_norm": 1.7421875, "learning_rate": 1.9998201108598477e-05, "loss": 0.6095, "step": 204 }, { "epoch": 0.03549399415647657, "grad_norm": 1.6953125, "learning_rate": 1.999816634927779e-05, "loss": 0.6847, "step": 205 }, { "epoch": 0.03566713559138621, "grad_norm": 1.515625, "learning_rate": 1.9998131257372878e-05, "loss": 0.6417, "step": 206 }, { "epoch": 0.03584027702629586, "grad_norm": 1.703125, "learning_rate": 1.999809583288491e-05, "loss": 0.6537, "step": 207 }, { "epoch": 0.036013418461205496, "grad_norm": 1.7265625, "learning_rate": 1.9998060075815068e-05, "loss": 0.6657, "step": 208 }, { "epoch": 0.03618655989611514, "grad_norm": 1.59375, "learning_rate": 1.999802398616454e-05, "loss": 0.6094, "step": 209 }, { "epoch": 0.03635970133102478, "grad_norm": 1.703125, "learning_rate": 1.9997987563934528e-05, "loss": 0.5829, "step": 210 }, { "epoch": 0.03653284276593442, "grad_norm": 1.6640625, "learning_rate": 1.999795080912624e-05, "loss": 0.656, "step": 211 }, { "epoch": 0.03670598420084407, "grad_norm": 1.671875, "learning_rate": 1.99979137217409e-05, "loss": 0.675, "step": 212 }, { "epoch": 0.036879125635753705, "grad_norm": 1.671875, "learning_rate": 1.9997876301779745e-05, "loss": 0.6832, "step": 213 }, { "epoch": 0.03705226707066335, "grad_norm": 1.6328125, "learning_rate": 1.9997838549244014e-05, "loss": 0.5866, "step": 214 }, { "epoch": 0.03722540850557299, "grad_norm": 1.6875, "learning_rate": 1.9997800464134966e-05, "loss": 0.6462, "step": 215 }, { "epoch": 0.03739854994048263, "grad_norm": 1.75, "learning_rate": 1.9997762046453868e-05, "loss": 0.6815, "step": 216 }, { "epoch": 0.03757169137539227, "grad_norm": 1.65625, "learning_rate": 1.9997723296201997e-05, "loss": 0.7094, "step": 217 }, { "epoch": 0.037744832810301915, "grad_norm": 1.9296875, "learning_rate": 1.9997684213380647e-05, "loss": 0.6586, "step": 218 }, { "epoch": 0.03791797424521156, "grad_norm": 1.5, "learning_rate": 1.999764479799111e-05, "loss": 0.6218, "step": 219 }, { "epoch": 0.0380911156801212, "grad_norm": 1.6171875, "learning_rate": 1.99976050500347e-05, "loss": 0.6245, "step": 220 }, { "epoch": 0.03826425711503084, "grad_norm": 1.5703125, "learning_rate": 1.999756496951274e-05, "loss": 0.6558, "step": 221 }, { "epoch": 0.03843739854994048, "grad_norm": 1.546875, "learning_rate": 1.9997524556426567e-05, "loss": 0.6509, "step": 222 }, { "epoch": 0.038610539984850124, "grad_norm": 1.609375, "learning_rate": 1.9997483810777518e-05, "loss": 0.6358, "step": 223 }, { "epoch": 0.03878368141975977, "grad_norm": 1.7578125, "learning_rate": 1.9997442732566955e-05, "loss": 0.7169, "step": 224 }, { "epoch": 0.03895682285466941, "grad_norm": 1.8125, "learning_rate": 1.9997401321796238e-05, "loss": 0.6781, "step": 225 }, { "epoch": 0.03912996428957905, "grad_norm": 1.59375, "learning_rate": 1.999735957846675e-05, "loss": 0.5883, "step": 226 }, { "epoch": 0.03930310572448869, "grad_norm": 1.84375, "learning_rate": 1.9997317502579876e-05, "loss": 0.6647, "step": 227 }, { "epoch": 0.039476247159398334, "grad_norm": 1.5234375, "learning_rate": 1.999727509413702e-05, "loss": 0.6122, "step": 228 }, { "epoch": 0.03964938859430798, "grad_norm": 1.5546875, "learning_rate": 1.9997232353139586e-05, "loss": 0.6474, "step": 229 }, { "epoch": 0.039822530029217616, "grad_norm": 1.7890625, "learning_rate": 1.9997189279589003e-05, "loss": 0.6589, "step": 230 }, { "epoch": 0.03999567146412726, "grad_norm": 1.734375, "learning_rate": 1.99971458734867e-05, "loss": 0.6493, "step": 231 }, { "epoch": 0.0401688128990369, "grad_norm": 1.6796875, "learning_rate": 1.9997102134834117e-05, "loss": 0.7087, "step": 232 }, { "epoch": 0.04034195433394654, "grad_norm": 1.4765625, "learning_rate": 1.999705806363272e-05, "loss": 0.5922, "step": 233 }, { "epoch": 0.04051509576885618, "grad_norm": 1.5625, "learning_rate": 1.9997013659883966e-05, "loss": 0.6728, "step": 234 }, { "epoch": 0.040688237203765826, "grad_norm": 1.640625, "learning_rate": 1.9996968923589334e-05, "loss": 0.6858, "step": 235 }, { "epoch": 0.04086137863867547, "grad_norm": 1.578125, "learning_rate": 1.9996923854750313e-05, "loss": 0.6773, "step": 236 }, { "epoch": 0.04103452007358511, "grad_norm": 1.7265625, "learning_rate": 1.99968784533684e-05, "loss": 0.6673, "step": 237 }, { "epoch": 0.04120766150849475, "grad_norm": 1.5078125, "learning_rate": 1.9996832719445105e-05, "loss": 0.6232, "step": 238 }, { "epoch": 0.04138080294340439, "grad_norm": 1.6171875, "learning_rate": 1.9996786652981954e-05, "loss": 0.6204, "step": 239 }, { "epoch": 0.041553944378314035, "grad_norm": 1.6328125, "learning_rate": 1.9996740253980475e-05, "loss": 0.7474, "step": 240 }, { "epoch": 0.04172708581322368, "grad_norm": 1.6015625, "learning_rate": 1.9996693522442216e-05, "loss": 0.5699, "step": 241 }, { "epoch": 0.04190022724813332, "grad_norm": 1.5390625, "learning_rate": 1.999664645836873e-05, "loss": 0.6295, "step": 242 }, { "epoch": 0.04207336868304296, "grad_norm": 1.6171875, "learning_rate": 1.9996599061761575e-05, "loss": 0.6586, "step": 243 }, { "epoch": 0.0422465101179526, "grad_norm": 1.59375, "learning_rate": 1.9996551332622337e-05, "loss": 0.672, "step": 244 }, { "epoch": 0.042419651552862245, "grad_norm": 1.484375, "learning_rate": 1.9996503270952598e-05, "loss": 0.6227, "step": 245 }, { "epoch": 0.04259279298777189, "grad_norm": 1.6015625, "learning_rate": 1.9996454876753963e-05, "loss": 0.5908, "step": 246 }, { "epoch": 0.04276593442268153, "grad_norm": 1.5703125, "learning_rate": 1.9996406150028038e-05, "loss": 0.6829, "step": 247 }, { "epoch": 0.04293907585759117, "grad_norm": 1.6796875, "learning_rate": 1.9996357090776443e-05, "loss": 0.7351, "step": 248 }, { "epoch": 0.04311221729250081, "grad_norm": 1.53125, "learning_rate": 1.999630769900081e-05, "loss": 0.5962, "step": 249 }, { "epoch": 0.043285358727410454, "grad_norm": 1.625, "learning_rate": 1.999625797470278e-05, "loss": 0.6305, "step": 250 }, { "epoch": 0.04345850016232009, "grad_norm": 1.53125, "learning_rate": 1.9996207917884013e-05, "loss": 0.608, "step": 251 }, { "epoch": 0.04363164159722974, "grad_norm": 1.8671875, "learning_rate": 1.9996157528546173e-05, "loss": 0.8362, "step": 252 }, { "epoch": 0.04380478303213938, "grad_norm": 1.5859375, "learning_rate": 1.9996106806690933e-05, "loss": 0.6149, "step": 253 }, { "epoch": 0.04397792446704902, "grad_norm": 1.59375, "learning_rate": 1.999605575231998e-05, "loss": 0.6492, "step": 254 }, { "epoch": 0.044151065901958664, "grad_norm": 1.6875, "learning_rate": 1.999600436543501e-05, "loss": 0.6737, "step": 255 }, { "epoch": 0.0443242073368683, "grad_norm": 1.59375, "learning_rate": 1.9995952646037743e-05, "loss": 0.656, "step": 256 }, { "epoch": 0.044497348771777946, "grad_norm": 1.4921875, "learning_rate": 1.9995900594129888e-05, "loss": 0.6187, "step": 257 }, { "epoch": 0.04467049020668759, "grad_norm": 1.5546875, "learning_rate": 1.999584820971318e-05, "loss": 0.5761, "step": 258 }, { "epoch": 0.04484363164159723, "grad_norm": 1.546875, "learning_rate": 1.9995795492789368e-05, "loss": 0.5709, "step": 259 }, { "epoch": 0.04501677307650687, "grad_norm": 1.7265625, "learning_rate": 1.9995742443360196e-05, "loss": 0.698, "step": 260 }, { "epoch": 0.04518991451141651, "grad_norm": 1.6796875, "learning_rate": 1.9995689061427435e-05, "loss": 0.6538, "step": 261 }, { "epoch": 0.045363055946326156, "grad_norm": 1.515625, "learning_rate": 1.9995635346992855e-05, "loss": 0.7159, "step": 262 }, { "epoch": 0.0455361973812358, "grad_norm": 1.625, "learning_rate": 1.999558130005825e-05, "loss": 0.6543, "step": 263 }, { "epoch": 0.04570933881614544, "grad_norm": 1.7109375, "learning_rate": 1.9995526920625412e-05, "loss": 0.6662, "step": 264 }, { "epoch": 0.04588248025105508, "grad_norm": 1.7265625, "learning_rate": 1.9995472208696154e-05, "loss": 0.6835, "step": 265 }, { "epoch": 0.04605562168596472, "grad_norm": 1.609375, "learning_rate": 1.999541716427229e-05, "loss": 0.7075, "step": 266 }, { "epoch": 0.046228763120874365, "grad_norm": 1.7890625, "learning_rate": 1.999536178735566e-05, "loss": 0.7623, "step": 267 }, { "epoch": 0.046401904555784, "grad_norm": 1.5546875, "learning_rate": 1.9995306077948096e-05, "loss": 0.5804, "step": 268 }, { "epoch": 0.04657504599069365, "grad_norm": 1.671875, "learning_rate": 1.9995250036051462e-05, "loss": 0.7453, "step": 269 }, { "epoch": 0.04674818742560329, "grad_norm": 1.671875, "learning_rate": 1.9995193661667612e-05, "loss": 0.7901, "step": 270 }, { "epoch": 0.04692132886051293, "grad_norm": 1.5, "learning_rate": 1.9995136954798428e-05, "loss": 0.6165, "step": 271 }, { "epoch": 0.047094470295422575, "grad_norm": 1.6953125, "learning_rate": 1.9995079915445793e-05, "loss": 0.6308, "step": 272 }, { "epoch": 0.04726761173033221, "grad_norm": 1.53125, "learning_rate": 1.9995022543611605e-05, "loss": 0.6205, "step": 273 }, { "epoch": 0.04744075316524186, "grad_norm": 1.640625, "learning_rate": 1.9994964839297774e-05, "loss": 0.5856, "step": 274 }, { "epoch": 0.0476138946001515, "grad_norm": 1.7109375, "learning_rate": 1.9994906802506218e-05, "loss": 0.6507, "step": 275 }, { "epoch": 0.04778703603506114, "grad_norm": 1.625, "learning_rate": 1.999484843323887e-05, "loss": 0.5779, "step": 276 }, { "epoch": 0.047960177469970784, "grad_norm": 1.515625, "learning_rate": 1.9994789731497667e-05, "loss": 0.604, "step": 277 }, { "epoch": 0.04813331890488042, "grad_norm": 1.6640625, "learning_rate": 1.999473069728456e-05, "loss": 0.7249, "step": 278 }, { "epoch": 0.04830646033979007, "grad_norm": 1.625, "learning_rate": 1.9994671330601524e-05, "loss": 0.635, "step": 279 }, { "epoch": 0.04847960177469971, "grad_norm": 1.515625, "learning_rate": 1.9994611631450526e-05, "loss": 0.6255, "step": 280 }, { "epoch": 0.04865274320960935, "grad_norm": 1.671875, "learning_rate": 1.999455159983355e-05, "loss": 0.7069, "step": 281 }, { "epoch": 0.048825884644518994, "grad_norm": 1.5546875, "learning_rate": 1.9994491235752595e-05, "loss": 0.639, "step": 282 }, { "epoch": 0.04899902607942863, "grad_norm": 1.75, "learning_rate": 1.9994430539209673e-05, "loss": 0.708, "step": 283 }, { "epoch": 0.049172167514338276, "grad_norm": 1.5078125, "learning_rate": 1.9994369510206797e-05, "loss": 0.6208, "step": 284 }, { "epoch": 0.049345308949247914, "grad_norm": 1.4140625, "learning_rate": 1.9994308148746004e-05, "loss": 0.5993, "step": 285 }, { "epoch": 0.04951845038415756, "grad_norm": 1.515625, "learning_rate": 1.9994246454829325e-05, "loss": 0.6201, "step": 286 }, { "epoch": 0.0496915918190672, "grad_norm": 1.46875, "learning_rate": 1.9994184428458818e-05, "loss": 0.6839, "step": 287 }, { "epoch": 0.04986473325397684, "grad_norm": 1.5078125, "learning_rate": 1.999412206963655e-05, "loss": 0.6454, "step": 288 }, { "epoch": 0.050037874688886486, "grad_norm": 1.59375, "learning_rate": 1.9994059378364593e-05, "loss": 0.6076, "step": 289 }, { "epoch": 0.05021101612379612, "grad_norm": 1.546875, "learning_rate": 1.9993996354645025e-05, "loss": 0.5897, "step": 290 }, { "epoch": 0.05038415755870577, "grad_norm": 1.59375, "learning_rate": 1.9993932998479954e-05, "loss": 0.62, "step": 291 }, { "epoch": 0.05055729899361541, "grad_norm": 1.6484375, "learning_rate": 1.999386930987148e-05, "loss": 0.6972, "step": 292 }, { "epoch": 0.05073044042852505, "grad_norm": 1.6328125, "learning_rate": 1.9993805288821723e-05, "loss": 0.6228, "step": 293 }, { "epoch": 0.050903581863434695, "grad_norm": 1.546875, "learning_rate": 1.9993740935332816e-05, "loss": 0.5862, "step": 294 }, { "epoch": 0.05107672329834433, "grad_norm": 1.6328125, "learning_rate": 1.9993676249406895e-05, "loss": 0.6093, "step": 295 }, { "epoch": 0.05124986473325398, "grad_norm": 1.5, "learning_rate": 1.9993611231046113e-05, "loss": 0.6175, "step": 296 }, { "epoch": 0.051423006168163615, "grad_norm": 1.484375, "learning_rate": 1.9993545880252635e-05, "loss": 0.6673, "step": 297 }, { "epoch": 0.05159614760307326, "grad_norm": 1.6171875, "learning_rate": 1.9993480197028635e-05, "loss": 0.6127, "step": 298 }, { "epoch": 0.051769289037982905, "grad_norm": 1.5, "learning_rate": 1.9993414181376292e-05, "loss": 0.5749, "step": 299 }, { "epoch": 0.05194243047289254, "grad_norm": 1.5234375, "learning_rate": 1.999334783329781e-05, "loss": 0.5923, "step": 300 }, { "epoch": 0.05211557190780219, "grad_norm": 1.640625, "learning_rate": 1.999328115279539e-05, "loss": 0.6116, "step": 301 }, { "epoch": 0.052288713342711825, "grad_norm": 1.515625, "learning_rate": 1.9993214139871254e-05, "loss": 0.6257, "step": 302 }, { "epoch": 0.05246185477762147, "grad_norm": 1.59375, "learning_rate": 1.9993146794527627e-05, "loss": 0.731, "step": 303 }, { "epoch": 0.052634996212531114, "grad_norm": 1.6015625, "learning_rate": 1.9993079116766754e-05, "loss": 0.681, "step": 304 }, { "epoch": 0.05280813764744075, "grad_norm": 1.4921875, "learning_rate": 1.9993011106590884e-05, "loss": 0.6024, "step": 305 }, { "epoch": 0.052981279082350397, "grad_norm": 1.65625, "learning_rate": 1.9992942764002276e-05, "loss": 0.6184, "step": 306 }, { "epoch": 0.053154420517260034, "grad_norm": 1.7578125, "learning_rate": 1.9992874089003212e-05, "loss": 0.63, "step": 307 }, { "epoch": 0.05332756195216968, "grad_norm": 1.8046875, "learning_rate": 1.999280508159597e-05, "loss": 0.7131, "step": 308 }, { "epoch": 0.053500703387079324, "grad_norm": 1.5859375, "learning_rate": 1.9992735741782844e-05, "loss": 0.6558, "step": 309 }, { "epoch": 0.05367384482198896, "grad_norm": 1.4375, "learning_rate": 1.9992666069566145e-05, "loss": 0.6547, "step": 310 }, { "epoch": 0.053846986256898606, "grad_norm": 1.5859375, "learning_rate": 1.9992596064948187e-05, "loss": 0.6336, "step": 311 }, { "epoch": 0.054020127691808244, "grad_norm": 1.5, "learning_rate": 1.9992525727931303e-05, "loss": 0.6664, "step": 312 }, { "epoch": 0.05419326912671789, "grad_norm": 1.609375, "learning_rate": 1.999245505851783e-05, "loss": 0.6851, "step": 313 }, { "epoch": 0.054366410561627526, "grad_norm": 1.6640625, "learning_rate": 1.9992384056710118e-05, "loss": 0.6559, "step": 314 }, { "epoch": 0.05453955199653717, "grad_norm": 1.5078125, "learning_rate": 1.999231272251053e-05, "loss": 0.6138, "step": 315 }, { "epoch": 0.054712693431446816, "grad_norm": 1.625, "learning_rate": 1.999224105592144e-05, "loss": 0.6651, "step": 316 }, { "epoch": 0.05488583486635645, "grad_norm": 1.625, "learning_rate": 1.999216905694523e-05, "loss": 0.6831, "step": 317 }, { "epoch": 0.0550589763012661, "grad_norm": 1.421875, "learning_rate": 1.9992096725584296e-05, "loss": 0.5828, "step": 318 }, { "epoch": 0.055232117736175736, "grad_norm": 1.5625, "learning_rate": 1.9992024061841042e-05, "loss": 0.604, "step": 319 }, { "epoch": 0.05540525917108538, "grad_norm": 1.765625, "learning_rate": 1.999195106571789e-05, "loss": 0.6188, "step": 320 }, { "epoch": 0.055578400605995025, "grad_norm": 1.4453125, "learning_rate": 1.999187773721726e-05, "loss": 0.6098, "step": 321 }, { "epoch": 0.05575154204090466, "grad_norm": 1.703125, "learning_rate": 1.99918040763416e-05, "loss": 0.6271, "step": 322 }, { "epoch": 0.05592468347581431, "grad_norm": 1.6328125, "learning_rate": 1.999173008309336e-05, "loss": 0.6369, "step": 323 }, { "epoch": 0.056097824910723945, "grad_norm": 1.5390625, "learning_rate": 1.9991655757474992e-05, "loss": 0.5877, "step": 324 }, { "epoch": 0.05627096634563359, "grad_norm": 1.5703125, "learning_rate": 1.9991581099488978e-05, "loss": 0.6568, "step": 325 }, { "epoch": 0.056444107780543235, "grad_norm": 1.5625, "learning_rate": 1.9991506109137796e-05, "loss": 0.5798, "step": 326 }, { "epoch": 0.05661724921545287, "grad_norm": 1.703125, "learning_rate": 1.9991430786423944e-05, "loss": 0.6227, "step": 327 }, { "epoch": 0.05679039065036252, "grad_norm": 1.5546875, "learning_rate": 1.9991355131349924e-05, "loss": 0.7054, "step": 328 }, { "epoch": 0.056963532085272155, "grad_norm": 1.421875, "learning_rate": 1.9991279143918255e-05, "loss": 0.5581, "step": 329 }, { "epoch": 0.0571366735201818, "grad_norm": 1.6875, "learning_rate": 1.9991202824131465e-05, "loss": 0.6396, "step": 330 }, { "epoch": 0.05730981495509144, "grad_norm": 1.5, "learning_rate": 1.999112617199209e-05, "loss": 0.6782, "step": 331 }, { "epoch": 0.05748295639000108, "grad_norm": 1.6328125, "learning_rate": 1.9991049187502686e-05, "loss": 0.6421, "step": 332 }, { "epoch": 0.057656097824910726, "grad_norm": 1.5234375, "learning_rate": 1.9990971870665806e-05, "loss": 0.6269, "step": 333 }, { "epoch": 0.057829239259820364, "grad_norm": 1.671875, "learning_rate": 1.9990894221484027e-05, "loss": 0.5864, "step": 334 }, { "epoch": 0.05800238069473001, "grad_norm": 1.578125, "learning_rate": 1.999081623995993e-05, "loss": 0.642, "step": 335 }, { "epoch": 0.05817552212963965, "grad_norm": 1.390625, "learning_rate": 1.999073792609611e-05, "loss": 0.6356, "step": 336 }, { "epoch": 0.05834866356454929, "grad_norm": 1.4765625, "learning_rate": 1.9990659279895172e-05, "loss": 0.5975, "step": 337 }, { "epoch": 0.058521804999458936, "grad_norm": 1.5546875, "learning_rate": 1.9990580301359733e-05, "loss": 0.6254, "step": 338 }, { "epoch": 0.058694946434368574, "grad_norm": 1.578125, "learning_rate": 1.9990500990492415e-05, "loss": 0.6154, "step": 339 }, { "epoch": 0.05886808786927822, "grad_norm": 1.734375, "learning_rate": 1.9990421347295867e-05, "loss": 0.7123, "step": 340 }, { "epoch": 0.059041229304187856, "grad_norm": 1.671875, "learning_rate": 1.9990341371772725e-05, "loss": 0.641, "step": 341 }, { "epoch": 0.0592143707390975, "grad_norm": 1.4140625, "learning_rate": 1.999026106392566e-05, "loss": 0.6278, "step": 342 }, { "epoch": 0.059387512174007145, "grad_norm": 1.5546875, "learning_rate": 1.999018042375733e-05, "loss": 0.6336, "step": 343 }, { "epoch": 0.05956065360891678, "grad_norm": 1.546875, "learning_rate": 1.9990099451270437e-05, "loss": 0.5982, "step": 344 }, { "epoch": 0.05973379504382643, "grad_norm": 1.484375, "learning_rate": 1.999001814646766e-05, "loss": 0.6291, "step": 345 }, { "epoch": 0.059906936478736066, "grad_norm": 1.640625, "learning_rate": 1.9989936509351708e-05, "loss": 0.6419, "step": 346 }, { "epoch": 0.06008007791364571, "grad_norm": 1.578125, "learning_rate": 1.9989854539925296e-05, "loss": 0.6735, "step": 347 }, { "epoch": 0.06025321934855535, "grad_norm": 1.484375, "learning_rate": 1.9989772238191153e-05, "loss": 0.5943, "step": 348 }, { "epoch": 0.06042636078346499, "grad_norm": 1.5546875, "learning_rate": 1.9989689604152016e-05, "loss": 0.6041, "step": 349 }, { "epoch": 0.06059950221837464, "grad_norm": 1.8203125, "learning_rate": 1.998960663781063e-05, "loss": 0.697, "step": 350 }, { "epoch": 0.060772643653284275, "grad_norm": 1.5390625, "learning_rate": 1.9989523339169758e-05, "loss": 0.706, "step": 351 }, { "epoch": 0.06094578508819392, "grad_norm": 1.75, "learning_rate": 1.998943970823217e-05, "loss": 0.6454, "step": 352 }, { "epoch": 0.06111892652310356, "grad_norm": 1.5390625, "learning_rate": 1.9989355745000648e-05, "loss": 0.6, "step": 353 }, { "epoch": 0.0612920679580132, "grad_norm": 1.59375, "learning_rate": 1.9989271449477985e-05, "loss": 0.6122, "step": 354 }, { "epoch": 0.06146520939292285, "grad_norm": 1.8515625, "learning_rate": 1.9989186821666987e-05, "loss": 0.6864, "step": 355 }, { "epoch": 0.061638350827832485, "grad_norm": 1.546875, "learning_rate": 1.9989101861570466e-05, "loss": 0.656, "step": 356 }, { "epoch": 0.06181149226274213, "grad_norm": 1.5625, "learning_rate": 1.9989016569191248e-05, "loss": 0.6411, "step": 357 }, { "epoch": 0.06198463369765177, "grad_norm": 1.6640625, "learning_rate": 1.9988930944532175e-05, "loss": 0.5897, "step": 358 }, { "epoch": 0.06215777513256141, "grad_norm": 1.5546875, "learning_rate": 1.998884498759609e-05, "loss": 0.6202, "step": 359 }, { "epoch": 0.062330916567471056, "grad_norm": 1.6015625, "learning_rate": 1.9988758698385854e-05, "loss": 0.6355, "step": 360 }, { "epoch": 0.0625040580023807, "grad_norm": 1.765625, "learning_rate": 1.9988672076904338e-05, "loss": 0.6146, "step": 361 }, { "epoch": 0.06267719943729033, "grad_norm": 1.78125, "learning_rate": 1.9988585123154426e-05, "loss": 0.6277, "step": 362 }, { "epoch": 0.06285034087219998, "grad_norm": 1.6953125, "learning_rate": 1.9988497837139005e-05, "loss": 0.6867, "step": 363 }, { "epoch": 0.06302348230710962, "grad_norm": 1.78125, "learning_rate": 1.998841021886098e-05, "loss": 0.694, "step": 364 }, { "epoch": 0.06319662374201926, "grad_norm": 1.5859375, "learning_rate": 1.998832226832327e-05, "loss": 0.6258, "step": 365 }, { "epoch": 0.06336976517692891, "grad_norm": 1.5234375, "learning_rate": 1.9988233985528794e-05, "loss": 0.6099, "step": 366 }, { "epoch": 0.06354290661183855, "grad_norm": 1.703125, "learning_rate": 1.998814537048049e-05, "loss": 0.6526, "step": 367 }, { "epoch": 0.06371604804674819, "grad_norm": 1.59375, "learning_rate": 1.9988056423181312e-05, "loss": 0.6763, "step": 368 }, { "epoch": 0.06388918948165782, "grad_norm": 1.5234375, "learning_rate": 1.9987967143634215e-05, "loss": 0.6392, "step": 369 }, { "epoch": 0.06406233091656748, "grad_norm": 1.53125, "learning_rate": 1.9987877531842165e-05, "loss": 0.6754, "step": 370 }, { "epoch": 0.06423547235147711, "grad_norm": 2.734375, "learning_rate": 1.998778758780815e-05, "loss": 0.6398, "step": 371 }, { "epoch": 0.06440861378638675, "grad_norm": 1.625, "learning_rate": 1.9987697311535154e-05, "loss": 0.5641, "step": 372 }, { "epoch": 0.0645817552212964, "grad_norm": 1.578125, "learning_rate": 1.9987606703026187e-05, "loss": 0.6528, "step": 373 }, { "epoch": 0.06475489665620604, "grad_norm": 1.4296875, "learning_rate": 1.998751576228426e-05, "loss": 0.648, "step": 374 }, { "epoch": 0.06492803809111568, "grad_norm": 1.5390625, "learning_rate": 1.99874244893124e-05, "loss": 0.5804, "step": 375 }, { "epoch": 0.06510117952602532, "grad_norm": 1.671875, "learning_rate": 1.998733288411364e-05, "loss": 0.6343, "step": 376 }, { "epoch": 0.06527432096093497, "grad_norm": 1.5546875, "learning_rate": 1.9987240946691025e-05, "loss": 0.6249, "step": 377 }, { "epoch": 0.0654474623958446, "grad_norm": 1.5703125, "learning_rate": 1.9987148677047623e-05, "loss": 0.6283, "step": 378 }, { "epoch": 0.06562060383075424, "grad_norm": 1.7421875, "learning_rate": 1.9987056075186493e-05, "loss": 0.6848, "step": 379 }, { "epoch": 0.0657937452656639, "grad_norm": 1.5234375, "learning_rate": 1.998696314111072e-05, "loss": 0.6937, "step": 380 }, { "epoch": 0.06596688670057353, "grad_norm": 1.46875, "learning_rate": 1.99868698748234e-05, "loss": 0.59, "step": 381 }, { "epoch": 0.06614002813548317, "grad_norm": 1.6015625, "learning_rate": 1.9986776276327627e-05, "loss": 0.6168, "step": 382 }, { "epoch": 0.06631316957039282, "grad_norm": 1.578125, "learning_rate": 1.998668234562652e-05, "loss": 0.6089, "step": 383 }, { "epoch": 0.06648631100530246, "grad_norm": 1.4921875, "learning_rate": 1.9986588082723196e-05, "loss": 0.6062, "step": 384 }, { "epoch": 0.0666594524402121, "grad_norm": 1.703125, "learning_rate": 1.99864934876208e-05, "loss": 0.7145, "step": 385 }, { "epoch": 0.06683259387512173, "grad_norm": 1.5, "learning_rate": 1.9986398560322476e-05, "loss": 0.6709, "step": 386 }, { "epoch": 0.06700573531003139, "grad_norm": 1.5234375, "learning_rate": 1.9986303300831378e-05, "loss": 0.6148, "step": 387 }, { "epoch": 0.06717887674494102, "grad_norm": 1.578125, "learning_rate": 1.998620770915068e-05, "loss": 0.6652, "step": 388 }, { "epoch": 0.06735201817985066, "grad_norm": 1.609375, "learning_rate": 1.9986111785283556e-05, "loss": 0.5602, "step": 389 }, { "epoch": 0.06752515961476031, "grad_norm": 1.453125, "learning_rate": 1.99860155292332e-05, "loss": 0.5627, "step": 390 }, { "epoch": 0.06769830104966995, "grad_norm": 1.421875, "learning_rate": 1.9985918941002817e-05, "loss": 0.6725, "step": 391 }, { "epoch": 0.06787144248457959, "grad_norm": 1.421875, "learning_rate": 1.9985822020595614e-05, "loss": 0.6216, "step": 392 }, { "epoch": 0.06804458391948923, "grad_norm": 1.59375, "learning_rate": 1.998572476801482e-05, "loss": 0.6535, "step": 393 }, { "epoch": 0.06821772535439888, "grad_norm": 1.4375, "learning_rate": 1.9985627183263667e-05, "loss": 0.6087, "step": 394 }, { "epoch": 0.06839086678930852, "grad_norm": 1.5234375, "learning_rate": 1.9985529266345404e-05, "loss": 0.5966, "step": 395 }, { "epoch": 0.06856400822421815, "grad_norm": 1.6015625, "learning_rate": 1.9985431017263282e-05, "loss": 0.7126, "step": 396 }, { "epoch": 0.0687371496591278, "grad_norm": 1.4140625, "learning_rate": 1.9985332436020578e-05, "loss": 0.5969, "step": 397 }, { "epoch": 0.06891029109403744, "grad_norm": 1.765625, "learning_rate": 1.998523352262056e-05, "loss": 0.5536, "step": 398 }, { "epoch": 0.06908343252894708, "grad_norm": 1.4921875, "learning_rate": 1.9985134277066533e-05, "loss": 0.6036, "step": 399 }, { "epoch": 0.06925657396385673, "grad_norm": 1.5, "learning_rate": 1.9985034699361785e-05, "loss": 0.6881, "step": 400 }, { "epoch": 0.06942971539876637, "grad_norm": 1.7578125, "learning_rate": 1.9984934789509637e-05, "loss": 0.7233, "step": 401 }, { "epoch": 0.06960285683367601, "grad_norm": 1.59375, "learning_rate": 1.998483454751341e-05, "loss": 0.6062, "step": 402 }, { "epoch": 0.06977599826858565, "grad_norm": 1.5703125, "learning_rate": 1.998473397337643e-05, "loss": 0.6759, "step": 403 }, { "epoch": 0.0699491397034953, "grad_norm": 1.6640625, "learning_rate": 1.998463306710206e-05, "loss": 0.7478, "step": 404 }, { "epoch": 0.07012228113840493, "grad_norm": 1.6328125, "learning_rate": 1.998453182869364e-05, "loss": 0.6041, "step": 405 }, { "epoch": 0.07029542257331457, "grad_norm": 1.734375, "learning_rate": 1.9984430258154548e-05, "loss": 0.6672, "step": 406 }, { "epoch": 0.07046856400822422, "grad_norm": 1.59375, "learning_rate": 1.998432835548816e-05, "loss": 0.6402, "step": 407 }, { "epoch": 0.07064170544313386, "grad_norm": 1.546875, "learning_rate": 1.998422612069786e-05, "loss": 0.6389, "step": 408 }, { "epoch": 0.0708148468780435, "grad_norm": 1.6171875, "learning_rate": 1.9984123553787058e-05, "loss": 0.6433, "step": 409 }, { "epoch": 0.07098798831295314, "grad_norm": 1.6171875, "learning_rate": 1.998402065475916e-05, "loss": 0.6899, "step": 410 }, { "epoch": 0.07116112974786279, "grad_norm": 1.671875, "learning_rate": 1.9983917423617592e-05, "loss": 0.6633, "step": 411 }, { "epoch": 0.07133427118277243, "grad_norm": 1.5078125, "learning_rate": 1.998381386036578e-05, "loss": 0.6451, "step": 412 }, { "epoch": 0.07150741261768206, "grad_norm": 1.796875, "learning_rate": 1.9983709965007182e-05, "loss": 0.6337, "step": 413 }, { "epoch": 0.07168055405259172, "grad_norm": 1.65625, "learning_rate": 1.9983605737545248e-05, "loss": 0.6368, "step": 414 }, { "epoch": 0.07185369548750135, "grad_norm": 1.6796875, "learning_rate": 1.998350117798344e-05, "loss": 0.6253, "step": 415 }, { "epoch": 0.07202683692241099, "grad_norm": 1.9453125, "learning_rate": 1.9983396286325246e-05, "loss": 0.5927, "step": 416 }, { "epoch": 0.07219997835732063, "grad_norm": 1.53125, "learning_rate": 1.9983291062574147e-05, "loss": 0.6463, "step": 417 }, { "epoch": 0.07237311979223028, "grad_norm": 1.640625, "learning_rate": 1.9983185506733643e-05, "loss": 0.6434, "step": 418 }, { "epoch": 0.07254626122713992, "grad_norm": 1.4609375, "learning_rate": 1.998307961880725e-05, "loss": 0.6022, "step": 419 }, { "epoch": 0.07271940266204956, "grad_norm": 1.5078125, "learning_rate": 1.998297339879849e-05, "loss": 0.6881, "step": 420 }, { "epoch": 0.07289254409695921, "grad_norm": 1.5234375, "learning_rate": 1.9982866846710897e-05, "loss": 0.7096, "step": 421 }, { "epoch": 0.07306568553186885, "grad_norm": 1.6328125, "learning_rate": 1.998275996254801e-05, "loss": 0.628, "step": 422 }, { "epoch": 0.07323882696677848, "grad_norm": 1.6015625, "learning_rate": 1.9982652746313387e-05, "loss": 0.646, "step": 423 }, { "epoch": 0.07341196840168814, "grad_norm": 1.5, "learning_rate": 1.9982545198010598e-05, "loss": 0.5831, "step": 424 }, { "epoch": 0.07358510983659777, "grad_norm": 1.6328125, "learning_rate": 1.9982437317643218e-05, "loss": 0.6979, "step": 425 }, { "epoch": 0.07375825127150741, "grad_norm": 1.4609375, "learning_rate": 1.998232910521483e-05, "loss": 0.5976, "step": 426 }, { "epoch": 0.07393139270641705, "grad_norm": 1.6640625, "learning_rate": 1.9982220560729043e-05, "loss": 0.6709, "step": 427 }, { "epoch": 0.0741045341413267, "grad_norm": 1.5859375, "learning_rate": 1.9982111684189464e-05, "loss": 0.711, "step": 428 }, { "epoch": 0.07427767557623634, "grad_norm": 1.546875, "learning_rate": 1.9982002475599715e-05, "loss": 0.6198, "step": 429 }, { "epoch": 0.07445081701114598, "grad_norm": 1.5546875, "learning_rate": 1.9981892934963428e-05, "loss": 0.7043, "step": 430 }, { "epoch": 0.07462395844605563, "grad_norm": 1.6015625, "learning_rate": 1.9981783062284246e-05, "loss": 0.6681, "step": 431 }, { "epoch": 0.07479709988096526, "grad_norm": 1.5390625, "learning_rate": 1.9981672857565825e-05, "loss": 0.6465, "step": 432 }, { "epoch": 0.0749702413158749, "grad_norm": 1.609375, "learning_rate": 1.9981562320811833e-05, "loss": 0.6032, "step": 433 }, { "epoch": 0.07514338275078454, "grad_norm": 1.6484375, "learning_rate": 1.998145145202594e-05, "loss": 0.5847, "step": 434 }, { "epoch": 0.07531652418569419, "grad_norm": 1.4375, "learning_rate": 1.9981340251211843e-05, "loss": 0.6176, "step": 435 }, { "epoch": 0.07548966562060383, "grad_norm": 1.5546875, "learning_rate": 1.9981228718373234e-05, "loss": 0.6541, "step": 436 }, { "epoch": 0.07566280705551347, "grad_norm": 1.5703125, "learning_rate": 1.9981116853513828e-05, "loss": 0.546, "step": 437 }, { "epoch": 0.07583594849042312, "grad_norm": 1.484375, "learning_rate": 1.9981004656637344e-05, "loss": 0.6045, "step": 438 }, { "epoch": 0.07600908992533276, "grad_norm": 1.546875, "learning_rate": 1.9980892127747512e-05, "loss": 0.6886, "step": 439 }, { "epoch": 0.0761822313602424, "grad_norm": 1.5703125, "learning_rate": 1.998077926684808e-05, "loss": 0.5469, "step": 440 }, { "epoch": 0.07635537279515205, "grad_norm": 1.609375, "learning_rate": 1.9980666073942795e-05, "loss": 0.6169, "step": 441 }, { "epoch": 0.07652851423006168, "grad_norm": 1.5390625, "learning_rate": 1.9980552549035432e-05, "loss": 0.659, "step": 442 }, { "epoch": 0.07670165566497132, "grad_norm": 1.4609375, "learning_rate": 1.998043869212976e-05, "loss": 0.6593, "step": 443 }, { "epoch": 0.07687479709988096, "grad_norm": 1.4609375, "learning_rate": 1.998032450322957e-05, "loss": 0.6003, "step": 444 }, { "epoch": 0.07704793853479061, "grad_norm": 1.4765625, "learning_rate": 1.9980209982338658e-05, "loss": 0.578, "step": 445 }, { "epoch": 0.07722107996970025, "grad_norm": 1.4453125, "learning_rate": 1.9980095129460836e-05, "loss": 0.6083, "step": 446 }, { "epoch": 0.07739422140460989, "grad_norm": 1.546875, "learning_rate": 1.9979979944599922e-05, "loss": 0.6015, "step": 447 }, { "epoch": 0.07756736283951954, "grad_norm": 1.5390625, "learning_rate": 1.997986442775975e-05, "loss": 0.6936, "step": 448 }, { "epoch": 0.07774050427442918, "grad_norm": 1.625, "learning_rate": 1.997974857894416e-05, "loss": 0.5752, "step": 449 }, { "epoch": 0.07791364570933881, "grad_norm": 1.4921875, "learning_rate": 1.9979632398157007e-05, "loss": 0.6519, "step": 450 }, { "epoch": 0.07808678714424845, "grad_norm": 1.4453125, "learning_rate": 1.9979515885402156e-05, "loss": 0.6475, "step": 451 }, { "epoch": 0.0782599285791581, "grad_norm": 1.515625, "learning_rate": 1.9979399040683485e-05, "loss": 0.6385, "step": 452 }, { "epoch": 0.07843307001406774, "grad_norm": 1.5546875, "learning_rate": 1.9979281864004877e-05, "loss": 0.7388, "step": 453 }, { "epoch": 0.07860621144897738, "grad_norm": 1.4609375, "learning_rate": 1.9979164355370225e-05, "loss": 0.6375, "step": 454 }, { "epoch": 0.07877935288388703, "grad_norm": 1.578125, "learning_rate": 1.9979046514783453e-05, "loss": 0.6093, "step": 455 }, { "epoch": 0.07895249431879667, "grad_norm": 1.6171875, "learning_rate": 1.9978928342248466e-05, "loss": 0.6532, "step": 456 }, { "epoch": 0.0791256357537063, "grad_norm": 1.6015625, "learning_rate": 1.9978809837769203e-05, "loss": 0.5944, "step": 457 }, { "epoch": 0.07929877718861596, "grad_norm": 1.515625, "learning_rate": 1.9978691001349602e-05, "loss": 0.7028, "step": 458 }, { "epoch": 0.0794719186235256, "grad_norm": 1.4765625, "learning_rate": 1.997857183299362e-05, "loss": 0.646, "step": 459 }, { "epoch": 0.07964506005843523, "grad_norm": 1.6171875, "learning_rate": 1.9978452332705215e-05, "loss": 0.6206, "step": 460 }, { "epoch": 0.07981820149334487, "grad_norm": 1.5546875, "learning_rate": 1.9978332500488372e-05, "loss": 0.6225, "step": 461 }, { "epoch": 0.07999134292825452, "grad_norm": 1.53125, "learning_rate": 1.9978212336347067e-05, "loss": 0.6353, "step": 462 }, { "epoch": 0.08016448436316416, "grad_norm": 1.59375, "learning_rate": 1.9978091840285303e-05, "loss": 0.6335, "step": 463 }, { "epoch": 0.0803376257980738, "grad_norm": 1.5, "learning_rate": 1.9977971012307085e-05, "loss": 0.5941, "step": 464 }, { "epoch": 0.08051076723298345, "grad_norm": 1.5078125, "learning_rate": 1.9977849852416436e-05, "loss": 0.637, "step": 465 }, { "epoch": 0.08068390866789309, "grad_norm": 1.4453125, "learning_rate": 1.9977728360617383e-05, "loss": 0.5416, "step": 466 }, { "epoch": 0.08085705010280272, "grad_norm": 1.4140625, "learning_rate": 1.9977606536913968e-05, "loss": 0.6011, "step": 467 }, { "epoch": 0.08103019153771236, "grad_norm": 1.4609375, "learning_rate": 1.9977484381310248e-05, "loss": 0.6329, "step": 468 }, { "epoch": 0.08120333297262201, "grad_norm": 1.46875, "learning_rate": 1.9977361893810277e-05, "loss": 0.6079, "step": 469 }, { "epoch": 0.08137647440753165, "grad_norm": 1.5703125, "learning_rate": 1.9977239074418135e-05, "loss": 0.6888, "step": 470 }, { "epoch": 0.08154961584244129, "grad_norm": 1.5078125, "learning_rate": 1.9977115923137912e-05, "loss": 0.6417, "step": 471 }, { "epoch": 0.08172275727735094, "grad_norm": 1.4765625, "learning_rate": 1.9976992439973697e-05, "loss": 0.6174, "step": 472 }, { "epoch": 0.08189589871226058, "grad_norm": 1.5703125, "learning_rate": 1.9976868624929603e-05, "loss": 0.613, "step": 473 }, { "epoch": 0.08206904014717022, "grad_norm": 1.765625, "learning_rate": 1.997674447800974e-05, "loss": 0.6979, "step": 474 }, { "epoch": 0.08224218158207987, "grad_norm": 1.6640625, "learning_rate": 1.9976619999218252e-05, "loss": 0.7372, "step": 475 }, { "epoch": 0.0824153230169895, "grad_norm": 1.4375, "learning_rate": 1.997649518855927e-05, "loss": 0.6579, "step": 476 }, { "epoch": 0.08258846445189914, "grad_norm": 1.5625, "learning_rate": 1.9976370046036947e-05, "loss": 0.6861, "step": 477 }, { "epoch": 0.08276160588680878, "grad_norm": 1.59375, "learning_rate": 1.9976244571655442e-05, "loss": 0.582, "step": 478 }, { "epoch": 0.08293474732171843, "grad_norm": 1.625, "learning_rate": 1.9976118765418937e-05, "loss": 0.6164, "step": 479 }, { "epoch": 0.08310788875662807, "grad_norm": 1.4765625, "learning_rate": 1.9975992627331613e-05, "loss": 0.6012, "step": 480 }, { "epoch": 0.08328103019153771, "grad_norm": 1.46875, "learning_rate": 1.9975866157397665e-05, "loss": 0.6462, "step": 481 }, { "epoch": 0.08345417162644736, "grad_norm": 1.484375, "learning_rate": 1.99757393556213e-05, "loss": 0.6554, "step": 482 }, { "epoch": 0.083627313061357, "grad_norm": 1.4375, "learning_rate": 1.9975612222006738e-05, "loss": 0.5609, "step": 483 }, { "epoch": 0.08380045449626664, "grad_norm": 1.5625, "learning_rate": 1.997548475655821e-05, "loss": 0.6273, "step": 484 }, { "epoch": 0.08397359593117627, "grad_norm": 1.6640625, "learning_rate": 1.9975356959279946e-05, "loss": 0.7083, "step": 485 }, { "epoch": 0.08414673736608592, "grad_norm": 1.5078125, "learning_rate": 1.997522883017621e-05, "loss": 0.5603, "step": 486 }, { "epoch": 0.08431987880099556, "grad_norm": 1.5390625, "learning_rate": 1.9975100369251255e-05, "loss": 0.5793, "step": 487 }, { "epoch": 0.0844930202359052, "grad_norm": 1.6796875, "learning_rate": 1.9974971576509362e-05, "loss": 0.7135, "step": 488 }, { "epoch": 0.08466616167081485, "grad_norm": 1.6171875, "learning_rate": 1.9974842451954804e-05, "loss": 0.7345, "step": 489 }, { "epoch": 0.08483930310572449, "grad_norm": 1.5625, "learning_rate": 1.9974712995591887e-05, "loss": 0.5809, "step": 490 }, { "epoch": 0.08501244454063413, "grad_norm": 1.46875, "learning_rate": 1.9974583207424915e-05, "loss": 0.5442, "step": 491 }, { "epoch": 0.08518558597554378, "grad_norm": 1.4765625, "learning_rate": 1.99744530874582e-05, "loss": 0.5729, "step": 492 }, { "epoch": 0.08535872741045342, "grad_norm": 1.546875, "learning_rate": 1.9974322635696075e-05, "loss": 0.5567, "step": 493 }, { "epoch": 0.08553186884536305, "grad_norm": 1.453125, "learning_rate": 1.9974191852142878e-05, "loss": 0.5848, "step": 494 }, { "epoch": 0.08570501028027269, "grad_norm": 1.5078125, "learning_rate": 1.997406073680296e-05, "loss": 0.6282, "step": 495 }, { "epoch": 0.08587815171518234, "grad_norm": 1.7734375, "learning_rate": 1.9973929289680685e-05, "loss": 0.6909, "step": 496 }, { "epoch": 0.08605129315009198, "grad_norm": 1.5546875, "learning_rate": 1.997379751078042e-05, "loss": 0.6648, "step": 497 }, { "epoch": 0.08622443458500162, "grad_norm": 1.484375, "learning_rate": 1.9973665400106556e-05, "loss": 0.5688, "step": 498 }, { "epoch": 0.08639757601991127, "grad_norm": 1.4453125, "learning_rate": 1.9973532957663475e-05, "loss": 0.5777, "step": 499 }, { "epoch": 0.08657071745482091, "grad_norm": 1.6171875, "learning_rate": 1.9973400183455597e-05, "loss": 0.706, "step": 500 }, { "epoch": 0.08674385888973055, "grad_norm": 1.4609375, "learning_rate": 1.997326707748733e-05, "loss": 0.5951, "step": 501 }, { "epoch": 0.08691700032464018, "grad_norm": 1.6875, "learning_rate": 1.99731336397631e-05, "loss": 0.6452, "step": 502 }, { "epoch": 0.08709014175954984, "grad_norm": 1.4765625, "learning_rate": 1.9972999870287357e-05, "loss": 0.7094, "step": 503 }, { "epoch": 0.08726328319445947, "grad_norm": 1.53125, "learning_rate": 1.997286576906454e-05, "loss": 0.6291, "step": 504 }, { "epoch": 0.08743642462936911, "grad_norm": 1.4765625, "learning_rate": 1.9972731336099117e-05, "loss": 0.5462, "step": 505 }, { "epoch": 0.08760956606427876, "grad_norm": 1.5703125, "learning_rate": 1.9972596571395553e-05, "loss": 0.6552, "step": 506 }, { "epoch": 0.0877827074991884, "grad_norm": 1.5703125, "learning_rate": 1.9972461474958335e-05, "loss": 0.6284, "step": 507 }, { "epoch": 0.08795584893409804, "grad_norm": 1.3984375, "learning_rate": 1.9972326046791956e-05, "loss": 0.6649, "step": 508 }, { "epoch": 0.08812899036900769, "grad_norm": 1.5625, "learning_rate": 1.9972190286900923e-05, "loss": 0.6486, "step": 509 }, { "epoch": 0.08830213180391733, "grad_norm": 1.4765625, "learning_rate": 1.9972054195289743e-05, "loss": 0.634, "step": 510 }, { "epoch": 0.08847527323882697, "grad_norm": 1.609375, "learning_rate": 1.9971917771962956e-05, "loss": 0.6323, "step": 511 }, { "epoch": 0.0886484146737366, "grad_norm": 1.5390625, "learning_rate": 1.9971781016925093e-05, "loss": 0.6638, "step": 512 }, { "epoch": 0.08882155610864625, "grad_norm": 1.4921875, "learning_rate": 1.9971643930180705e-05, "loss": 0.6214, "step": 513 }, { "epoch": 0.08899469754355589, "grad_norm": 1.53125, "learning_rate": 1.997150651173435e-05, "loss": 0.6595, "step": 514 }, { "epoch": 0.08916783897846553, "grad_norm": 1.46875, "learning_rate": 1.99713687615906e-05, "loss": 0.5949, "step": 515 }, { "epoch": 0.08934098041337518, "grad_norm": 1.5234375, "learning_rate": 1.997123067975404e-05, "loss": 0.6298, "step": 516 }, { "epoch": 0.08951412184828482, "grad_norm": 1.59375, "learning_rate": 1.9971092266229256e-05, "loss": 0.6552, "step": 517 }, { "epoch": 0.08968726328319446, "grad_norm": 1.4921875, "learning_rate": 1.997095352102086e-05, "loss": 0.6526, "step": 518 }, { "epoch": 0.0898604047181041, "grad_norm": 1.421875, "learning_rate": 1.9970814444133462e-05, "loss": 0.576, "step": 519 }, { "epoch": 0.09003354615301375, "grad_norm": 1.453125, "learning_rate": 1.9970675035571694e-05, "loss": 0.5949, "step": 520 }, { "epoch": 0.09020668758792338, "grad_norm": 1.4609375, "learning_rate": 1.997053529534019e-05, "loss": 0.595, "step": 521 }, { "epoch": 0.09037982902283302, "grad_norm": 1.515625, "learning_rate": 1.9970395223443596e-05, "loss": 0.6015, "step": 522 }, { "epoch": 0.09055297045774267, "grad_norm": 1.609375, "learning_rate": 1.9970254819886576e-05, "loss": 0.6383, "step": 523 }, { "epoch": 0.09072611189265231, "grad_norm": 1.5234375, "learning_rate": 1.9970114084673796e-05, "loss": 0.6554, "step": 524 }, { "epoch": 0.09089925332756195, "grad_norm": 1.515625, "learning_rate": 1.9969973017809945e-05, "loss": 0.623, "step": 525 }, { "epoch": 0.0910723947624716, "grad_norm": 1.6328125, "learning_rate": 1.9969831619299708e-05, "loss": 0.6056, "step": 526 }, { "epoch": 0.09124553619738124, "grad_norm": 1.4375, "learning_rate": 1.996968988914779e-05, "loss": 0.6687, "step": 527 }, { "epoch": 0.09141867763229088, "grad_norm": 1.59375, "learning_rate": 1.9969547827358907e-05, "loss": 0.6736, "step": 528 }, { "epoch": 0.09159181906720051, "grad_norm": 1.6640625, "learning_rate": 1.996940543393778e-05, "loss": 0.6495, "step": 529 }, { "epoch": 0.09176496050211017, "grad_norm": 1.53125, "learning_rate": 1.9969262708889156e-05, "loss": 0.6993, "step": 530 }, { "epoch": 0.0919381019370198, "grad_norm": 1.4453125, "learning_rate": 1.9969119652217772e-05, "loss": 0.5936, "step": 531 }, { "epoch": 0.09211124337192944, "grad_norm": 1.6796875, "learning_rate": 1.9968976263928394e-05, "loss": 0.6251, "step": 532 }, { "epoch": 0.09228438480683909, "grad_norm": 1.6640625, "learning_rate": 1.9968832544025788e-05, "loss": 0.6201, "step": 533 }, { "epoch": 0.09245752624174873, "grad_norm": 1.625, "learning_rate": 1.9968688492514735e-05, "loss": 0.6887, "step": 534 }, { "epoch": 0.09263066767665837, "grad_norm": 1.4921875, "learning_rate": 1.996854410940003e-05, "loss": 0.6425, "step": 535 }, { "epoch": 0.092803809111568, "grad_norm": 1.4140625, "learning_rate": 1.996839939468647e-05, "loss": 0.5546, "step": 536 }, { "epoch": 0.09297695054647766, "grad_norm": 1.6640625, "learning_rate": 1.9968254348378875e-05, "loss": 0.6316, "step": 537 }, { "epoch": 0.0931500919813873, "grad_norm": 1.5078125, "learning_rate": 1.9968108970482065e-05, "loss": 0.6112, "step": 538 }, { "epoch": 0.09332323341629693, "grad_norm": 1.484375, "learning_rate": 1.9967963261000878e-05, "loss": 0.6271, "step": 539 }, { "epoch": 0.09349637485120658, "grad_norm": 1.59375, "learning_rate": 1.9967817219940164e-05, "loss": 0.6733, "step": 540 }, { "epoch": 0.09366951628611622, "grad_norm": 1.65625, "learning_rate": 1.9967670847304774e-05, "loss": 0.6305, "step": 541 }, { "epoch": 0.09384265772102586, "grad_norm": 1.5625, "learning_rate": 1.9967524143099583e-05, "loss": 0.6298, "step": 542 }, { "epoch": 0.09401579915593551, "grad_norm": 1.5859375, "learning_rate": 1.9967377107329468e-05, "loss": 0.6569, "step": 543 }, { "epoch": 0.09418894059084515, "grad_norm": 1.3984375, "learning_rate": 1.996722973999932e-05, "loss": 0.5822, "step": 544 }, { "epoch": 0.09436208202575479, "grad_norm": 1.6015625, "learning_rate": 1.996708204111405e-05, "loss": 0.6633, "step": 545 }, { "epoch": 0.09453522346066442, "grad_norm": 1.578125, "learning_rate": 1.9966934010678554e-05, "loss": 0.6145, "step": 546 }, { "epoch": 0.09470836489557408, "grad_norm": 1.7578125, "learning_rate": 1.996678564869777e-05, "loss": 0.556, "step": 547 }, { "epoch": 0.09488150633048371, "grad_norm": 1.4765625, "learning_rate": 1.9966636955176627e-05, "loss": 0.603, "step": 548 }, { "epoch": 0.09505464776539335, "grad_norm": 1.5546875, "learning_rate": 1.9966487930120073e-05, "loss": 0.6082, "step": 549 }, { "epoch": 0.095227789200303, "grad_norm": 1.453125, "learning_rate": 1.9966338573533066e-05, "loss": 0.6154, "step": 550 }, { "epoch": 0.09540093063521264, "grad_norm": 1.4375, "learning_rate": 1.996618888542057e-05, "loss": 0.6167, "step": 551 }, { "epoch": 0.09557407207012228, "grad_norm": 1.5, "learning_rate": 1.996603886578757e-05, "loss": 0.6053, "step": 552 }, { "epoch": 0.09574721350503192, "grad_norm": 1.515625, "learning_rate": 1.9965888514639057e-05, "loss": 0.6347, "step": 553 }, { "epoch": 0.09592035493994157, "grad_norm": 1.5546875, "learning_rate": 1.996573783198003e-05, "loss": 0.6693, "step": 554 }, { "epoch": 0.0960934963748512, "grad_norm": 1.53125, "learning_rate": 1.9965586817815494e-05, "loss": 0.7346, "step": 555 }, { "epoch": 0.09626663780976084, "grad_norm": 1.5625, "learning_rate": 1.9965435472150486e-05, "loss": 0.6225, "step": 556 }, { "epoch": 0.0964397792446705, "grad_norm": 1.5, "learning_rate": 1.9965283794990027e-05, "loss": 0.625, "step": 557 }, { "epoch": 0.09661292067958013, "grad_norm": 1.5703125, "learning_rate": 1.9965131786339175e-05, "loss": 0.6866, "step": 558 }, { "epoch": 0.09678606211448977, "grad_norm": 1.484375, "learning_rate": 1.996497944620298e-05, "loss": 0.6206, "step": 559 }, { "epoch": 0.09695920354939942, "grad_norm": 1.9375, "learning_rate": 1.9964826774586504e-05, "loss": 0.6427, "step": 560 }, { "epoch": 0.09713234498430906, "grad_norm": 1.53125, "learning_rate": 1.996467377149484e-05, "loss": 0.6487, "step": 561 }, { "epoch": 0.0973054864192187, "grad_norm": 1.484375, "learning_rate": 1.9964520436933065e-05, "loss": 0.6846, "step": 562 }, { "epoch": 0.09747862785412834, "grad_norm": 1.5703125, "learning_rate": 1.9964366770906283e-05, "loss": 0.6291, "step": 563 }, { "epoch": 0.09765176928903799, "grad_norm": 1.5, "learning_rate": 1.9964212773419608e-05, "loss": 0.6839, "step": 564 }, { "epoch": 0.09782491072394763, "grad_norm": 1.4765625, "learning_rate": 1.996405844447816e-05, "loss": 0.6159, "step": 565 }, { "epoch": 0.09799805215885726, "grad_norm": 1.4296875, "learning_rate": 1.9963903784087075e-05, "loss": 0.6212, "step": 566 }, { "epoch": 0.09817119359376691, "grad_norm": 1.484375, "learning_rate": 1.99637487922515e-05, "loss": 0.66, "step": 567 }, { "epoch": 0.09834433502867655, "grad_norm": 1.515625, "learning_rate": 1.9963593468976583e-05, "loss": 0.6242, "step": 568 }, { "epoch": 0.09851747646358619, "grad_norm": 1.6015625, "learning_rate": 1.9963437814267494e-05, "loss": 0.6666, "step": 569 }, { "epoch": 0.09869061789849583, "grad_norm": 1.5703125, "learning_rate": 1.9963281828129415e-05, "loss": 0.7279, "step": 570 }, { "epoch": 0.09886375933340548, "grad_norm": 1.515625, "learning_rate": 1.996312551056753e-05, "loss": 0.6509, "step": 571 }, { "epoch": 0.09903690076831512, "grad_norm": 1.5625, "learning_rate": 1.996296886158704e-05, "loss": 0.6196, "step": 572 }, { "epoch": 0.09921004220322475, "grad_norm": 1.484375, "learning_rate": 1.9962811881193157e-05, "loss": 0.6814, "step": 573 }, { "epoch": 0.0993831836381344, "grad_norm": 1.453125, "learning_rate": 1.99626545693911e-05, "loss": 0.5561, "step": 574 }, { "epoch": 0.09955632507304404, "grad_norm": 1.4921875, "learning_rate": 1.996249692618611e-05, "loss": 0.5547, "step": 575 }, { "epoch": 0.09972946650795368, "grad_norm": 1.5546875, "learning_rate": 1.9962338951583417e-05, "loss": 0.6127, "step": 576 }, { "epoch": 0.09990260794286332, "grad_norm": 1.7578125, "learning_rate": 1.996218064558829e-05, "loss": 0.6455, "step": 577 }, { "epoch": 0.10007574937777297, "grad_norm": 1.578125, "learning_rate": 1.9962022008205987e-05, "loss": 0.6144, "step": 578 }, { "epoch": 0.10024889081268261, "grad_norm": 1.6171875, "learning_rate": 1.9961863039441786e-05, "loss": 0.6189, "step": 579 }, { "epoch": 0.10042203224759225, "grad_norm": 1.4765625, "learning_rate": 1.9961703739300975e-05, "loss": 0.6108, "step": 580 }, { "epoch": 0.1005951736825019, "grad_norm": 1.5859375, "learning_rate": 1.9961544107788855e-05, "loss": 0.6358, "step": 581 }, { "epoch": 0.10076831511741154, "grad_norm": 1.6015625, "learning_rate": 1.9961384144910737e-05, "loss": 0.672, "step": 582 }, { "epoch": 0.10094145655232117, "grad_norm": 1.4453125, "learning_rate": 1.996122385067194e-05, "loss": 0.577, "step": 583 }, { "epoch": 0.10111459798723083, "grad_norm": 1.59375, "learning_rate": 1.9961063225077796e-05, "loss": 0.5697, "step": 584 }, { "epoch": 0.10128773942214046, "grad_norm": 1.796875, "learning_rate": 1.9960902268133646e-05, "loss": 0.6692, "step": 585 }, { "epoch": 0.1014608808570501, "grad_norm": 1.515625, "learning_rate": 1.9960740979844847e-05, "loss": 0.6541, "step": 586 }, { "epoch": 0.10163402229195974, "grad_norm": 1.4375, "learning_rate": 1.9960579360216762e-05, "loss": 0.5964, "step": 587 }, { "epoch": 0.10180716372686939, "grad_norm": 1.421875, "learning_rate": 1.9960417409254773e-05, "loss": 0.58, "step": 588 }, { "epoch": 0.10198030516177903, "grad_norm": 1.4921875, "learning_rate": 1.996025512696426e-05, "loss": 0.6652, "step": 589 }, { "epoch": 0.10215344659668867, "grad_norm": 1.5234375, "learning_rate": 1.9960092513350627e-05, "loss": 0.6307, "step": 590 }, { "epoch": 0.10232658803159832, "grad_norm": 1.5625, "learning_rate": 1.9959929568419282e-05, "loss": 0.6748, "step": 591 }, { "epoch": 0.10249972946650796, "grad_norm": 1.40625, "learning_rate": 1.9959766292175638e-05, "loss": 0.6941, "step": 592 }, { "epoch": 0.10267287090141759, "grad_norm": 1.4609375, "learning_rate": 1.995960268462514e-05, "loss": 0.6134, "step": 593 }, { "epoch": 0.10284601233632723, "grad_norm": 1.515625, "learning_rate": 1.9959438745773216e-05, "loss": 0.6513, "step": 594 }, { "epoch": 0.10301915377123688, "grad_norm": 1.53125, "learning_rate": 1.9959274475625327e-05, "loss": 0.7036, "step": 595 }, { "epoch": 0.10319229520614652, "grad_norm": 1.5390625, "learning_rate": 1.9959109874186938e-05, "loss": 0.6616, "step": 596 }, { "epoch": 0.10336543664105616, "grad_norm": 1.6171875, "learning_rate": 1.9958944941463524e-05, "loss": 0.599, "step": 597 }, { "epoch": 0.10353857807596581, "grad_norm": 1.3828125, "learning_rate": 1.9958779677460565e-05, "loss": 0.5961, "step": 598 }, { "epoch": 0.10371171951087545, "grad_norm": 1.6875, "learning_rate": 1.995861408218357e-05, "loss": 0.6821, "step": 599 }, { "epoch": 0.10388486094578508, "grad_norm": 1.4453125, "learning_rate": 1.9958448155638035e-05, "loss": 0.5884, "step": 600 }, { "epoch": 0.10405800238069474, "grad_norm": 1.4921875, "learning_rate": 1.995828189782949e-05, "loss": 0.584, "step": 601 }, { "epoch": 0.10423114381560437, "grad_norm": 1.609375, "learning_rate": 1.9958115308763454e-05, "loss": 0.6973, "step": 602 }, { "epoch": 0.10440428525051401, "grad_norm": 1.5078125, "learning_rate": 1.9957948388445483e-05, "loss": 0.6305, "step": 603 }, { "epoch": 0.10457742668542365, "grad_norm": 1.640625, "learning_rate": 1.995778113688112e-05, "loss": 0.6301, "step": 604 }, { "epoch": 0.1047505681203333, "grad_norm": 1.6328125, "learning_rate": 1.995761355407593e-05, "loss": 0.6051, "step": 605 }, { "epoch": 0.10492370955524294, "grad_norm": 1.4921875, "learning_rate": 1.9957445640035486e-05, "loss": 0.5905, "step": 606 }, { "epoch": 0.10509685099015258, "grad_norm": 1.5078125, "learning_rate": 1.9957277394765377e-05, "loss": 0.5951, "step": 607 }, { "epoch": 0.10526999242506223, "grad_norm": 1.453125, "learning_rate": 1.99571088182712e-05, "loss": 0.5859, "step": 608 }, { "epoch": 0.10544313385997187, "grad_norm": 1.5, "learning_rate": 1.9956939910558557e-05, "loss": 0.6918, "step": 609 }, { "epoch": 0.1056162752948815, "grad_norm": 1.59375, "learning_rate": 1.995677067163307e-05, "loss": 0.6566, "step": 610 }, { "epoch": 0.10578941672979114, "grad_norm": 1.515625, "learning_rate": 1.9956601101500372e-05, "loss": 0.6096, "step": 611 }, { "epoch": 0.10596255816470079, "grad_norm": 1.6640625, "learning_rate": 1.9956431200166102e-05, "loss": 0.7108, "step": 612 }, { "epoch": 0.10613569959961043, "grad_norm": 1.4375, "learning_rate": 1.995626096763591e-05, "loss": 0.6375, "step": 613 }, { "epoch": 0.10630884103452007, "grad_norm": 1.4296875, "learning_rate": 1.9956090403915457e-05, "loss": 0.6047, "step": 614 }, { "epoch": 0.10648198246942972, "grad_norm": 1.3984375, "learning_rate": 1.995591950901042e-05, "loss": 0.5647, "step": 615 }, { "epoch": 0.10665512390433936, "grad_norm": 1.4921875, "learning_rate": 1.9955748282926484e-05, "loss": 0.5542, "step": 616 }, { "epoch": 0.106828265339249, "grad_norm": 1.53125, "learning_rate": 1.995557672566934e-05, "loss": 0.6724, "step": 617 }, { "epoch": 0.10700140677415865, "grad_norm": 1.4921875, "learning_rate": 1.9955404837244702e-05, "loss": 0.6189, "step": 618 }, { "epoch": 0.10717454820906828, "grad_norm": 1.4453125, "learning_rate": 1.995523261765828e-05, "loss": 0.6521, "step": 619 }, { "epoch": 0.10734768964397792, "grad_norm": 1.4921875, "learning_rate": 1.995506006691581e-05, "loss": 0.5412, "step": 620 }, { "epoch": 0.10752083107888756, "grad_norm": 1.59375, "learning_rate": 1.9954887185023026e-05, "loss": 0.6395, "step": 621 }, { "epoch": 0.10769397251379721, "grad_norm": 1.40625, "learning_rate": 1.9954713971985682e-05, "loss": 0.6071, "step": 622 }, { "epoch": 0.10786711394870685, "grad_norm": 1.5546875, "learning_rate": 1.995454042780954e-05, "loss": 0.7224, "step": 623 }, { "epoch": 0.10804025538361649, "grad_norm": 1.5078125, "learning_rate": 1.9954366552500374e-05, "loss": 0.6784, "step": 624 }, { "epoch": 0.10821339681852614, "grad_norm": 1.5390625, "learning_rate": 1.995419234606396e-05, "loss": 0.5912, "step": 625 }, { "epoch": 0.10838653825343578, "grad_norm": 1.484375, "learning_rate": 1.9954017808506107e-05, "loss": 0.6529, "step": 626 }, { "epoch": 0.10855967968834541, "grad_norm": 1.625, "learning_rate": 1.9953842939832607e-05, "loss": 0.6701, "step": 627 }, { "epoch": 0.10873282112325505, "grad_norm": 1.546875, "learning_rate": 1.9953667740049286e-05, "loss": 0.5755, "step": 628 }, { "epoch": 0.1089059625581647, "grad_norm": 1.3515625, "learning_rate": 1.9953492209161965e-05, "loss": 0.5763, "step": 629 }, { "epoch": 0.10907910399307434, "grad_norm": 1.7421875, "learning_rate": 1.995331634717649e-05, "loss": 0.6658, "step": 630 }, { "epoch": 0.10925224542798398, "grad_norm": 1.640625, "learning_rate": 1.9953140154098705e-05, "loss": 0.6081, "step": 631 }, { "epoch": 0.10942538686289363, "grad_norm": 1.4296875, "learning_rate": 1.995296362993447e-05, "loss": 0.5739, "step": 632 }, { "epoch": 0.10959852829780327, "grad_norm": 1.5234375, "learning_rate": 1.9952786774689667e-05, "loss": 0.6401, "step": 633 }, { "epoch": 0.1097716697327129, "grad_norm": 1.515625, "learning_rate": 1.995260958837017e-05, "loss": 0.5536, "step": 634 }, { "epoch": 0.10994481116762256, "grad_norm": 1.484375, "learning_rate": 1.995243207098188e-05, "loss": 0.5974, "step": 635 }, { "epoch": 0.1101179526025322, "grad_norm": 1.4765625, "learning_rate": 1.9952254222530693e-05, "loss": 0.6853, "step": 636 }, { "epoch": 0.11029109403744183, "grad_norm": 1.59375, "learning_rate": 1.995207604302253e-05, "loss": 0.688, "step": 637 }, { "epoch": 0.11046423547235147, "grad_norm": 1.53125, "learning_rate": 1.9951897532463315e-05, "loss": 0.637, "step": 638 }, { "epoch": 0.11063737690726112, "grad_norm": 1.4375, "learning_rate": 1.9951718690858994e-05, "loss": 0.5831, "step": 639 }, { "epoch": 0.11081051834217076, "grad_norm": 1.65625, "learning_rate": 1.995153951821551e-05, "loss": 0.6955, "step": 640 }, { "epoch": 0.1109836597770804, "grad_norm": 1.546875, "learning_rate": 1.995136001453882e-05, "loss": 0.5695, "step": 641 }, { "epoch": 0.11115680121199005, "grad_norm": 1.515625, "learning_rate": 1.99511801798349e-05, "loss": 0.6359, "step": 642 }, { "epoch": 0.11132994264689969, "grad_norm": 1.453125, "learning_rate": 1.9951000014109735e-05, "loss": 0.5947, "step": 643 }, { "epoch": 0.11150308408180933, "grad_norm": 1.703125, "learning_rate": 1.995081951736931e-05, "loss": 0.6308, "step": 644 }, { "epoch": 0.11167622551671896, "grad_norm": 1.5390625, "learning_rate": 1.995063868961964e-05, "loss": 0.602, "step": 645 }, { "epoch": 0.11184936695162861, "grad_norm": 1.546875, "learning_rate": 1.9950457530866726e-05, "loss": 0.6494, "step": 646 }, { "epoch": 0.11202250838653825, "grad_norm": 1.671875, "learning_rate": 1.9950276041116604e-05, "loss": 0.6488, "step": 647 }, { "epoch": 0.11219564982144789, "grad_norm": 1.90625, "learning_rate": 1.995009422037531e-05, "loss": 0.6741, "step": 648 }, { "epoch": 0.11236879125635754, "grad_norm": 1.5390625, "learning_rate": 1.9949912068648893e-05, "loss": 0.578, "step": 649 }, { "epoch": 0.11254193269126718, "grad_norm": 1.453125, "learning_rate": 1.9949729585943406e-05, "loss": 0.6215, "step": 650 }, { "epoch": 0.11271507412617682, "grad_norm": 1.5078125, "learning_rate": 1.9949546772264924e-05, "loss": 0.5948, "step": 651 }, { "epoch": 0.11288821556108647, "grad_norm": 1.640625, "learning_rate": 1.994936362761953e-05, "loss": 0.6378, "step": 652 }, { "epoch": 0.1130613569959961, "grad_norm": 1.46875, "learning_rate": 1.994918015201331e-05, "loss": 0.6638, "step": 653 }, { "epoch": 0.11323449843090574, "grad_norm": 1.4453125, "learning_rate": 1.9948996345452376e-05, "loss": 0.6082, "step": 654 }, { "epoch": 0.11340763986581538, "grad_norm": 1.390625, "learning_rate": 1.9948812207942835e-05, "loss": 0.6018, "step": 655 }, { "epoch": 0.11358078130072503, "grad_norm": 1.484375, "learning_rate": 1.9948627739490813e-05, "loss": 0.6438, "step": 656 }, { "epoch": 0.11375392273563467, "grad_norm": 1.5234375, "learning_rate": 1.994844294010245e-05, "loss": 0.6133, "step": 657 }, { "epoch": 0.11392706417054431, "grad_norm": 1.5390625, "learning_rate": 1.994825780978389e-05, "loss": 0.6685, "step": 658 }, { "epoch": 0.11410020560545396, "grad_norm": 1.6015625, "learning_rate": 1.9948072348541294e-05, "loss": 0.6164, "step": 659 }, { "epoch": 0.1142733470403636, "grad_norm": 1.6171875, "learning_rate": 1.994788655638083e-05, "loss": 0.6371, "step": 660 }, { "epoch": 0.11444648847527324, "grad_norm": 1.421875, "learning_rate": 1.9947700433308675e-05, "loss": 0.6289, "step": 661 }, { "epoch": 0.11461962991018287, "grad_norm": 1.5078125, "learning_rate": 1.9947513979331027e-05, "loss": 0.6708, "step": 662 }, { "epoch": 0.11479277134509253, "grad_norm": 1.53125, "learning_rate": 1.994732719445408e-05, "loss": 0.6068, "step": 663 }, { "epoch": 0.11496591278000216, "grad_norm": 1.453125, "learning_rate": 1.9947140078684055e-05, "loss": 0.671, "step": 664 }, { "epoch": 0.1151390542149118, "grad_norm": 1.4609375, "learning_rate": 1.9946952632027172e-05, "loss": 0.666, "step": 665 }, { "epoch": 0.11531219564982145, "grad_norm": 1.4296875, "learning_rate": 1.9946764854489668e-05, "loss": 0.6152, "step": 666 }, { "epoch": 0.11548533708473109, "grad_norm": 1.34375, "learning_rate": 1.9946576746077788e-05, "loss": 0.5428, "step": 667 }, { "epoch": 0.11565847851964073, "grad_norm": 1.5546875, "learning_rate": 1.994638830679779e-05, "loss": 0.6098, "step": 668 }, { "epoch": 0.11583161995455038, "grad_norm": 1.4921875, "learning_rate": 1.9946199536655945e-05, "loss": 0.7008, "step": 669 }, { "epoch": 0.11600476138946002, "grad_norm": 1.40625, "learning_rate": 1.9946010435658527e-05, "loss": 0.6222, "step": 670 }, { "epoch": 0.11617790282436966, "grad_norm": 1.546875, "learning_rate": 1.994582100381183e-05, "loss": 0.6605, "step": 671 }, { "epoch": 0.1163510442592793, "grad_norm": 1.5, "learning_rate": 1.9945631241122158e-05, "loss": 0.6484, "step": 672 }, { "epoch": 0.11652418569418894, "grad_norm": 1.46875, "learning_rate": 1.9945441147595816e-05, "loss": 0.732, "step": 673 }, { "epoch": 0.11669732712909858, "grad_norm": 1.484375, "learning_rate": 1.994525072323913e-05, "loss": 0.6698, "step": 674 }, { "epoch": 0.11687046856400822, "grad_norm": 1.46875, "learning_rate": 1.994505996805844e-05, "loss": 0.6523, "step": 675 }, { "epoch": 0.11704360999891787, "grad_norm": 1.5234375, "learning_rate": 1.9944868882060086e-05, "loss": 0.6315, "step": 676 }, { "epoch": 0.11721675143382751, "grad_norm": 1.5703125, "learning_rate": 1.9944677465250423e-05, "loss": 0.7268, "step": 677 }, { "epoch": 0.11738989286873715, "grad_norm": 1.4453125, "learning_rate": 1.994448571763582e-05, "loss": 0.583, "step": 678 }, { "epoch": 0.11756303430364679, "grad_norm": 1.4453125, "learning_rate": 1.994429363922266e-05, "loss": 0.6019, "step": 679 }, { "epoch": 0.11773617573855644, "grad_norm": 1.734375, "learning_rate": 1.9944101230017327e-05, "loss": 0.6091, "step": 680 }, { "epoch": 0.11790931717346607, "grad_norm": 1.453125, "learning_rate": 1.994390849002622e-05, "loss": 0.6506, "step": 681 }, { "epoch": 0.11808245860837571, "grad_norm": 1.53125, "learning_rate": 1.994371541925576e-05, "loss": 0.6734, "step": 682 }, { "epoch": 0.11825560004328536, "grad_norm": 1.3984375, "learning_rate": 1.994352201771236e-05, "loss": 0.6603, "step": 683 }, { "epoch": 0.118428741478195, "grad_norm": 1.4453125, "learning_rate": 1.9943328285402455e-05, "loss": 0.6348, "step": 684 }, { "epoch": 0.11860188291310464, "grad_norm": 1.4140625, "learning_rate": 1.9943134222332493e-05, "loss": 0.598, "step": 685 }, { "epoch": 0.11877502434801429, "grad_norm": 1.4609375, "learning_rate": 1.994293982850892e-05, "loss": 0.6486, "step": 686 }, { "epoch": 0.11894816578292393, "grad_norm": 1.5703125, "learning_rate": 1.9942745103938218e-05, "loss": 0.7148, "step": 687 }, { "epoch": 0.11912130721783357, "grad_norm": 1.4765625, "learning_rate": 1.9942550048626855e-05, "loss": 0.6307, "step": 688 }, { "epoch": 0.1192944486527432, "grad_norm": 1.453125, "learning_rate": 1.9942354662581316e-05, "loss": 0.6167, "step": 689 }, { "epoch": 0.11946759008765286, "grad_norm": 1.5546875, "learning_rate": 1.9942158945808104e-05, "loss": 0.6159, "step": 690 }, { "epoch": 0.1196407315225625, "grad_norm": 1.6640625, "learning_rate": 1.9941962898313737e-05, "loss": 0.6184, "step": 691 }, { "epoch": 0.11981387295747213, "grad_norm": 1.4921875, "learning_rate": 1.9941766520104725e-05, "loss": 0.6246, "step": 692 }, { "epoch": 0.11998701439238178, "grad_norm": 1.4453125, "learning_rate": 1.9941569811187605e-05, "loss": 0.5969, "step": 693 }, { "epoch": 0.12016015582729142, "grad_norm": 1.5078125, "learning_rate": 1.994137277156892e-05, "loss": 0.6, "step": 694 }, { "epoch": 0.12033329726220106, "grad_norm": 1.515625, "learning_rate": 1.9941175401255228e-05, "loss": 0.5732, "step": 695 }, { "epoch": 0.1205064386971107, "grad_norm": 1.421875, "learning_rate": 1.9940977700253086e-05, "loss": 0.5749, "step": 696 }, { "epoch": 0.12067958013202035, "grad_norm": 1.515625, "learning_rate": 1.994077966856908e-05, "loss": 0.6666, "step": 697 }, { "epoch": 0.12085272156692999, "grad_norm": 1.40625, "learning_rate": 1.994058130620979e-05, "loss": 0.6225, "step": 698 }, { "epoch": 0.12102586300183962, "grad_norm": 1.453125, "learning_rate": 1.994038261318182e-05, "loss": 0.6831, "step": 699 }, { "epoch": 0.12119900443674927, "grad_norm": 1.484375, "learning_rate": 1.9940183589491776e-05, "loss": 0.5541, "step": 700 }, { "epoch": 0.12137214587165891, "grad_norm": 1.4609375, "learning_rate": 1.9939984235146282e-05, "loss": 0.6032, "step": 701 }, { "epoch": 0.12154528730656855, "grad_norm": 1.59375, "learning_rate": 1.9939784550151966e-05, "loss": 0.5773, "step": 702 }, { "epoch": 0.1217184287414782, "grad_norm": 1.40625, "learning_rate": 1.9939584534515467e-05, "loss": 0.5645, "step": 703 }, { "epoch": 0.12189157017638784, "grad_norm": 1.7109375, "learning_rate": 1.993938418824345e-05, "loss": 0.6706, "step": 704 }, { "epoch": 0.12206471161129748, "grad_norm": 1.5625, "learning_rate": 1.993918351134257e-05, "loss": 0.5882, "step": 705 }, { "epoch": 0.12223785304620712, "grad_norm": 1.5859375, "learning_rate": 1.99389825038195e-05, "loss": 0.6183, "step": 706 }, { "epoch": 0.12241099448111677, "grad_norm": 1.4140625, "learning_rate": 1.9938781165680933e-05, "loss": 0.6072, "step": 707 }, { "epoch": 0.1225841359160264, "grad_norm": 1.5625, "learning_rate": 1.9938579496933564e-05, "loss": 0.6903, "step": 708 }, { "epoch": 0.12275727735093604, "grad_norm": 1.5703125, "learning_rate": 1.9938377497584103e-05, "loss": 0.6349, "step": 709 }, { "epoch": 0.1229304187858457, "grad_norm": 1.84375, "learning_rate": 1.993817516763927e-05, "loss": 0.6703, "step": 710 }, { "epoch": 0.12310356022075533, "grad_norm": 1.546875, "learning_rate": 1.9937972507105793e-05, "loss": 0.6305, "step": 711 }, { "epoch": 0.12327670165566497, "grad_norm": 1.40625, "learning_rate": 1.993776951599041e-05, "loss": 0.5318, "step": 712 }, { "epoch": 0.1234498430905746, "grad_norm": 1.46875, "learning_rate": 1.9937566194299882e-05, "loss": 0.5937, "step": 713 }, { "epoch": 0.12362298452548426, "grad_norm": 1.5703125, "learning_rate": 1.9937362542040967e-05, "loss": 0.637, "step": 714 }, { "epoch": 0.1237961259603939, "grad_norm": 1.5, "learning_rate": 1.993715855922044e-05, "loss": 0.6428, "step": 715 }, { "epoch": 0.12396926739530353, "grad_norm": 1.3828125, "learning_rate": 1.9936954245845085e-05, "loss": 0.5748, "step": 716 }, { "epoch": 0.12414240883021319, "grad_norm": 1.4453125, "learning_rate": 1.99367496019217e-05, "loss": 0.6203, "step": 717 }, { "epoch": 0.12431555026512282, "grad_norm": 1.3671875, "learning_rate": 1.9936544627457093e-05, "loss": 0.59, "step": 718 }, { "epoch": 0.12448869170003246, "grad_norm": 1.4921875, "learning_rate": 1.993633932245808e-05, "loss": 0.661, "step": 719 }, { "epoch": 0.12466183313494211, "grad_norm": 1.4609375, "learning_rate": 1.9936133686931497e-05, "loss": 0.5514, "step": 720 }, { "epoch": 0.12483497456985175, "grad_norm": 1.46875, "learning_rate": 1.993592772088418e-05, "loss": 0.6549, "step": 721 }, { "epoch": 0.1250081160047614, "grad_norm": 1.7265625, "learning_rate": 1.9935721424322975e-05, "loss": 0.6417, "step": 722 }, { "epoch": 0.12518125743967104, "grad_norm": 1.484375, "learning_rate": 1.9935514797254753e-05, "loss": 0.6345, "step": 723 }, { "epoch": 0.12535439887458066, "grad_norm": 1.421875, "learning_rate": 1.993530783968638e-05, "loss": 0.6532, "step": 724 }, { "epoch": 0.12552754030949032, "grad_norm": 1.3671875, "learning_rate": 1.9935100551624743e-05, "loss": 0.604, "step": 725 }, { "epoch": 0.12570068174439997, "grad_norm": 1.6171875, "learning_rate": 1.9934892933076742e-05, "loss": 0.6051, "step": 726 }, { "epoch": 0.1258738231793096, "grad_norm": 1.5546875, "learning_rate": 1.993468498404928e-05, "loss": 0.6506, "step": 727 }, { "epoch": 0.12604696461421924, "grad_norm": 1.546875, "learning_rate": 1.993447670454927e-05, "loss": 0.6608, "step": 728 }, { "epoch": 0.1262201060491289, "grad_norm": 1.46875, "learning_rate": 1.993426809458365e-05, "loss": 0.6092, "step": 729 }, { "epoch": 0.12639324748403852, "grad_norm": 1.4765625, "learning_rate": 1.993405915415935e-05, "loss": 0.6233, "step": 730 }, { "epoch": 0.12656638891894817, "grad_norm": 1.5859375, "learning_rate": 1.9933849883283324e-05, "loss": 0.6342, "step": 731 }, { "epoch": 0.12673953035385782, "grad_norm": 1.5, "learning_rate": 1.993364028196253e-05, "loss": 0.5899, "step": 732 }, { "epoch": 0.12691267178876744, "grad_norm": 1.53125, "learning_rate": 1.993343035020395e-05, "loss": 0.589, "step": 733 }, { "epoch": 0.1270858132236771, "grad_norm": 1.4453125, "learning_rate": 1.993322008801456e-05, "loss": 0.6567, "step": 734 }, { "epoch": 0.12725895465858672, "grad_norm": 1.625, "learning_rate": 1.9933009495401346e-05, "loss": 0.6333, "step": 735 }, { "epoch": 0.12743209609349637, "grad_norm": 1.6484375, "learning_rate": 1.993279857237133e-05, "loss": 0.7385, "step": 736 }, { "epoch": 0.12760523752840602, "grad_norm": 1.53125, "learning_rate": 1.993258731893152e-05, "loss": 0.6231, "step": 737 }, { "epoch": 0.12777837896331565, "grad_norm": 1.6484375, "learning_rate": 1.9932375735088945e-05, "loss": 0.605, "step": 738 }, { "epoch": 0.1279515203982253, "grad_norm": 1.53125, "learning_rate": 1.9932163820850638e-05, "loss": 0.7344, "step": 739 }, { "epoch": 0.12812466183313495, "grad_norm": 1.578125, "learning_rate": 1.9931951576223657e-05, "loss": 0.5808, "step": 740 }, { "epoch": 0.12829780326804457, "grad_norm": 1.5703125, "learning_rate": 1.9931739001215053e-05, "loss": 0.6362, "step": 741 }, { "epoch": 0.12847094470295423, "grad_norm": 1.421875, "learning_rate": 1.99315260958319e-05, "loss": 0.5824, "step": 742 }, { "epoch": 0.12864408613786388, "grad_norm": 1.4609375, "learning_rate": 1.993131286008129e-05, "loss": 0.585, "step": 743 }, { "epoch": 0.1288172275727735, "grad_norm": 1.3671875, "learning_rate": 1.9931099293970303e-05, "loss": 0.5947, "step": 744 }, { "epoch": 0.12899036900768315, "grad_norm": 1.4765625, "learning_rate": 1.9930885397506045e-05, "loss": 0.6069, "step": 745 }, { "epoch": 0.1291635104425928, "grad_norm": 1.5, "learning_rate": 1.9930671170695638e-05, "loss": 0.5755, "step": 746 }, { "epoch": 0.12933665187750243, "grad_norm": 1.421875, "learning_rate": 1.9930456613546206e-05, "loss": 0.604, "step": 747 }, { "epoch": 0.12950979331241208, "grad_norm": 1.359375, "learning_rate": 1.9930241726064882e-05, "loss": 0.5898, "step": 748 }, { "epoch": 0.12968293474732173, "grad_norm": 1.421875, "learning_rate": 1.993002650825882e-05, "loss": 0.6421, "step": 749 }, { "epoch": 0.12985607618223136, "grad_norm": 1.4453125, "learning_rate": 1.992981096013517e-05, "loss": 0.617, "step": 750 }, { "epoch": 0.130029217617141, "grad_norm": 1.7109375, "learning_rate": 1.9929595081701115e-05, "loss": 0.653, "step": 751 }, { "epoch": 0.13020235905205063, "grad_norm": 1.515625, "learning_rate": 1.9929378872963823e-05, "loss": 0.6417, "step": 752 }, { "epoch": 0.13037550048696028, "grad_norm": 1.546875, "learning_rate": 1.9929162333930496e-05, "loss": 0.6227, "step": 753 }, { "epoch": 0.13054864192186993, "grad_norm": 1.5703125, "learning_rate": 1.992894546460833e-05, "loss": 0.6532, "step": 754 }, { "epoch": 0.13072178335677956, "grad_norm": 1.421875, "learning_rate": 1.9928728265004545e-05, "loss": 0.6333, "step": 755 }, { "epoch": 0.1308949247916892, "grad_norm": 1.6015625, "learning_rate": 1.9928510735126364e-05, "loss": 0.6612, "step": 756 }, { "epoch": 0.13106806622659886, "grad_norm": 1.46875, "learning_rate": 1.9928292874981018e-05, "loss": 0.6144, "step": 757 }, { "epoch": 0.13124120766150849, "grad_norm": 1.53125, "learning_rate": 1.9928074684575764e-05, "loss": 0.676, "step": 758 }, { "epoch": 0.13141434909641814, "grad_norm": 1.4375, "learning_rate": 1.9927856163917852e-05, "loss": 0.5878, "step": 759 }, { "epoch": 0.1315874905313278, "grad_norm": 1.5390625, "learning_rate": 1.992763731301455e-05, "loss": 0.6588, "step": 760 }, { "epoch": 0.1317606319662374, "grad_norm": 1.40625, "learning_rate": 1.9927418131873148e-05, "loss": 0.5823, "step": 761 }, { "epoch": 0.13193377340114706, "grad_norm": 1.4609375, "learning_rate": 1.9927198620500927e-05, "loss": 0.6098, "step": 762 }, { "epoch": 0.13210691483605672, "grad_norm": 1.46875, "learning_rate": 1.9926978778905193e-05, "loss": 0.6064, "step": 763 }, { "epoch": 0.13228005627096634, "grad_norm": 1.4453125, "learning_rate": 1.9926758607093256e-05, "loss": 0.5931, "step": 764 }, { "epoch": 0.132453197705876, "grad_norm": 1.5, "learning_rate": 1.9926538105072445e-05, "loss": 0.608, "step": 765 }, { "epoch": 0.13262633914078564, "grad_norm": 1.453125, "learning_rate": 1.9926317272850087e-05, "loss": 0.6514, "step": 766 }, { "epoch": 0.13279948057569527, "grad_norm": 1.421875, "learning_rate": 1.9926096110433538e-05, "loss": 0.6223, "step": 767 }, { "epoch": 0.13297262201060492, "grad_norm": 1.5078125, "learning_rate": 1.992587461783015e-05, "loss": 0.6189, "step": 768 }, { "epoch": 0.13314576344551454, "grad_norm": 1.375, "learning_rate": 1.9925652795047286e-05, "loss": 0.5974, "step": 769 }, { "epoch": 0.1333189048804242, "grad_norm": 1.4921875, "learning_rate": 1.9925430642092332e-05, "loss": 0.6315, "step": 770 }, { "epoch": 0.13349204631533385, "grad_norm": 1.5078125, "learning_rate": 1.9925208158972674e-05, "loss": 0.6037, "step": 771 }, { "epoch": 0.13366518775024347, "grad_norm": 1.515625, "learning_rate": 1.9924985345695717e-05, "loss": 0.6808, "step": 772 }, { "epoch": 0.13383832918515312, "grad_norm": 1.3984375, "learning_rate": 1.9924762202268864e-05, "loss": 0.6634, "step": 773 }, { "epoch": 0.13401147062006277, "grad_norm": 1.578125, "learning_rate": 1.992453872869955e-05, "loss": 0.6305, "step": 774 }, { "epoch": 0.1341846120549724, "grad_norm": 1.5546875, "learning_rate": 1.9924314924995198e-05, "loss": 0.6583, "step": 775 }, { "epoch": 0.13435775348988205, "grad_norm": 1.53125, "learning_rate": 1.992409079116326e-05, "loss": 0.5918, "step": 776 }, { "epoch": 0.1345308949247917, "grad_norm": 1.6953125, "learning_rate": 1.9923866327211187e-05, "loss": 0.627, "step": 777 }, { "epoch": 0.13470403635970132, "grad_norm": 1.515625, "learning_rate": 1.9923641533146446e-05, "loss": 0.6336, "step": 778 }, { "epoch": 0.13487717779461098, "grad_norm": 1.4921875, "learning_rate": 1.992341640897652e-05, "loss": 0.5738, "step": 779 }, { "epoch": 0.13505031922952063, "grad_norm": 1.5390625, "learning_rate": 1.992319095470889e-05, "loss": 0.6644, "step": 780 }, { "epoch": 0.13522346066443025, "grad_norm": 1.4296875, "learning_rate": 1.992296517035106e-05, "loss": 0.5968, "step": 781 }, { "epoch": 0.1353966020993399, "grad_norm": 1.359375, "learning_rate": 1.9922739055910542e-05, "loss": 0.5312, "step": 782 }, { "epoch": 0.13556974353424955, "grad_norm": 1.4296875, "learning_rate": 1.9922512611394853e-05, "loss": 0.5927, "step": 783 }, { "epoch": 0.13574288496915918, "grad_norm": 1.4375, "learning_rate": 1.992228583681153e-05, "loss": 0.5515, "step": 784 }, { "epoch": 0.13591602640406883, "grad_norm": 1.4921875, "learning_rate": 1.9922058732168112e-05, "loss": 0.651, "step": 785 }, { "epoch": 0.13608916783897845, "grad_norm": 1.5, "learning_rate": 1.992183129747216e-05, "loss": 0.7083, "step": 786 }, { "epoch": 0.1362623092738881, "grad_norm": 1.609375, "learning_rate": 1.9921603532731232e-05, "loss": 0.6301, "step": 787 }, { "epoch": 0.13643545070879776, "grad_norm": 1.421875, "learning_rate": 1.992137543795291e-05, "loss": 0.6024, "step": 788 }, { "epoch": 0.13660859214370738, "grad_norm": 1.5703125, "learning_rate": 1.9921147013144782e-05, "loss": 0.6143, "step": 789 }, { "epoch": 0.13678173357861703, "grad_norm": 1.3984375, "learning_rate": 1.9920918258314437e-05, "loss": 0.6333, "step": 790 }, { "epoch": 0.13695487501352668, "grad_norm": 1.515625, "learning_rate": 1.99206891734695e-05, "loss": 0.6555, "step": 791 }, { "epoch": 0.1371280164484363, "grad_norm": 1.578125, "learning_rate": 1.992045975861758e-05, "loss": 0.6583, "step": 792 }, { "epoch": 0.13730115788334596, "grad_norm": 1.453125, "learning_rate": 1.9920230013766306e-05, "loss": 0.64, "step": 793 }, { "epoch": 0.1374742993182556, "grad_norm": 1.4375, "learning_rate": 1.991999993892333e-05, "loss": 0.6713, "step": 794 }, { "epoch": 0.13764744075316523, "grad_norm": 1.5078125, "learning_rate": 1.9919769534096303e-05, "loss": 0.6234, "step": 795 }, { "epoch": 0.13782058218807489, "grad_norm": 1.6328125, "learning_rate": 1.9919538799292885e-05, "loss": 0.6496, "step": 796 }, { "epoch": 0.13799372362298454, "grad_norm": 1.5546875, "learning_rate": 1.991930773452075e-05, "loss": 0.5588, "step": 797 }, { "epoch": 0.13816686505789416, "grad_norm": 1.6015625, "learning_rate": 1.9919076339787594e-05, "loss": 0.624, "step": 798 }, { "epoch": 0.1383400064928038, "grad_norm": 1.484375, "learning_rate": 1.9918844615101106e-05, "loss": 0.6662, "step": 799 }, { "epoch": 0.13851314792771346, "grad_norm": 1.4453125, "learning_rate": 1.9918612560468995e-05, "loss": 0.5808, "step": 800 }, { "epoch": 0.1386862893626231, "grad_norm": 1.5703125, "learning_rate": 1.9918380175898983e-05, "loss": 0.59, "step": 801 }, { "epoch": 0.13885943079753274, "grad_norm": 1.5546875, "learning_rate": 1.9918147461398796e-05, "loss": 0.6816, "step": 802 }, { "epoch": 0.13903257223244236, "grad_norm": 1.484375, "learning_rate": 1.991791441697618e-05, "loss": 0.6269, "step": 803 }, { "epoch": 0.13920571366735202, "grad_norm": 1.5703125, "learning_rate": 1.9917681042638882e-05, "loss": 0.6832, "step": 804 }, { "epoch": 0.13937885510226167, "grad_norm": 1.546875, "learning_rate": 1.991744733839467e-05, "loss": 0.6391, "step": 805 }, { "epoch": 0.1395519965371713, "grad_norm": 1.5, "learning_rate": 1.9917213304251316e-05, "loss": 0.6022, "step": 806 }, { "epoch": 0.13972513797208094, "grad_norm": 1.515625, "learning_rate": 1.9916978940216603e-05, "loss": 0.5944, "step": 807 }, { "epoch": 0.1398982794069906, "grad_norm": 1.515625, "learning_rate": 1.9916744246298332e-05, "loss": 0.6061, "step": 808 }, { "epoch": 0.14007142084190022, "grad_norm": 1.4375, "learning_rate": 1.9916509222504306e-05, "loss": 0.5278, "step": 809 }, { "epoch": 0.14024456227680987, "grad_norm": 1.453125, "learning_rate": 1.9916273868842345e-05, "loss": 0.5298, "step": 810 }, { "epoch": 0.14041770371171952, "grad_norm": 1.5859375, "learning_rate": 1.9916038185320273e-05, "loss": 0.646, "step": 811 }, { "epoch": 0.14059084514662915, "grad_norm": 1.5703125, "learning_rate": 1.991580217194594e-05, "loss": 0.6565, "step": 812 }, { "epoch": 0.1407639865815388, "grad_norm": 1.5078125, "learning_rate": 1.9915565828727183e-05, "loss": 0.5839, "step": 813 }, { "epoch": 0.14093712801644845, "grad_norm": 1.40625, "learning_rate": 1.9915329155671877e-05, "loss": 0.6543, "step": 814 }, { "epoch": 0.14111026945135807, "grad_norm": 1.5390625, "learning_rate": 1.9915092152787888e-05, "loss": 0.6206, "step": 815 }, { "epoch": 0.14128341088626772, "grad_norm": 1.5625, "learning_rate": 1.9914854820083102e-05, "loss": 0.5817, "step": 816 }, { "epoch": 0.14145655232117735, "grad_norm": 1.5078125, "learning_rate": 1.991461715756541e-05, "loss": 0.6044, "step": 817 }, { "epoch": 0.141629693756087, "grad_norm": 1.40625, "learning_rate": 1.991437916524272e-05, "loss": 0.5534, "step": 818 }, { "epoch": 0.14180283519099665, "grad_norm": 1.3828125, "learning_rate": 1.9914140843122954e-05, "loss": 0.5384, "step": 819 }, { "epoch": 0.14197597662590627, "grad_norm": 1.4296875, "learning_rate": 1.9913902191214033e-05, "loss": 0.643, "step": 820 }, { "epoch": 0.14214911806081593, "grad_norm": 1.5234375, "learning_rate": 1.9913663209523897e-05, "loss": 0.5776, "step": 821 }, { "epoch": 0.14232225949572558, "grad_norm": 1.609375, "learning_rate": 1.9913423898060497e-05, "loss": 0.6231, "step": 822 }, { "epoch": 0.1424954009306352, "grad_norm": 1.34375, "learning_rate": 1.991318425683179e-05, "loss": 0.6123, "step": 823 }, { "epoch": 0.14266854236554485, "grad_norm": 1.4609375, "learning_rate": 1.9912944285845753e-05, "loss": 0.6324, "step": 824 }, { "epoch": 0.1428416838004545, "grad_norm": 1.546875, "learning_rate": 1.9912703985110368e-05, "loss": 0.6069, "step": 825 }, { "epoch": 0.14301482523536413, "grad_norm": 1.4375, "learning_rate": 1.991246335463362e-05, "loss": 0.5568, "step": 826 }, { "epoch": 0.14318796667027378, "grad_norm": 1.3828125, "learning_rate": 1.9912222394423527e-05, "loss": 0.6094, "step": 827 }, { "epoch": 0.14336110810518343, "grad_norm": 1.484375, "learning_rate": 1.991198110448809e-05, "loss": 0.6529, "step": 828 }, { "epoch": 0.14353424954009306, "grad_norm": 1.515625, "learning_rate": 1.9911739484835348e-05, "loss": 0.6403, "step": 829 }, { "epoch": 0.1437073909750027, "grad_norm": 1.453125, "learning_rate": 1.991149753547333e-05, "loss": 0.6076, "step": 830 }, { "epoch": 0.14388053240991236, "grad_norm": 1.53125, "learning_rate": 1.991125525641009e-05, "loss": 0.7236, "step": 831 }, { "epoch": 0.14405367384482198, "grad_norm": 1.5, "learning_rate": 1.9911012647653686e-05, "loss": 0.6842, "step": 832 }, { "epoch": 0.14422681527973163, "grad_norm": 1.421875, "learning_rate": 1.9910769709212184e-05, "loss": 0.6906, "step": 833 }, { "epoch": 0.14439995671464126, "grad_norm": 1.453125, "learning_rate": 1.9910526441093667e-05, "loss": 0.6055, "step": 834 }, { "epoch": 0.1445730981495509, "grad_norm": 1.84375, "learning_rate": 1.9910282843306228e-05, "loss": 0.6827, "step": 835 }, { "epoch": 0.14474623958446056, "grad_norm": 1.375, "learning_rate": 1.9910038915857974e-05, "loss": 0.5485, "step": 836 }, { "epoch": 0.14491938101937019, "grad_norm": 1.59375, "learning_rate": 1.9909794658757012e-05, "loss": 0.6698, "step": 837 }, { "epoch": 0.14509252245427984, "grad_norm": 1.4453125, "learning_rate": 1.9909550072011475e-05, "loss": 0.6942, "step": 838 }, { "epoch": 0.1452656638891895, "grad_norm": 1.3984375, "learning_rate": 1.990930515562949e-05, "loss": 0.5868, "step": 839 }, { "epoch": 0.1454388053240991, "grad_norm": 1.421875, "learning_rate": 1.9909059909619212e-05, "loss": 0.5715, "step": 840 }, { "epoch": 0.14561194675900876, "grad_norm": 1.4921875, "learning_rate": 1.9908814333988794e-05, "loss": 0.6403, "step": 841 }, { "epoch": 0.14578508819391842, "grad_norm": 1.421875, "learning_rate": 1.9908568428746408e-05, "loss": 0.6381, "step": 842 }, { "epoch": 0.14595822962882804, "grad_norm": 1.4609375, "learning_rate": 1.990832219390023e-05, "loss": 0.6363, "step": 843 }, { "epoch": 0.1461313710637377, "grad_norm": 1.4609375, "learning_rate": 1.990807562945846e-05, "loss": 0.6146, "step": 844 }, { "epoch": 0.14630451249864734, "grad_norm": 1.84375, "learning_rate": 1.9907828735429288e-05, "loss": 0.6556, "step": 845 }, { "epoch": 0.14647765393355697, "grad_norm": 1.421875, "learning_rate": 1.9907581511820933e-05, "loss": 0.5396, "step": 846 }, { "epoch": 0.14665079536846662, "grad_norm": 1.734375, "learning_rate": 1.990733395864162e-05, "loss": 0.7529, "step": 847 }, { "epoch": 0.14682393680337627, "grad_norm": 1.6015625, "learning_rate": 1.9907086075899576e-05, "loss": 0.5863, "step": 848 }, { "epoch": 0.1469970782382859, "grad_norm": 1.5546875, "learning_rate": 1.9906837863603058e-05, "loss": 0.8027, "step": 849 }, { "epoch": 0.14717021967319555, "grad_norm": 1.46875, "learning_rate": 1.9906589321760315e-05, "loss": 0.649, "step": 850 }, { "epoch": 0.14734336110810517, "grad_norm": 1.4453125, "learning_rate": 1.9906340450379614e-05, "loss": 0.6144, "step": 851 }, { "epoch": 0.14751650254301482, "grad_norm": 1.4921875, "learning_rate": 1.9906091249469242e-05, "loss": 0.6763, "step": 852 }, { "epoch": 0.14768964397792447, "grad_norm": 1.4453125, "learning_rate": 1.990584171903748e-05, "loss": 0.6527, "step": 853 }, { "epoch": 0.1478627854128341, "grad_norm": 1.46875, "learning_rate": 1.990559185909263e-05, "loss": 0.6318, "step": 854 }, { "epoch": 0.14803592684774375, "grad_norm": 1.3984375, "learning_rate": 1.9905341669643007e-05, "loss": 0.6341, "step": 855 }, { "epoch": 0.1482090682826534, "grad_norm": 1.546875, "learning_rate": 1.990509115069693e-05, "loss": 0.5989, "step": 856 }, { "epoch": 0.14838220971756302, "grad_norm": 1.484375, "learning_rate": 1.9904840302262737e-05, "loss": 0.632, "step": 857 }, { "epoch": 0.14855535115247268, "grad_norm": 1.640625, "learning_rate": 1.9904589124348766e-05, "loss": 0.614, "step": 858 }, { "epoch": 0.14872849258738233, "grad_norm": 1.5, "learning_rate": 1.9904337616963372e-05, "loss": 0.6215, "step": 859 }, { "epoch": 0.14890163402229195, "grad_norm": 1.53125, "learning_rate": 1.990408578011493e-05, "loss": 0.725, "step": 860 }, { "epoch": 0.1490747754572016, "grad_norm": 1.359375, "learning_rate": 1.990383361381181e-05, "loss": 0.5707, "step": 861 }, { "epoch": 0.14924791689211125, "grad_norm": 1.6484375, "learning_rate": 1.99035811180624e-05, "loss": 0.6227, "step": 862 }, { "epoch": 0.14942105832702088, "grad_norm": 1.578125, "learning_rate": 1.9903328292875106e-05, "loss": 0.6496, "step": 863 }, { "epoch": 0.14959419976193053, "grad_norm": 1.453125, "learning_rate": 1.990307513825833e-05, "loss": 0.6555, "step": 864 }, { "epoch": 0.14976734119684018, "grad_norm": 1.4453125, "learning_rate": 1.9902821654220496e-05, "loss": 0.6147, "step": 865 }, { "epoch": 0.1499404826317498, "grad_norm": 1.546875, "learning_rate": 1.9902567840770038e-05, "loss": 0.5793, "step": 866 }, { "epoch": 0.15011362406665946, "grad_norm": 1.46875, "learning_rate": 1.9902313697915395e-05, "loss": 0.6256, "step": 867 }, { "epoch": 0.15028676550156908, "grad_norm": 1.484375, "learning_rate": 1.9902059225665025e-05, "loss": 0.6382, "step": 868 }, { "epoch": 0.15045990693647873, "grad_norm": 1.390625, "learning_rate": 1.990180442402739e-05, "loss": 0.6414, "step": 869 }, { "epoch": 0.15063304837138838, "grad_norm": 1.4765625, "learning_rate": 1.990154929301097e-05, "loss": 0.6901, "step": 870 }, { "epoch": 0.150806189806298, "grad_norm": 1.5, "learning_rate": 1.9901293832624246e-05, "loss": 0.6066, "step": 871 }, { "epoch": 0.15097933124120766, "grad_norm": 1.5, "learning_rate": 1.990103804287572e-05, "loss": 0.6445, "step": 872 }, { "epoch": 0.1511524726761173, "grad_norm": 1.65625, "learning_rate": 1.9900781923773897e-05, "loss": 0.619, "step": 873 }, { "epoch": 0.15132561411102693, "grad_norm": 1.3125, "learning_rate": 1.9900525475327302e-05, "loss": 0.5511, "step": 874 }, { "epoch": 0.1514987555459366, "grad_norm": 1.4375, "learning_rate": 1.9900268697544462e-05, "loss": 0.6454, "step": 875 }, { "epoch": 0.15167189698084624, "grad_norm": 2.390625, "learning_rate": 1.9900011590433917e-05, "loss": 0.6677, "step": 876 }, { "epoch": 0.15184503841575586, "grad_norm": 1.375, "learning_rate": 1.9899754154004223e-05, "loss": 0.5692, "step": 877 }, { "epoch": 0.1520181798506655, "grad_norm": 1.515625, "learning_rate": 1.9899496388263942e-05, "loss": 0.621, "step": 878 }, { "epoch": 0.15219132128557517, "grad_norm": 1.4140625, "learning_rate": 1.9899238293221652e-05, "loss": 0.5752, "step": 879 }, { "epoch": 0.1523644627204848, "grad_norm": 1.359375, "learning_rate": 1.9898979868885933e-05, "loss": 0.5629, "step": 880 }, { "epoch": 0.15253760415539444, "grad_norm": 1.5390625, "learning_rate": 1.9898721115265382e-05, "loss": 0.6354, "step": 881 }, { "epoch": 0.1527107455903041, "grad_norm": 1.4140625, "learning_rate": 1.9898462032368607e-05, "loss": 0.6102, "step": 882 }, { "epoch": 0.15288388702521372, "grad_norm": 1.5390625, "learning_rate": 1.9898202620204233e-05, "loss": 0.6968, "step": 883 }, { "epoch": 0.15305702846012337, "grad_norm": 1.5390625, "learning_rate": 1.9897942878780877e-05, "loss": 0.5562, "step": 884 }, { "epoch": 0.153230169895033, "grad_norm": 1.6015625, "learning_rate": 1.9897682808107187e-05, "loss": 0.667, "step": 885 }, { "epoch": 0.15340331132994264, "grad_norm": 1.3203125, "learning_rate": 1.9897422408191815e-05, "loss": 0.5992, "step": 886 }, { "epoch": 0.1535764527648523, "grad_norm": 1.4921875, "learning_rate": 1.989716167904342e-05, "loss": 0.5775, "step": 887 }, { "epoch": 0.15374959419976192, "grad_norm": 1.515625, "learning_rate": 1.9896900620670672e-05, "loss": 0.6949, "step": 888 }, { "epoch": 0.15392273563467157, "grad_norm": 1.4296875, "learning_rate": 1.9896639233082263e-05, "loss": 0.6223, "step": 889 }, { "epoch": 0.15409587706958122, "grad_norm": 1.53125, "learning_rate": 1.9896377516286885e-05, "loss": 0.6869, "step": 890 }, { "epoch": 0.15426901850449085, "grad_norm": 1.421875, "learning_rate": 1.9896115470293236e-05, "loss": 0.625, "step": 891 }, { "epoch": 0.1544421599394005, "grad_norm": 1.484375, "learning_rate": 1.9895853095110045e-05, "loss": 0.607, "step": 892 }, { "epoch": 0.15461530137431015, "grad_norm": 1.296875, "learning_rate": 1.989559039074603e-05, "loss": 0.6392, "step": 893 }, { "epoch": 0.15478844280921977, "grad_norm": 1.4140625, "learning_rate": 1.9895327357209938e-05, "loss": 0.683, "step": 894 }, { "epoch": 0.15496158424412942, "grad_norm": 1.6171875, "learning_rate": 1.9895063994510512e-05, "loss": 0.5916, "step": 895 }, { "epoch": 0.15513472567903908, "grad_norm": 1.5078125, "learning_rate": 1.9894800302656515e-05, "loss": 0.5906, "step": 896 }, { "epoch": 0.1553078671139487, "grad_norm": 1.453125, "learning_rate": 1.9894536281656723e-05, "loss": 0.6309, "step": 897 }, { "epoch": 0.15548100854885835, "grad_norm": 1.5, "learning_rate": 1.989427193151991e-05, "loss": 0.6426, "step": 898 }, { "epoch": 0.155654149983768, "grad_norm": 1.4140625, "learning_rate": 1.9894007252254874e-05, "loss": 0.5515, "step": 899 }, { "epoch": 0.15582729141867763, "grad_norm": 1.5625, "learning_rate": 1.989374224387042e-05, "loss": 0.6472, "step": 900 }, { "epoch": 0.15600043285358728, "grad_norm": 1.46875, "learning_rate": 1.9893476906375364e-05, "loss": 0.623, "step": 901 }, { "epoch": 0.1561735742884969, "grad_norm": 1.3671875, "learning_rate": 1.989321123977853e-05, "loss": 0.5999, "step": 902 }, { "epoch": 0.15634671572340655, "grad_norm": 1.3984375, "learning_rate": 1.9892945244088753e-05, "loss": 0.6014, "step": 903 }, { "epoch": 0.1565198571583162, "grad_norm": 1.4765625, "learning_rate": 1.9892678919314887e-05, "loss": 0.5629, "step": 904 }, { "epoch": 0.15669299859322583, "grad_norm": 1.5234375, "learning_rate": 1.989241226546579e-05, "loss": 0.6828, "step": 905 }, { "epoch": 0.15686614002813548, "grad_norm": 1.4375, "learning_rate": 1.989214528255033e-05, "loss": 0.6338, "step": 906 }, { "epoch": 0.15703928146304513, "grad_norm": 1.5703125, "learning_rate": 1.9891877970577387e-05, "loss": 0.6608, "step": 907 }, { "epoch": 0.15721242289795476, "grad_norm": 1.390625, "learning_rate": 1.9891610329555856e-05, "loss": 0.6261, "step": 908 }, { "epoch": 0.1573855643328644, "grad_norm": 1.578125, "learning_rate": 1.989134235949464e-05, "loss": 0.5784, "step": 909 }, { "epoch": 0.15755870576777406, "grad_norm": 1.5703125, "learning_rate": 1.989107406040265e-05, "loss": 0.6395, "step": 910 }, { "epoch": 0.15773184720268368, "grad_norm": 1.5078125, "learning_rate": 1.989080543228881e-05, "loss": 0.6746, "step": 911 }, { "epoch": 0.15790498863759334, "grad_norm": 1.5625, "learning_rate": 1.9890536475162062e-05, "loss": 0.6464, "step": 912 }, { "epoch": 0.158078130072503, "grad_norm": 1.3515625, "learning_rate": 1.9890267189031348e-05, "loss": 0.601, "step": 913 }, { "epoch": 0.1582512715074126, "grad_norm": 1.5, "learning_rate": 1.9889997573905624e-05, "loss": 0.6393, "step": 914 }, { "epoch": 0.15842441294232226, "grad_norm": 1.390625, "learning_rate": 1.9889727629793866e-05, "loss": 0.5757, "step": 915 }, { "epoch": 0.15859755437723191, "grad_norm": 1.5234375, "learning_rate": 1.9889457356705048e-05, "loss": 0.6441, "step": 916 }, { "epoch": 0.15877069581214154, "grad_norm": 1.515625, "learning_rate": 1.988918675464816e-05, "loss": 0.6458, "step": 917 }, { "epoch": 0.1589438372470512, "grad_norm": 1.5234375, "learning_rate": 1.98889158236322e-05, "loss": 0.6344, "step": 918 }, { "epoch": 0.1591169786819608, "grad_norm": 1.4609375, "learning_rate": 1.9888644563666194e-05, "loss": 0.6506, "step": 919 }, { "epoch": 0.15929012011687047, "grad_norm": 1.4453125, "learning_rate": 1.9888372974759154e-05, "loss": 0.6181, "step": 920 }, { "epoch": 0.15946326155178012, "grad_norm": 1.4140625, "learning_rate": 1.9888101056920112e-05, "loss": 0.6601, "step": 921 }, { "epoch": 0.15963640298668974, "grad_norm": 1.4609375, "learning_rate": 1.988782881015812e-05, "loss": 0.6298, "step": 922 }, { "epoch": 0.1598095444215994, "grad_norm": 1.4453125, "learning_rate": 1.9887556234482236e-05, "loss": 0.6018, "step": 923 }, { "epoch": 0.15998268585650904, "grad_norm": 1.4765625, "learning_rate": 1.988728332990152e-05, "loss": 0.7323, "step": 924 }, { "epoch": 0.16015582729141867, "grad_norm": 1.578125, "learning_rate": 1.9887010096425054e-05, "loss": 0.6615, "step": 925 }, { "epoch": 0.16032896872632832, "grad_norm": 1.59375, "learning_rate": 1.9886736534061926e-05, "loss": 0.5843, "step": 926 }, { "epoch": 0.16050211016123797, "grad_norm": 1.4375, "learning_rate": 1.9886462642821235e-05, "loss": 0.557, "step": 927 }, { "epoch": 0.1606752515961476, "grad_norm": 1.421875, "learning_rate": 1.9886188422712097e-05, "loss": 0.6123, "step": 928 }, { "epoch": 0.16084839303105725, "grad_norm": 1.5625, "learning_rate": 1.9885913873743624e-05, "loss": 0.6969, "step": 929 }, { "epoch": 0.1610215344659669, "grad_norm": 1.453125, "learning_rate": 1.988563899592496e-05, "loss": 0.618, "step": 930 }, { "epoch": 0.16119467590087652, "grad_norm": 1.6015625, "learning_rate": 1.988536378926524e-05, "loss": 0.6822, "step": 931 }, { "epoch": 0.16136781733578617, "grad_norm": 1.421875, "learning_rate": 1.9885088253773623e-05, "loss": 0.6426, "step": 932 }, { "epoch": 0.16154095877069582, "grad_norm": 1.3203125, "learning_rate": 1.9884812389459274e-05, "loss": 0.6, "step": 933 }, { "epoch": 0.16171410020560545, "grad_norm": 1.4765625, "learning_rate": 1.988453619633137e-05, "loss": 0.6111, "step": 934 }, { "epoch": 0.1618872416405151, "grad_norm": 1.3671875, "learning_rate": 1.9884259674399094e-05, "loss": 0.6712, "step": 935 }, { "epoch": 0.16206038307542472, "grad_norm": 1.546875, "learning_rate": 1.988398282367165e-05, "loss": 0.6617, "step": 936 }, { "epoch": 0.16223352451033438, "grad_norm": 1.5625, "learning_rate": 1.988370564415824e-05, "loss": 0.6015, "step": 937 }, { "epoch": 0.16240666594524403, "grad_norm": 1.5, "learning_rate": 1.9883428135868096e-05, "loss": 0.6118, "step": 938 }, { "epoch": 0.16257980738015365, "grad_norm": 1.421875, "learning_rate": 1.988315029881044e-05, "loss": 0.5284, "step": 939 }, { "epoch": 0.1627529488150633, "grad_norm": 1.5234375, "learning_rate": 1.9882872132994516e-05, "loss": 0.5914, "step": 940 }, { "epoch": 0.16292609024997295, "grad_norm": 1.484375, "learning_rate": 1.988259363842958e-05, "loss": 0.6639, "step": 941 }, { "epoch": 0.16309923168488258, "grad_norm": 1.703125, "learning_rate": 1.988231481512489e-05, "loss": 0.6747, "step": 942 }, { "epoch": 0.16327237311979223, "grad_norm": 1.6484375, "learning_rate": 1.9882035663089727e-05, "loss": 0.6758, "step": 943 }, { "epoch": 0.16344551455470188, "grad_norm": 1.453125, "learning_rate": 1.9881756182333376e-05, "loss": 0.6293, "step": 944 }, { "epoch": 0.1636186559896115, "grad_norm": 1.34375, "learning_rate": 1.988147637286513e-05, "loss": 0.5238, "step": 945 }, { "epoch": 0.16379179742452116, "grad_norm": 1.4140625, "learning_rate": 1.9881196234694298e-05, "loss": 0.6041, "step": 946 }, { "epoch": 0.1639649388594308, "grad_norm": 1.5859375, "learning_rate": 1.98809157678302e-05, "loss": 0.7785, "step": 947 }, { "epoch": 0.16413808029434043, "grad_norm": 1.515625, "learning_rate": 1.9880634972282168e-05, "loss": 0.6052, "step": 948 }, { "epoch": 0.16431122172925008, "grad_norm": 1.484375, "learning_rate": 1.9880353848059536e-05, "loss": 0.6959, "step": 949 }, { "epoch": 0.16448436316415974, "grad_norm": 1.5390625, "learning_rate": 1.988007239517166e-05, "loss": 0.6996, "step": 950 }, { "epoch": 0.16465750459906936, "grad_norm": 1.3515625, "learning_rate": 1.9879790613627904e-05, "loss": 0.5967, "step": 951 }, { "epoch": 0.164830646033979, "grad_norm": 2.0, "learning_rate": 1.9879508503437637e-05, "loss": 0.6116, "step": 952 }, { "epoch": 0.16500378746888864, "grad_norm": 1.4921875, "learning_rate": 1.9879226064610246e-05, "loss": 0.6818, "step": 953 }, { "epoch": 0.1651769289037983, "grad_norm": 1.3671875, "learning_rate": 1.9878943297155124e-05, "loss": 0.5912, "step": 954 }, { "epoch": 0.16535007033870794, "grad_norm": 1.4296875, "learning_rate": 1.9878660201081682e-05, "loss": 0.6234, "step": 955 }, { "epoch": 0.16552321177361756, "grad_norm": 1.4765625, "learning_rate": 1.9878376776399327e-05, "loss": 0.6277, "step": 956 }, { "epoch": 0.16569635320852721, "grad_norm": 1.390625, "learning_rate": 1.98780930231175e-05, "loss": 0.5307, "step": 957 }, { "epoch": 0.16586949464343687, "grad_norm": 1.3359375, "learning_rate": 1.987780894124563e-05, "loss": 0.5725, "step": 958 }, { "epoch": 0.1660426360783465, "grad_norm": 1.5234375, "learning_rate": 1.9877524530793172e-05, "loss": 0.6948, "step": 959 }, { "epoch": 0.16621577751325614, "grad_norm": 1.4453125, "learning_rate": 1.9877239791769584e-05, "loss": 0.6842, "step": 960 }, { "epoch": 0.1663889189481658, "grad_norm": 1.453125, "learning_rate": 1.9876954724184337e-05, "loss": 0.6796, "step": 961 }, { "epoch": 0.16656206038307542, "grad_norm": 1.5, "learning_rate": 1.9876669328046917e-05, "loss": 0.6349, "step": 962 }, { "epoch": 0.16673520181798507, "grad_norm": 1.6640625, "learning_rate": 1.987638360336682e-05, "loss": 0.7232, "step": 963 }, { "epoch": 0.16690834325289472, "grad_norm": 1.4375, "learning_rate": 1.987609755015354e-05, "loss": 0.6095, "step": 964 }, { "epoch": 0.16708148468780434, "grad_norm": 1.3359375, "learning_rate": 1.98758111684166e-05, "loss": 0.5728, "step": 965 }, { "epoch": 0.167254626122714, "grad_norm": 1.484375, "learning_rate": 1.987552445816553e-05, "loss": 0.6031, "step": 966 }, { "epoch": 0.16742776755762365, "grad_norm": 1.6171875, "learning_rate": 1.9875237419409858e-05, "loss": 0.7343, "step": 967 }, { "epoch": 0.16760090899253327, "grad_norm": 1.546875, "learning_rate": 1.987495005215914e-05, "loss": 0.7663, "step": 968 }, { "epoch": 0.16777405042744292, "grad_norm": 1.4140625, "learning_rate": 1.987466235642293e-05, "loss": 0.592, "step": 969 }, { "epoch": 0.16794719186235255, "grad_norm": 1.4609375, "learning_rate": 1.98743743322108e-05, "loss": 0.5798, "step": 970 }, { "epoch": 0.1681203332972622, "grad_norm": 1.515625, "learning_rate": 1.987408597953233e-05, "loss": 0.6151, "step": 971 }, { "epoch": 0.16829347473217185, "grad_norm": 1.359375, "learning_rate": 1.987379729839711e-05, "loss": 0.6063, "step": 972 }, { "epoch": 0.16846661616708147, "grad_norm": 1.6328125, "learning_rate": 1.9873508288814752e-05, "loss": 0.5559, "step": 973 }, { "epoch": 0.16863975760199112, "grad_norm": 1.4140625, "learning_rate": 1.9873218950794863e-05, "loss": 0.628, "step": 974 }, { "epoch": 0.16881289903690078, "grad_norm": 1.6328125, "learning_rate": 1.9872929284347067e-05, "loss": 0.6468, "step": 975 }, { "epoch": 0.1689860404718104, "grad_norm": 1.4140625, "learning_rate": 1.9872639289481e-05, "loss": 0.6055, "step": 976 }, { "epoch": 0.16915918190672005, "grad_norm": 1.484375, "learning_rate": 1.9872348966206306e-05, "loss": 0.6075, "step": 977 }, { "epoch": 0.1693323233416297, "grad_norm": 1.390625, "learning_rate": 1.9872058314532648e-05, "loss": 0.552, "step": 978 }, { "epoch": 0.16950546477653933, "grad_norm": 1.4140625, "learning_rate": 1.9871767334469694e-05, "loss": 0.5893, "step": 979 }, { "epoch": 0.16967860621144898, "grad_norm": 1.5546875, "learning_rate": 1.9871476026027122e-05, "loss": 0.6232, "step": 980 }, { "epoch": 0.16985174764635863, "grad_norm": 1.515625, "learning_rate": 1.987118438921462e-05, "loss": 0.6759, "step": 981 }, { "epoch": 0.17002488908126825, "grad_norm": 1.5546875, "learning_rate": 1.987089242404189e-05, "loss": 0.6257, "step": 982 }, { "epoch": 0.1701980305161779, "grad_norm": 1.359375, "learning_rate": 1.9870600130518645e-05, "loss": 0.6235, "step": 983 }, { "epoch": 0.17037117195108756, "grad_norm": 1.5390625, "learning_rate": 1.987030750865461e-05, "loss": 0.7569, "step": 984 }, { "epoch": 0.17054431338599718, "grad_norm": 1.5078125, "learning_rate": 1.9870014558459515e-05, "loss": 0.6079, "step": 985 }, { "epoch": 0.17071745482090683, "grad_norm": 1.46875, "learning_rate": 1.9869721279943106e-05, "loss": 0.6243, "step": 986 }, { "epoch": 0.17089059625581646, "grad_norm": 1.390625, "learning_rate": 1.9869427673115142e-05, "loss": 0.6499, "step": 987 }, { "epoch": 0.1710637376907261, "grad_norm": 1.671875, "learning_rate": 1.9869133737985386e-05, "loss": 0.5935, "step": 988 }, { "epoch": 0.17123687912563576, "grad_norm": 1.4296875, "learning_rate": 1.9868839474563617e-05, "loss": 0.6131, "step": 989 }, { "epoch": 0.17141002056054538, "grad_norm": 1.328125, "learning_rate": 1.986854488285962e-05, "loss": 0.5507, "step": 990 }, { "epoch": 0.17158316199545504, "grad_norm": 1.4453125, "learning_rate": 1.9868249962883203e-05, "loss": 0.6064, "step": 991 }, { "epoch": 0.1717563034303647, "grad_norm": 1.46875, "learning_rate": 1.986795471464417e-05, "loss": 0.6498, "step": 992 }, { "epoch": 0.1719294448652743, "grad_norm": 1.4375, "learning_rate": 1.986765913815234e-05, "loss": 0.5757, "step": 993 }, { "epoch": 0.17210258630018396, "grad_norm": 1.4609375, "learning_rate": 1.9867363233417555e-05, "loss": 0.5838, "step": 994 }, { "epoch": 0.17227572773509361, "grad_norm": 1.390625, "learning_rate": 1.9867067000449647e-05, "loss": 0.5616, "step": 995 }, { "epoch": 0.17244886917000324, "grad_norm": 1.4375, "learning_rate": 1.9866770439258475e-05, "loss": 0.5922, "step": 996 }, { "epoch": 0.1726220106049129, "grad_norm": 1.4765625, "learning_rate": 1.9866473549853904e-05, "loss": 0.6753, "step": 997 }, { "epoch": 0.17279515203982254, "grad_norm": 1.5625, "learning_rate": 1.986617633224581e-05, "loss": 0.5778, "step": 998 }, { "epoch": 0.17296829347473217, "grad_norm": 1.40625, "learning_rate": 1.986587878644408e-05, "loss": 0.586, "step": 999 }, { "epoch": 0.17314143490964182, "grad_norm": 1.4140625, "learning_rate": 1.986558091245861e-05, "loss": 0.6419, "step": 1000 }, { "epoch": 0.17331457634455147, "grad_norm": 1.4765625, "learning_rate": 1.986528271029931e-05, "loss": 0.6746, "step": 1001 }, { "epoch": 0.1734877177794611, "grad_norm": 1.5078125, "learning_rate": 1.98649841799761e-05, "loss": 0.5815, "step": 1002 }, { "epoch": 0.17366085921437074, "grad_norm": 1.3984375, "learning_rate": 1.9864685321498913e-05, "loss": 0.6291, "step": 1003 }, { "epoch": 0.17383400064928037, "grad_norm": 1.375, "learning_rate": 1.986438613487768e-05, "loss": 0.5861, "step": 1004 }, { "epoch": 0.17400714208419002, "grad_norm": 1.484375, "learning_rate": 1.986408662012237e-05, "loss": 0.5854, "step": 1005 }, { "epoch": 0.17418028351909967, "grad_norm": 1.6015625, "learning_rate": 1.9863786777242927e-05, "loss": 0.6013, "step": 1006 }, { "epoch": 0.1743534249540093, "grad_norm": 1.46875, "learning_rate": 1.986348660624934e-05, "loss": 0.6568, "step": 1007 }, { "epoch": 0.17452656638891895, "grad_norm": 1.3984375, "learning_rate": 1.9863186107151587e-05, "loss": 0.6128, "step": 1008 }, { "epoch": 0.1746997078238286, "grad_norm": 1.53125, "learning_rate": 1.9862885279959668e-05, "loss": 0.617, "step": 1009 }, { "epoch": 0.17487284925873822, "grad_norm": 1.609375, "learning_rate": 1.9862584124683587e-05, "loss": 0.5982, "step": 1010 }, { "epoch": 0.17504599069364787, "grad_norm": 1.5, "learning_rate": 1.9862282641333363e-05, "loss": 0.6003, "step": 1011 }, { "epoch": 0.17521913212855753, "grad_norm": 1.4453125, "learning_rate": 1.9861980829919022e-05, "loss": 0.5741, "step": 1012 }, { "epoch": 0.17539227356346715, "grad_norm": 1.6171875, "learning_rate": 1.9861678690450606e-05, "loss": 0.5944, "step": 1013 }, { "epoch": 0.1755654149983768, "grad_norm": 1.6328125, "learning_rate": 1.9861376222938167e-05, "loss": 0.6043, "step": 1014 }, { "epoch": 0.17573855643328645, "grad_norm": 1.4140625, "learning_rate": 1.9861073427391763e-05, "loss": 0.5867, "step": 1015 }, { "epoch": 0.17591169786819608, "grad_norm": 1.359375, "learning_rate": 1.986077030382147e-05, "loss": 0.6019, "step": 1016 }, { "epoch": 0.17608483930310573, "grad_norm": 1.53125, "learning_rate": 1.986046685223737e-05, "loss": 0.647, "step": 1017 }, { "epoch": 0.17625798073801538, "grad_norm": 1.3125, "learning_rate": 1.9860163072649557e-05, "loss": 0.5873, "step": 1018 }, { "epoch": 0.176431122172925, "grad_norm": 1.65625, "learning_rate": 1.985985896506813e-05, "loss": 0.6865, "step": 1019 }, { "epoch": 0.17660426360783466, "grad_norm": 1.578125, "learning_rate": 1.985955452950322e-05, "loss": 0.5903, "step": 1020 }, { "epoch": 0.17677740504274428, "grad_norm": 1.3984375, "learning_rate": 1.9859249765964937e-05, "loss": 0.643, "step": 1021 }, { "epoch": 0.17695054647765393, "grad_norm": 1.3828125, "learning_rate": 1.985894467446343e-05, "loss": 0.5793, "step": 1022 }, { "epoch": 0.17712368791256358, "grad_norm": 1.4921875, "learning_rate": 1.9858639255008844e-05, "loss": 0.6091, "step": 1023 }, { "epoch": 0.1772968293474732, "grad_norm": 1.515625, "learning_rate": 1.9858333507611337e-05, "loss": 0.6221, "step": 1024 }, { "epoch": 0.17746997078238286, "grad_norm": 1.421875, "learning_rate": 1.9858027432281082e-05, "loss": 0.6248, "step": 1025 }, { "epoch": 0.1776431122172925, "grad_norm": 1.34375, "learning_rate": 1.9857721029028258e-05, "loss": 0.5782, "step": 1026 }, { "epoch": 0.17781625365220213, "grad_norm": 1.5390625, "learning_rate": 1.985741429786306e-05, "loss": 0.5531, "step": 1027 }, { "epoch": 0.17798939508711178, "grad_norm": 1.4375, "learning_rate": 1.9857107238795694e-05, "loss": 0.6167, "step": 1028 }, { "epoch": 0.17816253652202144, "grad_norm": 1.359375, "learning_rate": 1.9856799851836366e-05, "loss": 0.6334, "step": 1029 }, { "epoch": 0.17833567795693106, "grad_norm": 1.4453125, "learning_rate": 1.9856492136995308e-05, "loss": 0.6325, "step": 1030 }, { "epoch": 0.1785088193918407, "grad_norm": 1.484375, "learning_rate": 1.985618409428275e-05, "loss": 0.6438, "step": 1031 }, { "epoch": 0.17868196082675036, "grad_norm": 1.40625, "learning_rate": 1.9855875723708947e-05, "loss": 0.5783, "step": 1032 }, { "epoch": 0.17885510226166, "grad_norm": 1.4765625, "learning_rate": 1.985556702528415e-05, "loss": 0.6042, "step": 1033 }, { "epoch": 0.17902824369656964, "grad_norm": 1.6484375, "learning_rate": 1.9855257999018632e-05, "loss": 0.5917, "step": 1034 }, { "epoch": 0.1792013851314793, "grad_norm": 1.4140625, "learning_rate": 1.985494864492267e-05, "loss": 0.5796, "step": 1035 }, { "epoch": 0.17937452656638891, "grad_norm": 1.4296875, "learning_rate": 1.9854638963006552e-05, "loss": 0.682, "step": 1036 }, { "epoch": 0.17954766800129857, "grad_norm": 1.4765625, "learning_rate": 1.9854328953280587e-05, "loss": 0.5278, "step": 1037 }, { "epoch": 0.1797208094362082, "grad_norm": 1.5, "learning_rate": 1.985401861575508e-05, "loss": 0.6488, "step": 1038 }, { "epoch": 0.17989395087111784, "grad_norm": 1.421875, "learning_rate": 1.9853707950440358e-05, "loss": 0.6366, "step": 1039 }, { "epoch": 0.1800670923060275, "grad_norm": 1.5078125, "learning_rate": 1.9853396957346752e-05, "loss": 0.6678, "step": 1040 }, { "epoch": 0.18024023374093712, "grad_norm": 1.59375, "learning_rate": 1.9853085636484613e-05, "loss": 0.6256, "step": 1041 }, { "epoch": 0.18041337517584677, "grad_norm": 1.40625, "learning_rate": 1.985277398786429e-05, "loss": 0.5686, "step": 1042 }, { "epoch": 0.18058651661075642, "grad_norm": 1.3984375, "learning_rate": 1.9852462011496158e-05, "loss": 0.5153, "step": 1043 }, { "epoch": 0.18075965804566604, "grad_norm": 1.3984375, "learning_rate": 1.9852149707390584e-05, "loss": 0.6047, "step": 1044 }, { "epoch": 0.1809327994805757, "grad_norm": 1.4296875, "learning_rate": 1.9851837075557964e-05, "loss": 0.6314, "step": 1045 }, { "epoch": 0.18110594091548535, "grad_norm": 1.4140625, "learning_rate": 1.9851524116008698e-05, "loss": 0.6143, "step": 1046 }, { "epoch": 0.18127908235039497, "grad_norm": 1.4609375, "learning_rate": 1.985121082875319e-05, "loss": 0.6149, "step": 1047 }, { "epoch": 0.18145222378530462, "grad_norm": 1.46875, "learning_rate": 1.985089721380187e-05, "loss": 0.6372, "step": 1048 }, { "epoch": 0.18162536522021427, "grad_norm": 1.59375, "learning_rate": 1.9850583271165166e-05, "loss": 0.6224, "step": 1049 }, { "epoch": 0.1817985066551239, "grad_norm": 1.484375, "learning_rate": 1.9850269000853522e-05, "loss": 0.7188, "step": 1050 }, { "epoch": 0.18197164809003355, "grad_norm": 1.5, "learning_rate": 1.984995440287739e-05, "loss": 0.5889, "step": 1051 }, { "epoch": 0.1821447895249432, "grad_norm": 1.5390625, "learning_rate": 1.9849639477247233e-05, "loss": 0.6552, "step": 1052 }, { "epoch": 0.18231793095985283, "grad_norm": 1.5625, "learning_rate": 1.9849324223973535e-05, "loss": 0.6153, "step": 1053 }, { "epoch": 0.18249107239476248, "grad_norm": 1.40625, "learning_rate": 1.9849008643066774e-05, "loss": 0.6291, "step": 1054 }, { "epoch": 0.1826642138296721, "grad_norm": 1.3984375, "learning_rate": 1.9848692734537455e-05, "loss": 0.6275, "step": 1055 }, { "epoch": 0.18283735526458175, "grad_norm": 1.375, "learning_rate": 1.984837649839608e-05, "loss": 0.5778, "step": 1056 }, { "epoch": 0.1830104966994914, "grad_norm": 1.484375, "learning_rate": 1.984805993465317e-05, "loss": 0.6427, "step": 1057 }, { "epoch": 0.18318363813440103, "grad_norm": 1.3515625, "learning_rate": 1.9847743043319258e-05, "loss": 0.5766, "step": 1058 }, { "epoch": 0.18335677956931068, "grad_norm": 1.3515625, "learning_rate": 1.9847425824404886e-05, "loss": 0.5991, "step": 1059 }, { "epoch": 0.18352992100422033, "grad_norm": 1.421875, "learning_rate": 1.9847108277920604e-05, "loss": 0.6314, "step": 1060 }, { "epoch": 0.18370306243912995, "grad_norm": 1.5234375, "learning_rate": 1.9846790403876975e-05, "loss": 0.6336, "step": 1061 }, { "epoch": 0.1838762038740396, "grad_norm": 1.4296875, "learning_rate": 1.9846472202284574e-05, "loss": 0.6121, "step": 1062 }, { "epoch": 0.18404934530894926, "grad_norm": 1.421875, "learning_rate": 1.984615367315398e-05, "loss": 0.692, "step": 1063 }, { "epoch": 0.18422248674385888, "grad_norm": 1.40625, "learning_rate": 1.9845834816495795e-05, "loss": 0.6205, "step": 1064 }, { "epoch": 0.18439562817876853, "grad_norm": 1.4375, "learning_rate": 1.9845515632320628e-05, "loss": 0.5661, "step": 1065 }, { "epoch": 0.18456876961367819, "grad_norm": 1.5703125, "learning_rate": 1.984519612063909e-05, "loss": 0.6416, "step": 1066 }, { "epoch": 0.1847419110485878, "grad_norm": 1.5078125, "learning_rate": 1.9844876281461816e-05, "loss": 0.5936, "step": 1067 }, { "epoch": 0.18491505248349746, "grad_norm": 1.4609375, "learning_rate": 1.984455611479944e-05, "loss": 0.6078, "step": 1068 }, { "epoch": 0.1850881939184071, "grad_norm": 1.5, "learning_rate": 1.9844235620662607e-05, "loss": 0.5427, "step": 1069 }, { "epoch": 0.18526133535331674, "grad_norm": 1.453125, "learning_rate": 1.9843914799061995e-05, "loss": 0.6804, "step": 1070 }, { "epoch": 0.1854344767882264, "grad_norm": 1.4140625, "learning_rate": 1.9843593650008257e-05, "loss": 0.5526, "step": 1071 }, { "epoch": 0.185607618223136, "grad_norm": 1.3828125, "learning_rate": 1.984327217351209e-05, "loss": 0.5559, "step": 1072 }, { "epoch": 0.18578075965804566, "grad_norm": 1.5546875, "learning_rate": 1.984295036958418e-05, "loss": 0.6158, "step": 1073 }, { "epoch": 0.18595390109295531, "grad_norm": 1.3984375, "learning_rate": 1.9842628238235235e-05, "loss": 0.6225, "step": 1074 }, { "epoch": 0.18612704252786494, "grad_norm": 1.5078125, "learning_rate": 1.984230577947597e-05, "loss": 0.6678, "step": 1075 }, { "epoch": 0.1863001839627746, "grad_norm": 1.4453125, "learning_rate": 1.984198299331711e-05, "loss": 0.5624, "step": 1076 }, { "epoch": 0.18647332539768424, "grad_norm": 1.5078125, "learning_rate": 1.9841659879769396e-05, "loss": 0.5741, "step": 1077 }, { "epoch": 0.18664646683259387, "grad_norm": 1.515625, "learning_rate": 1.984133643884357e-05, "loss": 0.6827, "step": 1078 }, { "epoch": 0.18681960826750352, "grad_norm": 1.4453125, "learning_rate": 1.9841012670550395e-05, "loss": 0.5688, "step": 1079 }, { "epoch": 0.18699274970241317, "grad_norm": 1.5234375, "learning_rate": 1.9840688574900643e-05, "loss": 0.6128, "step": 1080 }, { "epoch": 0.1871658911373228, "grad_norm": 1.390625, "learning_rate": 1.984036415190509e-05, "loss": 0.577, "step": 1081 }, { "epoch": 0.18733903257223244, "grad_norm": 1.3671875, "learning_rate": 1.984003940157453e-05, "loss": 0.5799, "step": 1082 }, { "epoch": 0.1875121740071421, "grad_norm": 1.703125, "learning_rate": 1.9839714323919765e-05, "loss": 0.7308, "step": 1083 }, { "epoch": 0.18768531544205172, "grad_norm": 1.3671875, "learning_rate": 1.9839388918951614e-05, "loss": 0.577, "step": 1084 }, { "epoch": 0.18785845687696137, "grad_norm": 1.3515625, "learning_rate": 1.9839063186680895e-05, "loss": 0.569, "step": 1085 }, { "epoch": 0.18803159831187102, "grad_norm": 1.4140625, "learning_rate": 1.9838737127118444e-05, "loss": 0.6221, "step": 1086 }, { "epoch": 0.18820473974678065, "grad_norm": 1.390625, "learning_rate": 1.983841074027511e-05, "loss": 0.567, "step": 1087 }, { "epoch": 0.1883778811816903, "grad_norm": 1.3984375, "learning_rate": 1.9838084026161746e-05, "loss": 0.5712, "step": 1088 }, { "epoch": 0.18855102261659992, "grad_norm": 1.4453125, "learning_rate": 1.9837756984789224e-05, "loss": 0.6368, "step": 1089 }, { "epoch": 0.18872416405150957, "grad_norm": 1.4921875, "learning_rate": 1.9837429616168426e-05, "loss": 0.6079, "step": 1090 }, { "epoch": 0.18889730548641923, "grad_norm": 1.5, "learning_rate": 1.983710192031023e-05, "loss": 0.6545, "step": 1091 }, { "epoch": 0.18907044692132885, "grad_norm": 1.515625, "learning_rate": 1.983677389722555e-05, "loss": 0.6606, "step": 1092 }, { "epoch": 0.1892435883562385, "grad_norm": 1.390625, "learning_rate": 1.9836445546925286e-05, "loss": 0.6766, "step": 1093 }, { "epoch": 0.18941672979114815, "grad_norm": 1.609375, "learning_rate": 1.983611686942037e-05, "loss": 0.6817, "step": 1094 }, { "epoch": 0.18958987122605778, "grad_norm": 1.4296875, "learning_rate": 1.9835787864721728e-05, "loss": 0.6644, "step": 1095 }, { "epoch": 0.18976301266096743, "grad_norm": 1.390625, "learning_rate": 1.983545853284031e-05, "loss": 0.6389, "step": 1096 }, { "epoch": 0.18993615409587708, "grad_norm": 1.515625, "learning_rate": 1.9835128873787068e-05, "loss": 0.6657, "step": 1097 }, { "epoch": 0.1901092955307867, "grad_norm": 1.5546875, "learning_rate": 1.9834798887572967e-05, "loss": 0.7078, "step": 1098 }, { "epoch": 0.19028243696569636, "grad_norm": 1.5, "learning_rate": 1.9834468574208985e-05, "loss": 0.6395, "step": 1099 }, { "epoch": 0.190455578400606, "grad_norm": 1.375, "learning_rate": 1.983413793370611e-05, "loss": 0.5976, "step": 1100 }, { "epoch": 0.19062871983551563, "grad_norm": 1.5390625, "learning_rate": 1.9833806966075343e-05, "loss": 0.5791, "step": 1101 }, { "epoch": 0.19080186127042528, "grad_norm": 1.453125, "learning_rate": 1.9833475671327686e-05, "loss": 0.6546, "step": 1102 }, { "epoch": 0.19097500270533493, "grad_norm": 1.59375, "learning_rate": 1.983314404947417e-05, "loss": 0.6763, "step": 1103 }, { "epoch": 0.19114814414024456, "grad_norm": 1.4296875, "learning_rate": 1.9832812100525816e-05, "loss": 0.6191, "step": 1104 }, { "epoch": 0.1913212855751542, "grad_norm": 1.46875, "learning_rate": 1.9832479824493674e-05, "loss": 0.6146, "step": 1105 }, { "epoch": 0.19149442701006383, "grad_norm": 1.6484375, "learning_rate": 1.9832147221388793e-05, "loss": 0.6072, "step": 1106 }, { "epoch": 0.19166756844497349, "grad_norm": 1.3984375, "learning_rate": 1.9831814291222233e-05, "loss": 0.6351, "step": 1107 }, { "epoch": 0.19184070987988314, "grad_norm": 1.6640625, "learning_rate": 1.983148103400508e-05, "loss": 0.6745, "step": 1108 }, { "epoch": 0.19201385131479276, "grad_norm": 1.4921875, "learning_rate": 1.983114744974841e-05, "loss": 0.669, "step": 1109 }, { "epoch": 0.1921869927497024, "grad_norm": 1.375, "learning_rate": 1.9830813538463322e-05, "loss": 0.6296, "step": 1110 }, { "epoch": 0.19236013418461206, "grad_norm": 1.5, "learning_rate": 1.9830479300160923e-05, "loss": 0.6177, "step": 1111 }, { "epoch": 0.1925332756195217, "grad_norm": 1.5546875, "learning_rate": 1.9830144734852333e-05, "loss": 0.5572, "step": 1112 }, { "epoch": 0.19270641705443134, "grad_norm": 1.515625, "learning_rate": 1.9829809842548683e-05, "loss": 0.6151, "step": 1113 }, { "epoch": 0.192879558489341, "grad_norm": 1.546875, "learning_rate": 1.9829474623261106e-05, "loss": 0.613, "step": 1114 }, { "epoch": 0.19305269992425061, "grad_norm": 1.4609375, "learning_rate": 1.982913907700076e-05, "loss": 0.6603, "step": 1115 }, { "epoch": 0.19322584135916027, "grad_norm": 1.5, "learning_rate": 1.9828803203778804e-05, "loss": 0.6179, "step": 1116 }, { "epoch": 0.19339898279406992, "grad_norm": 1.5078125, "learning_rate": 1.982846700360641e-05, "loss": 0.7138, "step": 1117 }, { "epoch": 0.19357212422897954, "grad_norm": 1.4609375, "learning_rate": 1.9828130476494762e-05, "loss": 0.6192, "step": 1118 }, { "epoch": 0.1937452656638892, "grad_norm": 1.4296875, "learning_rate": 1.9827793622455058e-05, "loss": 0.7952, "step": 1119 }, { "epoch": 0.19391840709879885, "grad_norm": 1.453125, "learning_rate": 1.9827456441498498e-05, "loss": 0.6725, "step": 1120 }, { "epoch": 0.19409154853370847, "grad_norm": 1.4609375, "learning_rate": 1.9827118933636297e-05, "loss": 0.5929, "step": 1121 }, { "epoch": 0.19426468996861812, "grad_norm": 1.46875, "learning_rate": 1.9826781098879685e-05, "loss": 0.6627, "step": 1122 }, { "epoch": 0.19443783140352774, "grad_norm": 1.484375, "learning_rate": 1.98264429372399e-05, "loss": 0.6365, "step": 1123 }, { "epoch": 0.1946109728384374, "grad_norm": 1.3515625, "learning_rate": 1.98261044487282e-05, "loss": 0.5652, "step": 1124 }, { "epoch": 0.19478411427334705, "grad_norm": 1.2890625, "learning_rate": 1.9825765633355825e-05, "loss": 0.584, "step": 1125 }, { "epoch": 0.19495725570825667, "grad_norm": 1.4140625, "learning_rate": 1.982542649113406e-05, "loss": 0.6209, "step": 1126 }, { "epoch": 0.19513039714316632, "grad_norm": 1.3046875, "learning_rate": 1.9825087022074182e-05, "loss": 0.62, "step": 1127 }, { "epoch": 0.19530353857807597, "grad_norm": 1.5546875, "learning_rate": 1.9824747226187482e-05, "loss": 0.5481, "step": 1128 }, { "epoch": 0.1954766800129856, "grad_norm": 1.609375, "learning_rate": 1.982440710348527e-05, "loss": 0.6056, "step": 1129 }, { "epoch": 0.19564982144789525, "grad_norm": 1.4453125, "learning_rate": 1.982406665397885e-05, "loss": 0.5984, "step": 1130 }, { "epoch": 0.1958229628828049, "grad_norm": 1.4140625, "learning_rate": 1.9823725877679554e-05, "loss": 0.6016, "step": 1131 }, { "epoch": 0.19599610431771453, "grad_norm": 1.4296875, "learning_rate": 1.9823384774598715e-05, "loss": 0.6232, "step": 1132 }, { "epoch": 0.19616924575262418, "grad_norm": 1.2578125, "learning_rate": 1.9823043344747682e-05, "loss": 0.6132, "step": 1133 }, { "epoch": 0.19634238718753383, "grad_norm": 1.4453125, "learning_rate": 1.9822701588137807e-05, "loss": 0.585, "step": 1134 }, { "epoch": 0.19651552862244345, "grad_norm": 1.484375, "learning_rate": 1.9822359504780465e-05, "loss": 0.6683, "step": 1135 }, { "epoch": 0.1966886700573531, "grad_norm": 1.46875, "learning_rate": 1.982201709468703e-05, "loss": 0.5844, "step": 1136 }, { "epoch": 0.19686181149226276, "grad_norm": 1.5, "learning_rate": 1.9821674357868895e-05, "loss": 0.614, "step": 1137 }, { "epoch": 0.19703495292717238, "grad_norm": 1.515625, "learning_rate": 1.9821331294337462e-05, "loss": 0.624, "step": 1138 }, { "epoch": 0.19720809436208203, "grad_norm": 1.375, "learning_rate": 1.982098790410414e-05, "loss": 0.6435, "step": 1139 }, { "epoch": 0.19738123579699166, "grad_norm": 1.328125, "learning_rate": 1.9820644187180354e-05, "loss": 0.6049, "step": 1140 }, { "epoch": 0.1975543772319013, "grad_norm": 1.578125, "learning_rate": 1.9820300143577533e-05, "loss": 0.6021, "step": 1141 }, { "epoch": 0.19772751866681096, "grad_norm": 1.59375, "learning_rate": 1.9819955773307125e-05, "loss": 0.5834, "step": 1142 }, { "epoch": 0.19790066010172058, "grad_norm": 1.328125, "learning_rate": 1.9819611076380592e-05, "loss": 0.562, "step": 1143 }, { "epoch": 0.19807380153663023, "grad_norm": 1.375, "learning_rate": 1.981926605280939e-05, "loss": 0.5907, "step": 1144 }, { "epoch": 0.19824694297153989, "grad_norm": 1.578125, "learning_rate": 1.9818920702604996e-05, "loss": 0.6454, "step": 1145 }, { "epoch": 0.1984200844064495, "grad_norm": 1.40625, "learning_rate": 1.9818575025778904e-05, "loss": 0.595, "step": 1146 }, { "epoch": 0.19859322584135916, "grad_norm": 1.4921875, "learning_rate": 1.981822902234261e-05, "loss": 0.6837, "step": 1147 }, { "epoch": 0.1987663672762688, "grad_norm": 1.3359375, "learning_rate": 1.9817882692307627e-05, "loss": 0.5445, "step": 1148 }, { "epoch": 0.19893950871117844, "grad_norm": 1.453125, "learning_rate": 1.9817536035685467e-05, "loss": 0.6453, "step": 1149 }, { "epoch": 0.1991126501460881, "grad_norm": 1.390625, "learning_rate": 1.9817189052487668e-05, "loss": 0.7356, "step": 1150 }, { "epoch": 0.19928579158099774, "grad_norm": 1.4765625, "learning_rate": 1.981684174272577e-05, "loss": 0.7074, "step": 1151 }, { "epoch": 0.19945893301590736, "grad_norm": 1.359375, "learning_rate": 1.9816494106411332e-05, "loss": 0.5212, "step": 1152 }, { "epoch": 0.19963207445081702, "grad_norm": 1.375, "learning_rate": 1.981614614355591e-05, "loss": 0.6123, "step": 1153 }, { "epoch": 0.19980521588572664, "grad_norm": 1.4921875, "learning_rate": 1.981579785417108e-05, "loss": 0.5819, "step": 1154 }, { "epoch": 0.1999783573206363, "grad_norm": 1.46875, "learning_rate": 1.981544923826843e-05, "loss": 0.5796, "step": 1155 }, { "epoch": 0.20015149875554594, "grad_norm": 1.40625, "learning_rate": 1.9815100295859558e-05, "loss": 0.6221, "step": 1156 }, { "epoch": 0.20032464019045557, "grad_norm": 1.4375, "learning_rate": 1.981475102695607e-05, "loss": 0.6039, "step": 1157 }, { "epoch": 0.20049778162536522, "grad_norm": 1.453125, "learning_rate": 1.9814401431569582e-05, "loss": 0.6151, "step": 1158 }, { "epoch": 0.20067092306027487, "grad_norm": 1.4921875, "learning_rate": 1.9814051509711722e-05, "loss": 0.6225, "step": 1159 }, { "epoch": 0.2008440644951845, "grad_norm": 1.4296875, "learning_rate": 1.9813701261394136e-05, "loss": 0.6379, "step": 1160 }, { "epoch": 0.20101720593009414, "grad_norm": 1.4453125, "learning_rate": 1.981335068662847e-05, "loss": 0.5893, "step": 1161 }, { "epoch": 0.2011903473650038, "grad_norm": 1.6484375, "learning_rate": 1.981299978542639e-05, "loss": 0.6733, "step": 1162 }, { "epoch": 0.20136348879991342, "grad_norm": 1.453125, "learning_rate": 1.9812648557799563e-05, "loss": 0.5735, "step": 1163 }, { "epoch": 0.20153663023482307, "grad_norm": 1.53125, "learning_rate": 1.9812297003759676e-05, "loss": 0.6073, "step": 1164 }, { "epoch": 0.20170977166973272, "grad_norm": 1.3671875, "learning_rate": 1.9811945123318425e-05, "loss": 0.5745, "step": 1165 }, { "epoch": 0.20188291310464235, "grad_norm": 1.3125, "learning_rate": 1.981159291648751e-05, "loss": 0.5913, "step": 1166 }, { "epoch": 0.202056054539552, "grad_norm": 1.4375, "learning_rate": 1.9811240383278648e-05, "loss": 0.5755, "step": 1167 }, { "epoch": 0.20222919597446165, "grad_norm": 1.390625, "learning_rate": 1.9810887523703573e-05, "loss": 0.6244, "step": 1168 }, { "epoch": 0.20240233740937127, "grad_norm": 1.515625, "learning_rate": 1.9810534337774014e-05, "loss": 0.594, "step": 1169 }, { "epoch": 0.20257547884428093, "grad_norm": 1.4921875, "learning_rate": 1.9810180825501723e-05, "loss": 0.5707, "step": 1170 }, { "epoch": 0.20274862027919055, "grad_norm": 1.6875, "learning_rate": 1.980982698689846e-05, "loss": 0.6415, "step": 1171 }, { "epoch": 0.2029217617141002, "grad_norm": 1.453125, "learning_rate": 1.9809472821975992e-05, "loss": 0.627, "step": 1172 }, { "epoch": 0.20309490314900985, "grad_norm": 1.5234375, "learning_rate": 1.9809118330746104e-05, "loss": 0.6051, "step": 1173 }, { "epoch": 0.20326804458391948, "grad_norm": 1.4921875, "learning_rate": 1.9808763513220584e-05, "loss": 0.6017, "step": 1174 }, { "epoch": 0.20344118601882913, "grad_norm": 1.5625, "learning_rate": 1.980840836941124e-05, "loss": 0.5957, "step": 1175 }, { "epoch": 0.20361432745373878, "grad_norm": 1.3828125, "learning_rate": 1.9808052899329886e-05, "loss": 0.5996, "step": 1176 }, { "epoch": 0.2037874688886484, "grad_norm": 1.6171875, "learning_rate": 1.9807697102988343e-05, "loss": 0.7468, "step": 1177 }, { "epoch": 0.20396061032355806, "grad_norm": 1.4296875, "learning_rate": 1.9807340980398443e-05, "loss": 0.6328, "step": 1178 }, { "epoch": 0.2041337517584677, "grad_norm": 1.4453125, "learning_rate": 1.9806984531572038e-05, "loss": 0.5637, "step": 1179 }, { "epoch": 0.20430689319337733, "grad_norm": 1.46875, "learning_rate": 1.9806627756520983e-05, "loss": 0.5649, "step": 1180 }, { "epoch": 0.20448003462828698, "grad_norm": 1.546875, "learning_rate": 1.9806270655257148e-05, "loss": 0.5705, "step": 1181 }, { "epoch": 0.20465317606319663, "grad_norm": 1.5390625, "learning_rate": 1.980591322779241e-05, "loss": 0.6398, "step": 1182 }, { "epoch": 0.20482631749810626, "grad_norm": 1.5703125, "learning_rate": 1.9805555474138658e-05, "loss": 0.6556, "step": 1183 }, { "epoch": 0.2049994589330159, "grad_norm": 1.484375, "learning_rate": 1.9805197394307796e-05, "loss": 0.6428, "step": 1184 }, { "epoch": 0.20517260036792556, "grad_norm": 1.390625, "learning_rate": 1.980483898831173e-05, "loss": 0.6097, "step": 1185 }, { "epoch": 0.20534574180283519, "grad_norm": 1.609375, "learning_rate": 1.9804480256162382e-05, "loss": 0.5577, "step": 1186 }, { "epoch": 0.20551888323774484, "grad_norm": 1.5234375, "learning_rate": 1.980412119787169e-05, "loss": 0.7137, "step": 1187 }, { "epoch": 0.20569202467265446, "grad_norm": 1.515625, "learning_rate": 1.98037618134516e-05, "loss": 0.6114, "step": 1188 }, { "epoch": 0.2058651661075641, "grad_norm": 1.5234375, "learning_rate": 1.980340210291406e-05, "loss": 0.6043, "step": 1189 }, { "epoch": 0.20603830754247376, "grad_norm": 1.5703125, "learning_rate": 1.9803042066271036e-05, "loss": 0.7096, "step": 1190 }, { "epoch": 0.2062114489773834, "grad_norm": 1.546875, "learning_rate": 1.9802681703534508e-05, "loss": 0.5911, "step": 1191 }, { "epoch": 0.20638459041229304, "grad_norm": 1.5234375, "learning_rate": 1.9802321014716465e-05, "loss": 0.6805, "step": 1192 }, { "epoch": 0.2065577318472027, "grad_norm": 1.453125, "learning_rate": 1.9801959999828898e-05, "loss": 0.6544, "step": 1193 }, { "epoch": 0.20673087328211232, "grad_norm": 1.59375, "learning_rate": 1.980159865888382e-05, "loss": 0.6598, "step": 1194 }, { "epoch": 0.20690401471702197, "grad_norm": 1.546875, "learning_rate": 1.980123699189325e-05, "loss": 0.6889, "step": 1195 }, { "epoch": 0.20707715615193162, "grad_norm": 1.2734375, "learning_rate": 1.9800874998869225e-05, "loss": 0.5918, "step": 1196 }, { "epoch": 0.20725029758684124, "grad_norm": 1.609375, "learning_rate": 1.9800512679823775e-05, "loss": 0.5971, "step": 1197 }, { "epoch": 0.2074234390217509, "grad_norm": 1.3671875, "learning_rate": 1.980015003476896e-05, "loss": 0.621, "step": 1198 }, { "epoch": 0.20759658045666055, "grad_norm": 1.484375, "learning_rate": 1.9799787063716845e-05, "loss": 0.6208, "step": 1199 }, { "epoch": 0.20776972189157017, "grad_norm": 1.5234375, "learning_rate": 1.9799423766679497e-05, "loss": 0.5849, "step": 1200 }, { "epoch": 0.20794286332647982, "grad_norm": 1.4609375, "learning_rate": 1.9799060143669008e-05, "loss": 0.655, "step": 1201 }, { "epoch": 0.20811600476138947, "grad_norm": 1.40625, "learning_rate": 1.9798696194697465e-05, "loss": 0.6329, "step": 1202 }, { "epoch": 0.2082891461962991, "grad_norm": 1.4921875, "learning_rate": 1.9798331919776985e-05, "loss": 0.6975, "step": 1203 }, { "epoch": 0.20846228763120875, "grad_norm": 1.53125, "learning_rate": 1.979796731891968e-05, "loss": 0.6144, "step": 1204 }, { "epoch": 0.20863542906611837, "grad_norm": 1.5, "learning_rate": 1.9797602392137678e-05, "loss": 0.6551, "step": 1205 }, { "epoch": 0.20880857050102802, "grad_norm": 1.4921875, "learning_rate": 1.9797237139443116e-05, "loss": 0.6723, "step": 1206 }, { "epoch": 0.20898171193593768, "grad_norm": 1.40625, "learning_rate": 1.9796871560848152e-05, "loss": 0.5614, "step": 1207 }, { "epoch": 0.2091548533708473, "grad_norm": 1.484375, "learning_rate": 1.9796505656364936e-05, "loss": 0.6153, "step": 1208 }, { "epoch": 0.20932799480575695, "grad_norm": 1.4609375, "learning_rate": 1.9796139426005648e-05, "loss": 0.5649, "step": 1209 }, { "epoch": 0.2095011362406666, "grad_norm": 1.546875, "learning_rate": 1.979577286978247e-05, "loss": 0.5658, "step": 1210 }, { "epoch": 0.20967427767557623, "grad_norm": 1.375, "learning_rate": 1.979540598770759e-05, "loss": 0.6047, "step": 1211 }, { "epoch": 0.20984741911048588, "grad_norm": 1.546875, "learning_rate": 1.9795038779793215e-05, "loss": 0.6991, "step": 1212 }, { "epoch": 0.21002056054539553, "grad_norm": 1.4765625, "learning_rate": 1.979467124605156e-05, "loss": 0.594, "step": 1213 }, { "epoch": 0.21019370198030515, "grad_norm": 1.4375, "learning_rate": 1.979430338649485e-05, "loss": 0.5965, "step": 1214 }, { "epoch": 0.2103668434152148, "grad_norm": 1.40625, "learning_rate": 1.979393520113533e-05, "loss": 0.6495, "step": 1215 }, { "epoch": 0.21053998485012446, "grad_norm": 1.5625, "learning_rate": 1.979356668998523e-05, "loss": 0.6611, "step": 1216 }, { "epoch": 0.21071312628503408, "grad_norm": 1.4765625, "learning_rate": 1.9793197853056826e-05, "loss": 0.5912, "step": 1217 }, { "epoch": 0.21088626771994373, "grad_norm": 1.5, "learning_rate": 1.9792828690362377e-05, "loss": 0.6763, "step": 1218 }, { "epoch": 0.21105940915485338, "grad_norm": 1.5234375, "learning_rate": 1.9792459201914164e-05, "loss": 0.6363, "step": 1219 }, { "epoch": 0.211232550589763, "grad_norm": 1.734375, "learning_rate": 1.979208938772448e-05, "loss": 0.5882, "step": 1220 }, { "epoch": 0.21140569202467266, "grad_norm": 1.578125, "learning_rate": 1.979171924780563e-05, "loss": 0.706, "step": 1221 }, { "epoch": 0.21157883345958228, "grad_norm": 1.296875, "learning_rate": 1.9791348782169918e-05, "loss": 0.5389, "step": 1222 }, { "epoch": 0.21175197489449193, "grad_norm": 1.3046875, "learning_rate": 1.9790977990829673e-05, "loss": 0.5881, "step": 1223 }, { "epoch": 0.21192511632940159, "grad_norm": 1.2890625, "learning_rate": 1.979060687379723e-05, "loss": 0.5737, "step": 1224 }, { "epoch": 0.2120982577643112, "grad_norm": 1.328125, "learning_rate": 1.979023543108493e-05, "loss": 0.6115, "step": 1225 }, { "epoch": 0.21227139919922086, "grad_norm": 1.2734375, "learning_rate": 1.9789863662705134e-05, "loss": 0.5919, "step": 1226 }, { "epoch": 0.2124445406341305, "grad_norm": 1.4296875, "learning_rate": 1.9789491568670205e-05, "loss": 0.5503, "step": 1227 }, { "epoch": 0.21261768206904014, "grad_norm": 1.5078125, "learning_rate": 1.978911914899252e-05, "loss": 0.6606, "step": 1228 }, { "epoch": 0.2127908235039498, "grad_norm": 1.4453125, "learning_rate": 1.9788746403684473e-05, "loss": 0.5934, "step": 1229 }, { "epoch": 0.21296396493885944, "grad_norm": 1.375, "learning_rate": 1.9788373332758453e-05, "loss": 0.6773, "step": 1230 }, { "epoch": 0.21313710637376906, "grad_norm": 1.3046875, "learning_rate": 1.9787999936226877e-05, "loss": 0.5696, "step": 1231 }, { "epoch": 0.21331024780867872, "grad_norm": 1.484375, "learning_rate": 1.9787626214102168e-05, "loss": 0.6324, "step": 1232 }, { "epoch": 0.21348338924358837, "grad_norm": 1.4921875, "learning_rate": 1.9787252166396754e-05, "loss": 0.6311, "step": 1233 }, { "epoch": 0.213656530678498, "grad_norm": 1.515625, "learning_rate": 1.9786877793123077e-05, "loss": 0.6692, "step": 1234 }, { "epoch": 0.21382967211340764, "grad_norm": 1.359375, "learning_rate": 1.9786503094293594e-05, "loss": 0.6469, "step": 1235 }, { "epoch": 0.2140028135483173, "grad_norm": 1.3984375, "learning_rate": 1.9786128069920764e-05, "loss": 0.5803, "step": 1236 }, { "epoch": 0.21417595498322692, "grad_norm": 1.3671875, "learning_rate": 1.9785752720017065e-05, "loss": 0.658, "step": 1237 }, { "epoch": 0.21434909641813657, "grad_norm": 1.3984375, "learning_rate": 1.9785377044594983e-05, "loss": 0.6331, "step": 1238 }, { "epoch": 0.2145222378530462, "grad_norm": 1.375, "learning_rate": 1.9785001043667012e-05, "loss": 0.6826, "step": 1239 }, { "epoch": 0.21469537928795585, "grad_norm": 1.3984375, "learning_rate": 1.9784624717245666e-05, "loss": 0.6469, "step": 1240 }, { "epoch": 0.2148685207228655, "grad_norm": 1.40625, "learning_rate": 1.9784248065343456e-05, "loss": 0.6151, "step": 1241 }, { "epoch": 0.21504166215777512, "grad_norm": 1.4453125, "learning_rate": 1.9783871087972916e-05, "loss": 0.6137, "step": 1242 }, { "epoch": 0.21521480359268477, "grad_norm": 1.546875, "learning_rate": 1.978349378514658e-05, "loss": 0.6767, "step": 1243 }, { "epoch": 0.21538794502759442, "grad_norm": 1.3671875, "learning_rate": 1.9783116156877008e-05, "loss": 0.562, "step": 1244 }, { "epoch": 0.21556108646250405, "grad_norm": 1.453125, "learning_rate": 1.9782738203176753e-05, "loss": 0.5947, "step": 1245 }, { "epoch": 0.2157342278974137, "grad_norm": 1.4140625, "learning_rate": 1.9782359924058393e-05, "loss": 0.6425, "step": 1246 }, { "epoch": 0.21590736933232335, "grad_norm": 1.5390625, "learning_rate": 1.9781981319534505e-05, "loss": 0.5711, "step": 1247 }, { "epoch": 0.21608051076723298, "grad_norm": 1.4375, "learning_rate": 1.9781602389617693e-05, "loss": 0.6625, "step": 1248 }, { "epoch": 0.21625365220214263, "grad_norm": 1.5546875, "learning_rate": 1.978122313432055e-05, "loss": 0.6676, "step": 1249 }, { "epoch": 0.21642679363705228, "grad_norm": 1.375, "learning_rate": 1.9780843553655703e-05, "loss": 0.6073, "step": 1250 }, { "epoch": 0.2165999350719619, "grad_norm": 1.390625, "learning_rate": 1.9780463647635774e-05, "loss": 0.5975, "step": 1251 }, { "epoch": 0.21677307650687155, "grad_norm": 1.2578125, "learning_rate": 1.9780083416273394e-05, "loss": 0.5948, "step": 1252 }, { "epoch": 0.2169462179417812, "grad_norm": 1.3515625, "learning_rate": 1.9779702859581224e-05, "loss": 0.5779, "step": 1253 }, { "epoch": 0.21711935937669083, "grad_norm": 1.453125, "learning_rate": 1.977932197757191e-05, "loss": 0.6285, "step": 1254 }, { "epoch": 0.21729250081160048, "grad_norm": 1.4296875, "learning_rate": 1.9778940770258136e-05, "loss": 0.6308, "step": 1255 }, { "epoch": 0.2174656422465101, "grad_norm": 1.40625, "learning_rate": 1.9778559237652568e-05, "loss": 0.5803, "step": 1256 }, { "epoch": 0.21763878368141976, "grad_norm": 1.390625, "learning_rate": 1.9778177379767903e-05, "loss": 0.5584, "step": 1257 }, { "epoch": 0.2178119251163294, "grad_norm": 1.4765625, "learning_rate": 1.977779519661685e-05, "loss": 0.7132, "step": 1258 }, { "epoch": 0.21798506655123903, "grad_norm": 1.40625, "learning_rate": 1.9777412688212116e-05, "loss": 0.5789, "step": 1259 }, { "epoch": 0.21815820798614868, "grad_norm": 1.4375, "learning_rate": 1.977702985456642e-05, "loss": 0.6009, "step": 1260 }, { "epoch": 0.21833134942105833, "grad_norm": 1.421875, "learning_rate": 1.977664669569251e-05, "loss": 0.597, "step": 1261 }, { "epoch": 0.21850449085596796, "grad_norm": 1.4375, "learning_rate": 1.977626321160312e-05, "loss": 0.6488, "step": 1262 }, { "epoch": 0.2186776322908776, "grad_norm": 1.421875, "learning_rate": 1.9775879402311008e-05, "loss": 0.5906, "step": 1263 }, { "epoch": 0.21885077372578726, "grad_norm": 1.515625, "learning_rate": 1.977549526782895e-05, "loss": 0.6534, "step": 1264 }, { "epoch": 0.21902391516069689, "grad_norm": 1.2890625, "learning_rate": 1.9775110808169714e-05, "loss": 0.5195, "step": 1265 }, { "epoch": 0.21919705659560654, "grad_norm": 1.3046875, "learning_rate": 1.977472602334609e-05, "loss": 0.688, "step": 1266 }, { "epoch": 0.2193701980305162, "grad_norm": 1.3984375, "learning_rate": 1.9774340913370885e-05, "loss": 0.6191, "step": 1267 }, { "epoch": 0.2195433394654258, "grad_norm": 1.546875, "learning_rate": 1.97739554782569e-05, "loss": 0.6345, "step": 1268 }, { "epoch": 0.21971648090033546, "grad_norm": 1.421875, "learning_rate": 1.9773569718016966e-05, "loss": 0.6605, "step": 1269 }, { "epoch": 0.21988962233524512, "grad_norm": 1.484375, "learning_rate": 1.9773183632663907e-05, "loss": 0.6383, "step": 1270 }, { "epoch": 0.22006276377015474, "grad_norm": 1.5390625, "learning_rate": 1.9772797222210573e-05, "loss": 0.651, "step": 1271 }, { "epoch": 0.2202359052050644, "grad_norm": 1.3828125, "learning_rate": 1.9772410486669808e-05, "loss": 0.5859, "step": 1272 }, { "epoch": 0.22040904663997402, "grad_norm": 1.4453125, "learning_rate": 1.977202342605449e-05, "loss": 0.6406, "step": 1273 }, { "epoch": 0.22058218807488367, "grad_norm": 1.546875, "learning_rate": 1.977163604037748e-05, "loss": 0.6907, "step": 1274 }, { "epoch": 0.22075532950979332, "grad_norm": 1.265625, "learning_rate": 1.977124832965167e-05, "loss": 0.5571, "step": 1275 }, { "epoch": 0.22092847094470294, "grad_norm": 1.5546875, "learning_rate": 1.977086029388996e-05, "loss": 0.7221, "step": 1276 }, { "epoch": 0.2211016123796126, "grad_norm": 1.40625, "learning_rate": 1.977047193310526e-05, "loss": 0.5545, "step": 1277 }, { "epoch": 0.22127475381452225, "grad_norm": 1.3359375, "learning_rate": 1.9770083247310482e-05, "loss": 0.6318, "step": 1278 }, { "epoch": 0.22144789524943187, "grad_norm": 1.4375, "learning_rate": 1.9769694236518557e-05, "loss": 0.5893, "step": 1279 }, { "epoch": 0.22162103668434152, "grad_norm": 1.5, "learning_rate": 1.9769304900742427e-05, "loss": 0.731, "step": 1280 }, { "epoch": 0.22179417811925117, "grad_norm": 1.609375, "learning_rate": 1.9768915239995043e-05, "loss": 0.7291, "step": 1281 }, { "epoch": 0.2219673195541608, "grad_norm": 1.453125, "learning_rate": 1.9768525254289364e-05, "loss": 0.5673, "step": 1282 }, { "epoch": 0.22214046098907045, "grad_norm": 1.3671875, "learning_rate": 1.9768134943638367e-05, "loss": 0.5385, "step": 1283 }, { "epoch": 0.2223136024239801, "grad_norm": 1.3984375, "learning_rate": 1.976774430805503e-05, "loss": 0.63, "step": 1284 }, { "epoch": 0.22248674385888972, "grad_norm": 1.3203125, "learning_rate": 1.9767353347552358e-05, "loss": 0.5645, "step": 1285 }, { "epoch": 0.22265988529379938, "grad_norm": 1.484375, "learning_rate": 1.9766962062143343e-05, "loss": 0.5567, "step": 1286 }, { "epoch": 0.22283302672870903, "grad_norm": 1.3984375, "learning_rate": 1.976657045184101e-05, "loss": 0.5975, "step": 1287 }, { "epoch": 0.22300616816361865, "grad_norm": 1.40625, "learning_rate": 1.976617851665838e-05, "loss": 0.5858, "step": 1288 }, { "epoch": 0.2231793095985283, "grad_norm": 1.4296875, "learning_rate": 1.9765786256608494e-05, "loss": 0.72, "step": 1289 }, { "epoch": 0.22335245103343793, "grad_norm": 1.4609375, "learning_rate": 1.9765393671704398e-05, "loss": 0.6486, "step": 1290 }, { "epoch": 0.22352559246834758, "grad_norm": 1.4609375, "learning_rate": 1.976500076195915e-05, "loss": 0.6312, "step": 1291 }, { "epoch": 0.22369873390325723, "grad_norm": 1.3671875, "learning_rate": 1.9764607527385826e-05, "loss": 0.6531, "step": 1292 }, { "epoch": 0.22387187533816685, "grad_norm": 1.484375, "learning_rate": 1.9764213967997504e-05, "loss": 0.6421, "step": 1293 }, { "epoch": 0.2240450167730765, "grad_norm": 1.4296875, "learning_rate": 1.9763820083807276e-05, "loss": 0.6527, "step": 1294 }, { "epoch": 0.22421815820798616, "grad_norm": 1.375, "learning_rate": 1.9763425874828238e-05, "loss": 0.6314, "step": 1295 }, { "epoch": 0.22439129964289578, "grad_norm": 1.453125, "learning_rate": 1.9763031341073512e-05, "loss": 0.6544, "step": 1296 }, { "epoch": 0.22456444107780543, "grad_norm": 1.390625, "learning_rate": 1.9762636482556216e-05, "loss": 0.6135, "step": 1297 }, { "epoch": 0.22473758251271508, "grad_norm": 1.53125, "learning_rate": 1.9762241299289487e-05, "loss": 0.5945, "step": 1298 }, { "epoch": 0.2249107239476247, "grad_norm": 1.5390625, "learning_rate": 1.9761845791286474e-05, "loss": 0.6202, "step": 1299 }, { "epoch": 0.22508386538253436, "grad_norm": 1.3515625, "learning_rate": 1.9761449958560326e-05, "loss": 0.6149, "step": 1300 }, { "epoch": 0.225257006817444, "grad_norm": 1.4921875, "learning_rate": 1.9761053801124217e-05, "loss": 0.657, "step": 1301 }, { "epoch": 0.22543014825235363, "grad_norm": 1.546875, "learning_rate": 1.976065731899132e-05, "loss": 0.6953, "step": 1302 }, { "epoch": 0.2256032896872633, "grad_norm": 1.4296875, "learning_rate": 1.9760260512174828e-05, "loss": 0.5849, "step": 1303 }, { "epoch": 0.22577643112217294, "grad_norm": 1.390625, "learning_rate": 1.975986338068794e-05, "loss": 0.5987, "step": 1304 }, { "epoch": 0.22594957255708256, "grad_norm": 1.53125, "learning_rate": 1.9759465924543862e-05, "loss": 0.5467, "step": 1305 }, { "epoch": 0.2261227139919922, "grad_norm": 1.4296875, "learning_rate": 1.9759068143755814e-05, "loss": 0.616, "step": 1306 }, { "epoch": 0.22629585542690184, "grad_norm": 1.3515625, "learning_rate": 1.975867003833704e-05, "loss": 0.6433, "step": 1307 }, { "epoch": 0.2264689968618115, "grad_norm": 1.625, "learning_rate": 1.975827160830077e-05, "loss": 0.6051, "step": 1308 }, { "epoch": 0.22664213829672114, "grad_norm": 1.5, "learning_rate": 1.9757872853660265e-05, "loss": 0.6437, "step": 1309 }, { "epoch": 0.22681527973163076, "grad_norm": 1.4765625, "learning_rate": 1.9757473774428784e-05, "loss": 0.5753, "step": 1310 }, { "epoch": 0.22698842116654042, "grad_norm": 1.328125, "learning_rate": 1.9757074370619608e-05, "loss": 0.5862, "step": 1311 }, { "epoch": 0.22716156260145007, "grad_norm": 1.390625, "learning_rate": 1.975667464224602e-05, "loss": 0.6145, "step": 1312 }, { "epoch": 0.2273347040363597, "grad_norm": 1.484375, "learning_rate": 1.9756274589321318e-05, "loss": 0.6761, "step": 1313 }, { "epoch": 0.22750784547126934, "grad_norm": 1.3515625, "learning_rate": 1.9755874211858804e-05, "loss": 0.6029, "step": 1314 }, { "epoch": 0.227680986906179, "grad_norm": 1.546875, "learning_rate": 1.9755473509871805e-05, "loss": 0.5774, "step": 1315 }, { "epoch": 0.22785412834108862, "grad_norm": 1.3984375, "learning_rate": 1.9755072483373646e-05, "loss": 0.6114, "step": 1316 }, { "epoch": 0.22802726977599827, "grad_norm": 1.4296875, "learning_rate": 1.9754671132377663e-05, "loss": 0.5978, "step": 1317 }, { "epoch": 0.22820041121090792, "grad_norm": 1.5703125, "learning_rate": 1.9754269456897212e-05, "loss": 0.7226, "step": 1318 }, { "epoch": 0.22837355264581755, "grad_norm": 1.3515625, "learning_rate": 1.9753867456945653e-05, "loss": 0.6312, "step": 1319 }, { "epoch": 0.2285466940807272, "grad_norm": 1.375, "learning_rate": 1.975346513253636e-05, "loss": 0.6018, "step": 1320 }, { "epoch": 0.22871983551563685, "grad_norm": 1.515625, "learning_rate": 1.9753062483682713e-05, "loss": 0.6407, "step": 1321 }, { "epoch": 0.22889297695054647, "grad_norm": 1.4140625, "learning_rate": 1.975265951039811e-05, "loss": 0.6687, "step": 1322 }, { "epoch": 0.22906611838545612, "grad_norm": 1.390625, "learning_rate": 1.975225621269595e-05, "loss": 0.654, "step": 1323 }, { "epoch": 0.22923925982036575, "grad_norm": 1.390625, "learning_rate": 1.9751852590589652e-05, "loss": 0.5317, "step": 1324 }, { "epoch": 0.2294124012552754, "grad_norm": 1.4140625, "learning_rate": 1.9751448644092645e-05, "loss": 0.668, "step": 1325 }, { "epoch": 0.22958554269018505, "grad_norm": 1.4609375, "learning_rate": 1.975104437321836e-05, "loss": 0.6335, "step": 1326 }, { "epoch": 0.22975868412509468, "grad_norm": 1.3671875, "learning_rate": 1.975063977798025e-05, "loss": 0.5908, "step": 1327 }, { "epoch": 0.22993182556000433, "grad_norm": 1.4296875, "learning_rate": 1.9750234858391773e-05, "loss": 0.6399, "step": 1328 }, { "epoch": 0.23010496699491398, "grad_norm": 1.390625, "learning_rate": 1.9749829614466395e-05, "loss": 0.5899, "step": 1329 }, { "epoch": 0.2302781084298236, "grad_norm": 1.4921875, "learning_rate": 1.9749424046217597e-05, "loss": 0.5404, "step": 1330 }, { "epoch": 0.23045124986473325, "grad_norm": 1.375, "learning_rate": 1.9749018153658874e-05, "loss": 0.5703, "step": 1331 }, { "epoch": 0.2306243912996429, "grad_norm": 1.4609375, "learning_rate": 1.9748611936803724e-05, "loss": 0.6465, "step": 1332 }, { "epoch": 0.23079753273455253, "grad_norm": 1.515625, "learning_rate": 1.974820539566566e-05, "loss": 0.6647, "step": 1333 }, { "epoch": 0.23097067416946218, "grad_norm": 1.625, "learning_rate": 1.9747798530258213e-05, "loss": 0.6662, "step": 1334 }, { "epoch": 0.23114381560437183, "grad_norm": 1.4609375, "learning_rate": 1.97473913405949e-05, "loss": 0.6127, "step": 1335 }, { "epoch": 0.23131695703928146, "grad_norm": 1.4609375, "learning_rate": 1.9746983826689282e-05, "loss": 0.5821, "step": 1336 }, { "epoch": 0.2314900984741911, "grad_norm": 1.4921875, "learning_rate": 1.9746575988554907e-05, "loss": 0.6287, "step": 1337 }, { "epoch": 0.23166323990910076, "grad_norm": 1.3984375, "learning_rate": 1.9746167826205346e-05, "loss": 0.5847, "step": 1338 }, { "epoch": 0.23183638134401038, "grad_norm": 1.546875, "learning_rate": 1.9745759339654172e-05, "loss": 0.6706, "step": 1339 }, { "epoch": 0.23200952277892004, "grad_norm": 1.4140625, "learning_rate": 1.974535052891498e-05, "loss": 0.6635, "step": 1340 }, { "epoch": 0.23218266421382966, "grad_norm": 1.578125, "learning_rate": 1.974494139400136e-05, "loss": 0.6638, "step": 1341 }, { "epoch": 0.2323558056487393, "grad_norm": 1.5, "learning_rate": 1.9744531934926926e-05, "loss": 0.7005, "step": 1342 }, { "epoch": 0.23252894708364896, "grad_norm": 1.546875, "learning_rate": 1.9744122151705295e-05, "loss": 0.6667, "step": 1343 }, { "epoch": 0.2327020885185586, "grad_norm": 1.34375, "learning_rate": 1.9743712044350105e-05, "loss": 0.5333, "step": 1344 }, { "epoch": 0.23287522995346824, "grad_norm": 1.4296875, "learning_rate": 1.9743301612874996e-05, "loss": 0.6169, "step": 1345 }, { "epoch": 0.2330483713883779, "grad_norm": 1.4296875, "learning_rate": 1.9742890857293616e-05, "loss": 0.6601, "step": 1346 }, { "epoch": 0.2332215128232875, "grad_norm": 1.3828125, "learning_rate": 1.9742479777619633e-05, "loss": 0.7319, "step": 1347 }, { "epoch": 0.23339465425819717, "grad_norm": 1.4453125, "learning_rate": 1.974206837386672e-05, "loss": 0.6775, "step": 1348 }, { "epoch": 0.23356779569310682, "grad_norm": 1.6328125, "learning_rate": 1.9741656646048564e-05, "loss": 0.6333, "step": 1349 }, { "epoch": 0.23374093712801644, "grad_norm": 1.546875, "learning_rate": 1.9741244594178858e-05, "loss": 0.6242, "step": 1350 }, { "epoch": 0.2339140785629261, "grad_norm": 1.40625, "learning_rate": 1.9740832218271307e-05, "loss": 0.5859, "step": 1351 }, { "epoch": 0.23408721999783574, "grad_norm": 1.5, "learning_rate": 1.9740419518339634e-05, "loss": 0.6357, "step": 1352 }, { "epoch": 0.23426036143274537, "grad_norm": 1.46875, "learning_rate": 1.9740006494397567e-05, "loss": 0.5636, "step": 1353 }, { "epoch": 0.23443350286765502, "grad_norm": 1.46875, "learning_rate": 1.973959314645884e-05, "loss": 0.6669, "step": 1354 }, { "epoch": 0.23460664430256467, "grad_norm": 1.5234375, "learning_rate": 1.9739179474537207e-05, "loss": 0.6149, "step": 1355 }, { "epoch": 0.2347797857374743, "grad_norm": 1.375, "learning_rate": 1.9738765478646425e-05, "loss": 0.6194, "step": 1356 }, { "epoch": 0.23495292717238395, "grad_norm": 1.5546875, "learning_rate": 1.973835115880027e-05, "loss": 0.6519, "step": 1357 }, { "epoch": 0.23512606860729357, "grad_norm": 1.4140625, "learning_rate": 1.973793651501252e-05, "loss": 0.6441, "step": 1358 }, { "epoch": 0.23529921004220322, "grad_norm": 1.5703125, "learning_rate": 1.9737521547296972e-05, "loss": 0.6606, "step": 1359 }, { "epoch": 0.23547235147711287, "grad_norm": 1.4609375, "learning_rate": 1.9737106255667426e-05, "loss": 0.5865, "step": 1360 }, { "epoch": 0.2356454929120225, "grad_norm": 1.3984375, "learning_rate": 1.9736690640137696e-05, "loss": 0.5733, "step": 1361 }, { "epoch": 0.23581863434693215, "grad_norm": 1.3359375, "learning_rate": 1.9736274700721614e-05, "loss": 0.6125, "step": 1362 }, { "epoch": 0.2359917757818418, "grad_norm": 1.4296875, "learning_rate": 1.9735858437433005e-05, "loss": 0.5929, "step": 1363 }, { "epoch": 0.23616491721675142, "grad_norm": 1.3671875, "learning_rate": 1.9735441850285726e-05, "loss": 0.5999, "step": 1364 }, { "epoch": 0.23633805865166108, "grad_norm": 1.4140625, "learning_rate": 1.9735024939293628e-05, "loss": 0.6101, "step": 1365 }, { "epoch": 0.23651120008657073, "grad_norm": 1.5625, "learning_rate": 1.9734607704470585e-05, "loss": 0.6216, "step": 1366 }, { "epoch": 0.23668434152148035, "grad_norm": 1.4296875, "learning_rate": 1.973419014583047e-05, "loss": 0.6562, "step": 1367 }, { "epoch": 0.23685748295639, "grad_norm": 1.328125, "learning_rate": 1.9733772263387176e-05, "loss": 0.5563, "step": 1368 }, { "epoch": 0.23703062439129965, "grad_norm": 1.421875, "learning_rate": 1.9733354057154606e-05, "loss": 0.6194, "step": 1369 }, { "epoch": 0.23720376582620928, "grad_norm": 1.4921875, "learning_rate": 1.9732935527146667e-05, "loss": 0.6186, "step": 1370 }, { "epoch": 0.23737690726111893, "grad_norm": 1.3984375, "learning_rate": 1.9732516673377283e-05, "loss": 0.604, "step": 1371 }, { "epoch": 0.23755004869602858, "grad_norm": 1.375, "learning_rate": 1.9732097495860388e-05, "loss": 0.4699, "step": 1372 }, { "epoch": 0.2377231901309382, "grad_norm": 1.421875, "learning_rate": 1.9731677994609922e-05, "loss": 0.6051, "step": 1373 }, { "epoch": 0.23789633156584786, "grad_norm": 1.4296875, "learning_rate": 1.9731258169639846e-05, "loss": 0.6417, "step": 1374 }, { "epoch": 0.23806947300075748, "grad_norm": 1.4921875, "learning_rate": 1.973083802096412e-05, "loss": 0.6724, "step": 1375 }, { "epoch": 0.23824261443566713, "grad_norm": 1.5078125, "learning_rate": 1.973041754859672e-05, "loss": 0.5541, "step": 1376 }, { "epoch": 0.23841575587057678, "grad_norm": 1.5546875, "learning_rate": 1.9729996752551638e-05, "loss": 0.6136, "step": 1377 }, { "epoch": 0.2385888973054864, "grad_norm": 1.46875, "learning_rate": 1.9729575632842865e-05, "loss": 0.759, "step": 1378 }, { "epoch": 0.23876203874039606, "grad_norm": 1.4375, "learning_rate": 1.9729154189484413e-05, "loss": 0.6707, "step": 1379 }, { "epoch": 0.2389351801753057, "grad_norm": 1.4296875, "learning_rate": 1.97287324224903e-05, "loss": 0.5622, "step": 1380 }, { "epoch": 0.23910832161021534, "grad_norm": 1.5546875, "learning_rate": 1.972831033187456e-05, "loss": 0.5893, "step": 1381 }, { "epoch": 0.239281463045125, "grad_norm": 1.640625, "learning_rate": 1.9727887917651228e-05, "loss": 0.6081, "step": 1382 }, { "epoch": 0.23945460448003464, "grad_norm": 1.484375, "learning_rate": 1.9727465179834356e-05, "loss": 0.6048, "step": 1383 }, { "epoch": 0.23962774591494426, "grad_norm": 1.421875, "learning_rate": 1.9727042118438012e-05, "loss": 0.5643, "step": 1384 }, { "epoch": 0.23980088734985391, "grad_norm": 1.375, "learning_rate": 1.972661873347626e-05, "loss": 0.5761, "step": 1385 }, { "epoch": 0.23997402878476357, "grad_norm": 1.546875, "learning_rate": 1.972619502496319e-05, "loss": 0.7123, "step": 1386 }, { "epoch": 0.2401471702196732, "grad_norm": 1.5546875, "learning_rate": 1.9725770992912893e-05, "loss": 0.6787, "step": 1387 }, { "epoch": 0.24032031165458284, "grad_norm": 1.359375, "learning_rate": 1.9725346637339477e-05, "loss": 0.6233, "step": 1388 }, { "epoch": 0.2404934530894925, "grad_norm": 1.46875, "learning_rate": 1.9724921958257056e-05, "loss": 0.6418, "step": 1389 }, { "epoch": 0.24066659452440212, "grad_norm": 1.484375, "learning_rate": 1.972449695567976e-05, "loss": 0.6078, "step": 1390 }, { "epoch": 0.24083973595931177, "grad_norm": 1.4765625, "learning_rate": 1.9724071629621725e-05, "loss": 0.627, "step": 1391 }, { "epoch": 0.2410128773942214, "grad_norm": 1.453125, "learning_rate": 1.9723645980097092e-05, "loss": 0.5633, "step": 1392 }, { "epoch": 0.24118601882913104, "grad_norm": 1.359375, "learning_rate": 1.9723220007120033e-05, "loss": 0.5795, "step": 1393 }, { "epoch": 0.2413591602640407, "grad_norm": 1.421875, "learning_rate": 1.972279371070471e-05, "loss": 0.5976, "step": 1394 }, { "epoch": 0.24153230169895032, "grad_norm": 1.53125, "learning_rate": 1.9722367090865303e-05, "loss": 0.6266, "step": 1395 }, { "epoch": 0.24170544313385997, "grad_norm": 1.375, "learning_rate": 1.9721940147616004e-05, "loss": 0.6248, "step": 1396 }, { "epoch": 0.24187858456876962, "grad_norm": 1.328125, "learning_rate": 1.972151288097102e-05, "loss": 0.6201, "step": 1397 }, { "epoch": 0.24205172600367925, "grad_norm": 1.484375, "learning_rate": 1.9721085290944554e-05, "loss": 0.6876, "step": 1398 }, { "epoch": 0.2422248674385889, "grad_norm": 1.3671875, "learning_rate": 1.9720657377550843e-05, "loss": 0.6568, "step": 1399 }, { "epoch": 0.24239800887349855, "grad_norm": 1.4453125, "learning_rate": 1.972022914080411e-05, "loss": 0.5905, "step": 1400 }, { "epoch": 0.24257115030840817, "grad_norm": 1.359375, "learning_rate": 1.9719800580718603e-05, "loss": 0.581, "step": 1401 }, { "epoch": 0.24274429174331782, "grad_norm": 1.3828125, "learning_rate": 1.971937169730858e-05, "loss": 0.6038, "step": 1402 }, { "epoch": 0.24291743317822748, "grad_norm": 1.5625, "learning_rate": 1.971894249058831e-05, "loss": 0.6018, "step": 1403 }, { "epoch": 0.2430905746131371, "grad_norm": 1.359375, "learning_rate": 1.971851296057206e-05, "loss": 0.5376, "step": 1404 }, { "epoch": 0.24326371604804675, "grad_norm": 1.4296875, "learning_rate": 1.971808310727413e-05, "loss": 0.5738, "step": 1405 }, { "epoch": 0.2434368574829564, "grad_norm": 1.4140625, "learning_rate": 1.971765293070881e-05, "loss": 0.6427, "step": 1406 }, { "epoch": 0.24360999891786603, "grad_norm": 1.5078125, "learning_rate": 1.9717222430890416e-05, "loss": 0.6428, "step": 1407 }, { "epoch": 0.24378314035277568, "grad_norm": 1.4296875, "learning_rate": 1.9716791607833265e-05, "loss": 0.6098, "step": 1408 }, { "epoch": 0.2439562817876853, "grad_norm": 1.421875, "learning_rate": 1.971636046155169e-05, "loss": 0.6205, "step": 1409 }, { "epoch": 0.24412942322259495, "grad_norm": 1.4453125, "learning_rate": 1.971592899206003e-05, "loss": 0.6175, "step": 1410 }, { "epoch": 0.2443025646575046, "grad_norm": 1.5625, "learning_rate": 1.9715497199372643e-05, "loss": 0.7044, "step": 1411 }, { "epoch": 0.24447570609241423, "grad_norm": 1.46875, "learning_rate": 1.9715065083503884e-05, "loss": 0.6834, "step": 1412 }, { "epoch": 0.24464884752732388, "grad_norm": 1.484375, "learning_rate": 1.9714632644468135e-05, "loss": 0.6031, "step": 1413 }, { "epoch": 0.24482198896223353, "grad_norm": 1.328125, "learning_rate": 1.9714199882279773e-05, "loss": 0.5513, "step": 1414 }, { "epoch": 0.24499513039714316, "grad_norm": 1.3828125, "learning_rate": 1.9713766796953203e-05, "loss": 0.6369, "step": 1415 }, { "epoch": 0.2451682718320528, "grad_norm": 1.3984375, "learning_rate": 1.9713333388502825e-05, "loss": 0.5512, "step": 1416 }, { "epoch": 0.24534141326696246, "grad_norm": 1.4453125, "learning_rate": 1.971289965694306e-05, "loss": 0.6307, "step": 1417 }, { "epoch": 0.24551455470187208, "grad_norm": 1.421875, "learning_rate": 1.971246560228833e-05, "loss": 0.6346, "step": 1418 }, { "epoch": 0.24568769613678174, "grad_norm": 1.3203125, "learning_rate": 1.9712031224553085e-05, "loss": 0.5626, "step": 1419 }, { "epoch": 0.2458608375716914, "grad_norm": 1.4765625, "learning_rate": 1.971159652375176e-05, "loss": 0.609, "step": 1420 }, { "epoch": 0.246033979006601, "grad_norm": 1.3125, "learning_rate": 1.9711161499898824e-05, "loss": 0.6166, "step": 1421 }, { "epoch": 0.24620712044151066, "grad_norm": 1.515625, "learning_rate": 1.9710726153008745e-05, "loss": 0.6429, "step": 1422 }, { "epoch": 0.24638026187642031, "grad_norm": 1.3515625, "learning_rate": 1.9710290483096e-05, "loss": 0.5944, "step": 1423 }, { "epoch": 0.24655340331132994, "grad_norm": 1.609375, "learning_rate": 1.9709854490175097e-05, "loss": 0.5967, "step": 1424 }, { "epoch": 0.2467265447462396, "grad_norm": 1.4453125, "learning_rate": 1.9709418174260523e-05, "loss": 0.598, "step": 1425 }, { "epoch": 0.2468996861811492, "grad_norm": 1.3046875, "learning_rate": 1.9708981535366797e-05, "loss": 0.623, "step": 1426 }, { "epoch": 0.24707282761605887, "grad_norm": 1.46875, "learning_rate": 1.9708544573508444e-05, "loss": 0.6164, "step": 1427 }, { "epoch": 0.24724596905096852, "grad_norm": 1.3671875, "learning_rate": 1.9708107288700004e-05, "loss": 0.5683, "step": 1428 }, { "epoch": 0.24741911048587814, "grad_norm": 1.2890625, "learning_rate": 1.9707669680956013e-05, "loss": 0.6625, "step": 1429 }, { "epoch": 0.2475922519207878, "grad_norm": 1.515625, "learning_rate": 1.970723175029104e-05, "loss": 0.5968, "step": 1430 }, { "epoch": 0.24776539335569744, "grad_norm": 1.5390625, "learning_rate": 1.970679349671964e-05, "loss": 0.7111, "step": 1431 }, { "epoch": 0.24793853479060707, "grad_norm": 1.5625, "learning_rate": 1.97063549202564e-05, "loss": 0.6433, "step": 1432 }, { "epoch": 0.24811167622551672, "grad_norm": 1.421875, "learning_rate": 1.9705916020915904e-05, "loss": 0.5897, "step": 1433 }, { "epoch": 0.24828481766042637, "grad_norm": 1.484375, "learning_rate": 1.9705476798712757e-05, "loss": 0.696, "step": 1434 }, { "epoch": 0.248457959095336, "grad_norm": 1.453125, "learning_rate": 1.9705037253661566e-05, "loss": 0.5944, "step": 1435 }, { "epoch": 0.24863110053024565, "grad_norm": 1.5, "learning_rate": 1.970459738577695e-05, "loss": 0.6551, "step": 1436 }, { "epoch": 0.2488042419651553, "grad_norm": 1.390625, "learning_rate": 1.9704157195073547e-05, "loss": 0.6098, "step": 1437 }, { "epoch": 0.24897738340006492, "grad_norm": 1.4296875, "learning_rate": 1.9703716681565996e-05, "loss": 0.5841, "step": 1438 }, { "epoch": 0.24915052483497457, "grad_norm": 1.4296875, "learning_rate": 1.970327584526895e-05, "loss": 0.6202, "step": 1439 }, { "epoch": 0.24932366626988423, "grad_norm": 1.46875, "learning_rate": 1.9702834686197074e-05, "loss": 0.6089, "step": 1440 }, { "epoch": 0.24949680770479385, "grad_norm": 1.4140625, "learning_rate": 1.9702393204365046e-05, "loss": 0.6819, "step": 1441 }, { "epoch": 0.2496699491397035, "grad_norm": 1.453125, "learning_rate": 1.9701951399787545e-05, "loss": 0.5707, "step": 1442 }, { "epoch": 0.24984309057461312, "grad_norm": 1.3203125, "learning_rate": 1.9701509272479273e-05, "loss": 0.5462, "step": 1443 }, { "epoch": 0.2500162320095228, "grad_norm": 1.5546875, "learning_rate": 1.9701066822454935e-05, "loss": 0.6415, "step": 1444 }, { "epoch": 0.25018937344443243, "grad_norm": 1.28125, "learning_rate": 1.970062404972925e-05, "loss": 0.5777, "step": 1445 }, { "epoch": 0.2503625148793421, "grad_norm": 1.3046875, "learning_rate": 1.9700180954316948e-05, "loss": 0.6491, "step": 1446 }, { "epoch": 0.25053565631425173, "grad_norm": 1.4609375, "learning_rate": 1.9699737536232762e-05, "loss": 0.6143, "step": 1447 }, { "epoch": 0.2507087977491613, "grad_norm": 1.359375, "learning_rate": 1.969929379549145e-05, "loss": 0.5656, "step": 1448 }, { "epoch": 0.250881939184071, "grad_norm": 1.4140625, "learning_rate": 1.9698849732107767e-05, "loss": 0.6065, "step": 1449 }, { "epoch": 0.25105508061898063, "grad_norm": 1.5, "learning_rate": 1.9698405346096488e-05, "loss": 0.6595, "step": 1450 }, { "epoch": 0.2512282220538903, "grad_norm": 1.4453125, "learning_rate": 1.9697960637472392e-05, "loss": 0.6038, "step": 1451 }, { "epoch": 0.25140136348879993, "grad_norm": 1.421875, "learning_rate": 1.9697515606250276e-05, "loss": 0.6609, "step": 1452 }, { "epoch": 0.25157450492370953, "grad_norm": 1.3125, "learning_rate": 1.9697070252444942e-05, "loss": 0.6562, "step": 1453 }, { "epoch": 0.2517476463586192, "grad_norm": 1.4765625, "learning_rate": 1.9696624576071203e-05, "loss": 0.5649, "step": 1454 }, { "epoch": 0.25192078779352883, "grad_norm": 1.5546875, "learning_rate": 1.969617857714389e-05, "loss": 0.6091, "step": 1455 }, { "epoch": 0.2520939292284385, "grad_norm": 1.328125, "learning_rate": 1.969573225567783e-05, "loss": 0.5882, "step": 1456 }, { "epoch": 0.25226707066334814, "grad_norm": 1.4375, "learning_rate": 1.9695285611687875e-05, "loss": 0.6085, "step": 1457 }, { "epoch": 0.2524402120982578, "grad_norm": 1.4453125, "learning_rate": 1.969483864518888e-05, "loss": 0.6381, "step": 1458 }, { "epoch": 0.2526133535331674, "grad_norm": 1.421875, "learning_rate": 1.9694391356195716e-05, "loss": 0.5869, "step": 1459 }, { "epoch": 0.25278649496807704, "grad_norm": 1.4921875, "learning_rate": 1.9693943744723262e-05, "loss": 0.6127, "step": 1460 }, { "epoch": 0.2529596364029867, "grad_norm": 1.6484375, "learning_rate": 1.9693495810786405e-05, "loss": 0.6168, "step": 1461 }, { "epoch": 0.25313277783789634, "grad_norm": 1.4140625, "learning_rate": 1.9693047554400047e-05, "loss": 0.6378, "step": 1462 }, { "epoch": 0.253305919272806, "grad_norm": 1.4453125, "learning_rate": 1.9692598975579097e-05, "loss": 0.5876, "step": 1463 }, { "epoch": 0.25347906070771564, "grad_norm": 1.484375, "learning_rate": 1.9692150074338482e-05, "loss": 0.6505, "step": 1464 }, { "epoch": 0.25365220214262524, "grad_norm": 1.328125, "learning_rate": 1.9691700850693126e-05, "loss": 0.6031, "step": 1465 }, { "epoch": 0.2538253435775349, "grad_norm": 1.4609375, "learning_rate": 1.969125130465798e-05, "loss": 0.5747, "step": 1466 }, { "epoch": 0.25399848501244454, "grad_norm": 1.4765625, "learning_rate": 1.969080143624799e-05, "loss": 0.6767, "step": 1467 }, { "epoch": 0.2541716264473542, "grad_norm": 1.5078125, "learning_rate": 1.969035124547813e-05, "loss": 0.6143, "step": 1468 }, { "epoch": 0.25434476788226384, "grad_norm": 1.4140625, "learning_rate": 1.968990073236337e-05, "loss": 0.4904, "step": 1469 }, { "epoch": 0.25451790931717344, "grad_norm": 1.453125, "learning_rate": 1.9689449896918694e-05, "loss": 0.58, "step": 1470 }, { "epoch": 0.2546910507520831, "grad_norm": 1.4921875, "learning_rate": 1.968899873915911e-05, "loss": 0.5869, "step": 1471 }, { "epoch": 0.25486419218699274, "grad_norm": 1.4609375, "learning_rate": 1.968854725909961e-05, "loss": 0.6302, "step": 1472 }, { "epoch": 0.2550373336219024, "grad_norm": 1.4375, "learning_rate": 1.968809545675522e-05, "loss": 0.6085, "step": 1473 }, { "epoch": 0.25521047505681205, "grad_norm": 1.3984375, "learning_rate": 1.968764333214097e-05, "loss": 0.639, "step": 1474 }, { "epoch": 0.2553836164917217, "grad_norm": 1.4296875, "learning_rate": 1.96871908852719e-05, "loss": 0.6335, "step": 1475 }, { "epoch": 0.2555567579266313, "grad_norm": 1.3359375, "learning_rate": 1.9686738116163057e-05, "loss": 0.6413, "step": 1476 }, { "epoch": 0.25572989936154095, "grad_norm": 1.3984375, "learning_rate": 1.9686285024829504e-05, "loss": 0.6372, "step": 1477 }, { "epoch": 0.2559030407964506, "grad_norm": 1.40625, "learning_rate": 1.9685831611286312e-05, "loss": 0.5891, "step": 1478 }, { "epoch": 0.25607618223136025, "grad_norm": 1.3125, "learning_rate": 1.9685377875548566e-05, "loss": 0.6571, "step": 1479 }, { "epoch": 0.2562493236662699, "grad_norm": 1.5, "learning_rate": 1.9684923817631358e-05, "loss": 0.6654, "step": 1480 }, { "epoch": 0.25642246510117955, "grad_norm": 1.3515625, "learning_rate": 1.968446943754979e-05, "loss": 0.5753, "step": 1481 }, { "epoch": 0.25659560653608915, "grad_norm": 1.5, "learning_rate": 1.968401473531898e-05, "loss": 0.5981, "step": 1482 }, { "epoch": 0.2567687479709988, "grad_norm": 1.390625, "learning_rate": 1.968355971095405e-05, "loss": 0.6441, "step": 1483 }, { "epoch": 0.25694188940590845, "grad_norm": 1.4609375, "learning_rate": 1.9683104364470138e-05, "loss": 0.632, "step": 1484 }, { "epoch": 0.2571150308408181, "grad_norm": 1.4609375, "learning_rate": 1.9682648695882393e-05, "loss": 0.655, "step": 1485 }, { "epoch": 0.25728817227572776, "grad_norm": 1.3671875, "learning_rate": 1.9682192705205968e-05, "loss": 0.6098, "step": 1486 }, { "epoch": 0.25746131371063735, "grad_norm": 1.3828125, "learning_rate": 1.9681736392456033e-05, "loss": 0.6471, "step": 1487 }, { "epoch": 0.257634455145547, "grad_norm": 1.421875, "learning_rate": 1.968127975764777e-05, "loss": 0.6014, "step": 1488 }, { "epoch": 0.25780759658045665, "grad_norm": 1.46875, "learning_rate": 1.968082280079637e-05, "loss": 0.6736, "step": 1489 }, { "epoch": 0.2579807380153663, "grad_norm": 1.4609375, "learning_rate": 1.9680365521917023e-05, "loss": 0.6598, "step": 1490 }, { "epoch": 0.25815387945027596, "grad_norm": 1.3984375, "learning_rate": 1.967990792102495e-05, "loss": 0.5719, "step": 1491 }, { "epoch": 0.2583270208851856, "grad_norm": 1.3984375, "learning_rate": 1.9679449998135372e-05, "loss": 0.6033, "step": 1492 }, { "epoch": 0.2585001623200952, "grad_norm": 1.4609375, "learning_rate": 1.967899175326352e-05, "loss": 0.7236, "step": 1493 }, { "epoch": 0.25867330375500486, "grad_norm": 1.453125, "learning_rate": 1.9678533186424635e-05, "loss": 0.6247, "step": 1494 }, { "epoch": 0.2588464451899145, "grad_norm": 1.3828125, "learning_rate": 1.9678074297633977e-05, "loss": 0.6006, "step": 1495 }, { "epoch": 0.25901958662482416, "grad_norm": 1.375, "learning_rate": 1.96776150869068e-05, "loss": 0.5829, "step": 1496 }, { "epoch": 0.2591927280597338, "grad_norm": 1.3046875, "learning_rate": 1.967715555425839e-05, "loss": 0.5936, "step": 1497 }, { "epoch": 0.25936586949464346, "grad_norm": 1.421875, "learning_rate": 1.9676695699704028e-05, "loss": 0.5882, "step": 1498 }, { "epoch": 0.25953901092955306, "grad_norm": 1.4453125, "learning_rate": 1.9676235523259013e-05, "loss": 0.58, "step": 1499 }, { "epoch": 0.2597121523644627, "grad_norm": 1.3984375, "learning_rate": 1.967577502493865e-05, "loss": 0.5996, "step": 1500 }, { "epoch": 0.25988529379937236, "grad_norm": 1.3203125, "learning_rate": 1.967531420475826e-05, "loss": 0.588, "step": 1501 }, { "epoch": 0.260058435234282, "grad_norm": 1.5390625, "learning_rate": 1.967485306273317e-05, "loss": 0.751, "step": 1502 }, { "epoch": 0.26023157666919167, "grad_norm": 1.359375, "learning_rate": 1.9674391598878722e-05, "loss": 0.6051, "step": 1503 }, { "epoch": 0.26040471810410126, "grad_norm": 1.3828125, "learning_rate": 1.9673929813210265e-05, "loss": 0.6365, "step": 1504 }, { "epoch": 0.2605778595390109, "grad_norm": 1.4375, "learning_rate": 1.967346770574316e-05, "loss": 0.5923, "step": 1505 }, { "epoch": 0.26075100097392057, "grad_norm": 1.3671875, "learning_rate": 1.9673005276492777e-05, "loss": 0.6129, "step": 1506 }, { "epoch": 0.2609241424088302, "grad_norm": 1.46875, "learning_rate": 1.9672542525474504e-05, "loss": 0.609, "step": 1507 }, { "epoch": 0.26109728384373987, "grad_norm": 1.3828125, "learning_rate": 1.9672079452703725e-05, "loss": 0.6007, "step": 1508 }, { "epoch": 0.2612704252786495, "grad_norm": 1.453125, "learning_rate": 1.9671616058195853e-05, "loss": 0.5932, "step": 1509 }, { "epoch": 0.2614435667135591, "grad_norm": 1.5234375, "learning_rate": 1.9671152341966294e-05, "loss": 0.7001, "step": 1510 }, { "epoch": 0.26161670814846877, "grad_norm": 1.375, "learning_rate": 1.9670688304030484e-05, "loss": 0.6174, "step": 1511 }, { "epoch": 0.2617898495833784, "grad_norm": 1.6015625, "learning_rate": 1.9670223944403853e-05, "loss": 0.7024, "step": 1512 }, { "epoch": 0.26196299101828807, "grad_norm": 1.4296875, "learning_rate": 1.9669759263101844e-05, "loss": 0.5507, "step": 1513 }, { "epoch": 0.2621361324531977, "grad_norm": 1.3515625, "learning_rate": 1.966929426013992e-05, "loss": 0.6029, "step": 1514 }, { "epoch": 0.2623092738881074, "grad_norm": 1.421875, "learning_rate": 1.9668828935533552e-05, "loss": 0.5844, "step": 1515 }, { "epoch": 0.26248241532301697, "grad_norm": 1.4609375, "learning_rate": 1.9668363289298206e-05, "loss": 0.6875, "step": 1516 }, { "epoch": 0.2626555567579266, "grad_norm": 1.4453125, "learning_rate": 1.9667897321449387e-05, "loss": 0.6197, "step": 1517 }, { "epoch": 0.2628286981928363, "grad_norm": 1.390625, "learning_rate": 1.9667431032002584e-05, "loss": 0.5998, "step": 1518 }, { "epoch": 0.2630018396277459, "grad_norm": 1.3359375, "learning_rate": 1.9666964420973313e-05, "loss": 0.5641, "step": 1519 }, { "epoch": 0.2631749810626556, "grad_norm": 1.3359375, "learning_rate": 1.966649748837709e-05, "loss": 0.5943, "step": 1520 }, { "epoch": 0.2633481224975652, "grad_norm": 1.46875, "learning_rate": 1.9666030234229458e-05, "loss": 0.624, "step": 1521 }, { "epoch": 0.2635212639324748, "grad_norm": 1.390625, "learning_rate": 1.966556265854595e-05, "loss": 0.6003, "step": 1522 }, { "epoch": 0.2636944053673845, "grad_norm": 1.390625, "learning_rate": 1.9665094761342127e-05, "loss": 0.5837, "step": 1523 }, { "epoch": 0.26386754680229413, "grad_norm": 1.4765625, "learning_rate": 1.9664626542633547e-05, "loss": 0.7029, "step": 1524 }, { "epoch": 0.2640406882372038, "grad_norm": 1.3984375, "learning_rate": 1.9664158002435788e-05, "loss": 0.579, "step": 1525 }, { "epoch": 0.26421382967211343, "grad_norm": 1.3984375, "learning_rate": 1.9663689140764437e-05, "loss": 0.6306, "step": 1526 }, { "epoch": 0.264386971107023, "grad_norm": 1.2734375, "learning_rate": 1.9663219957635088e-05, "loss": 0.5567, "step": 1527 }, { "epoch": 0.2645601125419327, "grad_norm": 1.4140625, "learning_rate": 1.966275045306335e-05, "loss": 0.5563, "step": 1528 }, { "epoch": 0.26473325397684233, "grad_norm": 1.484375, "learning_rate": 1.966228062706484e-05, "loss": 0.6463, "step": 1529 }, { "epoch": 0.264906395411752, "grad_norm": 1.3046875, "learning_rate": 1.9661810479655184e-05, "loss": 0.5629, "step": 1530 }, { "epoch": 0.26507953684666163, "grad_norm": 1.46875, "learning_rate": 1.9661340010850025e-05, "loss": 0.6131, "step": 1531 }, { "epoch": 0.2652526782815713, "grad_norm": 1.4453125, "learning_rate": 1.9660869220665014e-05, "loss": 0.6064, "step": 1532 }, { "epoch": 0.2654258197164809, "grad_norm": 1.390625, "learning_rate": 1.9660398109115804e-05, "loss": 0.6286, "step": 1533 }, { "epoch": 0.26559896115139053, "grad_norm": 1.3359375, "learning_rate": 1.9659926676218076e-05, "loss": 0.5706, "step": 1534 }, { "epoch": 0.2657721025863002, "grad_norm": 1.5234375, "learning_rate": 1.9659454921987507e-05, "loss": 0.6256, "step": 1535 }, { "epoch": 0.26594524402120984, "grad_norm": 1.5625, "learning_rate": 1.965898284643979e-05, "loss": 0.6745, "step": 1536 }, { "epoch": 0.2661183854561195, "grad_norm": 1.421875, "learning_rate": 1.965851044959063e-05, "loss": 0.5895, "step": 1537 }, { "epoch": 0.2662915268910291, "grad_norm": 1.421875, "learning_rate": 1.965803773145574e-05, "loss": 0.6076, "step": 1538 }, { "epoch": 0.26646466832593874, "grad_norm": 1.3359375, "learning_rate": 1.9657564692050842e-05, "loss": 0.5831, "step": 1539 }, { "epoch": 0.2666378097608484, "grad_norm": 1.34375, "learning_rate": 1.9657091331391673e-05, "loss": 0.6502, "step": 1540 }, { "epoch": 0.26681095119575804, "grad_norm": 1.4453125, "learning_rate": 1.9656617649493985e-05, "loss": 0.6313, "step": 1541 }, { "epoch": 0.2669840926306677, "grad_norm": 1.390625, "learning_rate": 1.9656143646373523e-05, "loss": 0.606, "step": 1542 }, { "epoch": 0.26715723406557734, "grad_norm": 1.59375, "learning_rate": 1.9655669322046068e-05, "loss": 0.6895, "step": 1543 }, { "epoch": 0.26733037550048694, "grad_norm": 1.3515625, "learning_rate": 1.9655194676527388e-05, "loss": 0.6027, "step": 1544 }, { "epoch": 0.2675035169353966, "grad_norm": 1.390625, "learning_rate": 1.9654719709833277e-05, "loss": 0.5419, "step": 1545 }, { "epoch": 0.26767665837030624, "grad_norm": 1.421875, "learning_rate": 1.965424442197953e-05, "loss": 0.6127, "step": 1546 }, { "epoch": 0.2678497998052159, "grad_norm": 1.3671875, "learning_rate": 1.9653768812981962e-05, "loss": 0.5627, "step": 1547 }, { "epoch": 0.26802294124012555, "grad_norm": 1.46875, "learning_rate": 1.965329288285639e-05, "loss": 0.6609, "step": 1548 }, { "epoch": 0.2681960826750352, "grad_norm": 1.515625, "learning_rate": 1.965281663161865e-05, "loss": 0.6459, "step": 1549 }, { "epoch": 0.2683692241099448, "grad_norm": 1.4296875, "learning_rate": 1.9652340059284582e-05, "loss": 0.5752, "step": 1550 }, { "epoch": 0.26854236554485444, "grad_norm": 1.3671875, "learning_rate": 1.965186316587004e-05, "loss": 0.563, "step": 1551 }, { "epoch": 0.2687155069797641, "grad_norm": 1.453125, "learning_rate": 1.9651385951390885e-05, "loss": 0.6126, "step": 1552 }, { "epoch": 0.26888864841467375, "grad_norm": 1.4375, "learning_rate": 1.965090841586299e-05, "loss": 0.599, "step": 1553 }, { "epoch": 0.2690617898495834, "grad_norm": 1.3828125, "learning_rate": 1.9650430559302245e-05, "loss": 0.5954, "step": 1554 }, { "epoch": 0.269234931284493, "grad_norm": 1.4296875, "learning_rate": 1.9649952381724544e-05, "loss": 0.609, "step": 1555 }, { "epoch": 0.26940807271940265, "grad_norm": 1.390625, "learning_rate": 1.9649473883145792e-05, "loss": 0.66, "step": 1556 }, { "epoch": 0.2695812141543123, "grad_norm": 1.40625, "learning_rate": 1.9648995063581906e-05, "loss": 0.6368, "step": 1557 }, { "epoch": 0.26975435558922195, "grad_norm": 1.4609375, "learning_rate": 1.9648515923048816e-05, "loss": 0.5926, "step": 1558 }, { "epoch": 0.2699274970241316, "grad_norm": 1.3515625, "learning_rate": 1.964803646156246e-05, "loss": 0.5976, "step": 1559 }, { "epoch": 0.27010063845904125, "grad_norm": 1.359375, "learning_rate": 1.9647556679138784e-05, "loss": 0.6563, "step": 1560 }, { "epoch": 0.27027377989395085, "grad_norm": 1.578125, "learning_rate": 1.9647076575793748e-05, "loss": 0.6076, "step": 1561 }, { "epoch": 0.2704469213288605, "grad_norm": 1.5390625, "learning_rate": 1.9646596151543327e-05, "loss": 0.6512, "step": 1562 }, { "epoch": 0.27062006276377015, "grad_norm": 1.3984375, "learning_rate": 1.9646115406403497e-05, "loss": 0.606, "step": 1563 }, { "epoch": 0.2707932041986798, "grad_norm": 1.328125, "learning_rate": 1.9645634340390253e-05, "loss": 0.6058, "step": 1564 }, { "epoch": 0.27096634563358946, "grad_norm": 1.421875, "learning_rate": 1.9645152953519597e-05, "loss": 0.6071, "step": 1565 }, { "epoch": 0.2711394870684991, "grad_norm": 1.484375, "learning_rate": 1.964467124580754e-05, "loss": 0.7153, "step": 1566 }, { "epoch": 0.2713126285034087, "grad_norm": 1.4609375, "learning_rate": 1.9644189217270108e-05, "loss": 0.6697, "step": 1567 }, { "epoch": 0.27148576993831836, "grad_norm": 1.546875, "learning_rate": 1.9643706867923337e-05, "loss": 0.6887, "step": 1568 }, { "epoch": 0.271658911373228, "grad_norm": 1.375, "learning_rate": 1.9643224197783265e-05, "loss": 0.6356, "step": 1569 }, { "epoch": 0.27183205280813766, "grad_norm": 1.3984375, "learning_rate": 1.9642741206865957e-05, "loss": 0.5875, "step": 1570 }, { "epoch": 0.2720051942430473, "grad_norm": 1.390625, "learning_rate": 1.964225789518747e-05, "loss": 0.6533, "step": 1571 }, { "epoch": 0.2721783356779569, "grad_norm": 1.453125, "learning_rate": 1.9641774262763892e-05, "loss": 0.5958, "step": 1572 }, { "epoch": 0.27235147711286656, "grad_norm": 1.5390625, "learning_rate": 1.9641290309611302e-05, "loss": 0.6702, "step": 1573 }, { "epoch": 0.2725246185477762, "grad_norm": 1.5703125, "learning_rate": 1.96408060357458e-05, "loss": 0.636, "step": 1574 }, { "epoch": 0.27269775998268586, "grad_norm": 1.3671875, "learning_rate": 1.9640321441183497e-05, "loss": 0.5908, "step": 1575 }, { "epoch": 0.2728709014175955, "grad_norm": 1.46875, "learning_rate": 1.9639836525940514e-05, "loss": 0.6236, "step": 1576 }, { "epoch": 0.27304404285250516, "grad_norm": 1.5390625, "learning_rate": 1.9639351290032978e-05, "loss": 0.6585, "step": 1577 }, { "epoch": 0.27321718428741476, "grad_norm": 1.40625, "learning_rate": 1.9638865733477034e-05, "loss": 0.5999, "step": 1578 }, { "epoch": 0.2733903257223244, "grad_norm": 1.4375, "learning_rate": 1.9638379856288833e-05, "loss": 0.5616, "step": 1579 }, { "epoch": 0.27356346715723406, "grad_norm": 1.5546875, "learning_rate": 1.9637893658484536e-05, "loss": 0.6211, "step": 1580 }, { "epoch": 0.2737366085921437, "grad_norm": 1.296875, "learning_rate": 1.9637407140080315e-05, "loss": 0.5705, "step": 1581 }, { "epoch": 0.27390975002705337, "grad_norm": 1.6484375, "learning_rate": 1.9636920301092352e-05, "loss": 0.6095, "step": 1582 }, { "epoch": 0.274082891461963, "grad_norm": 1.390625, "learning_rate": 1.9636433141536848e-05, "loss": 0.6842, "step": 1583 }, { "epoch": 0.2742560328968726, "grad_norm": 1.390625, "learning_rate": 1.9635945661430006e-05, "loss": 0.6903, "step": 1584 }, { "epoch": 0.27442917433178227, "grad_norm": 1.4765625, "learning_rate": 1.9635457860788038e-05, "loss": 0.5962, "step": 1585 }, { "epoch": 0.2746023157666919, "grad_norm": 1.3671875, "learning_rate": 1.9634969739627173e-05, "loss": 0.6143, "step": 1586 }, { "epoch": 0.27477545720160157, "grad_norm": 1.53125, "learning_rate": 1.963448129796365e-05, "loss": 0.6219, "step": 1587 }, { "epoch": 0.2749485986365112, "grad_norm": 1.4609375, "learning_rate": 1.9633992535813717e-05, "loss": 0.6117, "step": 1588 }, { "epoch": 0.2751217400714208, "grad_norm": 1.5078125, "learning_rate": 1.9633503453193627e-05, "loss": 0.6129, "step": 1589 }, { "epoch": 0.27529488150633047, "grad_norm": 1.4140625, "learning_rate": 1.9633014050119653e-05, "loss": 0.5885, "step": 1590 }, { "epoch": 0.2754680229412401, "grad_norm": 1.3515625, "learning_rate": 1.9632524326608076e-05, "loss": 0.5426, "step": 1591 }, { "epoch": 0.27564116437614977, "grad_norm": 1.5, "learning_rate": 1.963203428267518e-05, "loss": 0.6422, "step": 1592 }, { "epoch": 0.2758143058110594, "grad_norm": 1.3046875, "learning_rate": 1.9631543918337276e-05, "loss": 0.5821, "step": 1593 }, { "epoch": 0.2759874472459691, "grad_norm": 1.4296875, "learning_rate": 1.9631053233610668e-05, "loss": 0.6383, "step": 1594 }, { "epoch": 0.27616058868087867, "grad_norm": 1.5234375, "learning_rate": 1.9630562228511682e-05, "loss": 0.687, "step": 1595 }, { "epoch": 0.2763337301157883, "grad_norm": 1.375, "learning_rate": 1.963007090305665e-05, "loss": 0.6259, "step": 1596 }, { "epoch": 0.276506871550698, "grad_norm": 1.34375, "learning_rate": 1.9629579257261916e-05, "loss": 0.5881, "step": 1597 }, { "epoch": 0.2766800129856076, "grad_norm": 1.375, "learning_rate": 1.962908729114383e-05, "loss": 0.6065, "step": 1598 }, { "epoch": 0.2768531544205173, "grad_norm": 1.4296875, "learning_rate": 1.9628595004718763e-05, "loss": 0.6218, "step": 1599 }, { "epoch": 0.27702629585542693, "grad_norm": 1.5234375, "learning_rate": 1.9628102398003094e-05, "loss": 0.6366, "step": 1600 }, { "epoch": 0.2771994372903365, "grad_norm": 1.5546875, "learning_rate": 1.9627609471013195e-05, "loss": 0.6477, "step": 1601 }, { "epoch": 0.2773725787252462, "grad_norm": 1.4375, "learning_rate": 1.962711622376548e-05, "loss": 0.6538, "step": 1602 }, { "epoch": 0.27754572016015583, "grad_norm": 1.6171875, "learning_rate": 1.9626622656276343e-05, "loss": 0.6648, "step": 1603 }, { "epoch": 0.2777188615950655, "grad_norm": 1.3203125, "learning_rate": 1.962612876856221e-05, "loss": 0.5776, "step": 1604 }, { "epoch": 0.27789200302997513, "grad_norm": 1.390625, "learning_rate": 1.962563456063951e-05, "loss": 0.5919, "step": 1605 }, { "epoch": 0.27806514446488473, "grad_norm": 1.375, "learning_rate": 1.962514003252468e-05, "loss": 0.6924, "step": 1606 }, { "epoch": 0.2782382858997944, "grad_norm": 1.3359375, "learning_rate": 1.9624645184234167e-05, "loss": 0.5615, "step": 1607 }, { "epoch": 0.27841142733470403, "grad_norm": 1.4765625, "learning_rate": 1.962415001578444e-05, "loss": 0.603, "step": 1608 }, { "epoch": 0.2785845687696137, "grad_norm": 1.421875, "learning_rate": 1.9623654527191962e-05, "loss": 0.5495, "step": 1609 }, { "epoch": 0.27875771020452333, "grad_norm": 1.515625, "learning_rate": 1.962315871847322e-05, "loss": 0.6015, "step": 1610 }, { "epoch": 0.278930851639433, "grad_norm": 1.40625, "learning_rate": 1.962266258964471e-05, "loss": 0.6138, "step": 1611 }, { "epoch": 0.2791039930743426, "grad_norm": 1.3828125, "learning_rate": 1.9622166140722927e-05, "loss": 0.6182, "step": 1612 }, { "epoch": 0.27927713450925223, "grad_norm": 1.3203125, "learning_rate": 1.962166937172439e-05, "loss": 0.5808, "step": 1613 }, { "epoch": 0.2794502759441619, "grad_norm": 1.4375, "learning_rate": 1.962117228266563e-05, "loss": 0.6609, "step": 1614 }, { "epoch": 0.27962341737907154, "grad_norm": 1.3984375, "learning_rate": 1.962067487356317e-05, "loss": 0.6692, "step": 1615 }, { "epoch": 0.2797965588139812, "grad_norm": 1.34375, "learning_rate": 1.962017714443356e-05, "loss": 0.5567, "step": 1616 }, { "epoch": 0.27996970024889084, "grad_norm": 1.4296875, "learning_rate": 1.9619679095293365e-05, "loss": 0.6232, "step": 1617 }, { "epoch": 0.28014284168380044, "grad_norm": 1.4140625, "learning_rate": 1.9619180726159142e-05, "loss": 0.6395, "step": 1618 }, { "epoch": 0.2803159831187101, "grad_norm": 1.4921875, "learning_rate": 1.9618682037047473e-05, "loss": 0.6608, "step": 1619 }, { "epoch": 0.28048912455361974, "grad_norm": 1.375, "learning_rate": 1.961818302797495e-05, "loss": 0.5987, "step": 1620 }, { "epoch": 0.2806622659885294, "grad_norm": 1.546875, "learning_rate": 1.9617683698958168e-05, "loss": 0.5985, "step": 1621 }, { "epoch": 0.28083540742343904, "grad_norm": 1.3515625, "learning_rate": 1.9617184050013737e-05, "loss": 0.5974, "step": 1622 }, { "epoch": 0.28100854885834864, "grad_norm": 1.2421875, "learning_rate": 1.961668408115828e-05, "loss": 0.6355, "step": 1623 }, { "epoch": 0.2811816902932583, "grad_norm": 1.5234375, "learning_rate": 1.9616183792408427e-05, "loss": 0.5964, "step": 1624 }, { "epoch": 0.28135483172816794, "grad_norm": 1.359375, "learning_rate": 1.9615683183780816e-05, "loss": 0.617, "step": 1625 }, { "epoch": 0.2815279731630776, "grad_norm": 1.296875, "learning_rate": 1.9615182255292107e-05, "loss": 0.5975, "step": 1626 }, { "epoch": 0.28170111459798725, "grad_norm": 1.515625, "learning_rate": 1.9614681006958957e-05, "loss": 0.6801, "step": 1627 }, { "epoch": 0.2818742560328969, "grad_norm": 1.3984375, "learning_rate": 1.961417943879804e-05, "loss": 0.6457, "step": 1628 }, { "epoch": 0.2820473974678065, "grad_norm": 1.453125, "learning_rate": 1.9613677550826053e-05, "loss": 0.6438, "step": 1629 }, { "epoch": 0.28222053890271614, "grad_norm": 1.3671875, "learning_rate": 1.961317534305967e-05, "loss": 0.6481, "step": 1630 }, { "epoch": 0.2823936803376258, "grad_norm": 1.609375, "learning_rate": 1.9612672815515613e-05, "loss": 0.6539, "step": 1631 }, { "epoch": 0.28256682177253545, "grad_norm": 1.4375, "learning_rate": 1.961216996821059e-05, "loss": 0.642, "step": 1632 }, { "epoch": 0.2827399632074451, "grad_norm": 1.3359375, "learning_rate": 1.961166680116133e-05, "loss": 0.61, "step": 1633 }, { "epoch": 0.2829131046423547, "grad_norm": 1.4453125, "learning_rate": 1.9611163314384574e-05, "loss": 0.6374, "step": 1634 }, { "epoch": 0.28308624607726435, "grad_norm": 1.46875, "learning_rate": 1.9610659507897065e-05, "loss": 0.5924, "step": 1635 }, { "epoch": 0.283259387512174, "grad_norm": 1.4140625, "learning_rate": 1.9610155381715565e-05, "loss": 0.6033, "step": 1636 }, { "epoch": 0.28343252894708365, "grad_norm": 1.5078125, "learning_rate": 1.9609650935856847e-05, "loss": 0.6406, "step": 1637 }, { "epoch": 0.2836056703819933, "grad_norm": 1.3125, "learning_rate": 1.960914617033768e-05, "loss": 0.6213, "step": 1638 }, { "epoch": 0.28377881181690295, "grad_norm": 1.4765625, "learning_rate": 1.9608641085174868e-05, "loss": 0.6673, "step": 1639 }, { "epoch": 0.28395195325181255, "grad_norm": 1.3671875, "learning_rate": 1.9608135680385204e-05, "loss": 0.6012, "step": 1640 }, { "epoch": 0.2841250946867222, "grad_norm": 1.4609375, "learning_rate": 1.9607629955985502e-05, "loss": 0.589, "step": 1641 }, { "epoch": 0.28429823612163185, "grad_norm": 1.3515625, "learning_rate": 1.9607123911992585e-05, "loss": 0.5458, "step": 1642 }, { "epoch": 0.2844713775565415, "grad_norm": 1.4453125, "learning_rate": 1.9606617548423282e-05, "loss": 0.5692, "step": 1643 }, { "epoch": 0.28464451899145116, "grad_norm": 1.53125, "learning_rate": 1.9606110865294448e-05, "loss": 0.6457, "step": 1644 }, { "epoch": 0.2848176604263608, "grad_norm": 1.375, "learning_rate": 1.960560386262293e-05, "loss": 0.5978, "step": 1645 }, { "epoch": 0.2849908018612704, "grad_norm": 1.4453125, "learning_rate": 1.960509654042559e-05, "loss": 0.6629, "step": 1646 }, { "epoch": 0.28516394329618006, "grad_norm": 1.4296875, "learning_rate": 1.9604588898719314e-05, "loss": 0.6483, "step": 1647 }, { "epoch": 0.2853370847310897, "grad_norm": 1.515625, "learning_rate": 1.9604080937520975e-05, "loss": 0.6224, "step": 1648 }, { "epoch": 0.28551022616599936, "grad_norm": 1.4609375, "learning_rate": 1.9603572656847484e-05, "loss": 0.6495, "step": 1649 }, { "epoch": 0.285683367600909, "grad_norm": 1.40625, "learning_rate": 1.960306405671574e-05, "loss": 0.6008, "step": 1650 }, { "epoch": 0.2858565090358186, "grad_norm": 1.421875, "learning_rate": 1.9602555137142662e-05, "loss": 0.6105, "step": 1651 }, { "epoch": 0.28602965047072826, "grad_norm": 1.390625, "learning_rate": 1.9602045898145178e-05, "loss": 0.6397, "step": 1652 }, { "epoch": 0.2862027919056379, "grad_norm": 1.390625, "learning_rate": 1.9601536339740233e-05, "loss": 0.6618, "step": 1653 }, { "epoch": 0.28637593334054756, "grad_norm": 1.3671875, "learning_rate": 1.9601026461944778e-05, "loss": 0.5997, "step": 1654 }, { "epoch": 0.2865490747754572, "grad_norm": 1.375, "learning_rate": 1.9600516264775764e-05, "loss": 0.6706, "step": 1655 }, { "epoch": 0.28672221621036686, "grad_norm": 1.53125, "learning_rate": 1.960000574825017e-05, "loss": 0.6436, "step": 1656 }, { "epoch": 0.28689535764527646, "grad_norm": 1.4140625, "learning_rate": 1.959949491238498e-05, "loss": 0.5771, "step": 1657 }, { "epoch": 0.2870684990801861, "grad_norm": 1.3671875, "learning_rate": 1.9598983757197182e-05, "loss": 0.5442, "step": 1658 }, { "epoch": 0.28724164051509576, "grad_norm": 1.5078125, "learning_rate": 1.9598472282703777e-05, "loss": 0.6415, "step": 1659 }, { "epoch": 0.2874147819500054, "grad_norm": 1.5703125, "learning_rate": 1.9597960488921785e-05, "loss": 0.7169, "step": 1660 }, { "epoch": 0.28758792338491507, "grad_norm": 1.484375, "learning_rate": 1.959744837586823e-05, "loss": 0.6138, "step": 1661 }, { "epoch": 0.2877610648198247, "grad_norm": 1.3125, "learning_rate": 1.9596935943560145e-05, "loss": 0.5891, "step": 1662 }, { "epoch": 0.2879342062547343, "grad_norm": 1.3125, "learning_rate": 1.9596423192014574e-05, "loss": 0.5892, "step": 1663 }, { "epoch": 0.28810734768964397, "grad_norm": 1.3828125, "learning_rate": 1.9595910121248576e-05, "loss": 0.5661, "step": 1664 }, { "epoch": 0.2882804891245536, "grad_norm": 1.3515625, "learning_rate": 1.9595396731279218e-05, "loss": 0.5966, "step": 1665 }, { "epoch": 0.28845363055946327, "grad_norm": 1.4609375, "learning_rate": 1.959488302212358e-05, "loss": 0.6304, "step": 1666 }, { "epoch": 0.2886267719943729, "grad_norm": 1.4765625, "learning_rate": 1.9594368993798745e-05, "loss": 0.6203, "step": 1667 }, { "epoch": 0.2887999134292825, "grad_norm": 1.5546875, "learning_rate": 1.9593854646321815e-05, "loss": 0.6313, "step": 1668 }, { "epoch": 0.28897305486419217, "grad_norm": 1.3359375, "learning_rate": 1.95933399797099e-05, "loss": 0.5528, "step": 1669 }, { "epoch": 0.2891461962991018, "grad_norm": 1.4375, "learning_rate": 1.9592824993980115e-05, "loss": 0.5532, "step": 1670 }, { "epoch": 0.2893193377340115, "grad_norm": 1.9375, "learning_rate": 1.9592309689149597e-05, "loss": 0.5872, "step": 1671 }, { "epoch": 0.2894924791689211, "grad_norm": 1.609375, "learning_rate": 1.959179406523549e-05, "loss": 0.6582, "step": 1672 }, { "epoch": 0.2896656206038308, "grad_norm": 1.515625, "learning_rate": 1.9591278122254938e-05, "loss": 0.6351, "step": 1673 }, { "epoch": 0.28983876203874037, "grad_norm": 1.4140625, "learning_rate": 1.9590761860225105e-05, "loss": 0.6106, "step": 1674 }, { "epoch": 0.29001190347365, "grad_norm": 1.4765625, "learning_rate": 1.9590245279163168e-05, "loss": 0.6772, "step": 1675 }, { "epoch": 0.2901850449085597, "grad_norm": 1.40625, "learning_rate": 1.958972837908631e-05, "loss": 0.5505, "step": 1676 }, { "epoch": 0.2903581863434693, "grad_norm": 1.328125, "learning_rate": 1.9589211160011722e-05, "loss": 0.5901, "step": 1677 }, { "epoch": 0.290531327778379, "grad_norm": 1.4140625, "learning_rate": 1.9588693621956613e-05, "loss": 0.581, "step": 1678 }, { "epoch": 0.29070446921328863, "grad_norm": 1.40625, "learning_rate": 1.9588175764938197e-05, "loss": 0.6213, "step": 1679 }, { "epoch": 0.2908776106481982, "grad_norm": 1.34375, "learning_rate": 1.9587657588973702e-05, "loss": 0.5658, "step": 1680 }, { "epoch": 0.2910507520831079, "grad_norm": 1.3671875, "learning_rate": 1.958713909408036e-05, "loss": 0.6372, "step": 1681 }, { "epoch": 0.29122389351801753, "grad_norm": 1.5078125, "learning_rate": 1.9586620280275424e-05, "loss": 0.6864, "step": 1682 }, { "epoch": 0.2913970349529272, "grad_norm": 1.375, "learning_rate": 1.958610114757615e-05, "loss": 0.5299, "step": 1683 }, { "epoch": 0.29157017638783683, "grad_norm": 1.390625, "learning_rate": 1.9585581695999806e-05, "loss": 0.6502, "step": 1684 }, { "epoch": 0.29174331782274643, "grad_norm": 1.4296875, "learning_rate": 1.9585061925563672e-05, "loss": 0.6057, "step": 1685 }, { "epoch": 0.2919164592576561, "grad_norm": 1.484375, "learning_rate": 1.9584541836285035e-05, "loss": 0.7, "step": 1686 }, { "epoch": 0.29208960069256573, "grad_norm": 1.3359375, "learning_rate": 1.95840214281812e-05, "loss": 0.664, "step": 1687 }, { "epoch": 0.2922627421274754, "grad_norm": 1.421875, "learning_rate": 1.9583500701269476e-05, "loss": 0.6109, "step": 1688 }, { "epoch": 0.29243588356238504, "grad_norm": 1.5, "learning_rate": 1.958297965556719e-05, "loss": 0.6079, "step": 1689 }, { "epoch": 0.2926090249972947, "grad_norm": 1.4296875, "learning_rate": 1.9582458291091664e-05, "loss": 0.6788, "step": 1690 }, { "epoch": 0.2927821664322043, "grad_norm": 1.4453125, "learning_rate": 1.9581936607860247e-05, "loss": 0.6704, "step": 1691 }, { "epoch": 0.29295530786711393, "grad_norm": 1.40625, "learning_rate": 1.958141460589029e-05, "loss": 0.6086, "step": 1692 }, { "epoch": 0.2931284493020236, "grad_norm": 1.40625, "learning_rate": 1.9580892285199167e-05, "loss": 0.5926, "step": 1693 }, { "epoch": 0.29330159073693324, "grad_norm": 1.5859375, "learning_rate": 1.958036964580424e-05, "loss": 0.6041, "step": 1694 }, { "epoch": 0.2934747321718429, "grad_norm": 1.6875, "learning_rate": 1.9579846687722897e-05, "loss": 0.6289, "step": 1695 }, { "epoch": 0.29364787360675254, "grad_norm": 1.359375, "learning_rate": 1.9579323410972536e-05, "loss": 0.5762, "step": 1696 }, { "epoch": 0.29382101504166214, "grad_norm": 1.484375, "learning_rate": 1.957879981557057e-05, "loss": 0.7195, "step": 1697 }, { "epoch": 0.2939941564765718, "grad_norm": 1.34375, "learning_rate": 1.95782759015344e-05, "loss": 0.574, "step": 1698 }, { "epoch": 0.29416729791148144, "grad_norm": 1.4765625, "learning_rate": 1.957775166888147e-05, "loss": 0.6805, "step": 1699 }, { "epoch": 0.2943404393463911, "grad_norm": 1.390625, "learning_rate": 1.957722711762921e-05, "loss": 0.563, "step": 1700 }, { "epoch": 0.29451358078130074, "grad_norm": 1.546875, "learning_rate": 1.9576702247795075e-05, "loss": 0.6525, "step": 1701 }, { "epoch": 0.29468672221621034, "grad_norm": 1.4765625, "learning_rate": 1.9576177059396516e-05, "loss": 0.6167, "step": 1702 }, { "epoch": 0.29485986365112, "grad_norm": 1.390625, "learning_rate": 1.957565155245101e-05, "loss": 0.6338, "step": 1703 }, { "epoch": 0.29503300508602964, "grad_norm": 1.4765625, "learning_rate": 1.9575125726976036e-05, "loss": 0.6519, "step": 1704 }, { "epoch": 0.2952061465209393, "grad_norm": 1.3828125, "learning_rate": 1.9574599582989082e-05, "loss": 0.5722, "step": 1705 }, { "epoch": 0.29537928795584895, "grad_norm": 1.359375, "learning_rate": 1.9574073120507654e-05, "loss": 0.594, "step": 1706 }, { "epoch": 0.2955524293907586, "grad_norm": 1.4375, "learning_rate": 1.957354633954926e-05, "loss": 0.6219, "step": 1707 }, { "epoch": 0.2957255708256682, "grad_norm": 1.4296875, "learning_rate": 1.957301924013143e-05, "loss": 0.6319, "step": 1708 }, { "epoch": 0.29589871226057785, "grad_norm": 1.5, "learning_rate": 1.9572491822271692e-05, "loss": 0.7289, "step": 1709 }, { "epoch": 0.2960718536954875, "grad_norm": 1.265625, "learning_rate": 1.9571964085987593e-05, "loss": 0.5601, "step": 1710 }, { "epoch": 0.29624499513039715, "grad_norm": 1.390625, "learning_rate": 1.957143603129669e-05, "loss": 0.6439, "step": 1711 }, { "epoch": 0.2964181365653068, "grad_norm": 1.3125, "learning_rate": 1.957090765821654e-05, "loss": 0.5752, "step": 1712 }, { "epoch": 0.29659127800021645, "grad_norm": 1.4921875, "learning_rate": 1.957037896676473e-05, "loss": 0.6079, "step": 1713 }, { "epoch": 0.29676441943512605, "grad_norm": 1.3125, "learning_rate": 1.9569849956958834e-05, "loss": 0.6319, "step": 1714 }, { "epoch": 0.2969375608700357, "grad_norm": 1.4765625, "learning_rate": 1.9569320628816464e-05, "loss": 0.6837, "step": 1715 }, { "epoch": 0.29711070230494535, "grad_norm": 1.8359375, "learning_rate": 1.9568790982355216e-05, "loss": 0.6408, "step": 1716 }, { "epoch": 0.297283843739855, "grad_norm": 1.4609375, "learning_rate": 1.9568261017592715e-05, "loss": 0.6865, "step": 1717 }, { "epoch": 0.29745698517476465, "grad_norm": 1.375, "learning_rate": 1.9567730734546587e-05, "loss": 0.6029, "step": 1718 }, { "epoch": 0.29763012660967425, "grad_norm": 1.40625, "learning_rate": 1.9567200133234476e-05, "loss": 0.6514, "step": 1719 }, { "epoch": 0.2978032680445839, "grad_norm": 1.4609375, "learning_rate": 1.9566669213674024e-05, "loss": 0.5929, "step": 1720 }, { "epoch": 0.29797640947949355, "grad_norm": 1.4609375, "learning_rate": 1.9566137975882898e-05, "loss": 0.6156, "step": 1721 }, { "epoch": 0.2981495509144032, "grad_norm": 1.4375, "learning_rate": 1.956560641987877e-05, "loss": 0.6327, "step": 1722 }, { "epoch": 0.29832269234931286, "grad_norm": 1.5, "learning_rate": 1.9565074545679318e-05, "loss": 0.5782, "step": 1723 }, { "epoch": 0.2984958337842225, "grad_norm": 1.5078125, "learning_rate": 1.9564542353302233e-05, "loss": 0.53, "step": 1724 }, { "epoch": 0.2986689752191321, "grad_norm": 1.3203125, "learning_rate": 1.9564009842765225e-05, "loss": 0.5504, "step": 1725 }, { "epoch": 0.29884211665404176, "grad_norm": 1.5625, "learning_rate": 1.9563477014086006e-05, "loss": 0.7245, "step": 1726 }, { "epoch": 0.2990152580889514, "grad_norm": 1.3984375, "learning_rate": 1.9562943867282296e-05, "loss": 0.5745, "step": 1727 }, { "epoch": 0.29918839952386106, "grad_norm": 1.484375, "learning_rate": 1.9562410402371833e-05, "loss": 0.5726, "step": 1728 }, { "epoch": 0.2993615409587707, "grad_norm": 1.5078125, "learning_rate": 1.9561876619372362e-05, "loss": 0.5854, "step": 1729 }, { "epoch": 0.29953468239368036, "grad_norm": 1.3984375, "learning_rate": 1.9561342518301636e-05, "loss": 0.5804, "step": 1730 }, { "epoch": 0.29970782382858996, "grad_norm": 1.4765625, "learning_rate": 1.9560808099177427e-05, "loss": 0.612, "step": 1731 }, { "epoch": 0.2998809652634996, "grad_norm": 1.3671875, "learning_rate": 1.9560273362017514e-05, "loss": 0.5852, "step": 1732 }, { "epoch": 0.30005410669840926, "grad_norm": 1.5234375, "learning_rate": 1.9559738306839673e-05, "loss": 0.6473, "step": 1733 }, { "epoch": 0.3002272481333189, "grad_norm": 1.4609375, "learning_rate": 1.9559202933661713e-05, "loss": 0.6302, "step": 1734 }, { "epoch": 0.30040038956822857, "grad_norm": 1.7734375, "learning_rate": 1.955866724250144e-05, "loss": 0.6699, "step": 1735 }, { "epoch": 0.30057353100313816, "grad_norm": 1.4375, "learning_rate": 1.9558131233376672e-05, "loss": 0.6917, "step": 1736 }, { "epoch": 0.3007466724380478, "grad_norm": 1.3515625, "learning_rate": 1.955759490630524e-05, "loss": 0.591, "step": 1737 }, { "epoch": 0.30091981387295746, "grad_norm": 1.359375, "learning_rate": 1.955705826130499e-05, "loss": 0.6362, "step": 1738 }, { "epoch": 0.3010929553078671, "grad_norm": 1.6171875, "learning_rate": 1.9556521298393762e-05, "loss": 0.6788, "step": 1739 }, { "epoch": 0.30126609674277677, "grad_norm": 1.5, "learning_rate": 1.955598401758943e-05, "loss": 0.6279, "step": 1740 }, { "epoch": 0.3014392381776864, "grad_norm": 1.4765625, "learning_rate": 1.9555446418909856e-05, "loss": 0.6069, "step": 1741 }, { "epoch": 0.301612379612596, "grad_norm": 1.3359375, "learning_rate": 1.9554908502372927e-05, "loss": 0.5537, "step": 1742 }, { "epoch": 0.30178552104750567, "grad_norm": 1.453125, "learning_rate": 1.9554370267996537e-05, "loss": 0.6488, "step": 1743 }, { "epoch": 0.3019586624824153, "grad_norm": 1.46875, "learning_rate": 1.9553831715798593e-05, "loss": 0.6925, "step": 1744 }, { "epoch": 0.30213180391732497, "grad_norm": 1.3359375, "learning_rate": 1.9553292845797004e-05, "loss": 0.5724, "step": 1745 }, { "epoch": 0.3023049453522346, "grad_norm": 1.265625, "learning_rate": 1.95527536580097e-05, "loss": 0.5783, "step": 1746 }, { "epoch": 0.3024780867871443, "grad_norm": 1.4765625, "learning_rate": 1.9552214152454615e-05, "loss": 0.5711, "step": 1747 }, { "epoch": 0.30265122822205387, "grad_norm": 1.4375, "learning_rate": 1.9551674329149692e-05, "loss": 0.6422, "step": 1748 }, { "epoch": 0.3028243696569635, "grad_norm": 1.4921875, "learning_rate": 1.9551134188112895e-05, "loss": 0.6587, "step": 1749 }, { "epoch": 0.3029975110918732, "grad_norm": 1.515625, "learning_rate": 1.9550593729362185e-05, "loss": 0.634, "step": 1750 }, { "epoch": 0.3031706525267828, "grad_norm": 1.3359375, "learning_rate": 1.9550052952915545e-05, "loss": 0.5858, "step": 1751 }, { "epoch": 0.3033437939616925, "grad_norm": 1.421875, "learning_rate": 1.954951185879096e-05, "loss": 0.5858, "step": 1752 }, { "epoch": 0.30351693539660207, "grad_norm": 1.5078125, "learning_rate": 1.954897044700643e-05, "loss": 0.6983, "step": 1753 }, { "epoch": 0.3036900768315117, "grad_norm": 1.390625, "learning_rate": 1.9548428717579967e-05, "loss": 0.6537, "step": 1754 }, { "epoch": 0.3038632182664214, "grad_norm": 1.4296875, "learning_rate": 1.954788667052959e-05, "loss": 0.5692, "step": 1755 }, { "epoch": 0.304036359701331, "grad_norm": 1.4296875, "learning_rate": 1.9547344305873327e-05, "loss": 0.619, "step": 1756 }, { "epoch": 0.3042095011362407, "grad_norm": 1.3515625, "learning_rate": 1.9546801623629227e-05, "loss": 0.6085, "step": 1757 }, { "epoch": 0.30438264257115033, "grad_norm": 7.65625, "learning_rate": 1.9546258623815335e-05, "loss": 0.679, "step": 1758 }, { "epoch": 0.3045557840060599, "grad_norm": 1.390625, "learning_rate": 1.9545715306449716e-05, "loss": 0.5719, "step": 1759 }, { "epoch": 0.3047289254409696, "grad_norm": 1.3671875, "learning_rate": 1.9545171671550443e-05, "loss": 0.5785, "step": 1760 }, { "epoch": 0.30490206687587923, "grad_norm": 1.3671875, "learning_rate": 1.9544627719135597e-05, "loss": 0.5913, "step": 1761 }, { "epoch": 0.3050752083107889, "grad_norm": 1.3984375, "learning_rate": 1.954408344922328e-05, "loss": 0.4939, "step": 1762 }, { "epoch": 0.30524834974569853, "grad_norm": 1.3828125, "learning_rate": 1.954353886183159e-05, "loss": 0.5893, "step": 1763 }, { "epoch": 0.3054214911806082, "grad_norm": 1.4296875, "learning_rate": 1.9542993956978647e-05, "loss": 0.6317, "step": 1764 }, { "epoch": 0.3055946326155178, "grad_norm": 1.40625, "learning_rate": 1.954244873468257e-05, "loss": 0.6549, "step": 1765 }, { "epoch": 0.30576777405042743, "grad_norm": 1.4609375, "learning_rate": 1.9541903194961504e-05, "loss": 0.6649, "step": 1766 }, { "epoch": 0.3059409154853371, "grad_norm": 1.296875, "learning_rate": 1.954135733783359e-05, "loss": 0.5829, "step": 1767 }, { "epoch": 0.30611405692024674, "grad_norm": 1.3828125, "learning_rate": 1.954081116331699e-05, "loss": 0.6689, "step": 1768 }, { "epoch": 0.3062871983551564, "grad_norm": 1.3203125, "learning_rate": 1.954026467142987e-05, "loss": 0.6008, "step": 1769 }, { "epoch": 0.306460339790066, "grad_norm": 1.4140625, "learning_rate": 1.9539717862190407e-05, "loss": 0.5921, "step": 1770 }, { "epoch": 0.30663348122497563, "grad_norm": 1.3203125, "learning_rate": 1.9539170735616794e-05, "loss": 0.5868, "step": 1771 }, { "epoch": 0.3068066226598853, "grad_norm": 1.296875, "learning_rate": 1.953862329172723e-05, "loss": 0.6091, "step": 1772 }, { "epoch": 0.30697976409479494, "grad_norm": 1.4453125, "learning_rate": 1.9538075530539924e-05, "loss": 0.6055, "step": 1773 }, { "epoch": 0.3071529055297046, "grad_norm": 1.359375, "learning_rate": 1.9537527452073096e-05, "loss": 0.6081, "step": 1774 }, { "epoch": 0.30732604696461424, "grad_norm": 1.3828125, "learning_rate": 1.9536979056344983e-05, "loss": 0.5941, "step": 1775 }, { "epoch": 0.30749918839952384, "grad_norm": 1.3828125, "learning_rate": 1.953643034337382e-05, "loss": 0.5477, "step": 1776 }, { "epoch": 0.3076723298344335, "grad_norm": 1.4140625, "learning_rate": 1.9535881313177864e-05, "loss": 0.5814, "step": 1777 }, { "epoch": 0.30784547126934314, "grad_norm": 1.34375, "learning_rate": 1.9535331965775376e-05, "loss": 0.6271, "step": 1778 }, { "epoch": 0.3080186127042528, "grad_norm": 1.53125, "learning_rate": 1.953478230118463e-05, "loss": 0.6448, "step": 1779 }, { "epoch": 0.30819175413916244, "grad_norm": 1.3515625, "learning_rate": 1.9534232319423915e-05, "loss": 0.5656, "step": 1780 }, { "epoch": 0.3083648955740721, "grad_norm": 1.375, "learning_rate": 1.9533682020511523e-05, "loss": 0.6179, "step": 1781 }, { "epoch": 0.3085380370089817, "grad_norm": 1.34375, "learning_rate": 1.9533131404465757e-05, "loss": 0.5671, "step": 1782 }, { "epoch": 0.30871117844389134, "grad_norm": 1.3671875, "learning_rate": 1.9532580471304933e-05, "loss": 0.5762, "step": 1783 }, { "epoch": 0.308884319878801, "grad_norm": 1.4140625, "learning_rate": 1.953202922104738e-05, "loss": 0.6359, "step": 1784 }, { "epoch": 0.30905746131371065, "grad_norm": 1.5390625, "learning_rate": 1.9531477653711434e-05, "loss": 0.7064, "step": 1785 }, { "epoch": 0.3092306027486203, "grad_norm": 1.4453125, "learning_rate": 1.9530925769315443e-05, "loss": 0.6412, "step": 1786 }, { "epoch": 0.3094037441835299, "grad_norm": 1.421875, "learning_rate": 1.9530373567877767e-05, "loss": 0.6075, "step": 1787 }, { "epoch": 0.30957688561843955, "grad_norm": 2.859375, "learning_rate": 1.9529821049416774e-05, "loss": 0.6118, "step": 1788 }, { "epoch": 0.3097500270533492, "grad_norm": 1.984375, "learning_rate": 1.9529268213950838e-05, "loss": 0.6136, "step": 1789 }, { "epoch": 0.30992316848825885, "grad_norm": 1.421875, "learning_rate": 1.9528715061498355e-05, "loss": 0.6616, "step": 1790 }, { "epoch": 0.3100963099231685, "grad_norm": 1.46875, "learning_rate": 1.9528161592077725e-05, "loss": 0.7646, "step": 1791 }, { "epoch": 0.31026945135807815, "grad_norm": 1.4765625, "learning_rate": 1.952760780570735e-05, "loss": 0.6184, "step": 1792 }, { "epoch": 0.31044259279298775, "grad_norm": 1.2890625, "learning_rate": 1.9527053702405666e-05, "loss": 0.597, "step": 1793 }, { "epoch": 0.3106157342278974, "grad_norm": 1.453125, "learning_rate": 1.9526499282191094e-05, "loss": 0.5871, "step": 1794 }, { "epoch": 0.31078887566280705, "grad_norm": 1.3828125, "learning_rate": 1.952594454508208e-05, "loss": 0.6, "step": 1795 }, { "epoch": 0.3109620170977167, "grad_norm": 1.46875, "learning_rate": 1.952538949109708e-05, "loss": 0.6373, "step": 1796 }, { "epoch": 0.31113515853262635, "grad_norm": 1.5234375, "learning_rate": 1.952483412025455e-05, "loss": 0.6455, "step": 1797 }, { "epoch": 0.311308299967536, "grad_norm": 1.3125, "learning_rate": 1.9524278432572976e-05, "loss": 0.5741, "step": 1798 }, { "epoch": 0.3114814414024456, "grad_norm": 1.296875, "learning_rate": 1.9523722428070832e-05, "loss": 0.5968, "step": 1799 }, { "epoch": 0.31165458283735525, "grad_norm": 1.3203125, "learning_rate": 1.9523166106766616e-05, "loss": 0.6035, "step": 1800 }, { "epoch": 0.3118277242722649, "grad_norm": 1.3515625, "learning_rate": 1.9522609468678833e-05, "loss": 0.5875, "step": 1801 }, { "epoch": 0.31200086570717456, "grad_norm": 1.3359375, "learning_rate": 1.9522052513826e-05, "loss": 0.5922, "step": 1802 }, { "epoch": 0.3121740071420842, "grad_norm": 1.390625, "learning_rate": 1.9521495242226648e-05, "loss": 0.5877, "step": 1803 }, { "epoch": 0.3123471485769938, "grad_norm": 1.4375, "learning_rate": 1.952093765389931e-05, "loss": 0.5801, "step": 1804 }, { "epoch": 0.31252029001190346, "grad_norm": 1.578125, "learning_rate": 1.9520379748862536e-05, "loss": 0.6642, "step": 1805 }, { "epoch": 0.3126934314468131, "grad_norm": 1.4453125, "learning_rate": 1.9519821527134885e-05, "loss": 0.701, "step": 1806 }, { "epoch": 0.31286657288172276, "grad_norm": 1.453125, "learning_rate": 1.9519262988734923e-05, "loss": 0.6699, "step": 1807 }, { "epoch": 0.3130397143166324, "grad_norm": 1.4609375, "learning_rate": 1.9518704133681228e-05, "loss": 0.6649, "step": 1808 }, { "epoch": 0.31321285575154206, "grad_norm": 1.34375, "learning_rate": 1.9518144961992393e-05, "loss": 0.5819, "step": 1809 }, { "epoch": 0.31338599718645166, "grad_norm": 1.3125, "learning_rate": 1.951758547368702e-05, "loss": 0.6772, "step": 1810 }, { "epoch": 0.3135591386213613, "grad_norm": 1.4609375, "learning_rate": 1.951702566878372e-05, "loss": 0.6494, "step": 1811 }, { "epoch": 0.31373228005627096, "grad_norm": 1.3125, "learning_rate": 1.9516465547301112e-05, "loss": 0.5609, "step": 1812 }, { "epoch": 0.3139054214911806, "grad_norm": 1.25, "learning_rate": 1.9515905109257832e-05, "loss": 0.5295, "step": 1813 }, { "epoch": 0.31407856292609027, "grad_norm": 1.5234375, "learning_rate": 1.951534435467252e-05, "loss": 0.6169, "step": 1814 }, { "epoch": 0.3142517043609999, "grad_norm": 1.3828125, "learning_rate": 1.9514783283563824e-05, "loss": 0.5988, "step": 1815 }, { "epoch": 0.3144248457959095, "grad_norm": 1.4453125, "learning_rate": 1.9514221895950416e-05, "loss": 0.5948, "step": 1816 }, { "epoch": 0.31459798723081916, "grad_norm": 1.3828125, "learning_rate": 1.951366019185097e-05, "loss": 0.6255, "step": 1817 }, { "epoch": 0.3147711286657288, "grad_norm": 1.390625, "learning_rate": 1.9513098171284166e-05, "loss": 0.6475, "step": 1818 }, { "epoch": 0.31494427010063847, "grad_norm": 1.5, "learning_rate": 1.95125358342687e-05, "loss": 0.6379, "step": 1819 }, { "epoch": 0.3151174115355481, "grad_norm": 1.484375, "learning_rate": 1.9511973180823284e-05, "loss": 0.6431, "step": 1820 }, { "epoch": 0.3152905529704577, "grad_norm": 1.3359375, "learning_rate": 1.9511410210966625e-05, "loss": 0.6227, "step": 1821 }, { "epoch": 0.31546369440536737, "grad_norm": 1.3515625, "learning_rate": 1.9510846924717458e-05, "loss": 0.5758, "step": 1822 }, { "epoch": 0.315636835840277, "grad_norm": 1.546875, "learning_rate": 1.9510283322094516e-05, "loss": 0.6102, "step": 1823 }, { "epoch": 0.31580997727518667, "grad_norm": 1.3046875, "learning_rate": 1.9509719403116548e-05, "loss": 0.6228, "step": 1824 }, { "epoch": 0.3159831187100963, "grad_norm": 1.515625, "learning_rate": 1.9509155167802316e-05, "loss": 0.618, "step": 1825 }, { "epoch": 0.316156260145006, "grad_norm": 1.328125, "learning_rate": 1.950859061617058e-05, "loss": 0.5972, "step": 1826 }, { "epoch": 0.31632940157991557, "grad_norm": 1.46875, "learning_rate": 1.950802574824013e-05, "loss": 0.6012, "step": 1827 }, { "epoch": 0.3165025430148252, "grad_norm": 1.421875, "learning_rate": 1.950746056402975e-05, "loss": 0.5888, "step": 1828 }, { "epoch": 0.3166756844497349, "grad_norm": 1.390625, "learning_rate": 1.950689506355824e-05, "loss": 0.5928, "step": 1829 }, { "epoch": 0.3168488258846445, "grad_norm": 1.3984375, "learning_rate": 1.9506329246844414e-05, "loss": 0.6451, "step": 1830 }, { "epoch": 0.3170219673195542, "grad_norm": 1.3203125, "learning_rate": 1.950576311390709e-05, "loss": 0.5822, "step": 1831 }, { "epoch": 0.31719510875446383, "grad_norm": 1.6484375, "learning_rate": 1.9505196664765108e-05, "loss": 0.6615, "step": 1832 }, { "epoch": 0.3173682501893734, "grad_norm": 1.328125, "learning_rate": 1.9504629899437306e-05, "loss": 0.6345, "step": 1833 }, { "epoch": 0.3175413916242831, "grad_norm": 1.3671875, "learning_rate": 1.9504062817942534e-05, "loss": 0.562, "step": 1834 }, { "epoch": 0.3177145330591927, "grad_norm": 1.4140625, "learning_rate": 1.9503495420299656e-05, "loss": 0.6754, "step": 1835 }, { "epoch": 0.3178876744941024, "grad_norm": 1.359375, "learning_rate": 1.9502927706527552e-05, "loss": 0.5919, "step": 1836 }, { "epoch": 0.31806081592901203, "grad_norm": 1.484375, "learning_rate": 1.9502359676645103e-05, "loss": 0.5816, "step": 1837 }, { "epoch": 0.3182339573639216, "grad_norm": 1.3984375, "learning_rate": 1.9501791330671202e-05, "loss": 0.6126, "step": 1838 }, { "epoch": 0.3184070987988313, "grad_norm": 1.484375, "learning_rate": 1.9501222668624758e-05, "loss": 0.6459, "step": 1839 }, { "epoch": 0.31858024023374093, "grad_norm": 1.2890625, "learning_rate": 1.9500653690524687e-05, "loss": 0.5614, "step": 1840 }, { "epoch": 0.3187533816686506, "grad_norm": 1.4609375, "learning_rate": 1.9500084396389914e-05, "loss": 0.6321, "step": 1841 }, { "epoch": 0.31892652310356023, "grad_norm": 1.484375, "learning_rate": 1.949951478623938e-05, "loss": 0.6071, "step": 1842 }, { "epoch": 0.3190996645384699, "grad_norm": 1.5078125, "learning_rate": 1.949894486009203e-05, "loss": 0.6216, "step": 1843 }, { "epoch": 0.3192728059733795, "grad_norm": 1.4140625, "learning_rate": 1.9498374617966824e-05, "loss": 0.6653, "step": 1844 }, { "epoch": 0.31944594740828913, "grad_norm": 1.40625, "learning_rate": 1.9497804059882728e-05, "loss": 0.6092, "step": 1845 }, { "epoch": 0.3196190888431988, "grad_norm": 1.3515625, "learning_rate": 1.949723318585872e-05, "loss": 0.5908, "step": 1846 }, { "epoch": 0.31979223027810844, "grad_norm": 1.5390625, "learning_rate": 1.9496661995913796e-05, "loss": 0.548, "step": 1847 }, { "epoch": 0.3199653717130181, "grad_norm": 1.3046875, "learning_rate": 1.9496090490066955e-05, "loss": 0.6393, "step": 1848 }, { "epoch": 0.32013851314792774, "grad_norm": 1.390625, "learning_rate": 1.9495518668337204e-05, "loss": 0.6152, "step": 1849 }, { "epoch": 0.32031165458283734, "grad_norm": 1.5390625, "learning_rate": 1.9494946530743565e-05, "loss": 0.7844, "step": 1850 }, { "epoch": 0.320484796017747, "grad_norm": 1.390625, "learning_rate": 1.9494374077305075e-05, "loss": 0.5845, "step": 1851 }, { "epoch": 0.32065793745265664, "grad_norm": 1.3671875, "learning_rate": 1.9493801308040766e-05, "loss": 0.6582, "step": 1852 }, { "epoch": 0.3208310788875663, "grad_norm": 1.484375, "learning_rate": 1.9493228222969702e-05, "loss": 0.6273, "step": 1853 }, { "epoch": 0.32100422032247594, "grad_norm": 1.3046875, "learning_rate": 1.9492654822110942e-05, "loss": 0.6362, "step": 1854 }, { "epoch": 0.32117736175738554, "grad_norm": 1.2734375, "learning_rate": 1.949208110548356e-05, "loss": 0.5871, "step": 1855 }, { "epoch": 0.3213505031922952, "grad_norm": 1.375, "learning_rate": 1.9491507073106638e-05, "loss": 0.5858, "step": 1856 }, { "epoch": 0.32152364462720484, "grad_norm": 1.328125, "learning_rate": 1.9490932724999275e-05, "loss": 0.603, "step": 1857 }, { "epoch": 0.3216967860621145, "grad_norm": 1.296875, "learning_rate": 1.9490358061180577e-05, "loss": 0.5836, "step": 1858 }, { "epoch": 0.32186992749702414, "grad_norm": 1.4375, "learning_rate": 1.9489783081669654e-05, "loss": 0.6505, "step": 1859 }, { "epoch": 0.3220430689319338, "grad_norm": 1.34375, "learning_rate": 1.9489207786485635e-05, "loss": 0.6411, "step": 1860 }, { "epoch": 0.3222162103668434, "grad_norm": 1.46875, "learning_rate": 1.9488632175647663e-05, "loss": 0.5955, "step": 1861 }, { "epoch": 0.32238935180175304, "grad_norm": 1.3359375, "learning_rate": 1.9488056249174874e-05, "loss": 0.6353, "step": 1862 }, { "epoch": 0.3225624932366627, "grad_norm": 1.46875, "learning_rate": 1.9487480007086435e-05, "loss": 0.6288, "step": 1863 }, { "epoch": 0.32273563467157235, "grad_norm": 1.359375, "learning_rate": 1.9486903449401512e-05, "loss": 0.6455, "step": 1864 }, { "epoch": 0.322908776106482, "grad_norm": 1.4140625, "learning_rate": 1.9486326576139283e-05, "loss": 0.5868, "step": 1865 }, { "epoch": 0.32308191754139165, "grad_norm": 1.375, "learning_rate": 1.9485749387318937e-05, "loss": 0.5853, "step": 1866 }, { "epoch": 0.32325505897630125, "grad_norm": 1.359375, "learning_rate": 1.948517188295968e-05, "loss": 0.553, "step": 1867 }, { "epoch": 0.3234282004112109, "grad_norm": 1.359375, "learning_rate": 1.948459406308071e-05, "loss": 0.5739, "step": 1868 }, { "epoch": 0.32360134184612055, "grad_norm": 1.4140625, "learning_rate": 1.9484015927701258e-05, "loss": 0.5608, "step": 1869 }, { "epoch": 0.3237744832810302, "grad_norm": 1.3984375, "learning_rate": 1.9483437476840554e-05, "loss": 0.5397, "step": 1870 }, { "epoch": 0.32394762471593985, "grad_norm": 1.5, "learning_rate": 1.9482858710517834e-05, "loss": 0.6014, "step": 1871 }, { "epoch": 0.32412076615084945, "grad_norm": 1.421875, "learning_rate": 1.9482279628752358e-05, "loss": 0.7158, "step": 1872 }, { "epoch": 0.3242939075857591, "grad_norm": 1.5, "learning_rate": 1.9481700231563385e-05, "loss": 0.6365, "step": 1873 }, { "epoch": 0.32446704902066875, "grad_norm": 1.390625, "learning_rate": 1.948112051897019e-05, "loss": 0.6032, "step": 1874 }, { "epoch": 0.3246401904555784, "grad_norm": 1.3203125, "learning_rate": 1.9480540490992057e-05, "loss": 0.5834, "step": 1875 }, { "epoch": 0.32481333189048806, "grad_norm": 1.484375, "learning_rate": 1.9479960147648275e-05, "loss": 0.6356, "step": 1876 }, { "epoch": 0.3249864733253977, "grad_norm": 1.4609375, "learning_rate": 1.947937948895816e-05, "loss": 0.6179, "step": 1877 }, { "epoch": 0.3251596147603073, "grad_norm": 1.390625, "learning_rate": 1.9478798514941014e-05, "loss": 0.5988, "step": 1878 }, { "epoch": 0.32533275619521695, "grad_norm": 1.3359375, "learning_rate": 1.9478217225616173e-05, "loss": 0.5429, "step": 1879 }, { "epoch": 0.3255058976301266, "grad_norm": 1.421875, "learning_rate": 1.9477635621002965e-05, "loss": 0.6088, "step": 1880 }, { "epoch": 0.32567903906503626, "grad_norm": 1.3359375, "learning_rate": 1.9477053701120746e-05, "loss": 0.5994, "step": 1881 }, { "epoch": 0.3258521804999459, "grad_norm": 1.3515625, "learning_rate": 1.947647146598887e-05, "loss": 0.621, "step": 1882 }, { "epoch": 0.32602532193485556, "grad_norm": 1.4765625, "learning_rate": 1.9475888915626697e-05, "loss": 0.604, "step": 1883 }, { "epoch": 0.32619846336976516, "grad_norm": 1.5, "learning_rate": 1.9475306050053616e-05, "loss": 0.6543, "step": 1884 }, { "epoch": 0.3263716048046748, "grad_norm": 1.390625, "learning_rate": 1.947472286928901e-05, "loss": 0.7157, "step": 1885 }, { "epoch": 0.32654474623958446, "grad_norm": 1.3515625, "learning_rate": 1.947413937335228e-05, "loss": 0.5536, "step": 1886 }, { "epoch": 0.3267178876744941, "grad_norm": 1.3828125, "learning_rate": 1.9473555562262838e-05, "loss": 0.6207, "step": 1887 }, { "epoch": 0.32689102910940376, "grad_norm": 1.4140625, "learning_rate": 1.94729714360401e-05, "loss": 0.5319, "step": 1888 }, { "epoch": 0.32706417054431336, "grad_norm": 1.3671875, "learning_rate": 1.9472386994703497e-05, "loss": 0.601, "step": 1889 }, { "epoch": 0.327237311979223, "grad_norm": 1.3046875, "learning_rate": 1.947180223827247e-05, "loss": 0.5934, "step": 1890 }, { "epoch": 0.32741045341413266, "grad_norm": 1.3203125, "learning_rate": 1.9471217166766477e-05, "loss": 0.6534, "step": 1891 }, { "epoch": 0.3275835948490423, "grad_norm": 1.3359375, "learning_rate": 1.9470631780204973e-05, "loss": 0.5555, "step": 1892 }, { "epoch": 0.32775673628395197, "grad_norm": 1.3203125, "learning_rate": 1.947004607860743e-05, "loss": 0.5685, "step": 1893 }, { "epoch": 0.3279298777188616, "grad_norm": 1.546875, "learning_rate": 1.9469460061993336e-05, "loss": 0.6726, "step": 1894 }, { "epoch": 0.3281030191537712, "grad_norm": 1.3203125, "learning_rate": 1.9468873730382184e-05, "loss": 0.6063, "step": 1895 }, { "epoch": 0.32827616058868087, "grad_norm": 1.5859375, "learning_rate": 1.9468287083793472e-05, "loss": 0.5874, "step": 1896 }, { "epoch": 0.3284493020235905, "grad_norm": 1.390625, "learning_rate": 1.9467700122246726e-05, "loss": 0.5808, "step": 1897 }, { "epoch": 0.32862244345850017, "grad_norm": 1.265625, "learning_rate": 1.9467112845761457e-05, "loss": 0.5428, "step": 1898 }, { "epoch": 0.3287955848934098, "grad_norm": 1.484375, "learning_rate": 1.946652525435721e-05, "loss": 0.6943, "step": 1899 }, { "epoch": 0.32896872632831947, "grad_norm": 1.359375, "learning_rate": 1.946593734805353e-05, "loss": 0.5815, "step": 1900 }, { "epoch": 0.32914186776322907, "grad_norm": 1.4140625, "learning_rate": 1.946534912686997e-05, "loss": 0.6072, "step": 1901 }, { "epoch": 0.3293150091981387, "grad_norm": 1.53125, "learning_rate": 1.94647605908261e-05, "loss": 0.6437, "step": 1902 }, { "epoch": 0.32948815063304837, "grad_norm": 1.3828125, "learning_rate": 1.9464171739941494e-05, "loss": 0.6004, "step": 1903 }, { "epoch": 0.329661292067958, "grad_norm": 1.4765625, "learning_rate": 1.9463582574235743e-05, "loss": 0.6354, "step": 1904 }, { "epoch": 0.3298344335028677, "grad_norm": 1.3828125, "learning_rate": 1.9462993093728445e-05, "loss": 0.5905, "step": 1905 }, { "epoch": 0.33000757493777727, "grad_norm": 1.375, "learning_rate": 1.9462403298439208e-05, "loss": 0.5872, "step": 1906 }, { "epoch": 0.3301807163726869, "grad_norm": 1.3125, "learning_rate": 1.9461813188387652e-05, "loss": 0.6222, "step": 1907 }, { "epoch": 0.3303538578075966, "grad_norm": 1.2890625, "learning_rate": 1.9461222763593405e-05, "loss": 0.577, "step": 1908 }, { "epoch": 0.3305269992425062, "grad_norm": 1.3984375, "learning_rate": 1.946063202407611e-05, "loss": 0.5447, "step": 1909 }, { "epoch": 0.3307001406774159, "grad_norm": 1.3359375, "learning_rate": 1.9460040969855414e-05, "loss": 0.5639, "step": 1910 }, { "epoch": 0.33087328211232553, "grad_norm": 1.5078125, "learning_rate": 1.945944960095098e-05, "loss": 0.5697, "step": 1911 }, { "epoch": 0.3310464235472351, "grad_norm": 1.203125, "learning_rate": 1.945885791738248e-05, "loss": 0.4928, "step": 1912 }, { "epoch": 0.3312195649821448, "grad_norm": 1.453125, "learning_rate": 1.9458265919169594e-05, "loss": 0.6254, "step": 1913 }, { "epoch": 0.33139270641705443, "grad_norm": 1.515625, "learning_rate": 1.9457673606332016e-05, "loss": 0.5712, "step": 1914 }, { "epoch": 0.3315658478519641, "grad_norm": 1.4453125, "learning_rate": 1.9457080978889454e-05, "loss": 0.5623, "step": 1915 }, { "epoch": 0.33173898928687373, "grad_norm": 1.390625, "learning_rate": 1.9456488036861613e-05, "loss": 0.5914, "step": 1916 }, { "epoch": 0.3319121307217834, "grad_norm": 1.421875, "learning_rate": 1.9455894780268223e-05, "loss": 0.6392, "step": 1917 }, { "epoch": 0.332085272156693, "grad_norm": 1.5078125, "learning_rate": 1.9455301209129015e-05, "loss": 0.6305, "step": 1918 }, { "epoch": 0.33225841359160263, "grad_norm": 1.3984375, "learning_rate": 1.9454707323463733e-05, "loss": 0.595, "step": 1919 }, { "epoch": 0.3324315550265123, "grad_norm": 1.3125, "learning_rate": 1.9454113123292133e-05, "loss": 0.5042, "step": 1920 }, { "epoch": 0.33260469646142193, "grad_norm": 1.4375, "learning_rate": 1.9453518608633985e-05, "loss": 0.6248, "step": 1921 }, { "epoch": 0.3327778378963316, "grad_norm": 1.3203125, "learning_rate": 1.9452923779509064e-05, "loss": 0.5965, "step": 1922 }, { "epoch": 0.3329509793312412, "grad_norm": 1.46875, "learning_rate": 1.945232863593715e-05, "loss": 0.6647, "step": 1923 }, { "epoch": 0.33312412076615083, "grad_norm": 1.3359375, "learning_rate": 1.945173317793805e-05, "loss": 0.5826, "step": 1924 }, { "epoch": 0.3332972622010605, "grad_norm": 1.515625, "learning_rate": 1.9451137405531564e-05, "loss": 0.5887, "step": 1925 }, { "epoch": 0.33347040363597014, "grad_norm": 1.421875, "learning_rate": 1.9450541318737514e-05, "loss": 0.6707, "step": 1926 }, { "epoch": 0.3336435450708798, "grad_norm": 1.3984375, "learning_rate": 1.9449944917575727e-05, "loss": 0.6322, "step": 1927 }, { "epoch": 0.33381668650578944, "grad_norm": 1.359375, "learning_rate": 1.9449348202066044e-05, "loss": 0.6183, "step": 1928 }, { "epoch": 0.33398982794069904, "grad_norm": 1.3359375, "learning_rate": 1.9448751172228312e-05, "loss": 0.6016, "step": 1929 }, { "epoch": 0.3341629693756087, "grad_norm": 1.375, "learning_rate": 1.9448153828082387e-05, "loss": 0.5948, "step": 1930 }, { "epoch": 0.33433611081051834, "grad_norm": 1.2578125, "learning_rate": 1.944755616964815e-05, "loss": 0.5352, "step": 1931 }, { "epoch": 0.334509252245428, "grad_norm": 1.6484375, "learning_rate": 1.9446958196945474e-05, "loss": 0.6919, "step": 1932 }, { "epoch": 0.33468239368033764, "grad_norm": 1.3671875, "learning_rate": 1.9446359909994253e-05, "loss": 0.6546, "step": 1933 }, { "epoch": 0.3348555351152473, "grad_norm": 1.359375, "learning_rate": 1.944576130881439e-05, "loss": 0.6585, "step": 1934 }, { "epoch": 0.3350286765501569, "grad_norm": 1.484375, "learning_rate": 1.9445162393425788e-05, "loss": 0.6433, "step": 1935 }, { "epoch": 0.33520181798506654, "grad_norm": 1.375, "learning_rate": 1.9444563163848382e-05, "loss": 0.6353, "step": 1936 }, { "epoch": 0.3353749594199762, "grad_norm": 1.28125, "learning_rate": 1.9443963620102098e-05, "loss": 0.6182, "step": 1937 }, { "epoch": 0.33554810085488584, "grad_norm": 1.328125, "learning_rate": 1.9443363762206883e-05, "loss": 0.5775, "step": 1938 }, { "epoch": 0.3357212422897955, "grad_norm": 1.4375, "learning_rate": 1.944276359018269e-05, "loss": 0.753, "step": 1939 }, { "epoch": 0.3358943837247051, "grad_norm": 1.4375, "learning_rate": 1.944216310404948e-05, "loss": 0.6727, "step": 1940 }, { "epoch": 0.33606752515961474, "grad_norm": 1.5546875, "learning_rate": 1.944156230382723e-05, "loss": 0.6081, "step": 1941 }, { "epoch": 0.3362406665945244, "grad_norm": 1.4375, "learning_rate": 1.944096118953593e-05, "loss": 0.6706, "step": 1942 }, { "epoch": 0.33641380802943405, "grad_norm": 1.4765625, "learning_rate": 1.944035976119557e-05, "loss": 0.6445, "step": 1943 }, { "epoch": 0.3365869494643437, "grad_norm": 1.3828125, "learning_rate": 1.943975801882616e-05, "loss": 0.7119, "step": 1944 }, { "epoch": 0.33676009089925335, "grad_norm": 1.34375, "learning_rate": 1.943915596244771e-05, "loss": 0.5701, "step": 1945 }, { "epoch": 0.33693323233416295, "grad_norm": 1.3046875, "learning_rate": 1.9438553592080257e-05, "loss": 0.5859, "step": 1946 }, { "epoch": 0.3371063737690726, "grad_norm": 1.34375, "learning_rate": 1.943795090774383e-05, "loss": 0.5882, "step": 1947 }, { "epoch": 0.33727951520398225, "grad_norm": 1.34375, "learning_rate": 1.9437347909458482e-05, "loss": 0.6041, "step": 1948 }, { "epoch": 0.3374526566388919, "grad_norm": 1.25, "learning_rate": 1.943674459724427e-05, "loss": 0.5682, "step": 1949 }, { "epoch": 0.33762579807380155, "grad_norm": 1.4296875, "learning_rate": 1.943614097112126e-05, "loss": 0.6937, "step": 1950 }, { "epoch": 0.3377989395087112, "grad_norm": 1.4375, "learning_rate": 1.9435537031109536e-05, "loss": 0.6561, "step": 1951 }, { "epoch": 0.3379720809436208, "grad_norm": 1.359375, "learning_rate": 1.9434932777229186e-05, "loss": 0.5513, "step": 1952 }, { "epoch": 0.33814522237853045, "grad_norm": 1.265625, "learning_rate": 1.943432820950031e-05, "loss": 0.551, "step": 1953 }, { "epoch": 0.3383183638134401, "grad_norm": 1.3046875, "learning_rate": 1.9433723327943018e-05, "loss": 0.5764, "step": 1954 }, { "epoch": 0.33849150524834976, "grad_norm": 1.46875, "learning_rate": 1.9433118132577432e-05, "loss": 0.6151, "step": 1955 }, { "epoch": 0.3386646466832594, "grad_norm": 1.3359375, "learning_rate": 1.9432512623423686e-05, "loss": 0.5727, "step": 1956 }, { "epoch": 0.338837788118169, "grad_norm": 1.3671875, "learning_rate": 1.9431906800501913e-05, "loss": 0.5468, "step": 1957 }, { "epoch": 0.33901092955307865, "grad_norm": 1.4765625, "learning_rate": 1.943130066383228e-05, "loss": 0.6221, "step": 1958 }, { "epoch": 0.3391840709879883, "grad_norm": 1.2734375, "learning_rate": 1.9430694213434936e-05, "loss": 0.5485, "step": 1959 }, { "epoch": 0.33935721242289796, "grad_norm": 1.3125, "learning_rate": 1.943008744933006e-05, "loss": 0.6172, "step": 1960 }, { "epoch": 0.3395303538578076, "grad_norm": 1.3671875, "learning_rate": 1.9429480371537836e-05, "loss": 0.6448, "step": 1961 }, { "epoch": 0.33970349529271726, "grad_norm": 1.375, "learning_rate": 1.942887298007846e-05, "loss": 0.6322, "step": 1962 }, { "epoch": 0.33987663672762686, "grad_norm": 1.4609375, "learning_rate": 1.9428265274972132e-05, "loss": 0.5917, "step": 1963 }, { "epoch": 0.3400497781625365, "grad_norm": 1.40625, "learning_rate": 1.942765725623907e-05, "loss": 0.6765, "step": 1964 }, { "epoch": 0.34022291959744616, "grad_norm": 1.265625, "learning_rate": 1.94270489238995e-05, "loss": 0.5697, "step": 1965 }, { "epoch": 0.3403960610323558, "grad_norm": 1.390625, "learning_rate": 1.9426440277973655e-05, "loss": 0.5834, "step": 1966 }, { "epoch": 0.34056920246726546, "grad_norm": 1.3515625, "learning_rate": 1.9425831318481784e-05, "loss": 0.5645, "step": 1967 }, { "epoch": 0.3407423439021751, "grad_norm": 1.3515625, "learning_rate": 1.942522204544414e-05, "loss": 0.7482, "step": 1968 }, { "epoch": 0.3409154853370847, "grad_norm": 1.484375, "learning_rate": 1.9424612458880997e-05, "loss": 0.5619, "step": 1969 }, { "epoch": 0.34108862677199436, "grad_norm": 1.484375, "learning_rate": 1.9424002558812627e-05, "loss": 0.622, "step": 1970 }, { "epoch": 0.341261768206904, "grad_norm": 1.375, "learning_rate": 1.942339234525932e-05, "loss": 0.6507, "step": 1971 }, { "epoch": 0.34143490964181367, "grad_norm": 1.4140625, "learning_rate": 1.942278181824137e-05, "loss": 0.6121, "step": 1972 }, { "epoch": 0.3416080510767233, "grad_norm": 1.3984375, "learning_rate": 1.9422170977779093e-05, "loss": 0.6223, "step": 1973 }, { "epoch": 0.3417811925116329, "grad_norm": 1.4140625, "learning_rate": 1.9421559823892805e-05, "loss": 0.6124, "step": 1974 }, { "epoch": 0.34195433394654257, "grad_norm": 1.3671875, "learning_rate": 1.9420948356602836e-05, "loss": 0.6363, "step": 1975 }, { "epoch": 0.3421274753814522, "grad_norm": 1.4453125, "learning_rate": 1.942033657592953e-05, "loss": 0.6124, "step": 1976 }, { "epoch": 0.34230061681636187, "grad_norm": 1.421875, "learning_rate": 1.9419724481893228e-05, "loss": 0.6318, "step": 1977 }, { "epoch": 0.3424737582512715, "grad_norm": 1.453125, "learning_rate": 1.9419112074514296e-05, "loss": 0.6893, "step": 1978 }, { "epoch": 0.34264689968618117, "grad_norm": 1.3984375, "learning_rate": 1.941849935381311e-05, "loss": 0.6426, "step": 1979 }, { "epoch": 0.34282004112109077, "grad_norm": 1.4921875, "learning_rate": 1.941788631981005e-05, "loss": 0.6977, "step": 1980 }, { "epoch": 0.3429931825560004, "grad_norm": 1.3828125, "learning_rate": 1.94172729725255e-05, "loss": 0.6622, "step": 1981 }, { "epoch": 0.34316632399091007, "grad_norm": 1.2734375, "learning_rate": 1.9416659311979874e-05, "loss": 0.5816, "step": 1982 }, { "epoch": 0.3433394654258197, "grad_norm": 1.4140625, "learning_rate": 1.9416045338193576e-05, "loss": 0.6112, "step": 1983 }, { "epoch": 0.3435126068607294, "grad_norm": 1.3359375, "learning_rate": 1.941543105118704e-05, "loss": 0.5943, "step": 1984 }, { "epoch": 0.343685748295639, "grad_norm": 1.5, "learning_rate": 1.9414816450980686e-05, "loss": 0.629, "step": 1985 }, { "epoch": 0.3438588897305486, "grad_norm": 1.3203125, "learning_rate": 1.9414201537594973e-05, "loss": 0.5533, "step": 1986 }, { "epoch": 0.3440320311654583, "grad_norm": 1.390625, "learning_rate": 1.9413586311050344e-05, "loss": 0.6632, "step": 1987 }, { "epoch": 0.3442051726003679, "grad_norm": 1.265625, "learning_rate": 1.941297077136727e-05, "loss": 0.5723, "step": 1988 }, { "epoch": 0.3443783140352776, "grad_norm": 1.4609375, "learning_rate": 1.9412354918566226e-05, "loss": 0.6129, "step": 1989 }, { "epoch": 0.34455145547018723, "grad_norm": 1.2578125, "learning_rate": 1.94117387526677e-05, "loss": 0.5942, "step": 1990 }, { "epoch": 0.3447245969050968, "grad_norm": 1.328125, "learning_rate": 1.9411122273692183e-05, "loss": 0.5757, "step": 1991 }, { "epoch": 0.3448977383400065, "grad_norm": 1.2890625, "learning_rate": 1.9410505481660187e-05, "loss": 0.621, "step": 1992 }, { "epoch": 0.34507087977491613, "grad_norm": 1.4453125, "learning_rate": 1.9409888376592226e-05, "loss": 0.6401, "step": 1993 }, { "epoch": 0.3452440212098258, "grad_norm": 1.296875, "learning_rate": 1.9409270958508828e-05, "loss": 0.5299, "step": 1994 }, { "epoch": 0.34541716264473543, "grad_norm": 1.421875, "learning_rate": 1.9408653227430532e-05, "loss": 0.6742, "step": 1995 }, { "epoch": 0.3455903040796451, "grad_norm": 1.328125, "learning_rate": 1.940803518337789e-05, "loss": 0.6113, "step": 1996 }, { "epoch": 0.3457634455145547, "grad_norm": 1.3671875, "learning_rate": 1.9407416826371456e-05, "loss": 0.5991, "step": 1997 }, { "epoch": 0.34593658694946433, "grad_norm": 1.5625, "learning_rate": 1.94067981564318e-05, "loss": 0.7044, "step": 1998 }, { "epoch": 0.346109728384374, "grad_norm": 1.2421875, "learning_rate": 1.9406179173579507e-05, "loss": 0.6549, "step": 1999 }, { "epoch": 0.34628286981928363, "grad_norm": 1.5234375, "learning_rate": 1.940555987783516e-05, "loss": 0.5808, "step": 2000 }, { "epoch": 0.3464560112541933, "grad_norm": 1.3203125, "learning_rate": 1.940494026921936e-05, "loss": 0.6029, "step": 2001 }, { "epoch": 0.34662915268910294, "grad_norm": 1.4140625, "learning_rate": 1.9404320347752725e-05, "loss": 0.6309, "step": 2002 }, { "epoch": 0.34680229412401253, "grad_norm": 1.453125, "learning_rate": 1.940370011345587e-05, "loss": 0.6042, "step": 2003 }, { "epoch": 0.3469754355589222, "grad_norm": 1.359375, "learning_rate": 1.9403079566349427e-05, "loss": 0.6346, "step": 2004 }, { "epoch": 0.34714857699383184, "grad_norm": 1.3984375, "learning_rate": 1.940245870645404e-05, "loss": 0.645, "step": 2005 }, { "epoch": 0.3473217184287415, "grad_norm": 1.4921875, "learning_rate": 1.9401837533790364e-05, "loss": 0.5784, "step": 2006 }, { "epoch": 0.34749485986365114, "grad_norm": 1.3203125, "learning_rate": 1.9401216048379056e-05, "loss": 0.6117, "step": 2007 }, { "epoch": 0.34766800129856074, "grad_norm": 1.3125, "learning_rate": 1.94005942502408e-05, "loss": 0.5947, "step": 2008 }, { "epoch": 0.3478411427334704, "grad_norm": 1.3515625, "learning_rate": 1.9399972139396267e-05, "loss": 0.6185, "step": 2009 }, { "epoch": 0.34801428416838004, "grad_norm": 1.2890625, "learning_rate": 1.9399349715866155e-05, "loss": 0.5837, "step": 2010 }, { "epoch": 0.3481874256032897, "grad_norm": 1.3046875, "learning_rate": 1.9398726979671174e-05, "loss": 0.6227, "step": 2011 }, { "epoch": 0.34836056703819934, "grad_norm": 1.34375, "learning_rate": 1.9398103930832036e-05, "loss": 0.5783, "step": 2012 }, { "epoch": 0.348533708473109, "grad_norm": 1.3671875, "learning_rate": 1.9397480569369466e-05, "loss": 0.6225, "step": 2013 }, { "epoch": 0.3487068499080186, "grad_norm": 1.4453125, "learning_rate": 1.9396856895304196e-05, "loss": 0.6264, "step": 2014 }, { "epoch": 0.34887999134292824, "grad_norm": 1.5, "learning_rate": 1.939623290865698e-05, "loss": 0.6329, "step": 2015 }, { "epoch": 0.3490531327778379, "grad_norm": 1.4296875, "learning_rate": 1.939560860944857e-05, "loss": 0.562, "step": 2016 }, { "epoch": 0.34922627421274755, "grad_norm": 1.53125, "learning_rate": 1.9394983997699732e-05, "loss": 0.6924, "step": 2017 }, { "epoch": 0.3493994156476572, "grad_norm": 1.5, "learning_rate": 1.9394359073431248e-05, "loss": 0.7555, "step": 2018 }, { "epoch": 0.34957255708256685, "grad_norm": 1.328125, "learning_rate": 1.9393733836663903e-05, "loss": 0.5869, "step": 2019 }, { "epoch": 0.34974569851747644, "grad_norm": 1.3984375, "learning_rate": 1.939310828741849e-05, "loss": 0.7295, "step": 2020 }, { "epoch": 0.3499188399523861, "grad_norm": 1.5, "learning_rate": 1.9392482425715827e-05, "loss": 0.6856, "step": 2021 }, { "epoch": 0.35009198138729575, "grad_norm": 1.421875, "learning_rate": 1.9391856251576727e-05, "loss": 0.5734, "step": 2022 }, { "epoch": 0.3502651228222054, "grad_norm": 1.5390625, "learning_rate": 1.9391229765022022e-05, "loss": 0.592, "step": 2023 }, { "epoch": 0.35043826425711505, "grad_norm": 1.484375, "learning_rate": 1.9390602966072548e-05, "loss": 0.6257, "step": 2024 }, { "epoch": 0.35061140569202465, "grad_norm": 1.46875, "learning_rate": 1.9389975854749164e-05, "loss": 0.5811, "step": 2025 }, { "epoch": 0.3507845471269343, "grad_norm": 1.609375, "learning_rate": 1.938934843107272e-05, "loss": 0.6216, "step": 2026 }, { "epoch": 0.35095768856184395, "grad_norm": 1.296875, "learning_rate": 1.938872069506409e-05, "loss": 0.552, "step": 2027 }, { "epoch": 0.3511308299967536, "grad_norm": 1.453125, "learning_rate": 1.9388092646744163e-05, "loss": 0.582, "step": 2028 }, { "epoch": 0.35130397143166325, "grad_norm": 1.4375, "learning_rate": 1.938746428613382e-05, "loss": 0.6044, "step": 2029 }, { "epoch": 0.3514771128665729, "grad_norm": 1.40625, "learning_rate": 1.9386835613253973e-05, "loss": 0.5583, "step": 2030 }, { "epoch": 0.3516502543014825, "grad_norm": 1.5546875, "learning_rate": 1.9386206628125523e-05, "loss": 0.745, "step": 2031 }, { "epoch": 0.35182339573639215, "grad_norm": 1.421875, "learning_rate": 1.93855773307694e-05, "loss": 0.6099, "step": 2032 }, { "epoch": 0.3519965371713018, "grad_norm": 1.46875, "learning_rate": 1.9384947721206538e-05, "loss": 0.6548, "step": 2033 }, { "epoch": 0.35216967860621146, "grad_norm": 1.4375, "learning_rate": 1.9384317799457878e-05, "loss": 0.617, "step": 2034 }, { "epoch": 0.3523428200411211, "grad_norm": 1.5390625, "learning_rate": 1.9383687565544376e-05, "loss": 0.6226, "step": 2035 }, { "epoch": 0.35251596147603076, "grad_norm": 1.4609375, "learning_rate": 1.9383057019486996e-05, "loss": 0.6294, "step": 2036 }, { "epoch": 0.35268910291094036, "grad_norm": 1.359375, "learning_rate": 1.9382426161306712e-05, "loss": 0.6331, "step": 2037 }, { "epoch": 0.35286224434585, "grad_norm": 1.40625, "learning_rate": 1.9381794991024508e-05, "loss": 0.6362, "step": 2038 }, { "epoch": 0.35303538578075966, "grad_norm": 1.6484375, "learning_rate": 1.938116350866138e-05, "loss": 0.6075, "step": 2039 }, { "epoch": 0.3532085272156693, "grad_norm": 1.3984375, "learning_rate": 1.938053171423834e-05, "loss": 0.6363, "step": 2040 }, { "epoch": 0.35338166865057896, "grad_norm": 1.5546875, "learning_rate": 1.9379899607776397e-05, "loss": 0.6304, "step": 2041 }, { "epoch": 0.35355481008548856, "grad_norm": 1.4609375, "learning_rate": 1.937926718929658e-05, "loss": 0.5644, "step": 2042 }, { "epoch": 0.3537279515203982, "grad_norm": 1.2890625, "learning_rate": 1.9378634458819923e-05, "loss": 0.6105, "step": 2043 }, { "epoch": 0.35390109295530786, "grad_norm": 1.3828125, "learning_rate": 1.937800141636748e-05, "loss": 0.6265, "step": 2044 }, { "epoch": 0.3540742343902175, "grad_norm": 1.5078125, "learning_rate": 1.9377368061960305e-05, "loss": 0.6355, "step": 2045 }, { "epoch": 0.35424737582512716, "grad_norm": 1.4140625, "learning_rate": 1.9376734395619467e-05, "loss": 0.634, "step": 2046 }, { "epoch": 0.3544205172600368, "grad_norm": 1.4765625, "learning_rate": 1.9376100417366044e-05, "loss": 0.5756, "step": 2047 }, { "epoch": 0.3545936586949464, "grad_norm": 1.5, "learning_rate": 1.9375466127221125e-05, "loss": 0.6284, "step": 2048 }, { "epoch": 0.35476680012985606, "grad_norm": 1.3515625, "learning_rate": 1.937483152520581e-05, "loss": 0.6216, "step": 2049 }, { "epoch": 0.3549399415647657, "grad_norm": 1.5703125, "learning_rate": 1.9374196611341212e-05, "loss": 0.6317, "step": 2050 }, { "epoch": 0.35511308299967537, "grad_norm": 1.3125, "learning_rate": 1.9373561385648443e-05, "loss": 0.5737, "step": 2051 }, { "epoch": 0.355286224434585, "grad_norm": 1.5, "learning_rate": 1.937292584814864e-05, "loss": 0.6583, "step": 2052 }, { "epoch": 0.35545936586949467, "grad_norm": 1.5078125, "learning_rate": 1.937228999886294e-05, "loss": 0.6334, "step": 2053 }, { "epoch": 0.35563250730440427, "grad_norm": 1.34375, "learning_rate": 1.93716538378125e-05, "loss": 0.568, "step": 2054 }, { "epoch": 0.3558056487393139, "grad_norm": 1.5859375, "learning_rate": 1.9371017365018475e-05, "loss": 0.6426, "step": 2055 }, { "epoch": 0.35597879017422357, "grad_norm": 1.4921875, "learning_rate": 1.937038058050204e-05, "loss": 0.5699, "step": 2056 }, { "epoch": 0.3561519316091332, "grad_norm": 1.4296875, "learning_rate": 1.9369743484284375e-05, "loss": 0.556, "step": 2057 }, { "epoch": 0.3563250730440429, "grad_norm": 1.5078125, "learning_rate": 1.936910607638668e-05, "loss": 0.6183, "step": 2058 }, { "epoch": 0.35649821447895247, "grad_norm": 1.3359375, "learning_rate": 1.9368468356830144e-05, "loss": 0.5382, "step": 2059 }, { "epoch": 0.3566713559138621, "grad_norm": 1.484375, "learning_rate": 1.9367830325635996e-05, "loss": 0.7007, "step": 2060 }, { "epoch": 0.35684449734877177, "grad_norm": 1.5078125, "learning_rate": 1.936719198282545e-05, "loss": 0.6485, "step": 2061 }, { "epoch": 0.3570176387836814, "grad_norm": 1.3359375, "learning_rate": 1.9366553328419746e-05, "loss": 0.573, "step": 2062 }, { "epoch": 0.3571907802185911, "grad_norm": 1.359375, "learning_rate": 1.9365914362440125e-05, "loss": 0.6441, "step": 2063 }, { "epoch": 0.3573639216535007, "grad_norm": 1.3984375, "learning_rate": 1.936527508490784e-05, "loss": 0.6797, "step": 2064 }, { "epoch": 0.3575370630884103, "grad_norm": 1.28125, "learning_rate": 1.9364635495844163e-05, "loss": 0.5505, "step": 2065 }, { "epoch": 0.35771020452332, "grad_norm": 1.3203125, "learning_rate": 1.9363995595270363e-05, "loss": 0.562, "step": 2066 }, { "epoch": 0.3578833459582296, "grad_norm": 1.3125, "learning_rate": 1.9363355383207726e-05, "loss": 0.5651, "step": 2067 }, { "epoch": 0.3580564873931393, "grad_norm": 1.328125, "learning_rate": 1.9362714859677555e-05, "loss": 0.6083, "step": 2068 }, { "epoch": 0.35822962882804893, "grad_norm": 1.4296875, "learning_rate": 1.9362074024701152e-05, "loss": 0.5941, "step": 2069 }, { "epoch": 0.3584027702629586, "grad_norm": 1.390625, "learning_rate": 1.9361432878299832e-05, "loss": 0.572, "step": 2070 }, { "epoch": 0.3585759116978682, "grad_norm": 1.328125, "learning_rate": 1.9360791420494926e-05, "loss": 0.5609, "step": 2071 }, { "epoch": 0.35874905313277783, "grad_norm": 1.3203125, "learning_rate": 1.9360149651307772e-05, "loss": 0.5666, "step": 2072 }, { "epoch": 0.3589221945676875, "grad_norm": 1.4375, "learning_rate": 1.935950757075972e-05, "loss": 0.5908, "step": 2073 }, { "epoch": 0.35909533600259713, "grad_norm": 1.359375, "learning_rate": 1.9358865178872118e-05, "loss": 0.523, "step": 2074 }, { "epoch": 0.3592684774375068, "grad_norm": 1.4375, "learning_rate": 1.9358222475666348e-05, "loss": 0.6379, "step": 2075 }, { "epoch": 0.3594416188724164, "grad_norm": 1.46875, "learning_rate": 1.9357579461163783e-05, "loss": 0.5949, "step": 2076 }, { "epoch": 0.35961476030732603, "grad_norm": 1.375, "learning_rate": 1.9356936135385812e-05, "loss": 0.6965, "step": 2077 }, { "epoch": 0.3597879017422357, "grad_norm": 1.328125, "learning_rate": 1.9356292498353838e-05, "loss": 0.5811, "step": 2078 }, { "epoch": 0.35996104317714533, "grad_norm": 1.2890625, "learning_rate": 1.9355648550089267e-05, "loss": 0.5389, "step": 2079 }, { "epoch": 0.360134184612055, "grad_norm": 1.4453125, "learning_rate": 1.9355004290613522e-05, "loss": 0.5762, "step": 2080 }, { "epoch": 0.36030732604696464, "grad_norm": 1.4921875, "learning_rate": 1.9354359719948034e-05, "loss": 0.5912, "step": 2081 }, { "epoch": 0.36048046748187423, "grad_norm": 1.3984375, "learning_rate": 1.9353714838114245e-05, "loss": 0.6403, "step": 2082 }, { "epoch": 0.3606536089167839, "grad_norm": 1.359375, "learning_rate": 1.9353069645133603e-05, "loss": 0.6005, "step": 2083 }, { "epoch": 0.36082675035169354, "grad_norm": 1.3046875, "learning_rate": 1.9352424141027577e-05, "loss": 0.6122, "step": 2084 }, { "epoch": 0.3609998917866032, "grad_norm": 1.4296875, "learning_rate": 1.9351778325817633e-05, "loss": 0.5561, "step": 2085 }, { "epoch": 0.36117303322151284, "grad_norm": 1.4453125, "learning_rate": 1.9351132199525254e-05, "loss": 0.6348, "step": 2086 }, { "epoch": 0.3613461746564225, "grad_norm": 1.515625, "learning_rate": 1.935048576217194e-05, "loss": 0.6861, "step": 2087 }, { "epoch": 0.3615193160913321, "grad_norm": 1.4609375, "learning_rate": 1.934983901377919e-05, "loss": 0.6098, "step": 2088 }, { "epoch": 0.36169245752624174, "grad_norm": 1.3984375, "learning_rate": 1.9349191954368515e-05, "loss": 0.6491, "step": 2089 }, { "epoch": 0.3618655989611514, "grad_norm": 1.375, "learning_rate": 1.934854458396144e-05, "loss": 0.5649, "step": 2090 }, { "epoch": 0.36203874039606104, "grad_norm": 1.453125, "learning_rate": 1.9347896902579505e-05, "loss": 0.5887, "step": 2091 }, { "epoch": 0.3622118818309707, "grad_norm": 1.359375, "learning_rate": 1.9347248910244246e-05, "loss": 0.5796, "step": 2092 }, { "epoch": 0.3623850232658803, "grad_norm": 1.40625, "learning_rate": 1.9346600606977226e-05, "loss": 0.7073, "step": 2093 }, { "epoch": 0.36255816470078994, "grad_norm": 1.5078125, "learning_rate": 1.9345951992800006e-05, "loss": 0.6313, "step": 2094 }, { "epoch": 0.3627313061356996, "grad_norm": 1.421875, "learning_rate": 1.9345303067734166e-05, "loss": 0.6011, "step": 2095 }, { "epoch": 0.36290444757060925, "grad_norm": 1.3984375, "learning_rate": 1.9344653831801285e-05, "loss": 0.556, "step": 2096 }, { "epoch": 0.3630775890055189, "grad_norm": 1.34375, "learning_rate": 1.9344004285022966e-05, "loss": 0.6394, "step": 2097 }, { "epoch": 0.36325073044042855, "grad_norm": 1.46875, "learning_rate": 1.9343354427420817e-05, "loss": 0.6172, "step": 2098 }, { "epoch": 0.36342387187533814, "grad_norm": 1.421875, "learning_rate": 1.934270425901645e-05, "loss": 0.6626, "step": 2099 }, { "epoch": 0.3635970133102478, "grad_norm": 1.46875, "learning_rate": 1.9342053779831495e-05, "loss": 0.6423, "step": 2100 }, { "epoch": 0.36377015474515745, "grad_norm": 1.34375, "learning_rate": 1.9341402989887584e-05, "loss": 0.5881, "step": 2101 }, { "epoch": 0.3639432961800671, "grad_norm": 1.28125, "learning_rate": 1.9340751889206378e-05, "loss": 0.6228, "step": 2102 }, { "epoch": 0.36411643761497675, "grad_norm": 1.4921875, "learning_rate": 1.9340100477809525e-05, "loss": 0.6024, "step": 2103 }, { "epoch": 0.3642895790498864, "grad_norm": 1.4296875, "learning_rate": 1.9339448755718697e-05, "loss": 0.5819, "step": 2104 }, { "epoch": 0.364462720484796, "grad_norm": 1.3984375, "learning_rate": 1.9338796722955574e-05, "loss": 0.5681, "step": 2105 }, { "epoch": 0.36463586191970565, "grad_norm": 1.328125, "learning_rate": 1.9338144379541844e-05, "loss": 0.6786, "step": 2106 }, { "epoch": 0.3648090033546153, "grad_norm": 1.4453125, "learning_rate": 1.9337491725499208e-05, "loss": 0.6275, "step": 2107 }, { "epoch": 0.36498214478952495, "grad_norm": 1.421875, "learning_rate": 1.933683876084938e-05, "loss": 0.6608, "step": 2108 }, { "epoch": 0.3651552862244346, "grad_norm": 1.484375, "learning_rate": 1.933618548561407e-05, "loss": 0.6063, "step": 2109 }, { "epoch": 0.3653284276593442, "grad_norm": 1.2890625, "learning_rate": 1.9335531899815022e-05, "loss": 0.6332, "step": 2110 }, { "epoch": 0.36550156909425385, "grad_norm": 1.484375, "learning_rate": 1.9334878003473963e-05, "loss": 0.6487, "step": 2111 }, { "epoch": 0.3656747105291635, "grad_norm": 1.421875, "learning_rate": 1.933422379661266e-05, "loss": 0.5643, "step": 2112 }, { "epoch": 0.36584785196407316, "grad_norm": 1.4296875, "learning_rate": 1.9333569279252864e-05, "loss": 0.6111, "step": 2113 }, { "epoch": 0.3660209933989828, "grad_norm": 1.5234375, "learning_rate": 1.933291445141635e-05, "loss": 0.6496, "step": 2114 }, { "epoch": 0.36619413483389246, "grad_norm": 1.3515625, "learning_rate": 1.93322593131249e-05, "loss": 0.5765, "step": 2115 }, { "epoch": 0.36636727626880206, "grad_norm": 1.421875, "learning_rate": 1.933160386440031e-05, "loss": 0.542, "step": 2116 }, { "epoch": 0.3665404177037117, "grad_norm": 1.359375, "learning_rate": 1.933094810526438e-05, "loss": 0.5996, "step": 2117 }, { "epoch": 0.36671355913862136, "grad_norm": 1.5078125, "learning_rate": 1.9330292035738924e-05, "loss": 0.5806, "step": 2118 }, { "epoch": 0.366886700573531, "grad_norm": 1.3359375, "learning_rate": 1.9329635655845767e-05, "loss": 0.6046, "step": 2119 }, { "epoch": 0.36705984200844066, "grad_norm": 1.328125, "learning_rate": 1.932897896560674e-05, "loss": 0.5875, "step": 2120 }, { "epoch": 0.3672329834433503, "grad_norm": 1.359375, "learning_rate": 1.932832196504369e-05, "loss": 0.5589, "step": 2121 }, { "epoch": 0.3674061248782599, "grad_norm": 1.53125, "learning_rate": 1.9327664654178476e-05, "loss": 0.7492, "step": 2122 }, { "epoch": 0.36757926631316956, "grad_norm": 1.3828125, "learning_rate": 1.9327007033032952e-05, "loss": 0.561, "step": 2123 }, { "epoch": 0.3677524077480792, "grad_norm": 1.390625, "learning_rate": 1.932634910162901e-05, "loss": 0.6756, "step": 2124 }, { "epoch": 0.36792554918298886, "grad_norm": 1.40625, "learning_rate": 1.9325690859988516e-05, "loss": 0.6169, "step": 2125 }, { "epoch": 0.3680986906178985, "grad_norm": 1.5078125, "learning_rate": 1.9325032308133384e-05, "loss": 0.7074, "step": 2126 }, { "epoch": 0.3682718320528081, "grad_norm": 1.5390625, "learning_rate": 1.9324373446085508e-05, "loss": 0.5986, "step": 2127 }, { "epoch": 0.36844497348771776, "grad_norm": 1.296875, "learning_rate": 1.932371427386681e-05, "loss": 0.5648, "step": 2128 }, { "epoch": 0.3686181149226274, "grad_norm": 1.390625, "learning_rate": 1.932305479149922e-05, "loss": 0.5943, "step": 2129 }, { "epoch": 0.36879125635753707, "grad_norm": 1.546875, "learning_rate": 1.9322394999004668e-05, "loss": 0.6591, "step": 2130 }, { "epoch": 0.3689643977924467, "grad_norm": 1.4140625, "learning_rate": 1.9321734896405107e-05, "loss": 0.6143, "step": 2131 }, { "epoch": 0.36913753922735637, "grad_norm": 1.578125, "learning_rate": 1.932107448372249e-05, "loss": 0.6575, "step": 2132 }, { "epoch": 0.36931068066226597, "grad_norm": 1.46875, "learning_rate": 1.9320413760978792e-05, "loss": 0.6318, "step": 2133 }, { "epoch": 0.3694838220971756, "grad_norm": 1.46875, "learning_rate": 1.931975272819599e-05, "loss": 0.5785, "step": 2134 }, { "epoch": 0.36965696353208527, "grad_norm": 1.5234375, "learning_rate": 1.9319091385396066e-05, "loss": 0.647, "step": 2135 }, { "epoch": 0.3698301049669949, "grad_norm": 1.3203125, "learning_rate": 1.931842973260103e-05, "loss": 0.7347, "step": 2136 }, { "epoch": 0.3700032464019046, "grad_norm": 1.5078125, "learning_rate": 1.9317767769832883e-05, "loss": 0.5722, "step": 2137 }, { "epoch": 0.3701763878368142, "grad_norm": 1.453125, "learning_rate": 1.9317105497113647e-05, "loss": 0.5972, "step": 2138 }, { "epoch": 0.3703495292717238, "grad_norm": 1.4921875, "learning_rate": 1.9316442914465357e-05, "loss": 0.6236, "step": 2139 }, { "epoch": 0.3705226707066335, "grad_norm": 1.34375, "learning_rate": 1.9315780021910046e-05, "loss": 0.6441, "step": 2140 }, { "epoch": 0.3706958121415431, "grad_norm": 1.4140625, "learning_rate": 1.931511681946977e-05, "loss": 0.6867, "step": 2141 }, { "epoch": 0.3708689535764528, "grad_norm": 1.4296875, "learning_rate": 1.931445330716659e-05, "loss": 0.6113, "step": 2142 }, { "epoch": 0.3710420950113624, "grad_norm": 1.390625, "learning_rate": 1.9313789485022573e-05, "loss": 0.6111, "step": 2143 }, { "epoch": 0.371215236446272, "grad_norm": 1.40625, "learning_rate": 1.9313125353059807e-05, "loss": 0.623, "step": 2144 }, { "epoch": 0.3713883778811817, "grad_norm": 1.375, "learning_rate": 1.931246091130038e-05, "loss": 0.6422, "step": 2145 }, { "epoch": 0.3715615193160913, "grad_norm": 1.4140625, "learning_rate": 1.9311796159766395e-05, "loss": 0.6659, "step": 2146 }, { "epoch": 0.371734660751001, "grad_norm": 1.3046875, "learning_rate": 1.931113109847996e-05, "loss": 0.6284, "step": 2147 }, { "epoch": 0.37190780218591063, "grad_norm": 1.4296875, "learning_rate": 1.9310465727463207e-05, "loss": 0.5862, "step": 2148 }, { "epoch": 0.3720809436208203, "grad_norm": 1.3203125, "learning_rate": 1.9309800046738264e-05, "loss": 0.5947, "step": 2149 }, { "epoch": 0.3722540850557299, "grad_norm": 1.4609375, "learning_rate": 1.9309134056327275e-05, "loss": 0.6532, "step": 2150 }, { "epoch": 0.37242722649063953, "grad_norm": 1.484375, "learning_rate": 1.9308467756252397e-05, "loss": 0.6311, "step": 2151 }, { "epoch": 0.3726003679255492, "grad_norm": 1.4375, "learning_rate": 1.9307801146535793e-05, "loss": 0.5885, "step": 2152 }, { "epoch": 0.37277350936045883, "grad_norm": 1.5234375, "learning_rate": 1.930713422719963e-05, "loss": 0.5966, "step": 2153 }, { "epoch": 0.3729466507953685, "grad_norm": 1.5, "learning_rate": 1.9306466998266102e-05, "loss": 0.595, "step": 2154 }, { "epoch": 0.37311979223027814, "grad_norm": 1.4765625, "learning_rate": 1.93057994597574e-05, "loss": 0.6106, "step": 2155 }, { "epoch": 0.37329293366518773, "grad_norm": 1.359375, "learning_rate": 1.930513161169573e-05, "loss": 0.4997, "step": 2156 }, { "epoch": 0.3734660751000974, "grad_norm": 1.375, "learning_rate": 1.9304463454103304e-05, "loss": 0.5804, "step": 2157 }, { "epoch": 0.37363921653500703, "grad_norm": 1.3828125, "learning_rate": 1.9303794987002357e-05, "loss": 0.6001, "step": 2158 }, { "epoch": 0.3738123579699167, "grad_norm": 1.359375, "learning_rate": 1.930312621041512e-05, "loss": 0.6, "step": 2159 }, { "epoch": 0.37398549940482634, "grad_norm": 1.265625, "learning_rate": 1.930245712436384e-05, "loss": 0.5658, "step": 2160 }, { "epoch": 0.37415864083973593, "grad_norm": 1.4140625, "learning_rate": 1.930178772887077e-05, "loss": 0.6382, "step": 2161 }, { "epoch": 0.3743317822746456, "grad_norm": 1.375, "learning_rate": 1.930111802395818e-05, "loss": 0.5875, "step": 2162 }, { "epoch": 0.37450492370955524, "grad_norm": 1.453125, "learning_rate": 1.9300448009648353e-05, "loss": 0.715, "step": 2163 }, { "epoch": 0.3746780651444649, "grad_norm": 1.4296875, "learning_rate": 1.9299777685963566e-05, "loss": 0.5878, "step": 2164 }, { "epoch": 0.37485120657937454, "grad_norm": 1.359375, "learning_rate": 1.9299107052926124e-05, "loss": 0.6147, "step": 2165 }, { "epoch": 0.3750243480142842, "grad_norm": 1.4375, "learning_rate": 1.9298436110558335e-05, "loss": 0.6319, "step": 2166 }, { "epoch": 0.3751974894491938, "grad_norm": 1.4375, "learning_rate": 1.9297764858882516e-05, "loss": 0.6302, "step": 2167 }, { "epoch": 0.37537063088410344, "grad_norm": 1.34375, "learning_rate": 1.9297093297920996e-05, "loss": 0.6069, "step": 2168 }, { "epoch": 0.3755437723190131, "grad_norm": 1.4375, "learning_rate": 1.929642142769611e-05, "loss": 0.5491, "step": 2169 }, { "epoch": 0.37571691375392274, "grad_norm": 1.4453125, "learning_rate": 1.929574924823022e-05, "loss": 0.6102, "step": 2170 }, { "epoch": 0.3758900551888324, "grad_norm": 1.4765625, "learning_rate": 1.9295076759545674e-05, "loss": 0.614, "step": 2171 }, { "epoch": 0.37606319662374205, "grad_norm": 1.34375, "learning_rate": 1.9294403961664844e-05, "loss": 0.6573, "step": 2172 }, { "epoch": 0.37623633805865164, "grad_norm": 1.390625, "learning_rate": 1.9293730854610114e-05, "loss": 0.5594, "step": 2173 }, { "epoch": 0.3764094794935613, "grad_norm": 1.296875, "learning_rate": 1.9293057438403873e-05, "loss": 0.5801, "step": 2174 }, { "epoch": 0.37658262092847095, "grad_norm": 1.40625, "learning_rate": 1.929238371306852e-05, "loss": 0.6082, "step": 2175 }, { "epoch": 0.3767557623633806, "grad_norm": 1.421875, "learning_rate": 1.9291709678626468e-05, "loss": 0.6621, "step": 2176 }, { "epoch": 0.37692890379829025, "grad_norm": 1.390625, "learning_rate": 1.9291035335100137e-05, "loss": 0.6022, "step": 2177 }, { "epoch": 0.37710204523319985, "grad_norm": 1.359375, "learning_rate": 1.929036068251196e-05, "loss": 0.6524, "step": 2178 }, { "epoch": 0.3772751866681095, "grad_norm": 1.296875, "learning_rate": 1.928968572088438e-05, "loss": 0.6136, "step": 2179 }, { "epoch": 0.37744832810301915, "grad_norm": 1.3203125, "learning_rate": 1.9289010450239843e-05, "loss": 0.4996, "step": 2180 }, { "epoch": 0.3776214695379288, "grad_norm": 1.6015625, "learning_rate": 1.9288334870600822e-05, "loss": 0.6276, "step": 2181 }, { "epoch": 0.37779461097283845, "grad_norm": 1.7421875, "learning_rate": 1.9287658981989782e-05, "loss": 0.5943, "step": 2182 }, { "epoch": 0.3779677524077481, "grad_norm": 1.4765625, "learning_rate": 1.928698278442921e-05, "loss": 0.6202, "step": 2183 }, { "epoch": 0.3781408938426577, "grad_norm": 1.3984375, "learning_rate": 1.9286306277941595e-05, "loss": 0.6731, "step": 2184 }, { "epoch": 0.37831403527756735, "grad_norm": 1.3359375, "learning_rate": 1.9285629462549446e-05, "loss": 0.5822, "step": 2185 }, { "epoch": 0.378487176712477, "grad_norm": 1.34375, "learning_rate": 1.9284952338275274e-05, "loss": 0.591, "step": 2186 }, { "epoch": 0.37866031814738665, "grad_norm": 1.28125, "learning_rate": 1.9284274905141603e-05, "loss": 0.646, "step": 2187 }, { "epoch": 0.3788334595822963, "grad_norm": 1.484375, "learning_rate": 1.9283597163170968e-05, "loss": 0.6211, "step": 2188 }, { "epoch": 0.37900660101720596, "grad_norm": 1.34375, "learning_rate": 1.9282919112385914e-05, "loss": 0.5988, "step": 2189 }, { "epoch": 0.37917974245211555, "grad_norm": 1.28125, "learning_rate": 1.9282240752808995e-05, "loss": 0.6061, "step": 2190 }, { "epoch": 0.3793528838870252, "grad_norm": 1.2890625, "learning_rate": 1.9281562084462776e-05, "loss": 0.5807, "step": 2191 }, { "epoch": 0.37952602532193486, "grad_norm": 1.5, "learning_rate": 1.928088310736984e-05, "loss": 0.6305, "step": 2192 }, { "epoch": 0.3796991667568445, "grad_norm": 1.5390625, "learning_rate": 1.928020382155276e-05, "loss": 0.6354, "step": 2193 }, { "epoch": 0.37987230819175416, "grad_norm": 1.4375, "learning_rate": 1.927952422703414e-05, "loss": 0.6084, "step": 2194 }, { "epoch": 0.38004544962666376, "grad_norm": 1.359375, "learning_rate": 1.927884432383659e-05, "loss": 0.5739, "step": 2195 }, { "epoch": 0.3802185910615734, "grad_norm": 1.375, "learning_rate": 1.9278164111982715e-05, "loss": 0.5919, "step": 2196 }, { "epoch": 0.38039173249648306, "grad_norm": 1.4609375, "learning_rate": 1.9277483591495155e-05, "loss": 0.6944, "step": 2197 }, { "epoch": 0.3805648739313927, "grad_norm": 1.4140625, "learning_rate": 1.9276802762396535e-05, "loss": 0.6406, "step": 2198 }, { "epoch": 0.38073801536630236, "grad_norm": 1.5234375, "learning_rate": 1.9276121624709513e-05, "loss": 0.589, "step": 2199 }, { "epoch": 0.380911156801212, "grad_norm": 1.390625, "learning_rate": 1.927544017845674e-05, "loss": 0.5184, "step": 2200 }, { "epoch": 0.3810842982361216, "grad_norm": 1.640625, "learning_rate": 1.9274758423660888e-05, "loss": 0.6019, "step": 2201 }, { "epoch": 0.38125743967103126, "grad_norm": 1.515625, "learning_rate": 1.927407636034463e-05, "loss": 0.6125, "step": 2202 }, { "epoch": 0.3814305811059409, "grad_norm": 1.390625, "learning_rate": 1.9273393988530663e-05, "loss": 0.589, "step": 2203 }, { "epoch": 0.38160372254085057, "grad_norm": 1.453125, "learning_rate": 1.9272711308241676e-05, "loss": 0.6174, "step": 2204 }, { "epoch": 0.3817768639757602, "grad_norm": 1.3359375, "learning_rate": 1.927202831950038e-05, "loss": 0.6025, "step": 2205 }, { "epoch": 0.38195000541066987, "grad_norm": 1.4140625, "learning_rate": 1.9271345022329502e-05, "loss": 0.6592, "step": 2206 }, { "epoch": 0.38212314684557946, "grad_norm": 1.3359375, "learning_rate": 1.9270661416751765e-05, "loss": 0.6311, "step": 2207 }, { "epoch": 0.3822962882804891, "grad_norm": 1.3203125, "learning_rate": 1.926997750278991e-05, "loss": 0.6369, "step": 2208 }, { "epoch": 0.38246942971539877, "grad_norm": 1.375, "learning_rate": 1.926929328046669e-05, "loss": 0.6091, "step": 2209 }, { "epoch": 0.3826425711503084, "grad_norm": 1.3515625, "learning_rate": 1.926860874980486e-05, "loss": 0.5485, "step": 2210 }, { "epoch": 0.38281571258521807, "grad_norm": 1.359375, "learning_rate": 1.926792391082719e-05, "loss": 0.6024, "step": 2211 }, { "epoch": 0.38298885402012767, "grad_norm": 1.3359375, "learning_rate": 1.9267238763556473e-05, "loss": 0.5827, "step": 2212 }, { "epoch": 0.3831619954550373, "grad_norm": 1.421875, "learning_rate": 1.9266553308015482e-05, "loss": 0.6315, "step": 2213 }, { "epoch": 0.38333513688994697, "grad_norm": 1.40625, "learning_rate": 1.9265867544227035e-05, "loss": 0.5744, "step": 2214 }, { "epoch": 0.3835082783248566, "grad_norm": 1.421875, "learning_rate": 1.926518147221393e-05, "loss": 0.6579, "step": 2215 }, { "epoch": 0.3836814197597663, "grad_norm": 1.515625, "learning_rate": 1.9264495091999e-05, "loss": 0.5824, "step": 2216 }, { "epoch": 0.3838545611946759, "grad_norm": 1.390625, "learning_rate": 1.9263808403605072e-05, "loss": 0.6276, "step": 2217 }, { "epoch": 0.3840277026295855, "grad_norm": 1.4296875, "learning_rate": 1.9263121407054984e-05, "loss": 0.5864, "step": 2218 }, { "epoch": 0.3842008440644952, "grad_norm": 1.4453125, "learning_rate": 1.9262434102371596e-05, "loss": 0.5623, "step": 2219 }, { "epoch": 0.3843739854994048, "grad_norm": 1.390625, "learning_rate": 1.9261746489577767e-05, "loss": 0.5789, "step": 2220 }, { "epoch": 0.3845471269343145, "grad_norm": 1.3125, "learning_rate": 1.9261058568696372e-05, "loss": 0.5312, "step": 2221 }, { "epoch": 0.3847202683692241, "grad_norm": 1.453125, "learning_rate": 1.9260370339750293e-05, "loss": 0.597, "step": 2222 }, { "epoch": 0.3848934098041338, "grad_norm": 1.453125, "learning_rate": 1.925968180276242e-05, "loss": 0.6372, "step": 2223 }, { "epoch": 0.3850665512390434, "grad_norm": 1.390625, "learning_rate": 1.9258992957755666e-05, "loss": 0.7004, "step": 2224 }, { "epoch": 0.385239692673953, "grad_norm": 1.3828125, "learning_rate": 1.9258303804752942e-05, "loss": 0.5836, "step": 2225 }, { "epoch": 0.3854128341088627, "grad_norm": 1.40625, "learning_rate": 1.9257614343777166e-05, "loss": 0.5933, "step": 2226 }, { "epoch": 0.38558597554377233, "grad_norm": 1.28125, "learning_rate": 1.925692457485128e-05, "loss": 0.5259, "step": 2227 }, { "epoch": 0.385759116978682, "grad_norm": 1.3984375, "learning_rate": 1.9256234497998222e-05, "loss": 0.6433, "step": 2228 }, { "epoch": 0.3859322584135916, "grad_norm": 1.390625, "learning_rate": 1.9255544113240955e-05, "loss": 0.602, "step": 2229 }, { "epoch": 0.38610539984850123, "grad_norm": 1.4296875, "learning_rate": 1.9254853420602435e-05, "loss": 0.5886, "step": 2230 }, { "epoch": 0.3862785412834109, "grad_norm": 1.3203125, "learning_rate": 1.9254162420105647e-05, "loss": 0.6079, "step": 2231 }, { "epoch": 0.38645168271832053, "grad_norm": 1.609375, "learning_rate": 1.9253471111773572e-05, "loss": 0.6506, "step": 2232 }, { "epoch": 0.3866248241532302, "grad_norm": 1.359375, "learning_rate": 1.9252779495629202e-05, "loss": 0.6006, "step": 2233 }, { "epoch": 0.38679796558813984, "grad_norm": 1.375, "learning_rate": 1.9252087571695553e-05, "loss": 0.5818, "step": 2234 }, { "epoch": 0.38697110702304943, "grad_norm": 1.546875, "learning_rate": 1.9251395339995637e-05, "loss": 0.5636, "step": 2235 }, { "epoch": 0.3871442484579591, "grad_norm": 1.3984375, "learning_rate": 1.9250702800552476e-05, "loss": 0.5733, "step": 2236 }, { "epoch": 0.38731738989286874, "grad_norm": 1.40625, "learning_rate": 1.9250009953389113e-05, "loss": 0.5843, "step": 2237 }, { "epoch": 0.3874905313277784, "grad_norm": 1.375, "learning_rate": 1.9249316798528594e-05, "loss": 0.6164, "step": 2238 }, { "epoch": 0.38766367276268804, "grad_norm": 1.3125, "learning_rate": 1.9248623335993976e-05, "loss": 0.6284, "step": 2239 }, { "epoch": 0.3878368141975977, "grad_norm": 1.3515625, "learning_rate": 1.9247929565808324e-05, "loss": 0.5651, "step": 2240 }, { "epoch": 0.3880099556325073, "grad_norm": 1.28125, "learning_rate": 1.9247235487994722e-05, "loss": 0.6017, "step": 2241 }, { "epoch": 0.38818309706741694, "grad_norm": 1.328125, "learning_rate": 1.9246541102576254e-05, "loss": 0.5932, "step": 2242 }, { "epoch": 0.3883562385023266, "grad_norm": 1.4375, "learning_rate": 1.9245846409576016e-05, "loss": 0.6557, "step": 2243 }, { "epoch": 0.38852937993723624, "grad_norm": 1.4609375, "learning_rate": 1.9245151409017122e-05, "loss": 0.644, "step": 2244 }, { "epoch": 0.3887025213721459, "grad_norm": 1.3203125, "learning_rate": 1.924445610092269e-05, "loss": 0.6174, "step": 2245 }, { "epoch": 0.3888756628070555, "grad_norm": 1.390625, "learning_rate": 1.9243760485315842e-05, "loss": 0.6805, "step": 2246 }, { "epoch": 0.38904880424196514, "grad_norm": 1.6953125, "learning_rate": 1.924306456221973e-05, "loss": 0.6279, "step": 2247 }, { "epoch": 0.3892219456768748, "grad_norm": 1.4375, "learning_rate": 1.9242368331657492e-05, "loss": 0.6539, "step": 2248 }, { "epoch": 0.38939508711178444, "grad_norm": 1.359375, "learning_rate": 1.9241671793652294e-05, "loss": 0.6254, "step": 2249 }, { "epoch": 0.3895682285466941, "grad_norm": 1.4140625, "learning_rate": 1.92409749482273e-05, "loss": 0.6176, "step": 2250 }, { "epoch": 0.38974136998160375, "grad_norm": 1.3515625, "learning_rate": 1.9240277795405696e-05, "loss": 0.6018, "step": 2251 }, { "epoch": 0.38991451141651334, "grad_norm": 1.4296875, "learning_rate": 1.9239580335210673e-05, "loss": 0.6014, "step": 2252 }, { "epoch": 0.390087652851423, "grad_norm": 1.390625, "learning_rate": 1.9238882567665432e-05, "loss": 0.61, "step": 2253 }, { "epoch": 0.39026079428633265, "grad_norm": 1.4375, "learning_rate": 1.923818449279318e-05, "loss": 0.6312, "step": 2254 }, { "epoch": 0.3904339357212423, "grad_norm": 1.3984375, "learning_rate": 1.9237486110617138e-05, "loss": 0.5806, "step": 2255 }, { "epoch": 0.39060707715615195, "grad_norm": 1.3125, "learning_rate": 1.923678742116054e-05, "loss": 0.5829, "step": 2256 }, { "epoch": 0.3907802185910616, "grad_norm": 1.34375, "learning_rate": 1.9236088424446625e-05, "loss": 0.6299, "step": 2257 }, { "epoch": 0.3909533600259712, "grad_norm": 1.3125, "learning_rate": 1.9235389120498645e-05, "loss": 0.648, "step": 2258 }, { "epoch": 0.39112650146088085, "grad_norm": 1.3203125, "learning_rate": 1.9234689509339865e-05, "loss": 0.7085, "step": 2259 }, { "epoch": 0.3912996428957905, "grad_norm": 1.390625, "learning_rate": 1.923398959099356e-05, "loss": 0.6095, "step": 2260 }, { "epoch": 0.39147278433070015, "grad_norm": 1.3671875, "learning_rate": 1.9233289365483e-05, "loss": 0.6568, "step": 2261 }, { "epoch": 0.3916459257656098, "grad_norm": 1.4765625, "learning_rate": 1.923258883283149e-05, "loss": 0.6095, "step": 2262 }, { "epoch": 0.3918190672005194, "grad_norm": 1.359375, "learning_rate": 1.923188799306233e-05, "loss": 0.6215, "step": 2263 }, { "epoch": 0.39199220863542905, "grad_norm": 1.3828125, "learning_rate": 1.923118684619883e-05, "loss": 0.6036, "step": 2264 }, { "epoch": 0.3921653500703387, "grad_norm": 1.3828125, "learning_rate": 1.9230485392264316e-05, "loss": 0.5943, "step": 2265 }, { "epoch": 0.39233849150524835, "grad_norm": 1.4765625, "learning_rate": 1.922978363128212e-05, "loss": 0.6117, "step": 2266 }, { "epoch": 0.392511632940158, "grad_norm": 1.3984375, "learning_rate": 1.9229081563275585e-05, "loss": 0.6047, "step": 2267 }, { "epoch": 0.39268477437506766, "grad_norm": 1.375, "learning_rate": 1.9228379188268068e-05, "loss": 0.6107, "step": 2268 }, { "epoch": 0.39285791580997725, "grad_norm": 1.3515625, "learning_rate": 1.922767650628293e-05, "loss": 0.5933, "step": 2269 }, { "epoch": 0.3930310572448869, "grad_norm": 1.359375, "learning_rate": 1.9226973517343547e-05, "loss": 0.5684, "step": 2270 }, { "epoch": 0.39320419867979656, "grad_norm": 1.390625, "learning_rate": 1.9226270221473302e-05, "loss": 0.6173, "step": 2271 }, { "epoch": 0.3933773401147062, "grad_norm": 1.3828125, "learning_rate": 1.9225566618695594e-05, "loss": 0.5685, "step": 2272 }, { "epoch": 0.39355048154961586, "grad_norm": 1.4140625, "learning_rate": 1.9224862709033823e-05, "loss": 0.6347, "step": 2273 }, { "epoch": 0.3937236229845255, "grad_norm": 1.375, "learning_rate": 1.922415849251141e-05, "loss": 0.6247, "step": 2274 }, { "epoch": 0.3938967644194351, "grad_norm": 1.3515625, "learning_rate": 1.9223453969151772e-05, "loss": 0.6607, "step": 2275 }, { "epoch": 0.39406990585434476, "grad_norm": 1.171875, "learning_rate": 1.9222749138978353e-05, "loss": 0.5713, "step": 2276 }, { "epoch": 0.3942430472892544, "grad_norm": 1.3359375, "learning_rate": 1.9222044002014594e-05, "loss": 0.6562, "step": 2277 }, { "epoch": 0.39441618872416406, "grad_norm": 1.2578125, "learning_rate": 1.922133855828395e-05, "loss": 0.5679, "step": 2278 }, { "epoch": 0.3945893301590737, "grad_norm": 1.4296875, "learning_rate": 1.9220632807809895e-05, "loss": 0.5918, "step": 2279 }, { "epoch": 0.3947624715939833, "grad_norm": 1.2890625, "learning_rate": 1.9219926750615895e-05, "loss": 0.5966, "step": 2280 }, { "epoch": 0.39493561302889296, "grad_norm": 1.34375, "learning_rate": 1.9219220386725444e-05, "loss": 0.6143, "step": 2281 }, { "epoch": 0.3951087544638026, "grad_norm": 1.328125, "learning_rate": 1.9218513716162035e-05, "loss": 0.6614, "step": 2282 }, { "epoch": 0.39528189589871227, "grad_norm": 1.453125, "learning_rate": 1.921780673894918e-05, "loss": 0.5867, "step": 2283 }, { "epoch": 0.3954550373336219, "grad_norm": 1.5390625, "learning_rate": 1.921709945511039e-05, "loss": 0.6017, "step": 2284 }, { "epoch": 0.39562817876853157, "grad_norm": 1.421875, "learning_rate": 1.9216391864669196e-05, "loss": 0.6475, "step": 2285 }, { "epoch": 0.39580132020344116, "grad_norm": 1.4296875, "learning_rate": 1.9215683967649134e-05, "loss": 0.5724, "step": 2286 }, { "epoch": 0.3959744616383508, "grad_norm": 1.4375, "learning_rate": 1.9214975764073754e-05, "loss": 0.6035, "step": 2287 }, { "epoch": 0.39614760307326047, "grad_norm": 1.390625, "learning_rate": 1.9214267253966613e-05, "loss": 0.549, "step": 2288 }, { "epoch": 0.3963207445081701, "grad_norm": 1.4453125, "learning_rate": 1.921355843735128e-05, "loss": 0.6386, "step": 2289 }, { "epoch": 0.39649388594307977, "grad_norm": 1.2578125, "learning_rate": 1.921284931425133e-05, "loss": 0.6061, "step": 2290 }, { "epoch": 0.39666702737798937, "grad_norm": 1.3828125, "learning_rate": 1.9212139884690354e-05, "loss": 0.6024, "step": 2291 }, { "epoch": 0.396840168812899, "grad_norm": 1.2109375, "learning_rate": 1.9211430148691954e-05, "loss": 0.5317, "step": 2292 }, { "epoch": 0.39701331024780867, "grad_norm": 1.328125, "learning_rate": 1.9210720106279735e-05, "loss": 0.6051, "step": 2293 }, { "epoch": 0.3971864516827183, "grad_norm": 1.2890625, "learning_rate": 1.9210009757477318e-05, "loss": 0.5247, "step": 2294 }, { "epoch": 0.397359593117628, "grad_norm": 1.2890625, "learning_rate": 1.920929910230833e-05, "loss": 0.5532, "step": 2295 }, { "epoch": 0.3975327345525376, "grad_norm": 1.328125, "learning_rate": 1.920858814079641e-05, "loss": 0.5707, "step": 2296 }, { "epoch": 0.3977058759874472, "grad_norm": 1.3125, "learning_rate": 1.9207876872965217e-05, "loss": 0.6732, "step": 2297 }, { "epoch": 0.3978790174223569, "grad_norm": 1.40625, "learning_rate": 1.92071652988384e-05, "loss": 0.6491, "step": 2298 }, { "epoch": 0.3980521588572665, "grad_norm": 1.3671875, "learning_rate": 1.9206453418439634e-05, "loss": 0.5915, "step": 2299 }, { "epoch": 0.3982253002921762, "grad_norm": 1.40625, "learning_rate": 1.9205741231792596e-05, "loss": 0.629, "step": 2300 }, { "epoch": 0.39839844172708583, "grad_norm": 1.3828125, "learning_rate": 1.9205028738920982e-05, "loss": 0.6595, "step": 2301 }, { "epoch": 0.3985715831619955, "grad_norm": 1.3828125, "learning_rate": 1.920431593984849e-05, "loss": 0.6609, "step": 2302 }, { "epoch": 0.3987447245969051, "grad_norm": 1.2890625, "learning_rate": 1.9203602834598828e-05, "loss": 0.5934, "step": 2303 }, { "epoch": 0.3989178660318147, "grad_norm": 1.390625, "learning_rate": 1.9202889423195725e-05, "loss": 0.6305, "step": 2304 }, { "epoch": 0.3990910074667244, "grad_norm": 1.375, "learning_rate": 1.9202175705662903e-05, "loss": 0.6394, "step": 2305 }, { "epoch": 0.39926414890163403, "grad_norm": 1.3203125, "learning_rate": 1.9201461682024108e-05, "loss": 0.6028, "step": 2306 }, { "epoch": 0.3994372903365437, "grad_norm": 1.3515625, "learning_rate": 1.9200747352303093e-05, "loss": 0.615, "step": 2307 }, { "epoch": 0.3996104317714533, "grad_norm": 1.4375, "learning_rate": 1.920003271652362e-05, "loss": 0.6649, "step": 2308 }, { "epoch": 0.39978357320636293, "grad_norm": 1.53125, "learning_rate": 1.9199317774709457e-05, "loss": 0.6391, "step": 2309 }, { "epoch": 0.3999567146412726, "grad_norm": 1.421875, "learning_rate": 1.9198602526884388e-05, "loss": 0.5907, "step": 2310 }, { "epoch": 0.40012985607618223, "grad_norm": 1.3046875, "learning_rate": 1.9197886973072205e-05, "loss": 0.6079, "step": 2311 }, { "epoch": 0.4003029975110919, "grad_norm": 1.3828125, "learning_rate": 1.9197171113296712e-05, "loss": 0.602, "step": 2312 }, { "epoch": 0.40047613894600154, "grad_norm": 1.28125, "learning_rate": 1.919645494758172e-05, "loss": 0.5701, "step": 2313 }, { "epoch": 0.40064928038091113, "grad_norm": 1.375, "learning_rate": 1.9195738475951056e-05, "loss": 0.6719, "step": 2314 }, { "epoch": 0.4008224218158208, "grad_norm": 1.3984375, "learning_rate": 1.9195021698428548e-05, "loss": 0.5976, "step": 2315 }, { "epoch": 0.40099556325073044, "grad_norm": 1.3125, "learning_rate": 1.919430461503804e-05, "loss": 0.6462, "step": 2316 }, { "epoch": 0.4011687046856401, "grad_norm": 1.34375, "learning_rate": 1.9193587225803388e-05, "loss": 0.5984, "step": 2317 }, { "epoch": 0.40134184612054974, "grad_norm": 1.359375, "learning_rate": 1.919286953074846e-05, "loss": 0.6081, "step": 2318 }, { "epoch": 0.4015149875554594, "grad_norm": 1.34375, "learning_rate": 1.9192151529897115e-05, "loss": 0.6061, "step": 2319 }, { "epoch": 0.401688128990369, "grad_norm": 1.4453125, "learning_rate": 1.919143322327325e-05, "loss": 0.6985, "step": 2320 }, { "epoch": 0.40186127042527864, "grad_norm": 1.4375, "learning_rate": 1.919071461090076e-05, "loss": 0.6082, "step": 2321 }, { "epoch": 0.4020344118601883, "grad_norm": 1.2578125, "learning_rate": 1.9189995692803535e-05, "loss": 0.5736, "step": 2322 }, { "epoch": 0.40220755329509794, "grad_norm": 1.40625, "learning_rate": 1.9189276469005508e-05, "loss": 0.6206, "step": 2323 }, { "epoch": 0.4023806947300076, "grad_norm": 1.4296875, "learning_rate": 1.918855693953059e-05, "loss": 0.5858, "step": 2324 }, { "epoch": 0.4025538361649172, "grad_norm": 1.5546875, "learning_rate": 1.918783710440272e-05, "loss": 0.6663, "step": 2325 }, { "epoch": 0.40272697759982684, "grad_norm": 1.3828125, "learning_rate": 1.9187116963645845e-05, "loss": 0.5888, "step": 2326 }, { "epoch": 0.4029001190347365, "grad_norm": 1.3671875, "learning_rate": 1.9186396517283918e-05, "loss": 0.6174, "step": 2327 }, { "epoch": 0.40307326046964614, "grad_norm": 1.3671875, "learning_rate": 1.9185675765340902e-05, "loss": 0.6155, "step": 2328 }, { "epoch": 0.4032464019045558, "grad_norm": 1.2890625, "learning_rate": 1.9184954707840783e-05, "loss": 0.6086, "step": 2329 }, { "epoch": 0.40341954333946545, "grad_norm": 1.390625, "learning_rate": 1.918423334480753e-05, "loss": 0.6601, "step": 2330 }, { "epoch": 0.40359268477437504, "grad_norm": 1.4453125, "learning_rate": 1.9183511676265152e-05, "loss": 0.625, "step": 2331 }, { "epoch": 0.4037658262092847, "grad_norm": 1.3203125, "learning_rate": 1.9182789702237654e-05, "loss": 0.5964, "step": 2332 }, { "epoch": 0.40393896764419435, "grad_norm": 1.3984375, "learning_rate": 1.9182067422749045e-05, "loss": 0.602, "step": 2333 }, { "epoch": 0.404112109079104, "grad_norm": 1.5078125, "learning_rate": 1.9181344837823354e-05, "loss": 0.6115, "step": 2334 }, { "epoch": 0.40428525051401365, "grad_norm": 1.3046875, "learning_rate": 1.9180621947484622e-05, "loss": 0.6506, "step": 2335 }, { "epoch": 0.4044583919489233, "grad_norm": 1.3984375, "learning_rate": 1.917989875175689e-05, "loss": 0.5108, "step": 2336 }, { "epoch": 0.4046315333838329, "grad_norm": 1.3984375, "learning_rate": 1.9179175250664218e-05, "loss": 0.6294, "step": 2337 }, { "epoch": 0.40480467481874255, "grad_norm": 1.4296875, "learning_rate": 1.917845144423067e-05, "loss": 0.6944, "step": 2338 }, { "epoch": 0.4049778162536522, "grad_norm": 1.609375, "learning_rate": 1.9177727332480325e-05, "loss": 0.6757, "step": 2339 }, { "epoch": 0.40515095768856185, "grad_norm": 1.53125, "learning_rate": 1.9177002915437273e-05, "loss": 0.5929, "step": 2340 }, { "epoch": 0.4053240991234715, "grad_norm": 1.3203125, "learning_rate": 1.9176278193125607e-05, "loss": 0.6232, "step": 2341 }, { "epoch": 0.4054972405583811, "grad_norm": 1.4453125, "learning_rate": 1.9175553165569437e-05, "loss": 0.6286, "step": 2342 }, { "epoch": 0.40567038199329075, "grad_norm": 1.390625, "learning_rate": 1.9174827832792875e-05, "loss": 0.6541, "step": 2343 }, { "epoch": 0.4058435234282004, "grad_norm": 1.4921875, "learning_rate": 1.917410219482006e-05, "loss": 0.6587, "step": 2344 }, { "epoch": 0.40601666486311005, "grad_norm": 1.3203125, "learning_rate": 1.917337625167512e-05, "loss": 0.5674, "step": 2345 }, { "epoch": 0.4061898062980197, "grad_norm": 1.3828125, "learning_rate": 1.9172650003382206e-05, "loss": 0.6138, "step": 2346 }, { "epoch": 0.40636294773292936, "grad_norm": 1.359375, "learning_rate": 1.917192344996548e-05, "loss": 0.5584, "step": 2347 }, { "epoch": 0.40653608916783895, "grad_norm": 1.4140625, "learning_rate": 1.9171196591449107e-05, "loss": 0.5974, "step": 2348 }, { "epoch": 0.4067092306027486, "grad_norm": 1.3671875, "learning_rate": 1.9170469427857264e-05, "loss": 0.5618, "step": 2349 }, { "epoch": 0.40688237203765826, "grad_norm": 1.609375, "learning_rate": 1.9169741959214143e-05, "loss": 0.6201, "step": 2350 }, { "epoch": 0.4070555134725679, "grad_norm": 1.2734375, "learning_rate": 1.916901418554394e-05, "loss": 0.638, "step": 2351 }, { "epoch": 0.40722865490747756, "grad_norm": 1.3671875, "learning_rate": 1.916828610687087e-05, "loss": 0.5628, "step": 2352 }, { "epoch": 0.4074017963423872, "grad_norm": 1.375, "learning_rate": 1.9167557723219145e-05, "loss": 0.6244, "step": 2353 }, { "epoch": 0.4075749377772968, "grad_norm": 1.4765625, "learning_rate": 1.9166829034613e-05, "loss": 0.6644, "step": 2354 }, { "epoch": 0.40774807921220646, "grad_norm": 1.3828125, "learning_rate": 1.916610004107667e-05, "loss": 0.5728, "step": 2355 }, { "epoch": 0.4079212206471161, "grad_norm": 1.328125, "learning_rate": 1.9165370742634404e-05, "loss": 0.6701, "step": 2356 }, { "epoch": 0.40809436208202576, "grad_norm": 1.34375, "learning_rate": 1.916464113931047e-05, "loss": 0.5806, "step": 2357 }, { "epoch": 0.4082675035169354, "grad_norm": 1.3125, "learning_rate": 1.9163911231129126e-05, "loss": 0.5425, "step": 2358 }, { "epoch": 0.408440644951845, "grad_norm": 1.5, "learning_rate": 1.916318101811466e-05, "loss": 0.6367, "step": 2359 }, { "epoch": 0.40861378638675466, "grad_norm": 1.4296875, "learning_rate": 1.9162450500291362e-05, "loss": 0.5528, "step": 2360 }, { "epoch": 0.4087869278216643, "grad_norm": 1.4453125, "learning_rate": 1.916171967768353e-05, "loss": 0.6852, "step": 2361 }, { "epoch": 0.40896006925657397, "grad_norm": 1.421875, "learning_rate": 1.9160988550315475e-05, "loss": 0.691, "step": 2362 }, { "epoch": 0.4091332106914836, "grad_norm": 1.4375, "learning_rate": 1.9160257118211518e-05, "loss": 0.5861, "step": 2363 }, { "epoch": 0.40930635212639327, "grad_norm": 1.421875, "learning_rate": 1.915952538139599e-05, "loss": 0.5975, "step": 2364 }, { "epoch": 0.40947949356130287, "grad_norm": 1.4296875, "learning_rate": 1.915879333989323e-05, "loss": 0.5528, "step": 2365 }, { "epoch": 0.4096526349962125, "grad_norm": 1.375, "learning_rate": 1.915806099372759e-05, "loss": 0.6128, "step": 2366 }, { "epoch": 0.40982577643112217, "grad_norm": 1.3671875, "learning_rate": 1.915732834292343e-05, "loss": 0.6027, "step": 2367 }, { "epoch": 0.4099989178660318, "grad_norm": 1.265625, "learning_rate": 1.9156595387505127e-05, "loss": 0.5872, "step": 2368 }, { "epoch": 0.41017205930094147, "grad_norm": 1.375, "learning_rate": 1.915586212749706e-05, "loss": 0.5637, "step": 2369 }, { "epoch": 0.4103452007358511, "grad_norm": 1.3828125, "learning_rate": 1.915512856292361e-05, "loss": 0.5628, "step": 2370 }, { "epoch": 0.4105183421707607, "grad_norm": 1.3828125, "learning_rate": 1.9154394693809194e-05, "loss": 0.6337, "step": 2371 }, { "epoch": 0.41069148360567037, "grad_norm": 1.484375, "learning_rate": 1.9153660520178217e-05, "loss": 0.6409, "step": 2372 }, { "epoch": 0.41086462504058, "grad_norm": 1.3515625, "learning_rate": 1.9152926042055096e-05, "loss": 0.6515, "step": 2373 }, { "epoch": 0.4110377664754897, "grad_norm": 1.3046875, "learning_rate": 1.9152191259464276e-05, "loss": 0.6577, "step": 2374 }, { "epoch": 0.4112109079103993, "grad_norm": 1.46875, "learning_rate": 1.9151456172430186e-05, "loss": 0.6619, "step": 2375 }, { "epoch": 0.4113840493453089, "grad_norm": 1.4140625, "learning_rate": 1.9150720780977287e-05, "loss": 0.6016, "step": 2376 }, { "epoch": 0.4115571907802186, "grad_norm": 2.015625, "learning_rate": 1.9149985085130037e-05, "loss": 0.6161, "step": 2377 }, { "epoch": 0.4117303322151282, "grad_norm": 1.2265625, "learning_rate": 1.9149249084912905e-05, "loss": 0.5798, "step": 2378 }, { "epoch": 0.4119034736500379, "grad_norm": 1.390625, "learning_rate": 1.9148512780350384e-05, "loss": 0.5858, "step": 2379 }, { "epoch": 0.41207661508494753, "grad_norm": 1.2734375, "learning_rate": 1.914777617146696e-05, "loss": 0.5955, "step": 2380 }, { "epoch": 0.4122497565198572, "grad_norm": 1.328125, "learning_rate": 1.9147039258287137e-05, "loss": 0.5626, "step": 2381 }, { "epoch": 0.4124228979547668, "grad_norm": 1.2734375, "learning_rate": 1.9146302040835427e-05, "loss": 0.541, "step": 2382 }, { "epoch": 0.41259603938967643, "grad_norm": 1.34375, "learning_rate": 1.9145564519136356e-05, "loss": 0.5572, "step": 2383 }, { "epoch": 0.4127691808245861, "grad_norm": 1.3515625, "learning_rate": 1.9144826693214455e-05, "loss": 0.5691, "step": 2384 }, { "epoch": 0.41294232225949573, "grad_norm": 1.3203125, "learning_rate": 1.914408856309427e-05, "loss": 0.5896, "step": 2385 }, { "epoch": 0.4131154636944054, "grad_norm": 1.3671875, "learning_rate": 1.9143350128800352e-05, "loss": 0.6231, "step": 2386 }, { "epoch": 0.41328860512931503, "grad_norm": 1.4140625, "learning_rate": 1.9142611390357264e-05, "loss": 0.551, "step": 2387 }, { "epoch": 0.41346174656422463, "grad_norm": 1.46875, "learning_rate": 1.914187234778958e-05, "loss": 0.6008, "step": 2388 }, { "epoch": 0.4136348879991343, "grad_norm": 1.3359375, "learning_rate": 1.9141133001121887e-05, "loss": 0.5837, "step": 2389 }, { "epoch": 0.41380802943404393, "grad_norm": 1.5234375, "learning_rate": 1.9140393350378778e-05, "loss": 0.6376, "step": 2390 }, { "epoch": 0.4139811708689536, "grad_norm": 1.484375, "learning_rate": 1.9139653395584855e-05, "loss": 0.5786, "step": 2391 }, { "epoch": 0.41415431230386324, "grad_norm": 1.375, "learning_rate": 1.9138913136764734e-05, "loss": 0.6704, "step": 2392 }, { "epoch": 0.41432745373877283, "grad_norm": 1.5078125, "learning_rate": 1.9138172573943036e-05, "loss": 0.713, "step": 2393 }, { "epoch": 0.4145005951736825, "grad_norm": 1.375, "learning_rate": 1.91374317071444e-05, "loss": 0.5408, "step": 2394 }, { "epoch": 0.41467373660859214, "grad_norm": 1.4765625, "learning_rate": 1.9136690536393472e-05, "loss": 0.6339, "step": 2395 }, { "epoch": 0.4148468780435018, "grad_norm": 1.3515625, "learning_rate": 1.91359490617149e-05, "loss": 0.5952, "step": 2396 }, { "epoch": 0.41502001947841144, "grad_norm": 1.359375, "learning_rate": 1.913520728313335e-05, "loss": 0.6001, "step": 2397 }, { "epoch": 0.4151931609133211, "grad_norm": 1.296875, "learning_rate": 1.9134465200673505e-05, "loss": 0.5283, "step": 2398 }, { "epoch": 0.4153663023482307, "grad_norm": 1.359375, "learning_rate": 1.9133722814360043e-05, "loss": 0.6516, "step": 2399 }, { "epoch": 0.41553944378314034, "grad_norm": 1.3203125, "learning_rate": 1.913298012421766e-05, "loss": 0.5654, "step": 2400 }, { "epoch": 0.41571258521805, "grad_norm": 1.4453125, "learning_rate": 1.913223713027106e-05, "loss": 0.6265, "step": 2401 }, { "epoch": 0.41588572665295964, "grad_norm": 1.2890625, "learning_rate": 1.913149383254496e-05, "loss": 0.5857, "step": 2402 }, { "epoch": 0.4160588680878693, "grad_norm": 1.4921875, "learning_rate": 1.913075023106409e-05, "loss": 0.668, "step": 2403 }, { "epoch": 0.41623200952277895, "grad_norm": 1.4765625, "learning_rate": 1.9130006325853173e-05, "loss": 0.7193, "step": 2404 }, { "epoch": 0.41640515095768854, "grad_norm": 1.421875, "learning_rate": 1.912926211693697e-05, "loss": 0.6558, "step": 2405 }, { "epoch": 0.4165782923925982, "grad_norm": 1.34375, "learning_rate": 1.9128517604340225e-05, "loss": 0.5769, "step": 2406 }, { "epoch": 0.41675143382750784, "grad_norm": 1.4453125, "learning_rate": 1.912777278808771e-05, "loss": 0.6495, "step": 2407 }, { "epoch": 0.4169245752624175, "grad_norm": 1.5390625, "learning_rate": 1.9127027668204196e-05, "loss": 0.5885, "step": 2408 }, { "epoch": 0.41709771669732715, "grad_norm": 1.421875, "learning_rate": 1.9126282244714477e-05, "loss": 0.6201, "step": 2409 }, { "epoch": 0.41727085813223674, "grad_norm": 1.4375, "learning_rate": 1.9125536517643342e-05, "loss": 0.6488, "step": 2410 }, { "epoch": 0.4174439995671464, "grad_norm": 1.3515625, "learning_rate": 1.9124790487015603e-05, "loss": 0.6479, "step": 2411 }, { "epoch": 0.41761714100205605, "grad_norm": 1.375, "learning_rate": 1.9124044152856066e-05, "loss": 0.6229, "step": 2412 }, { "epoch": 0.4177902824369657, "grad_norm": 1.34375, "learning_rate": 1.9123297515189568e-05, "loss": 0.573, "step": 2413 }, { "epoch": 0.41796342387187535, "grad_norm": 1.4453125, "learning_rate": 1.9122550574040942e-05, "loss": 0.5738, "step": 2414 }, { "epoch": 0.418136565306785, "grad_norm": 1.359375, "learning_rate": 1.9121803329435035e-05, "loss": 0.6114, "step": 2415 }, { "epoch": 0.4183097067416946, "grad_norm": 1.4453125, "learning_rate": 1.9121055781396706e-05, "loss": 0.6792, "step": 2416 }, { "epoch": 0.41848284817660425, "grad_norm": 1.375, "learning_rate": 1.9120307929950815e-05, "loss": 0.6226, "step": 2417 }, { "epoch": 0.4186559896115139, "grad_norm": 1.328125, "learning_rate": 1.9119559775122246e-05, "loss": 0.6217, "step": 2418 }, { "epoch": 0.41882913104642355, "grad_norm": 1.3359375, "learning_rate": 1.911881131693588e-05, "loss": 0.62, "step": 2419 }, { "epoch": 0.4190022724813332, "grad_norm": 1.4921875, "learning_rate": 1.9118062555416615e-05, "loss": 0.6349, "step": 2420 }, { "epoch": 0.41917541391624286, "grad_norm": 1.3671875, "learning_rate": 1.9117313490589367e-05, "loss": 0.5963, "step": 2421 }, { "epoch": 0.41934855535115245, "grad_norm": 1.34375, "learning_rate": 1.9116564122479044e-05, "loss": 0.671, "step": 2422 }, { "epoch": 0.4195216967860621, "grad_norm": 1.3671875, "learning_rate": 1.9115814451110577e-05, "loss": 0.5633, "step": 2423 }, { "epoch": 0.41969483822097176, "grad_norm": 1.328125, "learning_rate": 1.91150644765089e-05, "loss": 0.6595, "step": 2424 }, { "epoch": 0.4198679796558814, "grad_norm": 1.3828125, "learning_rate": 1.9114314198698967e-05, "loss": 0.5498, "step": 2425 }, { "epoch": 0.42004112109079106, "grad_norm": 1.328125, "learning_rate": 1.9113563617705727e-05, "loss": 0.575, "step": 2426 }, { "epoch": 0.42021426252570065, "grad_norm": 1.390625, "learning_rate": 1.9112812733554155e-05, "loss": 0.6386, "step": 2427 }, { "epoch": 0.4203874039606103, "grad_norm": 1.296875, "learning_rate": 1.9112061546269226e-05, "loss": 0.5507, "step": 2428 }, { "epoch": 0.42056054539551996, "grad_norm": 1.4140625, "learning_rate": 1.911131005587593e-05, "loss": 0.6208, "step": 2429 }, { "epoch": 0.4207336868304296, "grad_norm": 1.46875, "learning_rate": 1.9110558262399262e-05, "loss": 0.5954, "step": 2430 }, { "epoch": 0.42090682826533926, "grad_norm": 1.3828125, "learning_rate": 1.9109806165864228e-05, "loss": 0.5979, "step": 2431 }, { "epoch": 0.4210799697002489, "grad_norm": 1.359375, "learning_rate": 1.910905376629585e-05, "loss": 0.6069, "step": 2432 }, { "epoch": 0.4212531111351585, "grad_norm": 1.3515625, "learning_rate": 1.910830106371916e-05, "loss": 0.6158, "step": 2433 }, { "epoch": 0.42142625257006816, "grad_norm": 1.3515625, "learning_rate": 1.9107548058159187e-05, "loss": 0.6318, "step": 2434 }, { "epoch": 0.4215993940049778, "grad_norm": 1.34375, "learning_rate": 1.9106794749640985e-05, "loss": 0.6703, "step": 2435 }, { "epoch": 0.42177253543988746, "grad_norm": 1.375, "learning_rate": 1.9106041138189616e-05, "loss": 0.5999, "step": 2436 }, { "epoch": 0.4219456768747971, "grad_norm": 1.5, "learning_rate": 1.910528722383014e-05, "loss": 0.6706, "step": 2437 }, { "epoch": 0.42211881830970677, "grad_norm": 2.21875, "learning_rate": 1.910453300658764e-05, "loss": 0.6423, "step": 2438 }, { "epoch": 0.42229195974461636, "grad_norm": 1.5390625, "learning_rate": 1.9103778486487205e-05, "loss": 0.6879, "step": 2439 }, { "epoch": 0.422465101179526, "grad_norm": 1.3046875, "learning_rate": 1.910302366355393e-05, "loss": 0.6052, "step": 2440 }, { "epoch": 0.42263824261443567, "grad_norm": 1.4609375, "learning_rate": 1.9102268537812932e-05, "loss": 0.6473, "step": 2441 }, { "epoch": 0.4228113840493453, "grad_norm": 1.328125, "learning_rate": 1.9101513109289326e-05, "loss": 0.5486, "step": 2442 }, { "epoch": 0.42298452548425497, "grad_norm": 1.359375, "learning_rate": 1.9100757378008233e-05, "loss": 0.5549, "step": 2443 }, { "epoch": 0.42315766691916457, "grad_norm": 1.375, "learning_rate": 1.9100001343994804e-05, "loss": 0.601, "step": 2444 }, { "epoch": 0.4233308083540742, "grad_norm": 1.28125, "learning_rate": 1.909924500727418e-05, "loss": 0.5338, "step": 2445 }, { "epoch": 0.42350394978898387, "grad_norm": 1.328125, "learning_rate": 1.9098488367871522e-05, "loss": 0.6152, "step": 2446 }, { "epoch": 0.4236770912238935, "grad_norm": 1.484375, "learning_rate": 1.9097731425812007e-05, "loss": 0.6209, "step": 2447 }, { "epoch": 0.42385023265880317, "grad_norm": 1.3515625, "learning_rate": 1.9096974181120804e-05, "loss": 0.5803, "step": 2448 }, { "epoch": 0.4240233740937128, "grad_norm": 1.4453125, "learning_rate": 1.9096216633823105e-05, "loss": 0.6109, "step": 2449 }, { "epoch": 0.4241965155286224, "grad_norm": 1.265625, "learning_rate": 1.9095458783944113e-05, "loss": 0.6004, "step": 2450 }, { "epoch": 0.42436965696353207, "grad_norm": 1.4765625, "learning_rate": 1.909470063150903e-05, "loss": 0.6791, "step": 2451 }, { "epoch": 0.4245427983984417, "grad_norm": 1.3671875, "learning_rate": 1.9093942176543084e-05, "loss": 0.5502, "step": 2452 }, { "epoch": 0.4247159398333514, "grad_norm": 1.375, "learning_rate": 1.90931834190715e-05, "loss": 0.602, "step": 2453 }, { "epoch": 0.424889081268261, "grad_norm": 1.359375, "learning_rate": 1.9092424359119523e-05, "loss": 0.6227, "step": 2454 }, { "epoch": 0.4250622227031707, "grad_norm": 1.296875, "learning_rate": 1.9091664996712397e-05, "loss": 0.6343, "step": 2455 }, { "epoch": 0.4252353641380803, "grad_norm": 1.3359375, "learning_rate": 1.9090905331875384e-05, "loss": 0.6036, "step": 2456 }, { "epoch": 0.4254085055729899, "grad_norm": 1.34375, "learning_rate": 1.909014536463375e-05, "loss": 0.5824, "step": 2457 }, { "epoch": 0.4255816470078996, "grad_norm": 1.546875, "learning_rate": 1.908938509501278e-05, "loss": 0.601, "step": 2458 }, { "epoch": 0.42575478844280923, "grad_norm": 1.3828125, "learning_rate": 1.9088624523037764e-05, "loss": 0.5938, "step": 2459 }, { "epoch": 0.4259279298777189, "grad_norm": 1.296875, "learning_rate": 1.9087863648734e-05, "loss": 0.6076, "step": 2460 }, { "epoch": 0.4261010713126285, "grad_norm": 1.46875, "learning_rate": 1.9087102472126794e-05, "loss": 0.5566, "step": 2461 }, { "epoch": 0.42627421274753813, "grad_norm": 1.2890625, "learning_rate": 1.908634099324148e-05, "loss": 0.6011, "step": 2462 }, { "epoch": 0.4264473541824478, "grad_norm": 1.453125, "learning_rate": 1.908557921210337e-05, "loss": 0.572, "step": 2463 }, { "epoch": 0.42662049561735743, "grad_norm": 1.40625, "learning_rate": 1.9084817128737817e-05, "loss": 0.6213, "step": 2464 }, { "epoch": 0.4267936370522671, "grad_norm": 1.34375, "learning_rate": 1.908405474317017e-05, "loss": 0.5974, "step": 2465 }, { "epoch": 0.42696677848717673, "grad_norm": 1.4296875, "learning_rate": 1.9083292055425783e-05, "loss": 0.6447, "step": 2466 }, { "epoch": 0.42713991992208633, "grad_norm": 1.375, "learning_rate": 1.9082529065530035e-05, "loss": 0.6233, "step": 2467 }, { "epoch": 0.427313061356996, "grad_norm": 1.4765625, "learning_rate": 1.9081765773508297e-05, "loss": 0.5792, "step": 2468 }, { "epoch": 0.42748620279190563, "grad_norm": 1.390625, "learning_rate": 1.9081002179385967e-05, "loss": 0.6279, "step": 2469 }, { "epoch": 0.4276593442268153, "grad_norm": 1.3359375, "learning_rate": 1.9080238283188445e-05, "loss": 0.653, "step": 2470 }, { "epoch": 0.42783248566172494, "grad_norm": 1.359375, "learning_rate": 1.9079474084941133e-05, "loss": 0.5858, "step": 2471 }, { "epoch": 0.4280056270966346, "grad_norm": 1.40625, "learning_rate": 1.9078709584669464e-05, "loss": 0.5257, "step": 2472 }, { "epoch": 0.4281787685315442, "grad_norm": 1.375, "learning_rate": 1.9077944782398864e-05, "loss": 0.5938, "step": 2473 }, { "epoch": 0.42835190996645384, "grad_norm": 1.453125, "learning_rate": 1.907717967815477e-05, "loss": 0.6103, "step": 2474 }, { "epoch": 0.4285250514013635, "grad_norm": 1.40625, "learning_rate": 1.907641427196264e-05, "loss": 0.5641, "step": 2475 }, { "epoch": 0.42869819283627314, "grad_norm": 1.3203125, "learning_rate": 1.9075648563847924e-05, "loss": 0.5984, "step": 2476 }, { "epoch": 0.4288713342711828, "grad_norm": 1.4765625, "learning_rate": 1.9074882553836104e-05, "loss": 0.5735, "step": 2477 }, { "epoch": 0.4290444757060924, "grad_norm": 1.6875, "learning_rate": 1.9074116241952655e-05, "loss": 0.5752, "step": 2478 }, { "epoch": 0.42921761714100204, "grad_norm": 1.40625, "learning_rate": 1.907334962822307e-05, "loss": 0.6642, "step": 2479 }, { "epoch": 0.4293907585759117, "grad_norm": 1.328125, "learning_rate": 1.907258271267285e-05, "loss": 0.6357, "step": 2480 }, { "epoch": 0.42956390001082134, "grad_norm": 1.2578125, "learning_rate": 1.9071815495327507e-05, "loss": 0.5661, "step": 2481 }, { "epoch": 0.429737041445731, "grad_norm": 1.3046875, "learning_rate": 1.907104797621256e-05, "loss": 0.6039, "step": 2482 }, { "epoch": 0.42991018288064065, "grad_norm": 1.328125, "learning_rate": 1.9070280155353538e-05, "loss": 0.5834, "step": 2483 }, { "epoch": 0.43008332431555024, "grad_norm": 1.71875, "learning_rate": 1.9069512032775988e-05, "loss": 0.6358, "step": 2484 }, { "epoch": 0.4302564657504599, "grad_norm": 1.53125, "learning_rate": 1.9068743608505454e-05, "loss": 0.6474, "step": 2485 }, { "epoch": 0.43042960718536954, "grad_norm": 1.3671875, "learning_rate": 1.9067974882567507e-05, "loss": 0.5988, "step": 2486 }, { "epoch": 0.4306027486202792, "grad_norm": 1.484375, "learning_rate": 1.906720585498771e-05, "loss": 0.5992, "step": 2487 }, { "epoch": 0.43077589005518885, "grad_norm": 1.3671875, "learning_rate": 1.906643652579165e-05, "loss": 0.6178, "step": 2488 }, { "epoch": 0.4309490314900985, "grad_norm": 1.4609375, "learning_rate": 1.9065666895004912e-05, "loss": 0.6343, "step": 2489 }, { "epoch": 0.4311221729250081, "grad_norm": 1.296875, "learning_rate": 1.9064896962653104e-05, "loss": 0.5869, "step": 2490 }, { "epoch": 0.43129531435991775, "grad_norm": 1.4140625, "learning_rate": 1.9064126728761832e-05, "loss": 0.5827, "step": 2491 }, { "epoch": 0.4314684557948274, "grad_norm": 1.484375, "learning_rate": 1.906335619335672e-05, "loss": 0.6095, "step": 2492 }, { "epoch": 0.43164159722973705, "grad_norm": 1.3515625, "learning_rate": 1.9062585356463402e-05, "loss": 0.5862, "step": 2493 }, { "epoch": 0.4318147386646467, "grad_norm": 1.3125, "learning_rate": 1.9061814218107514e-05, "loss": 0.6305, "step": 2494 }, { "epoch": 0.4319878800995563, "grad_norm": 1.625, "learning_rate": 1.9061042778314713e-05, "loss": 0.6836, "step": 2495 }, { "epoch": 0.43216102153446595, "grad_norm": 1.515625, "learning_rate": 1.9060271037110657e-05, "loss": 0.6653, "step": 2496 }, { "epoch": 0.4323341629693756, "grad_norm": 1.359375, "learning_rate": 1.9059498994521018e-05, "loss": 0.6213, "step": 2497 }, { "epoch": 0.43250730440428525, "grad_norm": 1.3359375, "learning_rate": 1.9058726650571477e-05, "loss": 0.6622, "step": 2498 }, { "epoch": 0.4326804458391949, "grad_norm": 1.3671875, "learning_rate": 1.905795400528773e-05, "loss": 0.6488, "step": 2499 }, { "epoch": 0.43285358727410456, "grad_norm": 1.484375, "learning_rate": 1.9057181058695472e-05, "loss": 0.6853, "step": 2500 }, { "epoch": 0.43302672870901415, "grad_norm": 1.2734375, "learning_rate": 1.9056407810820424e-05, "loss": 0.5957, "step": 2501 }, { "epoch": 0.4331998701439238, "grad_norm": 1.3046875, "learning_rate": 1.9055634261688295e-05, "loss": 0.5786, "step": 2502 }, { "epoch": 0.43337301157883346, "grad_norm": 1.453125, "learning_rate": 1.905486041132483e-05, "loss": 0.5908, "step": 2503 }, { "epoch": 0.4335461530137431, "grad_norm": 1.4375, "learning_rate": 1.9054086259755758e-05, "loss": 0.5724, "step": 2504 }, { "epoch": 0.43371929444865276, "grad_norm": 1.328125, "learning_rate": 1.9053311807006845e-05, "loss": 0.667, "step": 2505 }, { "epoch": 0.4338924358835624, "grad_norm": 1.5, "learning_rate": 1.9052537053103842e-05, "loss": 0.5985, "step": 2506 }, { "epoch": 0.434065577318472, "grad_norm": 1.40625, "learning_rate": 1.905176199807252e-05, "loss": 0.6825, "step": 2507 }, { "epoch": 0.43423871875338166, "grad_norm": 1.3515625, "learning_rate": 1.905098664193867e-05, "loss": 0.5602, "step": 2508 }, { "epoch": 0.4344118601882913, "grad_norm": 1.5234375, "learning_rate": 1.9050210984728073e-05, "loss": 0.6727, "step": 2509 }, { "epoch": 0.43458500162320096, "grad_norm": 1.3203125, "learning_rate": 1.904943502646654e-05, "loss": 0.5975, "step": 2510 }, { "epoch": 0.4347581430581106, "grad_norm": 1.375, "learning_rate": 1.9048658767179883e-05, "loss": 0.5876, "step": 2511 }, { "epoch": 0.4349312844930202, "grad_norm": 1.5390625, "learning_rate": 1.904788220689391e-05, "loss": 0.5932, "step": 2512 }, { "epoch": 0.43510442592792986, "grad_norm": 1.6640625, "learning_rate": 1.9047105345634477e-05, "loss": 0.7631, "step": 2513 }, { "epoch": 0.4352775673628395, "grad_norm": 1.359375, "learning_rate": 1.90463281834274e-05, "loss": 0.649, "step": 2514 }, { "epoch": 0.43545070879774916, "grad_norm": 1.5546875, "learning_rate": 1.904555072029855e-05, "loss": 0.7724, "step": 2515 }, { "epoch": 0.4356238502326588, "grad_norm": 1.34375, "learning_rate": 1.9044772956273778e-05, "loss": 0.6563, "step": 2516 }, { "epoch": 0.43579699166756847, "grad_norm": 1.421875, "learning_rate": 1.9043994891378964e-05, "loss": 0.7933, "step": 2517 }, { "epoch": 0.43597013310247806, "grad_norm": 1.375, "learning_rate": 1.904321652563998e-05, "loss": 0.6181, "step": 2518 }, { "epoch": 0.4361432745373877, "grad_norm": 1.3359375, "learning_rate": 1.904243785908273e-05, "loss": 0.619, "step": 2519 }, { "epoch": 0.43631641597229737, "grad_norm": 1.3125, "learning_rate": 1.9041658891733105e-05, "loss": 0.5534, "step": 2520 }, { "epoch": 0.436489557407207, "grad_norm": 1.3828125, "learning_rate": 1.9040879623617025e-05, "loss": 0.6232, "step": 2521 }, { "epoch": 0.43666269884211667, "grad_norm": 1.359375, "learning_rate": 1.9040100054760406e-05, "loss": 0.6145, "step": 2522 }, { "epoch": 0.4368358402770263, "grad_norm": 1.4609375, "learning_rate": 1.9039320185189182e-05, "loss": 0.5952, "step": 2523 }, { "epoch": 0.4370089817119359, "grad_norm": 1.5390625, "learning_rate": 1.9038540014929298e-05, "loss": 0.6193, "step": 2524 }, { "epoch": 0.43718212314684557, "grad_norm": 1.4765625, "learning_rate": 1.9037759544006704e-05, "loss": 0.6303, "step": 2525 }, { "epoch": 0.4373552645817552, "grad_norm": 1.265625, "learning_rate": 1.9036978772447364e-05, "loss": 0.5669, "step": 2526 }, { "epoch": 0.4375284060166649, "grad_norm": 1.515625, "learning_rate": 1.9036197700277243e-05, "loss": 0.6644, "step": 2527 }, { "epoch": 0.4377015474515745, "grad_norm": 1.4765625, "learning_rate": 1.9035416327522326e-05, "loss": 0.6202, "step": 2528 }, { "epoch": 0.4378746888864841, "grad_norm": 1.3515625, "learning_rate": 1.9034634654208613e-05, "loss": 0.5947, "step": 2529 }, { "epoch": 0.43804783032139377, "grad_norm": 1.46875, "learning_rate": 1.9033852680362094e-05, "loss": 0.693, "step": 2530 }, { "epoch": 0.4382209717563034, "grad_norm": 1.328125, "learning_rate": 1.903307040600879e-05, "loss": 0.5456, "step": 2531 }, { "epoch": 0.4383941131912131, "grad_norm": 1.5, "learning_rate": 1.903228783117472e-05, "loss": 0.5619, "step": 2532 }, { "epoch": 0.4385672546261227, "grad_norm": 1.4765625, "learning_rate": 1.9031504955885917e-05, "loss": 0.5671, "step": 2533 }, { "epoch": 0.4387403960610324, "grad_norm": 1.375, "learning_rate": 1.903072178016842e-05, "loss": 0.645, "step": 2534 }, { "epoch": 0.438913537495942, "grad_norm": 1.390625, "learning_rate": 1.902993830404828e-05, "loss": 0.676, "step": 2535 }, { "epoch": 0.4390866789308516, "grad_norm": 1.3515625, "learning_rate": 1.9029154527551562e-05, "loss": 0.564, "step": 2536 }, { "epoch": 0.4392598203657613, "grad_norm": 1.3828125, "learning_rate": 1.9028370450704338e-05, "loss": 0.5568, "step": 2537 }, { "epoch": 0.43943296180067093, "grad_norm": 1.4921875, "learning_rate": 1.902758607353269e-05, "loss": 0.6134, "step": 2538 }, { "epoch": 0.4396061032355806, "grad_norm": 1.484375, "learning_rate": 1.9026801396062708e-05, "loss": 0.6152, "step": 2539 }, { "epoch": 0.43977924467049023, "grad_norm": 1.4140625, "learning_rate": 1.9026016418320497e-05, "loss": 0.5973, "step": 2540 }, { "epoch": 0.43995238610539983, "grad_norm": 1.3671875, "learning_rate": 1.9025231140332167e-05, "loss": 0.6451, "step": 2541 }, { "epoch": 0.4401255275403095, "grad_norm": 1.625, "learning_rate": 1.9024445562123837e-05, "loss": 0.5759, "step": 2542 }, { "epoch": 0.44029866897521913, "grad_norm": 1.2578125, "learning_rate": 1.9023659683721648e-05, "loss": 0.5885, "step": 2543 }, { "epoch": 0.4404718104101288, "grad_norm": 1.2265625, "learning_rate": 1.902287350515173e-05, "loss": 0.6181, "step": 2544 }, { "epoch": 0.44064495184503844, "grad_norm": 1.3203125, "learning_rate": 1.9022087026440245e-05, "loss": 0.5485, "step": 2545 }, { "epoch": 0.44081809327994803, "grad_norm": 1.28125, "learning_rate": 1.902130024761335e-05, "loss": 0.5808, "step": 2546 }, { "epoch": 0.4409912347148577, "grad_norm": 1.328125, "learning_rate": 1.9020513168697214e-05, "loss": 0.5879, "step": 2547 }, { "epoch": 0.44116437614976733, "grad_norm": 1.2578125, "learning_rate": 1.9019725789718024e-05, "loss": 0.585, "step": 2548 }, { "epoch": 0.441337517584677, "grad_norm": 1.3984375, "learning_rate": 1.901893811070197e-05, "loss": 0.6239, "step": 2549 }, { "epoch": 0.44151065901958664, "grad_norm": 1.3359375, "learning_rate": 1.9018150131675253e-05, "loss": 0.5993, "step": 2550 }, { "epoch": 0.4416838004544963, "grad_norm": 1.40625, "learning_rate": 1.9017361852664085e-05, "loss": 0.5819, "step": 2551 }, { "epoch": 0.4418569418894059, "grad_norm": 1.5625, "learning_rate": 1.9016573273694688e-05, "loss": 0.6095, "step": 2552 }, { "epoch": 0.44203008332431554, "grad_norm": 1.328125, "learning_rate": 1.9015784394793296e-05, "loss": 0.6354, "step": 2553 }, { "epoch": 0.4422032247592252, "grad_norm": 1.3515625, "learning_rate": 1.9014995215986147e-05, "loss": 0.5933, "step": 2554 }, { "epoch": 0.44237636619413484, "grad_norm": 1.375, "learning_rate": 1.9014205737299492e-05, "loss": 0.6083, "step": 2555 }, { "epoch": 0.4425495076290445, "grad_norm": 1.421875, "learning_rate": 1.90134159587596e-05, "loss": 0.6475, "step": 2556 }, { "epoch": 0.44272264906395414, "grad_norm": 1.3359375, "learning_rate": 1.9012625880392733e-05, "loss": 0.5504, "step": 2557 }, { "epoch": 0.44289579049886374, "grad_norm": 1.4140625, "learning_rate": 1.901183550222518e-05, "loss": 0.6038, "step": 2558 }, { "epoch": 0.4430689319337734, "grad_norm": 1.3671875, "learning_rate": 1.901104482428323e-05, "loss": 0.5806, "step": 2559 }, { "epoch": 0.44324207336868304, "grad_norm": 1.515625, "learning_rate": 1.901025384659318e-05, "loss": 0.5901, "step": 2560 }, { "epoch": 0.4434152148035927, "grad_norm": 1.4296875, "learning_rate": 1.9009462569181347e-05, "loss": 0.597, "step": 2561 }, { "epoch": 0.44358835623850235, "grad_norm": 1.40625, "learning_rate": 1.9008670992074056e-05, "loss": 0.5689, "step": 2562 }, { "epoch": 0.44376149767341194, "grad_norm": 1.5, "learning_rate": 1.9007879115297627e-05, "loss": 0.5933, "step": 2563 }, { "epoch": 0.4439346391083216, "grad_norm": 1.390625, "learning_rate": 1.9007086938878408e-05, "loss": 0.5847, "step": 2564 }, { "epoch": 0.44410778054323125, "grad_norm": 1.3828125, "learning_rate": 1.9006294462842755e-05, "loss": 0.6309, "step": 2565 }, { "epoch": 0.4442809219781409, "grad_norm": 1.40625, "learning_rate": 1.900550168721702e-05, "loss": 0.5689, "step": 2566 }, { "epoch": 0.44445406341305055, "grad_norm": 1.3359375, "learning_rate": 1.900470861202758e-05, "loss": 0.6237, "step": 2567 }, { "epoch": 0.4446272048479602, "grad_norm": 1.3515625, "learning_rate": 1.9003915237300817e-05, "loss": 0.5404, "step": 2568 }, { "epoch": 0.4448003462828698, "grad_norm": 1.40625, "learning_rate": 1.900312156306312e-05, "loss": 0.5825, "step": 2569 }, { "epoch": 0.44497348771777945, "grad_norm": 1.328125, "learning_rate": 1.900232758934089e-05, "loss": 0.6349, "step": 2570 }, { "epoch": 0.4451466291526891, "grad_norm": 1.4765625, "learning_rate": 1.9001533316160538e-05, "loss": 0.6345, "step": 2571 }, { "epoch": 0.44531977058759875, "grad_norm": 1.5, "learning_rate": 1.9000738743548486e-05, "loss": 0.6141, "step": 2572 }, { "epoch": 0.4454929120225084, "grad_norm": 1.5234375, "learning_rate": 1.8999943871531165e-05, "loss": 0.5618, "step": 2573 }, { "epoch": 0.44566605345741805, "grad_norm": 1.2734375, "learning_rate": 1.8999148700135015e-05, "loss": 0.553, "step": 2574 }, { "epoch": 0.44583919489232765, "grad_norm": 1.4296875, "learning_rate": 1.899835322938649e-05, "loss": 0.7046, "step": 2575 }, { "epoch": 0.4460123363272373, "grad_norm": 1.375, "learning_rate": 1.8997557459312047e-05, "loss": 0.5183, "step": 2576 }, { "epoch": 0.44618547776214695, "grad_norm": 1.4296875, "learning_rate": 1.899676138993816e-05, "loss": 0.5809, "step": 2577 }, { "epoch": 0.4463586191970566, "grad_norm": 1.3671875, "learning_rate": 1.8995965021291308e-05, "loss": 0.5401, "step": 2578 }, { "epoch": 0.44653176063196626, "grad_norm": 1.34375, "learning_rate": 1.8995168353397985e-05, "loss": 0.5953, "step": 2579 }, { "epoch": 0.44670490206687585, "grad_norm": 1.28125, "learning_rate": 1.8994371386284686e-05, "loss": 0.5719, "step": 2580 }, { "epoch": 0.4468780435017855, "grad_norm": 1.3359375, "learning_rate": 1.8993574119977927e-05, "loss": 0.6277, "step": 2581 }, { "epoch": 0.44705118493669516, "grad_norm": 1.3984375, "learning_rate": 1.8992776554504223e-05, "loss": 0.6518, "step": 2582 }, { "epoch": 0.4472243263716048, "grad_norm": 1.3828125, "learning_rate": 1.899197868989011e-05, "loss": 0.6692, "step": 2583 }, { "epoch": 0.44739746780651446, "grad_norm": 1.3984375, "learning_rate": 1.899118052616213e-05, "loss": 0.5554, "step": 2584 }, { "epoch": 0.4475706092414241, "grad_norm": 1.421875, "learning_rate": 1.8990382063346827e-05, "loss": 0.6719, "step": 2585 }, { "epoch": 0.4477437506763337, "grad_norm": 1.3984375, "learning_rate": 1.8989583301470768e-05, "loss": 0.6748, "step": 2586 }, { "epoch": 0.44791689211124336, "grad_norm": 1.34375, "learning_rate": 1.898878424056052e-05, "loss": 0.6279, "step": 2587 }, { "epoch": 0.448090033546153, "grad_norm": 1.4296875, "learning_rate": 1.8987984880642664e-05, "loss": 0.6993, "step": 2588 }, { "epoch": 0.44826317498106266, "grad_norm": 1.375, "learning_rate": 1.8987185221743785e-05, "loss": 0.6572, "step": 2589 }, { "epoch": 0.4484363164159723, "grad_norm": 1.359375, "learning_rate": 1.8986385263890493e-05, "loss": 0.5917, "step": 2590 }, { "epoch": 0.44860945785088197, "grad_norm": 1.34375, "learning_rate": 1.898558500710939e-05, "loss": 0.5529, "step": 2591 }, { "epoch": 0.44878259928579156, "grad_norm": 1.3359375, "learning_rate": 1.89847844514271e-05, "loss": 0.6309, "step": 2592 }, { "epoch": 0.4489557407207012, "grad_norm": 1.3203125, "learning_rate": 1.898398359687026e-05, "loss": 0.5657, "step": 2593 }, { "epoch": 0.44912888215561086, "grad_norm": 1.375, "learning_rate": 1.8983182443465494e-05, "loss": 0.5647, "step": 2594 }, { "epoch": 0.4493020235905205, "grad_norm": 1.3984375, "learning_rate": 1.8982380991239468e-05, "loss": 0.5951, "step": 2595 }, { "epoch": 0.44947516502543017, "grad_norm": 1.328125, "learning_rate": 1.898157924021883e-05, "loss": 0.603, "step": 2596 }, { "epoch": 0.44964830646033976, "grad_norm": 1.3046875, "learning_rate": 1.8980777190430256e-05, "loss": 0.6115, "step": 2597 }, { "epoch": 0.4498214478952494, "grad_norm": 1.296875, "learning_rate": 1.8979974841900422e-05, "loss": 0.5772, "step": 2598 }, { "epoch": 0.44999458933015907, "grad_norm": 1.4609375, "learning_rate": 1.8979172194656025e-05, "loss": 0.5351, "step": 2599 }, { "epoch": 0.4501677307650687, "grad_norm": 1.4375, "learning_rate": 1.8978369248723757e-05, "loss": 0.6329, "step": 2600 }, { "epoch": 0.45034087219997837, "grad_norm": 1.359375, "learning_rate": 1.897756600413033e-05, "loss": 0.5646, "step": 2601 }, { "epoch": 0.450514013634888, "grad_norm": 1.390625, "learning_rate": 1.8976762460902465e-05, "loss": 0.5363, "step": 2602 }, { "epoch": 0.4506871550697976, "grad_norm": 1.3671875, "learning_rate": 1.897595861906689e-05, "loss": 0.6192, "step": 2603 }, { "epoch": 0.45086029650470727, "grad_norm": 1.40625, "learning_rate": 1.8975154478650345e-05, "loss": 0.6125, "step": 2604 }, { "epoch": 0.4510334379396169, "grad_norm": 1.4140625, "learning_rate": 1.897435003967958e-05, "loss": 0.5713, "step": 2605 }, { "epoch": 0.4512065793745266, "grad_norm": 1.6015625, "learning_rate": 1.8973545302181352e-05, "loss": 0.7418, "step": 2606 }, { "epoch": 0.4513797208094362, "grad_norm": 1.359375, "learning_rate": 1.8972740266182433e-05, "loss": 0.5589, "step": 2607 }, { "epoch": 0.4515528622443459, "grad_norm": 1.4453125, "learning_rate": 1.8971934931709598e-05, "loss": 0.5959, "step": 2608 }, { "epoch": 0.4517260036792555, "grad_norm": 1.3203125, "learning_rate": 1.8971129298789644e-05, "loss": 0.6285, "step": 2609 }, { "epoch": 0.4518991451141651, "grad_norm": 1.359375, "learning_rate": 1.897032336744936e-05, "loss": 0.598, "step": 2610 }, { "epoch": 0.4520722865490748, "grad_norm": 1.28125, "learning_rate": 1.896951713771556e-05, "loss": 0.5458, "step": 2611 }, { "epoch": 0.4522454279839844, "grad_norm": 1.4609375, "learning_rate": 1.8968710609615068e-05, "loss": 0.6417, "step": 2612 }, { "epoch": 0.4524185694188941, "grad_norm": 1.4140625, "learning_rate": 1.8967903783174706e-05, "loss": 0.5943, "step": 2613 }, { "epoch": 0.4525917108538037, "grad_norm": 1.421875, "learning_rate": 1.896709665842131e-05, "loss": 0.6352, "step": 2614 }, { "epoch": 0.4527648522887133, "grad_norm": 1.4453125, "learning_rate": 1.8966289235381737e-05, "loss": 0.6665, "step": 2615 }, { "epoch": 0.452937993723623, "grad_norm": 1.3203125, "learning_rate": 1.896548151408284e-05, "loss": 0.5159, "step": 2616 }, { "epoch": 0.45311113515853263, "grad_norm": 1.2734375, "learning_rate": 1.8964673494551487e-05, "loss": 0.601, "step": 2617 }, { "epoch": 0.4532842765934423, "grad_norm": 1.40625, "learning_rate": 1.8963865176814557e-05, "loss": 0.6268, "step": 2618 }, { "epoch": 0.45345741802835193, "grad_norm": 1.4375, "learning_rate": 1.8963056560898944e-05, "loss": 0.6431, "step": 2619 }, { "epoch": 0.45363055946326153, "grad_norm": 1.3671875, "learning_rate": 1.896224764683154e-05, "loss": 0.6092, "step": 2620 }, { "epoch": 0.4538037008981712, "grad_norm": 1.3984375, "learning_rate": 1.8961438434639255e-05, "loss": 0.6126, "step": 2621 }, { "epoch": 0.45397684233308083, "grad_norm": 1.2734375, "learning_rate": 1.8960628924349006e-05, "loss": 0.5988, "step": 2622 }, { "epoch": 0.4541499837679905, "grad_norm": 1.4140625, "learning_rate": 1.895981911598772e-05, "loss": 0.5742, "step": 2623 }, { "epoch": 0.45432312520290014, "grad_norm": 1.28125, "learning_rate": 1.8959009009582342e-05, "loss": 0.6012, "step": 2624 }, { "epoch": 0.4544962666378098, "grad_norm": 1.3984375, "learning_rate": 1.8958198605159808e-05, "loss": 0.5986, "step": 2625 }, { "epoch": 0.4546694080727194, "grad_norm": 1.421875, "learning_rate": 1.8957387902747085e-05, "loss": 0.6261, "step": 2626 }, { "epoch": 0.45484254950762903, "grad_norm": 1.4296875, "learning_rate": 1.895657690237114e-05, "loss": 0.5761, "step": 2627 }, { "epoch": 0.4550156909425387, "grad_norm": 1.4375, "learning_rate": 1.8955765604058948e-05, "loss": 0.5538, "step": 2628 }, { "epoch": 0.45518883237744834, "grad_norm": 1.4140625, "learning_rate": 1.8954954007837492e-05, "loss": 0.5283, "step": 2629 }, { "epoch": 0.455361973812358, "grad_norm": 1.359375, "learning_rate": 1.895414211373378e-05, "loss": 0.605, "step": 2630 }, { "epoch": 0.4555351152472676, "grad_norm": 1.2890625, "learning_rate": 1.895332992177481e-05, "loss": 0.619, "step": 2631 }, { "epoch": 0.45570825668217724, "grad_norm": 1.3515625, "learning_rate": 1.8952517431987604e-05, "loss": 0.6027, "step": 2632 }, { "epoch": 0.4558813981170869, "grad_norm": 1.46875, "learning_rate": 1.8951704644399188e-05, "loss": 0.6624, "step": 2633 }, { "epoch": 0.45605453955199654, "grad_norm": 1.375, "learning_rate": 1.89508915590366e-05, "loss": 0.5717, "step": 2634 }, { "epoch": 0.4562276809869062, "grad_norm": 1.4609375, "learning_rate": 1.8950078175926886e-05, "loss": 0.5634, "step": 2635 }, { "epoch": 0.45640082242181584, "grad_norm": 1.34375, "learning_rate": 1.89492644950971e-05, "loss": 0.5737, "step": 2636 }, { "epoch": 0.45657396385672544, "grad_norm": 1.3671875, "learning_rate": 1.894845051657431e-05, "loss": 0.6211, "step": 2637 }, { "epoch": 0.4567471052916351, "grad_norm": 1.34375, "learning_rate": 1.89476362403856e-05, "loss": 0.6333, "step": 2638 }, { "epoch": 0.45692024672654474, "grad_norm": 1.53125, "learning_rate": 1.8946821666558043e-05, "loss": 0.6033, "step": 2639 }, { "epoch": 0.4570933881614544, "grad_norm": 1.4765625, "learning_rate": 1.8946006795118746e-05, "loss": 0.5837, "step": 2640 }, { "epoch": 0.45726652959636405, "grad_norm": 1.484375, "learning_rate": 1.8945191626094813e-05, "loss": 0.6752, "step": 2641 }, { "epoch": 0.4574396710312737, "grad_norm": 1.3984375, "learning_rate": 1.894437615951336e-05, "loss": 0.6024, "step": 2642 }, { "epoch": 0.4576128124661833, "grad_norm": 1.8671875, "learning_rate": 1.8943560395401505e-05, "loss": 0.6604, "step": 2643 }, { "epoch": 0.45778595390109295, "grad_norm": 1.578125, "learning_rate": 1.89427443337864e-05, "loss": 0.5795, "step": 2644 }, { "epoch": 0.4579590953360026, "grad_norm": 1.5390625, "learning_rate": 1.8941927974695174e-05, "loss": 0.6512, "step": 2645 }, { "epoch": 0.45813223677091225, "grad_norm": 1.4453125, "learning_rate": 1.8941111318154998e-05, "loss": 0.6061, "step": 2646 }, { "epoch": 0.4583053782058219, "grad_norm": 1.4921875, "learning_rate": 1.894029436419303e-05, "loss": 0.5986, "step": 2647 }, { "epoch": 0.4584785196407315, "grad_norm": 1.2421875, "learning_rate": 1.8939477112836445e-05, "loss": 0.5581, "step": 2648 }, { "epoch": 0.45865166107564115, "grad_norm": 1.5859375, "learning_rate": 1.8938659564112427e-05, "loss": 0.644, "step": 2649 }, { "epoch": 0.4588248025105508, "grad_norm": 1.2265625, "learning_rate": 1.893784171804818e-05, "loss": 0.5362, "step": 2650 }, { "epoch": 0.45899794394546045, "grad_norm": 1.4609375, "learning_rate": 1.8937023574670897e-05, "loss": 0.5886, "step": 2651 }, { "epoch": 0.4591710853803701, "grad_norm": 1.53125, "learning_rate": 1.8936205134007802e-05, "loss": 0.6879, "step": 2652 }, { "epoch": 0.45934422681527975, "grad_norm": 1.3671875, "learning_rate": 1.8935386396086118e-05, "loss": 0.5358, "step": 2653 }, { "epoch": 0.45951736825018935, "grad_norm": 1.3984375, "learning_rate": 1.8934567360933078e-05, "loss": 0.5411, "step": 2654 }, { "epoch": 0.459690509685099, "grad_norm": 1.359375, "learning_rate": 1.893374802857593e-05, "loss": 0.7055, "step": 2655 }, { "epoch": 0.45986365112000865, "grad_norm": 1.390625, "learning_rate": 1.8932928399041927e-05, "loss": 0.6658, "step": 2656 }, { "epoch": 0.4600367925549183, "grad_norm": 1.34375, "learning_rate": 1.8932108472358335e-05, "loss": 0.6502, "step": 2657 }, { "epoch": 0.46020993398982796, "grad_norm": 1.3359375, "learning_rate": 1.8931288248552423e-05, "loss": 0.577, "step": 2658 }, { "epoch": 0.4603830754247376, "grad_norm": 1.390625, "learning_rate": 1.8930467727651484e-05, "loss": 0.617, "step": 2659 }, { "epoch": 0.4605562168596472, "grad_norm": 1.25, "learning_rate": 1.8929646909682802e-05, "loss": 0.5482, "step": 2660 }, { "epoch": 0.46072935829455686, "grad_norm": 1.4296875, "learning_rate": 1.892882579467369e-05, "loss": 0.586, "step": 2661 }, { "epoch": 0.4609024997294665, "grad_norm": 1.28125, "learning_rate": 1.892800438265146e-05, "loss": 0.5474, "step": 2662 }, { "epoch": 0.46107564116437616, "grad_norm": 1.3203125, "learning_rate": 1.8927182673643434e-05, "loss": 0.6593, "step": 2663 }, { "epoch": 0.4612487825992858, "grad_norm": 1.5390625, "learning_rate": 1.8926360667676946e-05, "loss": 0.5678, "step": 2664 }, { "epoch": 0.4614219240341954, "grad_norm": 1.3984375, "learning_rate": 1.892553836477934e-05, "loss": 0.6734, "step": 2665 }, { "epoch": 0.46159506546910506, "grad_norm": 1.359375, "learning_rate": 1.892471576497797e-05, "loss": 0.5692, "step": 2666 }, { "epoch": 0.4617682069040147, "grad_norm": 1.4453125, "learning_rate": 1.8923892868300198e-05, "loss": 0.5863, "step": 2667 }, { "epoch": 0.46194134833892436, "grad_norm": 1.3359375, "learning_rate": 1.89230696747734e-05, "loss": 0.6284, "step": 2668 }, { "epoch": 0.462114489773834, "grad_norm": 1.2890625, "learning_rate": 1.8922246184424954e-05, "loss": 0.5913, "step": 2669 }, { "epoch": 0.46228763120874367, "grad_norm": 1.359375, "learning_rate": 1.8921422397282256e-05, "loss": 0.5736, "step": 2670 }, { "epoch": 0.46246077264365326, "grad_norm": 1.2734375, "learning_rate": 1.8920598313372715e-05, "loss": 0.571, "step": 2671 }, { "epoch": 0.4626339140785629, "grad_norm": 1.28125, "learning_rate": 1.8919773932723734e-05, "loss": 0.5664, "step": 2672 }, { "epoch": 0.46280705551347256, "grad_norm": 1.28125, "learning_rate": 1.8918949255362738e-05, "loss": 0.6233, "step": 2673 }, { "epoch": 0.4629801969483822, "grad_norm": 1.390625, "learning_rate": 1.8918124281317162e-05, "loss": 0.585, "step": 2674 }, { "epoch": 0.46315333838329187, "grad_norm": 1.421875, "learning_rate": 1.891729901061445e-05, "loss": 0.5614, "step": 2675 }, { "epoch": 0.4633264798182015, "grad_norm": 1.375, "learning_rate": 1.8916473443282052e-05, "loss": 0.6032, "step": 2676 }, { "epoch": 0.4634996212531111, "grad_norm": 1.4453125, "learning_rate": 1.8915647579347426e-05, "loss": 0.6165, "step": 2677 }, { "epoch": 0.46367276268802077, "grad_norm": 1.578125, "learning_rate": 1.891482141883805e-05, "loss": 0.5654, "step": 2678 }, { "epoch": 0.4638459041229304, "grad_norm": 1.4921875, "learning_rate": 1.8913994961781404e-05, "loss": 0.624, "step": 2679 }, { "epoch": 0.46401904555784007, "grad_norm": 1.328125, "learning_rate": 1.8913168208204977e-05, "loss": 0.6382, "step": 2680 }, { "epoch": 0.4641921869927497, "grad_norm": 1.375, "learning_rate": 1.8912341158136275e-05, "loss": 0.6266, "step": 2681 }, { "epoch": 0.4643653284276593, "grad_norm": 1.4609375, "learning_rate": 1.8911513811602805e-05, "loss": 0.6104, "step": 2682 }, { "epoch": 0.46453846986256897, "grad_norm": 1.328125, "learning_rate": 1.8910686168632096e-05, "loss": 0.5727, "step": 2683 }, { "epoch": 0.4647116112974786, "grad_norm": 1.328125, "learning_rate": 1.890985822925167e-05, "loss": 0.6247, "step": 2684 }, { "epoch": 0.4648847527323883, "grad_norm": 1.3828125, "learning_rate": 1.890902999348907e-05, "loss": 0.6191, "step": 2685 }, { "epoch": 0.4650578941672979, "grad_norm": 1.3203125, "learning_rate": 1.8908201461371852e-05, "loss": 0.5778, "step": 2686 }, { "epoch": 0.4652310356022076, "grad_norm": 1.3515625, "learning_rate": 1.8907372632927573e-05, "loss": 0.6481, "step": 2687 }, { "epoch": 0.4654041770371172, "grad_norm": 1.2578125, "learning_rate": 1.89065435081838e-05, "loss": 0.6174, "step": 2688 }, { "epoch": 0.4655773184720268, "grad_norm": 1.4140625, "learning_rate": 1.8905714087168123e-05, "loss": 0.5458, "step": 2689 }, { "epoch": 0.4657504599069365, "grad_norm": 1.296875, "learning_rate": 1.890488436990812e-05, "loss": 0.5618, "step": 2690 }, { "epoch": 0.4659236013418461, "grad_norm": 1.3671875, "learning_rate": 1.89040543564314e-05, "loss": 0.5737, "step": 2691 }, { "epoch": 0.4660967427767558, "grad_norm": 1.359375, "learning_rate": 1.8903224046765575e-05, "loss": 0.5566, "step": 2692 }, { "epoch": 0.46626988421166543, "grad_norm": 1.65625, "learning_rate": 1.890239344093826e-05, "loss": 0.76, "step": 2693 }, { "epoch": 0.466443025646575, "grad_norm": 1.390625, "learning_rate": 1.8901562538977085e-05, "loss": 0.6091, "step": 2694 }, { "epoch": 0.4666161670814847, "grad_norm": 1.375, "learning_rate": 1.8900731340909685e-05, "loss": 0.627, "step": 2695 }, { "epoch": 0.46678930851639433, "grad_norm": 1.390625, "learning_rate": 1.889989984676372e-05, "loss": 0.6089, "step": 2696 }, { "epoch": 0.466962449951304, "grad_norm": 1.40625, "learning_rate": 1.889906805656684e-05, "loss": 0.6275, "step": 2697 }, { "epoch": 0.46713559138621363, "grad_norm": 1.359375, "learning_rate": 1.8898235970346718e-05, "loss": 0.5565, "step": 2698 }, { "epoch": 0.46730873282112323, "grad_norm": 1.4609375, "learning_rate": 1.8897403588131033e-05, "loss": 0.5819, "step": 2699 }, { "epoch": 0.4674818742560329, "grad_norm": 1.3828125, "learning_rate": 1.8896570909947477e-05, "loss": 0.5838, "step": 2700 }, { "epoch": 0.46765501569094253, "grad_norm": 1.5234375, "learning_rate": 1.889573793582374e-05, "loss": 0.6441, "step": 2701 }, { "epoch": 0.4678281571258522, "grad_norm": 1.46875, "learning_rate": 1.8894904665787536e-05, "loss": 0.6205, "step": 2702 }, { "epoch": 0.46800129856076184, "grad_norm": 1.28125, "learning_rate": 1.8894071099866584e-05, "loss": 0.5487, "step": 2703 }, { "epoch": 0.4681744399956715, "grad_norm": 1.3515625, "learning_rate": 1.889323723808861e-05, "loss": 0.6055, "step": 2704 }, { "epoch": 0.4683475814305811, "grad_norm": 1.4609375, "learning_rate": 1.8892403080481358e-05, "loss": 0.6642, "step": 2705 }, { "epoch": 0.46852072286549074, "grad_norm": 1.3125, "learning_rate": 1.8891568627072568e-05, "loss": 0.5668, "step": 2706 }, { "epoch": 0.4686938643004004, "grad_norm": 1.3125, "learning_rate": 1.889073387789e-05, "loss": 0.5512, "step": 2707 }, { "epoch": 0.46886700573531004, "grad_norm": 1.3125, "learning_rate": 1.888989883296142e-05, "loss": 0.5734, "step": 2708 }, { "epoch": 0.4690401471702197, "grad_norm": 1.5625, "learning_rate": 1.888906349231461e-05, "loss": 0.6296, "step": 2709 }, { "epoch": 0.46921328860512934, "grad_norm": 1.3125, "learning_rate": 1.8888227855977355e-05, "loss": 0.6534, "step": 2710 }, { "epoch": 0.46938643004003894, "grad_norm": 1.3828125, "learning_rate": 1.888739192397745e-05, "loss": 0.6443, "step": 2711 }, { "epoch": 0.4695595714749486, "grad_norm": 1.4296875, "learning_rate": 1.8886555696342706e-05, "loss": 0.5742, "step": 2712 }, { "epoch": 0.46973271290985824, "grad_norm": 1.328125, "learning_rate": 1.8885719173100937e-05, "loss": 0.6255, "step": 2713 }, { "epoch": 0.4699058543447679, "grad_norm": 1.3046875, "learning_rate": 1.8884882354279967e-05, "loss": 0.5097, "step": 2714 }, { "epoch": 0.47007899577967754, "grad_norm": 1.5625, "learning_rate": 1.888404523990764e-05, "loss": 0.7754, "step": 2715 }, { "epoch": 0.47025213721458714, "grad_norm": 1.3828125, "learning_rate": 1.8883207830011795e-05, "loss": 0.5603, "step": 2716 }, { "epoch": 0.4704252786494968, "grad_norm": 1.5078125, "learning_rate": 1.8882370124620287e-05, "loss": 0.6735, "step": 2717 }, { "epoch": 0.47059842008440644, "grad_norm": 1.3203125, "learning_rate": 1.888153212376099e-05, "loss": 0.6272, "step": 2718 }, { "epoch": 0.4707715615193161, "grad_norm": 1.328125, "learning_rate": 1.8880693827461774e-05, "loss": 0.5648, "step": 2719 }, { "epoch": 0.47094470295422575, "grad_norm": 1.3984375, "learning_rate": 1.8879855235750525e-05, "loss": 0.5989, "step": 2720 }, { "epoch": 0.4711178443891354, "grad_norm": 1.359375, "learning_rate": 1.887901634865514e-05, "loss": 0.5862, "step": 2721 }, { "epoch": 0.471290985824045, "grad_norm": 1.3671875, "learning_rate": 1.8878177166203523e-05, "loss": 0.5863, "step": 2722 }, { "epoch": 0.47146412725895465, "grad_norm": 1.359375, "learning_rate": 1.8877337688423586e-05, "loss": 0.5733, "step": 2723 }, { "epoch": 0.4716372686938643, "grad_norm": 1.4453125, "learning_rate": 1.887649791534326e-05, "loss": 0.6323, "step": 2724 }, { "epoch": 0.47181041012877395, "grad_norm": 1.359375, "learning_rate": 1.8875657846990476e-05, "loss": 0.6037, "step": 2725 }, { "epoch": 0.4719835515636836, "grad_norm": 1.546875, "learning_rate": 1.887481748339318e-05, "loss": 0.5909, "step": 2726 }, { "epoch": 0.47215669299859325, "grad_norm": 1.3046875, "learning_rate": 1.8873976824579324e-05, "loss": 0.6385, "step": 2727 }, { "epoch": 0.47232983443350285, "grad_norm": 1.34375, "learning_rate": 1.887313587057687e-05, "loss": 0.6101, "step": 2728 }, { "epoch": 0.4725029758684125, "grad_norm": 1.546875, "learning_rate": 1.8872294621413805e-05, "loss": 0.6764, "step": 2729 }, { "epoch": 0.47267611730332215, "grad_norm": 1.2578125, "learning_rate": 1.88714530771181e-05, "loss": 0.5712, "step": 2730 }, { "epoch": 0.4728492587382318, "grad_norm": 1.2890625, "learning_rate": 1.8870611237717745e-05, "loss": 0.6171, "step": 2731 }, { "epoch": 0.47302240017314146, "grad_norm": 1.3125, "learning_rate": 1.8869769103240755e-05, "loss": 0.5777, "step": 2732 }, { "epoch": 0.47319554160805105, "grad_norm": 1.3046875, "learning_rate": 1.886892667371514e-05, "loss": 0.6149, "step": 2733 }, { "epoch": 0.4733686830429607, "grad_norm": 1.5703125, "learning_rate": 1.886808394916892e-05, "loss": 0.6331, "step": 2734 }, { "epoch": 0.47354182447787035, "grad_norm": 1.328125, "learning_rate": 1.8867240929630126e-05, "loss": 0.6004, "step": 2735 }, { "epoch": 0.47371496591278, "grad_norm": 1.4296875, "learning_rate": 1.8866397615126807e-05, "loss": 0.5858, "step": 2736 }, { "epoch": 0.47388810734768966, "grad_norm": 1.390625, "learning_rate": 1.886555400568701e-05, "loss": 0.6221, "step": 2737 }, { "epoch": 0.4740612487825993, "grad_norm": 1.5, "learning_rate": 1.8864710101338805e-05, "loss": 0.6527, "step": 2738 }, { "epoch": 0.4742343902175089, "grad_norm": 1.3828125, "learning_rate": 1.8863865902110253e-05, "loss": 0.588, "step": 2739 }, { "epoch": 0.47440753165241856, "grad_norm": 1.4609375, "learning_rate": 1.886302140802944e-05, "loss": 0.6223, "step": 2740 }, { "epoch": 0.4745806730873282, "grad_norm": 1.3359375, "learning_rate": 1.8862176619124467e-05, "loss": 0.6234, "step": 2741 }, { "epoch": 0.47475381452223786, "grad_norm": 1.359375, "learning_rate": 1.886133153542342e-05, "loss": 0.5811, "step": 2742 }, { "epoch": 0.4749269559571475, "grad_norm": 1.421875, "learning_rate": 1.8860486156954424e-05, "loss": 0.678, "step": 2743 }, { "epoch": 0.47510009739205716, "grad_norm": 1.3515625, "learning_rate": 1.8859640483745593e-05, "loss": 0.6046, "step": 2744 }, { "epoch": 0.47527323882696676, "grad_norm": 1.40625, "learning_rate": 1.8858794515825054e-05, "loss": 0.6171, "step": 2745 }, { "epoch": 0.4754463802618764, "grad_norm": 1.4140625, "learning_rate": 1.885794825322096e-05, "loss": 0.6278, "step": 2746 }, { "epoch": 0.47561952169678606, "grad_norm": 1.3203125, "learning_rate": 1.885710169596145e-05, "loss": 0.6468, "step": 2747 }, { "epoch": 0.4757926631316957, "grad_norm": 1.3359375, "learning_rate": 1.8856254844074687e-05, "loss": 0.5565, "step": 2748 }, { "epoch": 0.47596580456660537, "grad_norm": 1.359375, "learning_rate": 1.8855407697588846e-05, "loss": 0.5912, "step": 2749 }, { "epoch": 0.47613894600151496, "grad_norm": 1.34375, "learning_rate": 1.8854560256532098e-05, "loss": 0.6543, "step": 2750 }, { "epoch": 0.4763120874364246, "grad_norm": 1.390625, "learning_rate": 1.8853712520932647e-05, "loss": 0.6205, "step": 2751 }, { "epoch": 0.47648522887133427, "grad_norm": 1.3203125, "learning_rate": 1.8852864490818678e-05, "loss": 0.5892, "step": 2752 }, { "epoch": 0.4766583703062439, "grad_norm": 1.3515625, "learning_rate": 1.8852016166218407e-05, "loss": 0.6174, "step": 2753 }, { "epoch": 0.47683151174115357, "grad_norm": 1.40625, "learning_rate": 1.8851167547160055e-05, "loss": 0.7055, "step": 2754 }, { "epoch": 0.4770046531760632, "grad_norm": 1.3359375, "learning_rate": 1.8850318633671844e-05, "loss": 0.5542, "step": 2755 }, { "epoch": 0.4771777946109728, "grad_norm": 1.3359375, "learning_rate": 1.884946942578202e-05, "loss": 0.5522, "step": 2756 }, { "epoch": 0.47735093604588247, "grad_norm": 1.5234375, "learning_rate": 1.8848619923518826e-05, "loss": 0.6338, "step": 2757 }, { "epoch": 0.4775240774807921, "grad_norm": 1.3671875, "learning_rate": 1.8847770126910524e-05, "loss": 0.5279, "step": 2758 }, { "epoch": 0.47769721891570177, "grad_norm": 1.4921875, "learning_rate": 1.8846920035985384e-05, "loss": 0.6725, "step": 2759 }, { "epoch": 0.4778703603506114, "grad_norm": 1.40625, "learning_rate": 1.8846069650771677e-05, "loss": 0.5924, "step": 2760 }, { "epoch": 0.4780435017855211, "grad_norm": 1.3828125, "learning_rate": 1.8845218971297695e-05, "loss": 0.5764, "step": 2761 }, { "epoch": 0.47821664322043067, "grad_norm": 1.3203125, "learning_rate": 1.8844367997591734e-05, "loss": 0.6166, "step": 2762 }, { "epoch": 0.4783897846553403, "grad_norm": 1.296875, "learning_rate": 1.8843516729682103e-05, "loss": 0.5574, "step": 2763 }, { "epoch": 0.47856292609025, "grad_norm": 1.6015625, "learning_rate": 1.884266516759712e-05, "loss": 0.6782, "step": 2764 }, { "epoch": 0.4787360675251596, "grad_norm": 1.4140625, "learning_rate": 1.8841813311365105e-05, "loss": 0.6001, "step": 2765 }, { "epoch": 0.4789092089600693, "grad_norm": 1.4296875, "learning_rate": 1.8840961161014402e-05, "loss": 0.6678, "step": 2766 }, { "epoch": 0.4790823503949789, "grad_norm": 1.375, "learning_rate": 1.8840108716573354e-05, "loss": 0.6059, "step": 2767 }, { "epoch": 0.4792554918298885, "grad_norm": 1.3515625, "learning_rate": 1.8839255978070318e-05, "loss": 0.5636, "step": 2768 }, { "epoch": 0.4794286332647982, "grad_norm": 1.515625, "learning_rate": 1.883840294553366e-05, "loss": 0.6356, "step": 2769 }, { "epoch": 0.47960177469970783, "grad_norm": 1.3671875, "learning_rate": 1.8837549618991757e-05, "loss": 0.6019, "step": 2770 }, { "epoch": 0.4797749161346175, "grad_norm": 1.359375, "learning_rate": 1.883669599847299e-05, "loss": 0.5877, "step": 2771 }, { "epoch": 0.47994805756952713, "grad_norm": 1.3203125, "learning_rate": 1.883584208400576e-05, "loss": 0.5655, "step": 2772 }, { "epoch": 0.4801211990044367, "grad_norm": 1.3359375, "learning_rate": 1.883498787561847e-05, "loss": 0.594, "step": 2773 }, { "epoch": 0.4802943404393464, "grad_norm": 1.4375, "learning_rate": 1.8834133373339533e-05, "loss": 0.635, "step": 2774 }, { "epoch": 0.48046748187425603, "grad_norm": 1.3984375, "learning_rate": 1.8833278577197373e-05, "loss": 0.5709, "step": 2775 }, { "epoch": 0.4806406233091657, "grad_norm": 1.421875, "learning_rate": 1.883242348722043e-05, "loss": 0.544, "step": 2776 }, { "epoch": 0.48081376474407533, "grad_norm": 1.40625, "learning_rate": 1.883156810343714e-05, "loss": 0.6128, "step": 2777 }, { "epoch": 0.480986906178985, "grad_norm": 1.4375, "learning_rate": 1.8830712425875964e-05, "loss": 0.5795, "step": 2778 }, { "epoch": 0.4811600476138946, "grad_norm": 1.375, "learning_rate": 1.8829856454565365e-05, "loss": 0.6901, "step": 2779 }, { "epoch": 0.48133318904880423, "grad_norm": 1.4609375, "learning_rate": 1.882900018953381e-05, "loss": 0.5578, "step": 2780 }, { "epoch": 0.4815063304837139, "grad_norm": 1.3125, "learning_rate": 1.882814363080979e-05, "loss": 0.6109, "step": 2781 }, { "epoch": 0.48167947191862354, "grad_norm": 1.3671875, "learning_rate": 1.8827286778421795e-05, "loss": 0.5907, "step": 2782 }, { "epoch": 0.4818526133535332, "grad_norm": 1.359375, "learning_rate": 1.8826429632398328e-05, "loss": 0.6107, "step": 2783 }, { "epoch": 0.4820257547884428, "grad_norm": 1.5, "learning_rate": 1.8825572192767895e-05, "loss": 0.5829, "step": 2784 }, { "epoch": 0.48219889622335244, "grad_norm": 1.4296875, "learning_rate": 1.882471445955903e-05, "loss": 0.6493, "step": 2785 }, { "epoch": 0.4823720376582621, "grad_norm": 1.5859375, "learning_rate": 1.882385643280026e-05, "loss": 0.5619, "step": 2786 }, { "epoch": 0.48254517909317174, "grad_norm": 1.421875, "learning_rate": 1.8822998112520125e-05, "loss": 0.6598, "step": 2787 }, { "epoch": 0.4827183205280814, "grad_norm": 1.375, "learning_rate": 1.8822139498747176e-05, "loss": 0.5121, "step": 2788 }, { "epoch": 0.48289146196299104, "grad_norm": 1.6328125, "learning_rate": 1.882128059150998e-05, "loss": 0.6001, "step": 2789 }, { "epoch": 0.48306460339790064, "grad_norm": 1.3203125, "learning_rate": 1.8820421390837103e-05, "loss": 0.5844, "step": 2790 }, { "epoch": 0.4832377448328103, "grad_norm": 1.3671875, "learning_rate": 1.8819561896757124e-05, "loss": 0.6049, "step": 2791 }, { "epoch": 0.48341088626771994, "grad_norm": 1.484375, "learning_rate": 1.881870210929864e-05, "loss": 0.6461, "step": 2792 }, { "epoch": 0.4835840277026296, "grad_norm": 1.453125, "learning_rate": 1.881784202849025e-05, "loss": 0.6514, "step": 2793 }, { "epoch": 0.48375716913753924, "grad_norm": 1.3046875, "learning_rate": 1.8816981654360563e-05, "loss": 0.6019, "step": 2794 }, { "epoch": 0.4839303105724489, "grad_norm": 1.453125, "learning_rate": 1.8816120986938195e-05, "loss": 0.6433, "step": 2795 }, { "epoch": 0.4841034520073585, "grad_norm": 1.390625, "learning_rate": 1.881526002625178e-05, "loss": 0.6332, "step": 2796 }, { "epoch": 0.48427659344226814, "grad_norm": 1.46875, "learning_rate": 1.881439877232996e-05, "loss": 0.6472, "step": 2797 }, { "epoch": 0.4844497348771778, "grad_norm": 1.2734375, "learning_rate": 1.8813537225201378e-05, "loss": 0.5386, "step": 2798 }, { "epoch": 0.48462287631208745, "grad_norm": 1.4296875, "learning_rate": 1.8812675384894694e-05, "loss": 0.6674, "step": 2799 }, { "epoch": 0.4847960177469971, "grad_norm": 1.453125, "learning_rate": 1.8811813251438583e-05, "loss": 0.6293, "step": 2800 }, { "epoch": 0.4849691591819067, "grad_norm": 1.4140625, "learning_rate": 1.8810950824861714e-05, "loss": 0.6214, "step": 2801 }, { "epoch": 0.48514230061681635, "grad_norm": 1.7265625, "learning_rate": 1.8810088105192786e-05, "loss": 0.6375, "step": 2802 }, { "epoch": 0.485315442051726, "grad_norm": 1.3515625, "learning_rate": 1.8809225092460488e-05, "loss": 0.6257, "step": 2803 }, { "epoch": 0.48548858348663565, "grad_norm": 1.3984375, "learning_rate": 1.8808361786693533e-05, "loss": 0.5307, "step": 2804 }, { "epoch": 0.4856617249215453, "grad_norm": 1.296875, "learning_rate": 1.8807498187920633e-05, "loss": 0.5747, "step": 2805 }, { "epoch": 0.48583486635645495, "grad_norm": 1.3515625, "learning_rate": 1.880663429617052e-05, "loss": 0.5896, "step": 2806 }, { "epoch": 0.48600800779136455, "grad_norm": 1.5, "learning_rate": 1.880577011147193e-05, "loss": 0.6043, "step": 2807 }, { "epoch": 0.4861811492262742, "grad_norm": 1.4453125, "learning_rate": 1.880490563385361e-05, "loss": 0.618, "step": 2808 }, { "epoch": 0.48635429066118385, "grad_norm": 1.4453125, "learning_rate": 1.8804040863344313e-05, "loss": 0.6147, "step": 2809 }, { "epoch": 0.4865274320960935, "grad_norm": 1.296875, "learning_rate": 1.880317579997281e-05, "loss": 0.5656, "step": 2810 }, { "epoch": 0.48670057353100316, "grad_norm": 1.5859375, "learning_rate": 1.8802310443767875e-05, "loss": 0.5801, "step": 2811 }, { "epoch": 0.4868737149659128, "grad_norm": 1.4296875, "learning_rate": 1.880144479475829e-05, "loss": 0.5835, "step": 2812 }, { "epoch": 0.4870468564008224, "grad_norm": 1.5078125, "learning_rate": 1.8800578852972858e-05, "loss": 0.6259, "step": 2813 }, { "epoch": 0.48721999783573205, "grad_norm": 1.2890625, "learning_rate": 1.879971261844038e-05, "loss": 0.5904, "step": 2814 }, { "epoch": 0.4873931392706417, "grad_norm": 1.4296875, "learning_rate": 1.879884609118967e-05, "loss": 0.6268, "step": 2815 }, { "epoch": 0.48756628070555136, "grad_norm": 1.359375, "learning_rate": 1.879797927124955e-05, "loss": 0.5666, "step": 2816 }, { "epoch": 0.487739422140461, "grad_norm": 1.46875, "learning_rate": 1.879711215864886e-05, "loss": 0.5446, "step": 2817 }, { "epoch": 0.4879125635753706, "grad_norm": 1.296875, "learning_rate": 1.8796244753416442e-05, "loss": 0.5977, "step": 2818 }, { "epoch": 0.48808570501028026, "grad_norm": 1.3046875, "learning_rate": 1.8795377055581153e-05, "loss": 0.5507, "step": 2819 }, { "epoch": 0.4882588464451899, "grad_norm": 1.3359375, "learning_rate": 1.879450906517185e-05, "loss": 0.6494, "step": 2820 }, { "epoch": 0.48843198788009956, "grad_norm": 1.296875, "learning_rate": 1.8793640782217405e-05, "loss": 0.5918, "step": 2821 }, { "epoch": 0.4886051293150092, "grad_norm": 1.453125, "learning_rate": 1.8792772206746713e-05, "loss": 0.6115, "step": 2822 }, { "epoch": 0.48877827074991886, "grad_norm": 1.3671875, "learning_rate": 1.8791903338788656e-05, "loss": 0.5703, "step": 2823 }, { "epoch": 0.48895141218482846, "grad_norm": 1.3828125, "learning_rate": 1.879103417837214e-05, "loss": 0.5938, "step": 2824 }, { "epoch": 0.4891245536197381, "grad_norm": 1.296875, "learning_rate": 1.8790164725526078e-05, "loss": 0.6197, "step": 2825 }, { "epoch": 0.48929769505464776, "grad_norm": 1.3828125, "learning_rate": 1.878929498027939e-05, "loss": 0.6198, "step": 2826 }, { "epoch": 0.4894708364895574, "grad_norm": 1.4609375, "learning_rate": 1.878842494266101e-05, "loss": 0.5658, "step": 2827 }, { "epoch": 0.48964397792446707, "grad_norm": 1.375, "learning_rate": 1.8787554612699873e-05, "loss": 0.6373, "step": 2828 }, { "epoch": 0.4898171193593767, "grad_norm": 1.5625, "learning_rate": 1.878668399042494e-05, "loss": 0.6201, "step": 2829 }, { "epoch": 0.4899902607942863, "grad_norm": 1.4453125, "learning_rate": 1.8785813075865164e-05, "loss": 0.661, "step": 2830 }, { "epoch": 0.49016340222919597, "grad_norm": 1.3125, "learning_rate": 1.878494186904952e-05, "loss": 0.6237, "step": 2831 }, { "epoch": 0.4903365436641056, "grad_norm": 1.5546875, "learning_rate": 1.8784070370006986e-05, "loss": 0.6164, "step": 2832 }, { "epoch": 0.49050968509901527, "grad_norm": 1.296875, "learning_rate": 1.8783198578766552e-05, "loss": 0.5731, "step": 2833 }, { "epoch": 0.4906828265339249, "grad_norm": 1.4375, "learning_rate": 1.878232649535722e-05, "loss": 0.6202, "step": 2834 }, { "epoch": 0.4908559679688345, "grad_norm": 1.265625, "learning_rate": 1.8781454119807996e-05, "loss": 0.6312, "step": 2835 }, { "epoch": 0.49102910940374417, "grad_norm": 1.453125, "learning_rate": 1.8780581452147904e-05, "loss": 0.5819, "step": 2836 }, { "epoch": 0.4912022508386538, "grad_norm": 1.484375, "learning_rate": 1.877970849240597e-05, "loss": 0.6078, "step": 2837 }, { "epoch": 0.49137539227356347, "grad_norm": 1.3359375, "learning_rate": 1.8778835240611228e-05, "loss": 0.6029, "step": 2838 }, { "epoch": 0.4915485337084731, "grad_norm": 1.3125, "learning_rate": 1.8777961696792733e-05, "loss": 0.5589, "step": 2839 }, { "epoch": 0.4917216751433828, "grad_norm": 1.265625, "learning_rate": 1.8777087860979543e-05, "loss": 0.5598, "step": 2840 }, { "epoch": 0.49189481657829237, "grad_norm": 1.421875, "learning_rate": 1.8776213733200722e-05, "loss": 0.6242, "step": 2841 }, { "epoch": 0.492067958013202, "grad_norm": 1.421875, "learning_rate": 1.877533931348535e-05, "loss": 0.6136, "step": 2842 }, { "epoch": 0.4922410994481117, "grad_norm": 1.3515625, "learning_rate": 1.877446460186251e-05, "loss": 0.6932, "step": 2843 }, { "epoch": 0.4924142408830213, "grad_norm": 1.3671875, "learning_rate": 1.8773589598361305e-05, "loss": 0.5964, "step": 2844 }, { "epoch": 0.492587382317931, "grad_norm": 1.421875, "learning_rate": 1.8772714303010837e-05, "loss": 0.5732, "step": 2845 }, { "epoch": 0.49276052375284063, "grad_norm": 1.3046875, "learning_rate": 1.8771838715840225e-05, "loss": 0.529, "step": 2846 }, { "epoch": 0.4929336651877502, "grad_norm": 1.3671875, "learning_rate": 1.8770962836878594e-05, "loss": 0.5715, "step": 2847 }, { "epoch": 0.4931068066226599, "grad_norm": 1.40625, "learning_rate": 1.877008666615508e-05, "loss": 0.685, "step": 2848 }, { "epoch": 0.49327994805756953, "grad_norm": 1.4140625, "learning_rate": 1.8769210203698827e-05, "loss": 0.6145, "step": 2849 }, { "epoch": 0.4934530894924792, "grad_norm": 1.3515625, "learning_rate": 1.8768333449538987e-05, "loss": 0.5906, "step": 2850 }, { "epoch": 0.49362623092738883, "grad_norm": 1.453125, "learning_rate": 1.8767456403704735e-05, "loss": 0.5879, "step": 2851 }, { "epoch": 0.4937993723622984, "grad_norm": 1.2734375, "learning_rate": 1.8766579066225237e-05, "loss": 0.5574, "step": 2852 }, { "epoch": 0.4939725137972081, "grad_norm": 1.5234375, "learning_rate": 1.876570143712968e-05, "loss": 0.5747, "step": 2853 }, { "epoch": 0.49414565523211773, "grad_norm": 1.359375, "learning_rate": 1.8764823516447257e-05, "loss": 0.6934, "step": 2854 }, { "epoch": 0.4943187966670274, "grad_norm": 1.4765625, "learning_rate": 1.876394530420717e-05, "loss": 0.7258, "step": 2855 }, { "epoch": 0.49449193810193703, "grad_norm": 1.421875, "learning_rate": 1.8763066800438638e-05, "loss": 0.6085, "step": 2856 }, { "epoch": 0.4946650795368467, "grad_norm": 1.34375, "learning_rate": 1.8762188005170877e-05, "loss": 0.6801, "step": 2857 }, { "epoch": 0.4948382209717563, "grad_norm": 1.359375, "learning_rate": 1.8761308918433127e-05, "loss": 0.5713, "step": 2858 }, { "epoch": 0.49501136240666593, "grad_norm": 1.390625, "learning_rate": 1.8760429540254622e-05, "loss": 0.5894, "step": 2859 }, { "epoch": 0.4951845038415756, "grad_norm": 1.421875, "learning_rate": 1.8759549870664624e-05, "loss": 0.6066, "step": 2860 }, { "epoch": 0.49535764527648524, "grad_norm": 1.3515625, "learning_rate": 1.8758669909692385e-05, "loss": 0.5656, "step": 2861 }, { "epoch": 0.4955307867113949, "grad_norm": 1.3125, "learning_rate": 1.875778965736718e-05, "loss": 0.5978, "step": 2862 }, { "epoch": 0.49570392814630454, "grad_norm": 1.3359375, "learning_rate": 1.8756909113718298e-05, "loss": 0.5832, "step": 2863 }, { "epoch": 0.49587706958121414, "grad_norm": 1.5234375, "learning_rate": 1.8756028278775017e-05, "loss": 0.7106, "step": 2864 }, { "epoch": 0.4960502110161238, "grad_norm": 1.3984375, "learning_rate": 1.8755147152566647e-05, "loss": 0.633, "step": 2865 }, { "epoch": 0.49622335245103344, "grad_norm": 1.3671875, "learning_rate": 1.875426573512249e-05, "loss": 0.6974, "step": 2866 }, { "epoch": 0.4963964938859431, "grad_norm": 1.2265625, "learning_rate": 1.8753384026471875e-05, "loss": 0.6062, "step": 2867 }, { "epoch": 0.49656963532085274, "grad_norm": 1.3984375, "learning_rate": 1.8752502026644125e-05, "loss": 0.666, "step": 2868 }, { "epoch": 0.49674277675576234, "grad_norm": 1.4375, "learning_rate": 1.875161973566858e-05, "loss": 0.6988, "step": 2869 }, { "epoch": 0.496915918190672, "grad_norm": 1.40625, "learning_rate": 1.8750737153574593e-05, "loss": 0.6101, "step": 2870 }, { "epoch": 0.49708905962558164, "grad_norm": 1.4296875, "learning_rate": 1.874985428039152e-05, "loss": 0.6586, "step": 2871 }, { "epoch": 0.4972622010604913, "grad_norm": 1.53125, "learning_rate": 1.874897111614873e-05, "loss": 0.6979, "step": 2872 }, { "epoch": 0.49743534249540095, "grad_norm": 1.3046875, "learning_rate": 1.87480876608756e-05, "loss": 0.6122, "step": 2873 }, { "epoch": 0.4976084839303106, "grad_norm": 1.4140625, "learning_rate": 1.8747203914601518e-05, "loss": 0.6039, "step": 2874 }, { "epoch": 0.4977816253652202, "grad_norm": 1.328125, "learning_rate": 1.8746319877355882e-05, "loss": 0.557, "step": 2875 }, { "epoch": 0.49795476680012984, "grad_norm": 1.390625, "learning_rate": 1.8745435549168096e-05, "loss": 0.6463, "step": 2876 }, { "epoch": 0.4981279082350395, "grad_norm": 1.3125, "learning_rate": 1.8744550930067584e-05, "loss": 0.5246, "step": 2877 }, { "epoch": 0.49830104966994915, "grad_norm": 1.59375, "learning_rate": 1.8743666020083766e-05, "loss": 0.6488, "step": 2878 }, { "epoch": 0.4984741911048588, "grad_norm": 1.3359375, "learning_rate": 1.874278081924608e-05, "loss": 0.5943, "step": 2879 }, { "epoch": 0.49864733253976845, "grad_norm": 1.3828125, "learning_rate": 1.8741895327583972e-05, "loss": 0.5951, "step": 2880 }, { "epoch": 0.49882047397467805, "grad_norm": 1.28125, "learning_rate": 1.8741009545126898e-05, "loss": 0.6633, "step": 2881 }, { "epoch": 0.4989936154095877, "grad_norm": 1.3359375, "learning_rate": 1.874012347190432e-05, "loss": 0.6193, "step": 2882 }, { "epoch": 0.49916675684449735, "grad_norm": 1.3671875, "learning_rate": 1.873923710794572e-05, "loss": 0.5361, "step": 2883 }, { "epoch": 0.499339898279407, "grad_norm": 1.3828125, "learning_rate": 1.8738350453280574e-05, "loss": 0.6479, "step": 2884 }, { "epoch": 0.49951303971431665, "grad_norm": 1.3515625, "learning_rate": 1.873746350793838e-05, "loss": 0.6082, "step": 2885 }, { "epoch": 0.49968618114922625, "grad_norm": 1.453125, "learning_rate": 1.8736576271948642e-05, "loss": 0.7241, "step": 2886 }, { "epoch": 0.4998593225841359, "grad_norm": 1.3203125, "learning_rate": 1.8735688745340877e-05, "loss": 0.6014, "step": 2887 }, { "epoch": 0.5000324640190456, "grad_norm": 1.3359375, "learning_rate": 1.8734800928144598e-05, "loss": 0.5535, "step": 2888 }, { "epoch": 0.5000324640190456, "eval_loss": 0.6689422726631165, "eval_runtime": 2675.8501, "eval_samples_per_second": 18.715, "eval_steps_per_second": 18.715, "step": 2888 }, { "epoch": 0.5002056054539552, "grad_norm": 1.265625, "learning_rate": 1.873391282038935e-05, "loss": 0.5977, "step": 2889 }, { "epoch": 0.5003787468888649, "grad_norm": 1.3203125, "learning_rate": 1.8733024422104664e-05, "loss": 0.5973, "step": 2890 }, { "epoch": 0.5005518883237745, "grad_norm": 1.3671875, "learning_rate": 1.87321357333201e-05, "loss": 0.5883, "step": 2891 }, { "epoch": 0.5007250297586842, "grad_norm": 1.2265625, "learning_rate": 1.873124675406522e-05, "loss": 0.547, "step": 2892 }, { "epoch": 0.5008981711935938, "grad_norm": 1.4453125, "learning_rate": 1.873035748436959e-05, "loss": 0.7111, "step": 2893 }, { "epoch": 0.5010713126285035, "grad_norm": 1.3203125, "learning_rate": 1.8729467924262796e-05, "loss": 0.5668, "step": 2894 }, { "epoch": 0.501244454063413, "grad_norm": 1.4609375, "learning_rate": 1.8728578073774427e-05, "loss": 0.6597, "step": 2895 }, { "epoch": 0.5014175954983227, "grad_norm": 1.4375, "learning_rate": 1.8727687932934083e-05, "loss": 0.6956, "step": 2896 }, { "epoch": 0.5015907369332323, "grad_norm": 1.4765625, "learning_rate": 1.872679750177137e-05, "loss": 0.6758, "step": 2897 }, { "epoch": 0.501763878368142, "grad_norm": 1.28125, "learning_rate": 1.872590678031592e-05, "loss": 0.5591, "step": 2898 }, { "epoch": 0.5019370198030516, "grad_norm": 1.421875, "learning_rate": 1.872501576859735e-05, "loss": 0.5724, "step": 2899 }, { "epoch": 0.5021101612379613, "grad_norm": 1.3984375, "learning_rate": 1.8724124466645302e-05, "loss": 0.6337, "step": 2900 }, { "epoch": 0.5022833026728709, "grad_norm": 1.2421875, "learning_rate": 1.8723232874489434e-05, "loss": 0.5887, "step": 2901 }, { "epoch": 0.5024564441077806, "grad_norm": 1.3828125, "learning_rate": 1.872234099215939e-05, "loss": 0.5858, "step": 2902 }, { "epoch": 0.5026295855426902, "grad_norm": 2.125, "learning_rate": 1.8721448819684847e-05, "loss": 0.6309, "step": 2903 }, { "epoch": 0.5028027269775999, "grad_norm": 1.3984375, "learning_rate": 1.8720556357095482e-05, "loss": 0.6051, "step": 2904 }, { "epoch": 0.5029758684125095, "grad_norm": 1.375, "learning_rate": 1.8719663604420978e-05, "loss": 0.6222, "step": 2905 }, { "epoch": 0.5031490098474191, "grad_norm": 1.359375, "learning_rate": 1.8718770561691036e-05, "loss": 0.6704, "step": 2906 }, { "epoch": 0.5033221512823287, "grad_norm": 1.3359375, "learning_rate": 1.8717877228935363e-05, "loss": 0.5909, "step": 2907 }, { "epoch": 0.5034952927172384, "grad_norm": 1.4140625, "learning_rate": 1.8716983606183673e-05, "loss": 0.6174, "step": 2908 }, { "epoch": 0.503668434152148, "grad_norm": 1.4140625, "learning_rate": 1.8716089693465696e-05, "loss": 0.6346, "step": 2909 }, { "epoch": 0.5038415755870577, "grad_norm": 1.4453125, "learning_rate": 1.871519549081116e-05, "loss": 0.5224, "step": 2910 }, { "epoch": 0.5040147170219673, "grad_norm": 1.6328125, "learning_rate": 1.8714300998249814e-05, "loss": 0.6461, "step": 2911 }, { "epoch": 0.504187858456877, "grad_norm": 1.3359375, "learning_rate": 1.8713406215811417e-05, "loss": 0.5796, "step": 2912 }, { "epoch": 0.5043609998917866, "grad_norm": 1.390625, "learning_rate": 1.871251114352573e-05, "loss": 0.582, "step": 2913 }, { "epoch": 0.5045341413266963, "grad_norm": 1.4140625, "learning_rate": 1.8711615781422524e-05, "loss": 0.5986, "step": 2914 }, { "epoch": 0.5047072827616059, "grad_norm": 1.7734375, "learning_rate": 1.8710720129531587e-05, "loss": 0.6579, "step": 2915 }, { "epoch": 0.5048804241965156, "grad_norm": 1.3828125, "learning_rate": 1.8709824187882713e-05, "loss": 0.5791, "step": 2916 }, { "epoch": 0.5050535656314251, "grad_norm": 1.390625, "learning_rate": 1.87089279565057e-05, "loss": 0.5589, "step": 2917 }, { "epoch": 0.5052267070663348, "grad_norm": 1.2890625, "learning_rate": 1.8708031435430367e-05, "loss": 0.577, "step": 2918 }, { "epoch": 0.5053998485012444, "grad_norm": 1.375, "learning_rate": 1.8707134624686537e-05, "loss": 0.589, "step": 2919 }, { "epoch": 0.5055729899361541, "grad_norm": 1.25, "learning_rate": 1.870623752430403e-05, "loss": 0.5898, "step": 2920 }, { "epoch": 0.5057461313710637, "grad_norm": 1.3515625, "learning_rate": 1.87053401343127e-05, "loss": 0.5915, "step": 2921 }, { "epoch": 0.5059192728059734, "grad_norm": 1.3359375, "learning_rate": 1.87044424547424e-05, "loss": 0.5611, "step": 2922 }, { "epoch": 0.506092414240883, "grad_norm": 1.3984375, "learning_rate": 1.8703544485622978e-05, "loss": 0.663, "step": 2923 }, { "epoch": 0.5062655556757927, "grad_norm": 4.96875, "learning_rate": 1.8702646226984313e-05, "loss": 0.516, "step": 2924 }, { "epoch": 0.5064386971107023, "grad_norm": 1.375, "learning_rate": 1.8701747678856286e-05, "loss": 0.6332, "step": 2925 }, { "epoch": 0.506611838545612, "grad_norm": 1.359375, "learning_rate": 1.8700848841268783e-05, "loss": 0.6209, "step": 2926 }, { "epoch": 0.5067849799805216, "grad_norm": 1.2578125, "learning_rate": 1.8699949714251708e-05, "loss": 0.549, "step": 2927 }, { "epoch": 0.5069581214154313, "grad_norm": 1.4921875, "learning_rate": 1.8699050297834963e-05, "loss": 0.6359, "step": 2928 }, { "epoch": 0.5071312628503408, "grad_norm": 1.3828125, "learning_rate": 1.869815059204847e-05, "loss": 0.6146, "step": 2929 }, { "epoch": 0.5073044042852505, "grad_norm": 1.2734375, "learning_rate": 1.8697250596922164e-05, "loss": 0.5682, "step": 2930 }, { "epoch": 0.5074775457201601, "grad_norm": 1.5625, "learning_rate": 1.869635031248597e-05, "loss": 0.6627, "step": 2931 }, { "epoch": 0.5076506871550698, "grad_norm": 1.2734375, "learning_rate": 1.869544973876985e-05, "loss": 0.5472, "step": 2932 }, { "epoch": 0.5078238285899794, "grad_norm": 1.5, "learning_rate": 1.869454887580375e-05, "loss": 0.7394, "step": 2933 }, { "epoch": 0.5079969700248891, "grad_norm": 1.4921875, "learning_rate": 1.8693647723617637e-05, "loss": 0.5981, "step": 2934 }, { "epoch": 0.5081701114597987, "grad_norm": 1.421875, "learning_rate": 1.8692746282241496e-05, "loss": 0.6407, "step": 2935 }, { "epoch": 0.5083432528947084, "grad_norm": 1.5078125, "learning_rate": 1.8691844551705305e-05, "loss": 0.5649, "step": 2936 }, { "epoch": 0.508516394329618, "grad_norm": 1.25, "learning_rate": 1.869094253203906e-05, "loss": 0.5659, "step": 2937 }, { "epoch": 0.5086895357645277, "grad_norm": 1.375, "learning_rate": 1.869004022327277e-05, "loss": 0.5651, "step": 2938 }, { "epoch": 0.5088626771994373, "grad_norm": 1.46875, "learning_rate": 1.8689137625436455e-05, "loss": 0.6374, "step": 2939 }, { "epoch": 0.5090358186343469, "grad_norm": 1.3984375, "learning_rate": 1.8688234738560127e-05, "loss": 0.6024, "step": 2940 }, { "epoch": 0.5092089600692565, "grad_norm": 1.421875, "learning_rate": 1.8687331562673824e-05, "loss": 0.6937, "step": 2941 }, { "epoch": 0.5093821015041662, "grad_norm": 1.390625, "learning_rate": 1.8686428097807597e-05, "loss": 0.5453, "step": 2942 }, { "epoch": 0.5095552429390758, "grad_norm": 1.4140625, "learning_rate": 1.8685524343991493e-05, "loss": 0.5488, "step": 2943 }, { "epoch": 0.5097283843739855, "grad_norm": 1.421875, "learning_rate": 1.8684620301255574e-05, "loss": 0.6535, "step": 2944 }, { "epoch": 0.5099015258088951, "grad_norm": 1.4921875, "learning_rate": 1.8683715969629917e-05, "loss": 0.6885, "step": 2945 }, { "epoch": 0.5100746672438048, "grad_norm": 1.421875, "learning_rate": 1.8682811349144603e-05, "loss": 0.597, "step": 2946 }, { "epoch": 0.5102478086787144, "grad_norm": 1.4921875, "learning_rate": 1.8681906439829716e-05, "loss": 0.5871, "step": 2947 }, { "epoch": 0.5104209501136241, "grad_norm": 1.484375, "learning_rate": 1.8681001241715372e-05, "loss": 0.6217, "step": 2948 }, { "epoch": 0.5105940915485337, "grad_norm": 1.375, "learning_rate": 1.868009575483167e-05, "loss": 0.5925, "step": 2949 }, { "epoch": 0.5107672329834434, "grad_norm": 1.390625, "learning_rate": 1.8679189979208738e-05, "loss": 0.6078, "step": 2950 }, { "epoch": 0.5109403744183529, "grad_norm": 1.3359375, "learning_rate": 1.86782839148767e-05, "loss": 0.5962, "step": 2951 }, { "epoch": 0.5111135158532626, "grad_norm": 1.34375, "learning_rate": 1.8677377561865694e-05, "loss": 0.6285, "step": 2952 }, { "epoch": 0.5112866572881722, "grad_norm": 1.2734375, "learning_rate": 1.8676470920205882e-05, "loss": 0.6002, "step": 2953 }, { "epoch": 0.5114597987230819, "grad_norm": 1.453125, "learning_rate": 1.867556398992741e-05, "loss": 0.6281, "step": 2954 }, { "epoch": 0.5116329401579915, "grad_norm": 1.4609375, "learning_rate": 1.8674656771060454e-05, "loss": 0.6619, "step": 2955 }, { "epoch": 0.5118060815929012, "grad_norm": 1.4765625, "learning_rate": 1.8673749263635188e-05, "loss": 0.5583, "step": 2956 }, { "epoch": 0.5119792230278108, "grad_norm": 1.5234375, "learning_rate": 1.8672841467681802e-05, "loss": 0.661, "step": 2957 }, { "epoch": 0.5121523644627205, "grad_norm": 1.3125, "learning_rate": 1.8671933383230492e-05, "loss": 0.5682, "step": 2958 }, { "epoch": 0.5123255058976302, "grad_norm": 1.3515625, "learning_rate": 1.8671025010311467e-05, "loss": 0.6165, "step": 2959 }, { "epoch": 0.5124986473325398, "grad_norm": 1.3828125, "learning_rate": 1.8670116348954945e-05, "loss": 0.6626, "step": 2960 }, { "epoch": 0.5126717887674495, "grad_norm": 1.296875, "learning_rate": 1.8669207399191144e-05, "loss": 0.6043, "step": 2961 }, { "epoch": 0.5128449302023591, "grad_norm": 1.3828125, "learning_rate": 1.8668298161050308e-05, "loss": 0.6156, "step": 2962 }, { "epoch": 0.5130180716372686, "grad_norm": 1.359375, "learning_rate": 1.8667388634562682e-05, "loss": 0.6105, "step": 2963 }, { "epoch": 0.5131912130721783, "grad_norm": 1.4296875, "learning_rate": 1.8666478819758518e-05, "loss": 0.6031, "step": 2964 }, { "epoch": 0.513364354507088, "grad_norm": 1.3515625, "learning_rate": 1.866556871666808e-05, "loss": 0.5589, "step": 2965 }, { "epoch": 0.5135374959419976, "grad_norm": 1.3671875, "learning_rate": 1.8664658325321644e-05, "loss": 0.6481, "step": 2966 }, { "epoch": 0.5137106373769073, "grad_norm": 1.4140625, "learning_rate": 1.8663747645749493e-05, "loss": 0.6568, "step": 2967 }, { "epoch": 0.5138837788118169, "grad_norm": 1.2890625, "learning_rate": 1.8662836677981924e-05, "loss": 0.5314, "step": 2968 }, { "epoch": 0.5140569202467266, "grad_norm": 1.328125, "learning_rate": 1.8661925422049233e-05, "loss": 0.558, "step": 2969 }, { "epoch": 0.5142300616816362, "grad_norm": 1.34375, "learning_rate": 1.8661013877981737e-05, "loss": 0.6361, "step": 2970 }, { "epoch": 0.5144032031165459, "grad_norm": 1.3515625, "learning_rate": 1.8660102045809758e-05, "loss": 0.5479, "step": 2971 }, { "epoch": 0.5145763445514555, "grad_norm": 1.34375, "learning_rate": 1.8659189925563624e-05, "loss": 0.5922, "step": 2972 }, { "epoch": 0.5147494859863652, "grad_norm": 1.390625, "learning_rate": 1.865827751727368e-05, "loss": 0.6108, "step": 2973 }, { "epoch": 0.5149226274212747, "grad_norm": 1.390625, "learning_rate": 1.8657364820970276e-05, "loss": 0.5169, "step": 2974 }, { "epoch": 0.5150957688561844, "grad_norm": 1.3828125, "learning_rate": 1.8656451836683773e-05, "loss": 0.5877, "step": 2975 }, { "epoch": 0.515268910291094, "grad_norm": 1.2890625, "learning_rate": 1.865553856444454e-05, "loss": 0.6298, "step": 2976 }, { "epoch": 0.5154420517260037, "grad_norm": 1.3671875, "learning_rate": 1.8654625004282952e-05, "loss": 0.5424, "step": 2977 }, { "epoch": 0.5156151931609133, "grad_norm": 1.3671875, "learning_rate": 1.865371115622941e-05, "loss": 0.5881, "step": 2978 }, { "epoch": 0.515788334595823, "grad_norm": 1.359375, "learning_rate": 1.8652797020314304e-05, "loss": 0.5946, "step": 2979 }, { "epoch": 0.5159614760307326, "grad_norm": 1.2890625, "learning_rate": 1.865188259656804e-05, "loss": 0.6158, "step": 2980 }, { "epoch": 0.5161346174656423, "grad_norm": 1.3828125, "learning_rate": 1.8650967885021045e-05, "loss": 0.6259, "step": 2981 }, { "epoch": 0.5163077589005519, "grad_norm": 1.515625, "learning_rate": 1.8650052885703737e-05, "loss": 0.6811, "step": 2982 }, { "epoch": 0.5164809003354616, "grad_norm": 1.4453125, "learning_rate": 1.8649137598646558e-05, "loss": 0.5775, "step": 2983 }, { "epoch": 0.5166540417703712, "grad_norm": 1.359375, "learning_rate": 1.8648222023879955e-05, "loss": 0.618, "step": 2984 }, { "epoch": 0.5168271832052808, "grad_norm": 1.28125, "learning_rate": 1.8647306161434383e-05, "loss": 0.5691, "step": 2985 }, { "epoch": 0.5170003246401904, "grad_norm": 1.3671875, "learning_rate": 1.864639001134031e-05, "loss": 0.6783, "step": 2986 }, { "epoch": 0.5171734660751001, "grad_norm": 1.59375, "learning_rate": 1.8645473573628204e-05, "loss": 0.6326, "step": 2987 }, { "epoch": 0.5173466075100097, "grad_norm": 1.34375, "learning_rate": 1.864455684832856e-05, "loss": 0.5546, "step": 2988 }, { "epoch": 0.5175197489449194, "grad_norm": 1.2109375, "learning_rate": 1.864363983547186e-05, "loss": 0.5468, "step": 2989 }, { "epoch": 0.517692890379829, "grad_norm": 1.3671875, "learning_rate": 1.8642722535088623e-05, "loss": 0.632, "step": 2990 }, { "epoch": 0.5178660318147387, "grad_norm": 1.390625, "learning_rate": 1.864180494720935e-05, "loss": 0.5621, "step": 2991 }, { "epoch": 0.5180391732496483, "grad_norm": 1.375, "learning_rate": 1.8640887071864573e-05, "loss": 0.6226, "step": 2992 }, { "epoch": 0.518212314684558, "grad_norm": 1.328125, "learning_rate": 1.8639968909084816e-05, "loss": 0.6516, "step": 2993 }, { "epoch": 0.5183854561194676, "grad_norm": 1.4375, "learning_rate": 1.863905045890063e-05, "loss": 0.6873, "step": 2994 }, { "epoch": 0.5185585975543773, "grad_norm": 1.546875, "learning_rate": 1.8638131721342557e-05, "loss": 0.6541, "step": 2995 }, { "epoch": 0.5187317389892869, "grad_norm": 1.3984375, "learning_rate": 1.863721269644117e-05, "loss": 0.6309, "step": 2996 }, { "epoch": 0.5189048804241965, "grad_norm": 1.3671875, "learning_rate": 1.8636293384227034e-05, "loss": 0.6396, "step": 2997 }, { "epoch": 0.5190780218591061, "grad_norm": 1.359375, "learning_rate": 1.8635373784730727e-05, "loss": 0.5913, "step": 2998 }, { "epoch": 0.5192511632940158, "grad_norm": 1.2890625, "learning_rate": 1.863445389798284e-05, "loss": 0.5794, "step": 2999 }, { "epoch": 0.5194243047289254, "grad_norm": 1.390625, "learning_rate": 1.8633533724013976e-05, "loss": 0.6752, "step": 3000 }, { "epoch": 0.5195974461638351, "grad_norm": 1.4140625, "learning_rate": 1.863261326285474e-05, "loss": 0.6057, "step": 3001 }, { "epoch": 0.5197705875987447, "grad_norm": 1.3828125, "learning_rate": 1.8631692514535756e-05, "loss": 0.6341, "step": 3002 }, { "epoch": 0.5199437290336544, "grad_norm": 1.40625, "learning_rate": 1.863077147908765e-05, "loss": 0.6513, "step": 3003 }, { "epoch": 0.520116870468564, "grad_norm": 1.5234375, "learning_rate": 1.8629850156541056e-05, "loss": 0.7285, "step": 3004 }, { "epoch": 0.5202900119034737, "grad_norm": 1.453125, "learning_rate": 1.8628928546926627e-05, "loss": 0.608, "step": 3005 }, { "epoch": 0.5204631533383833, "grad_norm": 1.34375, "learning_rate": 1.8628006650275015e-05, "loss": 0.682, "step": 3006 }, { "epoch": 0.520636294773293, "grad_norm": 1.3359375, "learning_rate": 1.862708446661689e-05, "loss": 0.5543, "step": 3007 }, { "epoch": 0.5208094362082025, "grad_norm": 1.3125, "learning_rate": 1.8626161995982927e-05, "loss": 0.5705, "step": 3008 }, { "epoch": 0.5209825776431122, "grad_norm": 1.4375, "learning_rate": 1.862523923840381e-05, "loss": 0.529, "step": 3009 }, { "epoch": 0.5211557190780218, "grad_norm": 1.421875, "learning_rate": 1.8624316193910234e-05, "loss": 0.6073, "step": 3010 }, { "epoch": 0.5213288605129315, "grad_norm": 1.3515625, "learning_rate": 1.8623392862532908e-05, "loss": 0.582, "step": 3011 }, { "epoch": 0.5215020019478411, "grad_norm": 1.4453125, "learning_rate": 1.8622469244302542e-05, "loss": 0.6292, "step": 3012 }, { "epoch": 0.5216751433827508, "grad_norm": 1.4921875, "learning_rate": 1.862154533924986e-05, "loss": 0.6707, "step": 3013 }, { "epoch": 0.5218482848176604, "grad_norm": 1.390625, "learning_rate": 1.8620621147405596e-05, "loss": 0.6769, "step": 3014 }, { "epoch": 0.5220214262525701, "grad_norm": 1.34375, "learning_rate": 1.8619696668800494e-05, "loss": 0.6682, "step": 3015 }, { "epoch": 0.5221945676874797, "grad_norm": 1.390625, "learning_rate": 1.8618771903465304e-05, "loss": 0.5622, "step": 3016 }, { "epoch": 0.5223677091223894, "grad_norm": 1.4296875, "learning_rate": 1.861784685143079e-05, "loss": 0.5287, "step": 3017 }, { "epoch": 0.522540850557299, "grad_norm": 1.2578125, "learning_rate": 1.8616921512727724e-05, "loss": 0.5603, "step": 3018 }, { "epoch": 0.5227139919922086, "grad_norm": 1.3203125, "learning_rate": 1.8615995887386883e-05, "loss": 0.574, "step": 3019 }, { "epoch": 0.5228871334271182, "grad_norm": 1.359375, "learning_rate": 1.8615069975439062e-05, "loss": 0.5727, "step": 3020 }, { "epoch": 0.5230602748620279, "grad_norm": 1.453125, "learning_rate": 1.8614143776915058e-05, "loss": 0.5486, "step": 3021 }, { "epoch": 0.5232334162969375, "grad_norm": 1.28125, "learning_rate": 1.8613217291845683e-05, "loss": 0.6113, "step": 3022 }, { "epoch": 0.5234065577318472, "grad_norm": 1.375, "learning_rate": 1.8612290520261752e-05, "loss": 0.645, "step": 3023 }, { "epoch": 0.5235796991667568, "grad_norm": 1.3046875, "learning_rate": 1.86113634621941e-05, "loss": 0.6174, "step": 3024 }, { "epoch": 0.5237528406016665, "grad_norm": 1.25, "learning_rate": 1.8610436117673557e-05, "loss": 0.5643, "step": 3025 }, { "epoch": 0.5239259820365761, "grad_norm": 1.3359375, "learning_rate": 1.8609508486730977e-05, "loss": 0.5695, "step": 3026 }, { "epoch": 0.5240991234714858, "grad_norm": 1.3046875, "learning_rate": 1.8608580569397213e-05, "loss": 0.5198, "step": 3027 }, { "epoch": 0.5242722649063954, "grad_norm": 1.3515625, "learning_rate": 1.8607652365703138e-05, "loss": 0.5401, "step": 3028 }, { "epoch": 0.5244454063413051, "grad_norm": 1.40625, "learning_rate": 1.8606723875679624e-05, "loss": 0.6332, "step": 3029 }, { "epoch": 0.5246185477762147, "grad_norm": 1.359375, "learning_rate": 1.8605795099357555e-05, "loss": 0.6298, "step": 3030 }, { "epoch": 0.5247916892111243, "grad_norm": 1.34375, "learning_rate": 1.860486603676783e-05, "loss": 0.5664, "step": 3031 }, { "epoch": 0.5249648306460339, "grad_norm": 1.3515625, "learning_rate": 1.860393668794135e-05, "loss": 0.5795, "step": 3032 }, { "epoch": 0.5251379720809436, "grad_norm": 1.390625, "learning_rate": 1.8603007052909033e-05, "loss": 0.6227, "step": 3033 }, { "epoch": 0.5253111135158532, "grad_norm": 1.28125, "learning_rate": 1.86020771317018e-05, "loss": 0.5343, "step": 3034 }, { "epoch": 0.5254842549507629, "grad_norm": 1.375, "learning_rate": 1.8601146924350585e-05, "loss": 0.6338, "step": 3035 }, { "epoch": 0.5256573963856725, "grad_norm": 1.4140625, "learning_rate": 1.8600216430886333e-05, "loss": 0.7113, "step": 3036 }, { "epoch": 0.5258305378205822, "grad_norm": 1.4296875, "learning_rate": 1.8599285651339997e-05, "loss": 0.5401, "step": 3037 }, { "epoch": 0.5260036792554919, "grad_norm": 1.3203125, "learning_rate": 1.8598354585742537e-05, "loss": 0.5984, "step": 3038 }, { "epoch": 0.5261768206904015, "grad_norm": 1.3984375, "learning_rate": 1.859742323412492e-05, "loss": 0.6079, "step": 3039 }, { "epoch": 0.5263499621253112, "grad_norm": 1.359375, "learning_rate": 1.8596491596518134e-05, "loss": 0.6066, "step": 3040 }, { "epoch": 0.5265231035602208, "grad_norm": 1.3984375, "learning_rate": 1.859555967295317e-05, "loss": 0.6087, "step": 3041 }, { "epoch": 0.5266962449951303, "grad_norm": 1.5, "learning_rate": 1.859462746346102e-05, "loss": 0.6404, "step": 3042 }, { "epoch": 0.52686938643004, "grad_norm": 1.375, "learning_rate": 1.85936949680727e-05, "loss": 0.5781, "step": 3043 }, { "epoch": 0.5270425278649497, "grad_norm": 1.3984375, "learning_rate": 1.8592762186819227e-05, "loss": 0.5741, "step": 3044 }, { "epoch": 0.5272156692998593, "grad_norm": 1.5390625, "learning_rate": 1.859182911973163e-05, "loss": 0.6351, "step": 3045 }, { "epoch": 0.527388810734769, "grad_norm": 1.3046875, "learning_rate": 1.8590895766840948e-05, "loss": 0.5865, "step": 3046 }, { "epoch": 0.5275619521696786, "grad_norm": 1.5625, "learning_rate": 1.8589962128178226e-05, "loss": 0.6739, "step": 3047 }, { "epoch": 0.5277350936045883, "grad_norm": 1.3125, "learning_rate": 1.8589028203774525e-05, "loss": 0.5777, "step": 3048 }, { "epoch": 0.5279082350394979, "grad_norm": 1.359375, "learning_rate": 1.8588093993660906e-05, "loss": 0.6288, "step": 3049 }, { "epoch": 0.5280813764744076, "grad_norm": 1.453125, "learning_rate": 1.858715949786845e-05, "loss": 0.642, "step": 3050 }, { "epoch": 0.5282545179093172, "grad_norm": 1.296875, "learning_rate": 1.858622471642824e-05, "loss": 0.622, "step": 3051 }, { "epoch": 0.5284276593442269, "grad_norm": 1.3203125, "learning_rate": 1.858528964937137e-05, "loss": 0.5072, "step": 3052 }, { "epoch": 0.5286008007791364, "grad_norm": 1.4921875, "learning_rate": 1.8584354296728952e-05, "loss": 0.6577, "step": 3053 }, { "epoch": 0.528773942214046, "grad_norm": 1.2734375, "learning_rate": 1.8583418658532092e-05, "loss": 0.5185, "step": 3054 }, { "epoch": 0.5289470836489557, "grad_norm": 1.3984375, "learning_rate": 1.858248273481191e-05, "loss": 0.6599, "step": 3055 }, { "epoch": 0.5291202250838654, "grad_norm": 1.34375, "learning_rate": 1.8581546525599554e-05, "loss": 0.6105, "step": 3056 }, { "epoch": 0.529293366518775, "grad_norm": 1.421875, "learning_rate": 1.8580610030926152e-05, "loss": 0.5857, "step": 3057 }, { "epoch": 0.5294665079536847, "grad_norm": 1.359375, "learning_rate": 1.857967325082286e-05, "loss": 0.6257, "step": 3058 }, { "epoch": 0.5296396493885943, "grad_norm": 1.4375, "learning_rate": 1.857873618532085e-05, "loss": 0.6077, "step": 3059 }, { "epoch": 0.529812790823504, "grad_norm": 1.4609375, "learning_rate": 1.8577798834451274e-05, "loss": 0.627, "step": 3060 }, { "epoch": 0.5299859322584136, "grad_norm": 1.3515625, "learning_rate": 1.857686119824533e-05, "loss": 0.602, "step": 3061 }, { "epoch": 0.5301590736933233, "grad_norm": 1.296875, "learning_rate": 1.8575923276734194e-05, "loss": 0.6007, "step": 3062 }, { "epoch": 0.5303322151282329, "grad_norm": 1.421875, "learning_rate": 1.8574985069949075e-05, "loss": 0.6433, "step": 3063 }, { "epoch": 0.5305053565631426, "grad_norm": 1.3046875, "learning_rate": 1.8574046577921182e-05, "loss": 0.613, "step": 3064 }, { "epoch": 0.5306784979980521, "grad_norm": 1.578125, "learning_rate": 1.8573107800681728e-05, "loss": 0.597, "step": 3065 }, { "epoch": 0.5308516394329618, "grad_norm": 1.375, "learning_rate": 1.8572168738261944e-05, "loss": 0.6018, "step": 3066 }, { "epoch": 0.5310247808678714, "grad_norm": 1.359375, "learning_rate": 1.8571229390693068e-05, "loss": 0.5507, "step": 3067 }, { "epoch": 0.5311979223027811, "grad_norm": 1.5703125, "learning_rate": 1.8570289758006346e-05, "loss": 0.5543, "step": 3068 }, { "epoch": 0.5313710637376907, "grad_norm": 1.6015625, "learning_rate": 1.8569349840233034e-05, "loss": 0.6486, "step": 3069 }, { "epoch": 0.5315442051726004, "grad_norm": 1.5078125, "learning_rate": 1.8568409637404396e-05, "loss": 0.6266, "step": 3070 }, { "epoch": 0.53171734660751, "grad_norm": 1.3828125, "learning_rate": 1.8567469149551714e-05, "loss": 0.5886, "step": 3071 }, { "epoch": 0.5318904880424197, "grad_norm": 1.3359375, "learning_rate": 1.8566528376706268e-05, "loss": 0.5488, "step": 3072 }, { "epoch": 0.5320636294773293, "grad_norm": 1.34375, "learning_rate": 1.856558731889935e-05, "loss": 0.6103, "step": 3073 }, { "epoch": 0.532236770912239, "grad_norm": 1.4140625, "learning_rate": 1.856464597616227e-05, "loss": 0.5688, "step": 3074 }, { "epoch": 0.5324099123471486, "grad_norm": 1.390625, "learning_rate": 1.8563704348526337e-05, "loss": 0.5646, "step": 3075 }, { "epoch": 0.5325830537820582, "grad_norm": 1.4453125, "learning_rate": 1.8562762436022875e-05, "loss": 0.5921, "step": 3076 }, { "epoch": 0.5327561952169678, "grad_norm": 1.234375, "learning_rate": 1.8561820238683216e-05, "loss": 0.5087, "step": 3077 }, { "epoch": 0.5329293366518775, "grad_norm": 1.296875, "learning_rate": 1.8560877756538703e-05, "loss": 0.6433, "step": 3078 }, { "epoch": 0.5331024780867871, "grad_norm": 1.359375, "learning_rate": 1.8559934989620686e-05, "loss": 0.6463, "step": 3079 }, { "epoch": 0.5332756195216968, "grad_norm": 1.3984375, "learning_rate": 1.8558991937960523e-05, "loss": 0.5869, "step": 3080 }, { "epoch": 0.5334487609566064, "grad_norm": 1.3203125, "learning_rate": 1.855804860158959e-05, "loss": 0.642, "step": 3081 }, { "epoch": 0.5336219023915161, "grad_norm": 1.3359375, "learning_rate": 1.8557104980539265e-05, "loss": 0.5993, "step": 3082 }, { "epoch": 0.5337950438264257, "grad_norm": 1.375, "learning_rate": 1.855616107484093e-05, "loss": 0.6253, "step": 3083 }, { "epoch": 0.5339681852613354, "grad_norm": 1.390625, "learning_rate": 1.8555216884526e-05, "loss": 0.6095, "step": 3084 }, { "epoch": 0.534141326696245, "grad_norm": 1.421875, "learning_rate": 1.855427240962586e-05, "loss": 0.592, "step": 3085 }, { "epoch": 0.5343144681311547, "grad_norm": 1.5078125, "learning_rate": 1.8553327650171948e-05, "loss": 0.5805, "step": 3086 }, { "epoch": 0.5344876095660642, "grad_norm": 1.4140625, "learning_rate": 1.8552382606195683e-05, "loss": 0.6098, "step": 3087 }, { "epoch": 0.5346607510009739, "grad_norm": 1.5, "learning_rate": 1.8551437277728498e-05, "loss": 0.6488, "step": 3088 }, { "epoch": 0.5348338924358835, "grad_norm": 1.421875, "learning_rate": 1.8550491664801845e-05, "loss": 0.6121, "step": 3089 }, { "epoch": 0.5350070338707932, "grad_norm": 1.4296875, "learning_rate": 1.8549545767447174e-05, "loss": 0.5883, "step": 3090 }, { "epoch": 0.5351801753057028, "grad_norm": 1.1796875, "learning_rate": 1.8548599585695952e-05, "loss": 0.5656, "step": 3091 }, { "epoch": 0.5353533167406125, "grad_norm": 1.3984375, "learning_rate": 1.854765311957966e-05, "loss": 0.5848, "step": 3092 }, { "epoch": 0.5355264581755221, "grad_norm": 1.328125, "learning_rate": 1.854670636912977e-05, "loss": 0.5648, "step": 3093 }, { "epoch": 0.5356995996104318, "grad_norm": 1.375, "learning_rate": 1.854575933437778e-05, "loss": 0.621, "step": 3094 }, { "epoch": 0.5358727410453414, "grad_norm": 1.453125, "learning_rate": 1.8544812015355195e-05, "loss": 0.6259, "step": 3095 }, { "epoch": 0.5360458824802511, "grad_norm": 1.3125, "learning_rate": 1.8543864412093525e-05, "loss": 0.5487, "step": 3096 }, { "epoch": 0.5362190239151607, "grad_norm": 1.40625, "learning_rate": 1.854291652462429e-05, "loss": 0.6611, "step": 3097 }, { "epoch": 0.5363921653500704, "grad_norm": 1.3046875, "learning_rate": 1.8541968352979028e-05, "loss": 0.6069, "step": 3098 }, { "epoch": 0.5365653067849799, "grad_norm": 1.2734375, "learning_rate": 1.854101989718927e-05, "loss": 0.5616, "step": 3099 }, { "epoch": 0.5367384482198896, "grad_norm": 1.4375, "learning_rate": 1.854007115728657e-05, "loss": 0.6327, "step": 3100 }, { "epoch": 0.5369115896547992, "grad_norm": 1.5859375, "learning_rate": 1.8539122133302495e-05, "loss": 0.6229, "step": 3101 }, { "epoch": 0.5370847310897089, "grad_norm": 1.4296875, "learning_rate": 1.85381728252686e-05, "loss": 0.5992, "step": 3102 }, { "epoch": 0.5372578725246185, "grad_norm": 1.3671875, "learning_rate": 1.853722323321647e-05, "loss": 0.6675, "step": 3103 }, { "epoch": 0.5374310139595282, "grad_norm": 1.3203125, "learning_rate": 1.8536273357177694e-05, "loss": 0.6837, "step": 3104 }, { "epoch": 0.5376041553944378, "grad_norm": 1.34375, "learning_rate": 1.853532319718387e-05, "loss": 0.5432, "step": 3105 }, { "epoch": 0.5377772968293475, "grad_norm": 1.4453125, "learning_rate": 1.85343727532666e-05, "loss": 0.6097, "step": 3106 }, { "epoch": 0.5379504382642571, "grad_norm": 1.3359375, "learning_rate": 1.8533422025457502e-05, "loss": 0.5908, "step": 3107 }, { "epoch": 0.5381235796991668, "grad_norm": 1.4609375, "learning_rate": 1.8532471013788202e-05, "loss": 0.6139, "step": 3108 }, { "epoch": 0.5382967211340765, "grad_norm": 1.359375, "learning_rate": 1.853151971829034e-05, "loss": 0.5789, "step": 3109 }, { "epoch": 0.538469862568986, "grad_norm": 1.3671875, "learning_rate": 1.853056813899555e-05, "loss": 0.5571, "step": 3110 }, { "epoch": 0.5386430040038956, "grad_norm": 1.265625, "learning_rate": 1.852961627593549e-05, "loss": 0.5893, "step": 3111 }, { "epoch": 0.5388161454388053, "grad_norm": 1.390625, "learning_rate": 1.8528664129141827e-05, "loss": 0.5743, "step": 3112 }, { "epoch": 0.538989286873715, "grad_norm": 1.375, "learning_rate": 1.852771169864623e-05, "loss": 0.6037, "step": 3113 }, { "epoch": 0.5391624283086246, "grad_norm": 1.390625, "learning_rate": 1.8526758984480383e-05, "loss": 0.6098, "step": 3114 }, { "epoch": 0.5393355697435342, "grad_norm": 1.3671875, "learning_rate": 1.852580598667598e-05, "loss": 0.6133, "step": 3115 }, { "epoch": 0.5395087111784439, "grad_norm": 1.4375, "learning_rate": 1.8524852705264716e-05, "loss": 0.6372, "step": 3116 }, { "epoch": 0.5396818526133536, "grad_norm": 1.21875, "learning_rate": 1.8523899140278303e-05, "loss": 0.5259, "step": 3117 }, { "epoch": 0.5398549940482632, "grad_norm": 1.4140625, "learning_rate": 1.8522945291748466e-05, "loss": 0.5793, "step": 3118 }, { "epoch": 0.5400281354831729, "grad_norm": 1.3125, "learning_rate": 1.8521991159706926e-05, "loss": 0.5994, "step": 3119 }, { "epoch": 0.5402012769180825, "grad_norm": 1.453125, "learning_rate": 1.852103674418543e-05, "loss": 0.6678, "step": 3120 }, { "epoch": 0.540374418352992, "grad_norm": 1.4296875, "learning_rate": 1.852008204521572e-05, "loss": 0.6343, "step": 3121 }, { "epoch": 0.5405475597879017, "grad_norm": 1.515625, "learning_rate": 1.8519127062829557e-05, "loss": 0.6109, "step": 3122 }, { "epoch": 0.5407207012228114, "grad_norm": 1.34375, "learning_rate": 1.8518171797058707e-05, "loss": 0.6601, "step": 3123 }, { "epoch": 0.540893842657721, "grad_norm": 1.3125, "learning_rate": 1.8517216247934946e-05, "loss": 0.7571, "step": 3124 }, { "epoch": 0.5410669840926307, "grad_norm": 1.453125, "learning_rate": 1.8516260415490062e-05, "loss": 0.6585, "step": 3125 }, { "epoch": 0.5412401255275403, "grad_norm": 1.3046875, "learning_rate": 1.8515304299755846e-05, "loss": 0.6327, "step": 3126 }, { "epoch": 0.54141326696245, "grad_norm": 1.3359375, "learning_rate": 1.8514347900764108e-05, "loss": 0.5931, "step": 3127 }, { "epoch": 0.5415864083973596, "grad_norm": 1.3359375, "learning_rate": 1.851339121854666e-05, "loss": 0.6347, "step": 3128 }, { "epoch": 0.5417595498322693, "grad_norm": 1.3671875, "learning_rate": 1.8512434253135324e-05, "loss": 0.6147, "step": 3129 }, { "epoch": 0.5419326912671789, "grad_norm": 1.3984375, "learning_rate": 1.8511477004561934e-05, "loss": 0.6226, "step": 3130 }, { "epoch": 0.5421058327020886, "grad_norm": 1.3828125, "learning_rate": 1.8510519472858332e-05, "loss": 0.5596, "step": 3131 }, { "epoch": 0.5422789741369982, "grad_norm": 1.40625, "learning_rate": 1.8509561658056373e-05, "loss": 0.5983, "step": 3132 }, { "epoch": 0.5424521155719078, "grad_norm": 1.40625, "learning_rate": 1.8508603560187915e-05, "loss": 0.6623, "step": 3133 }, { "epoch": 0.5426252570068174, "grad_norm": 1.4921875, "learning_rate": 1.850764517928483e-05, "loss": 0.5916, "step": 3134 }, { "epoch": 0.5427983984417271, "grad_norm": 1.25, "learning_rate": 1.850668651537899e-05, "loss": 0.567, "step": 3135 }, { "epoch": 0.5429715398766367, "grad_norm": 1.2578125, "learning_rate": 1.8505727568502302e-05, "loss": 0.5577, "step": 3136 }, { "epoch": 0.5431446813115464, "grad_norm": 1.390625, "learning_rate": 1.850476833868665e-05, "loss": 0.6284, "step": 3137 }, { "epoch": 0.543317822746456, "grad_norm": 1.296875, "learning_rate": 1.8503808825963947e-05, "loss": 0.5932, "step": 3138 }, { "epoch": 0.5434909641813657, "grad_norm": 1.296875, "learning_rate": 1.8502849030366113e-05, "loss": 0.5967, "step": 3139 }, { "epoch": 0.5436641056162753, "grad_norm": 1.4453125, "learning_rate": 1.8501888951925073e-05, "loss": 0.7312, "step": 3140 }, { "epoch": 0.543837247051185, "grad_norm": 1.3828125, "learning_rate": 1.850092859067276e-05, "loss": 0.6686, "step": 3141 }, { "epoch": 0.5440103884860946, "grad_norm": 1.34375, "learning_rate": 1.8499967946641127e-05, "loss": 0.6199, "step": 3142 }, { "epoch": 0.5441835299210043, "grad_norm": 1.5078125, "learning_rate": 1.8499007019862122e-05, "loss": 0.6014, "step": 3143 }, { "epoch": 0.5443566713559138, "grad_norm": 1.3671875, "learning_rate": 1.849804581036772e-05, "loss": 0.6406, "step": 3144 }, { "epoch": 0.5445298127908235, "grad_norm": 1.421875, "learning_rate": 1.8497084318189884e-05, "loss": 0.5983, "step": 3145 }, { "epoch": 0.5447029542257331, "grad_norm": 1.3203125, "learning_rate": 1.8496122543360605e-05, "loss": 0.6236, "step": 3146 }, { "epoch": 0.5448760956606428, "grad_norm": 1.3671875, "learning_rate": 1.849516048591187e-05, "loss": 0.6074, "step": 3147 }, { "epoch": 0.5450492370955524, "grad_norm": 1.375, "learning_rate": 1.8494198145875687e-05, "loss": 0.5379, "step": 3148 }, { "epoch": 0.5452223785304621, "grad_norm": 1.359375, "learning_rate": 1.8493235523284067e-05, "loss": 0.6301, "step": 3149 }, { "epoch": 0.5453955199653717, "grad_norm": 1.390625, "learning_rate": 1.849227261816903e-05, "loss": 0.5532, "step": 3150 }, { "epoch": 0.5455686614002814, "grad_norm": 1.3359375, "learning_rate": 1.8491309430562604e-05, "loss": 0.6077, "step": 3151 }, { "epoch": 0.545741802835191, "grad_norm": 1.3359375, "learning_rate": 1.8490345960496834e-05, "loss": 0.6463, "step": 3152 }, { "epoch": 0.5459149442701007, "grad_norm": 1.5390625, "learning_rate": 1.8489382208003767e-05, "loss": 0.679, "step": 3153 }, { "epoch": 0.5460880857050103, "grad_norm": 1.2421875, "learning_rate": 1.848841817311546e-05, "loss": 0.5417, "step": 3154 }, { "epoch": 0.5462612271399199, "grad_norm": 1.3515625, "learning_rate": 1.848745385586398e-05, "loss": 0.5813, "step": 3155 }, { "epoch": 0.5464343685748295, "grad_norm": 1.3828125, "learning_rate": 1.848648925628141e-05, "loss": 0.6035, "step": 3156 }, { "epoch": 0.5466075100097392, "grad_norm": 1.6640625, "learning_rate": 1.8485524374399832e-05, "loss": 0.5441, "step": 3157 }, { "epoch": 0.5467806514446488, "grad_norm": 1.3671875, "learning_rate": 1.8484559210251345e-05, "loss": 0.6102, "step": 3158 }, { "epoch": 0.5469537928795585, "grad_norm": 1.34375, "learning_rate": 1.848359376386806e-05, "loss": 0.5974, "step": 3159 }, { "epoch": 0.5471269343144681, "grad_norm": 1.3515625, "learning_rate": 1.8482628035282078e-05, "loss": 0.5351, "step": 3160 }, { "epoch": 0.5473000757493778, "grad_norm": 1.546875, "learning_rate": 1.8481662024525536e-05, "loss": 0.6784, "step": 3161 }, { "epoch": 0.5474732171842874, "grad_norm": 1.28125, "learning_rate": 1.8480695731630563e-05, "loss": 0.6392, "step": 3162 }, { "epoch": 0.5476463586191971, "grad_norm": 1.21875, "learning_rate": 1.8479729156629302e-05, "loss": 0.4987, "step": 3163 }, { "epoch": 0.5478195000541067, "grad_norm": 1.3125, "learning_rate": 1.8478762299553906e-05, "loss": 0.5784, "step": 3164 }, { "epoch": 0.5479926414890164, "grad_norm": 1.3203125, "learning_rate": 1.8477795160436542e-05, "loss": 0.5672, "step": 3165 }, { "epoch": 0.548165782923926, "grad_norm": 1.4296875, "learning_rate": 1.847682773930937e-05, "loss": 0.6227, "step": 3166 }, { "epoch": 0.5483389243588356, "grad_norm": 1.40625, "learning_rate": 1.8475860036204585e-05, "loss": 0.6094, "step": 3167 }, { "epoch": 0.5485120657937452, "grad_norm": 1.34375, "learning_rate": 1.8474892051154366e-05, "loss": 0.6103, "step": 3168 }, { "epoch": 0.5486852072286549, "grad_norm": 1.3125, "learning_rate": 1.8473923784190918e-05, "loss": 0.5231, "step": 3169 }, { "epoch": 0.5488583486635645, "grad_norm": 1.3046875, "learning_rate": 1.847295523534645e-05, "loss": 0.5653, "step": 3170 }, { "epoch": 0.5490314900984742, "grad_norm": 1.5, "learning_rate": 1.8471986404653176e-05, "loss": 0.6339, "step": 3171 }, { "epoch": 0.5492046315333838, "grad_norm": 1.328125, "learning_rate": 1.8471017292143323e-05, "loss": 0.5698, "step": 3172 }, { "epoch": 0.5493777729682935, "grad_norm": 1.4296875, "learning_rate": 1.847004789784914e-05, "loss": 0.6658, "step": 3173 }, { "epoch": 0.5495509144032031, "grad_norm": 1.4140625, "learning_rate": 1.846907822180286e-05, "loss": 0.6003, "step": 3174 }, { "epoch": 0.5497240558381128, "grad_norm": 1.328125, "learning_rate": 1.8468108264036747e-05, "loss": 0.6442, "step": 3175 }, { "epoch": 0.5498971972730224, "grad_norm": 1.3984375, "learning_rate": 1.846713802458306e-05, "loss": 0.6244, "step": 3176 }, { "epoch": 0.5500703387079321, "grad_norm": 1.265625, "learning_rate": 1.846616750347408e-05, "loss": 0.5707, "step": 3177 }, { "epoch": 0.5502434801428416, "grad_norm": 1.2578125, "learning_rate": 1.8465196700742083e-05, "loss": 0.6287, "step": 3178 }, { "epoch": 0.5504166215777513, "grad_norm": 1.2265625, "learning_rate": 1.8464225616419368e-05, "loss": 0.5996, "step": 3179 }, { "epoch": 0.5505897630126609, "grad_norm": 1.3828125, "learning_rate": 1.846325425053824e-05, "loss": 0.6327, "step": 3180 }, { "epoch": 0.5507629044475706, "grad_norm": 1.3515625, "learning_rate": 1.8462282603131005e-05, "loss": 0.6532, "step": 3181 }, { "epoch": 0.5509360458824802, "grad_norm": 1.7109375, "learning_rate": 1.8461310674229986e-05, "loss": 0.5716, "step": 3182 }, { "epoch": 0.5511091873173899, "grad_norm": 1.2890625, "learning_rate": 1.8460338463867515e-05, "loss": 0.6035, "step": 3183 }, { "epoch": 0.5512823287522995, "grad_norm": 1.28125, "learning_rate": 1.8459365972075934e-05, "loss": 0.6219, "step": 3184 }, { "epoch": 0.5514554701872092, "grad_norm": 1.3828125, "learning_rate": 1.8458393198887585e-05, "loss": 0.6296, "step": 3185 }, { "epoch": 0.5516286116221188, "grad_norm": 1.9453125, "learning_rate": 1.8457420144334833e-05, "loss": 0.6175, "step": 3186 }, { "epoch": 0.5518017530570285, "grad_norm": 1.375, "learning_rate": 1.8456446808450052e-05, "loss": 0.6148, "step": 3187 }, { "epoch": 0.5519748944919382, "grad_norm": 1.484375, "learning_rate": 1.845547319126561e-05, "loss": 0.5791, "step": 3188 }, { "epoch": 0.5521480359268477, "grad_norm": 1.453125, "learning_rate": 1.845449929281389e-05, "loss": 0.6304, "step": 3189 }, { "epoch": 0.5523211773617573, "grad_norm": 1.453125, "learning_rate": 1.8453525113127302e-05, "loss": 0.5739, "step": 3190 }, { "epoch": 0.552494318796667, "grad_norm": 1.3125, "learning_rate": 1.8452550652238243e-05, "loss": 0.5646, "step": 3191 }, { "epoch": 0.5526674602315766, "grad_norm": 1.40625, "learning_rate": 1.8451575910179128e-05, "loss": 0.5978, "step": 3192 }, { "epoch": 0.5528406016664863, "grad_norm": 1.3671875, "learning_rate": 1.8450600886982385e-05, "loss": 0.6023, "step": 3193 }, { "epoch": 0.553013743101396, "grad_norm": 1.3828125, "learning_rate": 1.8449625582680445e-05, "loss": 0.5688, "step": 3194 }, { "epoch": 0.5531868845363056, "grad_norm": 1.4375, "learning_rate": 1.8448649997305752e-05, "loss": 0.6317, "step": 3195 }, { "epoch": 0.5533600259712153, "grad_norm": 1.3359375, "learning_rate": 1.8447674130890756e-05, "loss": 0.6045, "step": 3196 }, { "epoch": 0.5535331674061249, "grad_norm": 1.3828125, "learning_rate": 1.844669798346792e-05, "loss": 0.6592, "step": 3197 }, { "epoch": 0.5537063088410346, "grad_norm": 1.34375, "learning_rate": 1.844572155506972e-05, "loss": 0.567, "step": 3198 }, { "epoch": 0.5538794502759442, "grad_norm": 1.3515625, "learning_rate": 1.844474484572863e-05, "loss": 0.5758, "step": 3199 }, { "epoch": 0.5540525917108539, "grad_norm": 1.46875, "learning_rate": 1.844376785547714e-05, "loss": 0.5995, "step": 3200 }, { "epoch": 0.5542257331457634, "grad_norm": 1.3125, "learning_rate": 1.844279058434775e-05, "loss": 0.5602, "step": 3201 }, { "epoch": 0.554398874580673, "grad_norm": 1.4375, "learning_rate": 1.8441813032372975e-05, "loss": 0.5877, "step": 3202 }, { "epoch": 0.5545720160155827, "grad_norm": 1.34375, "learning_rate": 1.8440835199585327e-05, "loss": 0.6071, "step": 3203 }, { "epoch": 0.5547451574504924, "grad_norm": 1.28125, "learning_rate": 1.843985708601733e-05, "loss": 0.5785, "step": 3204 }, { "epoch": 0.554918298885402, "grad_norm": 1.3203125, "learning_rate": 1.8438878691701525e-05, "loss": 0.5758, "step": 3205 }, { "epoch": 0.5550914403203117, "grad_norm": 1.3125, "learning_rate": 1.8437900016670457e-05, "loss": 0.5972, "step": 3206 }, { "epoch": 0.5552645817552213, "grad_norm": 1.359375, "learning_rate": 1.843692106095668e-05, "loss": 0.6138, "step": 3207 }, { "epoch": 0.555437723190131, "grad_norm": 1.390625, "learning_rate": 1.843594182459276e-05, "loss": 0.5537, "step": 3208 }, { "epoch": 0.5556108646250406, "grad_norm": 1.40625, "learning_rate": 1.8434962307611272e-05, "loss": 0.5899, "step": 3209 }, { "epoch": 0.5557840060599503, "grad_norm": 1.3671875, "learning_rate": 1.8433982510044793e-05, "loss": 0.5963, "step": 3210 }, { "epoch": 0.5559571474948599, "grad_norm": 1.359375, "learning_rate": 1.8433002431925925e-05, "loss": 0.6722, "step": 3211 }, { "epoch": 0.5561302889297695, "grad_norm": 1.6875, "learning_rate": 1.8432022073287262e-05, "loss": 0.6075, "step": 3212 }, { "epoch": 0.5563034303646791, "grad_norm": 1.703125, "learning_rate": 1.8431041434161417e-05, "loss": 0.626, "step": 3213 }, { "epoch": 0.5564765717995888, "grad_norm": 1.2890625, "learning_rate": 1.8430060514581015e-05, "loss": 0.5984, "step": 3214 }, { "epoch": 0.5566497132344984, "grad_norm": 1.4609375, "learning_rate": 1.842907931457868e-05, "loss": 0.6127, "step": 3215 }, { "epoch": 0.5568228546694081, "grad_norm": 1.3984375, "learning_rate": 1.8428097834187055e-05, "loss": 0.6055, "step": 3216 }, { "epoch": 0.5569959961043177, "grad_norm": 1.3125, "learning_rate": 1.8427116073438785e-05, "loss": 0.6197, "step": 3217 }, { "epoch": 0.5571691375392274, "grad_norm": 1.40625, "learning_rate": 1.8426134032366528e-05, "loss": 0.6677, "step": 3218 }, { "epoch": 0.557342278974137, "grad_norm": 1.2734375, "learning_rate": 1.8425151711002956e-05, "loss": 0.7159, "step": 3219 }, { "epoch": 0.5575154204090467, "grad_norm": 1.453125, "learning_rate": 1.842416910938074e-05, "loss": 0.5893, "step": 3220 }, { "epoch": 0.5576885618439563, "grad_norm": 1.28125, "learning_rate": 1.842318622753257e-05, "loss": 0.6258, "step": 3221 }, { "epoch": 0.557861703278866, "grad_norm": 1.3203125, "learning_rate": 1.842220306549114e-05, "loss": 0.5943, "step": 3222 }, { "epoch": 0.5580348447137755, "grad_norm": 1.3515625, "learning_rate": 1.842121962328915e-05, "loss": 0.656, "step": 3223 }, { "epoch": 0.5582079861486852, "grad_norm": 1.3125, "learning_rate": 1.842023590095932e-05, "loss": 0.6322, "step": 3224 }, { "epoch": 0.5583811275835948, "grad_norm": 1.296875, "learning_rate": 1.841925189853437e-05, "loss": 0.5994, "step": 3225 }, { "epoch": 0.5585542690185045, "grad_norm": 1.453125, "learning_rate": 1.841826761604703e-05, "loss": 0.5967, "step": 3226 }, { "epoch": 0.5587274104534141, "grad_norm": 1.40625, "learning_rate": 1.8417283053530047e-05, "loss": 0.6457, "step": 3227 }, { "epoch": 0.5589005518883238, "grad_norm": 1.4375, "learning_rate": 1.8416298211016168e-05, "loss": 0.6568, "step": 3228 }, { "epoch": 0.5590736933232334, "grad_norm": 1.3125, "learning_rate": 1.8415313088538154e-05, "loss": 0.6098, "step": 3229 }, { "epoch": 0.5592468347581431, "grad_norm": 1.28125, "learning_rate": 1.8414327686128778e-05, "loss": 0.51, "step": 3230 }, { "epoch": 0.5594199761930527, "grad_norm": 1.28125, "learning_rate": 1.8413342003820813e-05, "loss": 0.564, "step": 3231 }, { "epoch": 0.5595931176279624, "grad_norm": 1.375, "learning_rate": 1.841235604164705e-05, "loss": 0.5225, "step": 3232 }, { "epoch": 0.559766259062872, "grad_norm": 1.3828125, "learning_rate": 1.841136979964029e-05, "loss": 0.6452, "step": 3233 }, { "epoch": 0.5599394004977817, "grad_norm": 1.40625, "learning_rate": 1.8410383277833338e-05, "loss": 0.5526, "step": 3234 }, { "epoch": 0.5601125419326912, "grad_norm": 1.3515625, "learning_rate": 1.8409396476259006e-05, "loss": 0.5743, "step": 3235 }, { "epoch": 0.5602856833676009, "grad_norm": 1.2578125, "learning_rate": 1.8408409394950128e-05, "loss": 0.5651, "step": 3236 }, { "epoch": 0.5604588248025105, "grad_norm": 1.3046875, "learning_rate": 1.8407422033939525e-05, "loss": 0.6195, "step": 3237 }, { "epoch": 0.5606319662374202, "grad_norm": 1.3515625, "learning_rate": 1.8406434393260054e-05, "loss": 0.6542, "step": 3238 }, { "epoch": 0.5608051076723298, "grad_norm": 1.453125, "learning_rate": 1.8405446472944564e-05, "loss": 0.5904, "step": 3239 }, { "epoch": 0.5609782491072395, "grad_norm": 1.2265625, "learning_rate": 1.8404458273025917e-05, "loss": 0.553, "step": 3240 }, { "epoch": 0.5611513905421491, "grad_norm": 1.3359375, "learning_rate": 1.8403469793536985e-05, "loss": 0.6336, "step": 3241 }, { "epoch": 0.5613245319770588, "grad_norm": 1.359375, "learning_rate": 1.840248103451065e-05, "loss": 0.6209, "step": 3242 }, { "epoch": 0.5614976734119684, "grad_norm": 1.2578125, "learning_rate": 1.8401491995979803e-05, "loss": 0.563, "step": 3243 }, { "epoch": 0.5616708148468781, "grad_norm": 1.265625, "learning_rate": 1.8400502677977344e-05, "loss": 0.5724, "step": 3244 }, { "epoch": 0.5618439562817877, "grad_norm": 1.40625, "learning_rate": 1.839951308053618e-05, "loss": 0.5945, "step": 3245 }, { "epoch": 0.5620170977166973, "grad_norm": 1.3203125, "learning_rate": 1.8398523203689235e-05, "loss": 0.5845, "step": 3246 }, { "epoch": 0.5621902391516069, "grad_norm": 1.3203125, "learning_rate": 1.839753304746943e-05, "loss": 0.6792, "step": 3247 }, { "epoch": 0.5623633805865166, "grad_norm": 1.34375, "learning_rate": 1.8396542611909704e-05, "loss": 0.539, "step": 3248 }, { "epoch": 0.5625365220214262, "grad_norm": 1.4140625, "learning_rate": 1.8395551897043002e-05, "loss": 0.5732, "step": 3249 }, { "epoch": 0.5627096634563359, "grad_norm": 1.3046875, "learning_rate": 1.8394560902902284e-05, "loss": 0.5558, "step": 3250 }, { "epoch": 0.5628828048912455, "grad_norm": 1.3125, "learning_rate": 1.8393569629520516e-05, "loss": 0.5423, "step": 3251 }, { "epoch": 0.5630559463261552, "grad_norm": 1.4375, "learning_rate": 1.8392578076930663e-05, "loss": 0.6885, "step": 3252 }, { "epoch": 0.5632290877610648, "grad_norm": 1.3125, "learning_rate": 1.8391586245165718e-05, "loss": 0.6082, "step": 3253 }, { "epoch": 0.5634022291959745, "grad_norm": 1.3359375, "learning_rate": 1.8390594134258673e-05, "loss": 0.5909, "step": 3254 }, { "epoch": 0.5635753706308841, "grad_norm": 1.40625, "learning_rate": 1.838960174424252e-05, "loss": 0.5957, "step": 3255 }, { "epoch": 0.5637485120657938, "grad_norm": 1.3125, "learning_rate": 1.8388609075150284e-05, "loss": 0.5259, "step": 3256 }, { "epoch": 0.5639216535007033, "grad_norm": 1.296875, "learning_rate": 1.838761612701498e-05, "loss": 0.577, "step": 3257 }, { "epoch": 0.564094794935613, "grad_norm": 1.3515625, "learning_rate": 1.8386622899869637e-05, "loss": 0.6116, "step": 3258 }, { "epoch": 0.5642679363705226, "grad_norm": 1.2421875, "learning_rate": 1.8385629393747292e-05, "loss": 0.6089, "step": 3259 }, { "epoch": 0.5644410778054323, "grad_norm": 1.484375, "learning_rate": 1.8384635608681e-05, "loss": 0.6535, "step": 3260 }, { "epoch": 0.5646142192403419, "grad_norm": 1.4375, "learning_rate": 1.838364154470381e-05, "loss": 0.5643, "step": 3261 }, { "epoch": 0.5647873606752516, "grad_norm": 1.234375, "learning_rate": 1.8382647201848796e-05, "loss": 0.6647, "step": 3262 }, { "epoch": 0.5649605021101612, "grad_norm": 1.3828125, "learning_rate": 1.8381652580149033e-05, "loss": 0.6302, "step": 3263 }, { "epoch": 0.5651336435450709, "grad_norm": 1.3046875, "learning_rate": 1.8380657679637604e-05, "loss": 0.5526, "step": 3264 }, { "epoch": 0.5653067849799805, "grad_norm": 1.5078125, "learning_rate": 1.8379662500347612e-05, "loss": 0.5982, "step": 3265 }, { "epoch": 0.5654799264148902, "grad_norm": 1.328125, "learning_rate": 1.837866704231215e-05, "loss": 0.5936, "step": 3266 }, { "epoch": 0.5656530678497999, "grad_norm": 1.328125, "learning_rate": 1.8377671305564336e-05, "loss": 0.6161, "step": 3267 }, { "epoch": 0.5658262092847094, "grad_norm": 1.546875, "learning_rate": 1.8376675290137297e-05, "loss": 0.6191, "step": 3268 }, { "epoch": 0.565999350719619, "grad_norm": 1.4296875, "learning_rate": 1.8375678996064157e-05, "loss": 0.5166, "step": 3269 }, { "epoch": 0.5661724921545287, "grad_norm": 1.2734375, "learning_rate": 1.8374682423378065e-05, "loss": 0.5478, "step": 3270 }, { "epoch": 0.5663456335894383, "grad_norm": 1.375, "learning_rate": 1.8373685572112164e-05, "loss": 0.5842, "step": 3271 }, { "epoch": 0.566518775024348, "grad_norm": 1.4453125, "learning_rate": 1.837268844229962e-05, "loss": 0.6281, "step": 3272 }, { "epoch": 0.5666919164592576, "grad_norm": 1.40625, "learning_rate": 1.83716910339736e-05, "loss": 0.5917, "step": 3273 }, { "epoch": 0.5668650578941673, "grad_norm": 1.3359375, "learning_rate": 1.8370693347167283e-05, "loss": 0.5721, "step": 3274 }, { "epoch": 0.567038199329077, "grad_norm": 1.3984375, "learning_rate": 1.8369695381913852e-05, "loss": 0.6147, "step": 3275 }, { "epoch": 0.5672113407639866, "grad_norm": 1.4453125, "learning_rate": 1.836869713824651e-05, "loss": 0.5618, "step": 3276 }, { "epoch": 0.5673844821988963, "grad_norm": 1.3046875, "learning_rate": 1.8367698616198463e-05, "loss": 0.5854, "step": 3277 }, { "epoch": 0.5675576236338059, "grad_norm": 1.3828125, "learning_rate": 1.836669981580292e-05, "loss": 0.5971, "step": 3278 }, { "epoch": 0.5677307650687156, "grad_norm": 1.421875, "learning_rate": 1.836570073709311e-05, "loss": 0.6701, "step": 3279 }, { "epoch": 0.5679039065036251, "grad_norm": 1.3203125, "learning_rate": 1.8364701380102267e-05, "loss": 0.5497, "step": 3280 }, { "epoch": 0.5680770479385348, "grad_norm": 1.3671875, "learning_rate": 1.8363701744863634e-05, "loss": 0.6069, "step": 3281 }, { "epoch": 0.5682501893734444, "grad_norm": 1.296875, "learning_rate": 1.8362701831410467e-05, "loss": 0.6009, "step": 3282 }, { "epoch": 0.568423330808354, "grad_norm": 1.2890625, "learning_rate": 1.8361701639776016e-05, "loss": 0.6251, "step": 3283 }, { "epoch": 0.5685964722432637, "grad_norm": 1.2734375, "learning_rate": 1.8360701169993564e-05, "loss": 0.603, "step": 3284 }, { "epoch": 0.5687696136781734, "grad_norm": 1.328125, "learning_rate": 1.8359700422096385e-05, "loss": 0.5701, "step": 3285 }, { "epoch": 0.568942755113083, "grad_norm": 1.4375, "learning_rate": 1.835869939611777e-05, "loss": 0.6491, "step": 3286 }, { "epoch": 0.5691158965479927, "grad_norm": 1.46875, "learning_rate": 1.835769809209102e-05, "loss": 0.6516, "step": 3287 }, { "epoch": 0.5692890379829023, "grad_norm": 1.3125, "learning_rate": 1.835669651004944e-05, "loss": 0.6262, "step": 3288 }, { "epoch": 0.569462179417812, "grad_norm": 1.296875, "learning_rate": 1.8355694650026346e-05, "loss": 0.5929, "step": 3289 }, { "epoch": 0.5696353208527216, "grad_norm": 1.2734375, "learning_rate": 1.8354692512055066e-05, "loss": 0.531, "step": 3290 }, { "epoch": 0.5698084622876312, "grad_norm": 1.328125, "learning_rate": 1.8353690096168935e-05, "loss": 0.5804, "step": 3291 }, { "epoch": 0.5699816037225408, "grad_norm": 1.28125, "learning_rate": 1.8352687402401303e-05, "loss": 0.5591, "step": 3292 }, { "epoch": 0.5701547451574505, "grad_norm": 1.3671875, "learning_rate": 1.8351684430785516e-05, "loss": 0.5994, "step": 3293 }, { "epoch": 0.5703278865923601, "grad_norm": 1.484375, "learning_rate": 1.8350681181354943e-05, "loss": 0.6249, "step": 3294 }, { "epoch": 0.5705010280272698, "grad_norm": 1.4375, "learning_rate": 1.8349677654142954e-05, "loss": 0.6295, "step": 3295 }, { "epoch": 0.5706741694621794, "grad_norm": 1.328125, "learning_rate": 1.8348673849182933e-05, "loss": 0.5779, "step": 3296 }, { "epoch": 0.5708473108970891, "grad_norm": 1.4140625, "learning_rate": 1.834766976650827e-05, "loss": 0.6122, "step": 3297 }, { "epoch": 0.5710204523319987, "grad_norm": 1.3125, "learning_rate": 1.8346665406152362e-05, "loss": 0.6344, "step": 3298 }, { "epoch": 0.5711935937669084, "grad_norm": 1.2890625, "learning_rate": 1.834566076814862e-05, "loss": 0.5902, "step": 3299 }, { "epoch": 0.571366735201818, "grad_norm": 1.3828125, "learning_rate": 1.834465585253047e-05, "loss": 0.6429, "step": 3300 }, { "epoch": 0.5715398766367277, "grad_norm": 1.28125, "learning_rate": 1.8343650659331332e-05, "loss": 0.5409, "step": 3301 }, { "epoch": 0.5717130180716372, "grad_norm": 1.2734375, "learning_rate": 1.8342645188584645e-05, "loss": 0.578, "step": 3302 }, { "epoch": 0.5718861595065469, "grad_norm": 1.3203125, "learning_rate": 1.8341639440323858e-05, "loss": 0.6042, "step": 3303 }, { "epoch": 0.5720593009414565, "grad_norm": 1.4375, "learning_rate": 1.8340633414582423e-05, "loss": 0.5811, "step": 3304 }, { "epoch": 0.5722324423763662, "grad_norm": 1.3046875, "learning_rate": 1.8339627111393808e-05, "loss": 0.6371, "step": 3305 }, { "epoch": 0.5724055838112758, "grad_norm": 1.359375, "learning_rate": 1.8338620530791488e-05, "loss": 0.6357, "step": 3306 }, { "epoch": 0.5725787252461855, "grad_norm": 1.296875, "learning_rate": 1.8337613672808937e-05, "loss": 0.5773, "step": 3307 }, { "epoch": 0.5727518666810951, "grad_norm": 1.4140625, "learning_rate": 1.833660653747966e-05, "loss": 0.5806, "step": 3308 }, { "epoch": 0.5729250081160048, "grad_norm": 1.2421875, "learning_rate": 1.8335599124837157e-05, "loss": 0.5484, "step": 3309 }, { "epoch": 0.5730981495509144, "grad_norm": 1.3515625, "learning_rate": 1.8334591434914936e-05, "loss": 0.6187, "step": 3310 }, { "epoch": 0.5732712909858241, "grad_norm": 1.4296875, "learning_rate": 1.8333583467746515e-05, "loss": 0.6184, "step": 3311 }, { "epoch": 0.5734444324207337, "grad_norm": 1.4296875, "learning_rate": 1.8332575223365424e-05, "loss": 0.5979, "step": 3312 }, { "epoch": 0.5736175738556434, "grad_norm": 1.4140625, "learning_rate": 1.8331566701805207e-05, "loss": 0.5839, "step": 3313 }, { "epoch": 0.5737907152905529, "grad_norm": 1.2734375, "learning_rate": 1.833055790309941e-05, "loss": 0.5718, "step": 3314 }, { "epoch": 0.5739638567254626, "grad_norm": 1.2890625, "learning_rate": 1.8329548827281588e-05, "loss": 0.5619, "step": 3315 }, { "epoch": 0.5741369981603722, "grad_norm": 1.2734375, "learning_rate": 1.8328539474385304e-05, "loss": 0.5926, "step": 3316 }, { "epoch": 0.5743101395952819, "grad_norm": 1.3203125, "learning_rate": 1.8327529844444143e-05, "loss": 0.5706, "step": 3317 }, { "epoch": 0.5744832810301915, "grad_norm": 1.421875, "learning_rate": 1.8326519937491682e-05, "loss": 0.6053, "step": 3318 }, { "epoch": 0.5746564224651012, "grad_norm": 1.46875, "learning_rate": 1.832550975356152e-05, "loss": 0.5911, "step": 3319 }, { "epoch": 0.5748295639000108, "grad_norm": 1.375, "learning_rate": 1.832449929268726e-05, "loss": 0.6676, "step": 3320 }, { "epoch": 0.5750027053349205, "grad_norm": 1.2578125, "learning_rate": 1.8323488554902507e-05, "loss": 0.5686, "step": 3321 }, { "epoch": 0.5751758467698301, "grad_norm": 1.3515625, "learning_rate": 1.8322477540240893e-05, "loss": 0.5817, "step": 3322 }, { "epoch": 0.5753489882047398, "grad_norm": 1.421875, "learning_rate": 1.8321466248736044e-05, "loss": 0.6976, "step": 3323 }, { "epoch": 0.5755221296396494, "grad_norm": 1.3359375, "learning_rate": 1.83204546804216e-05, "loss": 0.6421, "step": 3324 }, { "epoch": 0.575695271074559, "grad_norm": 1.265625, "learning_rate": 1.8319442835331212e-05, "loss": 0.5976, "step": 3325 }, { "epoch": 0.5758684125094686, "grad_norm": 1.2734375, "learning_rate": 1.8318430713498535e-05, "loss": 0.6295, "step": 3326 }, { "epoch": 0.5760415539443783, "grad_norm": 1.265625, "learning_rate": 1.831741831495724e-05, "loss": 0.6643, "step": 3327 }, { "epoch": 0.5762146953792879, "grad_norm": 1.3671875, "learning_rate": 1.8316405639741003e-05, "loss": 0.5361, "step": 3328 }, { "epoch": 0.5763878368141976, "grad_norm": 1.4375, "learning_rate": 1.8315392687883512e-05, "loss": 0.5873, "step": 3329 }, { "epoch": 0.5765609782491072, "grad_norm": 1.3984375, "learning_rate": 1.8314379459418458e-05, "loss": 0.6625, "step": 3330 }, { "epoch": 0.5767341196840169, "grad_norm": 1.4609375, "learning_rate": 1.831336595437955e-05, "loss": 0.6449, "step": 3331 }, { "epoch": 0.5769072611189265, "grad_norm": 1.2421875, "learning_rate": 1.83123521728005e-05, "loss": 0.5287, "step": 3332 }, { "epoch": 0.5770804025538362, "grad_norm": 1.328125, "learning_rate": 1.831133811471503e-05, "loss": 0.5533, "step": 3333 }, { "epoch": 0.5772535439887458, "grad_norm": 1.296875, "learning_rate": 1.8310323780156873e-05, "loss": 0.5674, "step": 3334 }, { "epoch": 0.5774266854236555, "grad_norm": 1.3515625, "learning_rate": 1.830930916915977e-05, "loss": 0.5973, "step": 3335 }, { "epoch": 0.577599826858565, "grad_norm": 1.2421875, "learning_rate": 1.8308294281757476e-05, "loss": 0.5887, "step": 3336 }, { "epoch": 0.5777729682934747, "grad_norm": 1.328125, "learning_rate": 1.8307279117983744e-05, "loss": 0.5581, "step": 3337 }, { "epoch": 0.5779461097283843, "grad_norm": 1.46875, "learning_rate": 1.8306263677872343e-05, "loss": 0.6345, "step": 3338 }, { "epoch": 0.578119251163294, "grad_norm": 1.28125, "learning_rate": 1.8305247961457057e-05, "loss": 0.5573, "step": 3339 }, { "epoch": 0.5782923925982036, "grad_norm": 1.2890625, "learning_rate": 1.8304231968771668e-05, "loss": 0.5619, "step": 3340 }, { "epoch": 0.5784655340331133, "grad_norm": 1.421875, "learning_rate": 1.8303215699849977e-05, "loss": 0.607, "step": 3341 }, { "epoch": 0.578638675468023, "grad_norm": 1.3203125, "learning_rate": 1.8302199154725784e-05, "loss": 0.5951, "step": 3342 }, { "epoch": 0.5788118169029326, "grad_norm": 1.421875, "learning_rate": 1.830118233343291e-05, "loss": 0.6358, "step": 3343 }, { "epoch": 0.5789849583378422, "grad_norm": 1.515625, "learning_rate": 1.8300165236005174e-05, "loss": 0.5914, "step": 3344 }, { "epoch": 0.5791580997727519, "grad_norm": 1.453125, "learning_rate": 1.8299147862476415e-05, "loss": 0.638, "step": 3345 }, { "epoch": 0.5793312412076616, "grad_norm": 1.4375, "learning_rate": 1.829813021288047e-05, "loss": 0.5602, "step": 3346 }, { "epoch": 0.5795043826425712, "grad_norm": 1.3515625, "learning_rate": 1.8297112287251194e-05, "loss": 0.5957, "step": 3347 }, { "epoch": 0.5796775240774807, "grad_norm": 1.3203125, "learning_rate": 1.8296094085622444e-05, "loss": 0.6639, "step": 3348 }, { "epoch": 0.5798506655123904, "grad_norm": 1.3828125, "learning_rate": 1.8295075608028092e-05, "loss": 0.6183, "step": 3349 }, { "epoch": 0.5800238069473, "grad_norm": 1.4453125, "learning_rate": 1.829405685450202e-05, "loss": 0.5766, "step": 3350 }, { "epoch": 0.5801969483822097, "grad_norm": 1.3984375, "learning_rate": 1.829303782507811e-05, "loss": 0.6145, "step": 3351 }, { "epoch": 0.5803700898171194, "grad_norm": 1.3515625, "learning_rate": 1.829201851979027e-05, "loss": 0.6555, "step": 3352 }, { "epoch": 0.580543231252029, "grad_norm": 1.3515625, "learning_rate": 1.8290998938672394e-05, "loss": 0.6384, "step": 3353 }, { "epoch": 0.5807163726869387, "grad_norm": 1.40625, "learning_rate": 1.8289979081758407e-05, "loss": 0.5714, "step": 3354 }, { "epoch": 0.5808895141218483, "grad_norm": 1.4921875, "learning_rate": 1.8288958949082232e-05, "loss": 0.5519, "step": 3355 }, { "epoch": 0.581062655556758, "grad_norm": 1.3203125, "learning_rate": 1.82879385406778e-05, "loss": 0.5911, "step": 3356 }, { "epoch": 0.5812357969916676, "grad_norm": 1.328125, "learning_rate": 1.828691785657906e-05, "loss": 0.5971, "step": 3357 }, { "epoch": 0.5814089384265773, "grad_norm": 1.390625, "learning_rate": 1.8285896896819958e-05, "loss": 0.6185, "step": 3358 }, { "epoch": 0.5815820798614868, "grad_norm": 1.5, "learning_rate": 1.828487566143446e-05, "loss": 0.5687, "step": 3359 }, { "epoch": 0.5817552212963965, "grad_norm": 1.28125, "learning_rate": 1.8283854150456535e-05, "loss": 0.5565, "step": 3360 }, { "epoch": 0.5819283627313061, "grad_norm": 1.390625, "learning_rate": 1.8282832363920165e-05, "loss": 0.6542, "step": 3361 }, { "epoch": 0.5821015041662158, "grad_norm": 1.46875, "learning_rate": 1.8281810301859336e-05, "loss": 0.594, "step": 3362 }, { "epoch": 0.5822746456011254, "grad_norm": 1.390625, "learning_rate": 1.828078796430805e-05, "loss": 0.5945, "step": 3363 }, { "epoch": 0.5824477870360351, "grad_norm": 1.2578125, "learning_rate": 1.8279765351300316e-05, "loss": 0.5726, "step": 3364 }, { "epoch": 0.5826209284709447, "grad_norm": 1.2890625, "learning_rate": 1.8278742462870146e-05, "loss": 0.5594, "step": 3365 }, { "epoch": 0.5827940699058544, "grad_norm": 1.328125, "learning_rate": 1.827771929905157e-05, "loss": 0.5394, "step": 3366 }, { "epoch": 0.582967211340764, "grad_norm": 1.375, "learning_rate": 1.8276695859878616e-05, "loss": 0.6222, "step": 3367 }, { "epoch": 0.5831403527756737, "grad_norm": 1.375, "learning_rate": 1.8275672145385333e-05, "loss": 0.6525, "step": 3368 }, { "epoch": 0.5833134942105833, "grad_norm": 1.296875, "learning_rate": 1.8274648155605777e-05, "loss": 0.6206, "step": 3369 }, { "epoch": 0.5834866356454929, "grad_norm": 1.515625, "learning_rate": 1.8273623890574012e-05, "loss": 0.7827, "step": 3370 }, { "epoch": 0.5836597770804025, "grad_norm": 1.3984375, "learning_rate": 1.82725993503241e-05, "loss": 0.6643, "step": 3371 }, { "epoch": 0.5838329185153122, "grad_norm": 1.3515625, "learning_rate": 1.827157453489013e-05, "loss": 0.5508, "step": 3372 }, { "epoch": 0.5840060599502218, "grad_norm": 1.359375, "learning_rate": 1.827054944430619e-05, "loss": 0.5992, "step": 3373 }, { "epoch": 0.5841792013851315, "grad_norm": 1.28125, "learning_rate": 1.8269524078606376e-05, "loss": 0.5949, "step": 3374 }, { "epoch": 0.5843523428200411, "grad_norm": 1.328125, "learning_rate": 1.8268498437824802e-05, "loss": 0.5972, "step": 3375 }, { "epoch": 0.5845254842549508, "grad_norm": 1.484375, "learning_rate": 1.826747252199558e-05, "loss": 0.6334, "step": 3376 }, { "epoch": 0.5846986256898604, "grad_norm": 1.3359375, "learning_rate": 1.826644633115284e-05, "loss": 0.609, "step": 3377 }, { "epoch": 0.5848717671247701, "grad_norm": 1.40625, "learning_rate": 1.8265419865330717e-05, "loss": 0.6075, "step": 3378 }, { "epoch": 0.5850449085596797, "grad_norm": 1.359375, "learning_rate": 1.826439312456336e-05, "loss": 0.6287, "step": 3379 }, { "epoch": 0.5852180499945894, "grad_norm": 1.4609375, "learning_rate": 1.826336610888491e-05, "loss": 0.5778, "step": 3380 }, { "epoch": 0.585391191429499, "grad_norm": 1.3046875, "learning_rate": 1.8262338818329542e-05, "loss": 0.5013, "step": 3381 }, { "epoch": 0.5855643328644086, "grad_norm": 1.34375, "learning_rate": 1.826131125293143e-05, "loss": 0.6235, "step": 3382 }, { "epoch": 0.5857374742993182, "grad_norm": 1.34375, "learning_rate": 1.8260283412724744e-05, "loss": 0.6069, "step": 3383 }, { "epoch": 0.5859106157342279, "grad_norm": 1.359375, "learning_rate": 1.8259255297743682e-05, "loss": 0.658, "step": 3384 }, { "epoch": 0.5860837571691375, "grad_norm": 1.296875, "learning_rate": 1.825822690802245e-05, "loss": 0.5203, "step": 3385 }, { "epoch": 0.5862568986040472, "grad_norm": 1.3359375, "learning_rate": 1.825719824359524e-05, "loss": 0.5577, "step": 3386 }, { "epoch": 0.5864300400389568, "grad_norm": 1.34375, "learning_rate": 1.8256169304496288e-05, "loss": 0.6203, "step": 3387 }, { "epoch": 0.5866031814738665, "grad_norm": 1.4609375, "learning_rate": 1.825514009075981e-05, "loss": 0.6537, "step": 3388 }, { "epoch": 0.5867763229087761, "grad_norm": 1.3203125, "learning_rate": 1.8254110602420047e-05, "loss": 0.5796, "step": 3389 }, { "epoch": 0.5869494643436858, "grad_norm": 1.40625, "learning_rate": 1.8253080839511242e-05, "loss": 0.6171, "step": 3390 }, { "epoch": 0.5871226057785954, "grad_norm": 1.2890625, "learning_rate": 1.825205080206765e-05, "loss": 0.5362, "step": 3391 }, { "epoch": 0.5872957472135051, "grad_norm": 1.375, "learning_rate": 1.8251020490123536e-05, "loss": 0.6188, "step": 3392 }, { "epoch": 0.5874688886484146, "grad_norm": 1.34375, "learning_rate": 1.824998990371317e-05, "loss": 0.5821, "step": 3393 }, { "epoch": 0.5876420300833243, "grad_norm": 1.296875, "learning_rate": 1.8248959042870837e-05, "loss": 0.6233, "step": 3394 }, { "epoch": 0.5878151715182339, "grad_norm": 2.984375, "learning_rate": 1.824792790763083e-05, "loss": 0.5267, "step": 3395 }, { "epoch": 0.5879883129531436, "grad_norm": 1.359375, "learning_rate": 1.8246896498027443e-05, "loss": 0.6289, "step": 3396 }, { "epoch": 0.5881614543880532, "grad_norm": 1.3046875, "learning_rate": 1.824586481409499e-05, "loss": 0.6162, "step": 3397 }, { "epoch": 0.5883345958229629, "grad_norm": 1.2890625, "learning_rate": 1.8244832855867788e-05, "loss": 0.6011, "step": 3398 }, { "epoch": 0.5885077372578725, "grad_norm": 1.25, "learning_rate": 1.8243800623380162e-05, "loss": 0.5404, "step": 3399 }, { "epoch": 0.5886808786927822, "grad_norm": 1.25, "learning_rate": 1.8242768116666452e-05, "loss": 0.5308, "step": 3400 }, { "epoch": 0.5888540201276918, "grad_norm": 1.3359375, "learning_rate": 1.824173533576101e-05, "loss": 0.6436, "step": 3401 }, { "epoch": 0.5890271615626015, "grad_norm": 1.3203125, "learning_rate": 1.8240702280698176e-05, "loss": 0.5807, "step": 3402 }, { "epoch": 0.5892003029975111, "grad_norm": 1.28125, "learning_rate": 1.8239668951512323e-05, "loss": 0.6408, "step": 3403 }, { "epoch": 0.5893734444324207, "grad_norm": 1.3515625, "learning_rate": 1.823863534823783e-05, "loss": 0.6247, "step": 3404 }, { "epoch": 0.5895465858673303, "grad_norm": 1.3046875, "learning_rate": 1.8237601470909067e-05, "loss": 0.6025, "step": 3405 }, { "epoch": 0.58971972730224, "grad_norm": 1.53125, "learning_rate": 1.8236567319560432e-05, "loss": 0.6112, "step": 3406 }, { "epoch": 0.5898928687371496, "grad_norm": 1.328125, "learning_rate": 1.8235532894226326e-05, "loss": 0.5908, "step": 3407 }, { "epoch": 0.5900660101720593, "grad_norm": 1.3828125, "learning_rate": 1.8234498194941157e-05, "loss": 0.5855, "step": 3408 }, { "epoch": 0.5902391516069689, "grad_norm": 1.3203125, "learning_rate": 1.8233463221739344e-05, "loss": 0.5855, "step": 3409 }, { "epoch": 0.5904122930418786, "grad_norm": 1.421875, "learning_rate": 1.8232427974655316e-05, "loss": 0.6234, "step": 3410 }, { "epoch": 0.5905854344767882, "grad_norm": 1.3046875, "learning_rate": 1.823139245372351e-05, "loss": 0.5659, "step": 3411 }, { "epoch": 0.5907585759116979, "grad_norm": 1.2578125, "learning_rate": 1.823035665897837e-05, "loss": 0.5902, "step": 3412 }, { "epoch": 0.5909317173466075, "grad_norm": 1.265625, "learning_rate": 1.822932059045435e-05, "loss": 0.6541, "step": 3413 }, { "epoch": 0.5911048587815172, "grad_norm": 1.25, "learning_rate": 1.8228284248185918e-05, "loss": 0.5016, "step": 3414 }, { "epoch": 0.5912780002164268, "grad_norm": 1.421875, "learning_rate": 1.822724763220755e-05, "loss": 0.6345, "step": 3415 }, { "epoch": 0.5914511416513364, "grad_norm": 1.3671875, "learning_rate": 1.822621074255372e-05, "loss": 0.595, "step": 3416 }, { "epoch": 0.591624283086246, "grad_norm": 1.3046875, "learning_rate": 1.8225173579258927e-05, "loss": 0.5641, "step": 3417 }, { "epoch": 0.5917974245211557, "grad_norm": 1.4375, "learning_rate": 1.822413614235767e-05, "loss": 0.5831, "step": 3418 }, { "epoch": 0.5919705659560653, "grad_norm": 1.3359375, "learning_rate": 1.8223098431884457e-05, "loss": 0.5786, "step": 3419 }, { "epoch": 0.592143707390975, "grad_norm": 1.3125, "learning_rate": 1.8222060447873807e-05, "loss": 0.6302, "step": 3420 }, { "epoch": 0.5923168488258846, "grad_norm": 1.3984375, "learning_rate": 1.822102219036025e-05, "loss": 0.5816, "step": 3421 }, { "epoch": 0.5924899902607943, "grad_norm": 1.34375, "learning_rate": 1.821998365937832e-05, "loss": 0.5666, "step": 3422 }, { "epoch": 0.592663131695704, "grad_norm": 1.3984375, "learning_rate": 1.8218944854962568e-05, "loss": 0.6169, "step": 3423 }, { "epoch": 0.5928362731306136, "grad_norm": 1.3984375, "learning_rate": 1.8217905777147543e-05, "loss": 0.5854, "step": 3424 }, { "epoch": 0.5930094145655233, "grad_norm": 1.3828125, "learning_rate": 1.821686642596782e-05, "loss": 0.607, "step": 3425 }, { "epoch": 0.5931825560004329, "grad_norm": 1.5234375, "learning_rate": 1.8215826801457957e-05, "loss": 0.6314, "step": 3426 }, { "epoch": 0.5933556974353424, "grad_norm": 1.5078125, "learning_rate": 1.8214786903652552e-05, "loss": 0.6275, "step": 3427 }, { "epoch": 0.5935288388702521, "grad_norm": 1.3359375, "learning_rate": 1.8213746732586186e-05, "loss": 0.5989, "step": 3428 }, { "epoch": 0.5937019803051617, "grad_norm": 1.5390625, "learning_rate": 1.8212706288293463e-05, "loss": 0.7062, "step": 3429 }, { "epoch": 0.5938751217400714, "grad_norm": 1.2890625, "learning_rate": 1.8211665570809e-05, "loss": 0.5923, "step": 3430 }, { "epoch": 0.594048263174981, "grad_norm": 1.3359375, "learning_rate": 1.82106245801674e-05, "loss": 0.6217, "step": 3431 }, { "epoch": 0.5942214046098907, "grad_norm": 1.3203125, "learning_rate": 1.820958331640331e-05, "loss": 0.5656, "step": 3432 }, { "epoch": 0.5943945460448004, "grad_norm": 1.4296875, "learning_rate": 1.8208541779551353e-05, "loss": 0.6696, "step": 3433 }, { "epoch": 0.59456768747971, "grad_norm": 1.671875, "learning_rate": 1.820749996964618e-05, "loss": 0.7356, "step": 3434 }, { "epoch": 0.5947408289146197, "grad_norm": 1.3984375, "learning_rate": 1.820645788672245e-05, "loss": 0.5734, "step": 3435 }, { "epoch": 0.5949139703495293, "grad_norm": 1.34375, "learning_rate": 1.8205415530814817e-05, "loss": 0.6513, "step": 3436 }, { "epoch": 0.595087111784439, "grad_norm": 1.2265625, "learning_rate": 1.8204372901957967e-05, "loss": 0.5401, "step": 3437 }, { "epoch": 0.5952602532193485, "grad_norm": 1.40625, "learning_rate": 1.8203330000186572e-05, "loss": 0.5861, "step": 3438 }, { "epoch": 0.5954333946542582, "grad_norm": 1.3203125, "learning_rate": 1.820228682553533e-05, "loss": 0.5714, "step": 3439 }, { "epoch": 0.5956065360891678, "grad_norm": 1.4296875, "learning_rate": 1.820124337803894e-05, "loss": 0.6346, "step": 3440 }, { "epoch": 0.5957796775240775, "grad_norm": 1.359375, "learning_rate": 1.8200199657732115e-05, "loss": 0.5989, "step": 3441 }, { "epoch": 0.5959528189589871, "grad_norm": 1.2734375, "learning_rate": 1.819915566464957e-05, "loss": 0.603, "step": 3442 }, { "epoch": 0.5961259603938968, "grad_norm": 1.3828125, "learning_rate": 1.819811139882603e-05, "loss": 0.5693, "step": 3443 }, { "epoch": 0.5962991018288064, "grad_norm": 1.3125, "learning_rate": 1.819706686029624e-05, "loss": 0.6542, "step": 3444 }, { "epoch": 0.5964722432637161, "grad_norm": 1.3203125, "learning_rate": 1.8196022049094938e-05, "loss": 0.5602, "step": 3445 }, { "epoch": 0.5966453846986257, "grad_norm": 1.3125, "learning_rate": 1.8194976965256882e-05, "loss": 0.6225, "step": 3446 }, { "epoch": 0.5968185261335354, "grad_norm": 1.3125, "learning_rate": 1.819393160881684e-05, "loss": 0.6223, "step": 3447 }, { "epoch": 0.596991667568445, "grad_norm": 1.234375, "learning_rate": 1.819288597980958e-05, "loss": 0.5801, "step": 3448 }, { "epoch": 0.5971648090033547, "grad_norm": 1.3828125, "learning_rate": 1.819184007826989e-05, "loss": 0.6053, "step": 3449 }, { "epoch": 0.5973379504382642, "grad_norm": 1.2265625, "learning_rate": 1.8190793904232557e-05, "loss": 0.5586, "step": 3450 }, { "epoch": 0.5975110918731739, "grad_norm": 1.328125, "learning_rate": 1.8189747457732378e-05, "loss": 0.5521, "step": 3451 }, { "epoch": 0.5976842333080835, "grad_norm": 1.453125, "learning_rate": 1.8188700738804174e-05, "loss": 0.5684, "step": 3452 }, { "epoch": 0.5978573747429932, "grad_norm": 1.421875, "learning_rate": 1.818765374748275e-05, "loss": 0.5699, "step": 3453 }, { "epoch": 0.5980305161779028, "grad_norm": 1.390625, "learning_rate": 1.8186606483802945e-05, "loss": 0.6118, "step": 3454 }, { "epoch": 0.5982036576128125, "grad_norm": 1.234375, "learning_rate": 1.818555894779959e-05, "loss": 0.5963, "step": 3455 }, { "epoch": 0.5983767990477221, "grad_norm": 1.3671875, "learning_rate": 1.8184511139507535e-05, "loss": 0.6281, "step": 3456 }, { "epoch": 0.5985499404826318, "grad_norm": 1.53125, "learning_rate": 1.8183463058961626e-05, "loss": 0.5744, "step": 3457 }, { "epoch": 0.5987230819175414, "grad_norm": 1.3671875, "learning_rate": 1.8182414706196737e-05, "loss": 0.5777, "step": 3458 }, { "epoch": 0.5988962233524511, "grad_norm": 1.3828125, "learning_rate": 1.8181366081247734e-05, "loss": 0.5824, "step": 3459 }, { "epoch": 0.5990693647873607, "grad_norm": 1.71875, "learning_rate": 1.8180317184149502e-05, "loss": 0.7271, "step": 3460 }, { "epoch": 0.5992425062222703, "grad_norm": 1.3984375, "learning_rate": 1.817926801493693e-05, "loss": 0.6625, "step": 3461 }, { "epoch": 0.5994156476571799, "grad_norm": 1.3125, "learning_rate": 1.8178218573644923e-05, "loss": 0.5335, "step": 3462 }, { "epoch": 0.5995887890920896, "grad_norm": 1.2578125, "learning_rate": 1.8177168860308388e-05, "loss": 0.5347, "step": 3463 }, { "epoch": 0.5997619305269992, "grad_norm": 1.359375, "learning_rate": 1.817611887496224e-05, "loss": 0.6011, "step": 3464 }, { "epoch": 0.5999350719619089, "grad_norm": 1.3515625, "learning_rate": 1.817506861764141e-05, "loss": 0.5762, "step": 3465 }, { "epoch": 0.6001082133968185, "grad_norm": 1.296875, "learning_rate": 1.817401808838083e-05, "loss": 0.5612, "step": 3466 }, { "epoch": 0.6002813548317282, "grad_norm": 1.328125, "learning_rate": 1.817296728721545e-05, "loss": 0.5783, "step": 3467 }, { "epoch": 0.6004544962666378, "grad_norm": 1.2265625, "learning_rate": 1.8171916214180217e-05, "loss": 0.4927, "step": 3468 }, { "epoch": 0.6006276377015475, "grad_norm": 1.515625, "learning_rate": 1.8170864869310108e-05, "loss": 0.6087, "step": 3469 }, { "epoch": 0.6008007791364571, "grad_norm": 1.359375, "learning_rate": 1.8169813252640084e-05, "loss": 0.6167, "step": 3470 }, { "epoch": 0.6009739205713668, "grad_norm": 1.34375, "learning_rate": 1.816876136420513e-05, "loss": 0.6414, "step": 3471 }, { "epoch": 0.6011470620062763, "grad_norm": 1.4609375, "learning_rate": 1.8167709204040232e-05, "loss": 0.6245, "step": 3472 }, { "epoch": 0.601320203441186, "grad_norm": 1.328125, "learning_rate": 1.81666567721804e-05, "loss": 0.5979, "step": 3473 }, { "epoch": 0.6014933448760956, "grad_norm": 1.5, "learning_rate": 1.8165604068660635e-05, "loss": 0.6626, "step": 3474 }, { "epoch": 0.6016664863110053, "grad_norm": 1.296875, "learning_rate": 1.816455109351596e-05, "loss": 0.5573, "step": 3475 }, { "epoch": 0.6018396277459149, "grad_norm": 1.3671875, "learning_rate": 1.8163497846781394e-05, "loss": 0.6668, "step": 3476 }, { "epoch": 0.6020127691808246, "grad_norm": 1.390625, "learning_rate": 1.816244432849198e-05, "loss": 0.5621, "step": 3477 }, { "epoch": 0.6021859106157342, "grad_norm": 1.2734375, "learning_rate": 1.8161390538682754e-05, "loss": 0.534, "step": 3478 }, { "epoch": 0.6023590520506439, "grad_norm": 1.3984375, "learning_rate": 1.8160336477388777e-05, "loss": 0.6131, "step": 3479 }, { "epoch": 0.6025321934855535, "grad_norm": 1.390625, "learning_rate": 1.815928214464511e-05, "loss": 0.5796, "step": 3480 }, { "epoch": 0.6027053349204632, "grad_norm": 1.34375, "learning_rate": 1.815822754048683e-05, "loss": 0.5378, "step": 3481 }, { "epoch": 0.6028784763553728, "grad_norm": 1.359375, "learning_rate": 1.815717266494901e-05, "loss": 0.567, "step": 3482 }, { "epoch": 0.6030516177902825, "grad_norm": 1.296875, "learning_rate": 1.8156117518066744e-05, "loss": 0.6326, "step": 3483 }, { "epoch": 0.603224759225192, "grad_norm": 1.3203125, "learning_rate": 1.815506209987513e-05, "loss": 0.5673, "step": 3484 }, { "epoch": 0.6033979006601017, "grad_norm": 1.3125, "learning_rate": 1.8154006410409275e-05, "loss": 0.591, "step": 3485 }, { "epoch": 0.6035710420950113, "grad_norm": 1.4140625, "learning_rate": 1.8152950449704297e-05, "loss": 0.5829, "step": 3486 }, { "epoch": 0.603744183529921, "grad_norm": 1.3203125, "learning_rate": 1.815189421779532e-05, "loss": 0.5423, "step": 3487 }, { "epoch": 0.6039173249648306, "grad_norm": 1.4453125, "learning_rate": 1.8150837714717484e-05, "loss": 0.557, "step": 3488 }, { "epoch": 0.6040904663997403, "grad_norm": 1.34375, "learning_rate": 1.814978094050593e-05, "loss": 0.6077, "step": 3489 }, { "epoch": 0.6042636078346499, "grad_norm": 1.234375, "learning_rate": 1.814872389519581e-05, "loss": 0.6337, "step": 3490 }, { "epoch": 0.6044367492695596, "grad_norm": 1.2578125, "learning_rate": 1.8147666578822285e-05, "loss": 0.5684, "step": 3491 }, { "epoch": 0.6046098907044692, "grad_norm": 1.6484375, "learning_rate": 1.8146608991420533e-05, "loss": 0.7069, "step": 3492 }, { "epoch": 0.6047830321393789, "grad_norm": 1.3359375, "learning_rate": 1.814555113302573e-05, "loss": 0.5796, "step": 3493 }, { "epoch": 0.6049561735742885, "grad_norm": 1.3671875, "learning_rate": 1.814449300367306e-05, "loss": 0.6438, "step": 3494 }, { "epoch": 0.6051293150091981, "grad_norm": 1.3203125, "learning_rate": 1.8143434603397726e-05, "loss": 0.6058, "step": 3495 }, { "epoch": 0.6053024564441077, "grad_norm": 1.25, "learning_rate": 1.814237593223494e-05, "loss": 0.5954, "step": 3496 }, { "epoch": 0.6054755978790174, "grad_norm": 1.5234375, "learning_rate": 1.814131699021991e-05, "loss": 0.624, "step": 3497 }, { "epoch": 0.605648739313927, "grad_norm": 1.390625, "learning_rate": 1.8140257777387864e-05, "loss": 0.582, "step": 3498 }, { "epoch": 0.6058218807488367, "grad_norm": 1.3359375, "learning_rate": 1.8139198293774037e-05, "loss": 0.6196, "step": 3499 }, { "epoch": 0.6059950221837463, "grad_norm": 1.484375, "learning_rate": 1.8138138539413673e-05, "loss": 0.6591, "step": 3500 }, { "epoch": 0.606168163618656, "grad_norm": 1.3671875, "learning_rate": 1.8137078514342024e-05, "loss": 0.6146, "step": 3501 }, { "epoch": 0.6063413050535656, "grad_norm": 1.3125, "learning_rate": 1.8136018218594348e-05, "loss": 0.59, "step": 3502 }, { "epoch": 0.6065144464884753, "grad_norm": 1.2890625, "learning_rate": 1.8134957652205917e-05, "loss": 0.5968, "step": 3503 }, { "epoch": 0.606687587923385, "grad_norm": 1.3828125, "learning_rate": 1.813389681521201e-05, "loss": 0.5985, "step": 3504 }, { "epoch": 0.6068607293582946, "grad_norm": 1.453125, "learning_rate": 1.8132835707647922e-05, "loss": 0.6381, "step": 3505 }, { "epoch": 0.6070338707932041, "grad_norm": 1.359375, "learning_rate": 1.813177432954894e-05, "loss": 0.643, "step": 3506 }, { "epoch": 0.6072070122281138, "grad_norm": 1.5, "learning_rate": 1.8130712680950375e-05, "loss": 0.5755, "step": 3507 }, { "epoch": 0.6073801536630234, "grad_norm": 1.3203125, "learning_rate": 1.8129650761887542e-05, "loss": 0.6023, "step": 3508 }, { "epoch": 0.6075532950979331, "grad_norm": 1.328125, "learning_rate": 1.8128588572395763e-05, "loss": 0.6488, "step": 3509 }, { "epoch": 0.6077264365328428, "grad_norm": 1.34375, "learning_rate": 1.8127526112510377e-05, "loss": 0.6039, "step": 3510 }, { "epoch": 0.6078995779677524, "grad_norm": 1.3984375, "learning_rate": 1.812646338226672e-05, "loss": 0.5577, "step": 3511 }, { "epoch": 0.608072719402662, "grad_norm": 1.484375, "learning_rate": 1.8125400381700145e-05, "loss": 0.6288, "step": 3512 }, { "epoch": 0.6082458608375717, "grad_norm": 1.3203125, "learning_rate": 1.8124337110846014e-05, "loss": 0.7606, "step": 3513 }, { "epoch": 0.6084190022724814, "grad_norm": 1.28125, "learning_rate": 1.8123273569739696e-05, "loss": 0.6124, "step": 3514 }, { "epoch": 0.608592143707391, "grad_norm": 1.359375, "learning_rate": 1.8122209758416566e-05, "loss": 0.5872, "step": 3515 }, { "epoch": 0.6087652851423007, "grad_norm": 1.390625, "learning_rate": 1.8121145676912016e-05, "loss": 0.5856, "step": 3516 }, { "epoch": 0.6089384265772103, "grad_norm": 1.375, "learning_rate": 1.8120081325261435e-05, "loss": 0.5937, "step": 3517 }, { "epoch": 0.6091115680121199, "grad_norm": 1.3984375, "learning_rate": 1.8119016703500237e-05, "loss": 0.5427, "step": 3518 }, { "epoch": 0.6092847094470295, "grad_norm": 1.2890625, "learning_rate": 1.811795181166383e-05, "loss": 0.5927, "step": 3519 }, { "epoch": 0.6094578508819392, "grad_norm": 1.328125, "learning_rate": 1.811688664978764e-05, "loss": 0.6629, "step": 3520 }, { "epoch": 0.6096309923168488, "grad_norm": 1.3125, "learning_rate": 1.8115821217907097e-05, "loss": 0.6954, "step": 3521 }, { "epoch": 0.6098041337517585, "grad_norm": 1.359375, "learning_rate": 1.8114755516057644e-05, "loss": 0.6256, "step": 3522 }, { "epoch": 0.6099772751866681, "grad_norm": 1.3828125, "learning_rate": 1.811368954427473e-05, "loss": 0.5483, "step": 3523 }, { "epoch": 0.6101504166215778, "grad_norm": 1.390625, "learning_rate": 1.8112623302593813e-05, "loss": 0.5998, "step": 3524 }, { "epoch": 0.6103235580564874, "grad_norm": 1.40625, "learning_rate": 1.8111556791050362e-05, "loss": 0.5913, "step": 3525 }, { "epoch": 0.6104966994913971, "grad_norm": 1.34375, "learning_rate": 1.8110490009679857e-05, "loss": 0.5815, "step": 3526 }, { "epoch": 0.6106698409263067, "grad_norm": 1.4296875, "learning_rate": 1.8109422958517777e-05, "loss": 0.5755, "step": 3527 }, { "epoch": 0.6108429823612164, "grad_norm": 1.4140625, "learning_rate": 1.8108355637599624e-05, "loss": 0.6625, "step": 3528 }, { "epoch": 0.6110161237961259, "grad_norm": 1.3828125, "learning_rate": 1.81072880469609e-05, "loss": 0.5871, "step": 3529 }, { "epoch": 0.6111892652310356, "grad_norm": 1.3984375, "learning_rate": 1.8106220186637114e-05, "loss": 0.5952, "step": 3530 }, { "epoch": 0.6113624066659452, "grad_norm": 1.375, "learning_rate": 1.8105152056663796e-05, "loss": 0.5871, "step": 3531 }, { "epoch": 0.6115355481008549, "grad_norm": 1.40625, "learning_rate": 1.8104083657076466e-05, "loss": 0.6122, "step": 3532 }, { "epoch": 0.6117086895357645, "grad_norm": 1.4140625, "learning_rate": 1.8103014987910672e-05, "loss": 0.5645, "step": 3533 }, { "epoch": 0.6118818309706742, "grad_norm": 1.375, "learning_rate": 1.8101946049201958e-05, "loss": 0.5754, "step": 3534 }, { "epoch": 0.6120549724055838, "grad_norm": 1.34375, "learning_rate": 1.8100876840985887e-05, "loss": 0.5927, "step": 3535 }, { "epoch": 0.6122281138404935, "grad_norm": 1.46875, "learning_rate": 1.8099807363298023e-05, "loss": 0.6956, "step": 3536 }, { "epoch": 0.6124012552754031, "grad_norm": 1.3359375, "learning_rate": 1.809873761617394e-05, "loss": 0.5907, "step": 3537 }, { "epoch": 0.6125743967103128, "grad_norm": 1.4375, "learning_rate": 1.8097667599649224e-05, "loss": 0.5743, "step": 3538 }, { "epoch": 0.6127475381452224, "grad_norm": 1.34375, "learning_rate": 1.8096597313759466e-05, "loss": 0.6014, "step": 3539 }, { "epoch": 0.612920679580132, "grad_norm": 1.21875, "learning_rate": 1.8095526758540275e-05, "loss": 0.5548, "step": 3540 }, { "epoch": 0.6130938210150416, "grad_norm": 1.3203125, "learning_rate": 1.8094455934027257e-05, "loss": 0.6129, "step": 3541 }, { "epoch": 0.6132669624499513, "grad_norm": 1.390625, "learning_rate": 1.8093384840256035e-05, "loss": 0.5824, "step": 3542 }, { "epoch": 0.6134401038848609, "grad_norm": 1.25, "learning_rate": 1.8092313477262234e-05, "loss": 0.5435, "step": 3543 }, { "epoch": 0.6136132453197706, "grad_norm": 1.296875, "learning_rate": 1.80912418450815e-05, "loss": 0.6022, "step": 3544 }, { "epoch": 0.6137863867546802, "grad_norm": 1.296875, "learning_rate": 1.8090169943749477e-05, "loss": 0.5975, "step": 3545 }, { "epoch": 0.6139595281895899, "grad_norm": 1.28125, "learning_rate": 1.8089097773301817e-05, "loss": 0.5958, "step": 3546 }, { "epoch": 0.6141326696244995, "grad_norm": 1.3203125, "learning_rate": 1.808802533377419e-05, "loss": 0.6286, "step": 3547 }, { "epoch": 0.6143058110594092, "grad_norm": 1.40625, "learning_rate": 1.808695262520227e-05, "loss": 0.6293, "step": 3548 }, { "epoch": 0.6144789524943188, "grad_norm": 1.328125, "learning_rate": 1.8085879647621735e-05, "loss": 0.5908, "step": 3549 }, { "epoch": 0.6146520939292285, "grad_norm": 1.3515625, "learning_rate": 1.8084806401068288e-05, "loss": 0.5578, "step": 3550 }, { "epoch": 0.6148252353641381, "grad_norm": 1.359375, "learning_rate": 1.8083732885577617e-05, "loss": 0.5807, "step": 3551 }, { "epoch": 0.6149983767990477, "grad_norm": 1.375, "learning_rate": 1.808265910118544e-05, "loss": 0.5803, "step": 3552 }, { "epoch": 0.6151715182339573, "grad_norm": 1.3828125, "learning_rate": 1.8081585047927474e-05, "loss": 0.5954, "step": 3553 }, { "epoch": 0.615344659668867, "grad_norm": 1.40625, "learning_rate": 1.8080510725839448e-05, "loss": 0.5549, "step": 3554 }, { "epoch": 0.6155178011037766, "grad_norm": 1.34375, "learning_rate": 1.80794361349571e-05, "loss": 0.5241, "step": 3555 }, { "epoch": 0.6156909425386863, "grad_norm": 1.453125, "learning_rate": 1.8078361275316167e-05, "loss": 0.6297, "step": 3556 }, { "epoch": 0.6158640839735959, "grad_norm": 1.453125, "learning_rate": 1.8077286146952416e-05, "loss": 0.6558, "step": 3557 }, { "epoch": 0.6160372254085056, "grad_norm": 1.4140625, "learning_rate": 1.80762107499016e-05, "loss": 0.5262, "step": 3558 }, { "epoch": 0.6162103668434152, "grad_norm": 1.40625, "learning_rate": 1.80751350841995e-05, "loss": 0.6573, "step": 3559 }, { "epoch": 0.6163835082783249, "grad_norm": 1.3984375, "learning_rate": 1.8074059149881895e-05, "loss": 0.5939, "step": 3560 }, { "epoch": 0.6165566497132345, "grad_norm": 1.28125, "learning_rate": 1.8072982946984572e-05, "loss": 0.6057, "step": 3561 }, { "epoch": 0.6167297911481442, "grad_norm": 1.40625, "learning_rate": 1.8071906475543333e-05, "loss": 0.6397, "step": 3562 }, { "epoch": 0.6169029325830537, "grad_norm": 1.390625, "learning_rate": 1.8070829735593985e-05, "loss": 0.6233, "step": 3563 }, { "epoch": 0.6170760740179634, "grad_norm": 1.3671875, "learning_rate": 1.8069752727172347e-05, "loss": 0.6136, "step": 3564 }, { "epoch": 0.617249215452873, "grad_norm": 1.3515625, "learning_rate": 1.8068675450314245e-05, "loss": 0.6394, "step": 3565 }, { "epoch": 0.6174223568877827, "grad_norm": 1.3671875, "learning_rate": 1.8067597905055515e-05, "loss": 0.5949, "step": 3566 }, { "epoch": 0.6175954983226923, "grad_norm": 1.4375, "learning_rate": 1.8066520091431998e-05, "loss": 0.568, "step": 3567 }, { "epoch": 0.617768639757602, "grad_norm": 1.4140625, "learning_rate": 1.8065442009479545e-05, "loss": 0.6306, "step": 3568 }, { "epoch": 0.6179417811925116, "grad_norm": 1.3046875, "learning_rate": 1.8064363659234025e-05, "loss": 0.6047, "step": 3569 }, { "epoch": 0.6181149226274213, "grad_norm": 1.390625, "learning_rate": 1.8063285040731307e-05, "loss": 0.586, "step": 3570 }, { "epoch": 0.6182880640623309, "grad_norm": 1.3515625, "learning_rate": 1.8062206154007267e-05, "loss": 0.5942, "step": 3571 }, { "epoch": 0.6184612054972406, "grad_norm": 1.3828125, "learning_rate": 1.8061126999097794e-05, "loss": 0.6058, "step": 3572 }, { "epoch": 0.6186343469321502, "grad_norm": 1.5390625, "learning_rate": 1.8060047576038788e-05, "loss": 0.6501, "step": 3573 }, { "epoch": 0.6188074883670598, "grad_norm": 1.3046875, "learning_rate": 1.8058967884866156e-05, "loss": 0.616, "step": 3574 }, { "epoch": 0.6189806298019694, "grad_norm": 1.2890625, "learning_rate": 1.8057887925615812e-05, "loss": 0.613, "step": 3575 }, { "epoch": 0.6191537712368791, "grad_norm": 1.3359375, "learning_rate": 1.805680769832368e-05, "loss": 0.5954, "step": 3576 }, { "epoch": 0.6193269126717887, "grad_norm": 1.40625, "learning_rate": 1.805572720302569e-05, "loss": 0.6017, "step": 3577 }, { "epoch": 0.6195000541066984, "grad_norm": 1.3671875, "learning_rate": 1.8054646439757792e-05, "loss": 0.5842, "step": 3578 }, { "epoch": 0.619673195541608, "grad_norm": 1.34375, "learning_rate": 1.805356540855593e-05, "loss": 0.5637, "step": 3579 }, { "epoch": 0.6198463369765177, "grad_norm": 1.3359375, "learning_rate": 1.805248410945607e-05, "loss": 0.5203, "step": 3580 }, { "epoch": 0.6200194784114273, "grad_norm": 1.3125, "learning_rate": 1.8051402542494177e-05, "loss": 0.5191, "step": 3581 }, { "epoch": 0.620192619846337, "grad_norm": 1.3515625, "learning_rate": 1.805032070770623e-05, "loss": 0.63, "step": 3582 }, { "epoch": 0.6203657612812467, "grad_norm": 1.515625, "learning_rate": 1.8049238605128212e-05, "loss": 0.6386, "step": 3583 }, { "epoch": 0.6205389027161563, "grad_norm": 1.375, "learning_rate": 1.8048156234796124e-05, "loss": 0.6048, "step": 3584 }, { "epoch": 0.620712044151066, "grad_norm": 1.3828125, "learning_rate": 1.8047073596745968e-05, "loss": 0.6455, "step": 3585 }, { "epoch": 0.6208851855859755, "grad_norm": 1.34375, "learning_rate": 1.804599069101376e-05, "loss": 0.5467, "step": 3586 }, { "epoch": 0.6210583270208851, "grad_norm": 1.4765625, "learning_rate": 1.804490751763552e-05, "loss": 0.5811, "step": 3587 }, { "epoch": 0.6212314684557948, "grad_norm": 1.390625, "learning_rate": 1.8043824076647277e-05, "loss": 0.6419, "step": 3588 }, { "epoch": 0.6214046098907045, "grad_norm": 1.3515625, "learning_rate": 1.8042740368085074e-05, "loss": 0.5403, "step": 3589 }, { "epoch": 0.6215777513256141, "grad_norm": 1.390625, "learning_rate": 1.804165639198496e-05, "loss": 0.6208, "step": 3590 }, { "epoch": 0.6217508927605238, "grad_norm": 1.390625, "learning_rate": 1.8040572148382993e-05, "loss": 0.5413, "step": 3591 }, { "epoch": 0.6219240341954334, "grad_norm": 1.3984375, "learning_rate": 1.803948763731524e-05, "loss": 0.5571, "step": 3592 }, { "epoch": 0.6220971756303431, "grad_norm": 1.3828125, "learning_rate": 1.8038402858817776e-05, "loss": 0.6122, "step": 3593 }, { "epoch": 0.6222703170652527, "grad_norm": 1.234375, "learning_rate": 1.8037317812926685e-05, "loss": 0.5374, "step": 3594 }, { "epoch": 0.6224434585001624, "grad_norm": 1.46875, "learning_rate": 1.803623249967806e-05, "loss": 0.6325, "step": 3595 }, { "epoch": 0.622616599935072, "grad_norm": 1.421875, "learning_rate": 1.8035146919108006e-05, "loss": 0.6228, "step": 3596 }, { "epoch": 0.6227897413699816, "grad_norm": 1.40625, "learning_rate": 1.8034061071252632e-05, "loss": 0.5848, "step": 3597 }, { "epoch": 0.6229628828048912, "grad_norm": 1.3515625, "learning_rate": 1.8032974956148064e-05, "loss": 0.5565, "step": 3598 }, { "epoch": 0.6231360242398009, "grad_norm": 1.3359375, "learning_rate": 1.8031888573830422e-05, "loss": 0.5725, "step": 3599 }, { "epoch": 0.6233091656747105, "grad_norm": 1.4921875, "learning_rate": 1.803080192433585e-05, "loss": 0.6418, "step": 3600 }, { "epoch": 0.6234823071096202, "grad_norm": 1.3046875, "learning_rate": 1.8029715007700494e-05, "loss": 0.6254, "step": 3601 }, { "epoch": 0.6236554485445298, "grad_norm": 1.234375, "learning_rate": 1.8028627823960508e-05, "loss": 0.5084, "step": 3602 }, { "epoch": 0.6238285899794395, "grad_norm": 1.3359375, "learning_rate": 1.8027540373152056e-05, "loss": 0.5914, "step": 3603 }, { "epoch": 0.6240017314143491, "grad_norm": 1.34375, "learning_rate": 1.8026452655311315e-05, "loss": 0.597, "step": 3604 }, { "epoch": 0.6241748728492588, "grad_norm": 1.2734375, "learning_rate": 1.8025364670474466e-05, "loss": 0.61, "step": 3605 }, { "epoch": 0.6243480142841684, "grad_norm": 1.375, "learning_rate": 1.80242764186777e-05, "loss": 0.611, "step": 3606 }, { "epoch": 0.6245211557190781, "grad_norm": 1.3046875, "learning_rate": 1.8023187899957214e-05, "loss": 0.5642, "step": 3607 }, { "epoch": 0.6246942971539876, "grad_norm": 1.3984375, "learning_rate": 1.8022099114349223e-05, "loss": 0.6566, "step": 3608 }, { "epoch": 0.6248674385888973, "grad_norm": 1.2578125, "learning_rate": 1.802101006188994e-05, "loss": 0.5468, "step": 3609 }, { "epoch": 0.6250405800238069, "grad_norm": 1.3046875, "learning_rate": 1.8019920742615596e-05, "loss": 0.5835, "step": 3610 }, { "epoch": 0.6252137214587166, "grad_norm": 1.40625, "learning_rate": 1.8018831156562425e-05, "loss": 0.6975, "step": 3611 }, { "epoch": 0.6253868628936262, "grad_norm": 1.3671875, "learning_rate": 1.801774130376667e-05, "loss": 0.588, "step": 3612 }, { "epoch": 0.6255600043285359, "grad_norm": 1.2890625, "learning_rate": 1.8016651184264583e-05, "loss": 0.6975, "step": 3613 }, { "epoch": 0.6257331457634455, "grad_norm": 1.3046875, "learning_rate": 1.8015560798092432e-05, "loss": 0.5554, "step": 3614 }, { "epoch": 0.6259062871983552, "grad_norm": 1.2890625, "learning_rate": 1.8014470145286486e-05, "loss": 0.64, "step": 3615 }, { "epoch": 0.6260794286332648, "grad_norm": 1.3203125, "learning_rate": 1.801337922588302e-05, "loss": 0.5719, "step": 3616 }, { "epoch": 0.6262525700681745, "grad_norm": 1.2734375, "learning_rate": 1.801228803991833e-05, "loss": 0.551, "step": 3617 }, { "epoch": 0.6264257115030841, "grad_norm": 1.3203125, "learning_rate": 1.801119658742871e-05, "loss": 0.5849, "step": 3618 }, { "epoch": 0.6265988529379938, "grad_norm": 1.3203125, "learning_rate": 1.801010486845047e-05, "loss": 0.5886, "step": 3619 }, { "epoch": 0.6267719943729033, "grad_norm": 1.3984375, "learning_rate": 1.8009012883019917e-05, "loss": 0.6006, "step": 3620 }, { "epoch": 0.626945135807813, "grad_norm": 1.3828125, "learning_rate": 1.8007920631173386e-05, "loss": 0.5896, "step": 3621 }, { "epoch": 0.6271182772427226, "grad_norm": 1.3125, "learning_rate": 1.8006828112947207e-05, "loss": 0.535, "step": 3622 }, { "epoch": 0.6272914186776323, "grad_norm": 1.28125, "learning_rate": 1.8005735328377718e-05, "loss": 0.5686, "step": 3623 }, { "epoch": 0.6274645601125419, "grad_norm": 1.2265625, "learning_rate": 1.8004642277501274e-05, "loss": 0.6001, "step": 3624 }, { "epoch": 0.6276377015474516, "grad_norm": 1.3984375, "learning_rate": 1.800354896035423e-05, "loss": 0.6158, "step": 3625 }, { "epoch": 0.6278108429823612, "grad_norm": 1.296875, "learning_rate": 1.8002455376972958e-05, "loss": 0.5958, "step": 3626 }, { "epoch": 0.6279839844172709, "grad_norm": 1.3046875, "learning_rate": 1.800136152739384e-05, "loss": 0.5777, "step": 3627 }, { "epoch": 0.6281571258521805, "grad_norm": 1.28125, "learning_rate": 1.8000267411653253e-05, "loss": 0.587, "step": 3628 }, { "epoch": 0.6283302672870902, "grad_norm": 1.3984375, "learning_rate": 1.79991730297876e-05, "loss": 0.5404, "step": 3629 }, { "epoch": 0.6285034087219998, "grad_norm": 1.390625, "learning_rate": 1.799807838183328e-05, "loss": 0.6299, "step": 3630 }, { "epoch": 0.6286765501569094, "grad_norm": 1.2421875, "learning_rate": 1.7996983467826713e-05, "loss": 0.6044, "step": 3631 }, { "epoch": 0.628849691591819, "grad_norm": 1.421875, "learning_rate": 1.7995888287804315e-05, "loss": 0.63, "step": 3632 }, { "epoch": 0.6290228330267287, "grad_norm": 1.203125, "learning_rate": 1.7994792841802516e-05, "loss": 0.5593, "step": 3633 }, { "epoch": 0.6291959744616383, "grad_norm": 1.3125, "learning_rate": 1.7993697129857756e-05, "loss": 0.5445, "step": 3634 }, { "epoch": 0.629369115896548, "grad_norm": 1.3046875, "learning_rate": 1.7992601152006487e-05, "loss": 0.5378, "step": 3635 }, { "epoch": 0.6295422573314576, "grad_norm": 1.375, "learning_rate": 1.7991504908285162e-05, "loss": 0.6199, "step": 3636 }, { "epoch": 0.6297153987663673, "grad_norm": 1.296875, "learning_rate": 1.7990408398730248e-05, "loss": 0.6163, "step": 3637 }, { "epoch": 0.6298885402012769, "grad_norm": 1.203125, "learning_rate": 1.798931162337822e-05, "loss": 0.5118, "step": 3638 }, { "epoch": 0.6300616816361866, "grad_norm": 1.328125, "learning_rate": 1.7988214582265566e-05, "loss": 0.5499, "step": 3639 }, { "epoch": 0.6302348230710962, "grad_norm": 1.328125, "learning_rate": 1.798711727542877e-05, "loss": 0.5867, "step": 3640 }, { "epoch": 0.6304079645060059, "grad_norm": 1.328125, "learning_rate": 1.7986019702904344e-05, "loss": 0.5381, "step": 3641 }, { "epoch": 0.6305811059409154, "grad_norm": 1.3359375, "learning_rate": 1.798492186472879e-05, "loss": 0.6074, "step": 3642 }, { "epoch": 0.6307542473758251, "grad_norm": 1.3203125, "learning_rate": 1.7983823760938626e-05, "loss": 0.6048, "step": 3643 }, { "epoch": 0.6309273888107347, "grad_norm": 1.3203125, "learning_rate": 1.7982725391570387e-05, "loss": 0.6832, "step": 3644 }, { "epoch": 0.6311005302456444, "grad_norm": 1.4453125, "learning_rate": 1.7981626756660605e-05, "loss": 0.6831, "step": 3645 }, { "epoch": 0.631273671680554, "grad_norm": 1.40625, "learning_rate": 1.798052785624583e-05, "loss": 0.6074, "step": 3646 }, { "epoch": 0.6314468131154637, "grad_norm": 1.40625, "learning_rate": 1.797942869036261e-05, "loss": 0.6193, "step": 3647 }, { "epoch": 0.6316199545503733, "grad_norm": 1.3359375, "learning_rate": 1.797832925904751e-05, "loss": 0.611, "step": 3648 }, { "epoch": 0.631793095985283, "grad_norm": 1.3359375, "learning_rate": 1.7977229562337104e-05, "loss": 0.6529, "step": 3649 }, { "epoch": 0.6319662374201926, "grad_norm": 1.28125, "learning_rate": 1.7976129600267972e-05, "loss": 0.6115, "step": 3650 }, { "epoch": 0.6321393788551023, "grad_norm": 1.2890625, "learning_rate": 1.7975029372876706e-05, "loss": 0.5651, "step": 3651 }, { "epoch": 0.632312520290012, "grad_norm": 1.359375, "learning_rate": 1.7973928880199902e-05, "loss": 0.5831, "step": 3652 }, { "epoch": 0.6324856617249216, "grad_norm": 1.234375, "learning_rate": 1.7972828122274168e-05, "loss": 0.5074, "step": 3653 }, { "epoch": 0.6326588031598311, "grad_norm": 1.265625, "learning_rate": 1.797172709913612e-05, "loss": 0.5171, "step": 3654 }, { "epoch": 0.6328319445947408, "grad_norm": 1.5, "learning_rate": 1.797062581082238e-05, "loss": 0.5553, "step": 3655 }, { "epoch": 0.6330050860296504, "grad_norm": 1.3828125, "learning_rate": 1.796952425736959e-05, "loss": 0.5678, "step": 3656 }, { "epoch": 0.6331782274645601, "grad_norm": 1.3046875, "learning_rate": 1.7968422438814382e-05, "loss": 0.5698, "step": 3657 }, { "epoch": 0.6333513688994697, "grad_norm": 1.4609375, "learning_rate": 1.796732035519342e-05, "loss": 0.6259, "step": 3658 }, { "epoch": 0.6335245103343794, "grad_norm": 1.3671875, "learning_rate": 1.796621800654335e-05, "loss": 0.5929, "step": 3659 }, { "epoch": 0.633697651769289, "grad_norm": 1.390625, "learning_rate": 1.7965115392900854e-05, "loss": 0.7098, "step": 3660 }, { "epoch": 0.6338707932041987, "grad_norm": 1.4453125, "learning_rate": 1.79640125143026e-05, "loss": 0.6005, "step": 3661 }, { "epoch": 0.6340439346391084, "grad_norm": 1.3671875, "learning_rate": 1.7962909370785283e-05, "loss": 0.5634, "step": 3662 }, { "epoch": 0.634217076074018, "grad_norm": 1.390625, "learning_rate": 1.7961805962385592e-05, "loss": 0.5753, "step": 3663 }, { "epoch": 0.6343902175089277, "grad_norm": 1.359375, "learning_rate": 1.7960702289140234e-05, "loss": 0.6324, "step": 3664 }, { "epoch": 0.6345633589438372, "grad_norm": 1.3359375, "learning_rate": 1.7959598351085922e-05, "loss": 0.5288, "step": 3665 }, { "epoch": 0.6347365003787468, "grad_norm": 1.3671875, "learning_rate": 1.7958494148259376e-05, "loss": 0.6388, "step": 3666 }, { "epoch": 0.6349096418136565, "grad_norm": 1.4140625, "learning_rate": 1.7957389680697335e-05, "loss": 0.5665, "step": 3667 }, { "epoch": 0.6350827832485662, "grad_norm": 1.484375, "learning_rate": 1.7956284948436526e-05, "loss": 0.5666, "step": 3668 }, { "epoch": 0.6352559246834758, "grad_norm": 1.3828125, "learning_rate": 1.795517995151371e-05, "loss": 0.5717, "step": 3669 }, { "epoch": 0.6354290661183855, "grad_norm": 1.3359375, "learning_rate": 1.7954074689965633e-05, "loss": 0.5797, "step": 3670 }, { "epoch": 0.6356022075532951, "grad_norm": 1.2421875, "learning_rate": 1.7952969163829068e-05, "loss": 0.6074, "step": 3671 }, { "epoch": 0.6357753489882048, "grad_norm": 1.234375, "learning_rate": 1.7951863373140787e-05, "loss": 0.5492, "step": 3672 }, { "epoch": 0.6359484904231144, "grad_norm": 1.3515625, "learning_rate": 1.7950757317937574e-05, "loss": 0.6334, "step": 3673 }, { "epoch": 0.6361216318580241, "grad_norm": 1.46875, "learning_rate": 1.7949650998256225e-05, "loss": 0.5541, "step": 3674 }, { "epoch": 0.6362947732929337, "grad_norm": 1.3671875, "learning_rate": 1.7948544414133534e-05, "loss": 0.6472, "step": 3675 }, { "epoch": 0.6364679147278433, "grad_norm": 1.421875, "learning_rate": 1.7947437565606316e-05, "loss": 0.616, "step": 3676 }, { "epoch": 0.6366410561627529, "grad_norm": 1.2578125, "learning_rate": 1.7946330452711392e-05, "loss": 0.6142, "step": 3677 }, { "epoch": 0.6368141975976626, "grad_norm": 1.2578125, "learning_rate": 1.794522307548558e-05, "loss": 0.5324, "step": 3678 }, { "epoch": 0.6369873390325722, "grad_norm": 1.328125, "learning_rate": 1.794411543396573e-05, "loss": 0.5977, "step": 3679 }, { "epoch": 0.6371604804674819, "grad_norm": 1.453125, "learning_rate": 1.7943007528188675e-05, "loss": 0.6555, "step": 3680 }, { "epoch": 0.6373336219023915, "grad_norm": 1.390625, "learning_rate": 1.7941899358191276e-05, "loss": 0.5698, "step": 3681 }, { "epoch": 0.6375067633373012, "grad_norm": 1.546875, "learning_rate": 1.7940790924010396e-05, "loss": 0.6755, "step": 3682 }, { "epoch": 0.6376799047722108, "grad_norm": 1.375, "learning_rate": 1.7939682225682897e-05, "loss": 0.6246, "step": 3683 }, { "epoch": 0.6378530462071205, "grad_norm": 1.2421875, "learning_rate": 1.793857326324567e-05, "loss": 0.5714, "step": 3684 }, { "epoch": 0.6380261876420301, "grad_norm": 1.2578125, "learning_rate": 1.7937464036735603e-05, "loss": 0.5349, "step": 3685 }, { "epoch": 0.6381993290769398, "grad_norm": 1.21875, "learning_rate": 1.7936354546189586e-05, "loss": 0.5549, "step": 3686 }, { "epoch": 0.6383724705118494, "grad_norm": 1.2890625, "learning_rate": 1.7935244791644536e-05, "loss": 0.5818, "step": 3687 }, { "epoch": 0.638545611946759, "grad_norm": 1.4140625, "learning_rate": 1.7934134773137364e-05, "loss": 0.6488, "step": 3688 }, { "epoch": 0.6387187533816686, "grad_norm": 1.453125, "learning_rate": 1.793302449070499e-05, "loss": 0.5981, "step": 3689 }, { "epoch": 0.6388918948165783, "grad_norm": 1.3984375, "learning_rate": 1.7931913944384354e-05, "loss": 0.5786, "step": 3690 }, { "epoch": 0.6390650362514879, "grad_norm": 1.4296875, "learning_rate": 1.793080313421239e-05, "loss": 0.6644, "step": 3691 }, { "epoch": 0.6392381776863976, "grad_norm": 1.296875, "learning_rate": 1.792969206022606e-05, "loss": 0.5884, "step": 3692 }, { "epoch": 0.6394113191213072, "grad_norm": 1.1875, "learning_rate": 1.792858072246231e-05, "loss": 0.5153, "step": 3693 }, { "epoch": 0.6395844605562169, "grad_norm": 1.3671875, "learning_rate": 1.792746912095812e-05, "loss": 0.6651, "step": 3694 }, { "epoch": 0.6397576019911265, "grad_norm": 1.3359375, "learning_rate": 1.7926357255750456e-05, "loss": 0.6389, "step": 3695 }, { "epoch": 0.6399307434260362, "grad_norm": 1.359375, "learning_rate": 1.7925245126876315e-05, "loss": 0.5878, "step": 3696 }, { "epoch": 0.6401038848609458, "grad_norm": 1.3984375, "learning_rate": 1.7924132734372684e-05, "loss": 0.6402, "step": 3697 }, { "epoch": 0.6402770262958555, "grad_norm": 1.375, "learning_rate": 1.7923020078276567e-05, "loss": 0.6253, "step": 3698 }, { "epoch": 0.640450167730765, "grad_norm": 1.2265625, "learning_rate": 1.7921907158624976e-05, "loss": 0.5525, "step": 3699 }, { "epoch": 0.6406233091656747, "grad_norm": 1.5234375, "learning_rate": 1.7920793975454933e-05, "loss": 0.6132, "step": 3700 }, { "epoch": 0.6407964506005843, "grad_norm": 1.328125, "learning_rate": 1.7919680528803468e-05, "loss": 0.6631, "step": 3701 }, { "epoch": 0.640969592035494, "grad_norm": 1.296875, "learning_rate": 1.7918566818707615e-05, "loss": 0.6139, "step": 3702 }, { "epoch": 0.6411427334704036, "grad_norm": 1.328125, "learning_rate": 1.791745284520443e-05, "loss": 0.6211, "step": 3703 }, { "epoch": 0.6413158749053133, "grad_norm": 1.53125, "learning_rate": 1.791633860833096e-05, "loss": 0.7912, "step": 3704 }, { "epoch": 0.6414890163402229, "grad_norm": 1.4140625, "learning_rate": 1.791522410812427e-05, "loss": 0.5641, "step": 3705 }, { "epoch": 0.6416621577751326, "grad_norm": 1.296875, "learning_rate": 1.7914109344621443e-05, "loss": 0.6182, "step": 3706 }, { "epoch": 0.6418352992100422, "grad_norm": 1.5390625, "learning_rate": 1.7912994317859545e-05, "loss": 0.626, "step": 3707 }, { "epoch": 0.6420084406449519, "grad_norm": 1.4140625, "learning_rate": 1.791187902787568e-05, "loss": 0.6336, "step": 3708 }, { "epoch": 0.6421815820798615, "grad_norm": 1.3125, "learning_rate": 1.791076347470695e-05, "loss": 0.5646, "step": 3709 }, { "epoch": 0.6423547235147711, "grad_norm": 1.3203125, "learning_rate": 1.790964765839045e-05, "loss": 0.6107, "step": 3710 }, { "epoch": 0.6425278649496807, "grad_norm": 1.5, "learning_rate": 1.7908531578963304e-05, "loss": 0.5833, "step": 3711 }, { "epoch": 0.6427010063845904, "grad_norm": 1.375, "learning_rate": 1.790741523646264e-05, "loss": 0.5754, "step": 3712 }, { "epoch": 0.6428741478195, "grad_norm": 1.3515625, "learning_rate": 1.790629863092559e-05, "loss": 0.6385, "step": 3713 }, { "epoch": 0.6430472892544097, "grad_norm": 1.4140625, "learning_rate": 1.7905181762389298e-05, "loss": 0.6236, "step": 3714 }, { "epoch": 0.6432204306893193, "grad_norm": 1.3671875, "learning_rate": 1.7904064630890914e-05, "loss": 0.5876, "step": 3715 }, { "epoch": 0.643393572124229, "grad_norm": 1.9140625, "learning_rate": 1.7902947236467604e-05, "loss": 0.5671, "step": 3716 }, { "epoch": 0.6435667135591386, "grad_norm": 1.3515625, "learning_rate": 1.790182957915653e-05, "loss": 0.583, "step": 3717 }, { "epoch": 0.6437398549940483, "grad_norm": 1.3125, "learning_rate": 1.7900711658994875e-05, "loss": 0.5763, "step": 3718 }, { "epoch": 0.6439129964289579, "grad_norm": 1.34375, "learning_rate": 1.789959347601983e-05, "loss": 0.572, "step": 3719 }, { "epoch": 0.6440861378638676, "grad_norm": 1.390625, "learning_rate": 1.7898475030268583e-05, "loss": 0.5891, "step": 3720 }, { "epoch": 0.6442592792987772, "grad_norm": 1.2734375, "learning_rate": 1.7897356321778342e-05, "loss": 0.6121, "step": 3721 }, { "epoch": 0.6444324207336868, "grad_norm": 1.359375, "learning_rate": 1.789623735058632e-05, "loss": 0.6026, "step": 3722 }, { "epoch": 0.6446055621685964, "grad_norm": 1.390625, "learning_rate": 1.7895118116729743e-05, "loss": 0.6598, "step": 3723 }, { "epoch": 0.6447787036035061, "grad_norm": 1.296875, "learning_rate": 1.7893998620245835e-05, "loss": 0.5949, "step": 3724 }, { "epoch": 0.6449518450384157, "grad_norm": 1.390625, "learning_rate": 1.789287886117184e-05, "loss": 0.6317, "step": 3725 }, { "epoch": 0.6451249864733254, "grad_norm": 1.2890625, "learning_rate": 1.7891758839545005e-05, "loss": 0.5371, "step": 3726 }, { "epoch": 0.645298127908235, "grad_norm": 1.4140625, "learning_rate": 1.7890638555402585e-05, "loss": 0.65, "step": 3727 }, { "epoch": 0.6454712693431447, "grad_norm": 1.3359375, "learning_rate": 1.7889518008781853e-05, "loss": 0.5922, "step": 3728 }, { "epoch": 0.6456444107780543, "grad_norm": 1.28125, "learning_rate": 1.7888397199720074e-05, "loss": 0.6103, "step": 3729 }, { "epoch": 0.645817552212964, "grad_norm": 1.3046875, "learning_rate": 1.7887276128254532e-05, "loss": 0.6593, "step": 3730 }, { "epoch": 0.6459906936478736, "grad_norm": 1.3671875, "learning_rate": 1.7886154794422527e-05, "loss": 0.5426, "step": 3731 }, { "epoch": 0.6461638350827833, "grad_norm": 1.3125, "learning_rate": 1.7885033198261356e-05, "loss": 0.6339, "step": 3732 }, { "epoch": 0.6463369765176928, "grad_norm": 1.421875, "learning_rate": 1.7883911339808324e-05, "loss": 0.6283, "step": 3733 }, { "epoch": 0.6465101179526025, "grad_norm": 1.2421875, "learning_rate": 1.7882789219100756e-05, "loss": 0.5764, "step": 3734 }, { "epoch": 0.6466832593875121, "grad_norm": 1.4453125, "learning_rate": 1.788166683617597e-05, "loss": 0.6469, "step": 3735 }, { "epoch": 0.6468564008224218, "grad_norm": 1.296875, "learning_rate": 1.788054419107131e-05, "loss": 0.5814, "step": 3736 }, { "epoch": 0.6470295422573314, "grad_norm": 1.34375, "learning_rate": 1.7879421283824116e-05, "loss": 0.6225, "step": 3737 }, { "epoch": 0.6472026836922411, "grad_norm": 1.2734375, "learning_rate": 1.787829811447174e-05, "loss": 0.5227, "step": 3738 }, { "epoch": 0.6473758251271508, "grad_norm": 1.4765625, "learning_rate": 1.787717468305155e-05, "loss": 0.6908, "step": 3739 }, { "epoch": 0.6475489665620604, "grad_norm": 1.578125, "learning_rate": 1.7876050989600908e-05, "loss": 0.6735, "step": 3740 }, { "epoch": 0.64772210799697, "grad_norm": 1.375, "learning_rate": 1.7874927034157198e-05, "loss": 0.6253, "step": 3741 }, { "epoch": 0.6478952494318797, "grad_norm": 1.2421875, "learning_rate": 1.7873802816757808e-05, "loss": 0.5949, "step": 3742 }, { "epoch": 0.6480683908667894, "grad_norm": 1.328125, "learning_rate": 1.7872678337440134e-05, "loss": 0.5858, "step": 3743 }, { "epoch": 0.6482415323016989, "grad_norm": 1.3671875, "learning_rate": 1.787155359624158e-05, "loss": 0.6221, "step": 3744 }, { "epoch": 0.6484146737366085, "grad_norm": 1.5234375, "learning_rate": 1.787042859319956e-05, "loss": 0.6452, "step": 3745 }, { "epoch": 0.6485878151715182, "grad_norm": 1.2734375, "learning_rate": 1.78693033283515e-05, "loss": 0.589, "step": 3746 }, { "epoch": 0.6487609566064279, "grad_norm": 1.3125, "learning_rate": 1.7868177801734825e-05, "loss": 0.5422, "step": 3747 }, { "epoch": 0.6489340980413375, "grad_norm": 1.3515625, "learning_rate": 1.786705201338698e-05, "loss": 0.5406, "step": 3748 }, { "epoch": 0.6491072394762472, "grad_norm": 1.34375, "learning_rate": 1.786592596334541e-05, "loss": 0.6162, "step": 3749 }, { "epoch": 0.6492803809111568, "grad_norm": 1.484375, "learning_rate": 1.786479965164758e-05, "loss": 0.6201, "step": 3750 }, { "epoch": 0.6494535223460665, "grad_norm": 1.5, "learning_rate": 1.786367307833095e-05, "loss": 0.676, "step": 3751 }, { "epoch": 0.6496266637809761, "grad_norm": 1.3203125, "learning_rate": 1.7862546243432993e-05, "loss": 0.5481, "step": 3752 }, { "epoch": 0.6497998052158858, "grad_norm": 1.3984375, "learning_rate": 1.7861419146991204e-05, "loss": 0.6097, "step": 3753 }, { "epoch": 0.6499729466507954, "grad_norm": 1.21875, "learning_rate": 1.786029178904306e-05, "loss": 0.5724, "step": 3754 }, { "epoch": 0.6501460880857051, "grad_norm": 1.28125, "learning_rate": 1.785916416962607e-05, "loss": 0.5922, "step": 3755 }, { "epoch": 0.6503192295206146, "grad_norm": 1.375, "learning_rate": 1.7858036288777746e-05, "loss": 0.5793, "step": 3756 }, { "epoch": 0.6504923709555243, "grad_norm": 1.3828125, "learning_rate": 1.7856908146535602e-05, "loss": 0.623, "step": 3757 }, { "epoch": 0.6506655123904339, "grad_norm": 1.2578125, "learning_rate": 1.785577974293717e-05, "loss": 0.5733, "step": 3758 }, { "epoch": 0.6508386538253436, "grad_norm": 1.421875, "learning_rate": 1.7854651078019977e-05, "loss": 0.5605, "step": 3759 }, { "epoch": 0.6510117952602532, "grad_norm": 1.3984375, "learning_rate": 1.7853522151821577e-05, "loss": 0.6137, "step": 3760 }, { "epoch": 0.6511849366951629, "grad_norm": 1.2890625, "learning_rate": 1.7852392964379516e-05, "loss": 0.5201, "step": 3761 }, { "epoch": 0.6513580781300725, "grad_norm": 1.3515625, "learning_rate": 1.785126351573136e-05, "loss": 0.522, "step": 3762 }, { "epoch": 0.6515312195649822, "grad_norm": 1.359375, "learning_rate": 1.785013380591468e-05, "loss": 0.6114, "step": 3763 }, { "epoch": 0.6517043609998918, "grad_norm": 1.390625, "learning_rate": 1.784900383496705e-05, "loss": 0.632, "step": 3764 }, { "epoch": 0.6518775024348015, "grad_norm": 1.2421875, "learning_rate": 1.7847873602926067e-05, "loss": 0.5666, "step": 3765 }, { "epoch": 0.6520506438697111, "grad_norm": 1.3359375, "learning_rate": 1.7846743109829318e-05, "loss": 0.5401, "step": 3766 }, { "epoch": 0.6522237853046207, "grad_norm": 1.4453125, "learning_rate": 1.7845612355714417e-05, "loss": 0.6384, "step": 3767 }, { "epoch": 0.6523969267395303, "grad_norm": 1.2734375, "learning_rate": 1.784448134061897e-05, "loss": 0.5471, "step": 3768 }, { "epoch": 0.65257006817444, "grad_norm": 1.46875, "learning_rate": 1.7843350064580603e-05, "loss": 0.689, "step": 3769 }, { "epoch": 0.6527432096093496, "grad_norm": 1.3671875, "learning_rate": 1.784221852763695e-05, "loss": 0.561, "step": 3770 }, { "epoch": 0.6529163510442593, "grad_norm": 1.390625, "learning_rate": 1.784108672982565e-05, "loss": 0.6336, "step": 3771 }, { "epoch": 0.6530894924791689, "grad_norm": 1.359375, "learning_rate": 1.7839954671184346e-05, "loss": 0.6044, "step": 3772 }, { "epoch": 0.6532626339140786, "grad_norm": 1.390625, "learning_rate": 1.7838822351750705e-05, "loss": 0.5886, "step": 3773 }, { "epoch": 0.6534357753489882, "grad_norm": 1.3671875, "learning_rate": 1.7837689771562386e-05, "loss": 0.659, "step": 3774 }, { "epoch": 0.6536089167838979, "grad_norm": 1.28125, "learning_rate": 1.7836556930657062e-05, "loss": 0.5993, "step": 3775 }, { "epoch": 0.6537820582188075, "grad_norm": 1.375, "learning_rate": 1.783542382907242e-05, "loss": 0.5973, "step": 3776 }, { "epoch": 0.6539551996537172, "grad_norm": 1.4453125, "learning_rate": 1.783429046684616e-05, "loss": 0.5797, "step": 3777 }, { "epoch": 0.6541283410886267, "grad_norm": 1.546875, "learning_rate": 1.783315684401597e-05, "loss": 0.6856, "step": 3778 }, { "epoch": 0.6543014825235364, "grad_norm": 1.2734375, "learning_rate": 1.7832022960619562e-05, "loss": 0.5097, "step": 3779 }, { "epoch": 0.654474623958446, "grad_norm": 1.359375, "learning_rate": 1.783088881669466e-05, "loss": 0.5622, "step": 3780 }, { "epoch": 0.6546477653933557, "grad_norm": 1.34375, "learning_rate": 1.7829754412278983e-05, "loss": 0.6125, "step": 3781 }, { "epoch": 0.6548209068282653, "grad_norm": 1.234375, "learning_rate": 1.7828619747410278e-05, "loss": 0.5599, "step": 3782 }, { "epoch": 0.654994048263175, "grad_norm": 1.2734375, "learning_rate": 1.7827484822126277e-05, "loss": 0.5879, "step": 3783 }, { "epoch": 0.6551671896980846, "grad_norm": 1.359375, "learning_rate": 1.7826349636464734e-05, "loss": 0.6483, "step": 3784 }, { "epoch": 0.6553403311329943, "grad_norm": 1.28125, "learning_rate": 1.7825214190463417e-05, "loss": 0.565, "step": 3785 }, { "epoch": 0.6555134725679039, "grad_norm": 1.28125, "learning_rate": 1.7824078484160093e-05, "loss": 0.5684, "step": 3786 }, { "epoch": 0.6556866140028136, "grad_norm": 1.390625, "learning_rate": 1.7822942517592544e-05, "loss": 0.5909, "step": 3787 }, { "epoch": 0.6558597554377232, "grad_norm": 1.1796875, "learning_rate": 1.782180629079855e-05, "loss": 0.4824, "step": 3788 }, { "epoch": 0.6560328968726329, "grad_norm": 1.359375, "learning_rate": 1.7820669803815912e-05, "loss": 0.5364, "step": 3789 }, { "epoch": 0.6562060383075424, "grad_norm": 1.4765625, "learning_rate": 1.7819533056682433e-05, "loss": 0.5798, "step": 3790 }, { "epoch": 0.6563791797424521, "grad_norm": 1.34375, "learning_rate": 1.7818396049435927e-05, "loss": 0.5383, "step": 3791 }, { "epoch": 0.6565523211773617, "grad_norm": 1.3515625, "learning_rate": 1.7817258782114216e-05, "loss": 0.6205, "step": 3792 }, { "epoch": 0.6567254626122714, "grad_norm": 1.3203125, "learning_rate": 1.781612125475513e-05, "loss": 0.5452, "step": 3793 }, { "epoch": 0.656898604047181, "grad_norm": 1.28125, "learning_rate": 1.7814983467396512e-05, "loss": 0.6142, "step": 3794 }, { "epoch": 0.6570717454820907, "grad_norm": 1.765625, "learning_rate": 1.7813845420076207e-05, "loss": 0.7513, "step": 3795 }, { "epoch": 0.6572448869170003, "grad_norm": 1.2734375, "learning_rate": 1.781270711283207e-05, "loss": 0.6033, "step": 3796 }, { "epoch": 0.65741802835191, "grad_norm": 1.328125, "learning_rate": 1.781156854570197e-05, "loss": 0.5686, "step": 3797 }, { "epoch": 0.6575911697868196, "grad_norm": 1.3125, "learning_rate": 1.7810429718723774e-05, "loss": 0.57, "step": 3798 }, { "epoch": 0.6577643112217293, "grad_norm": 1.4765625, "learning_rate": 1.7809290631935373e-05, "loss": 0.62, "step": 3799 }, { "epoch": 0.6579374526566389, "grad_norm": 1.296875, "learning_rate": 1.7808151285374652e-05, "loss": 0.5719, "step": 3800 }, { "epoch": 0.6581105940915485, "grad_norm": 1.3515625, "learning_rate": 1.7807011679079515e-05, "loss": 0.5776, "step": 3801 }, { "epoch": 0.6582837355264581, "grad_norm": 1.5390625, "learning_rate": 1.7805871813087867e-05, "loss": 0.678, "step": 3802 }, { "epoch": 0.6584568769613678, "grad_norm": 1.3203125, "learning_rate": 1.780473168743763e-05, "loss": 0.6168, "step": 3803 }, { "epoch": 0.6586300183962774, "grad_norm": 1.375, "learning_rate": 1.7803591302166724e-05, "loss": 0.6054, "step": 3804 }, { "epoch": 0.6588031598311871, "grad_norm": 1.265625, "learning_rate": 1.7802450657313086e-05, "loss": 0.5766, "step": 3805 }, { "epoch": 0.6589763012660967, "grad_norm": 1.3125, "learning_rate": 1.780130975291466e-05, "loss": 0.5585, "step": 3806 }, { "epoch": 0.6591494427010064, "grad_norm": 1.3125, "learning_rate": 1.7800168589009397e-05, "loss": 0.6313, "step": 3807 }, { "epoch": 0.659322584135916, "grad_norm": 1.359375, "learning_rate": 1.7799027165635254e-05, "loss": 0.6795, "step": 3808 }, { "epoch": 0.6594957255708257, "grad_norm": 1.328125, "learning_rate": 1.7797885482830207e-05, "loss": 0.6134, "step": 3809 }, { "epoch": 0.6596688670057353, "grad_norm": 1.2578125, "learning_rate": 1.7796743540632226e-05, "loss": 0.5623, "step": 3810 }, { "epoch": 0.659842008440645, "grad_norm": 1.3125, "learning_rate": 1.7795601339079302e-05, "loss": 0.6143, "step": 3811 }, { "epoch": 0.6600151498755545, "grad_norm": 1.1953125, "learning_rate": 1.7794458878209427e-05, "loss": 0.5271, "step": 3812 }, { "epoch": 0.6601882913104642, "grad_norm": 1.265625, "learning_rate": 1.7793316158060605e-05, "loss": 0.5888, "step": 3813 }, { "epoch": 0.6603614327453738, "grad_norm": 1.359375, "learning_rate": 1.779217317867085e-05, "loss": 0.6365, "step": 3814 }, { "epoch": 0.6605345741802835, "grad_norm": 1.3359375, "learning_rate": 1.7791029940078184e-05, "loss": 0.6451, "step": 3815 }, { "epoch": 0.6607077156151931, "grad_norm": 1.6171875, "learning_rate": 1.778988644232063e-05, "loss": 0.6146, "step": 3816 }, { "epoch": 0.6608808570501028, "grad_norm": 1.4296875, "learning_rate": 1.778874268543623e-05, "loss": 0.652, "step": 3817 }, { "epoch": 0.6610539984850125, "grad_norm": 1.4296875, "learning_rate": 1.7787598669463027e-05, "loss": 0.5134, "step": 3818 }, { "epoch": 0.6612271399199221, "grad_norm": 1.390625, "learning_rate": 1.7786454394439087e-05, "loss": 0.5836, "step": 3819 }, { "epoch": 0.6614002813548318, "grad_norm": 1.3203125, "learning_rate": 1.778530986040246e-05, "loss": 0.5951, "step": 3820 }, { "epoch": 0.6615734227897414, "grad_norm": 1.4765625, "learning_rate": 1.7784165067391225e-05, "loss": 0.5941, "step": 3821 }, { "epoch": 0.6617465642246511, "grad_norm": 1.4765625, "learning_rate": 1.7783020015443468e-05, "loss": 0.6152, "step": 3822 }, { "epoch": 0.6619197056595607, "grad_norm": 1.359375, "learning_rate": 1.7781874704597265e-05, "loss": 0.6043, "step": 3823 }, { "epoch": 0.6620928470944702, "grad_norm": 1.2890625, "learning_rate": 1.7780729134890725e-05, "loss": 0.5931, "step": 3824 }, { "epoch": 0.6622659885293799, "grad_norm": 1.4765625, "learning_rate": 1.7779583306361953e-05, "loss": 0.6207, "step": 3825 }, { "epoch": 0.6624391299642896, "grad_norm": 1.3671875, "learning_rate": 1.7778437219049063e-05, "loss": 0.6512, "step": 3826 }, { "epoch": 0.6626122713991992, "grad_norm": 1.2734375, "learning_rate": 1.777729087299018e-05, "loss": 0.5871, "step": 3827 }, { "epoch": 0.6627854128341089, "grad_norm": 1.3203125, "learning_rate": 1.7776144268223433e-05, "loss": 0.5516, "step": 3828 }, { "epoch": 0.6629585542690185, "grad_norm": 1.4453125, "learning_rate": 1.777499740478697e-05, "loss": 0.7335, "step": 3829 }, { "epoch": 0.6631316957039282, "grad_norm": 1.296875, "learning_rate": 1.7773850282718936e-05, "loss": 0.6111, "step": 3830 }, { "epoch": 0.6633048371388378, "grad_norm": 1.34375, "learning_rate": 1.777270290205749e-05, "loss": 0.6656, "step": 3831 }, { "epoch": 0.6634779785737475, "grad_norm": 1.328125, "learning_rate": 1.77715552628408e-05, "loss": 0.5419, "step": 3832 }, { "epoch": 0.6636511200086571, "grad_norm": 1.3984375, "learning_rate": 1.7770407365107044e-05, "loss": 0.609, "step": 3833 }, { "epoch": 0.6638242614435668, "grad_norm": 1.34375, "learning_rate": 1.7769259208894402e-05, "loss": 0.674, "step": 3834 }, { "epoch": 0.6639974028784763, "grad_norm": 1.2890625, "learning_rate": 1.7768110794241067e-05, "loss": 0.6228, "step": 3835 }, { "epoch": 0.664170544313386, "grad_norm": 1.4375, "learning_rate": 1.7766962121185245e-05, "loss": 0.6474, "step": 3836 }, { "epoch": 0.6643436857482956, "grad_norm": 1.3984375, "learning_rate": 1.776581318976514e-05, "loss": 0.6327, "step": 3837 }, { "epoch": 0.6645168271832053, "grad_norm": 1.3671875, "learning_rate": 1.7764664000018975e-05, "loss": 0.6397, "step": 3838 }, { "epoch": 0.6646899686181149, "grad_norm": 1.328125, "learning_rate": 1.7763514551984977e-05, "loss": 0.5561, "step": 3839 }, { "epoch": 0.6648631100530246, "grad_norm": 1.3203125, "learning_rate": 1.7762364845701377e-05, "loss": 0.5093, "step": 3840 }, { "epoch": 0.6650362514879342, "grad_norm": 1.359375, "learning_rate": 1.7761214881206427e-05, "loss": 0.6045, "step": 3841 }, { "epoch": 0.6652093929228439, "grad_norm": 1.453125, "learning_rate": 1.7760064658538377e-05, "loss": 0.6949, "step": 3842 }, { "epoch": 0.6653825343577535, "grad_norm": 1.3125, "learning_rate": 1.7758914177735487e-05, "loss": 0.591, "step": 3843 }, { "epoch": 0.6655556757926632, "grad_norm": 1.34375, "learning_rate": 1.7757763438836027e-05, "loss": 0.6016, "step": 3844 }, { "epoch": 0.6657288172275728, "grad_norm": 1.203125, "learning_rate": 1.775661244187828e-05, "loss": 0.4953, "step": 3845 }, { "epoch": 0.6659019586624824, "grad_norm": 1.3203125, "learning_rate": 1.7755461186900523e-05, "loss": 0.5409, "step": 3846 }, { "epoch": 0.666075100097392, "grad_norm": 1.4140625, "learning_rate": 1.7754309673941065e-05, "loss": 0.6337, "step": 3847 }, { "epoch": 0.6662482415323017, "grad_norm": 1.390625, "learning_rate": 1.7753157903038204e-05, "loss": 0.6427, "step": 3848 }, { "epoch": 0.6664213829672113, "grad_norm": 1.2578125, "learning_rate": 1.7752005874230253e-05, "loss": 0.6125, "step": 3849 }, { "epoch": 0.666594524402121, "grad_norm": 1.4375, "learning_rate": 1.7750853587555535e-05, "loss": 0.6189, "step": 3850 }, { "epoch": 0.6667676658370306, "grad_norm": 1.484375, "learning_rate": 1.774970104305238e-05, "loss": 0.7437, "step": 3851 }, { "epoch": 0.6669408072719403, "grad_norm": 1.40625, "learning_rate": 1.7748548240759126e-05, "loss": 0.5588, "step": 3852 }, { "epoch": 0.6671139487068499, "grad_norm": 1.3828125, "learning_rate": 1.7747395180714125e-05, "loss": 0.5927, "step": 3853 }, { "epoch": 0.6672870901417596, "grad_norm": 1.4375, "learning_rate": 1.7746241862955722e-05, "loss": 0.6688, "step": 3854 }, { "epoch": 0.6674602315766692, "grad_norm": 1.4296875, "learning_rate": 1.7745088287522292e-05, "loss": 0.684, "step": 3855 }, { "epoch": 0.6676333730115789, "grad_norm": 1.2734375, "learning_rate": 1.7743934454452205e-05, "loss": 0.5787, "step": 3856 }, { "epoch": 0.6678065144464885, "grad_norm": 1.453125, "learning_rate": 1.7742780363783843e-05, "loss": 0.6621, "step": 3857 }, { "epoch": 0.6679796558813981, "grad_norm": 1.25, "learning_rate": 1.774162601555559e-05, "loss": 0.6076, "step": 3858 }, { "epoch": 0.6681527973163077, "grad_norm": 1.3203125, "learning_rate": 1.774047140980586e-05, "loss": 0.5682, "step": 3859 }, { "epoch": 0.6683259387512174, "grad_norm": 1.3046875, "learning_rate": 1.7739316546573046e-05, "loss": 0.5679, "step": 3860 }, { "epoch": 0.668499080186127, "grad_norm": 1.2890625, "learning_rate": 1.7738161425895568e-05, "loss": 0.568, "step": 3861 }, { "epoch": 0.6686722216210367, "grad_norm": 1.25, "learning_rate": 1.7737006047811855e-05, "loss": 0.5709, "step": 3862 }, { "epoch": 0.6688453630559463, "grad_norm": 1.421875, "learning_rate": 1.7735850412360332e-05, "loss": 0.5842, "step": 3863 }, { "epoch": 0.669018504490856, "grad_norm": 1.4453125, "learning_rate": 1.773469451957945e-05, "loss": 0.5974, "step": 3864 }, { "epoch": 0.6691916459257656, "grad_norm": 1.421875, "learning_rate": 1.773353836950765e-05, "loss": 0.6332, "step": 3865 }, { "epoch": 0.6693647873606753, "grad_norm": 1.3515625, "learning_rate": 1.7732381962183398e-05, "loss": 0.6289, "step": 3866 }, { "epoch": 0.6695379287955849, "grad_norm": 1.2734375, "learning_rate": 1.7731225297645157e-05, "loss": 0.5923, "step": 3867 }, { "epoch": 0.6697110702304946, "grad_norm": 1.3984375, "learning_rate": 1.7730068375931403e-05, "loss": 0.8879, "step": 3868 }, { "epoch": 0.6698842116654041, "grad_norm": 1.3203125, "learning_rate": 1.772891119708063e-05, "loss": 0.6506, "step": 3869 }, { "epoch": 0.6700573531003138, "grad_norm": 1.3203125, "learning_rate": 1.7727753761131312e-05, "loss": 0.5625, "step": 3870 }, { "epoch": 0.6702304945352234, "grad_norm": 1.3359375, "learning_rate": 1.7726596068121967e-05, "loss": 0.538, "step": 3871 }, { "epoch": 0.6704036359701331, "grad_norm": 1.375, "learning_rate": 1.77254381180911e-05, "loss": 0.588, "step": 3872 }, { "epoch": 0.6705767774050427, "grad_norm": 1.2890625, "learning_rate": 1.7724279911077225e-05, "loss": 0.5698, "step": 3873 }, { "epoch": 0.6707499188399524, "grad_norm": 1.375, "learning_rate": 1.7723121447118878e-05, "loss": 0.5214, "step": 3874 }, { "epoch": 0.670923060274862, "grad_norm": 1.3828125, "learning_rate": 1.772196272625459e-05, "loss": 0.6384, "step": 3875 }, { "epoch": 0.6710962017097717, "grad_norm": 1.3671875, "learning_rate": 1.7720803748522903e-05, "loss": 0.5607, "step": 3876 }, { "epoch": 0.6712693431446813, "grad_norm": 1.390625, "learning_rate": 1.7719644513962377e-05, "loss": 0.5686, "step": 3877 }, { "epoch": 0.671442484579591, "grad_norm": 1.40625, "learning_rate": 1.7718485022611564e-05, "loss": 0.5867, "step": 3878 }, { "epoch": 0.6716156260145006, "grad_norm": 1.2890625, "learning_rate": 1.771732527450904e-05, "loss": 0.5689, "step": 3879 }, { "epoch": 0.6717887674494102, "grad_norm": 1.3359375, "learning_rate": 1.771616526969338e-05, "loss": 0.5734, "step": 3880 }, { "epoch": 0.6719619088843198, "grad_norm": 1.375, "learning_rate": 1.7715005008203175e-05, "loss": 0.628, "step": 3881 }, { "epoch": 0.6721350503192295, "grad_norm": 1.5859375, "learning_rate": 1.771384449007702e-05, "loss": 0.6004, "step": 3882 }, { "epoch": 0.6723081917541391, "grad_norm": 1.3203125, "learning_rate": 1.7712683715353514e-05, "loss": 0.5326, "step": 3883 }, { "epoch": 0.6724813331890488, "grad_norm": 1.4296875, "learning_rate": 1.7711522684071277e-05, "loss": 0.557, "step": 3884 }, { "epoch": 0.6726544746239584, "grad_norm": 1.4453125, "learning_rate": 1.7710361396268924e-05, "loss": 0.6397, "step": 3885 }, { "epoch": 0.6728276160588681, "grad_norm": 1.359375, "learning_rate": 1.770919985198509e-05, "loss": 0.6171, "step": 3886 }, { "epoch": 0.6730007574937777, "grad_norm": 1.3125, "learning_rate": 1.7708038051258407e-05, "loss": 0.6902, "step": 3887 }, { "epoch": 0.6731738989286874, "grad_norm": 1.3203125, "learning_rate": 1.7706875994127527e-05, "loss": 0.5589, "step": 3888 }, { "epoch": 0.673347040363597, "grad_norm": 1.375, "learning_rate": 1.7705713680631103e-05, "loss": 0.6344, "step": 3889 }, { "epoch": 0.6735201817985067, "grad_norm": 1.2734375, "learning_rate": 1.77045511108078e-05, "loss": 0.5778, "step": 3890 }, { "epoch": 0.6736933232334164, "grad_norm": 1.40625, "learning_rate": 1.770338828469629e-05, "loss": 0.6661, "step": 3891 }, { "epoch": 0.6738664646683259, "grad_norm": 1.3125, "learning_rate": 1.770222520233525e-05, "loss": 0.48, "step": 3892 }, { "epoch": 0.6740396061032355, "grad_norm": 1.3671875, "learning_rate": 1.7701061863763375e-05, "loss": 0.603, "step": 3893 }, { "epoch": 0.6742127475381452, "grad_norm": 1.3671875, "learning_rate": 1.769989826901936e-05, "loss": 0.6063, "step": 3894 }, { "epoch": 0.6743858889730548, "grad_norm": 1.390625, "learning_rate": 1.769873441814191e-05, "loss": 0.5837, "step": 3895 }, { "epoch": 0.6745590304079645, "grad_norm": 1.3515625, "learning_rate": 1.7697570311169746e-05, "loss": 0.6138, "step": 3896 }, { "epoch": 0.6747321718428742, "grad_norm": 1.265625, "learning_rate": 1.7696405948141585e-05, "loss": 0.504, "step": 3897 }, { "epoch": 0.6749053132777838, "grad_norm": 1.2265625, "learning_rate": 1.7695241329096164e-05, "loss": 0.5248, "step": 3898 }, { "epoch": 0.6750784547126935, "grad_norm": 1.3515625, "learning_rate": 1.769407645407222e-05, "loss": 0.5959, "step": 3899 }, { "epoch": 0.6752515961476031, "grad_norm": 1.3046875, "learning_rate": 1.7692911323108505e-05, "loss": 0.5899, "step": 3900 }, { "epoch": 0.6754247375825128, "grad_norm": 1.2421875, "learning_rate": 1.769174593624377e-05, "loss": 0.5819, "step": 3901 }, { "epoch": 0.6755978790174224, "grad_norm": 1.453125, "learning_rate": 1.7690580293516786e-05, "loss": 0.6611, "step": 3902 }, { "epoch": 0.675771020452332, "grad_norm": 1.2890625, "learning_rate": 1.768941439496633e-05, "loss": 0.671, "step": 3903 }, { "epoch": 0.6759441618872416, "grad_norm": 1.484375, "learning_rate": 1.7688248240631177e-05, "loss": 0.6154, "step": 3904 }, { "epoch": 0.6761173033221513, "grad_norm": 1.3359375, "learning_rate": 1.768708183055013e-05, "loss": 0.5873, "step": 3905 }, { "epoch": 0.6762904447570609, "grad_norm": 1.2421875, "learning_rate": 1.768591516476198e-05, "loss": 0.5111, "step": 3906 }, { "epoch": 0.6764635861919706, "grad_norm": 1.375, "learning_rate": 1.768474824330554e-05, "loss": 0.6073, "step": 3907 }, { "epoch": 0.6766367276268802, "grad_norm": 1.3359375, "learning_rate": 1.7683581066219622e-05, "loss": 0.6078, "step": 3908 }, { "epoch": 0.6768098690617899, "grad_norm": 1.359375, "learning_rate": 1.7682413633543057e-05, "loss": 0.5725, "step": 3909 }, { "epoch": 0.6769830104966995, "grad_norm": 1.4375, "learning_rate": 1.7681245945314677e-05, "loss": 0.6558, "step": 3910 }, { "epoch": 0.6771561519316092, "grad_norm": 1.3984375, "learning_rate": 1.7680078001573323e-05, "loss": 0.5714, "step": 3911 }, { "epoch": 0.6773292933665188, "grad_norm": 1.1953125, "learning_rate": 1.767890980235785e-05, "loss": 0.5998, "step": 3912 }, { "epoch": 0.6775024348014285, "grad_norm": 1.3359375, "learning_rate": 1.7677741347707117e-05, "loss": 0.6053, "step": 3913 }, { "epoch": 0.677675576236338, "grad_norm": 1.2890625, "learning_rate": 1.767657263765999e-05, "loss": 0.5445, "step": 3914 }, { "epoch": 0.6778487176712477, "grad_norm": 1.34375, "learning_rate": 1.767540367225534e-05, "loss": 0.5635, "step": 3915 }, { "epoch": 0.6780218591061573, "grad_norm": 1.3203125, "learning_rate": 1.7674234451532065e-05, "loss": 0.6292, "step": 3916 }, { "epoch": 0.678195000541067, "grad_norm": 1.4453125, "learning_rate": 1.7673064975529048e-05, "loss": 0.571, "step": 3917 }, { "epoch": 0.6783681419759766, "grad_norm": 1.3515625, "learning_rate": 1.7671895244285195e-05, "loss": 0.6352, "step": 3918 }, { "epoch": 0.6785412834108863, "grad_norm": 1.4296875, "learning_rate": 1.767072525783942e-05, "loss": 0.5901, "step": 3919 }, { "epoch": 0.6787144248457959, "grad_norm": 1.3125, "learning_rate": 1.7669555016230637e-05, "loss": 0.5981, "step": 3920 }, { "epoch": 0.6788875662807056, "grad_norm": 1.2578125, "learning_rate": 1.766838451949777e-05, "loss": 0.5427, "step": 3921 }, { "epoch": 0.6790607077156152, "grad_norm": 1.46875, "learning_rate": 1.766721376767976e-05, "loss": 0.5582, "step": 3922 }, { "epoch": 0.6792338491505249, "grad_norm": 1.234375, "learning_rate": 1.766604276081556e-05, "loss": 0.6259, "step": 3923 }, { "epoch": 0.6794069905854345, "grad_norm": 1.5078125, "learning_rate": 1.766487149894411e-05, "loss": 0.6155, "step": 3924 }, { "epoch": 0.6795801320203441, "grad_norm": 1.34375, "learning_rate": 1.766369998210437e-05, "loss": 0.5984, "step": 3925 }, { "epoch": 0.6797532734552537, "grad_norm": 1.3125, "learning_rate": 1.7662528210335322e-05, "loss": 0.5714, "step": 3926 }, { "epoch": 0.6799264148901634, "grad_norm": 1.4453125, "learning_rate": 1.7661356183675937e-05, "loss": 0.6425, "step": 3927 }, { "epoch": 0.680099556325073, "grad_norm": 1.1640625, "learning_rate": 1.7660183902165205e-05, "loss": 0.5587, "step": 3928 }, { "epoch": 0.6802726977599827, "grad_norm": 1.3046875, "learning_rate": 1.7659011365842117e-05, "loss": 0.5356, "step": 3929 }, { "epoch": 0.6804458391948923, "grad_norm": 1.3203125, "learning_rate": 1.7657838574745678e-05, "loss": 0.5997, "step": 3930 }, { "epoch": 0.680618980629802, "grad_norm": 1.40625, "learning_rate": 1.7656665528914905e-05, "loss": 0.5591, "step": 3931 }, { "epoch": 0.6807921220647116, "grad_norm": 1.234375, "learning_rate": 1.7655492228388813e-05, "loss": 0.5812, "step": 3932 }, { "epoch": 0.6809652634996213, "grad_norm": 1.2578125, "learning_rate": 1.7654318673206435e-05, "loss": 0.6199, "step": 3933 }, { "epoch": 0.6811384049345309, "grad_norm": 1.3046875, "learning_rate": 1.7653144863406805e-05, "loss": 0.6169, "step": 3934 }, { "epoch": 0.6813115463694406, "grad_norm": 1.2421875, "learning_rate": 1.7651970799028976e-05, "loss": 0.5732, "step": 3935 }, { "epoch": 0.6814846878043502, "grad_norm": 1.3515625, "learning_rate": 1.7650796480111996e-05, "loss": 0.5913, "step": 3936 }, { "epoch": 0.6816578292392598, "grad_norm": 1.4765625, "learning_rate": 1.7649621906694928e-05, "loss": 0.6062, "step": 3937 }, { "epoch": 0.6818309706741694, "grad_norm": 1.3984375, "learning_rate": 1.764844707881685e-05, "loss": 0.6018, "step": 3938 }, { "epoch": 0.6820041121090791, "grad_norm": 1.3046875, "learning_rate": 1.764727199651684e-05, "loss": 0.6291, "step": 3939 }, { "epoch": 0.6821772535439887, "grad_norm": 1.328125, "learning_rate": 1.764609665983398e-05, "loss": 0.5888, "step": 3940 }, { "epoch": 0.6823503949788984, "grad_norm": 1.2890625, "learning_rate": 1.7644921068807375e-05, "loss": 0.5074, "step": 3941 }, { "epoch": 0.682523536413808, "grad_norm": 1.3125, "learning_rate": 1.7643745223476126e-05, "loss": 0.4926, "step": 3942 }, { "epoch": 0.6826966778487177, "grad_norm": 1.3359375, "learning_rate": 1.764256912387935e-05, "loss": 0.5196, "step": 3943 }, { "epoch": 0.6828698192836273, "grad_norm": 1.3125, "learning_rate": 1.7641392770056167e-05, "loss": 0.583, "step": 3944 }, { "epoch": 0.683042960718537, "grad_norm": 1.359375, "learning_rate": 1.764021616204571e-05, "loss": 0.5486, "step": 3945 }, { "epoch": 0.6832161021534466, "grad_norm": 1.359375, "learning_rate": 1.763903929988711e-05, "loss": 0.5894, "step": 3946 }, { "epoch": 0.6833892435883563, "grad_norm": 1.34375, "learning_rate": 1.7637862183619527e-05, "loss": 0.6294, "step": 3947 }, { "epoch": 0.6835623850232658, "grad_norm": 1.3203125, "learning_rate": 1.7636684813282113e-05, "loss": 0.6652, "step": 3948 }, { "epoch": 0.6837355264581755, "grad_norm": 1.390625, "learning_rate": 1.763550718891403e-05, "loss": 0.6479, "step": 3949 }, { "epoch": 0.6839086678930851, "grad_norm": 1.3671875, "learning_rate": 1.7634329310554453e-05, "loss": 0.5657, "step": 3950 }, { "epoch": 0.6840818093279948, "grad_norm": 1.3828125, "learning_rate": 1.763315117824256e-05, "loss": 0.6336, "step": 3951 }, { "epoch": 0.6842549507629044, "grad_norm": 1.34375, "learning_rate": 1.7631972792017547e-05, "loss": 0.5846, "step": 3952 }, { "epoch": 0.6844280921978141, "grad_norm": 1.359375, "learning_rate": 1.763079415191861e-05, "loss": 0.5338, "step": 3953 }, { "epoch": 0.6846012336327237, "grad_norm": 1.421875, "learning_rate": 1.7629615257984954e-05, "loss": 0.6415, "step": 3954 }, { "epoch": 0.6847743750676334, "grad_norm": 1.3828125, "learning_rate": 1.7628436110255796e-05, "loss": 0.5505, "step": 3955 }, { "epoch": 0.684947516502543, "grad_norm": 1.3828125, "learning_rate": 1.762725670877036e-05, "loss": 0.6242, "step": 3956 }, { "epoch": 0.6851206579374527, "grad_norm": 1.3359375, "learning_rate": 1.762607705356788e-05, "loss": 0.6366, "step": 3957 }, { "epoch": 0.6852937993723623, "grad_norm": 1.21875, "learning_rate": 1.762489714468759e-05, "loss": 0.5491, "step": 3958 }, { "epoch": 0.6854669408072719, "grad_norm": 1.2109375, "learning_rate": 1.7623716982168745e-05, "loss": 0.5426, "step": 3959 }, { "epoch": 0.6856400822421815, "grad_norm": 1.2578125, "learning_rate": 1.7622536566050602e-05, "loss": 0.5185, "step": 3960 }, { "epoch": 0.6858132236770912, "grad_norm": 1.390625, "learning_rate": 1.7621355896372424e-05, "loss": 0.6499, "step": 3961 }, { "epoch": 0.6859863651120008, "grad_norm": 1.3515625, "learning_rate": 1.762017497317349e-05, "loss": 0.5591, "step": 3962 }, { "epoch": 0.6861595065469105, "grad_norm": 1.5390625, "learning_rate": 1.7618993796493083e-05, "loss": 0.6195, "step": 3963 }, { "epoch": 0.6863326479818201, "grad_norm": 1.3984375, "learning_rate": 1.7617812366370487e-05, "loss": 0.6111, "step": 3964 }, { "epoch": 0.6865057894167298, "grad_norm": 1.2734375, "learning_rate": 1.7616630682845008e-05, "loss": 0.5409, "step": 3965 }, { "epoch": 0.6866789308516394, "grad_norm": 1.4453125, "learning_rate": 1.7615448745955956e-05, "loss": 0.4956, "step": 3966 }, { "epoch": 0.6868520722865491, "grad_norm": 1.265625, "learning_rate": 1.7614266555742635e-05, "loss": 0.5886, "step": 3967 }, { "epoch": 0.6870252137214587, "grad_norm": 1.40625, "learning_rate": 1.7613084112244387e-05, "loss": 0.5869, "step": 3968 }, { "epoch": 0.6871983551563684, "grad_norm": 1.3359375, "learning_rate": 1.7611901415500536e-05, "loss": 0.5858, "step": 3969 }, { "epoch": 0.687371496591278, "grad_norm": 1.375, "learning_rate": 1.7610718465550423e-05, "loss": 0.5721, "step": 3970 }, { "epoch": 0.6875446380261876, "grad_norm": 1.34375, "learning_rate": 1.7609535262433403e-05, "loss": 0.6261, "step": 3971 }, { "epoch": 0.6877177794610972, "grad_norm": 1.421875, "learning_rate": 1.760835180618883e-05, "loss": 0.704, "step": 3972 }, { "epoch": 0.6878909208960069, "grad_norm": 1.328125, "learning_rate": 1.7607168096856075e-05, "loss": 0.654, "step": 3973 }, { "epoch": 0.6880640623309165, "grad_norm": 1.4765625, "learning_rate": 1.760598413447451e-05, "loss": 0.601, "step": 3974 }, { "epoch": 0.6882372037658262, "grad_norm": 1.5078125, "learning_rate": 1.7604799919083523e-05, "loss": 0.6754, "step": 3975 }, { "epoch": 0.6884103452007359, "grad_norm": 1.2890625, "learning_rate": 1.7603615450722504e-05, "loss": 0.6065, "step": 3976 }, { "epoch": 0.6885834866356455, "grad_norm": 1.4609375, "learning_rate": 1.7602430729430854e-05, "loss": 0.5743, "step": 3977 }, { "epoch": 0.6887566280705552, "grad_norm": 1.4765625, "learning_rate": 1.760124575524798e-05, "loss": 0.5971, "step": 3978 }, { "epoch": 0.6889297695054648, "grad_norm": 1.453125, "learning_rate": 1.7600060528213302e-05, "loss": 0.5672, "step": 3979 }, { "epoch": 0.6891029109403745, "grad_norm": 1.5546875, "learning_rate": 1.7598875048366252e-05, "loss": 0.5268, "step": 3980 }, { "epoch": 0.6892760523752841, "grad_norm": 1.296875, "learning_rate": 1.7597689315746248e-05, "loss": 0.595, "step": 3981 }, { "epoch": 0.6894491938101937, "grad_norm": 1.4140625, "learning_rate": 1.759650333039275e-05, "loss": 0.6627, "step": 3982 }, { "epoch": 0.6896223352451033, "grad_norm": 1.515625, "learning_rate": 1.75953170923452e-05, "loss": 0.6207, "step": 3983 }, { "epoch": 0.689795476680013, "grad_norm": 1.34375, "learning_rate": 1.759413060164306e-05, "loss": 0.5479, "step": 3984 }, { "epoch": 0.6899686181149226, "grad_norm": 1.3046875, "learning_rate": 1.75929438583258e-05, "loss": 0.5416, "step": 3985 }, { "epoch": 0.6901417595498323, "grad_norm": 1.359375, "learning_rate": 1.7591756862432893e-05, "loss": 0.6922, "step": 3986 }, { "epoch": 0.6903149009847419, "grad_norm": 1.421875, "learning_rate": 1.7590569614003825e-05, "loss": 0.5486, "step": 3987 }, { "epoch": 0.6904880424196516, "grad_norm": 1.2890625, "learning_rate": 1.758938211307809e-05, "loss": 0.5905, "step": 3988 }, { "epoch": 0.6906611838545612, "grad_norm": 1.328125, "learning_rate": 1.7588194359695186e-05, "loss": 0.6084, "step": 3989 }, { "epoch": 0.6908343252894709, "grad_norm": 1.34375, "learning_rate": 1.758700635389463e-05, "loss": 0.5625, "step": 3990 }, { "epoch": 0.6910074667243805, "grad_norm": 1.2421875, "learning_rate": 1.758581809571594e-05, "loss": 0.5461, "step": 3991 }, { "epoch": 0.6911806081592902, "grad_norm": 1.3671875, "learning_rate": 1.758462958519863e-05, "loss": 0.5599, "step": 3992 }, { "epoch": 0.6913537495941997, "grad_norm": 1.34375, "learning_rate": 1.758344082238225e-05, "loss": 0.5861, "step": 3993 }, { "epoch": 0.6915268910291094, "grad_norm": 1.390625, "learning_rate": 1.7582251807306342e-05, "loss": 0.6342, "step": 3994 }, { "epoch": 0.691700032464019, "grad_norm": 1.375, "learning_rate": 1.758106254001045e-05, "loss": 0.6023, "step": 3995 }, { "epoch": 0.6918731738989287, "grad_norm": 1.3671875, "learning_rate": 1.7579873020534136e-05, "loss": 0.5821, "step": 3996 }, { "epoch": 0.6920463153338383, "grad_norm": 1.2734375, "learning_rate": 1.7578683248916974e-05, "loss": 0.601, "step": 3997 }, { "epoch": 0.692219456768748, "grad_norm": 1.265625, "learning_rate": 1.7577493225198542e-05, "loss": 0.5524, "step": 3998 }, { "epoch": 0.6923925982036576, "grad_norm": 1.265625, "learning_rate": 1.757630294941842e-05, "loss": 0.5312, "step": 3999 }, { "epoch": 0.6925657396385673, "grad_norm": 1.3828125, "learning_rate": 1.7575112421616203e-05, "loss": 0.57, "step": 4000 }, { "epoch": 0.6927388810734769, "grad_norm": 1.28125, "learning_rate": 1.7573921641831496e-05, "loss": 0.587, "step": 4001 }, { "epoch": 0.6929120225083866, "grad_norm": 1.3203125, "learning_rate": 1.7572730610103907e-05, "loss": 0.5861, "step": 4002 }, { "epoch": 0.6930851639432962, "grad_norm": 1.3828125, "learning_rate": 1.7571539326473057e-05, "loss": 0.5805, "step": 4003 }, { "epoch": 0.6932583053782059, "grad_norm": 1.4140625, "learning_rate": 1.7570347790978576e-05, "loss": 0.6781, "step": 4004 }, { "epoch": 0.6934314468131154, "grad_norm": 1.3515625, "learning_rate": 1.756915600366009e-05, "loss": 0.6244, "step": 4005 }, { "epoch": 0.6936045882480251, "grad_norm": 1.3125, "learning_rate": 1.7567963964557255e-05, "loss": 0.5967, "step": 4006 }, { "epoch": 0.6937777296829347, "grad_norm": 1.3671875, "learning_rate": 1.7566771673709718e-05, "loss": 0.6395, "step": 4007 }, { "epoch": 0.6939508711178444, "grad_norm": 1.40625, "learning_rate": 1.756557913115714e-05, "loss": 0.6075, "step": 4008 }, { "epoch": 0.694124012552754, "grad_norm": 1.2578125, "learning_rate": 1.756438633693919e-05, "loss": 0.5616, "step": 4009 }, { "epoch": 0.6942971539876637, "grad_norm": 1.2890625, "learning_rate": 1.756319329109555e-05, "loss": 0.5921, "step": 4010 }, { "epoch": 0.6944702954225733, "grad_norm": 1.3671875, "learning_rate": 1.75619999936659e-05, "loss": 0.5838, "step": 4011 }, { "epoch": 0.694643436857483, "grad_norm": 1.3671875, "learning_rate": 1.756080644468994e-05, "loss": 0.6725, "step": 4012 }, { "epoch": 0.6948165782923926, "grad_norm": 1.28125, "learning_rate": 1.7559612644207364e-05, "loss": 0.5964, "step": 4013 }, { "epoch": 0.6949897197273023, "grad_norm": 1.296875, "learning_rate": 1.7558418592257895e-05, "loss": 0.5623, "step": 4014 }, { "epoch": 0.6951628611622119, "grad_norm": 1.25, "learning_rate": 1.755722428888124e-05, "loss": 0.5594, "step": 4015 }, { "epoch": 0.6953360025971215, "grad_norm": 1.421875, "learning_rate": 1.7556029734117142e-05, "loss": 0.5703, "step": 4016 }, { "epoch": 0.6955091440320311, "grad_norm": 1.3359375, "learning_rate": 1.7554834928005326e-05, "loss": 0.6465, "step": 4017 }, { "epoch": 0.6956822854669408, "grad_norm": 1.3515625, "learning_rate": 1.7553639870585543e-05, "loss": 0.5658, "step": 4018 }, { "epoch": 0.6958554269018504, "grad_norm": 1.390625, "learning_rate": 1.7552444561897536e-05, "loss": 0.5811, "step": 4019 }, { "epoch": 0.6960285683367601, "grad_norm": 1.3125, "learning_rate": 1.7551249001981078e-05, "loss": 0.5697, "step": 4020 }, { "epoch": 0.6962017097716697, "grad_norm": 1.3828125, "learning_rate": 1.7550053190875932e-05, "loss": 0.6226, "step": 4021 }, { "epoch": 0.6963748512065794, "grad_norm": 1.2734375, "learning_rate": 1.7548857128621878e-05, "loss": 0.6502, "step": 4022 }, { "epoch": 0.696547992641489, "grad_norm": 1.3203125, "learning_rate": 1.7547660815258704e-05, "loss": 0.5441, "step": 4023 }, { "epoch": 0.6967211340763987, "grad_norm": 1.2421875, "learning_rate": 1.75464642508262e-05, "loss": 0.5622, "step": 4024 }, { "epoch": 0.6968942755113083, "grad_norm": 1.2109375, "learning_rate": 1.7545267435364173e-05, "loss": 0.5282, "step": 4025 }, { "epoch": 0.697067416946218, "grad_norm": 1.4140625, "learning_rate": 1.7544070368912435e-05, "loss": 0.5874, "step": 4026 }, { "epoch": 0.6972405583811275, "grad_norm": 1.3125, "learning_rate": 1.75428730515108e-05, "loss": 0.6146, "step": 4027 }, { "epoch": 0.6974136998160372, "grad_norm": 1.3671875, "learning_rate": 1.7541675483199106e-05, "loss": 0.5973, "step": 4028 }, { "epoch": 0.6975868412509468, "grad_norm": 1.3828125, "learning_rate": 1.7540477664017187e-05, "loss": 0.6258, "step": 4029 }, { "epoch": 0.6977599826858565, "grad_norm": 1.3515625, "learning_rate": 1.7539279594004883e-05, "loss": 0.5659, "step": 4030 }, { "epoch": 0.6979331241207661, "grad_norm": 1.546875, "learning_rate": 1.7538081273202046e-05, "loss": 0.6374, "step": 4031 }, { "epoch": 0.6981062655556758, "grad_norm": 1.359375, "learning_rate": 1.7536882701648546e-05, "loss": 0.6084, "step": 4032 }, { "epoch": 0.6982794069905854, "grad_norm": 1.2890625, "learning_rate": 1.753568387938424e-05, "loss": 0.6692, "step": 4033 }, { "epoch": 0.6984525484254951, "grad_norm": 1.390625, "learning_rate": 1.7534484806449023e-05, "loss": 0.5485, "step": 4034 }, { "epoch": 0.6986256898604047, "grad_norm": 1.34375, "learning_rate": 1.753328548288277e-05, "loss": 0.6006, "step": 4035 }, { "epoch": 0.6987988312953144, "grad_norm": 1.328125, "learning_rate": 1.7532085908725376e-05, "loss": 0.6638, "step": 4036 }, { "epoch": 0.698971972730224, "grad_norm": 1.40625, "learning_rate": 1.753088608401675e-05, "loss": 0.6079, "step": 4037 }, { "epoch": 0.6991451141651337, "grad_norm": 1.2578125, "learning_rate": 1.75296860087968e-05, "loss": 0.595, "step": 4038 }, { "epoch": 0.6993182556000432, "grad_norm": 1.3203125, "learning_rate": 1.7528485683105444e-05, "loss": 0.5885, "step": 4039 }, { "epoch": 0.6994913970349529, "grad_norm": 1.1796875, "learning_rate": 1.7527285106982616e-05, "loss": 0.5193, "step": 4040 }, { "epoch": 0.6996645384698625, "grad_norm": 1.390625, "learning_rate": 1.7526084280468246e-05, "loss": 0.6684, "step": 4041 }, { "epoch": 0.6998376799047722, "grad_norm": 1.484375, "learning_rate": 1.7524883203602288e-05, "loss": 0.5619, "step": 4042 }, { "epoch": 0.7000108213396818, "grad_norm": 1.25, "learning_rate": 1.7523681876424687e-05, "loss": 0.6297, "step": 4043 }, { "epoch": 0.7001839627745915, "grad_norm": 1.40625, "learning_rate": 1.7522480298975402e-05, "loss": 0.6117, "step": 4044 }, { "epoch": 0.7003571042095011, "grad_norm": 1.4296875, "learning_rate": 1.7521278471294415e-05, "loss": 0.5498, "step": 4045 }, { "epoch": 0.7005302456444108, "grad_norm": 1.2578125, "learning_rate": 1.752007639342169e-05, "loss": 0.5584, "step": 4046 }, { "epoch": 0.7007033870793204, "grad_norm": 1.328125, "learning_rate": 1.7518874065397227e-05, "loss": 0.6474, "step": 4047 }, { "epoch": 0.7008765285142301, "grad_norm": 1.2890625, "learning_rate": 1.7517671487261013e-05, "loss": 0.5791, "step": 4048 }, { "epoch": 0.7010496699491398, "grad_norm": 1.296875, "learning_rate": 1.7516468659053053e-05, "loss": 0.5677, "step": 4049 }, { "epoch": 0.7012228113840493, "grad_norm": 1.2578125, "learning_rate": 1.7515265580813357e-05, "loss": 0.5963, "step": 4050 }, { "epoch": 0.701395952818959, "grad_norm": 1.34375, "learning_rate": 1.7514062252581947e-05, "loss": 0.5945, "step": 4051 }, { "epoch": 0.7015690942538686, "grad_norm": 1.359375, "learning_rate": 1.751285867439885e-05, "loss": 0.6316, "step": 4052 }, { "epoch": 0.7017422356887782, "grad_norm": 1.296875, "learning_rate": 1.75116548463041e-05, "loss": 0.5353, "step": 4053 }, { "epoch": 0.7019153771236879, "grad_norm": 1.34375, "learning_rate": 1.751045076833775e-05, "loss": 0.4939, "step": 4054 }, { "epoch": 0.7020885185585976, "grad_norm": 1.25, "learning_rate": 1.7509246440539844e-05, "loss": 0.6175, "step": 4055 }, { "epoch": 0.7022616599935072, "grad_norm": 1.8046875, "learning_rate": 1.7508041862950446e-05, "loss": 0.6297, "step": 4056 }, { "epoch": 0.7024348014284169, "grad_norm": 1.3359375, "learning_rate": 1.750683703560963e-05, "loss": 0.6508, "step": 4057 }, { "epoch": 0.7026079428633265, "grad_norm": 1.7109375, "learning_rate": 1.750563195855747e-05, "loss": 0.68, "step": 4058 }, { "epoch": 0.7027810842982362, "grad_norm": 1.328125, "learning_rate": 1.750442663183405e-05, "loss": 0.5657, "step": 4059 }, { "epoch": 0.7029542257331458, "grad_norm": 1.2890625, "learning_rate": 1.7503221055479472e-05, "loss": 0.5437, "step": 4060 }, { "epoch": 0.7031273671680554, "grad_norm": 1.34375, "learning_rate": 1.750201522953383e-05, "loss": 0.5966, "step": 4061 }, { "epoch": 0.703300508602965, "grad_norm": 1.3046875, "learning_rate": 1.7500809154037243e-05, "loss": 0.583, "step": 4062 }, { "epoch": 0.7034736500378747, "grad_norm": 1.375, "learning_rate": 1.7499602829029828e-05, "loss": 0.6102, "step": 4063 }, { "epoch": 0.7036467914727843, "grad_norm": 1.453125, "learning_rate": 1.7498396254551708e-05, "loss": 0.6097, "step": 4064 }, { "epoch": 0.703819932907694, "grad_norm": 1.34375, "learning_rate": 1.7497189430643025e-05, "loss": 0.5789, "step": 4065 }, { "epoch": 0.7039930743426036, "grad_norm": 1.2890625, "learning_rate": 1.7495982357343923e-05, "loss": 0.5947, "step": 4066 }, { "epoch": 0.7041662157775133, "grad_norm": 1.34375, "learning_rate": 1.749477503469455e-05, "loss": 0.5468, "step": 4067 }, { "epoch": 0.7043393572124229, "grad_norm": 1.375, "learning_rate": 1.7493567462735073e-05, "loss": 0.6112, "step": 4068 }, { "epoch": 0.7045124986473326, "grad_norm": 1.359375, "learning_rate": 1.7492359641505658e-05, "loss": 0.6144, "step": 4069 }, { "epoch": 0.7046856400822422, "grad_norm": 1.4453125, "learning_rate": 1.7491151571046484e-05, "loss": 0.5466, "step": 4070 }, { "epoch": 0.7048587815171519, "grad_norm": 1.4453125, "learning_rate": 1.7489943251397737e-05, "loss": 0.6412, "step": 4071 }, { "epoch": 0.7050319229520615, "grad_norm": 1.3671875, "learning_rate": 1.7488734682599607e-05, "loss": 0.5683, "step": 4072 }, { "epoch": 0.7052050643869711, "grad_norm": 1.2890625, "learning_rate": 1.7487525864692303e-05, "loss": 0.5873, "step": 4073 }, { "epoch": 0.7053782058218807, "grad_norm": 1.3125, "learning_rate": 1.7486316797716032e-05, "loss": 0.6067, "step": 4074 }, { "epoch": 0.7055513472567904, "grad_norm": 1.28125, "learning_rate": 1.7485107481711014e-05, "loss": 0.5958, "step": 4075 }, { "epoch": 0.7057244886917, "grad_norm": 1.296875, "learning_rate": 1.7483897916717473e-05, "loss": 0.5984, "step": 4076 }, { "epoch": 0.7058976301266097, "grad_norm": 1.2890625, "learning_rate": 1.7482688102775652e-05, "loss": 0.5904, "step": 4077 }, { "epoch": 0.7060707715615193, "grad_norm": 1.421875, "learning_rate": 1.7481478039925784e-05, "loss": 0.5637, "step": 4078 }, { "epoch": 0.706243912996429, "grad_norm": 1.3203125, "learning_rate": 1.7480267728208132e-05, "loss": 0.5304, "step": 4079 }, { "epoch": 0.7064170544313386, "grad_norm": 1.328125, "learning_rate": 1.747905716766295e-05, "loss": 0.5522, "step": 4080 }, { "epoch": 0.7065901958662483, "grad_norm": 1.296875, "learning_rate": 1.7477846358330512e-05, "loss": 0.6191, "step": 4081 }, { "epoch": 0.7067633373011579, "grad_norm": 1.21875, "learning_rate": 1.747663530025109e-05, "loss": 0.5875, "step": 4082 }, { "epoch": 0.7069364787360676, "grad_norm": 1.296875, "learning_rate": 1.747542399346497e-05, "loss": 0.6269, "step": 4083 }, { "epoch": 0.7071096201709771, "grad_norm": 1.3203125, "learning_rate": 1.7474212438012445e-05, "loss": 0.5844, "step": 4084 }, { "epoch": 0.7072827616058868, "grad_norm": 1.2578125, "learning_rate": 1.747300063393382e-05, "loss": 0.5673, "step": 4085 }, { "epoch": 0.7074559030407964, "grad_norm": 1.4296875, "learning_rate": 1.7471788581269402e-05, "loss": 0.6379, "step": 4086 }, { "epoch": 0.7076290444757061, "grad_norm": 1.34375, "learning_rate": 1.7470576280059514e-05, "loss": 0.5903, "step": 4087 }, { "epoch": 0.7078021859106157, "grad_norm": 1.3203125, "learning_rate": 1.7469363730344477e-05, "loss": 0.6015, "step": 4088 }, { "epoch": 0.7079753273455254, "grad_norm": 1.296875, "learning_rate": 1.746815093216463e-05, "loss": 0.5936, "step": 4089 }, { "epoch": 0.708148468780435, "grad_norm": 1.421875, "learning_rate": 1.7466937885560314e-05, "loss": 0.6166, "step": 4090 }, { "epoch": 0.7083216102153447, "grad_norm": 1.3671875, "learning_rate": 1.746572459057188e-05, "loss": 0.5775, "step": 4091 }, { "epoch": 0.7084947516502543, "grad_norm": 1.2265625, "learning_rate": 1.7464511047239687e-05, "loss": 0.5038, "step": 4092 }, { "epoch": 0.708667893085164, "grad_norm": 1.3359375, "learning_rate": 1.7463297255604106e-05, "loss": 0.575, "step": 4093 }, { "epoch": 0.7088410345200736, "grad_norm": 1.3203125, "learning_rate": 1.746208321570551e-05, "loss": 0.7236, "step": 4094 }, { "epoch": 0.7090141759549832, "grad_norm": 1.3359375, "learning_rate": 1.7460868927584286e-05, "loss": 0.563, "step": 4095 }, { "epoch": 0.7091873173898928, "grad_norm": 1.375, "learning_rate": 1.7459654391280828e-05, "loss": 0.5579, "step": 4096 }, { "epoch": 0.7093604588248025, "grad_norm": 1.34375, "learning_rate": 1.7458439606835534e-05, "loss": 0.5736, "step": 4097 }, { "epoch": 0.7095336002597121, "grad_norm": 1.375, "learning_rate": 1.7457224574288814e-05, "loss": 0.6021, "step": 4098 }, { "epoch": 0.7097067416946218, "grad_norm": 1.34375, "learning_rate": 1.7456009293681083e-05, "loss": 0.5822, "step": 4099 }, { "epoch": 0.7098798831295314, "grad_norm": 1.34375, "learning_rate": 1.745479376505277e-05, "loss": 0.613, "step": 4100 }, { "epoch": 0.7100530245644411, "grad_norm": 1.2734375, "learning_rate": 1.745357798844431e-05, "loss": 0.5205, "step": 4101 }, { "epoch": 0.7102261659993507, "grad_norm": 1.28125, "learning_rate": 1.7452361963896144e-05, "loss": 0.6145, "step": 4102 }, { "epoch": 0.7103993074342604, "grad_norm": 1.28125, "learning_rate": 1.745114569144872e-05, "loss": 0.5787, "step": 4103 }, { "epoch": 0.71057244886917, "grad_norm": 1.328125, "learning_rate": 1.7449929171142495e-05, "loss": 0.5735, "step": 4104 }, { "epoch": 0.7107455903040797, "grad_norm": 1.296875, "learning_rate": 1.7448712403017945e-05, "loss": 0.5084, "step": 4105 }, { "epoch": 0.7109187317389893, "grad_norm": 1.3515625, "learning_rate": 1.744749538711554e-05, "loss": 0.5379, "step": 4106 }, { "epoch": 0.7110918731738989, "grad_norm": 1.390625, "learning_rate": 1.744627812347576e-05, "loss": 0.6314, "step": 4107 }, { "epoch": 0.7112650146088085, "grad_norm": 1.4140625, "learning_rate": 1.74450606121391e-05, "loss": 0.579, "step": 4108 }, { "epoch": 0.7114381560437182, "grad_norm": 1.2421875, "learning_rate": 1.744384285314606e-05, "loss": 0.5868, "step": 4109 }, { "epoch": 0.7116112974786278, "grad_norm": 1.4765625, "learning_rate": 1.7442624846537152e-05, "loss": 0.634, "step": 4110 }, { "epoch": 0.7117844389135375, "grad_norm": 1.46875, "learning_rate": 1.744140659235288e-05, "loss": 0.5994, "step": 4111 }, { "epoch": 0.7119575803484471, "grad_norm": 1.2890625, "learning_rate": 1.7440188090633786e-05, "loss": 0.6221, "step": 4112 }, { "epoch": 0.7121307217833568, "grad_norm": 1.2734375, "learning_rate": 1.743896934142039e-05, "loss": 0.6166, "step": 4113 }, { "epoch": 0.7123038632182664, "grad_norm": 1.4296875, "learning_rate": 1.7437750344753235e-05, "loss": 0.6116, "step": 4114 }, { "epoch": 0.7124770046531761, "grad_norm": 1.3828125, "learning_rate": 1.7436531100672874e-05, "loss": 0.5951, "step": 4115 }, { "epoch": 0.7126501460880857, "grad_norm": 1.390625, "learning_rate": 1.7435311609219864e-05, "loss": 0.6256, "step": 4116 }, { "epoch": 0.7128232875229954, "grad_norm": 1.234375, "learning_rate": 1.7434091870434772e-05, "loss": 0.5757, "step": 4117 }, { "epoch": 0.7129964289579049, "grad_norm": 1.5078125, "learning_rate": 1.7432871884358167e-05, "loss": 0.6043, "step": 4118 }, { "epoch": 0.7131695703928146, "grad_norm": 1.3203125, "learning_rate": 1.7431651651030635e-05, "loss": 0.5613, "step": 4119 }, { "epoch": 0.7133427118277242, "grad_norm": 1.4375, "learning_rate": 1.7430431170492763e-05, "loss": 0.6743, "step": 4120 }, { "epoch": 0.7135158532626339, "grad_norm": 1.3203125, "learning_rate": 1.7429210442785155e-05, "loss": 0.5474, "step": 4121 }, { "epoch": 0.7136889946975435, "grad_norm": 1.34375, "learning_rate": 1.7427989467948413e-05, "loss": 0.5779, "step": 4122 }, { "epoch": 0.7138621361324532, "grad_norm": 1.46875, "learning_rate": 1.7426768246023155e-05, "loss": 0.5837, "step": 4123 }, { "epoch": 0.7140352775673628, "grad_norm": 1.34375, "learning_rate": 1.7425546777050005e-05, "loss": 0.5422, "step": 4124 }, { "epoch": 0.7142084190022725, "grad_norm": 1.3046875, "learning_rate": 1.7424325061069592e-05, "loss": 0.5397, "step": 4125 }, { "epoch": 0.7143815604371822, "grad_norm": 1.2421875, "learning_rate": 1.7423103098122557e-05, "loss": 0.6095, "step": 4126 }, { "epoch": 0.7145547018720918, "grad_norm": 1.25, "learning_rate": 1.742188088824955e-05, "loss": 0.57, "step": 4127 }, { "epoch": 0.7147278433070015, "grad_norm": 1.3203125, "learning_rate": 1.7420658431491224e-05, "loss": 0.5615, "step": 4128 }, { "epoch": 0.714900984741911, "grad_norm": 1.328125, "learning_rate": 1.7419435727888243e-05, "loss": 0.5696, "step": 4129 }, { "epoch": 0.7150741261768206, "grad_norm": 1.2265625, "learning_rate": 1.741821277748128e-05, "loss": 0.5911, "step": 4130 }, { "epoch": 0.7152472676117303, "grad_norm": 1.359375, "learning_rate": 1.741698958031102e-05, "loss": 0.576, "step": 4131 }, { "epoch": 0.71542040904664, "grad_norm": 1.328125, "learning_rate": 1.741576613641815e-05, "loss": 0.5923, "step": 4132 }, { "epoch": 0.7155935504815496, "grad_norm": 1.2421875, "learning_rate": 1.7414542445843367e-05, "loss": 0.5336, "step": 4133 }, { "epoch": 0.7157666919164593, "grad_norm": 1.328125, "learning_rate": 1.7413318508627375e-05, "loss": 0.6603, "step": 4134 }, { "epoch": 0.7159398333513689, "grad_norm": 1.390625, "learning_rate": 1.741209432481089e-05, "loss": 0.5507, "step": 4135 }, { "epoch": 0.7161129747862786, "grad_norm": 1.296875, "learning_rate": 1.741086989443463e-05, "loss": 0.5341, "step": 4136 }, { "epoch": 0.7162861162211882, "grad_norm": 1.390625, "learning_rate": 1.740964521753933e-05, "loss": 0.6689, "step": 4137 }, { "epoch": 0.7164592576560979, "grad_norm": 1.3671875, "learning_rate": 1.7408420294165722e-05, "loss": 0.6558, "step": 4138 }, { "epoch": 0.7166323990910075, "grad_norm": 1.3828125, "learning_rate": 1.7407195124354562e-05, "loss": 0.5801, "step": 4139 }, { "epoch": 0.7168055405259172, "grad_norm": 1.2734375, "learning_rate": 1.7405969708146596e-05, "loss": 0.5785, "step": 4140 }, { "epoch": 0.7169786819608267, "grad_norm": 1.4765625, "learning_rate": 1.740474404558259e-05, "loss": 0.6383, "step": 4141 }, { "epoch": 0.7171518233957364, "grad_norm": 1.34375, "learning_rate": 1.7403518136703318e-05, "loss": 0.5895, "step": 4142 }, { "epoch": 0.717324964830646, "grad_norm": 1.296875, "learning_rate": 1.740229198154955e-05, "loss": 0.6227, "step": 4143 }, { "epoch": 0.7174981062655557, "grad_norm": 1.2734375, "learning_rate": 1.7401065580162084e-05, "loss": 0.5786, "step": 4144 }, { "epoch": 0.7176712477004653, "grad_norm": 1.421875, "learning_rate": 1.7399838932581713e-05, "loss": 0.6031, "step": 4145 }, { "epoch": 0.717844389135375, "grad_norm": 1.265625, "learning_rate": 1.7398612038849237e-05, "loss": 0.5474, "step": 4146 }, { "epoch": 0.7180175305702846, "grad_norm": 1.34375, "learning_rate": 1.7397384899005473e-05, "loss": 0.6815, "step": 4147 }, { "epoch": 0.7181906720051943, "grad_norm": 1.3203125, "learning_rate": 1.7396157513091235e-05, "loss": 0.5361, "step": 4148 }, { "epoch": 0.7183638134401039, "grad_norm": 1.3828125, "learning_rate": 1.7394929881147356e-05, "loss": 0.64, "step": 4149 }, { "epoch": 0.7185369548750136, "grad_norm": 1.375, "learning_rate": 1.7393702003214674e-05, "loss": 0.5943, "step": 4150 }, { "epoch": 0.7187100963099232, "grad_norm": 1.1953125, "learning_rate": 1.739247387933403e-05, "loss": 0.5578, "step": 4151 }, { "epoch": 0.7188832377448328, "grad_norm": 1.359375, "learning_rate": 1.7391245509546276e-05, "loss": 0.6354, "step": 4152 }, { "epoch": 0.7190563791797424, "grad_norm": 1.265625, "learning_rate": 1.7390016893892282e-05, "loss": 0.5247, "step": 4153 }, { "epoch": 0.7192295206146521, "grad_norm": 1.3125, "learning_rate": 1.7388788032412904e-05, "loss": 0.5977, "step": 4154 }, { "epoch": 0.7194026620495617, "grad_norm": 1.2734375, "learning_rate": 1.7387558925149027e-05, "loss": 0.5769, "step": 4155 }, { "epoch": 0.7195758034844714, "grad_norm": 1.34375, "learning_rate": 1.738632957214154e-05, "loss": 0.5906, "step": 4156 }, { "epoch": 0.719748944919381, "grad_norm": 1.5, "learning_rate": 1.738509997343133e-05, "loss": 0.5877, "step": 4157 }, { "epoch": 0.7199220863542907, "grad_norm": 1.2890625, "learning_rate": 1.7383870129059303e-05, "loss": 0.5784, "step": 4158 }, { "epoch": 0.7200952277892003, "grad_norm": 1.3984375, "learning_rate": 1.7382640039066367e-05, "loss": 0.6582, "step": 4159 }, { "epoch": 0.72026836922411, "grad_norm": 1.3046875, "learning_rate": 1.738140970349344e-05, "loss": 0.5767, "step": 4160 }, { "epoch": 0.7204415106590196, "grad_norm": 1.4609375, "learning_rate": 1.7380179122381454e-05, "loss": 0.5441, "step": 4161 }, { "epoch": 0.7206146520939293, "grad_norm": 1.296875, "learning_rate": 1.7378948295771337e-05, "loss": 0.6017, "step": 4162 }, { "epoch": 0.7207877935288388, "grad_norm": 1.4296875, "learning_rate": 1.7377717223704035e-05, "loss": 0.5995, "step": 4163 }, { "epoch": 0.7209609349637485, "grad_norm": 1.4609375, "learning_rate": 1.7376485906220497e-05, "loss": 0.5696, "step": 4164 }, { "epoch": 0.7211340763986581, "grad_norm": 1.328125, "learning_rate": 1.7375254343361685e-05, "loss": 0.5996, "step": 4165 }, { "epoch": 0.7213072178335678, "grad_norm": 1.2421875, "learning_rate": 1.7374022535168566e-05, "loss": 0.5201, "step": 4166 }, { "epoch": 0.7214803592684774, "grad_norm": 1.2734375, "learning_rate": 1.737279048168211e-05, "loss": 0.5861, "step": 4167 }, { "epoch": 0.7216535007033871, "grad_norm": 1.296875, "learning_rate": 1.7371558182943308e-05, "loss": 0.6287, "step": 4168 }, { "epoch": 0.7218266421382967, "grad_norm": 1.34375, "learning_rate": 1.737032563899315e-05, "loss": 0.6288, "step": 4169 }, { "epoch": 0.7219997835732064, "grad_norm": 1.3515625, "learning_rate": 1.7369092849872636e-05, "loss": 0.5699, "step": 4170 }, { "epoch": 0.722172925008116, "grad_norm": 1.3515625, "learning_rate": 1.736785981562277e-05, "loss": 0.6567, "step": 4171 }, { "epoch": 0.7223460664430257, "grad_norm": 1.4609375, "learning_rate": 1.736662653628457e-05, "loss": 0.5899, "step": 4172 }, { "epoch": 0.7225192078779353, "grad_norm": 1.3125, "learning_rate": 1.7365393011899067e-05, "loss": 0.4951, "step": 4173 }, { "epoch": 0.722692349312845, "grad_norm": 1.34375, "learning_rate": 1.7364159242507285e-05, "loss": 0.5842, "step": 4174 }, { "epoch": 0.7228654907477545, "grad_norm": 1.203125, "learning_rate": 1.736292522815027e-05, "loss": 0.5066, "step": 4175 }, { "epoch": 0.7230386321826642, "grad_norm": 1.359375, "learning_rate": 1.7361690968869068e-05, "loss": 0.6335, "step": 4176 }, { "epoch": 0.7232117736175738, "grad_norm": 1.3515625, "learning_rate": 1.736045646470474e-05, "loss": 0.6113, "step": 4177 }, { "epoch": 0.7233849150524835, "grad_norm": 1.421875, "learning_rate": 1.7359221715698343e-05, "loss": 0.5788, "step": 4178 }, { "epoch": 0.7235580564873931, "grad_norm": 1.4375, "learning_rate": 1.7357986721890962e-05, "loss": 0.6184, "step": 4179 }, { "epoch": 0.7237311979223028, "grad_norm": 1.28125, "learning_rate": 1.7356751483323666e-05, "loss": 0.5471, "step": 4180 }, { "epoch": 0.7239043393572124, "grad_norm": 1.3359375, "learning_rate": 1.7355516000037555e-05, "loss": 0.6154, "step": 4181 }, { "epoch": 0.7240774807921221, "grad_norm": 1.2421875, "learning_rate": 1.7354280272073718e-05, "loss": 0.5512, "step": 4182 }, { "epoch": 0.7242506222270317, "grad_norm": 1.375, "learning_rate": 1.735304429947327e-05, "loss": 0.5869, "step": 4183 }, { "epoch": 0.7244237636619414, "grad_norm": 1.2890625, "learning_rate": 1.735180808227732e-05, "loss": 0.5772, "step": 4184 }, { "epoch": 0.724596905096851, "grad_norm": 1.3984375, "learning_rate": 1.735057162052699e-05, "loss": 0.5846, "step": 4185 }, { "epoch": 0.7247700465317606, "grad_norm": 1.328125, "learning_rate": 1.734933491426341e-05, "loss": 0.5124, "step": 4186 }, { "epoch": 0.7249431879666702, "grad_norm": 1.2578125, "learning_rate": 1.7348097963527715e-05, "loss": 0.6169, "step": 4187 }, { "epoch": 0.7251163294015799, "grad_norm": 1.1796875, "learning_rate": 1.7346860768361065e-05, "loss": 0.5529, "step": 4188 }, { "epoch": 0.7252894708364895, "grad_norm": 1.34375, "learning_rate": 1.73456233288046e-05, "loss": 0.636, "step": 4189 }, { "epoch": 0.7254626122713992, "grad_norm": 1.34375, "learning_rate": 1.734438564489949e-05, "loss": 0.5935, "step": 4190 }, { "epoch": 0.7256357537063088, "grad_norm": 1.3125, "learning_rate": 1.7343147716686905e-05, "loss": 0.5165, "step": 4191 }, { "epoch": 0.7258088951412185, "grad_norm": 1.3359375, "learning_rate": 1.7341909544208022e-05, "loss": 0.6799, "step": 4192 }, { "epoch": 0.7259820365761281, "grad_norm": 1.3359375, "learning_rate": 1.734067112750403e-05, "loss": 0.6379, "step": 4193 }, { "epoch": 0.7261551780110378, "grad_norm": 1.453125, "learning_rate": 1.7339432466616125e-05, "loss": 0.5928, "step": 4194 }, { "epoch": 0.7263283194459474, "grad_norm": 1.34375, "learning_rate": 1.7338193561585507e-05, "loss": 0.6519, "step": 4195 }, { "epoch": 0.7265014608808571, "grad_norm": 1.28125, "learning_rate": 1.7336954412453394e-05, "loss": 0.5789, "step": 4196 }, { "epoch": 0.7266746023157666, "grad_norm": 1.3359375, "learning_rate": 1.7335715019261002e-05, "loss": 0.6286, "step": 4197 }, { "epoch": 0.7268477437506763, "grad_norm": 1.3046875, "learning_rate": 1.733447538204956e-05, "loss": 0.6051, "step": 4198 }, { "epoch": 0.7270208851855859, "grad_norm": 1.390625, "learning_rate": 1.7333235500860297e-05, "loss": 0.5976, "step": 4199 }, { "epoch": 0.7271940266204956, "grad_norm": 1.265625, "learning_rate": 1.7331995375734463e-05, "loss": 0.5653, "step": 4200 }, { "epoch": 0.7273671680554052, "grad_norm": 1.25, "learning_rate": 1.7330755006713312e-05, "loss": 0.6218, "step": 4201 }, { "epoch": 0.7275403094903149, "grad_norm": 1.3046875, "learning_rate": 1.73295143938381e-05, "loss": 0.5689, "step": 4202 }, { "epoch": 0.7277134509252245, "grad_norm": 1.390625, "learning_rate": 1.73282735371501e-05, "loss": 0.6095, "step": 4203 }, { "epoch": 0.7278865923601342, "grad_norm": 1.2890625, "learning_rate": 1.732703243669059e-05, "loss": 0.556, "step": 4204 }, { "epoch": 0.7280597337950439, "grad_norm": 1.4609375, "learning_rate": 1.7325791092500844e-05, "loss": 0.7226, "step": 4205 }, { "epoch": 0.7282328752299535, "grad_norm": 1.359375, "learning_rate": 1.7324549504622166e-05, "loss": 0.5586, "step": 4206 }, { "epoch": 0.7284060166648632, "grad_norm": 1.328125, "learning_rate": 1.732330767309585e-05, "loss": 0.6377, "step": 4207 }, { "epoch": 0.7285791580997728, "grad_norm": 1.359375, "learning_rate": 1.7322065597963206e-05, "loss": 0.5714, "step": 4208 }, { "epoch": 0.7287522995346823, "grad_norm": 1.265625, "learning_rate": 1.7320823279265555e-05, "loss": 0.5606, "step": 4209 }, { "epoch": 0.728925440969592, "grad_norm": 1.2890625, "learning_rate": 1.7319580717044216e-05, "loss": 0.6414, "step": 4210 }, { "epoch": 0.7290985824045016, "grad_norm": 1.3046875, "learning_rate": 1.7318337911340527e-05, "loss": 0.5308, "step": 4211 }, { "epoch": 0.7292717238394113, "grad_norm": 1.328125, "learning_rate": 1.731709486219583e-05, "loss": 0.6134, "step": 4212 }, { "epoch": 0.729444865274321, "grad_norm": 1.4140625, "learning_rate": 1.7315851569651467e-05, "loss": 0.5336, "step": 4213 }, { "epoch": 0.7296180067092306, "grad_norm": 1.421875, "learning_rate": 1.7314608033748805e-05, "loss": 0.6175, "step": 4214 }, { "epoch": 0.7297911481441403, "grad_norm": 1.3125, "learning_rate": 1.7313364254529207e-05, "loss": 0.5931, "step": 4215 }, { "epoch": 0.7299642895790499, "grad_norm": 1.359375, "learning_rate": 1.731212023203404e-05, "loss": 0.6253, "step": 4216 }, { "epoch": 0.7301374310139596, "grad_norm": 1.5390625, "learning_rate": 1.7310875966304696e-05, "loss": 0.556, "step": 4217 }, { "epoch": 0.7303105724488692, "grad_norm": 1.28125, "learning_rate": 1.7309631457382558e-05, "loss": 0.6659, "step": 4218 }, { "epoch": 0.7304837138837789, "grad_norm": 1.4453125, "learning_rate": 1.7308386705309024e-05, "loss": 0.5444, "step": 4219 }, { "epoch": 0.7306568553186884, "grad_norm": 1.46875, "learning_rate": 1.7307141710125502e-05, "loss": 0.553, "step": 4220 }, { "epoch": 0.730829996753598, "grad_norm": 1.2578125, "learning_rate": 1.730589647187341e-05, "loss": 0.5265, "step": 4221 }, { "epoch": 0.7310031381885077, "grad_norm": 1.359375, "learning_rate": 1.7304650990594163e-05, "loss": 0.5815, "step": 4222 }, { "epoch": 0.7311762796234174, "grad_norm": 1.265625, "learning_rate": 1.7303405266329196e-05, "loss": 0.5844, "step": 4223 }, { "epoch": 0.731349421058327, "grad_norm": 1.4453125, "learning_rate": 1.7302159299119944e-05, "loss": 0.6155, "step": 4224 }, { "epoch": 0.7315225624932367, "grad_norm": 1.453125, "learning_rate": 1.7300913089007856e-05, "loss": 0.6488, "step": 4225 }, { "epoch": 0.7316957039281463, "grad_norm": 1.4453125, "learning_rate": 1.729966663603439e-05, "loss": 0.6141, "step": 4226 }, { "epoch": 0.731868845363056, "grad_norm": 1.296875, "learning_rate": 1.7298419940241004e-05, "loss": 0.5886, "step": 4227 }, { "epoch": 0.7320419867979656, "grad_norm": 1.3984375, "learning_rate": 1.7297173001669167e-05, "loss": 0.6572, "step": 4228 }, { "epoch": 0.7322151282328753, "grad_norm": 1.15625, "learning_rate": 1.729592582036036e-05, "loss": 0.4959, "step": 4229 }, { "epoch": 0.7323882696677849, "grad_norm": 1.2890625, "learning_rate": 1.7294678396356076e-05, "loss": 0.5464, "step": 4230 }, { "epoch": 0.7325614111026945, "grad_norm": 1.40625, "learning_rate": 1.7293430729697798e-05, "loss": 0.5753, "step": 4231 }, { "epoch": 0.7327345525376041, "grad_norm": 1.515625, "learning_rate": 1.729218282042704e-05, "loss": 0.6582, "step": 4232 }, { "epoch": 0.7329076939725138, "grad_norm": 1.4921875, "learning_rate": 1.7290934668585305e-05, "loss": 0.5867, "step": 4233 }, { "epoch": 0.7330808354074234, "grad_norm": 1.2421875, "learning_rate": 1.7289686274214116e-05, "loss": 0.515, "step": 4234 }, { "epoch": 0.7332539768423331, "grad_norm": 1.4140625, "learning_rate": 1.7288437637355003e-05, "loss": 0.5235, "step": 4235 }, { "epoch": 0.7334271182772427, "grad_norm": 1.3671875, "learning_rate": 1.7287188758049493e-05, "loss": 0.6841, "step": 4236 }, { "epoch": 0.7336002597121524, "grad_norm": 1.3828125, "learning_rate": 1.7285939636339136e-05, "loss": 0.5901, "step": 4237 }, { "epoch": 0.733773401147062, "grad_norm": 2.03125, "learning_rate": 1.7284690272265487e-05, "loss": 0.6662, "step": 4238 }, { "epoch": 0.7339465425819717, "grad_norm": 1.609375, "learning_rate": 1.7283440665870097e-05, "loss": 0.6477, "step": 4239 }, { "epoch": 0.7341196840168813, "grad_norm": 1.34375, "learning_rate": 1.7282190817194536e-05, "loss": 0.5426, "step": 4240 }, { "epoch": 0.734292825451791, "grad_norm": 1.3125, "learning_rate": 1.7280940726280383e-05, "loss": 0.6296, "step": 4241 }, { "epoch": 0.7344659668867006, "grad_norm": 1.25, "learning_rate": 1.7279690393169217e-05, "loss": 0.5333, "step": 4242 }, { "epoch": 0.7346391083216102, "grad_norm": 1.4296875, "learning_rate": 1.7278439817902634e-05, "loss": 0.6084, "step": 4243 }, { "epoch": 0.7348122497565198, "grad_norm": 1.2890625, "learning_rate": 1.727718900052223e-05, "loss": 0.5848, "step": 4244 }, { "epoch": 0.7349853911914295, "grad_norm": 1.390625, "learning_rate": 1.7275937941069616e-05, "loss": 0.5488, "step": 4245 }, { "epoch": 0.7351585326263391, "grad_norm": 1.390625, "learning_rate": 1.7274686639586407e-05, "loss": 0.5995, "step": 4246 }, { "epoch": 0.7353316740612488, "grad_norm": 1.296875, "learning_rate": 1.7273435096114223e-05, "loss": 0.6152, "step": 4247 }, { "epoch": 0.7355048154961584, "grad_norm": 1.2890625, "learning_rate": 1.7272183310694704e-05, "loss": 0.5386, "step": 4248 }, { "epoch": 0.7356779569310681, "grad_norm": 1.359375, "learning_rate": 1.7270931283369484e-05, "loss": 0.6594, "step": 4249 }, { "epoch": 0.7358510983659777, "grad_norm": 1.265625, "learning_rate": 1.7269679014180213e-05, "loss": 0.5225, "step": 4250 }, { "epoch": 0.7360242398008874, "grad_norm": 1.3515625, "learning_rate": 1.7268426503168545e-05, "loss": 0.523, "step": 4251 }, { "epoch": 0.736197381235797, "grad_norm": 1.390625, "learning_rate": 1.7267173750376146e-05, "loss": 0.5715, "step": 4252 }, { "epoch": 0.7363705226707067, "grad_norm": 1.2421875, "learning_rate": 1.7265920755844692e-05, "loss": 0.5573, "step": 4253 }, { "epoch": 0.7365436641056162, "grad_norm": 1.40625, "learning_rate": 1.7264667519615855e-05, "loss": 0.6441, "step": 4254 }, { "epoch": 0.7367168055405259, "grad_norm": 1.328125, "learning_rate": 1.726341404173133e-05, "loss": 0.6614, "step": 4255 }, { "epoch": 0.7368899469754355, "grad_norm": 1.3203125, "learning_rate": 1.726216032223281e-05, "loss": 0.5395, "step": 4256 }, { "epoch": 0.7370630884103452, "grad_norm": 1.2109375, "learning_rate": 1.7260906361162003e-05, "loss": 0.5555, "step": 4257 }, { "epoch": 0.7372362298452548, "grad_norm": 1.25, "learning_rate": 1.7259652158560616e-05, "loss": 0.5801, "step": 4258 }, { "epoch": 0.7374093712801645, "grad_norm": 1.703125, "learning_rate": 1.7258397714470376e-05, "loss": 0.6414, "step": 4259 }, { "epoch": 0.7375825127150741, "grad_norm": 1.484375, "learning_rate": 1.7257143028933004e-05, "loss": 0.6009, "step": 4260 }, { "epoch": 0.7377556541499838, "grad_norm": 1.265625, "learning_rate": 1.725588810199024e-05, "loss": 0.6044, "step": 4261 }, { "epoch": 0.7379287955848934, "grad_norm": 1.3125, "learning_rate": 1.7254632933683835e-05, "loss": 0.6224, "step": 4262 }, { "epoch": 0.7381019370198031, "grad_norm": 1.28125, "learning_rate": 1.7253377524055527e-05, "loss": 0.5958, "step": 4263 }, { "epoch": 0.7382750784547127, "grad_norm": 1.4765625, "learning_rate": 1.7252121873147093e-05, "loss": 0.5952, "step": 4264 }, { "epoch": 0.7384482198896223, "grad_norm": 1.265625, "learning_rate": 1.725086598100029e-05, "loss": 0.5335, "step": 4265 }, { "epoch": 0.7386213613245319, "grad_norm": 1.21875, "learning_rate": 1.72496098476569e-05, "loss": 0.5949, "step": 4266 }, { "epoch": 0.7387945027594416, "grad_norm": 1.28125, "learning_rate": 1.72483534731587e-05, "loss": 0.5582, "step": 4267 }, { "epoch": 0.7389676441943512, "grad_norm": 1.265625, "learning_rate": 1.7247096857547495e-05, "loss": 0.5948, "step": 4268 }, { "epoch": 0.7391407856292609, "grad_norm": 1.4765625, "learning_rate": 1.7245840000865074e-05, "loss": 0.6039, "step": 4269 }, { "epoch": 0.7393139270641705, "grad_norm": 1.359375, "learning_rate": 1.724458290315326e-05, "loss": 0.5102, "step": 4270 }, { "epoch": 0.7394870684990802, "grad_norm": 1.328125, "learning_rate": 1.7243325564453855e-05, "loss": 0.6132, "step": 4271 }, { "epoch": 0.7396602099339898, "grad_norm": 1.3125, "learning_rate": 1.724206798480869e-05, "loss": 0.5464, "step": 4272 }, { "epoch": 0.7398333513688995, "grad_norm": 1.34375, "learning_rate": 1.7240810164259597e-05, "loss": 0.5599, "step": 4273 }, { "epoch": 0.7400064928038091, "grad_norm": 1.515625, "learning_rate": 1.723955210284842e-05, "loss": 0.5896, "step": 4274 }, { "epoch": 0.7401796342387188, "grad_norm": 1.25, "learning_rate": 1.7238293800617e-05, "loss": 0.5692, "step": 4275 }, { "epoch": 0.7403527756736284, "grad_norm": 1.3671875, "learning_rate": 1.7237035257607202e-05, "loss": 0.677, "step": 4276 }, { "epoch": 0.740525917108538, "grad_norm": 1.515625, "learning_rate": 1.723577647386089e-05, "loss": 0.5878, "step": 4277 }, { "epoch": 0.7406990585434476, "grad_norm": 1.1953125, "learning_rate": 1.723451744941993e-05, "loss": 0.5496, "step": 4278 }, { "epoch": 0.7408721999783573, "grad_norm": 1.3515625, "learning_rate": 1.7233258184326212e-05, "loss": 0.6241, "step": 4279 }, { "epoch": 0.741045341413267, "grad_norm": 1.4375, "learning_rate": 1.7231998678621616e-05, "loss": 0.7011, "step": 4280 }, { "epoch": 0.7412184828481766, "grad_norm": 1.2890625, "learning_rate": 1.723073893234805e-05, "loss": 0.5143, "step": 4281 }, { "epoch": 0.7413916242830862, "grad_norm": 1.421875, "learning_rate": 1.7229478945547404e-05, "loss": 0.5471, "step": 4282 }, { "epoch": 0.7415647657179959, "grad_norm": 1.3046875, "learning_rate": 1.7228218718261603e-05, "loss": 0.5752, "step": 4283 }, { "epoch": 0.7417379071529056, "grad_norm": 1.5390625, "learning_rate": 1.722695825053256e-05, "loss": 0.7576, "step": 4284 }, { "epoch": 0.7419110485878152, "grad_norm": 1.2265625, "learning_rate": 1.7225697542402212e-05, "loss": 0.6132, "step": 4285 }, { "epoch": 0.7420841900227249, "grad_norm": 1.296875, "learning_rate": 1.722443659391249e-05, "loss": 0.6058, "step": 4286 }, { "epoch": 0.7422573314576345, "grad_norm": 1.4375, "learning_rate": 1.722317540510534e-05, "loss": 0.5663, "step": 4287 }, { "epoch": 0.742430472892544, "grad_norm": 1.3515625, "learning_rate": 1.7221913976022713e-05, "loss": 0.6429, "step": 4288 }, { "epoch": 0.7426036143274537, "grad_norm": 1.3671875, "learning_rate": 1.7220652306706576e-05, "loss": 0.5609, "step": 4289 }, { "epoch": 0.7427767557623633, "grad_norm": 1.3984375, "learning_rate": 1.721939039719889e-05, "loss": 0.7313, "step": 4290 }, { "epoch": 0.742949897197273, "grad_norm": 1.40625, "learning_rate": 1.7218128247541642e-05, "loss": 0.6516, "step": 4291 }, { "epoch": 0.7431230386321827, "grad_norm": 1.40625, "learning_rate": 1.7216865857776803e-05, "loss": 0.6503, "step": 4292 }, { "epoch": 0.7432961800670923, "grad_norm": 1.2109375, "learning_rate": 1.7215603227946378e-05, "loss": 0.548, "step": 4293 }, { "epoch": 0.743469321502002, "grad_norm": 1.3515625, "learning_rate": 1.7214340358092362e-05, "loss": 0.5933, "step": 4294 }, { "epoch": 0.7436424629369116, "grad_norm": 1.3359375, "learning_rate": 1.721307724825676e-05, "loss": 0.5726, "step": 4295 }, { "epoch": 0.7438156043718213, "grad_norm": 1.40625, "learning_rate": 1.72118138984816e-05, "loss": 0.5973, "step": 4296 }, { "epoch": 0.7439887458067309, "grad_norm": 1.3359375, "learning_rate": 1.7210550308808898e-05, "loss": 0.554, "step": 4297 }, { "epoch": 0.7441618872416406, "grad_norm": 1.21875, "learning_rate": 1.7209286479280688e-05, "loss": 0.6208, "step": 4298 }, { "epoch": 0.7443350286765501, "grad_norm": 1.2421875, "learning_rate": 1.7208022409939012e-05, "loss": 0.597, "step": 4299 }, { "epoch": 0.7445081701114598, "grad_norm": 1.2109375, "learning_rate": 1.7206758100825917e-05, "loss": 0.5862, "step": 4300 }, { "epoch": 0.7446813115463694, "grad_norm": 1.4375, "learning_rate": 1.7205493551983464e-05, "loss": 0.6859, "step": 4301 }, { "epoch": 0.7448544529812791, "grad_norm": 1.359375, "learning_rate": 1.7204228763453713e-05, "loss": 0.5755, "step": 4302 }, { "epoch": 0.7450275944161887, "grad_norm": 1.359375, "learning_rate": 1.7202963735278737e-05, "loss": 0.594, "step": 4303 }, { "epoch": 0.7452007358510984, "grad_norm": 1.4765625, "learning_rate": 1.720169846750062e-05, "loss": 0.6291, "step": 4304 }, { "epoch": 0.745373877286008, "grad_norm": 1.3828125, "learning_rate": 1.7200432960161448e-05, "loss": 0.6238, "step": 4305 }, { "epoch": 0.7455470187209177, "grad_norm": 1.2109375, "learning_rate": 1.7199167213303312e-05, "loss": 0.5254, "step": 4306 }, { "epoch": 0.7457201601558273, "grad_norm": 1.3515625, "learning_rate": 1.719790122696833e-05, "loss": 0.5869, "step": 4307 }, { "epoch": 0.745893301590737, "grad_norm": 1.3046875, "learning_rate": 1.7196635001198602e-05, "loss": 0.6057, "step": 4308 }, { "epoch": 0.7460664430256466, "grad_norm": 1.359375, "learning_rate": 1.7195368536036253e-05, "loss": 0.6237, "step": 4309 }, { "epoch": 0.7462395844605563, "grad_norm": 1.3359375, "learning_rate": 1.7194101831523413e-05, "loss": 0.6154, "step": 4310 }, { "epoch": 0.7464127258954658, "grad_norm": 1.3203125, "learning_rate": 1.7192834887702213e-05, "loss": 0.6058, "step": 4311 }, { "epoch": 0.7465858673303755, "grad_norm": 1.3359375, "learning_rate": 1.7191567704614806e-05, "loss": 0.588, "step": 4312 }, { "epoch": 0.7467590087652851, "grad_norm": 1.3125, "learning_rate": 1.719030028230334e-05, "loss": 0.6289, "step": 4313 }, { "epoch": 0.7469321502001948, "grad_norm": 1.3203125, "learning_rate": 1.7189032620809968e-05, "loss": 0.5564, "step": 4314 }, { "epoch": 0.7471052916351044, "grad_norm": 1.390625, "learning_rate": 1.7187764720176868e-05, "loss": 0.6388, "step": 4315 }, { "epoch": 0.7472784330700141, "grad_norm": 1.25, "learning_rate": 1.718649658044621e-05, "loss": 0.5442, "step": 4316 }, { "epoch": 0.7474515745049237, "grad_norm": 1.3203125, "learning_rate": 1.7185228201660184e-05, "loss": 0.5713, "step": 4317 }, { "epoch": 0.7476247159398334, "grad_norm": 1.359375, "learning_rate": 1.7183959583860977e-05, "loss": 0.6312, "step": 4318 }, { "epoch": 0.747797857374743, "grad_norm": 1.3203125, "learning_rate": 1.7182690727090795e-05, "loss": 0.5793, "step": 4319 }, { "epoch": 0.7479709988096527, "grad_norm": 1.28125, "learning_rate": 1.7181421631391835e-05, "loss": 0.5626, "step": 4320 }, { "epoch": 0.7481441402445623, "grad_norm": 1.3359375, "learning_rate": 1.7180152296806322e-05, "loss": 0.6395, "step": 4321 }, { "epoch": 0.7483172816794719, "grad_norm": 1.2421875, "learning_rate": 1.717888272337648e-05, "loss": 0.539, "step": 4322 }, { "epoch": 0.7484904231143815, "grad_norm": 1.3515625, "learning_rate": 1.7177612911144535e-05, "loss": 0.6054, "step": 4323 }, { "epoch": 0.7486635645492912, "grad_norm": 1.25, "learning_rate": 1.717634286015273e-05, "loss": 0.5594, "step": 4324 }, { "epoch": 0.7488367059842008, "grad_norm": 1.4140625, "learning_rate": 1.717507257044331e-05, "loss": 0.5935, "step": 4325 }, { "epoch": 0.7490098474191105, "grad_norm": 1.328125, "learning_rate": 1.7173802042058537e-05, "loss": 0.6688, "step": 4326 }, { "epoch": 0.7491829888540201, "grad_norm": 1.390625, "learning_rate": 1.7172531275040668e-05, "loss": 0.6312, "step": 4327 }, { "epoch": 0.7493561302889298, "grad_norm": 1.4140625, "learning_rate": 1.717126026943198e-05, "loss": 0.6082, "step": 4328 }, { "epoch": 0.7495292717238394, "grad_norm": 1.4765625, "learning_rate": 1.7169989025274748e-05, "loss": 0.674, "step": 4329 }, { "epoch": 0.7497024131587491, "grad_norm": 1.40625, "learning_rate": 1.7168717542611258e-05, "loss": 0.639, "step": 4330 }, { "epoch": 0.7498755545936587, "grad_norm": 1.3125, "learning_rate": 1.7167445821483812e-05, "loss": 0.5629, "step": 4331 }, { "epoch": 0.7500486960285684, "grad_norm": 1.4375, "learning_rate": 1.7166173861934704e-05, "loss": 0.5801, "step": 4332 }, { "epoch": 0.7502218374634779, "grad_norm": 1.40625, "learning_rate": 1.7164901664006253e-05, "loss": 0.5369, "step": 4333 }, { "epoch": 0.7503949788983876, "grad_norm": 1.2890625, "learning_rate": 1.7163629227740778e-05, "loss": 0.5749, "step": 4334 }, { "epoch": 0.7505681203332972, "grad_norm": 1.359375, "learning_rate": 1.7162356553180596e-05, "loss": 0.5418, "step": 4335 }, { "epoch": 0.7507412617682069, "grad_norm": 1.3125, "learning_rate": 1.7161083640368056e-05, "loss": 0.5675, "step": 4336 }, { "epoch": 0.7509144032031165, "grad_norm": 1.3359375, "learning_rate": 1.715981048934549e-05, "loss": 0.5652, "step": 4337 }, { "epoch": 0.7510875446380262, "grad_norm": 1.3203125, "learning_rate": 1.7158537100155256e-05, "loss": 0.5553, "step": 4338 }, { "epoch": 0.7512606860729358, "grad_norm": 1.3046875, "learning_rate": 1.715726347283971e-05, "loss": 0.6226, "step": 4339 }, { "epoch": 0.7514338275078455, "grad_norm": 1.375, "learning_rate": 1.715598960744121e-05, "loss": 0.619, "step": 4340 }, { "epoch": 0.7516069689427551, "grad_norm": 1.25, "learning_rate": 1.7154715504002145e-05, "loss": 0.6399, "step": 4341 }, { "epoch": 0.7517801103776648, "grad_norm": 1.28125, "learning_rate": 1.7153441162564894e-05, "loss": 0.5961, "step": 4342 }, { "epoch": 0.7519532518125744, "grad_norm": 1.2578125, "learning_rate": 1.715216658317184e-05, "loss": 0.5763, "step": 4343 }, { "epoch": 0.7521263932474841, "grad_norm": 1.265625, "learning_rate": 1.7150891765865382e-05, "loss": 0.5725, "step": 4344 }, { "epoch": 0.7522995346823936, "grad_norm": 1.296875, "learning_rate": 1.7149616710687934e-05, "loss": 0.5965, "step": 4345 }, { "epoch": 0.7524726761173033, "grad_norm": 1.3515625, "learning_rate": 1.714834141768191e-05, "loss": 0.6238, "step": 4346 }, { "epoch": 0.7526458175522129, "grad_norm": 1.3125, "learning_rate": 1.714706588688972e-05, "loss": 0.5633, "step": 4347 }, { "epoch": 0.7528189589871226, "grad_norm": 1.359375, "learning_rate": 1.7145790118353807e-05, "loss": 0.5291, "step": 4348 }, { "epoch": 0.7529921004220322, "grad_norm": 1.2734375, "learning_rate": 1.71445141121166e-05, "loss": 0.5947, "step": 4349 }, { "epoch": 0.7531652418569419, "grad_norm": 1.5546875, "learning_rate": 1.714323786822055e-05, "loss": 0.6461, "step": 4350 }, { "epoch": 0.7533383832918515, "grad_norm": 1.3515625, "learning_rate": 1.714196138670811e-05, "loss": 0.6364, "step": 4351 }, { "epoch": 0.7535115247267612, "grad_norm": 1.375, "learning_rate": 1.714068466762174e-05, "loss": 0.6275, "step": 4352 }, { "epoch": 0.7536846661616708, "grad_norm": 1.359375, "learning_rate": 1.713940771100391e-05, "loss": 0.5482, "step": 4353 }, { "epoch": 0.7538578075965805, "grad_norm": 1.2421875, "learning_rate": 1.71381305168971e-05, "loss": 0.5427, "step": 4354 }, { "epoch": 0.7540309490314901, "grad_norm": 1.3828125, "learning_rate": 1.713685308534379e-05, "loss": 0.555, "step": 4355 }, { "epoch": 0.7542040904663997, "grad_norm": 1.25, "learning_rate": 1.7135575416386476e-05, "loss": 0.5505, "step": 4356 }, { "epoch": 0.7543772319013093, "grad_norm": 1.3515625, "learning_rate": 1.713429751006766e-05, "loss": 0.6176, "step": 4357 }, { "epoch": 0.754550373336219, "grad_norm": 1.4296875, "learning_rate": 1.7133019366429847e-05, "loss": 0.5969, "step": 4358 }, { "epoch": 0.7547235147711286, "grad_norm": 1.3203125, "learning_rate": 1.713174098551556e-05, "loss": 0.5871, "step": 4359 }, { "epoch": 0.7548966562060383, "grad_norm": 1.5, "learning_rate": 1.7130462367367318e-05, "loss": 0.6431, "step": 4360 }, { "epoch": 0.755069797640948, "grad_norm": 1.4453125, "learning_rate": 1.7129183512027655e-05, "loss": 0.6479, "step": 4361 }, { "epoch": 0.7552429390758576, "grad_norm": 1.296875, "learning_rate": 1.7127904419539115e-05, "loss": 0.5866, "step": 4362 }, { "epoch": 0.7554160805107673, "grad_norm": 1.3515625, "learning_rate": 1.7126625089944245e-05, "loss": 0.5846, "step": 4363 }, { "epoch": 0.7555892219456769, "grad_norm": 1.3046875, "learning_rate": 1.7125345523285598e-05, "loss": 0.6052, "step": 4364 }, { "epoch": 0.7557623633805866, "grad_norm": 1.3125, "learning_rate": 1.712406571960574e-05, "loss": 0.6794, "step": 4365 }, { "epoch": 0.7559355048154962, "grad_norm": 1.296875, "learning_rate": 1.7122785678947242e-05, "loss": 0.5758, "step": 4366 }, { "epoch": 0.7561086462504057, "grad_norm": 1.2890625, "learning_rate": 1.712150540135269e-05, "loss": 0.5411, "step": 4367 }, { "epoch": 0.7562817876853154, "grad_norm": 1.46875, "learning_rate": 1.7120224886864663e-05, "loss": 0.676, "step": 4368 }, { "epoch": 0.756454929120225, "grad_norm": 1.4296875, "learning_rate": 1.7118944135525763e-05, "loss": 0.604, "step": 4369 }, { "epoch": 0.7566280705551347, "grad_norm": 1.4140625, "learning_rate": 1.711766314737859e-05, "loss": 0.5499, "step": 4370 }, { "epoch": 0.7568012119900444, "grad_norm": 1.296875, "learning_rate": 1.7116381922465753e-05, "loss": 0.5089, "step": 4371 }, { "epoch": 0.756974353424954, "grad_norm": 1.3671875, "learning_rate": 1.7115100460829882e-05, "loss": 0.5911, "step": 4372 }, { "epoch": 0.7571474948598637, "grad_norm": 1.359375, "learning_rate": 1.7113818762513595e-05, "loss": 0.5308, "step": 4373 }, { "epoch": 0.7573206362947733, "grad_norm": 1.484375, "learning_rate": 1.7112536827559532e-05, "loss": 0.5483, "step": 4374 }, { "epoch": 0.757493777729683, "grad_norm": 1.2578125, "learning_rate": 1.711125465601033e-05, "loss": 0.5895, "step": 4375 }, { "epoch": 0.7576669191645926, "grad_norm": 1.3515625, "learning_rate": 1.7109972247908645e-05, "loss": 0.5758, "step": 4376 }, { "epoch": 0.7578400605995023, "grad_norm": 1.3046875, "learning_rate": 1.7108689603297134e-05, "loss": 0.57, "step": 4377 }, { "epoch": 0.7580132020344119, "grad_norm": 1.3046875, "learning_rate": 1.7107406722218463e-05, "loss": 0.5651, "step": 4378 }, { "epoch": 0.7581863434693215, "grad_norm": 1.375, "learning_rate": 1.710612360471531e-05, "loss": 0.5693, "step": 4379 }, { "epoch": 0.7583594849042311, "grad_norm": 1.359375, "learning_rate": 1.7104840250830347e-05, "loss": 0.5334, "step": 4380 }, { "epoch": 0.7585326263391408, "grad_norm": 1.3828125, "learning_rate": 1.7103556660606274e-05, "loss": 0.5999, "step": 4381 }, { "epoch": 0.7587057677740504, "grad_norm": 1.4453125, "learning_rate": 1.7102272834085792e-05, "loss": 0.6509, "step": 4382 }, { "epoch": 0.7588789092089601, "grad_norm": 1.375, "learning_rate": 1.7100988771311598e-05, "loss": 0.5324, "step": 4383 }, { "epoch": 0.7590520506438697, "grad_norm": 1.3203125, "learning_rate": 1.7099704472326408e-05, "loss": 0.5459, "step": 4384 }, { "epoch": 0.7592251920787794, "grad_norm": 1.28125, "learning_rate": 1.7098419937172944e-05, "loss": 0.5672, "step": 4385 }, { "epoch": 0.759398333513689, "grad_norm": 1.2734375, "learning_rate": 1.7097135165893935e-05, "loss": 0.5576, "step": 4386 }, { "epoch": 0.7595714749485987, "grad_norm": 1.25, "learning_rate": 1.7095850158532123e-05, "loss": 0.5265, "step": 4387 }, { "epoch": 0.7597446163835083, "grad_norm": 1.5078125, "learning_rate": 1.7094564915130248e-05, "loss": 0.5283, "step": 4388 }, { "epoch": 0.759917757818418, "grad_norm": 1.390625, "learning_rate": 1.7093279435731065e-05, "loss": 0.5921, "step": 4389 }, { "epoch": 0.7600908992533275, "grad_norm": 1.3046875, "learning_rate": 1.7091993720377336e-05, "loss": 0.5779, "step": 4390 }, { "epoch": 0.7602640406882372, "grad_norm": 1.3828125, "learning_rate": 1.7090707769111826e-05, "loss": 0.6018, "step": 4391 }, { "epoch": 0.7604371821231468, "grad_norm": 1.328125, "learning_rate": 1.7089421581977314e-05, "loss": 0.5537, "step": 4392 }, { "epoch": 0.7606103235580565, "grad_norm": 1.3359375, "learning_rate": 1.7088135159016584e-05, "loss": 0.5978, "step": 4393 }, { "epoch": 0.7607834649929661, "grad_norm": 1.3359375, "learning_rate": 1.708684850027243e-05, "loss": 0.5202, "step": 4394 }, { "epoch": 0.7609566064278758, "grad_norm": 1.3515625, "learning_rate": 1.7085561605787645e-05, "loss": 0.6027, "step": 4395 }, { "epoch": 0.7611297478627854, "grad_norm": 1.7578125, "learning_rate": 1.7084274475605047e-05, "loss": 0.6309, "step": 4396 }, { "epoch": 0.7613028892976951, "grad_norm": 1.28125, "learning_rate": 1.7082987109767447e-05, "loss": 0.6296, "step": 4397 }, { "epoch": 0.7614760307326047, "grad_norm": 1.3671875, "learning_rate": 1.7081699508317665e-05, "loss": 0.5994, "step": 4398 }, { "epoch": 0.7616491721675144, "grad_norm": 1.3203125, "learning_rate": 1.7080411671298544e-05, "loss": 0.6001, "step": 4399 }, { "epoch": 0.761822313602424, "grad_norm": 1.3671875, "learning_rate": 1.7079123598752906e-05, "loss": 0.5816, "step": 4400 }, { "epoch": 0.7619954550373336, "grad_norm": 1.296875, "learning_rate": 1.7077835290723612e-05, "loss": 0.5695, "step": 4401 }, { "epoch": 0.7621685964722432, "grad_norm": 1.3203125, "learning_rate": 1.7076546747253514e-05, "loss": 0.6024, "step": 4402 }, { "epoch": 0.7623417379071529, "grad_norm": 1.2265625, "learning_rate": 1.7075257968385472e-05, "loss": 0.5417, "step": 4403 }, { "epoch": 0.7625148793420625, "grad_norm": 1.2734375, "learning_rate": 1.7073968954162356e-05, "loss": 0.544, "step": 4404 }, { "epoch": 0.7626880207769722, "grad_norm": 1.2890625, "learning_rate": 1.707267970462705e-05, "loss": 0.5243, "step": 4405 }, { "epoch": 0.7628611622118818, "grad_norm": 1.4140625, "learning_rate": 1.7071390219822434e-05, "loss": 0.619, "step": 4406 }, { "epoch": 0.7630343036467915, "grad_norm": 1.328125, "learning_rate": 1.7070100499791405e-05, "loss": 0.5329, "step": 4407 }, { "epoch": 0.7632074450817011, "grad_norm": 1.3359375, "learning_rate": 1.7068810544576864e-05, "loss": 0.5875, "step": 4408 }, { "epoch": 0.7633805865166108, "grad_norm": 1.328125, "learning_rate": 1.706752035422172e-05, "loss": 0.6111, "step": 4409 }, { "epoch": 0.7635537279515204, "grad_norm": 1.4921875, "learning_rate": 1.7066229928768894e-05, "loss": 0.5819, "step": 4410 }, { "epoch": 0.7637268693864301, "grad_norm": 1.2734375, "learning_rate": 1.706493926826131e-05, "loss": 0.5613, "step": 4411 }, { "epoch": 0.7639000108213397, "grad_norm": 1.453125, "learning_rate": 1.7063648372741902e-05, "loss": 0.5634, "step": 4412 }, { "epoch": 0.7640731522562493, "grad_norm": 1.3203125, "learning_rate": 1.706235724225361e-05, "loss": 0.6165, "step": 4413 }, { "epoch": 0.7642462936911589, "grad_norm": 1.328125, "learning_rate": 1.706106587683938e-05, "loss": 0.5548, "step": 4414 }, { "epoch": 0.7644194351260686, "grad_norm": 1.1484375, "learning_rate": 1.705977427654217e-05, "loss": 0.5938, "step": 4415 }, { "epoch": 0.7645925765609782, "grad_norm": 1.2890625, "learning_rate": 1.7058482441404946e-05, "loss": 0.571, "step": 4416 }, { "epoch": 0.7647657179958879, "grad_norm": 1.3515625, "learning_rate": 1.705719037147068e-05, "loss": 0.6252, "step": 4417 }, { "epoch": 0.7649388594307975, "grad_norm": 1.34375, "learning_rate": 1.7055898066782353e-05, "loss": 0.5987, "step": 4418 }, { "epoch": 0.7651120008657072, "grad_norm": 1.265625, "learning_rate": 1.705460552738295e-05, "loss": 0.6226, "step": 4419 }, { "epoch": 0.7652851423006168, "grad_norm": 1.28125, "learning_rate": 1.7053312753315468e-05, "loss": 0.546, "step": 4420 }, { "epoch": 0.7654582837355265, "grad_norm": 1.2890625, "learning_rate": 1.7052019744622914e-05, "loss": 0.5857, "step": 4421 }, { "epoch": 0.7656314251704361, "grad_norm": 1.3125, "learning_rate": 1.7050726501348295e-05, "loss": 0.5489, "step": 4422 }, { "epoch": 0.7658045666053458, "grad_norm": 1.3046875, "learning_rate": 1.704943302353463e-05, "loss": 0.5586, "step": 4423 }, { "epoch": 0.7659777080402553, "grad_norm": 1.265625, "learning_rate": 1.704813931122495e-05, "loss": 0.6026, "step": 4424 }, { "epoch": 0.766150849475165, "grad_norm": 1.3671875, "learning_rate": 1.7046845364462286e-05, "loss": 0.5653, "step": 4425 }, { "epoch": 0.7663239909100746, "grad_norm": 1.421875, "learning_rate": 1.704555118328968e-05, "loss": 0.6669, "step": 4426 }, { "epoch": 0.7664971323449843, "grad_norm": 1.2734375, "learning_rate": 1.7044256767750184e-05, "loss": 0.4974, "step": 4427 }, { "epoch": 0.7666702737798939, "grad_norm": 1.3671875, "learning_rate": 1.7042962117886856e-05, "loss": 0.5341, "step": 4428 }, { "epoch": 0.7668434152148036, "grad_norm": 1.3671875, "learning_rate": 1.7041667233742763e-05, "loss": 0.6025, "step": 4429 }, { "epoch": 0.7670165566497132, "grad_norm": 1.3046875, "learning_rate": 1.704037211536098e-05, "loss": 0.6762, "step": 4430 }, { "epoch": 0.7671896980846229, "grad_norm": 1.3125, "learning_rate": 1.7039076762784582e-05, "loss": 0.6089, "step": 4431 }, { "epoch": 0.7673628395195325, "grad_norm": 1.4609375, "learning_rate": 1.7037781176056665e-05, "loss": 0.5897, "step": 4432 }, { "epoch": 0.7675359809544422, "grad_norm": 1.34375, "learning_rate": 1.703648535522032e-05, "loss": 0.5519, "step": 4433 }, { "epoch": 0.7677091223893519, "grad_norm": 1.484375, "learning_rate": 1.7035189300318658e-05, "loss": 0.5929, "step": 4434 }, { "epoch": 0.7678822638242614, "grad_norm": 1.34375, "learning_rate": 1.703389301139479e-05, "loss": 0.5894, "step": 4435 }, { "epoch": 0.768055405259171, "grad_norm": 1.421875, "learning_rate": 1.7032596488491835e-05, "loss": 0.5543, "step": 4436 }, { "epoch": 0.7682285466940807, "grad_norm": 1.390625, "learning_rate": 1.703129973165292e-05, "loss": 0.5491, "step": 4437 }, { "epoch": 0.7684016881289903, "grad_norm": 1.2734375, "learning_rate": 1.7030002740921183e-05, "loss": 0.556, "step": 4438 }, { "epoch": 0.7685748295639, "grad_norm": 1.34375, "learning_rate": 1.7028705516339767e-05, "loss": 0.6053, "step": 4439 }, { "epoch": 0.7687479709988096, "grad_norm": 1.25, "learning_rate": 1.7027408057951827e-05, "loss": 0.5751, "step": 4440 }, { "epoch": 0.7689211124337193, "grad_norm": 1.421875, "learning_rate": 1.7026110365800516e-05, "loss": 0.5585, "step": 4441 }, { "epoch": 0.769094253868629, "grad_norm": 1.390625, "learning_rate": 1.7024812439929004e-05, "loss": 0.6844, "step": 4442 }, { "epoch": 0.7692673953035386, "grad_norm": 1.3203125, "learning_rate": 1.7023514280380468e-05, "loss": 0.6133, "step": 4443 }, { "epoch": 0.7694405367384483, "grad_norm": 1.2578125, "learning_rate": 1.702221588719809e-05, "loss": 0.5556, "step": 4444 }, { "epoch": 0.7696136781733579, "grad_norm": 1.4140625, "learning_rate": 1.7020917260425055e-05, "loss": 0.538, "step": 4445 }, { "epoch": 0.7697868196082676, "grad_norm": 1.328125, "learning_rate": 1.7019618400104572e-05, "loss": 0.5951, "step": 4446 }, { "epoch": 0.7699599610431771, "grad_norm": 1.1953125, "learning_rate": 1.7018319306279837e-05, "loss": 0.5183, "step": 4447 }, { "epoch": 0.7701331024780868, "grad_norm": 1.3046875, "learning_rate": 1.7017019978994065e-05, "loss": 0.6044, "step": 4448 }, { "epoch": 0.7703062439129964, "grad_norm": 1.328125, "learning_rate": 1.7015720418290482e-05, "loss": 0.5993, "step": 4449 }, { "epoch": 0.770479385347906, "grad_norm": 1.4375, "learning_rate": 1.7014420624212317e-05, "loss": 0.6141, "step": 4450 }, { "epoch": 0.7706525267828157, "grad_norm": 1.3671875, "learning_rate": 1.7013120596802802e-05, "loss": 0.5918, "step": 4451 }, { "epoch": 0.7708256682177254, "grad_norm": 1.3125, "learning_rate": 1.7011820336105187e-05, "loss": 0.6316, "step": 4452 }, { "epoch": 0.770998809652635, "grad_norm": 1.3203125, "learning_rate": 1.701051984216272e-05, "loss": 0.5214, "step": 4453 }, { "epoch": 0.7711719510875447, "grad_norm": 1.390625, "learning_rate": 1.7009219115018663e-05, "loss": 0.6028, "step": 4454 }, { "epoch": 0.7713450925224543, "grad_norm": 1.46875, "learning_rate": 1.7007918154716286e-05, "loss": 0.6703, "step": 4455 }, { "epoch": 0.771518233957364, "grad_norm": 1.4921875, "learning_rate": 1.700661696129887e-05, "loss": 0.5958, "step": 4456 }, { "epoch": 0.7716913753922736, "grad_norm": 1.25, "learning_rate": 1.7005315534809687e-05, "loss": 0.5534, "step": 4457 }, { "epoch": 0.7718645168271832, "grad_norm": 1.34375, "learning_rate": 1.700401387529203e-05, "loss": 0.6349, "step": 4458 }, { "epoch": 0.7720376582620928, "grad_norm": 1.21875, "learning_rate": 1.7002711982789204e-05, "loss": 0.5538, "step": 4459 }, { "epoch": 0.7722107996970025, "grad_norm": 1.4296875, "learning_rate": 1.7001409857344514e-05, "loss": 0.6547, "step": 4460 }, { "epoch": 0.7723839411319121, "grad_norm": 1.3046875, "learning_rate": 1.7000107499001274e-05, "loss": 0.5259, "step": 4461 }, { "epoch": 0.7725570825668218, "grad_norm": 1.328125, "learning_rate": 1.699880490780281e-05, "loss": 0.595, "step": 4462 }, { "epoch": 0.7727302240017314, "grad_norm": 1.359375, "learning_rate": 1.699750208379244e-05, "loss": 0.6366, "step": 4463 }, { "epoch": 0.7729033654366411, "grad_norm": 1.421875, "learning_rate": 1.6996199027013516e-05, "loss": 0.6471, "step": 4464 }, { "epoch": 0.7730765068715507, "grad_norm": 1.2734375, "learning_rate": 1.6994895737509377e-05, "loss": 0.5774, "step": 4465 }, { "epoch": 0.7732496483064604, "grad_norm": 1.3125, "learning_rate": 1.699359221532338e-05, "loss": 0.4967, "step": 4466 }, { "epoch": 0.77342278974137, "grad_norm": 1.3515625, "learning_rate": 1.6992288460498878e-05, "loss": 0.6273, "step": 4467 }, { "epoch": 0.7735959311762797, "grad_norm": 1.265625, "learning_rate": 1.6990984473079245e-05, "loss": 0.6026, "step": 4468 }, { "epoch": 0.7737690726111892, "grad_norm": 1.2421875, "learning_rate": 1.6989680253107857e-05, "loss": 0.567, "step": 4469 }, { "epoch": 0.7739422140460989, "grad_norm": 1.421875, "learning_rate": 1.69883758006281e-05, "loss": 0.6599, "step": 4470 }, { "epoch": 0.7741153554810085, "grad_norm": 1.3203125, "learning_rate": 1.6987071115683368e-05, "loss": 0.6339, "step": 4471 }, { "epoch": 0.7742884969159182, "grad_norm": 1.328125, "learning_rate": 1.6985766198317057e-05, "loss": 0.55, "step": 4472 }, { "epoch": 0.7744616383508278, "grad_norm": 1.4296875, "learning_rate": 1.6984461048572572e-05, "loss": 0.5381, "step": 4473 }, { "epoch": 0.7746347797857375, "grad_norm": 1.3671875, "learning_rate": 1.698315566649333e-05, "loss": 0.5744, "step": 4474 }, { "epoch": 0.7748079212206471, "grad_norm": 1.21875, "learning_rate": 1.6981850052122756e-05, "loss": 0.5925, "step": 4475 }, { "epoch": 0.7749810626555568, "grad_norm": 1.3515625, "learning_rate": 1.6980544205504282e-05, "loss": 0.6244, "step": 4476 }, { "epoch": 0.7751542040904664, "grad_norm": 1.2578125, "learning_rate": 1.6979238126681342e-05, "loss": 0.5543, "step": 4477 }, { "epoch": 0.7753273455253761, "grad_norm": 1.3359375, "learning_rate": 1.6977931815697384e-05, "loss": 0.6206, "step": 4478 }, { "epoch": 0.7755004869602857, "grad_norm": 1.40625, "learning_rate": 1.697662527259586e-05, "loss": 0.5588, "step": 4479 }, { "epoch": 0.7756736283951954, "grad_norm": 1.421875, "learning_rate": 1.6975318497420236e-05, "loss": 0.5528, "step": 4480 }, { "epoch": 0.7758467698301049, "grad_norm": 1.3828125, "learning_rate": 1.6974011490213976e-05, "loss": 0.5996, "step": 4481 }, { "epoch": 0.7760199112650146, "grad_norm": 1.28125, "learning_rate": 1.6972704251020564e-05, "loss": 0.5279, "step": 4482 }, { "epoch": 0.7761930526999242, "grad_norm": 1.390625, "learning_rate": 1.697139677988348e-05, "loss": 0.6259, "step": 4483 }, { "epoch": 0.7763661941348339, "grad_norm": 1.4375, "learning_rate": 1.6970089076846212e-05, "loss": 0.6438, "step": 4484 }, { "epoch": 0.7765393355697435, "grad_norm": 1.296875, "learning_rate": 1.6968781141952267e-05, "loss": 0.562, "step": 4485 }, { "epoch": 0.7767124770046532, "grad_norm": 1.453125, "learning_rate": 1.696747297524515e-05, "loss": 0.6793, "step": 4486 }, { "epoch": 0.7768856184395628, "grad_norm": 1.21875, "learning_rate": 1.6966164576768378e-05, "loss": 0.5183, "step": 4487 }, { "epoch": 0.7770587598744725, "grad_norm": 1.328125, "learning_rate": 1.6964855946565472e-05, "loss": 0.5829, "step": 4488 }, { "epoch": 0.7772319013093821, "grad_norm": 1.3359375, "learning_rate": 1.6963547084679967e-05, "loss": 0.6033, "step": 4489 }, { "epoch": 0.7774050427442918, "grad_norm": 1.3828125, "learning_rate": 1.69622379911554e-05, "loss": 0.5844, "step": 4490 }, { "epoch": 0.7775781841792014, "grad_norm": 1.4609375, "learning_rate": 1.696092866603531e-05, "loss": 0.5847, "step": 4491 }, { "epoch": 0.777751325614111, "grad_norm": 1.3671875, "learning_rate": 1.695961910936326e-05, "loss": 0.5909, "step": 4492 }, { "epoch": 0.7779244670490206, "grad_norm": 1.3828125, "learning_rate": 1.6958309321182813e-05, "loss": 0.6656, "step": 4493 }, { "epoch": 0.7780976084839303, "grad_norm": 1.3125, "learning_rate": 1.6956999301537533e-05, "loss": 0.6259, "step": 4494 }, { "epoch": 0.7782707499188399, "grad_norm": 1.25, "learning_rate": 1.6955689050470998e-05, "loss": 0.5237, "step": 4495 }, { "epoch": 0.7784438913537496, "grad_norm": 1.3359375, "learning_rate": 1.695437856802679e-05, "loss": 0.5386, "step": 4496 }, { "epoch": 0.7786170327886592, "grad_norm": 1.34375, "learning_rate": 1.6953067854248514e-05, "loss": 0.5338, "step": 4497 }, { "epoch": 0.7787901742235689, "grad_norm": 1.2578125, "learning_rate": 1.6951756909179757e-05, "loss": 0.5069, "step": 4498 }, { "epoch": 0.7789633156584785, "grad_norm": 1.3125, "learning_rate": 1.695044573286413e-05, "loss": 0.6992, "step": 4499 }, { "epoch": 0.7791364570933882, "grad_norm": 1.3203125, "learning_rate": 1.6949134325345253e-05, "loss": 0.7223, "step": 4500 }, { "epoch": 0.7793095985282978, "grad_norm": 1.328125, "learning_rate": 1.6947822686666745e-05, "loss": 0.5515, "step": 4501 }, { "epoch": 0.7794827399632075, "grad_norm": 1.3984375, "learning_rate": 1.6946510816872237e-05, "loss": 0.6761, "step": 4502 }, { "epoch": 0.779655881398117, "grad_norm": 1.28125, "learning_rate": 1.6945198716005373e-05, "loss": 0.5472, "step": 4503 }, { "epoch": 0.7798290228330267, "grad_norm": 1.2734375, "learning_rate": 1.6943886384109794e-05, "loss": 0.6099, "step": 4504 }, { "epoch": 0.7800021642679363, "grad_norm": 1.3046875, "learning_rate": 1.6942573821229155e-05, "loss": 0.5562, "step": 4505 }, { "epoch": 0.780175305702846, "grad_norm": 1.34375, "learning_rate": 1.694126102740712e-05, "loss": 0.6049, "step": 4506 }, { "epoch": 0.7803484471377556, "grad_norm": 1.375, "learning_rate": 1.6939948002687352e-05, "loss": 0.6983, "step": 4507 }, { "epoch": 0.7805215885726653, "grad_norm": 1.3203125, "learning_rate": 1.693863474711354e-05, "loss": 0.6289, "step": 4508 }, { "epoch": 0.7806947300075749, "grad_norm": 1.453125, "learning_rate": 1.693732126072936e-05, "loss": 0.5989, "step": 4509 }, { "epoch": 0.7808678714424846, "grad_norm": 1.2734375, "learning_rate": 1.6936007543578506e-05, "loss": 0.581, "step": 4510 }, { "epoch": 0.7810410128773942, "grad_norm": 1.3125, "learning_rate": 1.693469359570468e-05, "loss": 0.5874, "step": 4511 }, { "epoch": 0.7812141543123039, "grad_norm": 1.375, "learning_rate": 1.6933379417151586e-05, "loss": 0.5754, "step": 4512 }, { "epoch": 0.7813872957472136, "grad_norm": 1.2734375, "learning_rate": 1.6932065007962944e-05, "loss": 0.5399, "step": 4513 }, { "epoch": 0.7815604371821232, "grad_norm": 1.25, "learning_rate": 1.693075036818247e-05, "loss": 0.5689, "step": 4514 }, { "epoch": 0.7817335786170327, "grad_norm": 1.359375, "learning_rate": 1.6929435497853908e-05, "loss": 0.6129, "step": 4515 }, { "epoch": 0.7819067200519424, "grad_norm": 1.34375, "learning_rate": 1.6928120397020984e-05, "loss": 0.6447, "step": 4516 }, { "epoch": 0.782079861486852, "grad_norm": 1.3671875, "learning_rate": 1.692680506572745e-05, "loss": 0.5926, "step": 4517 }, { "epoch": 0.7822530029217617, "grad_norm": 1.3125, "learning_rate": 1.6925489504017058e-05, "loss": 0.4935, "step": 4518 }, { "epoch": 0.7824261443566713, "grad_norm": 1.3125, "learning_rate": 1.692417371193357e-05, "loss": 0.6005, "step": 4519 }, { "epoch": 0.782599285791581, "grad_norm": 1.2421875, "learning_rate": 1.692285768952076e-05, "loss": 0.5411, "step": 4520 }, { "epoch": 0.7827724272264907, "grad_norm": 1.2734375, "learning_rate": 1.6921541436822395e-05, "loss": 0.5688, "step": 4521 }, { "epoch": 0.7829455686614003, "grad_norm": 1.359375, "learning_rate": 1.6920224953882267e-05, "loss": 0.6025, "step": 4522 }, { "epoch": 0.78311871009631, "grad_norm": 1.3359375, "learning_rate": 1.6918908240744162e-05, "loss": 0.5433, "step": 4523 }, { "epoch": 0.7832918515312196, "grad_norm": 1.46875, "learning_rate": 1.6917591297451887e-05, "loss": 0.5836, "step": 4524 }, { "epoch": 0.7834649929661293, "grad_norm": 1.453125, "learning_rate": 1.691627412404925e-05, "loss": 0.6034, "step": 4525 }, { "epoch": 0.7836381344010388, "grad_norm": 1.3515625, "learning_rate": 1.691495672058006e-05, "loss": 0.5959, "step": 4526 }, { "epoch": 0.7838112758359485, "grad_norm": 1.359375, "learning_rate": 1.691363908708814e-05, "loss": 0.5458, "step": 4527 }, { "epoch": 0.7839844172708581, "grad_norm": 1.3828125, "learning_rate": 1.6912321223617323e-05, "loss": 0.5517, "step": 4528 }, { "epoch": 0.7841575587057678, "grad_norm": 1.390625, "learning_rate": 1.6911003130211447e-05, "loss": 0.619, "step": 4529 }, { "epoch": 0.7843307001406774, "grad_norm": 1.25, "learning_rate": 1.6909684806914358e-05, "loss": 0.5807, "step": 4530 }, { "epoch": 0.7845038415755871, "grad_norm": 1.2890625, "learning_rate": 1.6908366253769908e-05, "loss": 0.6328, "step": 4531 }, { "epoch": 0.7846769830104967, "grad_norm": 1.3984375, "learning_rate": 1.690704747082196e-05, "loss": 0.5094, "step": 4532 }, { "epoch": 0.7848501244454064, "grad_norm": 1.3046875, "learning_rate": 1.6905728458114384e-05, "loss": 0.5501, "step": 4533 }, { "epoch": 0.785023265880316, "grad_norm": 1.328125, "learning_rate": 1.690440921569105e-05, "loss": 0.594, "step": 4534 }, { "epoch": 0.7851964073152257, "grad_norm": 1.2734375, "learning_rate": 1.6903089743595846e-05, "loss": 0.5328, "step": 4535 }, { "epoch": 0.7853695487501353, "grad_norm": 1.2265625, "learning_rate": 1.6901770041872666e-05, "loss": 0.5693, "step": 4536 }, { "epoch": 0.7855426901850449, "grad_norm": 1.3671875, "learning_rate": 1.6900450110565408e-05, "loss": 0.5581, "step": 4537 }, { "epoch": 0.7857158316199545, "grad_norm": 1.2890625, "learning_rate": 1.6899129949717977e-05, "loss": 0.5684, "step": 4538 }, { "epoch": 0.7858889730548642, "grad_norm": 1.3046875, "learning_rate": 1.6897809559374284e-05, "loss": 0.5563, "step": 4539 }, { "epoch": 0.7860621144897738, "grad_norm": 1.25, "learning_rate": 1.6896488939578256e-05, "loss": 0.5393, "step": 4540 }, { "epoch": 0.7862352559246835, "grad_norm": 1.3203125, "learning_rate": 1.6895168090373824e-05, "loss": 0.5993, "step": 4541 }, { "epoch": 0.7864083973595931, "grad_norm": 1.34375, "learning_rate": 1.6893847011804925e-05, "loss": 0.58, "step": 4542 }, { "epoch": 0.7865815387945028, "grad_norm": 1.34375, "learning_rate": 1.6892525703915502e-05, "loss": 0.5406, "step": 4543 }, { "epoch": 0.7867546802294124, "grad_norm": 1.3359375, "learning_rate": 1.6891204166749508e-05, "loss": 0.6109, "step": 4544 }, { "epoch": 0.7869278216643221, "grad_norm": 1.4375, "learning_rate": 1.68898824003509e-05, "loss": 0.6045, "step": 4545 }, { "epoch": 0.7871009630992317, "grad_norm": 1.453125, "learning_rate": 1.6888560404763656e-05, "loss": 0.6431, "step": 4546 }, { "epoch": 0.7872741045341414, "grad_norm": 1.234375, "learning_rate": 1.688723818003174e-05, "loss": 0.5402, "step": 4547 }, { "epoch": 0.787447245969051, "grad_norm": 1.1640625, "learning_rate": 1.6885915726199142e-05, "loss": 0.5247, "step": 4548 }, { "epoch": 0.7876203874039606, "grad_norm": 1.28125, "learning_rate": 1.6884593043309853e-05, "loss": 0.5507, "step": 4549 }, { "epoch": 0.7877935288388702, "grad_norm": 1.3359375, "learning_rate": 1.6883270131407864e-05, "loss": 0.6055, "step": 4550 }, { "epoch": 0.7879666702737799, "grad_norm": 1.46875, "learning_rate": 1.6881946990537192e-05, "loss": 0.5745, "step": 4551 }, { "epoch": 0.7881398117086895, "grad_norm": 1.328125, "learning_rate": 1.6880623620741843e-05, "loss": 0.6171, "step": 4552 }, { "epoch": 0.7883129531435992, "grad_norm": 1.234375, "learning_rate": 1.687930002206584e-05, "loss": 0.5607, "step": 4553 }, { "epoch": 0.7884860945785088, "grad_norm": 1.3203125, "learning_rate": 1.6877976194553213e-05, "loss": 0.5489, "step": 4554 }, { "epoch": 0.7886592360134185, "grad_norm": 1.3203125, "learning_rate": 1.6876652138247997e-05, "loss": 0.5281, "step": 4555 }, { "epoch": 0.7888323774483281, "grad_norm": 1.3828125, "learning_rate": 1.6875327853194236e-05, "loss": 0.5375, "step": 4556 }, { "epoch": 0.7890055188832378, "grad_norm": 1.2890625, "learning_rate": 1.6874003339435985e-05, "loss": 0.5674, "step": 4557 }, { "epoch": 0.7891786603181474, "grad_norm": 1.34375, "learning_rate": 1.68726785970173e-05, "loss": 0.6255, "step": 4558 }, { "epoch": 0.7893518017530571, "grad_norm": 1.171875, "learning_rate": 1.687135362598225e-05, "loss": 0.5534, "step": 4559 }, { "epoch": 0.7895249431879666, "grad_norm": 1.3984375, "learning_rate": 1.6870028426374904e-05, "loss": 0.6812, "step": 4560 }, { "epoch": 0.7896980846228763, "grad_norm": 1.265625, "learning_rate": 1.686870299823935e-05, "loss": 0.5857, "step": 4561 }, { "epoch": 0.7898712260577859, "grad_norm": 1.4140625, "learning_rate": 1.6867377341619678e-05, "loss": 0.6476, "step": 4562 }, { "epoch": 0.7900443674926956, "grad_norm": 1.3671875, "learning_rate": 1.686605145655998e-05, "loss": 0.6026, "step": 4563 }, { "epoch": 0.7902175089276052, "grad_norm": 1.34375, "learning_rate": 1.686472534310437e-05, "loss": 0.5536, "step": 4564 }, { "epoch": 0.7903906503625149, "grad_norm": 1.609375, "learning_rate": 1.6863399001296956e-05, "loss": 0.6552, "step": 4565 }, { "epoch": 0.7905637917974245, "grad_norm": 1.25, "learning_rate": 1.686207243118185e-05, "loss": 0.6113, "step": 4566 }, { "epoch": 0.7907369332323342, "grad_norm": 1.3125, "learning_rate": 1.686074563280319e-05, "loss": 0.6382, "step": 4567 }, { "epoch": 0.7909100746672438, "grad_norm": 1.4140625, "learning_rate": 1.6859418606205108e-05, "loss": 0.5838, "step": 4568 }, { "epoch": 0.7910832161021535, "grad_norm": 1.3046875, "learning_rate": 1.685809135143175e-05, "loss": 0.5489, "step": 4569 }, { "epoch": 0.7912563575370631, "grad_norm": 1.3515625, "learning_rate": 1.685676386852726e-05, "loss": 0.6074, "step": 4570 }, { "epoch": 0.7914294989719727, "grad_norm": 1.1640625, "learning_rate": 1.6855436157535802e-05, "loss": 0.5105, "step": 4571 }, { "epoch": 0.7916026404068823, "grad_norm": 1.28125, "learning_rate": 1.6854108218501534e-05, "loss": 0.5613, "step": 4572 }, { "epoch": 0.791775781841792, "grad_norm": 1.3671875, "learning_rate": 1.6852780051468637e-05, "loss": 0.6213, "step": 4573 }, { "epoch": 0.7919489232767016, "grad_norm": 1.203125, "learning_rate": 1.6851451656481294e-05, "loss": 0.5316, "step": 4574 }, { "epoch": 0.7921220647116113, "grad_norm": 1.34375, "learning_rate": 1.6850123033583687e-05, "loss": 0.5943, "step": 4575 }, { "epoch": 0.7922952061465209, "grad_norm": 1.5390625, "learning_rate": 1.684879418282001e-05, "loss": 0.5981, "step": 4576 }, { "epoch": 0.7924683475814306, "grad_norm": 1.40625, "learning_rate": 1.6847465104234474e-05, "loss": 0.6083, "step": 4577 }, { "epoch": 0.7926414890163402, "grad_norm": 1.265625, "learning_rate": 1.6846135797871284e-05, "loss": 0.5468, "step": 4578 }, { "epoch": 0.7928146304512499, "grad_norm": 1.3125, "learning_rate": 1.6844806263774664e-05, "loss": 0.609, "step": 4579 }, { "epoch": 0.7929877718861595, "grad_norm": 1.2421875, "learning_rate": 1.6843476501988835e-05, "loss": 0.6457, "step": 4580 }, { "epoch": 0.7931609133210692, "grad_norm": 1.3125, "learning_rate": 1.6842146512558036e-05, "loss": 0.5819, "step": 4581 }, { "epoch": 0.7933340547559787, "grad_norm": 1.3125, "learning_rate": 1.6840816295526506e-05, "loss": 0.5989, "step": 4582 }, { "epoch": 0.7935071961908884, "grad_norm": 1.3359375, "learning_rate": 1.683948585093849e-05, "loss": 0.5797, "step": 4583 }, { "epoch": 0.793680337625798, "grad_norm": 1.328125, "learning_rate": 1.683815517883825e-05, "loss": 0.6106, "step": 4584 }, { "epoch": 0.7938534790607077, "grad_norm": 1.3828125, "learning_rate": 1.6836824279270053e-05, "loss": 0.5963, "step": 4585 }, { "epoch": 0.7940266204956173, "grad_norm": 1.3203125, "learning_rate": 1.6835493152278163e-05, "loss": 0.6009, "step": 4586 }, { "epoch": 0.794199761930527, "grad_norm": 1.3046875, "learning_rate": 1.683416179790686e-05, "loss": 0.6257, "step": 4587 }, { "epoch": 0.7943729033654366, "grad_norm": 1.265625, "learning_rate": 1.683283021620044e-05, "loss": 0.5324, "step": 4588 }, { "epoch": 0.7945460448003463, "grad_norm": 1.328125, "learning_rate": 1.6831498407203186e-05, "loss": 0.5689, "step": 4589 }, { "epoch": 0.794719186235256, "grad_norm": 1.2734375, "learning_rate": 1.6830166370959408e-05, "loss": 0.572, "step": 4590 }, { "epoch": 0.7948923276701656, "grad_norm": 1.4296875, "learning_rate": 1.6828834107513414e-05, "loss": 0.61, "step": 4591 }, { "epoch": 0.7950654691050753, "grad_norm": 1.4375, "learning_rate": 1.682750161690952e-05, "loss": 0.568, "step": 4592 }, { "epoch": 0.7952386105399849, "grad_norm": 1.3515625, "learning_rate": 1.6826168899192044e-05, "loss": 0.61, "step": 4593 }, { "epoch": 0.7954117519748944, "grad_norm": 1.3359375, "learning_rate": 1.6824835954405328e-05, "loss": 0.6051, "step": 4594 }, { "epoch": 0.7955848934098041, "grad_norm": 1.4296875, "learning_rate": 1.682350278259371e-05, "loss": 0.6027, "step": 4595 }, { "epoch": 0.7957580348447137, "grad_norm": 1.40625, "learning_rate": 1.6822169383801536e-05, "loss": 0.6582, "step": 4596 }, { "epoch": 0.7959311762796234, "grad_norm": 1.3828125, "learning_rate": 1.682083575807316e-05, "loss": 0.6167, "step": 4597 }, { "epoch": 0.796104317714533, "grad_norm": 1.4296875, "learning_rate": 1.6819501905452945e-05, "loss": 0.5718, "step": 4598 }, { "epoch": 0.7962774591494427, "grad_norm": 1.4453125, "learning_rate": 1.681816782598526e-05, "loss": 0.5776, "step": 4599 }, { "epoch": 0.7964506005843524, "grad_norm": 1.4453125, "learning_rate": 1.681683351971448e-05, "loss": 0.6494, "step": 4600 }, { "epoch": 0.796623742019262, "grad_norm": 1.375, "learning_rate": 1.6815498986685e-05, "loss": 0.6576, "step": 4601 }, { "epoch": 0.7967968834541717, "grad_norm": 1.234375, "learning_rate": 1.6814164226941205e-05, "loss": 0.5879, "step": 4602 }, { "epoch": 0.7969700248890813, "grad_norm": 1.2890625, "learning_rate": 1.6812829240527496e-05, "loss": 0.5354, "step": 4603 }, { "epoch": 0.797143166323991, "grad_norm": 1.3671875, "learning_rate": 1.681149402748828e-05, "loss": 0.5825, "step": 4604 }, { "epoch": 0.7973163077589005, "grad_norm": 1.328125, "learning_rate": 1.6810158587867973e-05, "loss": 0.6965, "step": 4605 }, { "epoch": 0.7974894491938102, "grad_norm": 1.4609375, "learning_rate": 1.6808822921710998e-05, "loss": 0.5481, "step": 4606 }, { "epoch": 0.7976625906287198, "grad_norm": 1.3984375, "learning_rate": 1.6807487029061788e-05, "loss": 0.5589, "step": 4607 }, { "epoch": 0.7978357320636295, "grad_norm": 1.4453125, "learning_rate": 1.6806150909964776e-05, "loss": 0.5681, "step": 4608 }, { "epoch": 0.7980088734985391, "grad_norm": 1.40625, "learning_rate": 1.6804814564464406e-05, "loss": 0.5844, "step": 4609 }, { "epoch": 0.7981820149334488, "grad_norm": 1.328125, "learning_rate": 1.6803477992605137e-05, "loss": 0.5267, "step": 4610 }, { "epoch": 0.7983551563683584, "grad_norm": 1.34375, "learning_rate": 1.680214119443143e-05, "loss": 0.5736, "step": 4611 }, { "epoch": 0.7985282978032681, "grad_norm": 1.375, "learning_rate": 1.6800804169987747e-05, "loss": 0.6692, "step": 4612 }, { "epoch": 0.7987014392381777, "grad_norm": 1.2890625, "learning_rate": 1.6799466919318567e-05, "loss": 0.5481, "step": 4613 }, { "epoch": 0.7988745806730874, "grad_norm": 1.3671875, "learning_rate": 1.679812944246837e-05, "loss": 0.6928, "step": 4614 }, { "epoch": 0.799047722107997, "grad_norm": 1.265625, "learning_rate": 1.679679173948165e-05, "loss": 0.5348, "step": 4615 }, { "epoch": 0.7992208635429066, "grad_norm": 1.2890625, "learning_rate": 1.6795453810402906e-05, "loss": 0.5846, "step": 4616 }, { "epoch": 0.7993940049778162, "grad_norm": 1.3671875, "learning_rate": 1.6794115655276638e-05, "loss": 0.6054, "step": 4617 }, { "epoch": 0.7995671464127259, "grad_norm": 1.3046875, "learning_rate": 1.6792777274147363e-05, "loss": 0.6744, "step": 4618 }, { "epoch": 0.7997402878476355, "grad_norm": 1.390625, "learning_rate": 1.6791438667059602e-05, "loss": 0.6212, "step": 4619 }, { "epoch": 0.7999134292825452, "grad_norm": 1.34375, "learning_rate": 1.6790099834057884e-05, "loss": 0.5859, "step": 4620 }, { "epoch": 0.8000865707174548, "grad_norm": 1.328125, "learning_rate": 1.678876077518674e-05, "loss": 0.6634, "step": 4621 }, { "epoch": 0.8002597121523645, "grad_norm": 1.3203125, "learning_rate": 1.6787421490490717e-05, "loss": 0.5479, "step": 4622 }, { "epoch": 0.8004328535872741, "grad_norm": 1.3046875, "learning_rate": 1.6786081980014365e-05, "loss": 0.6137, "step": 4623 }, { "epoch": 0.8006059950221838, "grad_norm": 1.421875, "learning_rate": 1.6784742243802242e-05, "loss": 0.6609, "step": 4624 }, { "epoch": 0.8007791364570934, "grad_norm": 1.3359375, "learning_rate": 1.6783402281898916e-05, "loss": 0.5641, "step": 4625 }, { "epoch": 0.8009522778920031, "grad_norm": 1.2890625, "learning_rate": 1.6782062094348953e-05, "loss": 0.5597, "step": 4626 }, { "epoch": 0.8011254193269127, "grad_norm": 1.1953125, "learning_rate": 1.678072168119694e-05, "loss": 0.5556, "step": 4627 }, { "epoch": 0.8012985607618223, "grad_norm": 1.3046875, "learning_rate": 1.6779381042487463e-05, "loss": 0.6035, "step": 4628 }, { "epoch": 0.8014717021967319, "grad_norm": 1.3125, "learning_rate": 1.677804017826512e-05, "loss": 0.5992, "step": 4629 }, { "epoch": 0.8016448436316416, "grad_norm": 1.34375, "learning_rate": 1.6776699088574512e-05, "loss": 0.5765, "step": 4630 }, { "epoch": 0.8018179850665512, "grad_norm": 1.3359375, "learning_rate": 1.677535777346025e-05, "loss": 0.5657, "step": 4631 }, { "epoch": 0.8019911265014609, "grad_norm": 1.28125, "learning_rate": 1.677401623296695e-05, "loss": 0.6843, "step": 4632 }, { "epoch": 0.8021642679363705, "grad_norm": 1.3984375, "learning_rate": 1.677267446713924e-05, "loss": 0.627, "step": 4633 }, { "epoch": 0.8023374093712802, "grad_norm": 1.375, "learning_rate": 1.677133247602176e-05, "loss": 0.6231, "step": 4634 }, { "epoch": 0.8025105508061898, "grad_norm": 1.28125, "learning_rate": 1.6769990259659142e-05, "loss": 0.5673, "step": 4635 }, { "epoch": 0.8026836922410995, "grad_norm": 1.3046875, "learning_rate": 1.676864781809603e-05, "loss": 0.4816, "step": 4636 }, { "epoch": 0.8028568336760091, "grad_norm": 1.25, "learning_rate": 1.676730515137709e-05, "loss": 0.5403, "step": 4637 }, { "epoch": 0.8030299751109188, "grad_norm": 1.3203125, "learning_rate": 1.6765962259546983e-05, "loss": 0.569, "step": 4638 }, { "epoch": 0.8032031165458283, "grad_norm": 1.2421875, "learning_rate": 1.6764619142650375e-05, "loss": 0.5496, "step": 4639 }, { "epoch": 0.803376257980738, "grad_norm": 1.359375, "learning_rate": 1.676327580073195e-05, "loss": 0.6316, "step": 4640 }, { "epoch": 0.8035493994156476, "grad_norm": 1.40625, "learning_rate": 1.6761932233836388e-05, "loss": 0.6454, "step": 4641 }, { "epoch": 0.8037225408505573, "grad_norm": 1.3203125, "learning_rate": 1.6760588442008383e-05, "loss": 0.5731, "step": 4642 }, { "epoch": 0.8038956822854669, "grad_norm": 1.3359375, "learning_rate": 1.6759244425292638e-05, "loss": 0.5846, "step": 4643 }, { "epoch": 0.8040688237203766, "grad_norm": 1.3359375, "learning_rate": 1.675790018373386e-05, "loss": 0.6159, "step": 4644 }, { "epoch": 0.8042419651552862, "grad_norm": 1.2265625, "learning_rate": 1.675655571737677e-05, "loss": 0.5585, "step": 4645 }, { "epoch": 0.8044151065901959, "grad_norm": 1.328125, "learning_rate": 1.675521102626608e-05, "loss": 0.5475, "step": 4646 }, { "epoch": 0.8045882480251055, "grad_norm": 1.4609375, "learning_rate": 1.675386611044653e-05, "loss": 0.5768, "step": 4647 }, { "epoch": 0.8047613894600152, "grad_norm": 1.3203125, "learning_rate": 1.675252096996285e-05, "loss": 0.5727, "step": 4648 }, { "epoch": 0.8049345308949248, "grad_norm": 1.2578125, "learning_rate": 1.675117560485979e-05, "loss": 0.5466, "step": 4649 }, { "epoch": 0.8051076723298344, "grad_norm": 1.4375, "learning_rate": 1.6749830015182106e-05, "loss": 0.7163, "step": 4650 }, { "epoch": 0.805280813764744, "grad_norm": 1.40625, "learning_rate": 1.6748484200974556e-05, "loss": 0.596, "step": 4651 }, { "epoch": 0.8054539551996537, "grad_norm": 1.2890625, "learning_rate": 1.6747138162281906e-05, "loss": 0.5886, "step": 4652 }, { "epoch": 0.8056270966345633, "grad_norm": 1.234375, "learning_rate": 1.674579189914893e-05, "loss": 0.5968, "step": 4653 }, { "epoch": 0.805800238069473, "grad_norm": 1.296875, "learning_rate": 1.6744445411620412e-05, "loss": 0.5583, "step": 4654 }, { "epoch": 0.8059733795043826, "grad_norm": 1.3046875, "learning_rate": 1.6743098699741148e-05, "loss": 0.5424, "step": 4655 }, { "epoch": 0.8061465209392923, "grad_norm": 1.296875, "learning_rate": 1.6741751763555928e-05, "loss": 0.602, "step": 4656 }, { "epoch": 0.8063196623742019, "grad_norm": 1.3203125, "learning_rate": 1.674040460310956e-05, "loss": 0.5363, "step": 4657 }, { "epoch": 0.8064928038091116, "grad_norm": 1.2734375, "learning_rate": 1.673905721844686e-05, "loss": 0.5429, "step": 4658 }, { "epoch": 0.8066659452440212, "grad_norm": 1.546875, "learning_rate": 1.6737709609612643e-05, "loss": 0.6705, "step": 4659 }, { "epoch": 0.8068390866789309, "grad_norm": 1.171875, "learning_rate": 1.6736361776651737e-05, "loss": 0.5159, "step": 4660 }, { "epoch": 0.8070122281138405, "grad_norm": 1.40625, "learning_rate": 1.673501371960898e-05, "loss": 0.6201, "step": 4661 }, { "epoch": 0.8071853695487501, "grad_norm": 1.28125, "learning_rate": 1.6733665438529214e-05, "loss": 0.6344, "step": 4662 }, { "epoch": 0.8073585109836597, "grad_norm": 1.3984375, "learning_rate": 1.673231693345729e-05, "loss": 0.5881, "step": 4663 }, { "epoch": 0.8075316524185694, "grad_norm": 1.2890625, "learning_rate": 1.6730968204438055e-05, "loss": 0.6153, "step": 4664 }, { "epoch": 0.807704793853479, "grad_norm": 1.3203125, "learning_rate": 1.672961925151639e-05, "loss": 0.5799, "step": 4665 }, { "epoch": 0.8078779352883887, "grad_norm": 1.234375, "learning_rate": 1.6728270074737158e-05, "loss": 0.532, "step": 4666 }, { "epoch": 0.8080510767232983, "grad_norm": 1.3203125, "learning_rate": 1.672692067414524e-05, "loss": 0.5981, "step": 4667 }, { "epoch": 0.808224218158208, "grad_norm": 1.5078125, "learning_rate": 1.6725571049785526e-05, "loss": 0.6529, "step": 4668 }, { "epoch": 0.8083973595931176, "grad_norm": 1.28125, "learning_rate": 1.67242212017029e-05, "loss": 0.5582, "step": 4669 }, { "epoch": 0.8085705010280273, "grad_norm": 1.234375, "learning_rate": 1.6722871129942277e-05, "loss": 0.5819, "step": 4670 }, { "epoch": 0.808743642462937, "grad_norm": 1.3046875, "learning_rate": 1.6721520834548563e-05, "loss": 0.587, "step": 4671 }, { "epoch": 0.8089167838978466, "grad_norm": 1.3203125, "learning_rate": 1.6720170315566672e-05, "loss": 0.5974, "step": 4672 }, { "epoch": 0.8090899253327561, "grad_norm": 1.40625, "learning_rate": 1.671881957304153e-05, "loss": 0.5595, "step": 4673 }, { "epoch": 0.8092630667676658, "grad_norm": 1.453125, "learning_rate": 1.671746860701807e-05, "loss": 0.7157, "step": 4674 }, { "epoch": 0.8094362082025754, "grad_norm": 1.34375, "learning_rate": 1.6716117417541226e-05, "loss": 0.5723, "step": 4675 }, { "epoch": 0.8096093496374851, "grad_norm": 1.3515625, "learning_rate": 1.6714766004655952e-05, "loss": 0.5299, "step": 4676 }, { "epoch": 0.8097824910723948, "grad_norm": 1.3359375, "learning_rate": 1.6713414368407195e-05, "loss": 0.5874, "step": 4677 }, { "epoch": 0.8099556325073044, "grad_norm": 1.40625, "learning_rate": 1.6712062508839927e-05, "loss": 0.5623, "step": 4678 }, { "epoch": 0.810128773942214, "grad_norm": 1.3984375, "learning_rate": 1.6710710425999103e-05, "loss": 0.5955, "step": 4679 }, { "epoch": 0.8103019153771237, "grad_norm": 1.4609375, "learning_rate": 1.670935811992971e-05, "loss": 0.5737, "step": 4680 }, { "epoch": 0.8104750568120334, "grad_norm": 1.328125, "learning_rate": 1.670800559067673e-05, "loss": 0.6359, "step": 4681 }, { "epoch": 0.810648198246943, "grad_norm": 1.328125, "learning_rate": 1.6706652838285146e-05, "loss": 0.6073, "step": 4682 }, { "epoch": 0.8108213396818527, "grad_norm": 1.4296875, "learning_rate": 1.6705299862799972e-05, "loss": 0.5816, "step": 4683 }, { "epoch": 0.8109944811167622, "grad_norm": 1.4140625, "learning_rate": 1.67039466642662e-05, "loss": 0.5985, "step": 4684 }, { "epoch": 0.8111676225516719, "grad_norm": 1.359375, "learning_rate": 1.6702593242728847e-05, "loss": 0.5568, "step": 4685 }, { "epoch": 0.8113407639865815, "grad_norm": 1.234375, "learning_rate": 1.6701239598232942e-05, "loss": 0.5462, "step": 4686 }, { "epoch": 0.8115139054214912, "grad_norm": 1.40625, "learning_rate": 1.66998857308235e-05, "loss": 0.5931, "step": 4687 }, { "epoch": 0.8116870468564008, "grad_norm": 1.4296875, "learning_rate": 1.669853164054557e-05, "loss": 0.6721, "step": 4688 }, { "epoch": 0.8118601882913105, "grad_norm": 1.421875, "learning_rate": 1.6697177327444185e-05, "loss": 0.574, "step": 4689 }, { "epoch": 0.8120333297262201, "grad_norm": 1.4296875, "learning_rate": 1.66958227915644e-05, "loss": 0.5715, "step": 4690 }, { "epoch": 0.8122064711611298, "grad_norm": 1.2734375, "learning_rate": 1.6694468032951272e-05, "loss": 0.5625, "step": 4691 }, { "epoch": 0.8123796125960394, "grad_norm": 1.34375, "learning_rate": 1.669311305164987e-05, "loss": 0.6397, "step": 4692 }, { "epoch": 0.8125527540309491, "grad_norm": 1.453125, "learning_rate": 1.6691757847705262e-05, "loss": 0.5916, "step": 4693 }, { "epoch": 0.8127258954658587, "grad_norm": 1.296875, "learning_rate": 1.669040242116253e-05, "loss": 0.5612, "step": 4694 }, { "epoch": 0.8128990369007684, "grad_norm": 1.4921875, "learning_rate": 1.6689046772066762e-05, "loss": 0.6084, "step": 4695 }, { "epoch": 0.8130721783356779, "grad_norm": 1.28125, "learning_rate": 1.6687690900463053e-05, "loss": 0.606, "step": 4696 }, { "epoch": 0.8132453197705876, "grad_norm": 1.3984375, "learning_rate": 1.6686334806396504e-05, "loss": 0.6221, "step": 4697 }, { "epoch": 0.8134184612054972, "grad_norm": 1.3515625, "learning_rate": 1.6684978489912225e-05, "loss": 0.6014, "step": 4698 }, { "epoch": 0.8135916026404069, "grad_norm": 1.359375, "learning_rate": 1.6683621951055337e-05, "loss": 0.6134, "step": 4699 }, { "epoch": 0.8137647440753165, "grad_norm": 1.3515625, "learning_rate": 1.668226518987096e-05, "loss": 0.6662, "step": 4700 }, { "epoch": 0.8139378855102262, "grad_norm": 1.3046875, "learning_rate": 1.6680908206404226e-05, "loss": 0.5669, "step": 4701 }, { "epoch": 0.8141110269451358, "grad_norm": 1.3125, "learning_rate": 1.6679551000700277e-05, "loss": 0.5365, "step": 4702 }, { "epoch": 0.8142841683800455, "grad_norm": 1.3828125, "learning_rate": 1.667819357280426e-05, "loss": 0.6015, "step": 4703 }, { "epoch": 0.8144573098149551, "grad_norm": 1.25, "learning_rate": 1.667683592276133e-05, "loss": 0.5162, "step": 4704 }, { "epoch": 0.8146304512498648, "grad_norm": 1.4921875, "learning_rate": 1.6675478050616646e-05, "loss": 0.6367, "step": 4705 }, { "epoch": 0.8148035926847744, "grad_norm": 1.3828125, "learning_rate": 1.6674119956415372e-05, "loss": 0.5613, "step": 4706 }, { "epoch": 0.814976734119684, "grad_norm": 1.3515625, "learning_rate": 1.6672761640202695e-05, "loss": 0.6292, "step": 4707 }, { "epoch": 0.8151498755545936, "grad_norm": 1.4375, "learning_rate": 1.6671403102023793e-05, "loss": 0.6035, "step": 4708 }, { "epoch": 0.8153230169895033, "grad_norm": 1.515625, "learning_rate": 1.6670044341923858e-05, "loss": 0.562, "step": 4709 }, { "epoch": 0.8154961584244129, "grad_norm": 1.375, "learning_rate": 1.666868535994809e-05, "loss": 0.6013, "step": 4710 }, { "epoch": 0.8156692998593226, "grad_norm": 1.2890625, "learning_rate": 1.666732615614169e-05, "loss": 0.6369, "step": 4711 }, { "epoch": 0.8158424412942322, "grad_norm": 1.328125, "learning_rate": 1.6665966730549877e-05, "loss": 0.614, "step": 4712 }, { "epoch": 0.8160155827291419, "grad_norm": 1.2890625, "learning_rate": 1.6664607083217865e-05, "loss": 0.5206, "step": 4713 }, { "epoch": 0.8161887241640515, "grad_norm": 1.234375, "learning_rate": 1.666324721419089e-05, "loss": 0.5757, "step": 4714 }, { "epoch": 0.8163618655989612, "grad_norm": 1.25, "learning_rate": 1.6661887123514183e-05, "loss": 0.5938, "step": 4715 }, { "epoch": 0.8165350070338708, "grad_norm": 1.3203125, "learning_rate": 1.666052681123299e-05, "loss": 0.5875, "step": 4716 }, { "epoch": 0.8167081484687805, "grad_norm": 1.5, "learning_rate": 1.6659166277392555e-05, "loss": 0.6847, "step": 4717 }, { "epoch": 0.81688128990369, "grad_norm": 1.2109375, "learning_rate": 1.6657805522038137e-05, "loss": 0.49, "step": 4718 }, { "epoch": 0.8170544313385997, "grad_norm": 1.2890625, "learning_rate": 1.6656444545215005e-05, "loss": 0.5438, "step": 4719 }, { "epoch": 0.8172275727735093, "grad_norm": 1.484375, "learning_rate": 1.665508334696843e-05, "loss": 0.6271, "step": 4720 }, { "epoch": 0.817400714208419, "grad_norm": 1.328125, "learning_rate": 1.6653721927343692e-05, "loss": 0.5831, "step": 4721 }, { "epoch": 0.8175738556433286, "grad_norm": 1.2890625, "learning_rate": 1.6652360286386072e-05, "loss": 0.563, "step": 4722 }, { "epoch": 0.8177469970782383, "grad_norm": 1.390625, "learning_rate": 1.665099842414087e-05, "loss": 0.6529, "step": 4723 }, { "epoch": 0.8179201385131479, "grad_norm": 1.2578125, "learning_rate": 1.664963634065339e-05, "loss": 0.5544, "step": 4724 }, { "epoch": 0.8180932799480576, "grad_norm": 1.3515625, "learning_rate": 1.6648274035968936e-05, "loss": 0.6389, "step": 4725 }, { "epoch": 0.8182664213829672, "grad_norm": 1.2265625, "learning_rate": 1.6646911510132828e-05, "loss": 0.5316, "step": 4726 }, { "epoch": 0.8184395628178769, "grad_norm": 1.2265625, "learning_rate": 1.6645548763190387e-05, "loss": 0.5286, "step": 4727 }, { "epoch": 0.8186127042527865, "grad_norm": 1.34375, "learning_rate": 1.6644185795186946e-05, "loss": 0.6086, "step": 4728 }, { "epoch": 0.8187858456876962, "grad_norm": 1.265625, "learning_rate": 1.664282260616784e-05, "loss": 0.5081, "step": 4729 }, { "epoch": 0.8189589871226057, "grad_norm": 1.328125, "learning_rate": 1.6641459196178415e-05, "loss": 0.56, "step": 4730 }, { "epoch": 0.8191321285575154, "grad_norm": 1.296875, "learning_rate": 1.6640095565264032e-05, "loss": 0.6278, "step": 4731 }, { "epoch": 0.819305269992425, "grad_norm": 1.3671875, "learning_rate": 1.6638731713470043e-05, "loss": 0.5906, "step": 4732 }, { "epoch": 0.8194784114273347, "grad_norm": 1.3203125, "learning_rate": 1.6637367640841818e-05, "loss": 0.6386, "step": 4733 }, { "epoch": 0.8196515528622443, "grad_norm": 1.3671875, "learning_rate": 1.6636003347424735e-05, "loss": 0.5636, "step": 4734 }, { "epoch": 0.819824694297154, "grad_norm": 1.28125, "learning_rate": 1.6634638833264175e-05, "loss": 0.5994, "step": 4735 }, { "epoch": 0.8199978357320636, "grad_norm": 1.2890625, "learning_rate": 1.663327409840553e-05, "loss": 0.6007, "step": 4736 }, { "epoch": 0.8201709771669733, "grad_norm": 1.4140625, "learning_rate": 1.663190914289419e-05, "loss": 0.5637, "step": 4737 }, { "epoch": 0.8203441186018829, "grad_norm": 1.2890625, "learning_rate": 1.663054396677557e-05, "loss": 0.6194, "step": 4738 }, { "epoch": 0.8205172600367926, "grad_norm": 1.25, "learning_rate": 1.662917857009507e-05, "loss": 0.5675, "step": 4739 }, { "epoch": 0.8206904014717022, "grad_norm": 1.3203125, "learning_rate": 1.662781295289812e-05, "loss": 0.5956, "step": 4740 }, { "epoch": 0.8208635429066118, "grad_norm": 1.421875, "learning_rate": 1.662644711523014e-05, "loss": 0.7534, "step": 4741 }, { "epoch": 0.8210366843415214, "grad_norm": 1.3671875, "learning_rate": 1.6625081057136573e-05, "loss": 0.567, "step": 4742 }, { "epoch": 0.8212098257764311, "grad_norm": 1.4375, "learning_rate": 1.6623714778662846e-05, "loss": 0.6469, "step": 4743 }, { "epoch": 0.8213829672113407, "grad_norm": 1.390625, "learning_rate": 1.662234827985442e-05, "loss": 0.5499, "step": 4744 }, { "epoch": 0.8215561086462504, "grad_norm": 1.3125, "learning_rate": 1.662098156075674e-05, "loss": 0.6358, "step": 4745 }, { "epoch": 0.82172925008116, "grad_norm": 1.2265625, "learning_rate": 1.661961462141528e-05, "loss": 0.5917, "step": 4746 }, { "epoch": 0.8219023915160697, "grad_norm": 1.25, "learning_rate": 1.6618247461875504e-05, "loss": 0.5742, "step": 4747 }, { "epoch": 0.8220755329509793, "grad_norm": 1.2578125, "learning_rate": 1.6616880082182893e-05, "loss": 0.6987, "step": 4748 }, { "epoch": 0.822248674385889, "grad_norm": 1.421875, "learning_rate": 1.6615512482382932e-05, "loss": 0.5685, "step": 4749 }, { "epoch": 0.8224218158207987, "grad_norm": 1.3046875, "learning_rate": 1.661414466252111e-05, "loss": 0.5387, "step": 4750 }, { "epoch": 0.8225949572557083, "grad_norm": 1.28125, "learning_rate": 1.661277662264293e-05, "loss": 0.5839, "step": 4751 }, { "epoch": 0.8227680986906178, "grad_norm": 1.546875, "learning_rate": 1.6611408362793898e-05, "loss": 0.6669, "step": 4752 }, { "epoch": 0.8229412401255275, "grad_norm": 1.53125, "learning_rate": 1.6610039883019534e-05, "loss": 0.6586, "step": 4753 }, { "epoch": 0.8231143815604371, "grad_norm": 1.2890625, "learning_rate": 1.660867118336535e-05, "loss": 0.6048, "step": 4754 }, { "epoch": 0.8232875229953468, "grad_norm": 1.28125, "learning_rate": 1.6607302263876884e-05, "loss": 0.6327, "step": 4755 }, { "epoch": 0.8234606644302565, "grad_norm": 1.203125, "learning_rate": 1.660593312459966e-05, "loss": 0.5591, "step": 4756 }, { "epoch": 0.8236338058651661, "grad_norm": 1.3671875, "learning_rate": 1.660456376557924e-05, "loss": 0.5729, "step": 4757 }, { "epoch": 0.8238069473000758, "grad_norm": 1.21875, "learning_rate": 1.660319418686116e-05, "loss": 0.5295, "step": 4758 }, { "epoch": 0.8239800887349854, "grad_norm": 1.296875, "learning_rate": 1.6601824388490987e-05, "loss": 0.5317, "step": 4759 }, { "epoch": 0.8241532301698951, "grad_norm": 1.3515625, "learning_rate": 1.6600454370514284e-05, "loss": 0.5832, "step": 4760 }, { "epoch": 0.8243263716048047, "grad_norm": 1.3046875, "learning_rate": 1.659908413297662e-05, "loss": 0.6246, "step": 4761 }, { "epoch": 0.8244995130397144, "grad_norm": 1.3046875, "learning_rate": 1.659771367592358e-05, "loss": 0.5743, "step": 4762 }, { "epoch": 0.824672654474624, "grad_norm": 1.4453125, "learning_rate": 1.659634299940075e-05, "loss": 0.6152, "step": 4763 }, { "epoch": 0.8248457959095336, "grad_norm": 1.40625, "learning_rate": 1.6594972103453727e-05, "loss": 0.6067, "step": 4764 }, { "epoch": 0.8250189373444432, "grad_norm": 1.3515625, "learning_rate": 1.6593600988128107e-05, "loss": 0.5487, "step": 4765 }, { "epoch": 0.8251920787793529, "grad_norm": 1.2890625, "learning_rate": 1.6592229653469513e-05, "loss": 0.5533, "step": 4766 }, { "epoch": 0.8253652202142625, "grad_norm": 1.3515625, "learning_rate": 1.6590858099523545e-05, "loss": 0.5973, "step": 4767 }, { "epoch": 0.8255383616491722, "grad_norm": 1.2578125, "learning_rate": 1.6589486326335834e-05, "loss": 0.6221, "step": 4768 }, { "epoch": 0.8257115030840818, "grad_norm": 1.265625, "learning_rate": 1.6588114333952016e-05, "loss": 0.5301, "step": 4769 }, { "epoch": 0.8258846445189915, "grad_norm": 1.3125, "learning_rate": 1.6586742122417728e-05, "loss": 0.6459, "step": 4770 }, { "epoch": 0.8260577859539011, "grad_norm": 1.21875, "learning_rate": 1.658536969177861e-05, "loss": 0.504, "step": 4771 }, { "epoch": 0.8262309273888108, "grad_norm": 1.25, "learning_rate": 1.6583997042080317e-05, "loss": 0.5535, "step": 4772 }, { "epoch": 0.8264040688237204, "grad_norm": 1.265625, "learning_rate": 1.6582624173368517e-05, "loss": 0.5791, "step": 4773 }, { "epoch": 0.8265772102586301, "grad_norm": 1.2578125, "learning_rate": 1.658125108568887e-05, "loss": 0.5393, "step": 4774 }, { "epoch": 0.8267503516935396, "grad_norm": 1.359375, "learning_rate": 1.6579877779087055e-05, "loss": 0.5665, "step": 4775 }, { "epoch": 0.8269234931284493, "grad_norm": 1.3203125, "learning_rate": 1.657850425360875e-05, "loss": 0.6132, "step": 4776 }, { "epoch": 0.8270966345633589, "grad_norm": 1.328125, "learning_rate": 1.6577130509299647e-05, "loss": 0.5766, "step": 4777 }, { "epoch": 0.8272697759982686, "grad_norm": 1.3515625, "learning_rate": 1.6575756546205446e-05, "loss": 0.5914, "step": 4778 }, { "epoch": 0.8274429174331782, "grad_norm": 1.2890625, "learning_rate": 1.6574382364371852e-05, "loss": 0.5623, "step": 4779 }, { "epoch": 0.8276160588680879, "grad_norm": 1.21875, "learning_rate": 1.657300796384457e-05, "loss": 0.6547, "step": 4780 }, { "epoch": 0.8277892003029975, "grad_norm": 1.3515625, "learning_rate": 1.657163334466932e-05, "loss": 0.6548, "step": 4781 }, { "epoch": 0.8279623417379072, "grad_norm": 1.2734375, "learning_rate": 1.6570258506891832e-05, "loss": 0.622, "step": 4782 }, { "epoch": 0.8281354831728168, "grad_norm": 1.2890625, "learning_rate": 1.6568883450557834e-05, "loss": 0.5339, "step": 4783 }, { "epoch": 0.8283086246077265, "grad_norm": 1.171875, "learning_rate": 1.6567508175713077e-05, "loss": 0.5736, "step": 4784 }, { "epoch": 0.8284817660426361, "grad_norm": 1.40625, "learning_rate": 1.6566132682403296e-05, "loss": 0.5642, "step": 4785 }, { "epoch": 0.8286549074775457, "grad_norm": 2.640625, "learning_rate": 1.6564756970674256e-05, "loss": 0.6854, "step": 4786 }, { "epoch": 0.8288280489124553, "grad_norm": 1.390625, "learning_rate": 1.6563381040571712e-05, "loss": 0.5917, "step": 4787 }, { "epoch": 0.829001190347365, "grad_norm": 1.3203125, "learning_rate": 1.6562004892141442e-05, "loss": 0.5704, "step": 4788 }, { "epoch": 0.8291743317822746, "grad_norm": 1.296875, "learning_rate": 1.6560628525429213e-05, "loss": 0.5792, "step": 4789 }, { "epoch": 0.8293474732171843, "grad_norm": 1.2890625, "learning_rate": 1.6559251940480815e-05, "loss": 0.5195, "step": 4790 }, { "epoch": 0.8295206146520939, "grad_norm": 1.28125, "learning_rate": 1.655787513734204e-05, "loss": 0.6208, "step": 4791 }, { "epoch": 0.8296937560870036, "grad_norm": 1.3828125, "learning_rate": 1.6556498116058687e-05, "loss": 0.5477, "step": 4792 }, { "epoch": 0.8298668975219132, "grad_norm": 1.4140625, "learning_rate": 1.6555120876676557e-05, "loss": 0.5944, "step": 4793 }, { "epoch": 0.8300400389568229, "grad_norm": 1.21875, "learning_rate": 1.6553743419241466e-05, "loss": 0.5558, "step": 4794 }, { "epoch": 0.8302131803917325, "grad_norm": 1.2421875, "learning_rate": 1.6552365743799237e-05, "loss": 0.5707, "step": 4795 }, { "epoch": 0.8303863218266422, "grad_norm": 1.1640625, "learning_rate": 1.6550987850395698e-05, "loss": 0.5385, "step": 4796 }, { "epoch": 0.8305594632615518, "grad_norm": 1.296875, "learning_rate": 1.6549609739076677e-05, "loss": 0.6118, "step": 4797 }, { "epoch": 0.8307326046964614, "grad_norm": 1.6328125, "learning_rate": 1.6548231409888022e-05, "loss": 0.6184, "step": 4798 }, { "epoch": 0.830905746131371, "grad_norm": 1.3125, "learning_rate": 1.6546852862875578e-05, "loss": 0.5618, "step": 4799 }, { "epoch": 0.8310788875662807, "grad_norm": 1.4296875, "learning_rate": 1.654547409808521e-05, "loss": 0.6172, "step": 4800 }, { "epoch": 0.8312520290011903, "grad_norm": 1.28125, "learning_rate": 1.6544095115562778e-05, "loss": 0.5001, "step": 4801 }, { "epoch": 0.8314251704361, "grad_norm": 1.4921875, "learning_rate": 1.654271591535415e-05, "loss": 0.6561, "step": 4802 }, { "epoch": 0.8315983118710096, "grad_norm": 1.2109375, "learning_rate": 1.6541336497505207e-05, "loss": 0.5249, "step": 4803 }, { "epoch": 0.8317714533059193, "grad_norm": 1.2421875, "learning_rate": 1.6539956862061833e-05, "loss": 0.5496, "step": 4804 }, { "epoch": 0.8319445947408289, "grad_norm": 1.3359375, "learning_rate": 1.6538577009069922e-05, "loss": 0.5334, "step": 4805 }, { "epoch": 0.8321177361757386, "grad_norm": 1.3046875, "learning_rate": 1.6537196938575376e-05, "loss": 0.5531, "step": 4806 }, { "epoch": 0.8322908776106482, "grad_norm": 1.328125, "learning_rate": 1.65358166506241e-05, "loss": 0.6038, "step": 4807 }, { "epoch": 0.8324640190455579, "grad_norm": 1.2734375, "learning_rate": 1.653443614526201e-05, "loss": 0.5597, "step": 4808 }, { "epoch": 0.8326371604804674, "grad_norm": 1.2265625, "learning_rate": 1.6533055422535026e-05, "loss": 0.5111, "step": 4809 }, { "epoch": 0.8328103019153771, "grad_norm": 1.359375, "learning_rate": 1.6531674482489078e-05, "loss": 0.6618, "step": 4810 }, { "epoch": 0.8329834433502867, "grad_norm": 1.34375, "learning_rate": 1.65302933251701e-05, "loss": 0.5676, "step": 4811 }, { "epoch": 0.8331565847851964, "grad_norm": 1.4453125, "learning_rate": 1.6528911950624044e-05, "loss": 0.6364, "step": 4812 }, { "epoch": 0.833329726220106, "grad_norm": 1.390625, "learning_rate": 1.652753035889685e-05, "loss": 0.6045, "step": 4813 }, { "epoch": 0.8335028676550157, "grad_norm": 1.4609375, "learning_rate": 1.6526148550034484e-05, "loss": 0.6329, "step": 4814 }, { "epoch": 0.8336760090899253, "grad_norm": 1.4375, "learning_rate": 1.6524766524082908e-05, "loss": 0.564, "step": 4815 }, { "epoch": 0.833849150524835, "grad_norm": 1.3828125, "learning_rate": 1.652338428108809e-05, "loss": 0.5602, "step": 4816 }, { "epoch": 0.8340222919597446, "grad_norm": 1.34375, "learning_rate": 1.652200182109602e-05, "loss": 0.6034, "step": 4817 }, { "epoch": 0.8341954333946543, "grad_norm": 1.25, "learning_rate": 1.652061914415268e-05, "loss": 0.6169, "step": 4818 }, { "epoch": 0.834368574829564, "grad_norm": 1.4375, "learning_rate": 1.6519236250304058e-05, "loss": 0.5422, "step": 4819 }, { "epoch": 0.8345417162644735, "grad_norm": 1.4375, "learning_rate": 1.651785313959616e-05, "loss": 0.6809, "step": 4820 }, { "epoch": 0.8347148576993831, "grad_norm": 1.3984375, "learning_rate": 1.6516469812075e-05, "loss": 0.6567, "step": 4821 }, { "epoch": 0.8348879991342928, "grad_norm": 1.328125, "learning_rate": 1.6515086267786582e-05, "loss": 0.5425, "step": 4822 }, { "epoch": 0.8350611405692024, "grad_norm": 1.2734375, "learning_rate": 1.6513702506776942e-05, "loss": 0.5612, "step": 4823 }, { "epoch": 0.8352342820041121, "grad_norm": 1.2734375, "learning_rate": 1.65123185290921e-05, "loss": 0.6514, "step": 4824 }, { "epoch": 0.8354074234390217, "grad_norm": 1.28125, "learning_rate": 1.6510934334778098e-05, "loss": 0.5649, "step": 4825 }, { "epoch": 0.8355805648739314, "grad_norm": 1.265625, "learning_rate": 1.650954992388098e-05, "loss": 0.6059, "step": 4826 }, { "epoch": 0.835753706308841, "grad_norm": 1.4921875, "learning_rate": 1.6508165296446796e-05, "loss": 0.578, "step": 4827 }, { "epoch": 0.8359268477437507, "grad_norm": 1.515625, "learning_rate": 1.650678045252161e-05, "loss": 0.6211, "step": 4828 }, { "epoch": 0.8360999891786604, "grad_norm": 1.3515625, "learning_rate": 1.650539539215148e-05, "loss": 0.6014, "step": 4829 }, { "epoch": 0.83627313061357, "grad_norm": 1.3515625, "learning_rate": 1.6504010115382486e-05, "loss": 0.6055, "step": 4830 }, { "epoch": 0.8364462720484797, "grad_norm": 1.2578125, "learning_rate": 1.6502624622260706e-05, "loss": 0.5945, "step": 4831 }, { "epoch": 0.8366194134833892, "grad_norm": 1.328125, "learning_rate": 1.6501238912832226e-05, "loss": 0.6109, "step": 4832 }, { "epoch": 0.8367925549182988, "grad_norm": 1.15625, "learning_rate": 1.6499852987143147e-05, "loss": 0.5193, "step": 4833 }, { "epoch": 0.8369656963532085, "grad_norm": 1.28125, "learning_rate": 1.6498466845239567e-05, "loss": 0.6037, "step": 4834 }, { "epoch": 0.8371388377881182, "grad_norm": 1.25, "learning_rate": 1.6497080487167595e-05, "loss": 0.5621, "step": 4835 }, { "epoch": 0.8373119792230278, "grad_norm": 1.3125, "learning_rate": 1.649569391297335e-05, "loss": 0.6203, "step": 4836 }, { "epoch": 0.8374851206579375, "grad_norm": 1.3046875, "learning_rate": 1.649430712270295e-05, "loss": 0.6287, "step": 4837 }, { "epoch": 0.8376582620928471, "grad_norm": 1.3203125, "learning_rate": 1.6492920116402534e-05, "loss": 0.5844, "step": 4838 }, { "epoch": 0.8378314035277568, "grad_norm": 1.328125, "learning_rate": 1.6491532894118232e-05, "loss": 0.6181, "step": 4839 }, { "epoch": 0.8380045449626664, "grad_norm": 1.328125, "learning_rate": 1.6490145455896195e-05, "loss": 0.5949, "step": 4840 }, { "epoch": 0.8381776863975761, "grad_norm": 1.3125, "learning_rate": 1.6488757801782576e-05, "loss": 0.555, "step": 4841 }, { "epoch": 0.8383508278324857, "grad_norm": 1.2421875, "learning_rate": 1.648736993182353e-05, "loss": 0.5431, "step": 4842 }, { "epoch": 0.8385239692673953, "grad_norm": 1.359375, "learning_rate": 1.6485981846065225e-05, "loss": 0.6251, "step": 4843 }, { "epoch": 0.8386971107023049, "grad_norm": 1.390625, "learning_rate": 1.648459354455384e-05, "loss": 0.6443, "step": 4844 }, { "epoch": 0.8388702521372146, "grad_norm": 1.1953125, "learning_rate": 1.648320502733555e-05, "loss": 0.546, "step": 4845 }, { "epoch": 0.8390433935721242, "grad_norm": 1.3125, "learning_rate": 1.6481816294456546e-05, "loss": 0.5587, "step": 4846 }, { "epoch": 0.8392165350070339, "grad_norm": 1.328125, "learning_rate": 1.6480427345963024e-05, "loss": 0.6324, "step": 4847 }, { "epoch": 0.8393896764419435, "grad_norm": 1.4140625, "learning_rate": 1.6479038181901183e-05, "loss": 0.5774, "step": 4848 }, { "epoch": 0.8395628178768532, "grad_norm": 1.28125, "learning_rate": 1.6477648802317242e-05, "loss": 0.5744, "step": 4849 }, { "epoch": 0.8397359593117628, "grad_norm": 1.2890625, "learning_rate": 1.6476259207257407e-05, "loss": 0.6008, "step": 4850 }, { "epoch": 0.8399091007466725, "grad_norm": 1.4765625, "learning_rate": 1.647486939676791e-05, "loss": 0.7456, "step": 4851 }, { "epoch": 0.8400822421815821, "grad_norm": 1.2734375, "learning_rate": 1.6473479370894975e-05, "loss": 0.5312, "step": 4852 }, { "epoch": 0.8402553836164918, "grad_norm": 1.3828125, "learning_rate": 1.6472089129684848e-05, "loss": 0.5999, "step": 4853 }, { "epoch": 0.8404285250514013, "grad_norm": 1.4765625, "learning_rate": 1.6470698673183774e-05, "loss": 0.6561, "step": 4854 }, { "epoch": 0.840601666486311, "grad_norm": 1.40625, "learning_rate": 1.6469308001438e-05, "loss": 0.5945, "step": 4855 }, { "epoch": 0.8407748079212206, "grad_norm": 1.5546875, "learning_rate": 1.6467917114493794e-05, "loss": 0.6443, "step": 4856 }, { "epoch": 0.8409479493561303, "grad_norm": 1.3515625, "learning_rate": 1.6466526012397418e-05, "loss": 0.6332, "step": 4857 }, { "epoch": 0.8411210907910399, "grad_norm": 1.4609375, "learning_rate": 1.646513469519514e-05, "loss": 0.641, "step": 4858 }, { "epoch": 0.8412942322259496, "grad_norm": 1.4453125, "learning_rate": 1.6463743162933256e-05, "loss": 0.5614, "step": 4859 }, { "epoch": 0.8414673736608592, "grad_norm": 1.3046875, "learning_rate": 1.6462351415658047e-05, "loss": 0.5819, "step": 4860 }, { "epoch": 0.8416405150957689, "grad_norm": 1.40625, "learning_rate": 1.6460959453415807e-05, "loss": 0.5661, "step": 4861 }, { "epoch": 0.8418136565306785, "grad_norm": 1.3046875, "learning_rate": 1.6459567276252844e-05, "loss": 0.5904, "step": 4862 }, { "epoch": 0.8419867979655882, "grad_norm": 1.328125, "learning_rate": 1.6458174884215464e-05, "loss": 0.6238, "step": 4863 }, { "epoch": 0.8421599394004978, "grad_norm": 1.359375, "learning_rate": 1.6456782277349984e-05, "loss": 0.5952, "step": 4864 }, { "epoch": 0.8423330808354075, "grad_norm": 1.3359375, "learning_rate": 1.645538945570273e-05, "loss": 0.6376, "step": 4865 }, { "epoch": 0.842506222270317, "grad_norm": 1.3984375, "learning_rate": 1.6453996419320036e-05, "loss": 0.6413, "step": 4866 }, { "epoch": 0.8426793637052267, "grad_norm": 1.1953125, "learning_rate": 1.6452603168248235e-05, "loss": 0.5027, "step": 4867 }, { "epoch": 0.8428525051401363, "grad_norm": 1.328125, "learning_rate": 1.645120970253368e-05, "loss": 0.6203, "step": 4868 }, { "epoch": 0.843025646575046, "grad_norm": 1.28125, "learning_rate": 1.6449816022222712e-05, "loss": 0.5482, "step": 4869 }, { "epoch": 0.8431987880099556, "grad_norm": 1.359375, "learning_rate": 1.6448422127361707e-05, "loss": 0.5565, "step": 4870 }, { "epoch": 0.8433719294448653, "grad_norm": 1.484375, "learning_rate": 1.644702801799702e-05, "loss": 0.6587, "step": 4871 }, { "epoch": 0.8435450708797749, "grad_norm": 1.3984375, "learning_rate": 1.6445633694175034e-05, "loss": 0.5071, "step": 4872 }, { "epoch": 0.8437182123146846, "grad_norm": 1.4375, "learning_rate": 1.6444239155942124e-05, "loss": 0.5834, "step": 4873 }, { "epoch": 0.8438913537495942, "grad_norm": 1.296875, "learning_rate": 1.644284440334468e-05, "loss": 0.5727, "step": 4874 }, { "epoch": 0.8440644951845039, "grad_norm": 1.3671875, "learning_rate": 1.64414494364291e-05, "loss": 0.577, "step": 4875 }, { "epoch": 0.8442376366194135, "grad_norm": 1.3359375, "learning_rate": 1.6440054255241787e-05, "loss": 0.5472, "step": 4876 }, { "epoch": 0.8444107780543231, "grad_norm": 1.328125, "learning_rate": 1.6438658859829146e-05, "loss": 0.5186, "step": 4877 }, { "epoch": 0.8445839194892327, "grad_norm": 1.28125, "learning_rate": 1.64372632502376e-05, "loss": 0.6087, "step": 4878 }, { "epoch": 0.8447570609241424, "grad_norm": 1.25, "learning_rate": 1.643586742651357e-05, "loss": 0.4926, "step": 4879 }, { "epoch": 0.844930202359052, "grad_norm": 1.3984375, "learning_rate": 1.6434471388703488e-05, "loss": 0.6089, "step": 4880 }, { "epoch": 0.8451033437939617, "grad_norm": 1.1796875, "learning_rate": 1.6433075136853794e-05, "loss": 0.5652, "step": 4881 }, { "epoch": 0.8452764852288713, "grad_norm": 1.2109375, "learning_rate": 1.6431678671010933e-05, "loss": 0.5456, "step": 4882 }, { "epoch": 0.845449626663781, "grad_norm": 1.390625, "learning_rate": 1.6430281991221356e-05, "loss": 0.596, "step": 4883 }, { "epoch": 0.8456227680986906, "grad_norm": 1.359375, "learning_rate": 1.6428885097531524e-05, "loss": 0.5542, "step": 4884 }, { "epoch": 0.8457959095336003, "grad_norm": 1.3359375, "learning_rate": 1.6427487989987903e-05, "loss": 0.5464, "step": 4885 }, { "epoch": 0.8459690509685099, "grad_norm": 1.328125, "learning_rate": 1.642609066863697e-05, "loss": 0.6035, "step": 4886 }, { "epoch": 0.8461421924034196, "grad_norm": 1.3515625, "learning_rate": 1.6424693133525206e-05, "loss": 0.5981, "step": 4887 }, { "epoch": 0.8463153338383291, "grad_norm": 1.3046875, "learning_rate": 1.6423295384699097e-05, "loss": 0.5781, "step": 4888 }, { "epoch": 0.8464884752732388, "grad_norm": 1.3515625, "learning_rate": 1.6421897422205143e-05, "loss": 0.5302, "step": 4889 }, { "epoch": 0.8466616167081484, "grad_norm": 1.515625, "learning_rate": 1.6420499246089838e-05, "loss": 0.671, "step": 4890 }, { "epoch": 0.8468347581430581, "grad_norm": 1.4296875, "learning_rate": 1.6419100856399695e-05, "loss": 0.4947, "step": 4891 }, { "epoch": 0.8470078995779677, "grad_norm": 1.34375, "learning_rate": 1.6417702253181237e-05, "loss": 0.568, "step": 4892 }, { "epoch": 0.8471810410128774, "grad_norm": 1.3125, "learning_rate": 1.6416303436480983e-05, "loss": 0.6667, "step": 4893 }, { "epoch": 0.847354182447787, "grad_norm": 1.3125, "learning_rate": 1.6414904406345464e-05, "loss": 0.6378, "step": 4894 }, { "epoch": 0.8475273238826967, "grad_norm": 1.4296875, "learning_rate": 1.6413505162821218e-05, "loss": 0.5788, "step": 4895 }, { "epoch": 0.8477004653176063, "grad_norm": 1.296875, "learning_rate": 1.6412105705954787e-05, "loss": 0.5711, "step": 4896 }, { "epoch": 0.847873606752516, "grad_norm": 1.375, "learning_rate": 1.641070603579273e-05, "loss": 0.5663, "step": 4897 }, { "epoch": 0.8480467481874256, "grad_norm": 1.59375, "learning_rate": 1.6409306152381602e-05, "loss": 0.6245, "step": 4898 }, { "epoch": 0.8482198896223353, "grad_norm": 1.3125, "learning_rate": 1.6407906055767974e-05, "loss": 0.5306, "step": 4899 }, { "epoch": 0.8483930310572448, "grad_norm": 1.390625, "learning_rate": 1.640650574599841e-05, "loss": 0.6403, "step": 4900 }, { "epoch": 0.8485661724921545, "grad_norm": 1.2421875, "learning_rate": 1.6405105223119503e-05, "loss": 0.543, "step": 4901 }, { "epoch": 0.8487393139270641, "grad_norm": 1.4921875, "learning_rate": 1.6403704487177828e-05, "loss": 0.5475, "step": 4902 }, { "epoch": 0.8489124553619738, "grad_norm": 1.2890625, "learning_rate": 1.6402303538219987e-05, "loss": 0.5823, "step": 4903 }, { "epoch": 0.8490855967968834, "grad_norm": 1.34375, "learning_rate": 1.6400902376292587e-05, "loss": 0.6326, "step": 4904 }, { "epoch": 0.8492587382317931, "grad_norm": 1.3046875, "learning_rate": 1.6399501001442227e-05, "loss": 0.6021, "step": 4905 }, { "epoch": 0.8494318796667027, "grad_norm": 1.609375, "learning_rate": 1.639809941371553e-05, "loss": 0.5729, "step": 4906 }, { "epoch": 0.8496050211016124, "grad_norm": 1.1875, "learning_rate": 1.639669761315911e-05, "loss": 0.5592, "step": 4907 }, { "epoch": 0.849778162536522, "grad_norm": 1.2890625, "learning_rate": 1.639529559981961e-05, "loss": 0.5412, "step": 4908 }, { "epoch": 0.8499513039714317, "grad_norm": 1.3359375, "learning_rate": 1.639389337374366e-05, "loss": 0.6043, "step": 4909 }, { "epoch": 0.8501244454063414, "grad_norm": 1.21875, "learning_rate": 1.63924909349779e-05, "loss": 0.562, "step": 4910 }, { "epoch": 0.8502975868412509, "grad_norm": 1.3125, "learning_rate": 1.6391088283568992e-05, "loss": 0.5794, "step": 4911 }, { "epoch": 0.8504707282761605, "grad_norm": 1.359375, "learning_rate": 1.6389685419563587e-05, "loss": 0.6103, "step": 4912 }, { "epoch": 0.8506438697110702, "grad_norm": 1.359375, "learning_rate": 1.6388282343008355e-05, "loss": 0.6488, "step": 4913 }, { "epoch": 0.8508170111459799, "grad_norm": 1.375, "learning_rate": 1.6386879053949964e-05, "loss": 0.6146, "step": 4914 }, { "epoch": 0.8509901525808895, "grad_norm": 1.3046875, "learning_rate": 1.6385475552435097e-05, "loss": 0.5626, "step": 4915 }, { "epoch": 0.8511632940157992, "grad_norm": 1.359375, "learning_rate": 1.638407183851044e-05, "loss": 0.5856, "step": 4916 }, { "epoch": 0.8513364354507088, "grad_norm": 1.4375, "learning_rate": 1.6382667912222688e-05, "loss": 0.5681, "step": 4917 }, { "epoch": 0.8515095768856185, "grad_norm": 1.2890625, "learning_rate": 1.638126377361854e-05, "loss": 0.5313, "step": 4918 }, { "epoch": 0.8516827183205281, "grad_norm": 1.25, "learning_rate": 1.6379859422744705e-05, "loss": 0.5732, "step": 4919 }, { "epoch": 0.8518558597554378, "grad_norm": 1.2265625, "learning_rate": 1.63784548596479e-05, "loss": 0.5696, "step": 4920 }, { "epoch": 0.8520290011903474, "grad_norm": 1.3984375, "learning_rate": 1.637705008437484e-05, "loss": 0.6329, "step": 4921 }, { "epoch": 0.852202142625257, "grad_norm": 1.25, "learning_rate": 1.637564509697226e-05, "loss": 0.5945, "step": 4922 }, { "epoch": 0.8523752840601666, "grad_norm": 1.25, "learning_rate": 1.63742398974869e-05, "loss": 0.5858, "step": 4923 }, { "epoch": 0.8525484254950763, "grad_norm": 1.328125, "learning_rate": 1.6372834485965495e-05, "loss": 0.5576, "step": 4924 }, { "epoch": 0.8527215669299859, "grad_norm": 1.28125, "learning_rate": 1.6371428862454802e-05, "loss": 0.5652, "step": 4925 }, { "epoch": 0.8528947083648956, "grad_norm": 1.3671875, "learning_rate": 1.637002302700157e-05, "loss": 0.6479, "step": 4926 }, { "epoch": 0.8530678497998052, "grad_norm": 1.375, "learning_rate": 1.6368616979652576e-05, "loss": 0.5231, "step": 4927 }, { "epoch": 0.8532409912347149, "grad_norm": 1.2578125, "learning_rate": 1.636721072045458e-05, "loss": 0.5881, "step": 4928 }, { "epoch": 0.8534141326696245, "grad_norm": 1.46875, "learning_rate": 1.6365804249454363e-05, "loss": 0.5779, "step": 4929 }, { "epoch": 0.8535872741045342, "grad_norm": 1.375, "learning_rate": 1.6364397566698717e-05, "loss": 0.5501, "step": 4930 }, { "epoch": 0.8537604155394438, "grad_norm": 1.2578125, "learning_rate": 1.6362990672234426e-05, "loss": 0.5474, "step": 4931 }, { "epoch": 0.8539335569743535, "grad_norm": 1.34375, "learning_rate": 1.63615835661083e-05, "loss": 0.5671, "step": 4932 }, { "epoch": 0.8541066984092631, "grad_norm": 1.2734375, "learning_rate": 1.6360176248367134e-05, "loss": 0.5746, "step": 4933 }, { "epoch": 0.8542798398441727, "grad_norm": 1.34375, "learning_rate": 1.635876871905775e-05, "loss": 0.5356, "step": 4934 }, { "epoch": 0.8544529812790823, "grad_norm": 1.3359375, "learning_rate": 1.6357360978226964e-05, "loss": 0.5506, "step": 4935 }, { "epoch": 0.854626122713992, "grad_norm": 1.3046875, "learning_rate": 1.6355953025921606e-05, "loss": 0.5766, "step": 4936 }, { "epoch": 0.8547992641489016, "grad_norm": 1.3984375, "learning_rate": 1.6354544862188512e-05, "loss": 0.6114, "step": 4937 }, { "epoch": 0.8549724055838113, "grad_norm": 1.34375, "learning_rate": 1.6353136487074522e-05, "loss": 0.5618, "step": 4938 }, { "epoch": 0.8551455470187209, "grad_norm": 1.359375, "learning_rate": 1.6351727900626486e-05, "loss": 0.5699, "step": 4939 }, { "epoch": 0.8553186884536306, "grad_norm": 1.328125, "learning_rate": 1.635031910289126e-05, "loss": 0.5632, "step": 4940 }, { "epoch": 0.8554918298885402, "grad_norm": 1.3515625, "learning_rate": 1.6348910093915706e-05, "loss": 0.6749, "step": 4941 }, { "epoch": 0.8556649713234499, "grad_norm": 1.40625, "learning_rate": 1.6347500873746695e-05, "loss": 0.6096, "step": 4942 }, { "epoch": 0.8558381127583595, "grad_norm": 1.3515625, "learning_rate": 1.6346091442431104e-05, "loss": 0.6144, "step": 4943 }, { "epoch": 0.8560112541932692, "grad_norm": 1.2109375, "learning_rate": 1.6344681800015813e-05, "loss": 0.5457, "step": 4944 }, { "epoch": 0.8561843956281787, "grad_norm": 1.3125, "learning_rate": 1.6343271946547718e-05, "loss": 0.6648, "step": 4945 }, { "epoch": 0.8563575370630884, "grad_norm": 1.2421875, "learning_rate": 1.634186188207372e-05, "loss": 0.6138, "step": 4946 }, { "epoch": 0.856530678497998, "grad_norm": 1.3671875, "learning_rate": 1.6340451606640714e-05, "loss": 0.6748, "step": 4947 }, { "epoch": 0.8567038199329077, "grad_norm": 1.3125, "learning_rate": 1.6339041120295625e-05, "loss": 0.5773, "step": 4948 }, { "epoch": 0.8568769613678173, "grad_norm": 1.34375, "learning_rate": 1.633763042308536e-05, "loss": 0.6112, "step": 4949 }, { "epoch": 0.857050102802727, "grad_norm": 1.2578125, "learning_rate": 1.633621951505685e-05, "loss": 0.5278, "step": 4950 }, { "epoch": 0.8572232442376366, "grad_norm": 1.2734375, "learning_rate": 1.6334808396257032e-05, "loss": 0.5907, "step": 4951 }, { "epoch": 0.8573963856725463, "grad_norm": 1.40625, "learning_rate": 1.633339706673284e-05, "loss": 0.5598, "step": 4952 }, { "epoch": 0.8575695271074559, "grad_norm": 1.2890625, "learning_rate": 1.6331985526531226e-05, "loss": 0.5611, "step": 4953 }, { "epoch": 0.8577426685423656, "grad_norm": 1.46875, "learning_rate": 1.633057377569914e-05, "loss": 0.6194, "step": 4954 }, { "epoch": 0.8579158099772752, "grad_norm": 1.296875, "learning_rate": 1.6329161814283546e-05, "loss": 0.6364, "step": 4955 }, { "epoch": 0.8580889514121848, "grad_norm": 1.328125, "learning_rate": 1.6327749642331413e-05, "loss": 0.5643, "step": 4956 }, { "epoch": 0.8582620928470944, "grad_norm": 1.2734375, "learning_rate": 1.6326337259889716e-05, "loss": 0.5914, "step": 4957 }, { "epoch": 0.8584352342820041, "grad_norm": 1.265625, "learning_rate": 1.6324924667005435e-05, "loss": 0.579, "step": 4958 }, { "epoch": 0.8586083757169137, "grad_norm": 1.21875, "learning_rate": 1.6323511863725555e-05, "loss": 0.55, "step": 4959 }, { "epoch": 0.8587815171518234, "grad_norm": 1.28125, "learning_rate": 1.6322098850097084e-05, "loss": 0.6243, "step": 4960 }, { "epoch": 0.858954658586733, "grad_norm": 1.25, "learning_rate": 1.6320685626167016e-05, "loss": 0.5972, "step": 4961 }, { "epoch": 0.8591278000216427, "grad_norm": 1.3203125, "learning_rate": 1.6319272191982364e-05, "loss": 0.5768, "step": 4962 }, { "epoch": 0.8593009414565523, "grad_norm": 1.4296875, "learning_rate": 1.6317858547590145e-05, "loss": 0.6298, "step": 4963 }, { "epoch": 0.859474082891462, "grad_norm": 1.28125, "learning_rate": 1.6316444693037383e-05, "loss": 0.5708, "step": 4964 }, { "epoch": 0.8596472243263716, "grad_norm": 1.3125, "learning_rate": 1.631503062837111e-05, "loss": 0.6187, "step": 4965 }, { "epoch": 0.8598203657612813, "grad_norm": 1.2578125, "learning_rate": 1.6313616353638362e-05, "loss": 0.642, "step": 4966 }, { "epoch": 0.8599935071961909, "grad_norm": 1.3046875, "learning_rate": 1.631220186888619e-05, "loss": 0.5221, "step": 4967 }, { "epoch": 0.8601666486311005, "grad_norm": 1.2890625, "learning_rate": 1.631078717416164e-05, "loss": 0.6231, "step": 4968 }, { "epoch": 0.8603397900660101, "grad_norm": 1.3515625, "learning_rate": 1.6309372269511772e-05, "loss": 0.6178, "step": 4969 }, { "epoch": 0.8605129315009198, "grad_norm": 1.3125, "learning_rate": 1.6307957154983654e-05, "loss": 0.4933, "step": 4970 }, { "epoch": 0.8606860729358294, "grad_norm": 1.3515625, "learning_rate": 1.6306541830624363e-05, "loss": 0.6021, "step": 4971 }, { "epoch": 0.8608592143707391, "grad_norm": 1.3203125, "learning_rate": 1.6305126296480968e-05, "loss": 0.6478, "step": 4972 }, { "epoch": 0.8610323558056487, "grad_norm": 1.3046875, "learning_rate": 1.6303710552600563e-05, "loss": 0.5863, "step": 4973 }, { "epoch": 0.8612054972405584, "grad_norm": 1.265625, "learning_rate": 1.6302294599030247e-05, "loss": 0.5593, "step": 4974 }, { "epoch": 0.861378638675468, "grad_norm": 1.328125, "learning_rate": 1.6300878435817115e-05, "loss": 0.5717, "step": 4975 }, { "epoch": 0.8615517801103777, "grad_norm": 1.265625, "learning_rate": 1.6299462063008272e-05, "loss": 0.5961, "step": 4976 }, { "epoch": 0.8617249215452873, "grad_norm": 1.34375, "learning_rate": 1.6298045480650837e-05, "loss": 0.6368, "step": 4977 }, { "epoch": 0.861898062980197, "grad_norm": 1.3515625, "learning_rate": 1.629662868879193e-05, "loss": 0.552, "step": 4978 }, { "epoch": 0.8620712044151065, "grad_norm": 1.3515625, "learning_rate": 1.6295211687478685e-05, "loss": 0.5184, "step": 4979 }, { "epoch": 0.8622443458500162, "grad_norm": 1.4765625, "learning_rate": 1.6293794476758234e-05, "loss": 0.6239, "step": 4980 }, { "epoch": 0.8624174872849258, "grad_norm": 1.453125, "learning_rate": 1.6292377056677716e-05, "loss": 0.5732, "step": 4981 }, { "epoch": 0.8625906287198355, "grad_norm": 1.3515625, "learning_rate": 1.629095942728429e-05, "loss": 0.5764, "step": 4982 }, { "epoch": 0.8627637701547451, "grad_norm": 1.3203125, "learning_rate": 1.6289541588625105e-05, "loss": 0.5514, "step": 4983 }, { "epoch": 0.8629369115896548, "grad_norm": 1.46875, "learning_rate": 1.6288123540747328e-05, "loss": 0.5794, "step": 4984 }, { "epoch": 0.8631100530245644, "grad_norm": 1.296875, "learning_rate": 1.628670528369813e-05, "loss": 0.5299, "step": 4985 }, { "epoch": 0.8632831944594741, "grad_norm": 1.2890625, "learning_rate": 1.6285286817524687e-05, "loss": 0.5803, "step": 4986 }, { "epoch": 0.8634563358943838, "grad_norm": 1.359375, "learning_rate": 1.6283868142274187e-05, "loss": 0.6264, "step": 4987 }, { "epoch": 0.8636294773292934, "grad_norm": 1.3359375, "learning_rate": 1.6282449257993814e-05, "loss": 0.5818, "step": 4988 }, { "epoch": 0.8638026187642031, "grad_norm": 1.296875, "learning_rate": 1.6281030164730775e-05, "loss": 0.6389, "step": 4989 }, { "epoch": 0.8639757601991126, "grad_norm": 1.3125, "learning_rate": 1.627961086253227e-05, "loss": 0.5606, "step": 4990 }, { "epoch": 0.8641489016340222, "grad_norm": 1.3125, "learning_rate": 1.6278191351445515e-05, "loss": 0.5644, "step": 4991 }, { "epoch": 0.8643220430689319, "grad_norm": 1.28125, "learning_rate": 1.6276771631517727e-05, "loss": 0.6357, "step": 4992 }, { "epoch": 0.8644951845038416, "grad_norm": 1.328125, "learning_rate": 1.6275351702796134e-05, "loss": 0.6879, "step": 4993 }, { "epoch": 0.8646683259387512, "grad_norm": 1.3828125, "learning_rate": 1.6273931565327965e-05, "loss": 0.6347, "step": 4994 }, { "epoch": 0.8648414673736609, "grad_norm": 1.203125, "learning_rate": 1.6272511219160465e-05, "loss": 0.5138, "step": 4995 }, { "epoch": 0.8650146088085705, "grad_norm": 1.3984375, "learning_rate": 1.627109066434088e-05, "loss": 0.6519, "step": 4996 }, { "epoch": 0.8651877502434802, "grad_norm": 1.4296875, "learning_rate": 1.6269669900916463e-05, "loss": 0.5986, "step": 4997 }, { "epoch": 0.8653608916783898, "grad_norm": 1.3203125, "learning_rate": 1.6268248928934476e-05, "loss": 0.5904, "step": 4998 }, { "epoch": 0.8655340331132995, "grad_norm": 1.5703125, "learning_rate": 1.626682774844219e-05, "loss": 0.5822, "step": 4999 }, { "epoch": 0.8657071745482091, "grad_norm": 1.2890625, "learning_rate": 1.626540635948687e-05, "loss": 0.7088, "step": 5000 }, { "epoch": 0.8658803159831188, "grad_norm": 1.2890625, "learning_rate": 1.626398476211581e-05, "loss": 0.5698, "step": 5001 }, { "epoch": 0.8660534574180283, "grad_norm": 1.40625, "learning_rate": 1.626256295637629e-05, "loss": 0.6068, "step": 5002 }, { "epoch": 0.866226598852938, "grad_norm": 1.2890625, "learning_rate": 1.626114094231561e-05, "loss": 0.5582, "step": 5003 }, { "epoch": 0.8663997402878476, "grad_norm": 1.359375, "learning_rate": 1.625971871998107e-05, "loss": 0.6163, "step": 5004 }, { "epoch": 0.8665728817227573, "grad_norm": 1.34375, "learning_rate": 1.6258296289419983e-05, "loss": 0.6312, "step": 5005 }, { "epoch": 0.8667460231576669, "grad_norm": 1.375, "learning_rate": 1.6256873650679658e-05, "loss": 0.585, "step": 5006 }, { "epoch": 0.8669191645925766, "grad_norm": 1.34375, "learning_rate": 1.625545080380743e-05, "loss": 0.6033, "step": 5007 }, { "epoch": 0.8670923060274862, "grad_norm": 1.265625, "learning_rate": 1.6254027748850622e-05, "loss": 0.5532, "step": 5008 }, { "epoch": 0.8672654474623959, "grad_norm": 1.3046875, "learning_rate": 1.6252604485856572e-05, "loss": 0.5062, "step": 5009 }, { "epoch": 0.8674385888973055, "grad_norm": 1.421875, "learning_rate": 1.6251181014872624e-05, "loss": 0.6674, "step": 5010 }, { "epoch": 0.8676117303322152, "grad_norm": 1.4453125, "learning_rate": 1.624975733594613e-05, "loss": 0.5807, "step": 5011 }, { "epoch": 0.8677848717671248, "grad_norm": 1.2578125, "learning_rate": 1.6248333449124443e-05, "loss": 0.5587, "step": 5012 }, { "epoch": 0.8679580132020344, "grad_norm": 1.2734375, "learning_rate": 1.624690935445494e-05, "loss": 0.5964, "step": 5013 }, { "epoch": 0.868131154636944, "grad_norm": 1.2734375, "learning_rate": 1.624548505198498e-05, "loss": 0.6037, "step": 5014 }, { "epoch": 0.8683042960718537, "grad_norm": 1.2109375, "learning_rate": 1.6244060541761948e-05, "loss": 0.5152, "step": 5015 }, { "epoch": 0.8684774375067633, "grad_norm": 1.46875, "learning_rate": 1.624263582383323e-05, "loss": 0.6663, "step": 5016 }, { "epoch": 0.868650578941673, "grad_norm": 1.265625, "learning_rate": 1.6241210898246216e-05, "loss": 0.6806, "step": 5017 }, { "epoch": 0.8688237203765826, "grad_norm": 1.4296875, "learning_rate": 1.623978576504831e-05, "loss": 0.6507, "step": 5018 }, { "epoch": 0.8689968618114923, "grad_norm": 1.2734375, "learning_rate": 1.6238360424286912e-05, "loss": 0.567, "step": 5019 }, { "epoch": 0.8691700032464019, "grad_norm": 1.34375, "learning_rate": 1.623693487600944e-05, "loss": 0.5743, "step": 5020 }, { "epoch": 0.8693431446813116, "grad_norm": 1.375, "learning_rate": 1.623550912026331e-05, "loss": 0.5532, "step": 5021 }, { "epoch": 0.8695162861162212, "grad_norm": 1.28125, "learning_rate": 1.6234083157095953e-05, "loss": 0.5741, "step": 5022 }, { "epoch": 0.8696894275511309, "grad_norm": 1.234375, "learning_rate": 1.6232656986554802e-05, "loss": 0.5739, "step": 5023 }, { "epoch": 0.8698625689860404, "grad_norm": 1.21875, "learning_rate": 1.62312306086873e-05, "loss": 0.5134, "step": 5024 }, { "epoch": 0.8700357104209501, "grad_norm": 1.390625, "learning_rate": 1.6229804023540888e-05, "loss": 0.6245, "step": 5025 }, { "epoch": 0.8702088518558597, "grad_norm": 1.3515625, "learning_rate": 1.6228377231163025e-05, "loss": 0.5861, "step": 5026 }, { "epoch": 0.8703819932907694, "grad_norm": 1.2734375, "learning_rate": 1.622695023160117e-05, "loss": 0.5344, "step": 5027 }, { "epoch": 0.870555134725679, "grad_norm": 1.375, "learning_rate": 1.62255230249028e-05, "loss": 0.554, "step": 5028 }, { "epoch": 0.8707282761605887, "grad_norm": 1.2578125, "learning_rate": 1.6224095611115385e-05, "loss": 0.6035, "step": 5029 }, { "epoch": 0.8709014175954983, "grad_norm": 1.4296875, "learning_rate": 1.6222667990286405e-05, "loss": 0.5709, "step": 5030 }, { "epoch": 0.871074559030408, "grad_norm": 1.4921875, "learning_rate": 1.622124016246335e-05, "loss": 0.6442, "step": 5031 }, { "epoch": 0.8712477004653176, "grad_norm": 1.4140625, "learning_rate": 1.6219812127693716e-05, "loss": 0.6535, "step": 5032 }, { "epoch": 0.8714208419002273, "grad_norm": 1.296875, "learning_rate": 1.621838388602501e-05, "loss": 0.5499, "step": 5033 }, { "epoch": 0.8715939833351369, "grad_norm": 1.2578125, "learning_rate": 1.6216955437504732e-05, "loss": 0.4973, "step": 5034 }, { "epoch": 0.8717671247700466, "grad_norm": 1.3984375, "learning_rate": 1.6215526782180407e-05, "loss": 0.6066, "step": 5035 }, { "epoch": 0.8719402662049561, "grad_norm": 1.296875, "learning_rate": 1.6214097920099564e-05, "loss": 0.4955, "step": 5036 }, { "epoch": 0.8721134076398658, "grad_norm": 1.234375, "learning_rate": 1.6212668851309715e-05, "loss": 0.5548, "step": 5037 }, { "epoch": 0.8722865490747754, "grad_norm": 1.34375, "learning_rate": 1.6211239575858414e-05, "loss": 0.5661, "step": 5038 }, { "epoch": 0.8724596905096851, "grad_norm": 1.3046875, "learning_rate": 1.62098100937932e-05, "loss": 0.587, "step": 5039 }, { "epoch": 0.8726328319445947, "grad_norm": 1.2578125, "learning_rate": 1.6208380405161623e-05, "loss": 0.6016, "step": 5040 }, { "epoch": 0.8728059733795044, "grad_norm": 1.375, "learning_rate": 1.6206950510011242e-05, "loss": 0.5495, "step": 5041 }, { "epoch": 0.872979114814414, "grad_norm": 1.2890625, "learning_rate": 1.6205520408389618e-05, "loss": 0.5083, "step": 5042 }, { "epoch": 0.8731522562493237, "grad_norm": 1.3671875, "learning_rate": 1.620409010034433e-05, "loss": 0.568, "step": 5043 }, { "epoch": 0.8733253976842333, "grad_norm": 1.4453125, "learning_rate": 1.620265958592295e-05, "loss": 0.6472, "step": 5044 }, { "epoch": 0.873498539119143, "grad_norm": 1.375, "learning_rate": 1.6201228865173066e-05, "loss": 0.5704, "step": 5045 }, { "epoch": 0.8736716805540526, "grad_norm": 1.34375, "learning_rate": 1.619979793814227e-05, "loss": 0.544, "step": 5046 }, { "epoch": 0.8738448219889622, "grad_norm": 1.3203125, "learning_rate": 1.619836680487816e-05, "loss": 0.5597, "step": 5047 }, { "epoch": 0.8740179634238718, "grad_norm": 1.3359375, "learning_rate": 1.6196935465428345e-05, "loss": 0.5574, "step": 5048 }, { "epoch": 0.8741911048587815, "grad_norm": 1.296875, "learning_rate": 1.6195503919840433e-05, "loss": 0.5474, "step": 5049 }, { "epoch": 0.8743642462936911, "grad_norm": 1.28125, "learning_rate": 1.619407216816205e-05, "loss": 0.5539, "step": 5050 }, { "epoch": 0.8745373877286008, "grad_norm": 1.25, "learning_rate": 1.6192640210440817e-05, "loss": 0.6323, "step": 5051 }, { "epoch": 0.8747105291635104, "grad_norm": 1.2578125, "learning_rate": 1.619120804672437e-05, "loss": 0.5737, "step": 5052 }, { "epoch": 0.8748836705984201, "grad_norm": 1.34375, "learning_rate": 1.6189775677060347e-05, "loss": 0.6472, "step": 5053 }, { "epoch": 0.8750568120333297, "grad_norm": 1.515625, "learning_rate": 1.61883431014964e-05, "loss": 0.5801, "step": 5054 }, { "epoch": 0.8752299534682394, "grad_norm": 1.2109375, "learning_rate": 1.6186910320080174e-05, "loss": 0.6008, "step": 5055 }, { "epoch": 0.875403094903149, "grad_norm": 1.2734375, "learning_rate": 1.6185477332859344e-05, "loss": 0.526, "step": 5056 }, { "epoch": 0.8755762363380587, "grad_norm": 1.2265625, "learning_rate": 1.6184044139881562e-05, "loss": 0.6402, "step": 5057 }, { "epoch": 0.8757493777729682, "grad_norm": 1.2890625, "learning_rate": 1.6182610741194514e-05, "loss": 0.6216, "step": 5058 }, { "epoch": 0.8759225192078779, "grad_norm": 1.3046875, "learning_rate": 1.6181177136845874e-05, "loss": 0.61, "step": 5059 }, { "epoch": 0.8760956606427875, "grad_norm": 1.3125, "learning_rate": 1.6179743326883334e-05, "loss": 0.5665, "step": 5060 }, { "epoch": 0.8762688020776972, "grad_norm": 1.421875, "learning_rate": 1.617830931135459e-05, "loss": 0.6389, "step": 5061 }, { "epoch": 0.8764419435126068, "grad_norm": 1.28125, "learning_rate": 1.617687509030734e-05, "loss": 0.6599, "step": 5062 }, { "epoch": 0.8766150849475165, "grad_norm": 1.1953125, "learning_rate": 1.6175440663789296e-05, "loss": 0.5884, "step": 5063 }, { "epoch": 0.8767882263824262, "grad_norm": 1.3828125, "learning_rate": 1.617400603184817e-05, "loss": 0.606, "step": 5064 }, { "epoch": 0.8769613678173358, "grad_norm": 1.328125, "learning_rate": 1.6172571194531686e-05, "loss": 0.5433, "step": 5065 }, { "epoch": 0.8771345092522455, "grad_norm": 1.21875, "learning_rate": 1.6171136151887577e-05, "loss": 0.543, "step": 5066 }, { "epoch": 0.8773076506871551, "grad_norm": 1.3125, "learning_rate": 1.6169700903963575e-05, "loss": 0.6327, "step": 5067 }, { "epoch": 0.8774807921220648, "grad_norm": 1.2265625, "learning_rate": 1.616826545080742e-05, "loss": 0.5348, "step": 5068 }, { "epoch": 0.8776539335569744, "grad_norm": 1.3984375, "learning_rate": 1.6166829792466866e-05, "loss": 0.5939, "step": 5069 }, { "epoch": 0.877827074991884, "grad_norm": 1.3046875, "learning_rate": 1.6165393928989674e-05, "loss": 0.5438, "step": 5070 }, { "epoch": 0.8780002164267936, "grad_norm": 1.390625, "learning_rate": 1.6163957860423595e-05, "loss": 0.5768, "step": 5071 }, { "epoch": 0.8781733578617033, "grad_norm": 1.2109375, "learning_rate": 1.616252158681641e-05, "loss": 0.6283, "step": 5072 }, { "epoch": 0.8783464992966129, "grad_norm": 1.3046875, "learning_rate": 1.616108510821589e-05, "loss": 0.5586, "step": 5073 }, { "epoch": 0.8785196407315226, "grad_norm": 1.3046875, "learning_rate": 1.6159648424669826e-05, "loss": 0.545, "step": 5074 }, { "epoch": 0.8786927821664322, "grad_norm": 1.3671875, "learning_rate": 1.6158211536225995e-05, "loss": 0.6117, "step": 5075 }, { "epoch": 0.8788659236013419, "grad_norm": 1.359375, "learning_rate": 1.615677444293221e-05, "loss": 0.5839, "step": 5076 }, { "epoch": 0.8790390650362515, "grad_norm": 1.3984375, "learning_rate": 1.6155337144836267e-05, "loss": 0.582, "step": 5077 }, { "epoch": 0.8792122064711612, "grad_norm": 1.359375, "learning_rate": 1.6153899641985975e-05, "loss": 0.6214, "step": 5078 }, { "epoch": 0.8793853479060708, "grad_norm": 1.2734375, "learning_rate": 1.6152461934429154e-05, "loss": 0.5728, "step": 5079 }, { "epoch": 0.8795584893409805, "grad_norm": 1.4375, "learning_rate": 1.6151024022213633e-05, "loss": 0.5404, "step": 5080 }, { "epoch": 0.87973163077589, "grad_norm": 1.3828125, "learning_rate": 1.614958590538724e-05, "loss": 0.5712, "step": 5081 }, { "epoch": 0.8799047722107997, "grad_norm": 1.5234375, "learning_rate": 1.6148147583997813e-05, "loss": 0.6868, "step": 5082 }, { "epoch": 0.8800779136457093, "grad_norm": 1.3828125, "learning_rate": 1.6146709058093196e-05, "loss": 0.5757, "step": 5083 }, { "epoch": 0.880251055080619, "grad_norm": 1.46875, "learning_rate": 1.6145270327721246e-05, "loss": 0.6005, "step": 5084 }, { "epoch": 0.8804241965155286, "grad_norm": 1.1484375, "learning_rate": 1.6143831392929813e-05, "loss": 0.484, "step": 5085 }, { "epoch": 0.8805973379504383, "grad_norm": 1.4296875, "learning_rate": 1.614239225376677e-05, "loss": 0.6084, "step": 5086 }, { "epoch": 0.8807704793853479, "grad_norm": 1.359375, "learning_rate": 1.6140952910279985e-05, "loss": 0.5698, "step": 5087 }, { "epoch": 0.8809436208202576, "grad_norm": 1.3359375, "learning_rate": 1.6139513362517337e-05, "loss": 0.6207, "step": 5088 }, { "epoch": 0.8811167622551672, "grad_norm": 1.2265625, "learning_rate": 1.613807361052672e-05, "loss": 0.564, "step": 5089 }, { "epoch": 0.8812899036900769, "grad_norm": 1.3828125, "learning_rate": 1.613663365435602e-05, "loss": 0.61, "step": 5090 }, { "epoch": 0.8814630451249865, "grad_norm": 1.3046875, "learning_rate": 1.613519349405313e-05, "loss": 0.6503, "step": 5091 }, { "epoch": 0.8816361865598961, "grad_norm": 1.25, "learning_rate": 1.6133753129665968e-05, "loss": 0.5495, "step": 5092 }, { "epoch": 0.8818093279948057, "grad_norm": 1.3046875, "learning_rate": 1.613231256124244e-05, "loss": 0.5464, "step": 5093 }, { "epoch": 0.8819824694297154, "grad_norm": 1.265625, "learning_rate": 1.6130871788830466e-05, "loss": 0.5331, "step": 5094 }, { "epoch": 0.882155610864625, "grad_norm": 1.2578125, "learning_rate": 1.612943081247798e-05, "loss": 0.6245, "step": 5095 }, { "epoch": 0.8823287522995347, "grad_norm": 1.421875, "learning_rate": 1.6127989632232905e-05, "loss": 0.6231, "step": 5096 }, { "epoch": 0.8825018937344443, "grad_norm": 1.2109375, "learning_rate": 1.6126548248143188e-05, "loss": 0.5595, "step": 5097 }, { "epoch": 0.882675035169354, "grad_norm": 1.53125, "learning_rate": 1.6125106660256774e-05, "loss": 0.6128, "step": 5098 }, { "epoch": 0.8828481766042636, "grad_norm": 1.46875, "learning_rate": 1.6123664868621616e-05, "loss": 0.6336, "step": 5099 }, { "epoch": 0.8830213180391733, "grad_norm": 1.203125, "learning_rate": 1.6122222873285673e-05, "loss": 0.6007, "step": 5100 }, { "epoch": 0.8831944594740829, "grad_norm": 1.28125, "learning_rate": 1.6120780674296917e-05, "loss": 0.5113, "step": 5101 }, { "epoch": 0.8833676009089926, "grad_norm": 1.2734375, "learning_rate": 1.6119338271703318e-05, "loss": 0.6177, "step": 5102 }, { "epoch": 0.8835407423439022, "grad_norm": 1.2734375, "learning_rate": 1.6117895665552862e-05, "loss": 0.6005, "step": 5103 }, { "epoch": 0.8837138837788118, "grad_norm": 1.328125, "learning_rate": 1.611645285589353e-05, "loss": 0.683, "step": 5104 }, { "epoch": 0.8838870252137214, "grad_norm": 1.1796875, "learning_rate": 1.6115009842773322e-05, "loss": 0.4308, "step": 5105 }, { "epoch": 0.8840601666486311, "grad_norm": 1.3984375, "learning_rate": 1.6113566626240235e-05, "loss": 0.6559, "step": 5106 }, { "epoch": 0.8842333080835407, "grad_norm": 1.3828125, "learning_rate": 1.6112123206342275e-05, "loss": 0.5964, "step": 5107 }, { "epoch": 0.8844064495184504, "grad_norm": 1.3515625, "learning_rate": 1.6110679583127463e-05, "loss": 0.572, "step": 5108 }, { "epoch": 0.88457959095336, "grad_norm": 1.28125, "learning_rate": 1.610923575664382e-05, "loss": 0.5624, "step": 5109 }, { "epoch": 0.8847527323882697, "grad_norm": 1.296875, "learning_rate": 1.610779172693937e-05, "loss": 0.5699, "step": 5110 }, { "epoch": 0.8849258738231793, "grad_norm": 1.265625, "learning_rate": 1.6106347494062145e-05, "loss": 0.5957, "step": 5111 }, { "epoch": 0.885099015258089, "grad_norm": 1.296875, "learning_rate": 1.6104903058060196e-05, "loss": 0.5751, "step": 5112 }, { "epoch": 0.8852721566929986, "grad_norm": 1.3671875, "learning_rate": 1.610345841898157e-05, "loss": 0.5377, "step": 5113 }, { "epoch": 0.8854452981279083, "grad_norm": 1.21875, "learning_rate": 1.6102013576874314e-05, "loss": 0.5423, "step": 5114 }, { "epoch": 0.8856184395628178, "grad_norm": 1.3203125, "learning_rate": 1.6100568531786497e-05, "loss": 0.6598, "step": 5115 }, { "epoch": 0.8857915809977275, "grad_norm": 1.5390625, "learning_rate": 1.6099123283766184e-05, "loss": 0.5782, "step": 5116 }, { "epoch": 0.8859647224326371, "grad_norm": 1.328125, "learning_rate": 1.6097677832861453e-05, "loss": 0.5673, "step": 5117 }, { "epoch": 0.8861378638675468, "grad_norm": 1.390625, "learning_rate": 1.6096232179120388e-05, "loss": 0.5335, "step": 5118 }, { "epoch": 0.8863110053024564, "grad_norm": 1.4609375, "learning_rate": 1.609478632259107e-05, "loss": 0.6004, "step": 5119 }, { "epoch": 0.8864841467373661, "grad_norm": 1.3125, "learning_rate": 1.6093340263321605e-05, "loss": 0.5917, "step": 5120 }, { "epoch": 0.8866572881722757, "grad_norm": 1.2734375, "learning_rate": 1.6091894001360092e-05, "loss": 0.5443, "step": 5121 }, { "epoch": 0.8868304296071854, "grad_norm": 1.296875, "learning_rate": 1.6090447536754634e-05, "loss": 0.5957, "step": 5122 }, { "epoch": 0.887003571042095, "grad_norm": 1.5390625, "learning_rate": 1.6089000869553356e-05, "loss": 0.5908, "step": 5123 }, { "epoch": 0.8871767124770047, "grad_norm": 1.25, "learning_rate": 1.6087553999804372e-05, "loss": 0.5437, "step": 5124 }, { "epoch": 0.8873498539119143, "grad_norm": 1.3984375, "learning_rate": 1.608610692755582e-05, "loss": 0.611, "step": 5125 }, { "epoch": 0.8875229953468239, "grad_norm": 1.3203125, "learning_rate": 1.608465965285583e-05, "loss": 0.545, "step": 5126 }, { "epoch": 0.8876961367817335, "grad_norm": 1.3203125, "learning_rate": 1.608321217575255e-05, "loss": 0.5707, "step": 5127 }, { "epoch": 0.8878692782166432, "grad_norm": 1.28125, "learning_rate": 1.6081764496294124e-05, "loss": 0.6196, "step": 5128 }, { "epoch": 0.8880424196515528, "grad_norm": 1.328125, "learning_rate": 1.608031661452871e-05, "loss": 0.6746, "step": 5129 }, { "epoch": 0.8882155610864625, "grad_norm": 1.3203125, "learning_rate": 1.6078868530504475e-05, "loss": 0.5939, "step": 5130 }, { "epoch": 0.8883887025213721, "grad_norm": 1.2890625, "learning_rate": 1.6077420244269585e-05, "loss": 0.5684, "step": 5131 }, { "epoch": 0.8885618439562818, "grad_norm": 1.3046875, "learning_rate": 1.6075971755872216e-05, "loss": 0.5763, "step": 5132 }, { "epoch": 0.8887349853911914, "grad_norm": 1.3359375, "learning_rate": 1.6074523065360555e-05, "loss": 0.5788, "step": 5133 }, { "epoch": 0.8889081268261011, "grad_norm": 1.3515625, "learning_rate": 1.6073074172782787e-05, "loss": 0.5827, "step": 5134 }, { "epoch": 0.8890812682610107, "grad_norm": 1.390625, "learning_rate": 1.6071625078187113e-05, "loss": 0.6393, "step": 5135 }, { "epoch": 0.8892544096959204, "grad_norm": 1.2578125, "learning_rate": 1.6070175781621738e-05, "loss": 0.5845, "step": 5136 }, { "epoch": 0.88942755113083, "grad_norm": 1.2421875, "learning_rate": 1.6068726283134863e-05, "loss": 0.5578, "step": 5137 }, { "epoch": 0.8896006925657396, "grad_norm": 1.390625, "learning_rate": 1.6067276582774716e-05, "loss": 0.6663, "step": 5138 }, { "epoch": 0.8897738340006492, "grad_norm": 1.46875, "learning_rate": 1.6065826680589515e-05, "loss": 0.6197, "step": 5139 }, { "epoch": 0.8899469754355589, "grad_norm": 1.4453125, "learning_rate": 1.6064376576627487e-05, "loss": 0.6565, "step": 5140 }, { "epoch": 0.8901201168704685, "grad_norm": 1.328125, "learning_rate": 1.6062926270936878e-05, "loss": 0.5295, "step": 5141 }, { "epoch": 0.8902932583053782, "grad_norm": 1.265625, "learning_rate": 1.6061475763565924e-05, "loss": 0.5434, "step": 5142 }, { "epoch": 0.8904663997402879, "grad_norm": 1.3359375, "learning_rate": 1.6060025054562875e-05, "loss": 0.5813, "step": 5143 }, { "epoch": 0.8906395411751975, "grad_norm": 1.4453125, "learning_rate": 1.6058574143975995e-05, "loss": 0.6354, "step": 5144 }, { "epoch": 0.8908126826101072, "grad_norm": 1.4140625, "learning_rate": 1.6057123031853543e-05, "loss": 0.5582, "step": 5145 }, { "epoch": 0.8909858240450168, "grad_norm": 1.2265625, "learning_rate": 1.605567171824379e-05, "loss": 0.5744, "step": 5146 }, { "epoch": 0.8911589654799265, "grad_norm": 1.390625, "learning_rate": 1.6054220203195016e-05, "loss": 0.5672, "step": 5147 }, { "epoch": 0.8913321069148361, "grad_norm": 1.3359375, "learning_rate": 1.6052768486755502e-05, "loss": 0.5712, "step": 5148 }, { "epoch": 0.8915052483497456, "grad_norm": 1.265625, "learning_rate": 1.605131656897354e-05, "loss": 0.5692, "step": 5149 }, { "epoch": 0.8916783897846553, "grad_norm": 1.375, "learning_rate": 1.604986444989742e-05, "loss": 0.5632, "step": 5150 }, { "epoch": 0.891851531219565, "grad_norm": 1.3828125, "learning_rate": 1.604841212957546e-05, "loss": 0.6156, "step": 5151 }, { "epoch": 0.8920246726544746, "grad_norm": 1.2578125, "learning_rate": 1.6046959608055966e-05, "loss": 0.5562, "step": 5152 }, { "epoch": 0.8921978140893843, "grad_norm": 1.21875, "learning_rate": 1.6045506885387247e-05, "loss": 0.5637, "step": 5153 }, { "epoch": 0.8923709555242939, "grad_norm": 1.3046875, "learning_rate": 1.6044053961617636e-05, "loss": 0.5891, "step": 5154 }, { "epoch": 0.8925440969592036, "grad_norm": 1.453125, "learning_rate": 1.604260083679546e-05, "loss": 0.6363, "step": 5155 }, { "epoch": 0.8927172383941132, "grad_norm": 1.3984375, "learning_rate": 1.6041147510969058e-05, "loss": 0.6065, "step": 5156 }, { "epoch": 0.8928903798290229, "grad_norm": 1.2421875, "learning_rate": 1.603969398418677e-05, "loss": 0.5521, "step": 5157 }, { "epoch": 0.8930635212639325, "grad_norm": 1.40625, "learning_rate": 1.6038240256496956e-05, "loss": 0.5524, "step": 5158 }, { "epoch": 0.8932366626988422, "grad_norm": 1.421875, "learning_rate": 1.6036786327947966e-05, "loss": 0.5829, "step": 5159 }, { "epoch": 0.8934098041337517, "grad_norm": 1.359375, "learning_rate": 1.603533219858817e-05, "loss": 0.5579, "step": 5160 }, { "epoch": 0.8935829455686614, "grad_norm": 1.359375, "learning_rate": 1.6033877868465926e-05, "loss": 0.5546, "step": 5161 }, { "epoch": 0.893756087003571, "grad_norm": 1.2734375, "learning_rate": 1.6032423337629625e-05, "loss": 0.5416, "step": 5162 }, { "epoch": 0.8939292284384807, "grad_norm": 1.2109375, "learning_rate": 1.603096860612765e-05, "loss": 0.5492, "step": 5163 }, { "epoch": 0.8941023698733903, "grad_norm": 1.3125, "learning_rate": 1.6029513674008387e-05, "loss": 0.5789, "step": 5164 }, { "epoch": 0.8942755113083, "grad_norm": 1.5078125, "learning_rate": 1.602805854132024e-05, "loss": 0.6264, "step": 5165 }, { "epoch": 0.8944486527432096, "grad_norm": 1.3515625, "learning_rate": 1.6026603208111602e-05, "loss": 0.6021, "step": 5166 }, { "epoch": 0.8946217941781193, "grad_norm": 1.3359375, "learning_rate": 1.602514767443089e-05, "loss": 0.6326, "step": 5167 }, { "epoch": 0.8947949356130289, "grad_norm": 1.390625, "learning_rate": 1.6023691940326527e-05, "loss": 0.6768, "step": 5168 }, { "epoch": 0.8949680770479386, "grad_norm": 1.2734375, "learning_rate": 1.6022236005846935e-05, "loss": 0.5528, "step": 5169 }, { "epoch": 0.8951412184828482, "grad_norm": 1.421875, "learning_rate": 1.6020779871040538e-05, "loss": 0.6406, "step": 5170 }, { "epoch": 0.8953143599177579, "grad_norm": 1.484375, "learning_rate": 1.601932353595578e-05, "loss": 0.5853, "step": 5171 }, { "epoch": 0.8954875013526674, "grad_norm": 1.34375, "learning_rate": 1.6017867000641107e-05, "loss": 0.6378, "step": 5172 }, { "epoch": 0.8956606427875771, "grad_norm": 1.359375, "learning_rate": 1.601641026514496e-05, "loss": 0.5749, "step": 5173 }, { "epoch": 0.8958337842224867, "grad_norm": 1.328125, "learning_rate": 1.601495332951581e-05, "loss": 0.5736, "step": 5174 }, { "epoch": 0.8960069256573964, "grad_norm": 1.2578125, "learning_rate": 1.601349619380211e-05, "loss": 0.5718, "step": 5175 }, { "epoch": 0.896180067092306, "grad_norm": 1.3828125, "learning_rate": 1.6012038858052337e-05, "loss": 0.5787, "step": 5176 }, { "epoch": 0.8963532085272157, "grad_norm": 1.2890625, "learning_rate": 1.6010581322314966e-05, "loss": 0.5365, "step": 5177 }, { "epoch": 0.8965263499621253, "grad_norm": 1.25, "learning_rate": 1.6009123586638484e-05, "loss": 0.5502, "step": 5178 }, { "epoch": 0.896699491397035, "grad_norm": 1.25, "learning_rate": 1.600766565107138e-05, "loss": 0.5593, "step": 5179 }, { "epoch": 0.8968726328319446, "grad_norm": 1.40625, "learning_rate": 1.600620751566215e-05, "loss": 0.6318, "step": 5180 }, { "epoch": 0.8970457742668543, "grad_norm": 1.34375, "learning_rate": 1.6004749180459302e-05, "loss": 0.6155, "step": 5181 }, { "epoch": 0.8972189157017639, "grad_norm": 1.265625, "learning_rate": 1.6003290645511343e-05, "loss": 0.539, "step": 5182 }, { "epoch": 0.8973920571366735, "grad_norm": 1.3046875, "learning_rate": 1.6001831910866795e-05, "loss": 0.6094, "step": 5183 }, { "epoch": 0.8975651985715831, "grad_norm": 1.1796875, "learning_rate": 1.6000372976574175e-05, "loss": 0.5539, "step": 5184 }, { "epoch": 0.8977383400064928, "grad_norm": 1.421875, "learning_rate": 1.5998913842682023e-05, "loss": 0.664, "step": 5185 }, { "epoch": 0.8979114814414024, "grad_norm": 1.3515625, "learning_rate": 1.599745450923887e-05, "loss": 0.6112, "step": 5186 }, { "epoch": 0.8980846228763121, "grad_norm": 1.3125, "learning_rate": 1.5995994976293256e-05, "loss": 0.5657, "step": 5187 }, { "epoch": 0.8982577643112217, "grad_norm": 1.3046875, "learning_rate": 1.5994535243893742e-05, "loss": 0.5906, "step": 5188 }, { "epoch": 0.8984309057461314, "grad_norm": 1.40625, "learning_rate": 1.5993075312088875e-05, "loss": 0.5769, "step": 5189 }, { "epoch": 0.898604047181041, "grad_norm": 1.25, "learning_rate": 1.5991615180927232e-05, "loss": 0.6136, "step": 5190 }, { "epoch": 0.8987771886159507, "grad_norm": 1.3203125, "learning_rate": 1.599015485045737e-05, "loss": 0.5963, "step": 5191 }, { "epoch": 0.8989503300508603, "grad_norm": 1.3203125, "learning_rate": 1.598869432072788e-05, "loss": 0.5866, "step": 5192 }, { "epoch": 0.89912347148577, "grad_norm": 1.265625, "learning_rate": 1.5987233591787333e-05, "loss": 0.5578, "step": 5193 }, { "epoch": 0.8992966129206795, "grad_norm": 1.34375, "learning_rate": 1.598577266368432e-05, "loss": 0.5531, "step": 5194 }, { "epoch": 0.8994697543555892, "grad_norm": 1.375, "learning_rate": 1.598431153646745e-05, "loss": 0.6602, "step": 5195 }, { "epoch": 0.8996428957904988, "grad_norm": 1.3125, "learning_rate": 1.5982850210185313e-05, "loss": 0.6436, "step": 5196 }, { "epoch": 0.8998160372254085, "grad_norm": 1.34375, "learning_rate": 1.598138868488653e-05, "loss": 0.7071, "step": 5197 }, { "epoch": 0.8999891786603181, "grad_norm": 1.7734375, "learning_rate": 1.597992696061971e-05, "loss": 0.6377, "step": 5198 }, { "epoch": 0.9001623200952278, "grad_norm": 1.3828125, "learning_rate": 1.5978465037433483e-05, "loss": 0.5966, "step": 5199 }, { "epoch": 0.9003354615301374, "grad_norm": 1.3046875, "learning_rate": 1.5977002915376473e-05, "loss": 0.6107, "step": 5200 }, { "epoch": 0.9005086029650471, "grad_norm": 1.390625, "learning_rate": 1.597554059449732e-05, "loss": 0.6933, "step": 5201 }, { "epoch": 0.9006817443999567, "grad_norm": 1.2578125, "learning_rate": 1.597407807484467e-05, "loss": 0.5942, "step": 5202 }, { "epoch": 0.9008548858348664, "grad_norm": 1.203125, "learning_rate": 1.597261535646717e-05, "loss": 0.5906, "step": 5203 }, { "epoch": 0.901028027269776, "grad_norm": 1.296875, "learning_rate": 1.597115243941348e-05, "loss": 0.6677, "step": 5204 }, { "epoch": 0.9012011687046857, "grad_norm": 1.3125, "learning_rate": 1.5969689323732252e-05, "loss": 0.6584, "step": 5205 }, { "epoch": 0.9013743101395952, "grad_norm": 1.375, "learning_rate": 1.596822600947217e-05, "loss": 0.5996, "step": 5206 }, { "epoch": 0.9015474515745049, "grad_norm": 1.265625, "learning_rate": 1.5966762496681903e-05, "loss": 0.5421, "step": 5207 }, { "epoch": 0.9017205930094145, "grad_norm": 1.328125, "learning_rate": 1.5965298785410134e-05, "loss": 0.5742, "step": 5208 }, { "epoch": 0.9018937344443242, "grad_norm": 1.3203125, "learning_rate": 1.5963834875705556e-05, "loss": 0.5433, "step": 5209 }, { "epoch": 0.9020668758792338, "grad_norm": 1.2734375, "learning_rate": 1.5962370767616862e-05, "loss": 0.5343, "step": 5210 }, { "epoch": 0.9022400173141435, "grad_norm": 1.3046875, "learning_rate": 1.5960906461192758e-05, "loss": 0.5403, "step": 5211 }, { "epoch": 0.9024131587490531, "grad_norm": 1.3203125, "learning_rate": 1.595944195648195e-05, "loss": 0.6592, "step": 5212 }, { "epoch": 0.9025863001839628, "grad_norm": 1.3359375, "learning_rate": 1.595797725353316e-05, "loss": 0.5385, "step": 5213 }, { "epoch": 0.9027594416188724, "grad_norm": 1.3671875, "learning_rate": 1.5956512352395103e-05, "loss": 0.6099, "step": 5214 }, { "epoch": 0.9029325830537821, "grad_norm": 1.265625, "learning_rate": 1.5955047253116512e-05, "loss": 0.5225, "step": 5215 }, { "epoch": 0.9031057244886918, "grad_norm": 1.2890625, "learning_rate": 1.5953581955746123e-05, "loss": 0.5866, "step": 5216 }, { "epoch": 0.9032788659236013, "grad_norm": 1.3515625, "learning_rate": 1.5952116460332676e-05, "loss": 0.5423, "step": 5217 }, { "epoch": 0.903452007358511, "grad_norm": 1.2578125, "learning_rate": 1.5950650766924924e-05, "loss": 0.5673, "step": 5218 }, { "epoch": 0.9036251487934206, "grad_norm": 1.4453125, "learning_rate": 1.594918487557162e-05, "loss": 0.624, "step": 5219 }, { "epoch": 0.9037982902283302, "grad_norm": 1.28125, "learning_rate": 1.5947718786321528e-05, "loss": 0.5651, "step": 5220 }, { "epoch": 0.9039714316632399, "grad_norm": 1.3203125, "learning_rate": 1.594625249922341e-05, "loss": 0.6479, "step": 5221 }, { "epoch": 0.9041445730981496, "grad_norm": 1.25, "learning_rate": 1.5944786014326053e-05, "loss": 0.5685, "step": 5222 }, { "epoch": 0.9043177145330592, "grad_norm": 1.328125, "learning_rate": 1.594331933167823e-05, "loss": 0.5828, "step": 5223 }, { "epoch": 0.9044908559679689, "grad_norm": 1.359375, "learning_rate": 1.5941852451328735e-05, "loss": 0.5906, "step": 5224 }, { "epoch": 0.9046639974028785, "grad_norm": 1.3828125, "learning_rate": 1.5940385373326356e-05, "loss": 0.6011, "step": 5225 }, { "epoch": 0.9048371388377882, "grad_norm": 1.3203125, "learning_rate": 1.59389180977199e-05, "loss": 0.5793, "step": 5226 }, { "epoch": 0.9050102802726978, "grad_norm": 1.484375, "learning_rate": 1.5937450624558176e-05, "loss": 0.5261, "step": 5227 }, { "epoch": 0.9051834217076073, "grad_norm": 1.2734375, "learning_rate": 1.5935982953889998e-05, "loss": 0.5731, "step": 5228 }, { "epoch": 0.905356563142517, "grad_norm": 1.359375, "learning_rate": 1.5934515085764186e-05, "loss": 0.5982, "step": 5229 }, { "epoch": 0.9055297045774267, "grad_norm": 1.28125, "learning_rate": 1.5933047020229566e-05, "loss": 0.6299, "step": 5230 }, { "epoch": 0.9057028460123363, "grad_norm": 1.3671875, "learning_rate": 1.5931578757334975e-05, "loss": 0.5899, "step": 5231 }, { "epoch": 0.905875987447246, "grad_norm": 1.375, "learning_rate": 1.5930110297129257e-05, "loss": 0.5773, "step": 5232 }, { "epoch": 0.9060491288821556, "grad_norm": 1.3046875, "learning_rate": 1.5928641639661254e-05, "loss": 0.5428, "step": 5233 }, { "epoch": 0.9062222703170653, "grad_norm": 1.2890625, "learning_rate": 1.592717278497982e-05, "loss": 0.5796, "step": 5234 }, { "epoch": 0.9063954117519749, "grad_norm": 1.3671875, "learning_rate": 1.5925703733133823e-05, "loss": 0.6257, "step": 5235 }, { "epoch": 0.9065685531868846, "grad_norm": 1.40625, "learning_rate": 1.5924234484172126e-05, "loss": 0.7337, "step": 5236 }, { "epoch": 0.9067416946217942, "grad_norm": 1.421875, "learning_rate": 1.5922765038143598e-05, "loss": 0.6374, "step": 5237 }, { "epoch": 0.9069148360567039, "grad_norm": 1.28125, "learning_rate": 1.5921295395097125e-05, "loss": 0.5594, "step": 5238 }, { "epoch": 0.9070879774916134, "grad_norm": 1.28125, "learning_rate": 1.5919825555081593e-05, "loss": 0.5875, "step": 5239 }, { "epoch": 0.9072611189265231, "grad_norm": 1.328125, "learning_rate": 1.5918355518145897e-05, "loss": 0.5432, "step": 5240 }, { "epoch": 0.9074342603614327, "grad_norm": 1.4375, "learning_rate": 1.5916885284338937e-05, "loss": 0.6143, "step": 5241 }, { "epoch": 0.9076074017963424, "grad_norm": 1.3046875, "learning_rate": 1.5915414853709615e-05, "loss": 0.6364, "step": 5242 }, { "epoch": 0.907780543231252, "grad_norm": 1.3203125, "learning_rate": 1.591394422630685e-05, "loss": 0.5822, "step": 5243 }, { "epoch": 0.9079536846661617, "grad_norm": 1.46875, "learning_rate": 1.5912473402179558e-05, "loss": 0.5536, "step": 5244 }, { "epoch": 0.9081268261010713, "grad_norm": 1.3046875, "learning_rate": 1.5911002381376662e-05, "loss": 0.6584, "step": 5245 }, { "epoch": 0.908299967535981, "grad_norm": 1.328125, "learning_rate": 1.5909531163947105e-05, "loss": 0.5192, "step": 5246 }, { "epoch": 0.9084731089708906, "grad_norm": 1.296875, "learning_rate": 1.590805974993982e-05, "loss": 0.5756, "step": 5247 }, { "epoch": 0.9086462504058003, "grad_norm": 1.25, "learning_rate": 1.5906588139403752e-05, "loss": 0.5826, "step": 5248 }, { "epoch": 0.9088193918407099, "grad_norm": 1.8203125, "learning_rate": 1.5905116332387853e-05, "loss": 0.7246, "step": 5249 }, { "epoch": 0.9089925332756196, "grad_norm": 1.234375, "learning_rate": 1.5903644328941086e-05, "loss": 0.5619, "step": 5250 }, { "epoch": 0.9091656747105291, "grad_norm": 1.1875, "learning_rate": 1.590217212911241e-05, "loss": 0.5104, "step": 5251 }, { "epoch": 0.9093388161454388, "grad_norm": 1.4140625, "learning_rate": 1.590069973295081e-05, "loss": 0.6627, "step": 5252 }, { "epoch": 0.9095119575803484, "grad_norm": 1.3515625, "learning_rate": 1.589922714050525e-05, "loss": 0.5974, "step": 5253 }, { "epoch": 0.9096850990152581, "grad_norm": 1.234375, "learning_rate": 1.589775435182472e-05, "loss": 0.5393, "step": 5254 }, { "epoch": 0.9098582404501677, "grad_norm": 1.484375, "learning_rate": 1.5896281366958214e-05, "loss": 0.7444, "step": 5255 }, { "epoch": 0.9100313818850774, "grad_norm": 1.2890625, "learning_rate": 1.5894808185954727e-05, "loss": 0.5582, "step": 5256 }, { "epoch": 0.910204523319987, "grad_norm": 1.375, "learning_rate": 1.5893334808863266e-05, "loss": 0.5455, "step": 5257 }, { "epoch": 0.9103776647548967, "grad_norm": 1.4765625, "learning_rate": 1.5891861235732843e-05, "loss": 0.6342, "step": 5258 }, { "epoch": 0.9105508061898063, "grad_norm": 1.296875, "learning_rate": 1.5890387466612474e-05, "loss": 0.5309, "step": 5259 }, { "epoch": 0.910723947624716, "grad_norm": 1.25, "learning_rate": 1.588891350155118e-05, "loss": 0.5982, "step": 5260 }, { "epoch": 0.9108970890596256, "grad_norm": 1.3515625, "learning_rate": 1.5887439340598002e-05, "loss": 0.5914, "step": 5261 }, { "epoch": 0.9110702304945352, "grad_norm": 1.3984375, "learning_rate": 1.5885964983801966e-05, "loss": 0.6181, "step": 5262 }, { "epoch": 0.9112433719294448, "grad_norm": 1.375, "learning_rate": 1.588449043121212e-05, "loss": 0.6089, "step": 5263 }, { "epoch": 0.9114165133643545, "grad_norm": 1.4140625, "learning_rate": 1.5883015682877516e-05, "loss": 0.6094, "step": 5264 }, { "epoch": 0.9115896547992641, "grad_norm": 1.2734375, "learning_rate": 1.588154073884721e-05, "loss": 0.5518, "step": 5265 }, { "epoch": 0.9117627962341738, "grad_norm": 1.3203125, "learning_rate": 1.588006559917026e-05, "loss": 0.6409, "step": 5266 }, { "epoch": 0.9119359376690834, "grad_norm": 1.4375, "learning_rate": 1.5878590263895747e-05, "loss": 0.6283, "step": 5267 }, { "epoch": 0.9121090791039931, "grad_norm": 1.3671875, "learning_rate": 1.5877114733072737e-05, "loss": 0.5919, "step": 5268 }, { "epoch": 0.9122822205389027, "grad_norm": 1.375, "learning_rate": 1.5875639006750315e-05, "loss": 0.5805, "step": 5269 }, { "epoch": 0.9124553619738124, "grad_norm": 1.3203125, "learning_rate": 1.587416308497757e-05, "loss": 0.5605, "step": 5270 }, { "epoch": 0.912628503408722, "grad_norm": 1.3046875, "learning_rate": 1.5872686967803603e-05, "loss": 0.604, "step": 5271 }, { "epoch": 0.9128016448436317, "grad_norm": 1.25, "learning_rate": 1.5871210655277514e-05, "loss": 0.5331, "step": 5272 }, { "epoch": 0.9129747862785412, "grad_norm": 1.34375, "learning_rate": 1.586973414744841e-05, "loss": 0.5946, "step": 5273 }, { "epoch": 0.9131479277134509, "grad_norm": 1.3046875, "learning_rate": 1.5868257444365408e-05, "loss": 0.5081, "step": 5274 }, { "epoch": 0.9133210691483605, "grad_norm": 1.3359375, "learning_rate": 1.5866780546077625e-05, "loss": 0.707, "step": 5275 }, { "epoch": 0.9134942105832702, "grad_norm": 1.3828125, "learning_rate": 1.58653034526342e-05, "loss": 0.6715, "step": 5276 }, { "epoch": 0.9136673520181798, "grad_norm": 1.421875, "learning_rate": 1.5863826164084254e-05, "loss": 0.6539, "step": 5277 }, { "epoch": 0.9138404934530895, "grad_norm": 1.2578125, "learning_rate": 1.586234868047694e-05, "loss": 0.5247, "step": 5278 }, { "epoch": 0.9140136348879991, "grad_norm": 1.34375, "learning_rate": 1.58608710018614e-05, "loss": 0.6235, "step": 5279 }, { "epoch": 0.9141867763229088, "grad_norm": 1.2421875, "learning_rate": 1.585939312828679e-05, "loss": 0.5502, "step": 5280 }, { "epoch": 0.9143599177578184, "grad_norm": 1.3125, "learning_rate": 1.585791505980227e-05, "loss": 0.5223, "step": 5281 }, { "epoch": 0.9145330591927281, "grad_norm": 1.3125, "learning_rate": 1.5856436796457008e-05, "loss": 0.5052, "step": 5282 }, { "epoch": 0.9147062006276377, "grad_norm": 1.3828125, "learning_rate": 1.5854958338300173e-05, "loss": 0.6012, "step": 5283 }, { "epoch": 0.9148793420625474, "grad_norm": 1.25, "learning_rate": 1.5853479685380957e-05, "loss": 0.5353, "step": 5284 }, { "epoch": 0.9150524834974569, "grad_norm": 1.4375, "learning_rate": 1.5852000837748534e-05, "loss": 0.5766, "step": 5285 }, { "epoch": 0.9152256249323666, "grad_norm": 1.28125, "learning_rate": 1.58505217954521e-05, "loss": 0.5817, "step": 5286 }, { "epoch": 0.9153987663672762, "grad_norm": 1.3203125, "learning_rate": 1.5849042558540863e-05, "loss": 0.5978, "step": 5287 }, { "epoch": 0.9155719078021859, "grad_norm": 1.2265625, "learning_rate": 1.584756312706402e-05, "loss": 0.5687, "step": 5288 }, { "epoch": 0.9157450492370955, "grad_norm": 1.21875, "learning_rate": 1.5846083501070787e-05, "loss": 0.5723, "step": 5289 }, { "epoch": 0.9159181906720052, "grad_norm": 1.2578125, "learning_rate": 1.5844603680610383e-05, "loss": 0.5971, "step": 5290 }, { "epoch": 0.9160913321069148, "grad_norm": 1.28125, "learning_rate": 1.584312366573203e-05, "loss": 0.6709, "step": 5291 }, { "epoch": 0.9162644735418245, "grad_norm": 1.359375, "learning_rate": 1.5841643456484966e-05, "loss": 0.65, "step": 5292 }, { "epoch": 0.9164376149767341, "grad_norm": 1.390625, "learning_rate": 1.5840163052918427e-05, "loss": 0.5841, "step": 5293 }, { "epoch": 0.9166107564116438, "grad_norm": 1.28125, "learning_rate": 1.5838682455081657e-05, "loss": 0.6087, "step": 5294 }, { "epoch": 0.9167838978465535, "grad_norm": 1.234375, "learning_rate": 1.5837201663023907e-05, "loss": 0.5453, "step": 5295 }, { "epoch": 0.916957039281463, "grad_norm": 1.34375, "learning_rate": 1.5835720676794435e-05, "loss": 0.6048, "step": 5296 }, { "epoch": 0.9171301807163726, "grad_norm": 1.2578125, "learning_rate": 1.583423949644251e-05, "loss": 0.5585, "step": 5297 }, { "epoch": 0.9173033221512823, "grad_norm": 1.3671875, "learning_rate": 1.5832758122017394e-05, "loss": 0.5946, "step": 5298 }, { "epoch": 0.917476463586192, "grad_norm": 1.375, "learning_rate": 1.583127655356837e-05, "loss": 0.588, "step": 5299 }, { "epoch": 0.9176496050211016, "grad_norm": 1.2734375, "learning_rate": 1.5829794791144723e-05, "loss": 0.5964, "step": 5300 }, { "epoch": 0.9178227464560113, "grad_norm": 1.3828125, "learning_rate": 1.582831283479574e-05, "loss": 0.6022, "step": 5301 }, { "epoch": 0.9179958878909209, "grad_norm": 1.3125, "learning_rate": 1.5826830684570714e-05, "loss": 0.5931, "step": 5302 }, { "epoch": 0.9181690293258306, "grad_norm": 1.3984375, "learning_rate": 1.5825348340518958e-05, "loss": 0.5967, "step": 5303 }, { "epoch": 0.9183421707607402, "grad_norm": 1.28125, "learning_rate": 1.5823865802689777e-05, "loss": 0.5785, "step": 5304 }, { "epoch": 0.9185153121956499, "grad_norm": 1.40625, "learning_rate": 1.582238307113248e-05, "loss": 0.6652, "step": 5305 }, { "epoch": 0.9186884536305595, "grad_norm": 1.421875, "learning_rate": 1.5820900145896397e-05, "loss": 0.5075, "step": 5306 }, { "epoch": 0.918861595065469, "grad_norm": 1.3671875, "learning_rate": 1.5819417027030857e-05, "loss": 0.5587, "step": 5307 }, { "epoch": 0.9190347365003787, "grad_norm": 1.2734375, "learning_rate": 1.581793371458519e-05, "loss": 0.5258, "step": 5308 }, { "epoch": 0.9192078779352884, "grad_norm": 1.4140625, "learning_rate": 1.581645020860874e-05, "loss": 0.5943, "step": 5309 }, { "epoch": 0.919381019370198, "grad_norm": 1.2578125, "learning_rate": 1.581496650915086e-05, "loss": 0.5654, "step": 5310 }, { "epoch": 0.9195541608051077, "grad_norm": 1.3203125, "learning_rate": 1.58134826162609e-05, "loss": 0.5778, "step": 5311 }, { "epoch": 0.9197273022400173, "grad_norm": 1.3203125, "learning_rate": 1.5811998529988222e-05, "loss": 0.6067, "step": 5312 }, { "epoch": 0.919900443674927, "grad_norm": 1.28125, "learning_rate": 1.581051425038219e-05, "loss": 0.5295, "step": 5313 }, { "epoch": 0.9200735851098366, "grad_norm": 1.453125, "learning_rate": 1.5809029777492183e-05, "loss": 0.577, "step": 5314 }, { "epoch": 0.9202467265447463, "grad_norm": 1.40625, "learning_rate": 1.5807545111367574e-05, "loss": 0.6278, "step": 5315 }, { "epoch": 0.9204198679796559, "grad_norm": 1.4296875, "learning_rate": 1.580606025205776e-05, "loss": 0.4763, "step": 5316 }, { "epoch": 0.9205930094145656, "grad_norm": 1.296875, "learning_rate": 1.580457519961213e-05, "loss": 0.6364, "step": 5317 }, { "epoch": 0.9207661508494752, "grad_norm": 1.375, "learning_rate": 1.5803089954080078e-05, "loss": 0.5469, "step": 5318 }, { "epoch": 0.9209392922843848, "grad_norm": 1.265625, "learning_rate": 1.5801604515511017e-05, "loss": 0.5613, "step": 5319 }, { "epoch": 0.9211124337192944, "grad_norm": 1.28125, "learning_rate": 1.5800118883954355e-05, "loss": 0.5302, "step": 5320 }, { "epoch": 0.9212855751542041, "grad_norm": 1.3046875, "learning_rate": 1.5798633059459512e-05, "loss": 0.5567, "step": 5321 }, { "epoch": 0.9214587165891137, "grad_norm": 1.3515625, "learning_rate": 1.5797147042075918e-05, "loss": 0.6405, "step": 5322 }, { "epoch": 0.9216318580240234, "grad_norm": 1.359375, "learning_rate": 1.5795660831852998e-05, "loss": 0.5549, "step": 5323 }, { "epoch": 0.921804999458933, "grad_norm": 1.2734375, "learning_rate": 1.5794174428840192e-05, "loss": 0.5797, "step": 5324 }, { "epoch": 0.9219781408938427, "grad_norm": 1.375, "learning_rate": 1.5792687833086943e-05, "loss": 0.6312, "step": 5325 }, { "epoch": 0.9221512823287523, "grad_norm": 1.328125, "learning_rate": 1.5791201044642707e-05, "loss": 0.518, "step": 5326 }, { "epoch": 0.922324423763662, "grad_norm": 1.4140625, "learning_rate": 1.578971406355694e-05, "loss": 0.6485, "step": 5327 }, { "epoch": 0.9224975651985716, "grad_norm": 1.3515625, "learning_rate": 1.5788226889879103e-05, "loss": 0.6625, "step": 5328 }, { "epoch": 0.9226707066334813, "grad_norm": 1.234375, "learning_rate": 1.5786739523658664e-05, "loss": 0.6041, "step": 5329 }, { "epoch": 0.9228438480683908, "grad_norm": 1.25, "learning_rate": 1.5785251964945106e-05, "loss": 0.6421, "step": 5330 }, { "epoch": 0.9230169895033005, "grad_norm": 1.3515625, "learning_rate": 1.5783764213787905e-05, "loss": 0.6248, "step": 5331 }, { "epoch": 0.9231901309382101, "grad_norm": 1.2578125, "learning_rate": 1.578227627023656e-05, "loss": 0.5649, "step": 5332 }, { "epoch": 0.9233632723731198, "grad_norm": 1.3828125, "learning_rate": 1.5780788134340554e-05, "loss": 0.5747, "step": 5333 }, { "epoch": 0.9235364138080294, "grad_norm": 1.3671875, "learning_rate": 1.5779299806149398e-05, "loss": 0.5771, "step": 5334 }, { "epoch": 0.9237095552429391, "grad_norm": 1.2109375, "learning_rate": 1.57778112857126e-05, "loss": 0.5459, "step": 5335 }, { "epoch": 0.9238826966778487, "grad_norm": 1.453125, "learning_rate": 1.5776322573079673e-05, "loss": 0.5979, "step": 5336 }, { "epoch": 0.9240558381127584, "grad_norm": 1.265625, "learning_rate": 1.5774833668300136e-05, "loss": 0.5931, "step": 5337 }, { "epoch": 0.924228979547668, "grad_norm": 1.3671875, "learning_rate": 1.577334457142352e-05, "loss": 0.6034, "step": 5338 }, { "epoch": 0.9244021209825777, "grad_norm": 1.28125, "learning_rate": 1.577185528249936e-05, "loss": 0.579, "step": 5339 }, { "epoch": 0.9245752624174873, "grad_norm": 1.5234375, "learning_rate": 1.5770365801577193e-05, "loss": 0.6148, "step": 5340 }, { "epoch": 0.9247484038523969, "grad_norm": 1.25, "learning_rate": 1.576887612870657e-05, "loss": 0.5367, "step": 5341 }, { "epoch": 0.9249215452873065, "grad_norm": 1.234375, "learning_rate": 1.5767386263937038e-05, "loss": 0.602, "step": 5342 }, { "epoch": 0.9250946867222162, "grad_norm": 1.34375, "learning_rate": 1.5765896207318165e-05, "loss": 0.5344, "step": 5343 }, { "epoch": 0.9252678281571258, "grad_norm": 1.2109375, "learning_rate": 1.5764405958899513e-05, "loss": 0.5154, "step": 5344 }, { "epoch": 0.9254409695920355, "grad_norm": 1.453125, "learning_rate": 1.576291551873065e-05, "loss": 0.5298, "step": 5345 }, { "epoch": 0.9256141110269451, "grad_norm": 1.359375, "learning_rate": 1.5761424886861162e-05, "loss": 0.6126, "step": 5346 }, { "epoch": 0.9257872524618548, "grad_norm": 1.2734375, "learning_rate": 1.5759934063340627e-05, "loss": 0.558, "step": 5347 }, { "epoch": 0.9259603938967644, "grad_norm": 1.359375, "learning_rate": 1.5758443048218644e-05, "loss": 0.6401, "step": 5348 }, { "epoch": 0.9261335353316741, "grad_norm": 1.3046875, "learning_rate": 1.575695184154481e-05, "loss": 0.5668, "step": 5349 }, { "epoch": 0.9263066767665837, "grad_norm": 1.3515625, "learning_rate": 1.575546044336872e-05, "loss": 0.5653, "step": 5350 }, { "epoch": 0.9264798182014934, "grad_norm": 1.3046875, "learning_rate": 1.5753968853739992e-05, "loss": 0.5415, "step": 5351 }, { "epoch": 0.926652959636403, "grad_norm": 1.390625, "learning_rate": 1.5752477072708247e-05, "loss": 0.5643, "step": 5352 }, { "epoch": 0.9268261010713126, "grad_norm": 1.3046875, "learning_rate": 1.57509851003231e-05, "loss": 0.5508, "step": 5353 }, { "epoch": 0.9269992425062222, "grad_norm": 1.375, "learning_rate": 1.5749492936634183e-05, "loss": 0.6104, "step": 5354 }, { "epoch": 0.9271723839411319, "grad_norm": 1.2734375, "learning_rate": 1.5748000581691138e-05, "loss": 0.5585, "step": 5355 }, { "epoch": 0.9273455253760415, "grad_norm": 1.2734375, "learning_rate": 1.57465080355436e-05, "loss": 0.5431, "step": 5356 }, { "epoch": 0.9275186668109512, "grad_norm": 1.3359375, "learning_rate": 1.5745015298241223e-05, "loss": 0.5628, "step": 5357 }, { "epoch": 0.9276918082458608, "grad_norm": 1.3046875, "learning_rate": 1.574352236983366e-05, "loss": 0.5822, "step": 5358 }, { "epoch": 0.9278649496807705, "grad_norm": 1.2890625, "learning_rate": 1.574202925037057e-05, "loss": 0.6629, "step": 5359 }, { "epoch": 0.9280380911156801, "grad_norm": 1.328125, "learning_rate": 1.5740535939901628e-05, "loss": 0.5595, "step": 5360 }, { "epoch": 0.9282112325505898, "grad_norm": 1.4375, "learning_rate": 1.57390424384765e-05, "loss": 0.6029, "step": 5361 }, { "epoch": 0.9283843739854994, "grad_norm": 1.375, "learning_rate": 1.573754874614487e-05, "loss": 0.723, "step": 5362 }, { "epoch": 0.9285575154204091, "grad_norm": 1.4296875, "learning_rate": 1.5736054862956425e-05, "loss": 0.5495, "step": 5363 }, { "epoch": 0.9287306568553186, "grad_norm": 1.3671875, "learning_rate": 1.573456078896086e-05, "loss": 0.5794, "step": 5364 }, { "epoch": 0.9289037982902283, "grad_norm": 1.390625, "learning_rate": 1.5733066524207875e-05, "loss": 0.6683, "step": 5365 }, { "epoch": 0.9290769397251379, "grad_norm": 1.3046875, "learning_rate": 1.5731572068747173e-05, "loss": 0.6201, "step": 5366 }, { "epoch": 0.9292500811600476, "grad_norm": 1.421875, "learning_rate": 1.5730077422628465e-05, "loss": 0.6563, "step": 5367 }, { "epoch": 0.9294232225949572, "grad_norm": 1.390625, "learning_rate": 1.5728582585901476e-05, "loss": 0.574, "step": 5368 }, { "epoch": 0.9295963640298669, "grad_norm": 1.3125, "learning_rate": 1.5727087558615926e-05, "loss": 0.6239, "step": 5369 }, { "epoch": 0.9297695054647765, "grad_norm": 1.4609375, "learning_rate": 1.5725592340821547e-05, "loss": 0.7492, "step": 5370 }, { "epoch": 0.9299426468996862, "grad_norm": 1.3046875, "learning_rate": 1.572409693256808e-05, "loss": 0.6365, "step": 5371 }, { "epoch": 0.9301157883345958, "grad_norm": 1.328125, "learning_rate": 1.5722601333905264e-05, "loss": 0.6433, "step": 5372 }, { "epoch": 0.9302889297695055, "grad_norm": 1.328125, "learning_rate": 1.5721105544882852e-05, "loss": 0.5976, "step": 5373 }, { "epoch": 0.9304620712044152, "grad_norm": 1.1796875, "learning_rate": 1.57196095655506e-05, "loss": 0.592, "step": 5374 }, { "epoch": 0.9306352126393247, "grad_norm": 1.40625, "learning_rate": 1.5718113395958274e-05, "loss": 0.5774, "step": 5375 }, { "epoch": 0.9308083540742343, "grad_norm": 1.2734375, "learning_rate": 1.571661703615564e-05, "loss": 0.5701, "step": 5376 }, { "epoch": 0.930981495509144, "grad_norm": 1.2265625, "learning_rate": 1.5715120486192473e-05, "loss": 0.5391, "step": 5377 }, { "epoch": 0.9311546369440536, "grad_norm": 1.3203125, "learning_rate": 1.5713623746118558e-05, "loss": 0.6072, "step": 5378 }, { "epoch": 0.9313277783789633, "grad_norm": 1.34375, "learning_rate": 1.571212681598368e-05, "loss": 0.6177, "step": 5379 }, { "epoch": 0.931500919813873, "grad_norm": 1.28125, "learning_rate": 1.5710629695837642e-05, "loss": 0.6018, "step": 5380 }, { "epoch": 0.9316740612487826, "grad_norm": 1.3515625, "learning_rate": 1.570913238573023e-05, "loss": 0.584, "step": 5381 }, { "epoch": 0.9318472026836923, "grad_norm": 1.25, "learning_rate": 1.570763488571127e-05, "loss": 0.521, "step": 5382 }, { "epoch": 0.9320203441186019, "grad_norm": 1.359375, "learning_rate": 1.5706137195830555e-05, "loss": 0.6448, "step": 5383 }, { "epoch": 0.9321934855535116, "grad_norm": 1.28125, "learning_rate": 1.5704639316137923e-05, "loss": 0.5638, "step": 5384 }, { "epoch": 0.9323666269884212, "grad_norm": 1.265625, "learning_rate": 1.570314124668319e-05, "loss": 0.5454, "step": 5385 }, { "epoch": 0.9325397684233309, "grad_norm": 1.3125, "learning_rate": 1.5701642987516188e-05, "loss": 0.6031, "step": 5386 }, { "epoch": 0.9327129098582404, "grad_norm": 1.3046875, "learning_rate": 1.5700144538686767e-05, "loss": 0.6039, "step": 5387 }, { "epoch": 0.93288605129315, "grad_norm": 1.390625, "learning_rate": 1.569864590024476e-05, "loss": 0.6095, "step": 5388 }, { "epoch": 0.9330591927280597, "grad_norm": 1.3515625, "learning_rate": 1.569714707224002e-05, "loss": 0.5006, "step": 5389 }, { "epoch": 0.9332323341629694, "grad_norm": 1.296875, "learning_rate": 1.569564805472241e-05, "loss": 0.572, "step": 5390 }, { "epoch": 0.933405475597879, "grad_norm": 1.328125, "learning_rate": 1.5694148847741793e-05, "loss": 0.5896, "step": 5391 }, { "epoch": 0.9335786170327887, "grad_norm": 1.3671875, "learning_rate": 1.569264945134804e-05, "loss": 0.578, "step": 5392 }, { "epoch": 0.9337517584676983, "grad_norm": 1.3046875, "learning_rate": 1.5691149865591023e-05, "loss": 0.6147, "step": 5393 }, { "epoch": 0.933924899902608, "grad_norm": 1.3125, "learning_rate": 1.568965009052063e-05, "loss": 0.5683, "step": 5394 }, { "epoch": 0.9340980413375176, "grad_norm": 1.3359375, "learning_rate": 1.5688150126186743e-05, "loss": 0.5514, "step": 5395 }, { "epoch": 0.9342711827724273, "grad_norm": 1.2578125, "learning_rate": 1.5686649972639267e-05, "loss": 0.6313, "step": 5396 }, { "epoch": 0.9344443242073369, "grad_norm": 1.328125, "learning_rate": 1.5685149629928097e-05, "loss": 0.6027, "step": 5397 }, { "epoch": 0.9346174656422465, "grad_norm": 1.3046875, "learning_rate": 1.5683649098103147e-05, "loss": 0.5721, "step": 5398 }, { "epoch": 0.9347906070771561, "grad_norm": 1.234375, "learning_rate": 1.5682148377214327e-05, "loss": 0.6148, "step": 5399 }, { "epoch": 0.9349637485120658, "grad_norm": 1.265625, "learning_rate": 1.568064746731156e-05, "loss": 0.5585, "step": 5400 }, { "epoch": 0.9351368899469754, "grad_norm": 1.34375, "learning_rate": 1.567914636844477e-05, "loss": 0.4974, "step": 5401 }, { "epoch": 0.9353100313818851, "grad_norm": 1.3203125, "learning_rate": 1.5677645080663893e-05, "loss": 0.6012, "step": 5402 }, { "epoch": 0.9354831728167947, "grad_norm": 1.25, "learning_rate": 1.567614360401887e-05, "loss": 0.5501, "step": 5403 }, { "epoch": 0.9356563142517044, "grad_norm": 1.265625, "learning_rate": 1.5674641938559644e-05, "loss": 0.6171, "step": 5404 }, { "epoch": 0.935829455686614, "grad_norm": 1.34375, "learning_rate": 1.5673140084336167e-05, "loss": 0.6112, "step": 5405 }, { "epoch": 0.9360025971215237, "grad_norm": 1.3046875, "learning_rate": 1.56716380413984e-05, "loss": 0.5591, "step": 5406 }, { "epoch": 0.9361757385564333, "grad_norm": 1.265625, "learning_rate": 1.5670135809796308e-05, "loss": 0.5794, "step": 5407 }, { "epoch": 0.936348879991343, "grad_norm": 1.1875, "learning_rate": 1.566863338957986e-05, "loss": 0.6029, "step": 5408 }, { "epoch": 0.9365220214262525, "grad_norm": 1.3203125, "learning_rate": 1.5667130780799033e-05, "loss": 0.5968, "step": 5409 }, { "epoch": 0.9366951628611622, "grad_norm": 1.265625, "learning_rate": 1.5665627983503808e-05, "loss": 0.5945, "step": 5410 }, { "epoch": 0.9368683042960718, "grad_norm": 1.3203125, "learning_rate": 1.566412499774418e-05, "loss": 0.5713, "step": 5411 }, { "epoch": 0.9370414457309815, "grad_norm": 1.3046875, "learning_rate": 1.5662621823570147e-05, "loss": 0.6048, "step": 5412 }, { "epoch": 0.9372145871658911, "grad_norm": 1.2109375, "learning_rate": 1.5661118461031703e-05, "loss": 0.5409, "step": 5413 }, { "epoch": 0.9373877286008008, "grad_norm": 1.359375, "learning_rate": 1.5659614910178864e-05, "loss": 0.5938, "step": 5414 }, { "epoch": 0.9375608700357104, "grad_norm": 1.2890625, "learning_rate": 1.565811117106164e-05, "loss": 0.5607, "step": 5415 }, { "epoch": 0.9377340114706201, "grad_norm": 1.5546875, "learning_rate": 1.5656607243730055e-05, "loss": 0.6324, "step": 5416 }, { "epoch": 0.9379071529055297, "grad_norm": 1.1953125, "learning_rate": 1.5655103128234134e-05, "loss": 0.5689, "step": 5417 }, { "epoch": 0.9380802943404394, "grad_norm": 1.421875, "learning_rate": 1.5653598824623918e-05, "loss": 0.6348, "step": 5418 }, { "epoch": 0.938253435775349, "grad_norm": 1.3359375, "learning_rate": 1.5652094332949433e-05, "loss": 0.6408, "step": 5419 }, { "epoch": 0.9384265772102587, "grad_norm": 1.328125, "learning_rate": 1.5650589653260742e-05, "loss": 0.6102, "step": 5420 }, { "epoch": 0.9385997186451682, "grad_norm": 1.21875, "learning_rate": 1.5649084785607885e-05, "loss": 0.5326, "step": 5421 }, { "epoch": 0.9387728600800779, "grad_norm": 1.2578125, "learning_rate": 1.5647579730040924e-05, "loss": 0.6098, "step": 5422 }, { "epoch": 0.9389460015149875, "grad_norm": 1.3671875, "learning_rate": 1.5646074486609923e-05, "loss": 0.6234, "step": 5423 }, { "epoch": 0.9391191429498972, "grad_norm": 1.3359375, "learning_rate": 1.5644569055364958e-05, "loss": 0.5629, "step": 5424 }, { "epoch": 0.9392922843848068, "grad_norm": 1.3203125, "learning_rate": 1.5643063436356102e-05, "loss": 0.5432, "step": 5425 }, { "epoch": 0.9394654258197165, "grad_norm": 1.140625, "learning_rate": 1.5641557629633442e-05, "loss": 0.5298, "step": 5426 }, { "epoch": 0.9396385672546261, "grad_norm": 1.3125, "learning_rate": 1.5640051635247062e-05, "loss": 0.6488, "step": 5427 }, { "epoch": 0.9398117086895358, "grad_norm": 1.4453125, "learning_rate": 1.5638545453247065e-05, "loss": 0.5824, "step": 5428 }, { "epoch": 0.9399848501244454, "grad_norm": 1.296875, "learning_rate": 1.5637039083683548e-05, "loss": 0.6151, "step": 5429 }, { "epoch": 0.9401579915593551, "grad_norm": 1.3984375, "learning_rate": 1.5635532526606625e-05, "loss": 0.619, "step": 5430 }, { "epoch": 0.9403311329942647, "grad_norm": 1.3671875, "learning_rate": 1.563402578206641e-05, "loss": 0.6278, "step": 5431 }, { "epoch": 0.9405042744291743, "grad_norm": 1.265625, "learning_rate": 1.5632518850113016e-05, "loss": 0.5974, "step": 5432 }, { "epoch": 0.9406774158640839, "grad_norm": 1.2734375, "learning_rate": 1.5631011730796578e-05, "loss": 0.6133, "step": 5433 }, { "epoch": 0.9408505572989936, "grad_norm": 1.5625, "learning_rate": 1.5629504424167233e-05, "loss": 0.5888, "step": 5434 }, { "epoch": 0.9410236987339032, "grad_norm": 1.234375, "learning_rate": 1.5627996930275113e-05, "loss": 0.5925, "step": 5435 }, { "epoch": 0.9411968401688129, "grad_norm": 1.2109375, "learning_rate": 1.5626489249170368e-05, "loss": 0.544, "step": 5436 }, { "epoch": 0.9413699816037225, "grad_norm": 1.3671875, "learning_rate": 1.5624981380903146e-05, "loss": 0.6239, "step": 5437 }, { "epoch": 0.9415431230386322, "grad_norm": 1.40625, "learning_rate": 1.562347332552361e-05, "loss": 0.629, "step": 5438 }, { "epoch": 0.9417162644735418, "grad_norm": 1.28125, "learning_rate": 1.5621965083081926e-05, "loss": 0.5454, "step": 5439 }, { "epoch": 0.9418894059084515, "grad_norm": 1.203125, "learning_rate": 1.562045665362826e-05, "loss": 0.5776, "step": 5440 }, { "epoch": 0.9420625473433611, "grad_norm": 1.28125, "learning_rate": 1.5618948037212795e-05, "loss": 0.6245, "step": 5441 }, { "epoch": 0.9422356887782708, "grad_norm": 1.34375, "learning_rate": 1.561743923388571e-05, "loss": 0.6032, "step": 5442 }, { "epoch": 0.9424088302131803, "grad_norm": 1.1953125, "learning_rate": 1.5615930243697196e-05, "loss": 0.5986, "step": 5443 }, { "epoch": 0.94258197164809, "grad_norm": 1.34375, "learning_rate": 1.5614421066697446e-05, "loss": 0.5863, "step": 5444 }, { "epoch": 0.9427551130829996, "grad_norm": 1.2890625, "learning_rate": 1.561291170293667e-05, "loss": 0.574, "step": 5445 }, { "epoch": 0.9429282545179093, "grad_norm": 1.2890625, "learning_rate": 1.5611402152465066e-05, "loss": 0.6197, "step": 5446 }, { "epoch": 0.9431013959528189, "grad_norm": 1.328125, "learning_rate": 1.5609892415332854e-05, "loss": 0.5916, "step": 5447 }, { "epoch": 0.9432745373877286, "grad_norm": 1.296875, "learning_rate": 1.5608382491590254e-05, "loss": 0.5541, "step": 5448 }, { "epoch": 0.9434476788226382, "grad_norm": 1.453125, "learning_rate": 1.5606872381287495e-05, "loss": 0.65, "step": 5449 }, { "epoch": 0.9436208202575479, "grad_norm": 1.2890625, "learning_rate": 1.5605362084474807e-05, "loss": 0.6001, "step": 5450 }, { "epoch": 0.9437939616924576, "grad_norm": 1.328125, "learning_rate": 1.560385160120243e-05, "loss": 0.5564, "step": 5451 }, { "epoch": 0.9439671031273672, "grad_norm": 1.3359375, "learning_rate": 1.5602340931520612e-05, "loss": 0.5724, "step": 5452 }, { "epoch": 0.9441402445622769, "grad_norm": 1.1953125, "learning_rate": 1.5600830075479604e-05, "loss": 0.5419, "step": 5453 }, { "epoch": 0.9443133859971865, "grad_norm": 1.359375, "learning_rate": 1.5599319033129658e-05, "loss": 0.578, "step": 5454 }, { "epoch": 0.944486527432096, "grad_norm": 1.234375, "learning_rate": 1.5597807804521042e-05, "loss": 0.5841, "step": 5455 }, { "epoch": 0.9446596688670057, "grad_norm": 1.296875, "learning_rate": 1.559629638970403e-05, "loss": 0.6464, "step": 5456 }, { "epoch": 0.9448328103019153, "grad_norm": 1.328125, "learning_rate": 1.5594784788728893e-05, "loss": 0.734, "step": 5457 }, { "epoch": 0.945005951736825, "grad_norm": 1.3984375, "learning_rate": 1.5593273001645922e-05, "loss": 0.5795, "step": 5458 }, { "epoch": 0.9451790931717347, "grad_norm": 1.28125, "learning_rate": 1.559176102850539e-05, "loss": 0.5419, "step": 5459 }, { "epoch": 0.9453522346066443, "grad_norm": 1.2890625, "learning_rate": 1.559024886935761e-05, "loss": 0.5791, "step": 5460 }, { "epoch": 0.945525376041554, "grad_norm": 1.4453125, "learning_rate": 1.5588736524252873e-05, "loss": 0.5891, "step": 5461 }, { "epoch": 0.9456985174764636, "grad_norm": 1.2890625, "learning_rate": 1.5587223993241488e-05, "loss": 0.5399, "step": 5462 }, { "epoch": 0.9458716589113733, "grad_norm": 1.3046875, "learning_rate": 1.5585711276373768e-05, "loss": 0.7007, "step": 5463 }, { "epoch": 0.9460448003462829, "grad_norm": 1.3046875, "learning_rate": 1.558419837370004e-05, "loss": 0.6131, "step": 5464 }, { "epoch": 0.9462179417811926, "grad_norm": 1.4921875, "learning_rate": 1.558268528527062e-05, "loss": 0.6843, "step": 5465 }, { "epoch": 0.9463910832161021, "grad_norm": 1.484375, "learning_rate": 1.5581172011135843e-05, "loss": 0.5988, "step": 5466 }, { "epoch": 0.9465642246510118, "grad_norm": 1.484375, "learning_rate": 1.557965855134605e-05, "loss": 0.5552, "step": 5467 }, { "epoch": 0.9467373660859214, "grad_norm": 1.2265625, "learning_rate": 1.5578144905951583e-05, "loss": 0.6076, "step": 5468 }, { "epoch": 0.9469105075208311, "grad_norm": 1.359375, "learning_rate": 1.5576631075002796e-05, "loss": 0.6383, "step": 5469 }, { "epoch": 0.9470836489557407, "grad_norm": 1.328125, "learning_rate": 1.5575117058550044e-05, "loss": 0.6339, "step": 5470 }, { "epoch": 0.9472567903906504, "grad_norm": 1.25, "learning_rate": 1.557360285664369e-05, "loss": 0.6052, "step": 5471 }, { "epoch": 0.94742993182556, "grad_norm": 1.3671875, "learning_rate": 1.55720884693341e-05, "loss": 0.6209, "step": 5472 }, { "epoch": 0.9476030732604697, "grad_norm": 1.3046875, "learning_rate": 1.5570573896671652e-05, "loss": 0.6295, "step": 5473 }, { "epoch": 0.9477762146953793, "grad_norm": 1.484375, "learning_rate": 1.556905913870673e-05, "loss": 0.5671, "step": 5474 }, { "epoch": 0.947949356130289, "grad_norm": 1.328125, "learning_rate": 1.556754419548972e-05, "loss": 0.6138, "step": 5475 }, { "epoch": 0.9481224975651986, "grad_norm": 1.3125, "learning_rate": 1.5566029067071012e-05, "loss": 0.5982, "step": 5476 }, { "epoch": 0.9482956390001082, "grad_norm": 1.359375, "learning_rate": 1.5564513753501013e-05, "loss": 0.5749, "step": 5477 }, { "epoch": 0.9484687804350178, "grad_norm": 1.3828125, "learning_rate": 1.5562998254830124e-05, "loss": 0.6525, "step": 5478 }, { "epoch": 0.9486419218699275, "grad_norm": 1.34375, "learning_rate": 1.556148257110876e-05, "loss": 0.6417, "step": 5479 }, { "epoch": 0.9488150633048371, "grad_norm": 1.3125, "learning_rate": 1.5559966702387337e-05, "loss": 0.5364, "step": 5480 }, { "epoch": 0.9489882047397468, "grad_norm": 1.296875, "learning_rate": 1.555845064871628e-05, "loss": 0.5649, "step": 5481 }, { "epoch": 0.9491613461746564, "grad_norm": 1.28125, "learning_rate": 1.5556934410146024e-05, "loss": 0.6264, "step": 5482 }, { "epoch": 0.9493344876095661, "grad_norm": 1.3125, "learning_rate": 1.5555417986727003e-05, "loss": 0.6663, "step": 5483 }, { "epoch": 0.9495076290444757, "grad_norm": 1.3203125, "learning_rate": 1.5553901378509655e-05, "loss": 0.6114, "step": 5484 }, { "epoch": 0.9496807704793854, "grad_norm": 1.21875, "learning_rate": 1.555238458554444e-05, "loss": 0.577, "step": 5485 }, { "epoch": 0.949853911914295, "grad_norm": 1.3359375, "learning_rate": 1.5550867607881804e-05, "loss": 0.6709, "step": 5486 }, { "epoch": 0.9500270533492047, "grad_norm": 1.3515625, "learning_rate": 1.5549350445572213e-05, "loss": 0.5803, "step": 5487 }, { "epoch": 0.9502001947841143, "grad_norm": 1.2421875, "learning_rate": 1.5547833098666136e-05, "loss": 0.5761, "step": 5488 }, { "epoch": 0.9503733362190239, "grad_norm": 1.40625, "learning_rate": 1.554631556721404e-05, "loss": 0.5926, "step": 5489 }, { "epoch": 0.9505464776539335, "grad_norm": 1.375, "learning_rate": 1.5544797851266414e-05, "loss": 0.6226, "step": 5490 }, { "epoch": 0.9507196190888432, "grad_norm": 1.328125, "learning_rate": 1.554327995087374e-05, "loss": 0.5928, "step": 5491 }, { "epoch": 0.9508927605237528, "grad_norm": 1.4296875, "learning_rate": 1.5541761866086504e-05, "loss": 0.6875, "step": 5492 }, { "epoch": 0.9510659019586625, "grad_norm": 1.28125, "learning_rate": 1.5540243596955216e-05, "loss": 0.606, "step": 5493 }, { "epoch": 0.9512390433935721, "grad_norm": 1.3125, "learning_rate": 1.5538725143530372e-05, "loss": 0.6107, "step": 5494 }, { "epoch": 0.9514121848284818, "grad_norm": 1.484375, "learning_rate": 1.5537206505862486e-05, "loss": 0.5466, "step": 5495 }, { "epoch": 0.9515853262633914, "grad_norm": 1.5390625, "learning_rate": 1.5535687684002075e-05, "loss": 0.6385, "step": 5496 }, { "epoch": 0.9517584676983011, "grad_norm": 1.2578125, "learning_rate": 1.5534168677999656e-05, "loss": 0.5263, "step": 5497 }, { "epoch": 0.9519316091332107, "grad_norm": 1.34375, "learning_rate": 1.553264948790577e-05, "loss": 0.6289, "step": 5498 }, { "epoch": 0.9521047505681204, "grad_norm": 1.265625, "learning_rate": 1.553113011377094e-05, "loss": 0.618, "step": 5499 }, { "epoch": 0.9522778920030299, "grad_norm": 1.359375, "learning_rate": 1.5529610555645714e-05, "loss": 0.5358, "step": 5500 }, { "epoch": 0.9524510334379396, "grad_norm": 1.2421875, "learning_rate": 1.5528090813580642e-05, "loss": 0.5356, "step": 5501 }, { "epoch": 0.9526241748728492, "grad_norm": 1.21875, "learning_rate": 1.5526570887626265e-05, "loss": 0.6244, "step": 5502 }, { "epoch": 0.9527973163077589, "grad_norm": 1.2890625, "learning_rate": 1.552505077783316e-05, "loss": 0.5431, "step": 5503 }, { "epoch": 0.9529704577426685, "grad_norm": 1.2578125, "learning_rate": 1.552353048425188e-05, "loss": 0.5542, "step": 5504 }, { "epoch": 0.9531435991775782, "grad_norm": 1.2421875, "learning_rate": 1.5522010006933e-05, "loss": 0.488, "step": 5505 }, { "epoch": 0.9533167406124878, "grad_norm": 1.3515625, "learning_rate": 1.5520489345927095e-05, "loss": 0.5984, "step": 5506 }, { "epoch": 0.9534898820473975, "grad_norm": 1.3984375, "learning_rate": 1.5518968501284758e-05, "loss": 0.6148, "step": 5507 }, { "epoch": 0.9536630234823071, "grad_norm": 1.328125, "learning_rate": 1.5517447473056568e-05, "loss": 0.6506, "step": 5508 }, { "epoch": 0.9538361649172168, "grad_norm": 1.359375, "learning_rate": 1.5515926261293133e-05, "loss": 0.6042, "step": 5509 }, { "epoch": 0.9540093063521264, "grad_norm": 1.3046875, "learning_rate": 1.5514404866045045e-05, "loss": 0.5825, "step": 5510 }, { "epoch": 0.954182447787036, "grad_norm": 1.3671875, "learning_rate": 1.5512883287362916e-05, "loss": 0.6319, "step": 5511 }, { "epoch": 0.9543555892219456, "grad_norm": 1.3203125, "learning_rate": 1.5511361525297367e-05, "loss": 0.6178, "step": 5512 }, { "epoch": 0.9545287306568553, "grad_norm": 1.296875, "learning_rate": 1.5509839579899005e-05, "loss": 0.5677, "step": 5513 }, { "epoch": 0.9547018720917649, "grad_norm": 1.3203125, "learning_rate": 1.550831745121847e-05, "loss": 0.5406, "step": 5514 }, { "epoch": 0.9548750135266746, "grad_norm": 1.3671875, "learning_rate": 1.550679513930639e-05, "loss": 0.6367, "step": 5515 }, { "epoch": 0.9550481549615842, "grad_norm": 1.296875, "learning_rate": 1.5505272644213404e-05, "loss": 0.5788, "step": 5516 }, { "epoch": 0.9552212963964939, "grad_norm": 1.3984375, "learning_rate": 1.5503749965990155e-05, "loss": 0.5929, "step": 5517 }, { "epoch": 0.9553944378314035, "grad_norm": 1.5703125, "learning_rate": 1.5502227104687295e-05, "loss": 0.7156, "step": 5518 }, { "epoch": 0.9555675792663132, "grad_norm": 1.4296875, "learning_rate": 1.550070406035548e-05, "loss": 0.6212, "step": 5519 }, { "epoch": 0.9557407207012228, "grad_norm": 1.328125, "learning_rate": 1.5499180833045385e-05, "loss": 0.5864, "step": 5520 }, { "epoch": 0.9559138621361325, "grad_norm": 1.3359375, "learning_rate": 1.549765742280766e-05, "loss": 0.5261, "step": 5521 }, { "epoch": 0.9560870035710421, "grad_norm": 1.3671875, "learning_rate": 1.5496133829693e-05, "loss": 0.6081, "step": 5522 }, { "epoch": 0.9562601450059517, "grad_norm": 1.328125, "learning_rate": 1.5494610053752067e-05, "loss": 0.5331, "step": 5523 }, { "epoch": 0.9564332864408613, "grad_norm": 1.296875, "learning_rate": 1.5493086095035566e-05, "loss": 0.5873, "step": 5524 }, { "epoch": 0.956606427875771, "grad_norm": 1.3046875, "learning_rate": 1.5491561953594177e-05, "loss": 0.5739, "step": 5525 }, { "epoch": 0.9567795693106806, "grad_norm": 1.3359375, "learning_rate": 1.549003762947861e-05, "loss": 0.5529, "step": 5526 }, { "epoch": 0.9569527107455903, "grad_norm": 1.4765625, "learning_rate": 1.548851312273957e-05, "loss": 0.71, "step": 5527 }, { "epoch": 0.9571258521805, "grad_norm": 1.28125, "learning_rate": 1.5486988433427762e-05, "loss": 0.5192, "step": 5528 }, { "epoch": 0.9572989936154096, "grad_norm": 1.3515625, "learning_rate": 1.548546356159391e-05, "loss": 0.5858, "step": 5529 }, { "epoch": 0.9574721350503193, "grad_norm": 1.2421875, "learning_rate": 1.5483938507288737e-05, "loss": 0.5422, "step": 5530 }, { "epoch": 0.9576452764852289, "grad_norm": 1.3203125, "learning_rate": 1.548241327056297e-05, "loss": 0.5554, "step": 5531 }, { "epoch": 0.9578184179201386, "grad_norm": 1.2890625, "learning_rate": 1.548088785146735e-05, "loss": 0.6003, "step": 5532 }, { "epoch": 0.9579915593550482, "grad_norm": 1.296875, "learning_rate": 1.5479362250052616e-05, "loss": 0.5905, "step": 5533 }, { "epoch": 0.9581647007899577, "grad_norm": 1.390625, "learning_rate": 1.5477836466369522e-05, "loss": 0.6029, "step": 5534 }, { "epoch": 0.9583378422248674, "grad_norm": 1.2421875, "learning_rate": 1.547631050046881e-05, "loss": 0.5867, "step": 5535 }, { "epoch": 0.958510983659777, "grad_norm": 1.3359375, "learning_rate": 1.5474784352401257e-05, "loss": 0.5355, "step": 5536 }, { "epoch": 0.9586841250946867, "grad_norm": 1.28125, "learning_rate": 1.5473258022217615e-05, "loss": 0.5089, "step": 5537 }, { "epoch": 0.9588572665295964, "grad_norm": 1.4453125, "learning_rate": 1.5471731509968665e-05, "loss": 0.5833, "step": 5538 }, { "epoch": 0.959030407964506, "grad_norm": 1.34375, "learning_rate": 1.5470204815705187e-05, "loss": 0.5991, "step": 5539 }, { "epoch": 0.9592035493994157, "grad_norm": 1.265625, "learning_rate": 1.5468677939477957e-05, "loss": 0.5595, "step": 5540 }, { "epoch": 0.9593766908343253, "grad_norm": 1.3203125, "learning_rate": 1.546715088133777e-05, "loss": 0.6555, "step": 5541 }, { "epoch": 0.959549832269235, "grad_norm": 1.2890625, "learning_rate": 1.546562364133543e-05, "loss": 0.5643, "step": 5542 }, { "epoch": 0.9597229737041446, "grad_norm": 1.3359375, "learning_rate": 1.546409621952173e-05, "loss": 0.6062, "step": 5543 }, { "epoch": 0.9598961151390543, "grad_norm": 1.3125, "learning_rate": 1.5462568615947488e-05, "loss": 0.6276, "step": 5544 }, { "epoch": 0.9600692565739638, "grad_norm": 1.203125, "learning_rate": 1.5461040830663506e-05, "loss": 0.5147, "step": 5545 }, { "epoch": 0.9602423980088735, "grad_norm": 1.328125, "learning_rate": 1.5459512863720617e-05, "loss": 0.5836, "step": 5546 }, { "epoch": 0.9604155394437831, "grad_norm": 1.3359375, "learning_rate": 1.5457984715169643e-05, "loss": 0.5883, "step": 5547 }, { "epoch": 0.9605886808786928, "grad_norm": 1.4453125, "learning_rate": 1.5456456385061418e-05, "loss": 0.6297, "step": 5548 }, { "epoch": 0.9607618223136024, "grad_norm": 1.21875, "learning_rate": 1.5454927873446783e-05, "loss": 0.5767, "step": 5549 }, { "epoch": 0.9609349637485121, "grad_norm": 1.3671875, "learning_rate": 1.545339918037658e-05, "loss": 0.5439, "step": 5550 }, { "epoch": 0.9611081051834217, "grad_norm": 1.40625, "learning_rate": 1.545187030590166e-05, "loss": 0.6498, "step": 5551 }, { "epoch": 0.9612812466183314, "grad_norm": 1.3046875, "learning_rate": 1.5450341250072883e-05, "loss": 0.6446, "step": 5552 }, { "epoch": 0.961454388053241, "grad_norm": 1.3515625, "learning_rate": 1.5448812012941115e-05, "loss": 0.568, "step": 5553 }, { "epoch": 0.9616275294881507, "grad_norm": 1.3515625, "learning_rate": 1.544728259455722e-05, "loss": 0.5782, "step": 5554 }, { "epoch": 0.9618006709230603, "grad_norm": 1.1875, "learning_rate": 1.5445752994972072e-05, "loss": 0.5763, "step": 5555 }, { "epoch": 0.96197381235797, "grad_norm": 1.3515625, "learning_rate": 1.5444223214236557e-05, "loss": 0.784, "step": 5556 }, { "epoch": 0.9621469537928795, "grad_norm": 1.265625, "learning_rate": 1.5442693252401562e-05, "loss": 0.5502, "step": 5557 }, { "epoch": 0.9623200952277892, "grad_norm": 1.2890625, "learning_rate": 1.544116310951798e-05, "loss": 0.6132, "step": 5558 }, { "epoch": 0.9624932366626988, "grad_norm": 1.2265625, "learning_rate": 1.5439632785636707e-05, "loss": 0.5614, "step": 5559 }, { "epoch": 0.9626663780976085, "grad_norm": 1.1953125, "learning_rate": 1.5438102280808653e-05, "loss": 0.4584, "step": 5560 }, { "epoch": 0.9628395195325181, "grad_norm": 1.390625, "learning_rate": 1.543657159508473e-05, "loss": 0.5828, "step": 5561 }, { "epoch": 0.9630126609674278, "grad_norm": 1.3125, "learning_rate": 1.543504072851585e-05, "loss": 0.5666, "step": 5562 }, { "epoch": 0.9631858024023374, "grad_norm": 1.2890625, "learning_rate": 1.5433509681152937e-05, "loss": 0.557, "step": 5563 }, { "epoch": 0.9633589438372471, "grad_norm": 1.3984375, "learning_rate": 1.5431978453046926e-05, "loss": 0.6249, "step": 5564 }, { "epoch": 0.9635320852721567, "grad_norm": 1.359375, "learning_rate": 1.5430447044248753e-05, "loss": 0.6055, "step": 5565 }, { "epoch": 0.9637052267070664, "grad_norm": 1.3125, "learning_rate": 1.5428915454809355e-05, "loss": 0.6214, "step": 5566 }, { "epoch": 0.963878368141976, "grad_norm": 1.3359375, "learning_rate": 1.5427383684779677e-05, "loss": 0.612, "step": 5567 }, { "epoch": 0.9640515095768856, "grad_norm": 1.34375, "learning_rate": 1.5425851734210683e-05, "loss": 0.5724, "step": 5568 }, { "epoch": 0.9642246510117952, "grad_norm": 1.34375, "learning_rate": 1.5424319603153323e-05, "loss": 0.5494, "step": 5569 }, { "epoch": 0.9643977924467049, "grad_norm": 1.296875, "learning_rate": 1.542278729165857e-05, "loss": 0.5903, "step": 5570 }, { "epoch": 0.9645709338816145, "grad_norm": 1.3046875, "learning_rate": 1.5421254799777384e-05, "loss": 0.5479, "step": 5571 }, { "epoch": 0.9647440753165242, "grad_norm": 1.4375, "learning_rate": 1.5419722127560757e-05, "loss": 0.558, "step": 5572 }, { "epoch": 0.9649172167514338, "grad_norm": 1.2578125, "learning_rate": 1.541818927505966e-05, "loss": 0.5753, "step": 5573 }, { "epoch": 0.9650903581863435, "grad_norm": 1.2890625, "learning_rate": 1.5416656242325095e-05, "loss": 0.5861, "step": 5574 }, { "epoch": 0.9652634996212531, "grad_norm": 1.1640625, "learning_rate": 1.5415123029408046e-05, "loss": 0.5214, "step": 5575 }, { "epoch": 0.9654366410561628, "grad_norm": 1.3125, "learning_rate": 1.541358963635952e-05, "loss": 0.59, "step": 5576 }, { "epoch": 0.9656097824910724, "grad_norm": 1.28125, "learning_rate": 1.5412056063230526e-05, "loss": 0.5556, "step": 5577 }, { "epoch": 0.9657829239259821, "grad_norm": 1.2734375, "learning_rate": 1.5410522310072075e-05, "loss": 0.5672, "step": 5578 }, { "epoch": 0.9659560653608916, "grad_norm": 1.3046875, "learning_rate": 1.5408988376935186e-05, "loss": 0.5861, "step": 5579 }, { "epoch": 0.9661292067958013, "grad_norm": 1.3671875, "learning_rate": 1.5407454263870887e-05, "loss": 0.6084, "step": 5580 }, { "epoch": 0.9663023482307109, "grad_norm": 1.2890625, "learning_rate": 1.5405919970930208e-05, "loss": 0.5739, "step": 5581 }, { "epoch": 0.9664754896656206, "grad_norm": 1.3125, "learning_rate": 1.5404385498164186e-05, "loss": 0.6115, "step": 5582 }, { "epoch": 0.9666486311005302, "grad_norm": 1.328125, "learning_rate": 1.540285084562387e-05, "loss": 0.5084, "step": 5583 }, { "epoch": 0.9668217725354399, "grad_norm": 1.2265625, "learning_rate": 1.54013160133603e-05, "loss": 0.5953, "step": 5584 }, { "epoch": 0.9669949139703495, "grad_norm": 1.25, "learning_rate": 1.5399781001424536e-05, "loss": 0.5809, "step": 5585 }, { "epoch": 0.9671680554052592, "grad_norm": 1.296875, "learning_rate": 1.5398245809867643e-05, "loss": 0.5891, "step": 5586 }, { "epoch": 0.9673411968401688, "grad_norm": 1.296875, "learning_rate": 1.5396710438740687e-05, "loss": 0.6207, "step": 5587 }, { "epoch": 0.9675143382750785, "grad_norm": 1.328125, "learning_rate": 1.5395174888094736e-05, "loss": 0.5908, "step": 5588 }, { "epoch": 0.9676874797099881, "grad_norm": 1.46875, "learning_rate": 1.5393639157980872e-05, "loss": 0.6062, "step": 5589 }, { "epoch": 0.9678606211448978, "grad_norm": 1.2578125, "learning_rate": 1.5392103248450182e-05, "loss": 0.5587, "step": 5590 }, { "epoch": 0.9680337625798073, "grad_norm": 1.2578125, "learning_rate": 1.5390567159553754e-05, "loss": 0.5253, "step": 5591 }, { "epoch": 0.968206904014717, "grad_norm": 1.359375, "learning_rate": 1.5389030891342694e-05, "loss": 0.6217, "step": 5592 }, { "epoch": 0.9683800454496266, "grad_norm": 1.3046875, "learning_rate": 1.5387494443868095e-05, "loss": 0.5133, "step": 5593 }, { "epoch": 0.9685531868845363, "grad_norm": 1.4453125, "learning_rate": 1.538595781718107e-05, "loss": 0.5995, "step": 5594 }, { "epoch": 0.9687263283194459, "grad_norm": 1.2734375, "learning_rate": 1.5384421011332737e-05, "loss": 0.6215, "step": 5595 }, { "epoch": 0.9688994697543556, "grad_norm": 1.265625, "learning_rate": 1.5382884026374215e-05, "loss": 0.6147, "step": 5596 }, { "epoch": 0.9690726111892652, "grad_norm": 1.4609375, "learning_rate": 1.538134686235663e-05, "loss": 0.6193, "step": 5597 }, { "epoch": 0.9692457526241749, "grad_norm": 1.2890625, "learning_rate": 1.5379809519331112e-05, "loss": 0.5325, "step": 5598 }, { "epoch": 0.9694188940590845, "grad_norm": 1.40625, "learning_rate": 1.537827199734881e-05, "loss": 0.6118, "step": 5599 }, { "epoch": 0.9695920354939942, "grad_norm": 1.3125, "learning_rate": 1.5376734296460855e-05, "loss": 0.5494, "step": 5600 }, { "epoch": 0.9697651769289038, "grad_norm": 1.328125, "learning_rate": 1.537519641671841e-05, "loss": 0.5157, "step": 5601 }, { "epoch": 0.9699383183638134, "grad_norm": 1.328125, "learning_rate": 1.537365835817263e-05, "loss": 0.6211, "step": 5602 }, { "epoch": 0.970111459798723, "grad_norm": 1.3515625, "learning_rate": 1.5372120120874667e-05, "loss": 0.5706, "step": 5603 }, { "epoch": 0.9702846012336327, "grad_norm": 1.234375, "learning_rate": 1.5370581704875705e-05, "loss": 0.5394, "step": 5604 }, { "epoch": 0.9704577426685423, "grad_norm": 1.3984375, "learning_rate": 1.5369043110226907e-05, "loss": 0.6378, "step": 5605 }, { "epoch": 0.970630884103452, "grad_norm": 1.34375, "learning_rate": 1.536750433697946e-05, "loss": 0.6543, "step": 5606 }, { "epoch": 0.9708040255383616, "grad_norm": 1.3515625, "learning_rate": 1.5365965385184546e-05, "loss": 0.5181, "step": 5607 }, { "epoch": 0.9709771669732713, "grad_norm": 1.21875, "learning_rate": 1.5364426254893365e-05, "loss": 0.5701, "step": 5608 }, { "epoch": 0.971150308408181, "grad_norm": 1.390625, "learning_rate": 1.5362886946157107e-05, "loss": 0.6115, "step": 5609 }, { "epoch": 0.9713234498430906, "grad_norm": 1.296875, "learning_rate": 1.536134745902698e-05, "loss": 0.5973, "step": 5610 }, { "epoch": 0.9714965912780003, "grad_norm": 1.2421875, "learning_rate": 1.5359807793554195e-05, "loss": 0.5097, "step": 5611 }, { "epoch": 0.9716697327129099, "grad_norm": 1.2578125, "learning_rate": 1.5358267949789968e-05, "loss": 0.5979, "step": 5612 }, { "epoch": 0.9718428741478194, "grad_norm": 1.3203125, "learning_rate": 1.5356727927785523e-05, "loss": 0.5949, "step": 5613 }, { "epoch": 0.9720160155827291, "grad_norm": 1.234375, "learning_rate": 1.535518772759208e-05, "loss": 0.6173, "step": 5614 }, { "epoch": 0.9721891570176387, "grad_norm": 1.3125, "learning_rate": 1.535364734926088e-05, "loss": 0.5865, "step": 5615 }, { "epoch": 0.9723622984525484, "grad_norm": 1.2890625, "learning_rate": 1.5352106792843162e-05, "loss": 0.5935, "step": 5616 }, { "epoch": 0.972535439887458, "grad_norm": 1.3515625, "learning_rate": 1.5350566058390174e-05, "loss": 0.7047, "step": 5617 }, { "epoch": 0.9727085813223677, "grad_norm": 1.34375, "learning_rate": 1.5349025145953165e-05, "loss": 0.5616, "step": 5618 }, { "epoch": 0.9728817227572774, "grad_norm": 1.421875, "learning_rate": 1.534748405558339e-05, "loss": 0.6937, "step": 5619 }, { "epoch": 0.973054864192187, "grad_norm": 1.25, "learning_rate": 1.534594278733212e-05, "loss": 0.5352, "step": 5620 }, { "epoch": 0.9732280056270967, "grad_norm": 1.296875, "learning_rate": 1.5344401341250616e-05, "loss": 0.592, "step": 5621 }, { "epoch": 0.9734011470620063, "grad_norm": 1.3125, "learning_rate": 1.5342859717390163e-05, "loss": 0.6218, "step": 5622 }, { "epoch": 0.973574288496916, "grad_norm": 1.2578125, "learning_rate": 1.5341317915802032e-05, "loss": 0.5824, "step": 5623 }, { "epoch": 0.9737474299318256, "grad_norm": 1.3515625, "learning_rate": 1.5339775936537516e-05, "loss": 0.6779, "step": 5624 }, { "epoch": 0.9739205713667352, "grad_norm": 1.2734375, "learning_rate": 1.533823377964791e-05, "loss": 0.5754, "step": 5625 }, { "epoch": 0.9740937128016448, "grad_norm": 1.296875, "learning_rate": 1.533669144518451e-05, "loss": 0.5697, "step": 5626 }, { "epoch": 0.9742668542365545, "grad_norm": 1.421875, "learning_rate": 1.533514893319862e-05, "loss": 0.639, "step": 5627 }, { "epoch": 0.9744399956714641, "grad_norm": 1.296875, "learning_rate": 1.5333606243741552e-05, "loss": 0.6212, "step": 5628 }, { "epoch": 0.9746131371063738, "grad_norm": 1.296875, "learning_rate": 1.5332063376864624e-05, "loss": 0.6457, "step": 5629 }, { "epoch": 0.9747862785412834, "grad_norm": 1.359375, "learning_rate": 1.5330520332619162e-05, "loss": 0.5389, "step": 5630 }, { "epoch": 0.9749594199761931, "grad_norm": 1.3828125, "learning_rate": 1.5328977111056488e-05, "loss": 0.5954, "step": 5631 }, { "epoch": 0.9751325614111027, "grad_norm": 1.2734375, "learning_rate": 1.532743371222794e-05, "loss": 0.6265, "step": 5632 }, { "epoch": 0.9753057028460124, "grad_norm": 1.3046875, "learning_rate": 1.5325890136184854e-05, "loss": 0.602, "step": 5633 }, { "epoch": 0.975478844280922, "grad_norm": 1.2421875, "learning_rate": 1.5324346382978586e-05, "loss": 0.5409, "step": 5634 }, { "epoch": 0.9756519857158317, "grad_norm": 1.3359375, "learning_rate": 1.5322802452660476e-05, "loss": 0.6237, "step": 5635 }, { "epoch": 0.9758251271507412, "grad_norm": 1.25, "learning_rate": 1.5321258345281895e-05, "loss": 0.5915, "step": 5636 }, { "epoch": 0.9759982685856509, "grad_norm": 1.3203125, "learning_rate": 1.5319714060894195e-05, "loss": 0.5639, "step": 5637 }, { "epoch": 0.9761714100205605, "grad_norm": 1.265625, "learning_rate": 1.5318169599548755e-05, "loss": 0.6093, "step": 5638 }, { "epoch": 0.9763445514554702, "grad_norm": 1.3828125, "learning_rate": 1.5316624961296947e-05, "loss": 0.6546, "step": 5639 }, { "epoch": 0.9765176928903798, "grad_norm": 1.265625, "learning_rate": 1.5315080146190152e-05, "loss": 0.4837, "step": 5640 }, { "epoch": 0.9766908343252895, "grad_norm": 1.2265625, "learning_rate": 1.5313535154279754e-05, "loss": 0.5234, "step": 5641 }, { "epoch": 0.9768639757601991, "grad_norm": 1.3359375, "learning_rate": 1.5311989985617155e-05, "loss": 0.5284, "step": 5642 }, { "epoch": 0.9770371171951088, "grad_norm": 1.3125, "learning_rate": 1.531044464025375e-05, "loss": 0.5659, "step": 5643 }, { "epoch": 0.9772102586300184, "grad_norm": 1.3046875, "learning_rate": 1.5308899118240943e-05, "loss": 0.5851, "step": 5644 }, { "epoch": 0.9773834000649281, "grad_norm": 1.4609375, "learning_rate": 1.5307353419630147e-05, "loss": 0.5986, "step": 5645 }, { "epoch": 0.9775565414998377, "grad_norm": 1.390625, "learning_rate": 1.5305807544472776e-05, "loss": 0.6033, "step": 5646 }, { "epoch": 0.9777296829347473, "grad_norm": 1.2578125, "learning_rate": 1.5304261492820257e-05, "loss": 0.5288, "step": 5647 }, { "epoch": 0.9779028243696569, "grad_norm": 1.3359375, "learning_rate": 1.5302715264724016e-05, "loss": 0.622, "step": 5648 }, { "epoch": 0.9780759658045666, "grad_norm": 1.359375, "learning_rate": 1.5301168860235488e-05, "loss": 0.5754, "step": 5649 }, { "epoch": 0.9782491072394762, "grad_norm": 1.28125, "learning_rate": 1.529962227940611e-05, "loss": 0.5743, "step": 5650 }, { "epoch": 0.9784222486743859, "grad_norm": 1.4453125, "learning_rate": 1.529807552228734e-05, "loss": 0.6103, "step": 5651 }, { "epoch": 0.9785953901092955, "grad_norm": 1.296875, "learning_rate": 1.5296528588930616e-05, "loss": 0.6414, "step": 5652 }, { "epoch": 0.9787685315442052, "grad_norm": 1.25, "learning_rate": 1.5294981479387404e-05, "loss": 0.5348, "step": 5653 }, { "epoch": 0.9789416729791148, "grad_norm": 1.3515625, "learning_rate": 1.5293434193709165e-05, "loss": 0.6131, "step": 5654 }, { "epoch": 0.9791148144140245, "grad_norm": 1.3828125, "learning_rate": 1.529188673194737e-05, "loss": 0.5838, "step": 5655 }, { "epoch": 0.9792879558489341, "grad_norm": 1.2734375, "learning_rate": 1.5290339094153494e-05, "loss": 0.5785, "step": 5656 }, { "epoch": 0.9794610972838438, "grad_norm": 1.3046875, "learning_rate": 1.5288791280379016e-05, "loss": 0.6193, "step": 5657 }, { "epoch": 0.9796342387187534, "grad_norm": 1.2734375, "learning_rate": 1.5287243290675432e-05, "loss": 0.5765, "step": 5658 }, { "epoch": 0.979807380153663, "grad_norm": 1.3671875, "learning_rate": 1.528569512509422e-05, "loss": 0.597, "step": 5659 }, { "epoch": 0.9799805215885726, "grad_norm": 1.296875, "learning_rate": 1.5284146783686894e-05, "loss": 0.5369, "step": 5660 }, { "epoch": 0.9801536630234823, "grad_norm": 1.25, "learning_rate": 1.528259826650495e-05, "loss": 0.5968, "step": 5661 }, { "epoch": 0.9803268044583919, "grad_norm": 1.265625, "learning_rate": 1.5281049573599903e-05, "loss": 0.6072, "step": 5662 }, { "epoch": 0.9804999458933016, "grad_norm": 1.5234375, "learning_rate": 1.5279500705023267e-05, "loss": 0.6274, "step": 5663 }, { "epoch": 0.9806730873282112, "grad_norm": 1.3203125, "learning_rate": 1.5277951660826568e-05, "loss": 0.624, "step": 5664 }, { "epoch": 0.9808462287631209, "grad_norm": 1.34375, "learning_rate": 1.527640244106133e-05, "loss": 0.5091, "step": 5665 }, { "epoch": 0.9810193701980305, "grad_norm": 1.28125, "learning_rate": 1.5274853045779088e-05, "loss": 0.6153, "step": 5666 }, { "epoch": 0.9811925116329402, "grad_norm": 1.40625, "learning_rate": 1.527330347503138e-05, "loss": 0.6492, "step": 5667 }, { "epoch": 0.9813656530678498, "grad_norm": 1.3671875, "learning_rate": 1.5271753728869757e-05, "loss": 0.6861, "step": 5668 }, { "epoch": 0.9815387945027595, "grad_norm": 1.5078125, "learning_rate": 1.527020380734577e-05, "loss": 0.6201, "step": 5669 }, { "epoch": 0.981711935937669, "grad_norm": 1.25, "learning_rate": 1.526865371051097e-05, "loss": 0.5506, "step": 5670 }, { "epoch": 0.9818850773725787, "grad_norm": 1.34375, "learning_rate": 1.5267103438416925e-05, "loss": 0.557, "step": 5671 }, { "epoch": 0.9820582188074883, "grad_norm": 1.40625, "learning_rate": 1.5265552991115207e-05, "loss": 0.5897, "step": 5672 }, { "epoch": 0.982231360242398, "grad_norm": 1.3203125, "learning_rate": 1.5264002368657386e-05, "loss": 0.5607, "step": 5673 }, { "epoch": 0.9824045016773076, "grad_norm": 1.390625, "learning_rate": 1.5262451571095042e-05, "loss": 0.6865, "step": 5674 }, { "epoch": 0.9825776431122173, "grad_norm": 1.3515625, "learning_rate": 1.5260900598479763e-05, "loss": 0.5904, "step": 5675 }, { "epoch": 0.9827507845471269, "grad_norm": 1.4140625, "learning_rate": 1.5259349450863145e-05, "loss": 0.6002, "step": 5676 }, { "epoch": 0.9829239259820366, "grad_norm": 1.3125, "learning_rate": 1.5257798128296783e-05, "loss": 0.5096, "step": 5677 }, { "epoch": 0.9830970674169462, "grad_norm": 1.359375, "learning_rate": 1.5256246630832279e-05, "loss": 0.6257, "step": 5678 }, { "epoch": 0.9832702088518559, "grad_norm": 1.3125, "learning_rate": 1.5254694958521245e-05, "loss": 0.6017, "step": 5679 }, { "epoch": 0.9834433502867655, "grad_norm": 1.2109375, "learning_rate": 1.5253143111415298e-05, "loss": 0.5488, "step": 5680 }, { "epoch": 0.9836164917216751, "grad_norm": 1.2109375, "learning_rate": 1.5251591089566055e-05, "loss": 0.5163, "step": 5681 }, { "epoch": 0.9837896331565847, "grad_norm": 1.453125, "learning_rate": 1.5250038893025153e-05, "loss": 0.6343, "step": 5682 }, { "epoch": 0.9839627745914944, "grad_norm": 1.328125, "learning_rate": 1.5248486521844212e-05, "loss": 0.6098, "step": 5683 }, { "epoch": 0.984135916026404, "grad_norm": 1.46875, "learning_rate": 1.524693397607488e-05, "loss": 0.5311, "step": 5684 }, { "epoch": 0.9843090574613137, "grad_norm": 1.3359375, "learning_rate": 1.52453812557688e-05, "loss": 0.5354, "step": 5685 }, { "epoch": 0.9844821988962233, "grad_norm": 1.2421875, "learning_rate": 1.524382836097762e-05, "loss": 0.526, "step": 5686 }, { "epoch": 0.984655340331133, "grad_norm": 1.453125, "learning_rate": 1.5242275291752997e-05, "loss": 0.6226, "step": 5687 }, { "epoch": 0.9848284817660427, "grad_norm": 1.4140625, "learning_rate": 1.5240722048146594e-05, "loss": 0.6191, "step": 5688 }, { "epoch": 0.9850016232009523, "grad_norm": 1.3359375, "learning_rate": 1.5239168630210082e-05, "loss": 0.568, "step": 5689 }, { "epoch": 0.985174764635862, "grad_norm": 1.359375, "learning_rate": 1.5237615037995129e-05, "loss": 0.5626, "step": 5690 }, { "epoch": 0.9853479060707716, "grad_norm": 1.2109375, "learning_rate": 1.5236061271553414e-05, "loss": 0.5885, "step": 5691 }, { "epoch": 0.9855210475056813, "grad_norm": 1.2421875, "learning_rate": 1.523450733093663e-05, "loss": 0.5868, "step": 5692 }, { "epoch": 0.9856941889405908, "grad_norm": 1.3984375, "learning_rate": 1.5232953216196462e-05, "loss": 0.6454, "step": 5693 }, { "epoch": 0.9858673303755005, "grad_norm": 1.4453125, "learning_rate": 1.523139892738461e-05, "loss": 0.5466, "step": 5694 }, { "epoch": 0.9860404718104101, "grad_norm": 1.546875, "learning_rate": 1.522984446455277e-05, "loss": 0.6158, "step": 5695 }, { "epoch": 0.9862136132453198, "grad_norm": 1.3203125, "learning_rate": 1.522828982775266e-05, "loss": 0.5965, "step": 5696 }, { "epoch": 0.9863867546802294, "grad_norm": 1.46875, "learning_rate": 1.5226735017035986e-05, "loss": 0.6326, "step": 5697 }, { "epoch": 0.9865598961151391, "grad_norm": 1.2890625, "learning_rate": 1.5225180032454474e-05, "loss": 0.6115, "step": 5698 }, { "epoch": 0.9867330375500487, "grad_norm": 1.3359375, "learning_rate": 1.5223624874059848e-05, "loss": 0.5983, "step": 5699 }, { "epoch": 0.9869061789849584, "grad_norm": 1.4921875, "learning_rate": 1.5222069541903839e-05, "loss": 0.6215, "step": 5700 }, { "epoch": 0.987079320419868, "grad_norm": 1.421875, "learning_rate": 1.5220514036038183e-05, "loss": 0.5935, "step": 5701 }, { "epoch": 0.9872524618547777, "grad_norm": 1.359375, "learning_rate": 1.5218958356514625e-05, "loss": 0.593, "step": 5702 }, { "epoch": 0.9874256032896873, "grad_norm": 1.4296875, "learning_rate": 1.5217402503384914e-05, "loss": 0.5674, "step": 5703 }, { "epoch": 0.9875987447245969, "grad_norm": 1.3515625, "learning_rate": 1.5215846476700807e-05, "loss": 0.6392, "step": 5704 }, { "epoch": 0.9877718861595065, "grad_norm": 1.359375, "learning_rate": 1.5214290276514057e-05, "loss": 0.6043, "step": 5705 }, { "epoch": 0.9879450275944162, "grad_norm": 1.4609375, "learning_rate": 1.5212733902876438e-05, "loss": 0.6398, "step": 5706 }, { "epoch": 0.9881181690293258, "grad_norm": 1.359375, "learning_rate": 1.521117735583972e-05, "loss": 0.62, "step": 5707 }, { "epoch": 0.9882913104642355, "grad_norm": 1.2734375, "learning_rate": 1.5209620635455675e-05, "loss": 0.5727, "step": 5708 }, { "epoch": 0.9884644518991451, "grad_norm": 1.2265625, "learning_rate": 1.5208063741776093e-05, "loss": 0.6027, "step": 5709 }, { "epoch": 0.9886375933340548, "grad_norm": 1.2890625, "learning_rate": 1.5206506674852768e-05, "loss": 0.5562, "step": 5710 }, { "epoch": 0.9888107347689644, "grad_norm": 1.25, "learning_rate": 1.5204949434737481e-05, "loss": 0.5545, "step": 5711 }, { "epoch": 0.9889838762038741, "grad_norm": 1.3359375, "learning_rate": 1.5203392021482044e-05, "loss": 0.6228, "step": 5712 }, { "epoch": 0.9891570176387837, "grad_norm": 1.40625, "learning_rate": 1.520183443513826e-05, "loss": 0.6433, "step": 5713 }, { "epoch": 0.9893301590736934, "grad_norm": 1.2890625, "learning_rate": 1.520027667575794e-05, "loss": 0.6124, "step": 5714 }, { "epoch": 0.9895033005086029, "grad_norm": 1.4921875, "learning_rate": 1.5198718743392906e-05, "loss": 0.698, "step": 5715 }, { "epoch": 0.9896764419435126, "grad_norm": 1.46875, "learning_rate": 1.5197160638094981e-05, "loss": 0.6458, "step": 5716 }, { "epoch": 0.9898495833784222, "grad_norm": 1.375, "learning_rate": 1.5195602359915991e-05, "loss": 0.6171, "step": 5717 }, { "epoch": 0.9900227248133319, "grad_norm": 1.2734375, "learning_rate": 1.5194043908907774e-05, "loss": 0.5044, "step": 5718 }, { "epoch": 0.9901958662482415, "grad_norm": 1.3046875, "learning_rate": 1.5192485285122169e-05, "loss": 0.5384, "step": 5719 }, { "epoch": 0.9903690076831512, "grad_norm": 1.3359375, "learning_rate": 1.5190926488611028e-05, "loss": 0.599, "step": 5720 }, { "epoch": 0.9905421491180608, "grad_norm": 1.328125, "learning_rate": 1.51893675194262e-05, "loss": 0.6605, "step": 5721 }, { "epoch": 0.9907152905529705, "grad_norm": 1.34375, "learning_rate": 1.5187808377619542e-05, "loss": 0.6849, "step": 5722 }, { "epoch": 0.9908884319878801, "grad_norm": 1.3984375, "learning_rate": 1.518624906324292e-05, "loss": 0.6056, "step": 5723 }, { "epoch": 0.9910615734227898, "grad_norm": 1.2265625, "learning_rate": 1.5184689576348204e-05, "loss": 0.5135, "step": 5724 }, { "epoch": 0.9912347148576994, "grad_norm": 1.3515625, "learning_rate": 1.5183129916987267e-05, "loss": 0.5463, "step": 5725 }, { "epoch": 0.9914078562926091, "grad_norm": 1.3359375, "learning_rate": 1.5181570085211994e-05, "loss": 0.5785, "step": 5726 }, { "epoch": 0.9915809977275186, "grad_norm": 1.28125, "learning_rate": 1.5180010081074273e-05, "loss": 0.6052, "step": 5727 }, { "epoch": 0.9917541391624283, "grad_norm": 1.3984375, "learning_rate": 1.517844990462599e-05, "loss": 0.5946, "step": 5728 }, { "epoch": 0.9919272805973379, "grad_norm": 1.328125, "learning_rate": 1.517688955591905e-05, "loss": 0.565, "step": 5729 }, { "epoch": 0.9921004220322476, "grad_norm": 1.3515625, "learning_rate": 1.5175329035005356e-05, "loss": 0.5722, "step": 5730 }, { "epoch": 0.9922735634671572, "grad_norm": 1.34375, "learning_rate": 1.5173768341936814e-05, "loss": 0.6423, "step": 5731 }, { "epoch": 0.9924467049020669, "grad_norm": 1.34375, "learning_rate": 1.5172207476765347e-05, "loss": 0.648, "step": 5732 }, { "epoch": 0.9926198463369765, "grad_norm": 1.2734375, "learning_rate": 1.5170646439542868e-05, "loss": 0.6271, "step": 5733 }, { "epoch": 0.9927929877718862, "grad_norm": 1.3515625, "learning_rate": 1.5169085230321314e-05, "loss": 0.5629, "step": 5734 }, { "epoch": 0.9929661292067958, "grad_norm": 1.3125, "learning_rate": 1.5167523849152606e-05, "loss": 0.5212, "step": 5735 }, { "epoch": 0.9931392706417055, "grad_norm": 1.2734375, "learning_rate": 1.5165962296088693e-05, "loss": 0.5757, "step": 5736 }, { "epoch": 0.9933124120766151, "grad_norm": 1.2578125, "learning_rate": 1.5164400571181515e-05, "loss": 0.6007, "step": 5737 }, { "epoch": 0.9934855535115247, "grad_norm": 1.25, "learning_rate": 1.5162838674483017e-05, "loss": 0.6118, "step": 5738 }, { "epoch": 0.9936586949464343, "grad_norm": 1.234375, "learning_rate": 1.5161276606045168e-05, "loss": 0.5367, "step": 5739 }, { "epoch": 0.993831836381344, "grad_norm": 1.4296875, "learning_rate": 1.5159714365919915e-05, "loss": 0.5882, "step": 5740 }, { "epoch": 0.9940049778162536, "grad_norm": 1.2578125, "learning_rate": 1.5158151954159235e-05, "loss": 0.5594, "step": 5741 }, { "epoch": 0.9941781192511633, "grad_norm": 1.34375, "learning_rate": 1.5156589370815096e-05, "loss": 0.6091, "step": 5742 }, { "epoch": 0.9943512606860729, "grad_norm": 1.2421875, "learning_rate": 1.5155026615939478e-05, "loss": 0.6118, "step": 5743 }, { "epoch": 0.9945244021209826, "grad_norm": 1.3046875, "learning_rate": 1.515346368958437e-05, "loss": 0.5554, "step": 5744 }, { "epoch": 0.9946975435558922, "grad_norm": 1.2890625, "learning_rate": 1.5151900591801752e-05, "loss": 0.5254, "step": 5745 }, { "epoch": 0.9948706849908019, "grad_norm": 1.3125, "learning_rate": 1.5150337322643624e-05, "loss": 0.5552, "step": 5746 }, { "epoch": 0.9950438264257115, "grad_norm": 1.6484375, "learning_rate": 1.5148773882161991e-05, "loss": 0.7394, "step": 5747 }, { "epoch": 0.9952169678606212, "grad_norm": 1.4609375, "learning_rate": 1.5147210270408858e-05, "loss": 0.6288, "step": 5748 }, { "epoch": 0.9953901092955307, "grad_norm": 1.390625, "learning_rate": 1.5145646487436238e-05, "loss": 0.5804, "step": 5749 }, { "epoch": 0.9955632507304404, "grad_norm": 1.3125, "learning_rate": 1.5144082533296147e-05, "loss": 0.5748, "step": 5750 }, { "epoch": 0.99573639216535, "grad_norm": 1.3046875, "learning_rate": 1.5142518408040611e-05, "loss": 0.5737, "step": 5751 }, { "epoch": 0.9959095336002597, "grad_norm": 1.2734375, "learning_rate": 1.5140954111721659e-05, "loss": 0.5606, "step": 5752 }, { "epoch": 0.9960826750351693, "grad_norm": 1.265625, "learning_rate": 1.513938964439133e-05, "loss": 0.5708, "step": 5753 }, { "epoch": 0.996255816470079, "grad_norm": 1.3671875, "learning_rate": 1.5137825006101662e-05, "loss": 0.4915, "step": 5754 }, { "epoch": 0.9964289579049886, "grad_norm": 1.3125, "learning_rate": 1.5136260196904704e-05, "loss": 0.5718, "step": 5755 }, { "epoch": 0.9966020993398983, "grad_norm": 1.3203125, "learning_rate": 1.5134695216852509e-05, "loss": 0.5587, "step": 5756 }, { "epoch": 0.996775240774808, "grad_norm": 1.28125, "learning_rate": 1.5133130065997126e-05, "loss": 0.592, "step": 5757 }, { "epoch": 0.9969483822097176, "grad_norm": 1.203125, "learning_rate": 1.5131564744390632e-05, "loss": 0.5946, "step": 5758 }, { "epoch": 0.9971215236446273, "grad_norm": 1.2890625, "learning_rate": 1.512999925208509e-05, "loss": 0.4997, "step": 5759 }, { "epoch": 0.9972946650795369, "grad_norm": 1.2734375, "learning_rate": 1.5128433589132581e-05, "loss": 0.5556, "step": 5760 }, { "epoch": 0.9974678065144464, "grad_norm": 1.3203125, "learning_rate": 1.5126867755585175e-05, "loss": 0.6232, "step": 5761 }, { "epoch": 0.9976409479493561, "grad_norm": 1.203125, "learning_rate": 1.5125301751494968e-05, "loss": 0.5114, "step": 5762 }, { "epoch": 0.9978140893842657, "grad_norm": 1.234375, "learning_rate": 1.5123735576914049e-05, "loss": 0.5654, "step": 5763 }, { "epoch": 0.9979872308191754, "grad_norm": 1.46875, "learning_rate": 1.5122169231894516e-05, "loss": 0.5234, "step": 5764 }, { "epoch": 0.998160372254085, "grad_norm": 1.328125, "learning_rate": 1.512060271648848e-05, "loss": 0.5585, "step": 5765 }, { "epoch": 0.9983335136889947, "grad_norm": 1.3984375, "learning_rate": 1.5119036030748039e-05, "loss": 0.655, "step": 5766 }, { "epoch": 0.9985066551239044, "grad_norm": 1.34375, "learning_rate": 1.511746917472531e-05, "loss": 0.6203, "step": 5767 }, { "epoch": 0.998679796558814, "grad_norm": 1.3671875, "learning_rate": 1.5115902148472418e-05, "loss": 0.554, "step": 5768 }, { "epoch": 0.9988529379937237, "grad_norm": 1.2890625, "learning_rate": 1.5114334952041492e-05, "loss": 0.5338, "step": 5769 }, { "epoch": 0.9990260794286333, "grad_norm": 1.5, "learning_rate": 1.5112767585484657e-05, "loss": 0.5582, "step": 5770 }, { "epoch": 0.999199220863543, "grad_norm": 1.3671875, "learning_rate": 1.5111200048854055e-05, "loss": 0.552, "step": 5771 }, { "epoch": 0.9993723622984525, "grad_norm": 1.34375, "learning_rate": 1.5109632342201828e-05, "loss": 0.5904, "step": 5772 }, { "epoch": 0.9995455037333622, "grad_norm": 1.2734375, "learning_rate": 1.5108064465580123e-05, "loss": 0.5524, "step": 5773 }, { "epoch": 0.9997186451682718, "grad_norm": 1.3828125, "learning_rate": 1.5106496419041101e-05, "loss": 0.5289, "step": 5774 }, { "epoch": 0.9998917866031815, "grad_norm": 1.3828125, "learning_rate": 1.5104928202636915e-05, "loss": 0.6361, "step": 5775 }, { "epoch": 1.000064928038091, "grad_norm": 1.328125, "learning_rate": 1.5103359816419736e-05, "loss": 0.6446, "step": 5776 }, { "epoch": 1.000064928038091, "eval_loss": 0.6593221426010132, "eval_runtime": 2676.4112, "eval_samples_per_second": 18.711, "eval_steps_per_second": 18.711, "step": 5776 }, { "epoch": 1.0002380694730009, "grad_norm": 1.3046875, "learning_rate": 1.5101791260441734e-05, "loss": 0.5723, "step": 5777 }, { "epoch": 1.0004112109079104, "grad_norm": 1.3984375, "learning_rate": 1.510022253475508e-05, "loss": 0.6236, "step": 5778 }, { "epoch": 1.00058435234282, "grad_norm": 1.28125, "learning_rate": 1.5098653639411971e-05, "loss": 0.5965, "step": 5779 }, { "epoch": 1.0007574937777297, "grad_norm": 1.46875, "learning_rate": 1.5097084574464585e-05, "loss": 0.6492, "step": 5780 }, { "epoch": 1.0009306352126393, "grad_norm": 1.40625, "learning_rate": 1.5095515339965117e-05, "loss": 0.6855, "step": 5781 }, { "epoch": 1.001103776647549, "grad_norm": 1.1953125, "learning_rate": 1.5093945935965768e-05, "loss": 0.4919, "step": 5782 }, { "epoch": 1.0012769180824586, "grad_norm": 1.328125, "learning_rate": 1.5092376362518745e-05, "loss": 0.5067, "step": 5783 }, { "epoch": 1.0014500595173683, "grad_norm": 1.3671875, "learning_rate": 1.5090806619676258e-05, "loss": 0.6047, "step": 5784 }, { "epoch": 1.0016232009522779, "grad_norm": 1.3515625, "learning_rate": 1.5089236707490525e-05, "loss": 0.5806, "step": 5785 }, { "epoch": 1.0017963423871876, "grad_norm": 1.4140625, "learning_rate": 1.5087666626013765e-05, "loss": 0.6536, "step": 5786 }, { "epoch": 1.0019694838220972, "grad_norm": 1.2578125, "learning_rate": 1.5086096375298208e-05, "loss": 0.5474, "step": 5787 }, { "epoch": 1.002142625257007, "grad_norm": 1.2265625, "learning_rate": 1.5084525955396089e-05, "loss": 0.5508, "step": 5788 }, { "epoch": 1.0023157666919165, "grad_norm": 1.34375, "learning_rate": 1.5082955366359649e-05, "loss": 0.5661, "step": 5789 }, { "epoch": 1.002488908126826, "grad_norm": 1.2109375, "learning_rate": 1.5081384608241126e-05, "loss": 0.5952, "step": 5790 }, { "epoch": 1.0026620495617358, "grad_norm": 1.3359375, "learning_rate": 1.5079813681092779e-05, "loss": 0.5277, "step": 5791 }, { "epoch": 1.0028351909966453, "grad_norm": 1.234375, "learning_rate": 1.5078242584966853e-05, "loss": 0.5171, "step": 5792 }, { "epoch": 1.003008332431555, "grad_norm": 1.3046875, "learning_rate": 1.5076671319915621e-05, "loss": 0.5629, "step": 5793 }, { "epoch": 1.0031814738664646, "grad_norm": 1.265625, "learning_rate": 1.5075099885991345e-05, "loss": 0.599, "step": 5794 }, { "epoch": 1.0033546153013744, "grad_norm": 1.265625, "learning_rate": 1.5073528283246299e-05, "loss": 0.584, "step": 5795 }, { "epoch": 1.003527756736284, "grad_norm": 1.25, "learning_rate": 1.5071956511732762e-05, "loss": 0.5277, "step": 5796 }, { "epoch": 1.0037008981711937, "grad_norm": 1.2578125, "learning_rate": 1.5070384571503015e-05, "loss": 0.573, "step": 5797 }, { "epoch": 1.0038740396061032, "grad_norm": 1.3359375, "learning_rate": 1.5068812462609355e-05, "loss": 0.6417, "step": 5798 }, { "epoch": 1.004047181041013, "grad_norm": 1.375, "learning_rate": 1.5067240185104068e-05, "loss": 0.6015, "step": 5799 }, { "epoch": 1.0042203224759225, "grad_norm": 1.4375, "learning_rate": 1.5065667739039462e-05, "loss": 0.5818, "step": 5800 }, { "epoch": 1.004393463910832, "grad_norm": 1.3671875, "learning_rate": 1.5064095124467841e-05, "loss": 0.6467, "step": 5801 }, { "epoch": 1.0045666053457418, "grad_norm": 1.375, "learning_rate": 1.5062522341441519e-05, "loss": 0.5906, "step": 5802 }, { "epoch": 1.0047397467806514, "grad_norm": 1.359375, "learning_rate": 1.5060949390012813e-05, "loss": 0.5803, "step": 5803 }, { "epoch": 1.0049128882155611, "grad_norm": 1.25, "learning_rate": 1.5059376270234047e-05, "loss": 0.5316, "step": 5804 }, { "epoch": 1.0050860296504707, "grad_norm": 1.328125, "learning_rate": 1.5057802982157546e-05, "loss": 0.6477, "step": 5805 }, { "epoch": 1.0052591710853804, "grad_norm": 1.3671875, "learning_rate": 1.505622952583565e-05, "loss": 0.6473, "step": 5806 }, { "epoch": 1.00543231252029, "grad_norm": 1.4140625, "learning_rate": 1.5054655901320697e-05, "loss": 0.5319, "step": 5807 }, { "epoch": 1.0056054539551997, "grad_norm": 1.2734375, "learning_rate": 1.5053082108665034e-05, "loss": 0.5528, "step": 5808 }, { "epoch": 1.0057785953901093, "grad_norm": 1.2578125, "learning_rate": 1.5051508147921011e-05, "loss": 0.5776, "step": 5809 }, { "epoch": 1.005951736825019, "grad_norm": 1.34375, "learning_rate": 1.5049934019140989e-05, "loss": 0.5683, "step": 5810 }, { "epoch": 1.0061248782599286, "grad_norm": 1.3046875, "learning_rate": 1.5048359722377322e-05, "loss": 0.5964, "step": 5811 }, { "epoch": 1.0062980196948381, "grad_norm": 1.2734375, "learning_rate": 1.5046785257682387e-05, "loss": 0.5252, "step": 5812 }, { "epoch": 1.0064711611297479, "grad_norm": 1.3125, "learning_rate": 1.5045210625108553e-05, "loss": 0.5807, "step": 5813 }, { "epoch": 1.0066443025646574, "grad_norm": 1.2578125, "learning_rate": 1.5043635824708198e-05, "loss": 0.5246, "step": 5814 }, { "epoch": 1.0068174439995672, "grad_norm": 1.453125, "learning_rate": 1.5042060856533716e-05, "loss": 0.5703, "step": 5815 }, { "epoch": 1.0069905854344767, "grad_norm": 1.2421875, "learning_rate": 1.5040485720637485e-05, "loss": 0.5899, "step": 5816 }, { "epoch": 1.0071637268693865, "grad_norm": 1.25, "learning_rate": 1.503891041707191e-05, "loss": 0.5101, "step": 5817 }, { "epoch": 1.007336868304296, "grad_norm": 1.2578125, "learning_rate": 1.5037334945889392e-05, "loss": 0.6475, "step": 5818 }, { "epoch": 1.0075100097392058, "grad_norm": 1.296875, "learning_rate": 1.5035759307142335e-05, "loss": 0.6248, "step": 5819 }, { "epoch": 1.0076831511741153, "grad_norm": 1.265625, "learning_rate": 1.5034183500883153e-05, "loss": 0.6114, "step": 5820 }, { "epoch": 1.007856292609025, "grad_norm": 1.46875, "learning_rate": 1.5032607527164263e-05, "loss": 0.5526, "step": 5821 }, { "epoch": 1.0080294340439346, "grad_norm": 1.2265625, "learning_rate": 1.5031031386038095e-05, "loss": 0.5768, "step": 5822 }, { "epoch": 1.0082025754788442, "grad_norm": 1.25, "learning_rate": 1.5029455077557073e-05, "loss": 0.5866, "step": 5823 }, { "epoch": 1.008375716913754, "grad_norm": 1.3203125, "learning_rate": 1.5027878601773633e-05, "loss": 0.6233, "step": 5824 }, { "epoch": 1.0085488583486635, "grad_norm": 1.265625, "learning_rate": 1.5026301958740215e-05, "loss": 0.5955, "step": 5825 }, { "epoch": 1.0087219997835732, "grad_norm": 1.4375, "learning_rate": 1.5024725148509269e-05, "loss": 0.6372, "step": 5826 }, { "epoch": 1.0088951412184828, "grad_norm": 1.3984375, "learning_rate": 1.5023148171133242e-05, "loss": 0.5799, "step": 5827 }, { "epoch": 1.0090682826533925, "grad_norm": 1.25, "learning_rate": 1.5021571026664595e-05, "loss": 0.5762, "step": 5828 }, { "epoch": 1.009241424088302, "grad_norm": 1.3046875, "learning_rate": 1.501999371515579e-05, "loss": 0.5222, "step": 5829 }, { "epoch": 1.0094145655232118, "grad_norm": 1.453125, "learning_rate": 1.5018416236659295e-05, "loss": 0.711, "step": 5830 }, { "epoch": 1.0095877069581214, "grad_norm": 1.3125, "learning_rate": 1.5016838591227583e-05, "loss": 0.5826, "step": 5831 }, { "epoch": 1.0097608483930312, "grad_norm": 1.421875, "learning_rate": 1.5015260778913134e-05, "loss": 0.6753, "step": 5832 }, { "epoch": 1.0099339898279407, "grad_norm": 1.3984375, "learning_rate": 1.5013682799768435e-05, "loss": 0.6735, "step": 5833 }, { "epoch": 1.0101071312628502, "grad_norm": 1.3125, "learning_rate": 1.5012104653845978e-05, "loss": 0.5468, "step": 5834 }, { "epoch": 1.01028027269776, "grad_norm": 1.21875, "learning_rate": 1.5010526341198257e-05, "loss": 0.5242, "step": 5835 }, { "epoch": 1.0104534141326695, "grad_norm": 1.421875, "learning_rate": 1.5008947861877774e-05, "loss": 0.6067, "step": 5836 }, { "epoch": 1.0106265555675793, "grad_norm": 1.421875, "learning_rate": 1.5007369215937033e-05, "loss": 0.5976, "step": 5837 }, { "epoch": 1.0107996970024888, "grad_norm": 1.3125, "learning_rate": 1.500579040342855e-05, "loss": 0.5533, "step": 5838 }, { "epoch": 1.0109728384373986, "grad_norm": 1.1953125, "learning_rate": 1.5004211424404846e-05, "loss": 0.5203, "step": 5839 }, { "epoch": 1.0111459798723081, "grad_norm": 1.3359375, "learning_rate": 1.5002632278918441e-05, "loss": 0.5726, "step": 5840 }, { "epoch": 1.011319121307218, "grad_norm": 1.34375, "learning_rate": 1.5001052967021869e-05, "loss": 0.5787, "step": 5841 }, { "epoch": 1.0114922627421274, "grad_norm": 1.3203125, "learning_rate": 1.4999473488767659e-05, "loss": 0.606, "step": 5842 }, { "epoch": 1.0116654041770372, "grad_norm": 1.171875, "learning_rate": 1.4997893844208358e-05, "loss": 0.5147, "step": 5843 }, { "epoch": 1.0118385456119467, "grad_norm": 1.328125, "learning_rate": 1.4996314033396506e-05, "loss": 0.5456, "step": 5844 }, { "epoch": 1.0120116870468565, "grad_norm": 1.359375, "learning_rate": 1.4994734056384656e-05, "loss": 0.5897, "step": 5845 }, { "epoch": 1.012184828481766, "grad_norm": 1.3046875, "learning_rate": 1.4993153913225374e-05, "loss": 0.6343, "step": 5846 }, { "epoch": 1.0123579699166756, "grad_norm": 1.3359375, "learning_rate": 1.4991573603971209e-05, "loss": 0.6561, "step": 5847 }, { "epoch": 1.0125311113515854, "grad_norm": 1.4453125, "learning_rate": 1.4989993128674736e-05, "loss": 0.6236, "step": 5848 }, { "epoch": 1.012704252786495, "grad_norm": 1.2578125, "learning_rate": 1.4988412487388528e-05, "loss": 0.6316, "step": 5849 }, { "epoch": 1.0128773942214047, "grad_norm": 1.375, "learning_rate": 1.4986831680165168e-05, "loss": 0.6175, "step": 5850 }, { "epoch": 1.0130505356563142, "grad_norm": 1.2734375, "learning_rate": 1.4985250707057234e-05, "loss": 0.5473, "step": 5851 }, { "epoch": 1.013223677091224, "grad_norm": 1.2890625, "learning_rate": 1.4983669568117322e-05, "loss": 0.6338, "step": 5852 }, { "epoch": 1.0133968185261335, "grad_norm": 1.328125, "learning_rate": 1.4982088263398026e-05, "loss": 0.582, "step": 5853 }, { "epoch": 1.0135699599610433, "grad_norm": 1.3515625, "learning_rate": 1.4980506792951943e-05, "loss": 0.5982, "step": 5854 }, { "epoch": 1.0137431013959528, "grad_norm": 1.2578125, "learning_rate": 1.4978925156831689e-05, "loss": 0.5625, "step": 5855 }, { "epoch": 1.0139162428308626, "grad_norm": 1.3203125, "learning_rate": 1.4977343355089868e-05, "loss": 0.6219, "step": 5856 }, { "epoch": 1.014089384265772, "grad_norm": 1.359375, "learning_rate": 1.4975761387779101e-05, "loss": 0.5053, "step": 5857 }, { "epoch": 1.0142625257006816, "grad_norm": 1.328125, "learning_rate": 1.4974179254952015e-05, "loss": 0.6161, "step": 5858 }, { "epoch": 1.0144356671355914, "grad_norm": 1.2421875, "learning_rate": 1.4972596956661229e-05, "loss": 0.5918, "step": 5859 }, { "epoch": 1.014608808570501, "grad_norm": 1.4453125, "learning_rate": 1.497101449295939e-05, "loss": 0.6602, "step": 5860 }, { "epoch": 1.0147819500054107, "grad_norm": 1.2734375, "learning_rate": 1.4969431863899129e-05, "loss": 0.5723, "step": 5861 }, { "epoch": 1.0149550914403203, "grad_norm": 1.1796875, "learning_rate": 1.4967849069533094e-05, "loss": 0.53, "step": 5862 }, { "epoch": 1.01512823287523, "grad_norm": 1.265625, "learning_rate": 1.4966266109913936e-05, "loss": 0.6078, "step": 5863 }, { "epoch": 1.0153013743101396, "grad_norm": 1.234375, "learning_rate": 1.496468298509431e-05, "loss": 0.5988, "step": 5864 }, { "epoch": 1.0154745157450493, "grad_norm": 1.25, "learning_rate": 1.496309969512688e-05, "loss": 0.5402, "step": 5865 }, { "epoch": 1.0156476571799589, "grad_norm": 1.265625, "learning_rate": 1.4961516240064314e-05, "loss": 0.547, "step": 5866 }, { "epoch": 1.0158207986148686, "grad_norm": 1.25, "learning_rate": 1.495993261995928e-05, "loss": 0.5697, "step": 5867 }, { "epoch": 1.0159939400497782, "grad_norm": 1.328125, "learning_rate": 1.4958348834864462e-05, "loss": 0.6331, "step": 5868 }, { "epoch": 1.0161670814846877, "grad_norm": 1.3203125, "learning_rate": 1.4956764884832542e-05, "loss": 0.6376, "step": 5869 }, { "epoch": 1.0163402229195975, "grad_norm": 1.4453125, "learning_rate": 1.4955180769916209e-05, "loss": 0.609, "step": 5870 }, { "epoch": 1.016513364354507, "grad_norm": 1.2578125, "learning_rate": 1.4953596490168156e-05, "loss": 0.5739, "step": 5871 }, { "epoch": 1.0166865057894168, "grad_norm": 1.25, "learning_rate": 1.495201204564109e-05, "loss": 0.5971, "step": 5872 }, { "epoch": 1.0001298560761822, "grad_norm": 1.3046875, "learning_rate": 1.4950427436387709e-05, "loss": 0.5061, "step": 5873 }, { "epoch": 1.000302997511092, "grad_norm": 1.3828125, "learning_rate": 1.4948842662460727e-05, "loss": 0.5424, "step": 5874 }, { "epoch": 1.0004761389460015, "grad_norm": 1.2265625, "learning_rate": 1.494725772391286e-05, "loss": 0.4472, "step": 5875 }, { "epoch": 1.000649280380911, "grad_norm": 1.453125, "learning_rate": 1.4945672620796832e-05, "loss": 0.4498, "step": 5876 }, { "epoch": 1.0008224218158208, "grad_norm": 1.4921875, "learning_rate": 1.494408735316537e-05, "loss": 0.4837, "step": 5877 }, { "epoch": 1.0009955632507304, "grad_norm": 1.3671875, "learning_rate": 1.4942501921071207e-05, "loss": 0.4321, "step": 5878 }, { "epoch": 1.0011687046856401, "grad_norm": 1.4609375, "learning_rate": 1.494091632456708e-05, "loss": 0.4262, "step": 5879 }, { "epoch": 1.0013418461205497, "grad_norm": 1.421875, "learning_rate": 1.4939330563705739e-05, "loss": 0.4579, "step": 5880 }, { "epoch": 1.0015149875554594, "grad_norm": 1.4375, "learning_rate": 1.4937744638539926e-05, "loss": 0.5102, "step": 5881 }, { "epoch": 1.001688128990369, "grad_norm": 1.359375, "learning_rate": 1.49361585491224e-05, "loss": 0.488, "step": 5882 }, { "epoch": 1.0018612704252787, "grad_norm": 1.3515625, "learning_rate": 1.493457229550592e-05, "loss": 0.4827, "step": 5883 }, { "epoch": 1.0020344118601883, "grad_norm": 1.34375, "learning_rate": 1.4932985877743257e-05, "loss": 0.4543, "step": 5884 }, { "epoch": 1.002207553295098, "grad_norm": 1.453125, "learning_rate": 1.4931399295887172e-05, "loss": 0.4312, "step": 5885 }, { "epoch": 1.0023806947300076, "grad_norm": 1.4921875, "learning_rate": 1.4929812549990453e-05, "loss": 0.4935, "step": 5886 }, { "epoch": 1.0025538361649171, "grad_norm": 1.4609375, "learning_rate": 1.4928225640105875e-05, "loss": 0.4379, "step": 5887 }, { "epoch": 1.0027269775998269, "grad_norm": 1.3125, "learning_rate": 1.4926638566286231e-05, "loss": 0.4512, "step": 5888 }, { "epoch": 1.0029001190347364, "grad_norm": 1.328125, "learning_rate": 1.4925051328584307e-05, "loss": 0.4948, "step": 5889 }, { "epoch": 1.0030732604696462, "grad_norm": 1.3984375, "learning_rate": 1.492346392705291e-05, "loss": 0.4576, "step": 5890 }, { "epoch": 1.0032464019045557, "grad_norm": 1.3125, "learning_rate": 1.4921876361744842e-05, "loss": 0.4683, "step": 5891 }, { "epoch": 1.0034195433394655, "grad_norm": 1.453125, "learning_rate": 1.4920288632712906e-05, "loss": 0.5368, "step": 5892 }, { "epoch": 1.003592684774375, "grad_norm": 1.3671875, "learning_rate": 1.4918700740009926e-05, "loss": 0.4636, "step": 5893 }, { "epoch": 1.0037658262092848, "grad_norm": 1.5625, "learning_rate": 1.4917112683688718e-05, "loss": 0.4782, "step": 5894 }, { "epoch": 1.0039389676441943, "grad_norm": 1.4453125, "learning_rate": 1.4915524463802106e-05, "loss": 0.5768, "step": 5895 }, { "epoch": 1.004112109079104, "grad_norm": 1.3671875, "learning_rate": 1.4913936080402929e-05, "loss": 0.4863, "step": 5896 }, { "epoch": 1.0042852505140136, "grad_norm": 1.3515625, "learning_rate": 1.4912347533544012e-05, "loss": 0.6206, "step": 5897 }, { "epoch": 1.0044583919489232, "grad_norm": 1.40625, "learning_rate": 1.4910758823278208e-05, "loss": 0.5375, "step": 5898 }, { "epoch": 1.004631533383833, "grad_norm": 1.453125, "learning_rate": 1.490916994965836e-05, "loss": 0.4932, "step": 5899 }, { "epoch": 1.0048046748187425, "grad_norm": 1.390625, "learning_rate": 1.4907580912737323e-05, "loss": 0.498, "step": 5900 }, { "epoch": 1.0049778162536522, "grad_norm": 1.34375, "learning_rate": 1.4905991712567954e-05, "loss": 0.4934, "step": 5901 }, { "epoch": 1.0051509576885618, "grad_norm": 1.3125, "learning_rate": 1.4904402349203115e-05, "loss": 0.5349, "step": 5902 }, { "epoch": 1.0053240991234715, "grad_norm": 1.5390625, "learning_rate": 1.4902812822695678e-05, "loss": 0.4436, "step": 5903 }, { "epoch": 1.005497240558381, "grad_norm": 1.3515625, "learning_rate": 1.4901223133098519e-05, "loss": 0.5537, "step": 5904 }, { "epoch": 1.0056703819932908, "grad_norm": 1.4765625, "learning_rate": 1.4899633280464519e-05, "loss": 0.4902, "step": 5905 }, { "epoch": 1.0058435234282004, "grad_norm": 1.40625, "learning_rate": 1.4898043264846558e-05, "loss": 0.4675, "step": 5906 }, { "epoch": 1.0060166648631101, "grad_norm": 1.3515625, "learning_rate": 1.4896453086297531e-05, "loss": 0.4375, "step": 5907 }, { "epoch": 1.0061898062980197, "grad_norm": 1.4140625, "learning_rate": 1.4894862744870335e-05, "loss": 0.4594, "step": 5908 }, { "epoch": 1.0063629477329294, "grad_norm": 1.421875, "learning_rate": 1.489327224061787e-05, "loss": 0.4737, "step": 5909 }, { "epoch": 1.006536089167839, "grad_norm": 1.3671875, "learning_rate": 1.4891681573593046e-05, "loss": 0.4663, "step": 5910 }, { "epoch": 1.0067092306027485, "grad_norm": 1.4453125, "learning_rate": 1.4890090743848774e-05, "loss": 0.5532, "step": 5911 }, { "epoch": 1.0068823720376583, "grad_norm": 1.5703125, "learning_rate": 1.4888499751437973e-05, "loss": 0.4515, "step": 5912 }, { "epoch": 1.0070555134725678, "grad_norm": 1.3828125, "learning_rate": 1.4886908596413562e-05, "loss": 0.4726, "step": 5913 }, { "epoch": 1.0072286549074776, "grad_norm": 1.359375, "learning_rate": 1.488531727882848e-05, "loss": 0.5415, "step": 5914 }, { "epoch": 1.0074017963423871, "grad_norm": 1.359375, "learning_rate": 1.4883725798735652e-05, "loss": 0.4828, "step": 5915 }, { "epoch": 1.007574937777297, "grad_norm": 1.359375, "learning_rate": 1.488213415618802e-05, "loss": 0.3872, "step": 5916 }, { "epoch": 1.0077480792122064, "grad_norm": 1.359375, "learning_rate": 1.4880542351238533e-05, "loss": 0.471, "step": 5917 }, { "epoch": 1.0079212206471162, "grad_norm": 1.2890625, "learning_rate": 1.4878950383940139e-05, "loss": 0.4653, "step": 5918 }, { "epoch": 1.0080943620820257, "grad_norm": 1.421875, "learning_rate": 1.4877358254345792e-05, "loss": 0.4572, "step": 5919 }, { "epoch": 1.0082675035169355, "grad_norm": 1.3828125, "learning_rate": 1.4875765962508458e-05, "loss": 0.5176, "step": 5920 }, { "epoch": 1.008440644951845, "grad_norm": 1.4140625, "learning_rate": 1.48741735084811e-05, "loss": 0.4532, "step": 5921 }, { "epoch": 1.0086137863867546, "grad_norm": 1.421875, "learning_rate": 1.4872580892316693e-05, "loss": 0.4866, "step": 5922 }, { "epoch": 1.0087869278216643, "grad_norm": 1.484375, "learning_rate": 1.4870988114068212e-05, "loss": 0.4314, "step": 5923 }, { "epoch": 1.0089600692565739, "grad_norm": 1.3984375, "learning_rate": 1.4869395173788642e-05, "loss": 0.5305, "step": 5924 }, { "epoch": 1.0091332106914837, "grad_norm": 1.421875, "learning_rate": 1.486780207153097e-05, "loss": 0.4601, "step": 5925 }, { "epoch": 1.0093063521263932, "grad_norm": 1.53125, "learning_rate": 1.4866208807348192e-05, "loss": 0.4902, "step": 5926 }, { "epoch": 1.009479493561303, "grad_norm": 1.3359375, "learning_rate": 1.4864615381293304e-05, "loss": 0.5173, "step": 5927 }, { "epoch": 1.0096526349962125, "grad_norm": 1.421875, "learning_rate": 1.4863021793419311e-05, "loss": 0.5366, "step": 5928 }, { "epoch": 1.0098257764311223, "grad_norm": 1.5390625, "learning_rate": 1.4861428043779226e-05, "loss": 0.4889, "step": 5929 }, { "epoch": 1.0099989178660318, "grad_norm": 1.375, "learning_rate": 1.485983413242606e-05, "loss": 0.5332, "step": 5930 }, { "epoch": 1.0101720593009416, "grad_norm": 1.34375, "learning_rate": 1.4858240059412841e-05, "loss": 0.5036, "step": 5931 }, { "epoch": 1.010345200735851, "grad_norm": 1.453125, "learning_rate": 1.4856645824792585e-05, "loss": 0.5085, "step": 5932 }, { "epoch": 1.0105183421707606, "grad_norm": 1.3515625, "learning_rate": 1.4855051428618333e-05, "loss": 0.47, "step": 5933 }, { "epoch": 1.0106914836056704, "grad_norm": 1.453125, "learning_rate": 1.4853456870943118e-05, "loss": 0.4544, "step": 5934 }, { "epoch": 1.01086462504058, "grad_norm": 1.3203125, "learning_rate": 1.4851862151819975e-05, "loss": 0.4902, "step": 5935 }, { "epoch": 1.0110377664754897, "grad_norm": 1.4296875, "learning_rate": 1.4850267271301965e-05, "loss": 0.4776, "step": 5936 }, { "epoch": 1.0112109079103992, "grad_norm": 1.4453125, "learning_rate": 1.4848672229442132e-05, "loss": 0.4463, "step": 5937 }, { "epoch": 1.011384049345309, "grad_norm": 1.40625, "learning_rate": 1.4847077026293537e-05, "loss": 0.4316, "step": 5938 }, { "epoch": 1.0115571907802186, "grad_norm": 1.4140625, "learning_rate": 1.484548166190924e-05, "loss": 0.5563, "step": 5939 }, { "epoch": 1.0117303322151283, "grad_norm": 1.3046875, "learning_rate": 1.4843886136342316e-05, "loss": 0.4588, "step": 5940 }, { "epoch": 1.0119034736500379, "grad_norm": 1.4140625, "learning_rate": 1.4842290449645835e-05, "loss": 0.4778, "step": 5941 }, { "epoch": 1.0120766150849476, "grad_norm": 1.4609375, "learning_rate": 1.484069460187288e-05, "loss": 0.4545, "step": 5942 }, { "epoch": 1.0122497565198572, "grad_norm": 1.546875, "learning_rate": 1.4839098593076535e-05, "loss": 0.5021, "step": 5943 }, { "epoch": 1.0124228979547667, "grad_norm": 1.4453125, "learning_rate": 1.483750242330989e-05, "loss": 0.4412, "step": 5944 }, { "epoch": 1.0125960393896765, "grad_norm": 1.3125, "learning_rate": 1.4835906092626037e-05, "loss": 0.4654, "step": 5945 }, { "epoch": 1.012769180824586, "grad_norm": 1.3671875, "learning_rate": 1.4834309601078084e-05, "loss": 0.4754, "step": 5946 }, { "epoch": 1.0129423222594958, "grad_norm": 1.3203125, "learning_rate": 1.4832712948719135e-05, "loss": 0.4877, "step": 5947 }, { "epoch": 1.0131154636944053, "grad_norm": 1.4296875, "learning_rate": 1.4831116135602299e-05, "loss": 0.4468, "step": 5948 }, { "epoch": 1.013288605129315, "grad_norm": 1.4296875, "learning_rate": 1.4829519161780697e-05, "loss": 0.5044, "step": 5949 }, { "epoch": 1.0134617465642246, "grad_norm": 1.4140625, "learning_rate": 1.482792202730745e-05, "loss": 0.4451, "step": 5950 }, { "epoch": 1.0136348879991344, "grad_norm": 1.3671875, "learning_rate": 1.4826324732235686e-05, "loss": 0.4155, "step": 5951 }, { "epoch": 1.013808029434044, "grad_norm": 1.3203125, "learning_rate": 1.482472727661854e-05, "loss": 0.4557, "step": 5952 }, { "epoch": 1.0139811708689537, "grad_norm": 1.359375, "learning_rate": 1.4823129660509146e-05, "loss": 0.4821, "step": 5953 }, { "epoch": 1.0141543123038632, "grad_norm": 1.3984375, "learning_rate": 1.4821531883960651e-05, "loss": 0.506, "step": 5954 }, { "epoch": 1.0143274537387728, "grad_norm": 1.3046875, "learning_rate": 1.4819933947026205e-05, "loss": 0.4993, "step": 5955 }, { "epoch": 1.0145005951736825, "grad_norm": 1.3984375, "learning_rate": 1.4818335849758963e-05, "loss": 0.5009, "step": 5956 }, { "epoch": 1.014673736608592, "grad_norm": 1.2734375, "learning_rate": 1.4816737592212083e-05, "loss": 0.5006, "step": 5957 }, { "epoch": 1.0148468780435018, "grad_norm": 1.390625, "learning_rate": 1.4815139174438729e-05, "loss": 0.4052, "step": 5958 }, { "epoch": 1.0150200194784114, "grad_norm": 1.609375, "learning_rate": 1.4813540596492074e-05, "loss": 0.5502, "step": 5959 }, { "epoch": 1.0151931609133211, "grad_norm": 1.2734375, "learning_rate": 1.4811941858425297e-05, "loss": 0.4528, "step": 5960 }, { "epoch": 1.0153663023482307, "grad_norm": 1.3984375, "learning_rate": 1.4810342960291572e-05, "loss": 0.4547, "step": 5961 }, { "epoch": 1.0155394437831404, "grad_norm": 1.40625, "learning_rate": 1.480874390214409e-05, "loss": 0.493, "step": 5962 }, { "epoch": 1.01571258521805, "grad_norm": 1.3046875, "learning_rate": 1.4807144684036044e-05, "loss": 0.4918, "step": 5963 }, { "epoch": 1.0158857266529597, "grad_norm": 1.3203125, "learning_rate": 1.4805545306020628e-05, "loss": 0.4508, "step": 5964 }, { "epoch": 1.0160588680878693, "grad_norm": 1.453125, "learning_rate": 1.4803945768151044e-05, "loss": 0.4803, "step": 5965 }, { "epoch": 1.0162320095227788, "grad_norm": 1.375, "learning_rate": 1.4802346070480505e-05, "loss": 0.5128, "step": 5966 }, { "epoch": 1.0164051509576886, "grad_norm": 1.3671875, "learning_rate": 1.4800746213062218e-05, "loss": 0.4397, "step": 5967 }, { "epoch": 1.0165782923925981, "grad_norm": 1.40625, "learning_rate": 1.4799146195949408e-05, "loss": 0.4483, "step": 5968 }, { "epoch": 1.0167514338275079, "grad_norm": 1.5234375, "learning_rate": 1.4797546019195292e-05, "loss": 0.5302, "step": 5969 }, { "epoch": 1.0169245752624174, "grad_norm": 1.40625, "learning_rate": 1.4795945682853105e-05, "loss": 0.4712, "step": 5970 }, { "epoch": 1.0170977166973272, "grad_norm": 1.359375, "learning_rate": 1.4794345186976079e-05, "loss": 0.5004, "step": 5971 }, { "epoch": 1.0172708581322367, "grad_norm": 1.4375, "learning_rate": 1.4792744531617453e-05, "loss": 0.4611, "step": 5972 }, { "epoch": 1.0174439995671465, "grad_norm": 1.359375, "learning_rate": 1.4791143716830469e-05, "loss": 0.4157, "step": 5973 }, { "epoch": 1.017617141002056, "grad_norm": 1.3046875, "learning_rate": 1.4789542742668386e-05, "loss": 0.4605, "step": 5974 }, { "epoch": 1.0177902824369658, "grad_norm": 1.3203125, "learning_rate": 1.478794160918445e-05, "loss": 0.5121, "step": 5975 }, { "epoch": 1.0179634238718753, "grad_norm": 1.3359375, "learning_rate": 1.4786340316431931e-05, "loss": 0.5301, "step": 5976 }, { "epoch": 1.018136565306785, "grad_norm": 1.234375, "learning_rate": 1.478473886446409e-05, "loss": 0.4693, "step": 5977 }, { "epoch": 1.0183097067416946, "grad_norm": 1.671875, "learning_rate": 1.4783137253334197e-05, "loss": 0.5231, "step": 5978 }, { "epoch": 1.0184828481766042, "grad_norm": 1.34375, "learning_rate": 1.4781535483095533e-05, "loss": 0.4855, "step": 5979 }, { "epoch": 1.018655989611514, "grad_norm": 1.2578125, "learning_rate": 1.4779933553801375e-05, "loss": 0.5126, "step": 5980 }, { "epoch": 1.0188291310464235, "grad_norm": 1.359375, "learning_rate": 1.4778331465505019e-05, "loss": 0.4901, "step": 5981 }, { "epoch": 1.0190022724813332, "grad_norm": 1.5625, "learning_rate": 1.477672921825975e-05, "loss": 0.6101, "step": 5982 }, { "epoch": 1.0191754139162428, "grad_norm": 1.40625, "learning_rate": 1.4775126812118865e-05, "loss": 0.5121, "step": 5983 }, { "epoch": 1.0193485553511525, "grad_norm": 1.40625, "learning_rate": 1.4773524247135673e-05, "loss": 0.479, "step": 5984 }, { "epoch": 1.019521696786062, "grad_norm": 1.2578125, "learning_rate": 1.4771921523363482e-05, "loss": 0.4542, "step": 5985 }, { "epoch": 1.0196948382209718, "grad_norm": 1.5, "learning_rate": 1.4770318640855603e-05, "loss": 0.4709, "step": 5986 }, { "epoch": 1.0198679796558814, "grad_norm": 1.3984375, "learning_rate": 1.4768715599665355e-05, "loss": 0.4262, "step": 5987 }, { "epoch": 1.0200411210907911, "grad_norm": 1.328125, "learning_rate": 1.4767112399846066e-05, "loss": 0.4598, "step": 5988 }, { "epoch": 1.0202142625257007, "grad_norm": 1.234375, "learning_rate": 1.476550904145106e-05, "loss": 0.371, "step": 5989 }, { "epoch": 1.0203874039606102, "grad_norm": 1.40625, "learning_rate": 1.4763905524533679e-05, "loss": 0.5413, "step": 5990 }, { "epoch": 1.02056054539552, "grad_norm": 1.5703125, "learning_rate": 1.4762301849147255e-05, "loss": 0.5506, "step": 5991 }, { "epoch": 1.0207336868304295, "grad_norm": 1.2578125, "learning_rate": 1.476069801534514e-05, "loss": 0.4342, "step": 5992 }, { "epoch": 1.0209068282653393, "grad_norm": 1.359375, "learning_rate": 1.4759094023180684e-05, "loss": 0.4859, "step": 5993 }, { "epoch": 1.0210799697002488, "grad_norm": 1.359375, "learning_rate": 1.475748987270724e-05, "loss": 0.4208, "step": 5994 }, { "epoch": 1.0212531111351586, "grad_norm": 1.421875, "learning_rate": 1.475588556397817e-05, "loss": 0.5409, "step": 5995 }, { "epoch": 1.0214262525700681, "grad_norm": 1.453125, "learning_rate": 1.4754281097046843e-05, "loss": 0.4624, "step": 5996 }, { "epoch": 1.021599394004978, "grad_norm": 1.390625, "learning_rate": 1.4752676471966629e-05, "loss": 0.5097, "step": 5997 }, { "epoch": 1.0217725354398874, "grad_norm": 1.4296875, "learning_rate": 1.4751071688790908e-05, "loss": 0.4432, "step": 5998 }, { "epoch": 1.0219456768747972, "grad_norm": 1.34375, "learning_rate": 1.4749466747573056e-05, "loss": 0.4481, "step": 5999 }, { "epoch": 1.0221188183097067, "grad_norm": 1.3828125, "learning_rate": 1.4747861648366467e-05, "loss": 0.447, "step": 6000 }, { "epoch": 1.0222919597446163, "grad_norm": 1.3125, "learning_rate": 1.4746256391224528e-05, "loss": 0.4426, "step": 6001 }, { "epoch": 1.022465101179526, "grad_norm": 1.265625, "learning_rate": 1.4744650976200643e-05, "loss": 0.4003, "step": 6002 }, { "epoch": 1.0226382426144356, "grad_norm": 1.4140625, "learning_rate": 1.4743045403348213e-05, "loss": 0.5257, "step": 6003 }, { "epoch": 1.0228113840493454, "grad_norm": 1.3671875, "learning_rate": 1.4741439672720645e-05, "loss": 0.5255, "step": 6004 }, { "epoch": 1.022984525484255, "grad_norm": 1.578125, "learning_rate": 1.4739833784371353e-05, "loss": 0.4928, "step": 6005 }, { "epoch": 1.0231576669191647, "grad_norm": 1.4140625, "learning_rate": 1.473822773835376e-05, "loss": 0.4903, "step": 6006 }, { "epoch": 1.0233308083540742, "grad_norm": 1.40625, "learning_rate": 1.4736621534721288e-05, "loss": 0.4391, "step": 6007 }, { "epoch": 1.023503949788984, "grad_norm": 1.3671875, "learning_rate": 1.4735015173527362e-05, "loss": 0.4455, "step": 6008 }, { "epoch": 1.0236770912238935, "grad_norm": 1.4375, "learning_rate": 1.4733408654825426e-05, "loss": 0.4447, "step": 6009 }, { "epoch": 1.0238502326588033, "grad_norm": 1.5, "learning_rate": 1.4731801978668914e-05, "loss": 0.4471, "step": 6010 }, { "epoch": 1.0240233740937128, "grad_norm": 1.2734375, "learning_rate": 1.4730195145111268e-05, "loss": 0.4305, "step": 6011 }, { "epoch": 1.0241965155286223, "grad_norm": 1.375, "learning_rate": 1.4728588154205947e-05, "loss": 0.4517, "step": 6012 }, { "epoch": 1.024369656963532, "grad_norm": 1.4453125, "learning_rate": 1.4726981006006402e-05, "loss": 0.5004, "step": 6013 }, { "epoch": 1.0245427983984416, "grad_norm": 1.3203125, "learning_rate": 1.47253737005661e-05, "loss": 0.4674, "step": 6014 }, { "epoch": 1.0247159398333514, "grad_norm": 1.3359375, "learning_rate": 1.4723766237938495e-05, "loss": 0.501, "step": 6015 }, { "epoch": 1.024889081268261, "grad_norm": 1.390625, "learning_rate": 1.4722158618177067e-05, "loss": 0.4888, "step": 6016 }, { "epoch": 1.0250622227031707, "grad_norm": 1.46875, "learning_rate": 1.4720550841335293e-05, "loss": 0.522, "step": 6017 }, { "epoch": 1.0252353641380803, "grad_norm": 1.4453125, "learning_rate": 1.4718942907466651e-05, "loss": 0.4944, "step": 6018 }, { "epoch": 1.02540850557299, "grad_norm": 1.7265625, "learning_rate": 1.4717334816624634e-05, "loss": 0.4431, "step": 6019 }, { "epoch": 1.0255816470078996, "grad_norm": 1.390625, "learning_rate": 1.4715726568862728e-05, "loss": 0.5469, "step": 6020 }, { "epoch": 1.0257547884428093, "grad_norm": 1.40625, "learning_rate": 1.4714118164234433e-05, "loss": 0.4745, "step": 6021 }, { "epoch": 1.0259279298777189, "grad_norm": 1.3671875, "learning_rate": 1.4712509602793253e-05, "loss": 0.4548, "step": 6022 }, { "epoch": 1.0261010713126284, "grad_norm": 1.4140625, "learning_rate": 1.4710900884592694e-05, "loss": 0.4765, "step": 6023 }, { "epoch": 1.0262742127475382, "grad_norm": 1.3828125, "learning_rate": 1.4709292009686272e-05, "loss": 0.5229, "step": 6024 }, { "epoch": 1.0264473541824477, "grad_norm": 1.296875, "learning_rate": 1.4707682978127502e-05, "loss": 0.527, "step": 6025 }, { "epoch": 1.0266204956173575, "grad_norm": 1.4453125, "learning_rate": 1.4706073789969912e-05, "loss": 0.4816, "step": 6026 }, { "epoch": 1.026793637052267, "grad_norm": 1.3984375, "learning_rate": 1.4704464445267026e-05, "loss": 0.4597, "step": 6027 }, { "epoch": 1.0269667784871768, "grad_norm": 1.3359375, "learning_rate": 1.4702854944072383e-05, "loss": 0.4347, "step": 6028 }, { "epoch": 1.0271399199220863, "grad_norm": 1.5546875, "learning_rate": 1.4701245286439516e-05, "loss": 0.4877, "step": 6029 }, { "epoch": 1.027313061356996, "grad_norm": 1.7265625, "learning_rate": 1.4699635472421975e-05, "loss": 0.4987, "step": 6030 }, { "epoch": 1.0274862027919056, "grad_norm": 1.4296875, "learning_rate": 1.469802550207331e-05, "loss": 0.4657, "step": 6031 }, { "epoch": 1.0276593442268154, "grad_norm": 1.3828125, "learning_rate": 1.469641537544707e-05, "loss": 0.4498, "step": 6032 }, { "epoch": 1.027832485661725, "grad_norm": 1.4296875, "learning_rate": 1.4694805092596823e-05, "loss": 0.4769, "step": 6033 }, { "epoch": 1.0280056270966345, "grad_norm": 1.359375, "learning_rate": 1.4693194653576128e-05, "loss": 0.4627, "step": 6034 }, { "epoch": 1.0281787685315442, "grad_norm": 1.53125, "learning_rate": 1.4691584058438558e-05, "loss": 0.4841, "step": 6035 }, { "epoch": 1.0283519099664538, "grad_norm": 1.5390625, "learning_rate": 1.4689973307237687e-05, "loss": 0.4965, "step": 6036 }, { "epoch": 1.0285250514013635, "grad_norm": 1.4921875, "learning_rate": 1.46883624000271e-05, "loss": 0.5468, "step": 6037 }, { "epoch": 1.028698192836273, "grad_norm": 1.2578125, "learning_rate": 1.4686751336860379e-05, "loss": 0.4161, "step": 6038 }, { "epoch": 1.0288713342711828, "grad_norm": 1.359375, "learning_rate": 1.4685140117791114e-05, "loss": 0.4379, "step": 6039 }, { "epoch": 1.0290444757060924, "grad_norm": 1.4296875, "learning_rate": 1.468352874287291e-05, "loss": 0.5051, "step": 6040 }, { "epoch": 1.0292176171410021, "grad_norm": 1.359375, "learning_rate": 1.4681917212159358e-05, "loss": 0.4567, "step": 6041 }, { "epoch": 1.0293907585759117, "grad_norm": 1.75, "learning_rate": 1.4680305525704071e-05, "loss": 0.4445, "step": 6042 }, { "epoch": 1.0295639000108214, "grad_norm": 1.421875, "learning_rate": 1.4678693683560657e-05, "loss": 0.4989, "step": 6043 }, { "epoch": 1.029737041445731, "grad_norm": 1.40625, "learning_rate": 1.4677081685782736e-05, "loss": 0.4128, "step": 6044 }, { "epoch": 1.0299101828806405, "grad_norm": 1.4140625, "learning_rate": 1.4675469532423932e-05, "loss": 0.5038, "step": 6045 }, { "epoch": 1.0300833243155503, "grad_norm": 1.453125, "learning_rate": 1.4673857223537869e-05, "loss": 0.4724, "step": 6046 }, { "epoch": 1.0302564657504598, "grad_norm": 1.2109375, "learning_rate": 1.467224475917818e-05, "loss": 0.4285, "step": 6047 }, { "epoch": 1.0304296071853696, "grad_norm": 1.265625, "learning_rate": 1.4670632139398508e-05, "loss": 0.4623, "step": 6048 }, { "epoch": 1.0306027486202791, "grad_norm": 1.296875, "learning_rate": 1.4669019364252485e-05, "loss": 0.4406, "step": 6049 }, { "epoch": 1.0307758900551889, "grad_norm": 1.375, "learning_rate": 1.4667406433793774e-05, "loss": 0.4611, "step": 6050 }, { "epoch": 1.0309490314900984, "grad_norm": 1.3125, "learning_rate": 1.4665793348076016e-05, "loss": 0.4116, "step": 6051 }, { "epoch": 1.0311221729250082, "grad_norm": 1.453125, "learning_rate": 1.4664180107152876e-05, "loss": 0.4694, "step": 6052 }, { "epoch": 1.0312953143599177, "grad_norm": 1.3984375, "learning_rate": 1.4662566711078014e-05, "loss": 0.4902, "step": 6053 }, { "epoch": 1.0314684557948275, "grad_norm": 1.3359375, "learning_rate": 1.46609531599051e-05, "loss": 0.4183, "step": 6054 }, { "epoch": 1.031641597229737, "grad_norm": 1.359375, "learning_rate": 1.4659339453687811e-05, "loss": 0.4595, "step": 6055 }, { "epoch": 1.0318147386646466, "grad_norm": 1.390625, "learning_rate": 1.4657725592479821e-05, "loss": 0.4535, "step": 6056 }, { "epoch": 1.0319878800995563, "grad_norm": 1.453125, "learning_rate": 1.465611157633482e-05, "loss": 0.4861, "step": 6057 }, { "epoch": 1.0321610215344659, "grad_norm": 1.3359375, "learning_rate": 1.4654497405306492e-05, "loss": 0.4591, "step": 6058 }, { "epoch": 1.0323341629693756, "grad_norm": 1.4296875, "learning_rate": 1.4652883079448534e-05, "loss": 0.4, "step": 6059 }, { "epoch": 1.0325073044042852, "grad_norm": 1.359375, "learning_rate": 1.4651268598814647e-05, "loss": 0.5155, "step": 6060 }, { "epoch": 1.032680445839195, "grad_norm": 1.3203125, "learning_rate": 1.4649653963458535e-05, "loss": 0.4406, "step": 6061 }, { "epoch": 1.0328535872741045, "grad_norm": 1.40625, "learning_rate": 1.4648039173433909e-05, "loss": 0.5649, "step": 6062 }, { "epoch": 1.0330267287090142, "grad_norm": 1.421875, "learning_rate": 1.464642422879448e-05, "loss": 0.4682, "step": 6063 }, { "epoch": 1.0331998701439238, "grad_norm": 1.34375, "learning_rate": 1.4644809129593975e-05, "loss": 0.4404, "step": 6064 }, { "epoch": 1.0333730115788335, "grad_norm": 1.4140625, "learning_rate": 1.4643193875886113e-05, "loss": 0.4926, "step": 6065 }, { "epoch": 1.033546153013743, "grad_norm": 1.5703125, "learning_rate": 1.4641578467724631e-05, "loss": 0.5697, "step": 6066 }, { "epoch": 1.0337192944486528, "grad_norm": 1.3828125, "learning_rate": 1.4639962905163258e-05, "loss": 0.4906, "step": 6067 }, { "epoch": 1.0338924358835624, "grad_norm": 1.3203125, "learning_rate": 1.463834718825574e-05, "loss": 0.4528, "step": 6068 }, { "epoch": 1.034065577318472, "grad_norm": 1.453125, "learning_rate": 1.4636731317055821e-05, "loss": 0.495, "step": 6069 }, { "epoch": 1.0342387187533817, "grad_norm": 1.34375, "learning_rate": 1.463511529161725e-05, "loss": 0.4516, "step": 6070 }, { "epoch": 1.0344118601882912, "grad_norm": 1.3203125, "learning_rate": 1.4633499111993792e-05, "loss": 0.501, "step": 6071 }, { "epoch": 1.034585001623201, "grad_norm": 1.3515625, "learning_rate": 1.4631882778239197e-05, "loss": 0.4778, "step": 6072 }, { "epoch": 1.0347581430581105, "grad_norm": 1.4140625, "learning_rate": 1.4630266290407238e-05, "loss": 0.4339, "step": 6073 }, { "epoch": 1.0349312844930203, "grad_norm": 1.359375, "learning_rate": 1.462864964855169e-05, "loss": 0.4747, "step": 6074 }, { "epoch": 1.0351044259279298, "grad_norm": 1.390625, "learning_rate": 1.462703285272632e-05, "loss": 0.4637, "step": 6075 }, { "epoch": 1.0352775673628396, "grad_norm": 1.4609375, "learning_rate": 1.4625415902984916e-05, "loss": 0.5306, "step": 6076 }, { "epoch": 1.0354507087977491, "grad_norm": 1.3671875, "learning_rate": 1.4623798799381264e-05, "loss": 0.4998, "step": 6077 }, { "epoch": 1.035623850232659, "grad_norm": 1.390625, "learning_rate": 1.4622181541969161e-05, "loss": 0.4848, "step": 6078 }, { "epoch": 1.0357969916675684, "grad_norm": 1.453125, "learning_rate": 1.4620564130802396e-05, "loss": 0.4141, "step": 6079 }, { "epoch": 1.035970133102478, "grad_norm": 1.40625, "learning_rate": 1.4618946565934775e-05, "loss": 0.4276, "step": 6080 }, { "epoch": 1.0361432745373877, "grad_norm": 1.3515625, "learning_rate": 1.4617328847420107e-05, "loss": 0.4259, "step": 6081 }, { "epoch": 1.0363164159722973, "grad_norm": 1.484375, "learning_rate": 1.4615710975312203e-05, "loss": 0.5231, "step": 6082 }, { "epoch": 1.036489557407207, "grad_norm": 1.3671875, "learning_rate": 1.4614092949664881e-05, "loss": 0.4426, "step": 6083 }, { "epoch": 1.0366626988421166, "grad_norm": 1.3359375, "learning_rate": 1.4612474770531966e-05, "loss": 0.4889, "step": 6084 }, { "epoch": 1.0368358402770264, "grad_norm": 1.28125, "learning_rate": 1.4610856437967282e-05, "loss": 0.411, "step": 6085 }, { "epoch": 1.037008981711936, "grad_norm": 1.34375, "learning_rate": 1.4609237952024667e-05, "loss": 0.4727, "step": 6086 }, { "epoch": 1.0371821231468457, "grad_norm": 1.3828125, "learning_rate": 1.460761931275795e-05, "loss": 0.4687, "step": 6087 }, { "epoch": 1.0373552645817552, "grad_norm": 1.453125, "learning_rate": 1.4606000520220987e-05, "loss": 0.4709, "step": 6088 }, { "epoch": 1.037528406016665, "grad_norm": 1.3828125, "learning_rate": 1.4604381574467616e-05, "loss": 0.4654, "step": 6089 }, { "epoch": 1.0377015474515745, "grad_norm": 1.3828125, "learning_rate": 1.4602762475551698e-05, "loss": 0.509, "step": 6090 }, { "epoch": 1.037874688886484, "grad_norm": 1.4453125, "learning_rate": 1.4601143223527083e-05, "loss": 0.4976, "step": 6091 }, { "epoch": 1.0380478303213938, "grad_norm": 1.40625, "learning_rate": 1.4599523818447643e-05, "loss": 0.4676, "step": 6092 }, { "epoch": 1.0382209717563033, "grad_norm": 1.3515625, "learning_rate": 1.4597904260367239e-05, "loss": 0.4589, "step": 6093 }, { "epoch": 1.038394113191213, "grad_norm": 1.5, "learning_rate": 1.4596284549339753e-05, "loss": 0.5265, "step": 6094 }, { "epoch": 1.0385672546261226, "grad_norm": 1.6015625, "learning_rate": 1.459466468541906e-05, "loss": 0.5336, "step": 6095 }, { "epoch": 1.0387403960610324, "grad_norm": 1.5546875, "learning_rate": 1.4593044668659041e-05, "loss": 0.5018, "step": 6096 }, { "epoch": 1.038913537495942, "grad_norm": 1.3984375, "learning_rate": 1.4591424499113591e-05, "loss": 0.5298, "step": 6097 }, { "epoch": 1.0390866789308517, "grad_norm": 1.3984375, "learning_rate": 1.45898041768366e-05, "loss": 0.4848, "step": 6098 }, { "epoch": 1.0392598203657613, "grad_norm": 1.2578125, "learning_rate": 1.4588183701881967e-05, "loss": 0.4827, "step": 6099 }, { "epoch": 1.039432961800671, "grad_norm": 1.4609375, "learning_rate": 1.4586563074303601e-05, "loss": 0.4745, "step": 6100 }, { "epoch": 1.0396061032355806, "grad_norm": 1.375, "learning_rate": 1.4584942294155406e-05, "loss": 0.4725, "step": 6101 }, { "epoch": 1.03977924467049, "grad_norm": 1.4765625, "learning_rate": 1.45833213614913e-05, "loss": 0.492, "step": 6102 }, { "epoch": 1.0399523861053999, "grad_norm": 1.4375, "learning_rate": 1.4581700276365198e-05, "loss": 0.5333, "step": 6103 }, { "epoch": 1.0401255275403094, "grad_norm": 1.34375, "learning_rate": 1.4580079038831032e-05, "loss": 0.4273, "step": 6104 }, { "epoch": 1.0402986689752192, "grad_norm": 1.3515625, "learning_rate": 1.4578457648942726e-05, "loss": 0.467, "step": 6105 }, { "epoch": 1.0404718104101287, "grad_norm": 1.40625, "learning_rate": 1.4576836106754213e-05, "loss": 0.4984, "step": 6106 }, { "epoch": 1.0406449518450385, "grad_norm": 1.4140625, "learning_rate": 1.4575214412319442e-05, "loss": 0.5271, "step": 6107 }, { "epoch": 1.040818093279948, "grad_norm": 1.4921875, "learning_rate": 1.4573592565692347e-05, "loss": 0.5208, "step": 6108 }, { "epoch": 1.0409912347148578, "grad_norm": 1.40625, "learning_rate": 1.4571970566926885e-05, "loss": 0.4899, "step": 6109 }, { "epoch": 1.0411643761497673, "grad_norm": 1.3515625, "learning_rate": 1.457034841607701e-05, "loss": 0.4703, "step": 6110 }, { "epoch": 1.041337517584677, "grad_norm": 1.46875, "learning_rate": 1.4568726113196678e-05, "loss": 0.444, "step": 6111 }, { "epoch": 1.0415106590195866, "grad_norm": 1.3984375, "learning_rate": 1.4567103658339861e-05, "loss": 0.4797, "step": 6112 }, { "epoch": 1.0416838004544964, "grad_norm": 1.4296875, "learning_rate": 1.4565481051560521e-05, "loss": 0.4624, "step": 6113 }, { "epoch": 1.041856941889406, "grad_norm": 1.3359375, "learning_rate": 1.4563858292912641e-05, "loss": 0.4585, "step": 6114 }, { "epoch": 1.0420300833243155, "grad_norm": 1.296875, "learning_rate": 1.4562235382450195e-05, "loss": 0.4376, "step": 6115 }, { "epoch": 1.0422032247592252, "grad_norm": 1.4140625, "learning_rate": 1.4560612320227174e-05, "loss": 0.4243, "step": 6116 }, { "epoch": 1.0423763661941348, "grad_norm": 1.515625, "learning_rate": 1.4558989106297565e-05, "loss": 0.5518, "step": 6117 }, { "epoch": 1.0425495076290445, "grad_norm": 1.4140625, "learning_rate": 1.4557365740715364e-05, "loss": 0.4913, "step": 6118 }, { "epoch": 1.042722649063954, "grad_norm": 1.296875, "learning_rate": 1.455574222353457e-05, "loss": 0.4447, "step": 6119 }, { "epoch": 1.0428957904988638, "grad_norm": 1.3984375, "learning_rate": 1.4554118554809189e-05, "loss": 0.4698, "step": 6120 }, { "epoch": 1.0430689319337734, "grad_norm": 1.3984375, "learning_rate": 1.4552494734593237e-05, "loss": 0.476, "step": 6121 }, { "epoch": 1.0432420733686831, "grad_norm": 1.3828125, "learning_rate": 1.4550870762940724e-05, "loss": 0.466, "step": 6122 }, { "epoch": 1.0434152148035927, "grad_norm": 1.4453125, "learning_rate": 1.4549246639905667e-05, "loss": 0.473, "step": 6123 }, { "epoch": 1.0435883562385024, "grad_norm": 1.3359375, "learning_rate": 1.4547622365542103e-05, "loss": 0.5022, "step": 6124 }, { "epoch": 1.043761497673412, "grad_norm": 1.375, "learning_rate": 1.4545997939904051e-05, "loss": 0.5368, "step": 6125 }, { "epoch": 1.0439346391083215, "grad_norm": 1.296875, "learning_rate": 1.4544373363045557e-05, "loss": 0.5209, "step": 6126 }, { "epoch": 1.0441077805432313, "grad_norm": 1.4765625, "learning_rate": 1.4542748635020652e-05, "loss": 0.526, "step": 6127 }, { "epoch": 1.0442809219781408, "grad_norm": 1.3203125, "learning_rate": 1.4541123755883391e-05, "loss": 0.45, "step": 6128 }, { "epoch": 1.0444540634130506, "grad_norm": 1.40625, "learning_rate": 1.4539498725687817e-05, "loss": 0.4897, "step": 6129 }, { "epoch": 1.0446272048479601, "grad_norm": 1.4921875, "learning_rate": 1.4537873544487991e-05, "loss": 0.4801, "step": 6130 }, { "epoch": 1.0448003462828699, "grad_norm": 1.3671875, "learning_rate": 1.453624821233797e-05, "loss": 0.4693, "step": 6131 }, { "epoch": 1.0449734877177794, "grad_norm": 1.515625, "learning_rate": 1.453462272929182e-05, "loss": 0.4315, "step": 6132 }, { "epoch": 1.0451466291526892, "grad_norm": 1.453125, "learning_rate": 1.4532997095403621e-05, "loss": 0.5072, "step": 6133 }, { "epoch": 1.0453197705875987, "grad_norm": 1.4453125, "learning_rate": 1.4531371310727438e-05, "loss": 0.5191, "step": 6134 }, { "epoch": 1.0454929120225085, "grad_norm": 1.3125, "learning_rate": 1.4529745375317355e-05, "loss": 0.4593, "step": 6135 }, { "epoch": 1.045666053457418, "grad_norm": 1.5859375, "learning_rate": 1.4528119289227458e-05, "loss": 0.5981, "step": 6136 }, { "epoch": 1.0458391948923276, "grad_norm": 1.296875, "learning_rate": 1.4526493052511838e-05, "loss": 0.4704, "step": 6137 }, { "epoch": 1.0460123363272373, "grad_norm": 1.4140625, "learning_rate": 1.4524866665224597e-05, "loss": 0.4447, "step": 6138 }, { "epoch": 1.0461854777621469, "grad_norm": 1.4609375, "learning_rate": 1.4523240127419826e-05, "loss": 0.5528, "step": 6139 }, { "epoch": 1.0463586191970566, "grad_norm": 1.390625, "learning_rate": 1.4521613439151639e-05, "loss": 0.429, "step": 6140 }, { "epoch": 1.0465317606319662, "grad_norm": 1.3671875, "learning_rate": 1.4519986600474138e-05, "loss": 0.4793, "step": 6141 }, { "epoch": 1.046704902066876, "grad_norm": 1.4375, "learning_rate": 1.4518359611441452e-05, "loss": 0.4518, "step": 6142 }, { "epoch": 1.0468780435017855, "grad_norm": 1.4140625, "learning_rate": 1.4516732472107691e-05, "loss": 0.575, "step": 6143 }, { "epoch": 1.0470511849366952, "grad_norm": 1.5, "learning_rate": 1.4515105182526985e-05, "loss": 0.4712, "step": 6144 }, { "epoch": 1.0472243263716048, "grad_norm": 1.4296875, "learning_rate": 1.4513477742753465e-05, "loss": 0.4769, "step": 6145 }, { "epoch": 1.0473974678065145, "grad_norm": 1.3046875, "learning_rate": 1.4511850152841265e-05, "loss": 0.4262, "step": 6146 }, { "epoch": 1.047570609241424, "grad_norm": 1.453125, "learning_rate": 1.451022241284453e-05, "loss": 0.4892, "step": 6147 }, { "epoch": 1.0477437506763336, "grad_norm": 1.3203125, "learning_rate": 1.4508594522817403e-05, "loss": 0.4746, "step": 6148 }, { "epoch": 1.0479168921112434, "grad_norm": 1.625, "learning_rate": 1.4506966482814035e-05, "loss": 0.5319, "step": 6149 }, { "epoch": 1.048090033546153, "grad_norm": 1.3984375, "learning_rate": 1.4505338292888586e-05, "loss": 0.5108, "step": 6150 }, { "epoch": 1.0482631749810627, "grad_norm": 1.3125, "learning_rate": 1.4503709953095209e-05, "loss": 0.4728, "step": 6151 }, { "epoch": 1.0484363164159722, "grad_norm": 1.390625, "learning_rate": 1.4502081463488078e-05, "loss": 0.4367, "step": 6152 }, { "epoch": 1.048609457850882, "grad_norm": 1.3984375, "learning_rate": 1.4500452824121358e-05, "loss": 0.4534, "step": 6153 }, { "epoch": 1.0487825992857915, "grad_norm": 1.390625, "learning_rate": 1.449882403504923e-05, "loss": 0.42, "step": 6154 }, { "epoch": 1.0489557407207013, "grad_norm": 1.4140625, "learning_rate": 1.4497195096325873e-05, "loss": 0.523, "step": 6155 }, { "epoch": 1.0491288821556108, "grad_norm": 1.421875, "learning_rate": 1.449556600800547e-05, "loss": 0.5352, "step": 6156 }, { "epoch": 1.0493020235905206, "grad_norm": 1.484375, "learning_rate": 1.4493936770142214e-05, "loss": 0.513, "step": 6157 }, { "epoch": 1.0494751650254301, "grad_norm": 1.390625, "learning_rate": 1.44923073827903e-05, "loss": 0.5011, "step": 6158 }, { "epoch": 1.0496483064603397, "grad_norm": 1.4609375, "learning_rate": 1.4490677846003933e-05, "loss": 0.4307, "step": 6159 }, { "epoch": 1.0498214478952494, "grad_norm": 1.359375, "learning_rate": 1.4489048159837314e-05, "loss": 0.416, "step": 6160 }, { "epoch": 1.049994589330159, "grad_norm": 1.515625, "learning_rate": 1.4487418324344654e-05, "loss": 0.459, "step": 6161 }, { "epoch": 1.0501677307650688, "grad_norm": 1.34375, "learning_rate": 1.4485788339580169e-05, "loss": 0.4593, "step": 6162 }, { "epoch": 1.0503408721999783, "grad_norm": 1.421875, "learning_rate": 1.448415820559808e-05, "loss": 0.428, "step": 6163 }, { "epoch": 1.050514013634888, "grad_norm": 1.3203125, "learning_rate": 1.4482527922452618e-05, "loss": 0.4897, "step": 6164 }, { "epoch": 1.0506871550697976, "grad_norm": 1.3359375, "learning_rate": 1.4480897490198005e-05, "loss": 0.4429, "step": 6165 }, { "epoch": 1.0508602965047074, "grad_norm": 1.359375, "learning_rate": 1.4479266908888483e-05, "loss": 0.5498, "step": 6166 }, { "epoch": 1.051033437939617, "grad_norm": 1.4296875, "learning_rate": 1.4477636178578288e-05, "loss": 0.481, "step": 6167 }, { "epoch": 1.0512065793745267, "grad_norm": 1.4453125, "learning_rate": 1.4476005299321666e-05, "loss": 0.4783, "step": 6168 }, { "epoch": 1.0513797208094362, "grad_norm": 1.2421875, "learning_rate": 1.4474374271172869e-05, "loss": 0.447, "step": 6169 }, { "epoch": 1.0515528622443457, "grad_norm": 1.34375, "learning_rate": 1.4472743094186153e-05, "loss": 0.4604, "step": 6170 }, { "epoch": 1.0517260036792555, "grad_norm": 1.2734375, "learning_rate": 1.4471111768415777e-05, "loss": 0.4336, "step": 6171 }, { "epoch": 1.051899145114165, "grad_norm": 1.2890625, "learning_rate": 1.4469480293916007e-05, "loss": 0.4595, "step": 6172 }, { "epoch": 1.0520722865490748, "grad_norm": 1.375, "learning_rate": 1.4467848670741112e-05, "loss": 0.5185, "step": 6173 }, { "epoch": 1.0522454279839843, "grad_norm": 1.3671875, "learning_rate": 1.4466216898945368e-05, "loss": 0.5247, "step": 6174 }, { "epoch": 1.0524185694188941, "grad_norm": 1.3828125, "learning_rate": 1.4464584978583056e-05, "loss": 0.4322, "step": 6175 }, { "epoch": 1.0525917108538037, "grad_norm": 1.3828125, "learning_rate": 1.4462952909708461e-05, "loss": 0.4449, "step": 6176 }, { "epoch": 1.0527648522887134, "grad_norm": 1.3046875, "learning_rate": 1.4461320692375871e-05, "loss": 0.43, "step": 6177 }, { "epoch": 1.052937993723623, "grad_norm": 1.296875, "learning_rate": 1.4459688326639581e-05, "loss": 0.4352, "step": 6178 }, { "epoch": 1.0531111351585327, "grad_norm": 1.5234375, "learning_rate": 1.4458055812553893e-05, "loss": 0.4408, "step": 6179 }, { "epoch": 1.0532842765934423, "grad_norm": 1.5234375, "learning_rate": 1.4456423150173113e-05, "loss": 0.4257, "step": 6180 }, { "epoch": 1.0534574180283518, "grad_norm": 1.3828125, "learning_rate": 1.4454790339551546e-05, "loss": 0.5084, "step": 6181 }, { "epoch": 1.0536305594632616, "grad_norm": 1.453125, "learning_rate": 1.4453157380743509e-05, "loss": 0.5271, "step": 6182 }, { "epoch": 1.053803700898171, "grad_norm": 1.3515625, "learning_rate": 1.4451524273803323e-05, "loss": 0.469, "step": 6183 }, { "epoch": 1.0539768423330809, "grad_norm": 1.328125, "learning_rate": 1.444989101878531e-05, "loss": 0.4534, "step": 6184 }, { "epoch": 1.0541499837679904, "grad_norm": 1.375, "learning_rate": 1.4448257615743802e-05, "loss": 0.4319, "step": 6185 }, { "epoch": 1.0543231252029002, "grad_norm": 1.390625, "learning_rate": 1.444662406473313e-05, "loss": 0.4859, "step": 6186 }, { "epoch": 1.0544962666378097, "grad_norm": 1.421875, "learning_rate": 1.4444990365807637e-05, "loss": 0.4483, "step": 6187 }, { "epoch": 1.0546694080727195, "grad_norm": 1.3046875, "learning_rate": 1.4443356519021667e-05, "loss": 0.4289, "step": 6188 }, { "epoch": 1.054842549507629, "grad_norm": 1.546875, "learning_rate": 1.4441722524429563e-05, "loss": 0.549, "step": 6189 }, { "epoch": 1.0550156909425388, "grad_norm": 1.4296875, "learning_rate": 1.4440088382085688e-05, "loss": 0.4446, "step": 6190 }, { "epoch": 1.0551888323774483, "grad_norm": 1.3984375, "learning_rate": 1.4438454092044395e-05, "loss": 0.5588, "step": 6191 }, { "epoch": 1.0553619738123579, "grad_norm": 1.5078125, "learning_rate": 1.4436819654360052e-05, "loss": 0.4865, "step": 6192 }, { "epoch": 1.0555351152472676, "grad_norm": 1.2890625, "learning_rate": 1.4435185069087024e-05, "loss": 0.4512, "step": 6193 }, { "epoch": 1.0557082566821772, "grad_norm": 1.3359375, "learning_rate": 1.4433550336279685e-05, "loss": 0.467, "step": 6194 }, { "epoch": 1.055881398117087, "grad_norm": 1.40625, "learning_rate": 1.4431915455992416e-05, "loss": 0.474, "step": 6195 }, { "epoch": 1.0560545395519965, "grad_norm": 1.46875, "learning_rate": 1.4430280428279597e-05, "loss": 0.4628, "step": 6196 }, { "epoch": 1.0562276809869062, "grad_norm": 1.3671875, "learning_rate": 1.4428645253195621e-05, "loss": 0.5215, "step": 6197 }, { "epoch": 1.0564008224218158, "grad_norm": 1.46875, "learning_rate": 1.4427009930794877e-05, "loss": 0.4198, "step": 6198 }, { "epoch": 1.0565739638567255, "grad_norm": 1.375, "learning_rate": 1.4425374461131767e-05, "loss": 0.5005, "step": 6199 }, { "epoch": 1.056747105291635, "grad_norm": 1.328125, "learning_rate": 1.4423738844260691e-05, "loss": 0.4401, "step": 6200 }, { "epoch": 1.0569202467265448, "grad_norm": 1.4140625, "learning_rate": 1.4422103080236058e-05, "loss": 0.4754, "step": 6201 }, { "epoch": 1.0570933881614544, "grad_norm": 1.359375, "learning_rate": 1.4420467169112284e-05, "loss": 0.4767, "step": 6202 }, { "epoch": 1.0572665295963641, "grad_norm": 1.4296875, "learning_rate": 1.4418831110943782e-05, "loss": 0.4636, "step": 6203 }, { "epoch": 1.0574396710312737, "grad_norm": 1.34375, "learning_rate": 1.4417194905784979e-05, "loss": 0.4613, "step": 6204 }, { "epoch": 1.0576128124661832, "grad_norm": 1.453125, "learning_rate": 1.4415558553690296e-05, "loss": 0.4892, "step": 6205 }, { "epoch": 1.057785953901093, "grad_norm": 1.515625, "learning_rate": 1.4413922054714174e-05, "loss": 0.5137, "step": 6206 }, { "epoch": 1.0579590953360025, "grad_norm": 1.3125, "learning_rate": 1.4412285408911042e-05, "loss": 0.4599, "step": 6207 }, { "epoch": 1.0581322367709123, "grad_norm": 1.2578125, "learning_rate": 1.4410648616335347e-05, "loss": 0.4171, "step": 6208 }, { "epoch": 1.0583053782058218, "grad_norm": 1.609375, "learning_rate": 1.440901167704154e-05, "loss": 0.4536, "step": 6209 }, { "epoch": 1.0584785196407316, "grad_norm": 1.4375, "learning_rate": 1.4407374591084064e-05, "loss": 0.5132, "step": 6210 }, { "epoch": 1.0586516610756411, "grad_norm": 1.4375, "learning_rate": 1.440573735851738e-05, "loss": 0.44, "step": 6211 }, { "epoch": 1.0588248025105509, "grad_norm": 1.40625, "learning_rate": 1.4404099979395954e-05, "loss": 0.4756, "step": 6212 }, { "epoch": 1.0589979439454604, "grad_norm": 1.34375, "learning_rate": 1.4402462453774245e-05, "loss": 0.4427, "step": 6213 }, { "epoch": 1.0591710853803702, "grad_norm": 1.390625, "learning_rate": 1.440082478170673e-05, "loss": 0.4632, "step": 6214 }, { "epoch": 1.0593442268152797, "grad_norm": 1.2890625, "learning_rate": 1.4399186963247885e-05, "loss": 0.4393, "step": 6215 }, { "epoch": 1.0595173682501893, "grad_norm": 1.390625, "learning_rate": 1.4397548998452186e-05, "loss": 0.4292, "step": 6216 }, { "epoch": 1.059690509685099, "grad_norm": 1.3515625, "learning_rate": 1.4395910887374124e-05, "loss": 0.5222, "step": 6217 }, { "epoch": 1.0598636511200086, "grad_norm": 1.5234375, "learning_rate": 1.4394272630068194e-05, "loss": 0.5305, "step": 6218 }, { "epoch": 1.0600367925549183, "grad_norm": 1.4140625, "learning_rate": 1.4392634226588882e-05, "loss": 0.5051, "step": 6219 }, { "epoch": 1.0602099339898279, "grad_norm": 1.46875, "learning_rate": 1.4390995676990693e-05, "loss": 0.5103, "step": 6220 }, { "epoch": 1.0603830754247376, "grad_norm": 1.390625, "learning_rate": 1.4389356981328137e-05, "loss": 0.4937, "step": 6221 }, { "epoch": 1.0605562168596472, "grad_norm": 1.40625, "learning_rate": 1.4387718139655715e-05, "loss": 0.51, "step": 6222 }, { "epoch": 1.060729358294557, "grad_norm": 1.40625, "learning_rate": 1.4386079152027952e-05, "loss": 0.4986, "step": 6223 }, { "epoch": 1.0609024997294665, "grad_norm": 1.515625, "learning_rate": 1.4384440018499364e-05, "loss": 0.489, "step": 6224 }, { "epoch": 1.0610756411643762, "grad_norm": 1.4140625, "learning_rate": 1.4382800739124474e-05, "loss": 0.5348, "step": 6225 }, { "epoch": 1.0612487825992858, "grad_norm": 1.5703125, "learning_rate": 1.4381161313957814e-05, "loss": 0.4488, "step": 6226 }, { "epoch": 1.0614219240341953, "grad_norm": 1.4296875, "learning_rate": 1.4379521743053916e-05, "loss": 0.4627, "step": 6227 }, { "epoch": 1.061595065469105, "grad_norm": 1.3515625, "learning_rate": 1.4377882026467329e-05, "loss": 0.4416, "step": 6228 }, { "epoch": 1.0617682069040146, "grad_norm": 1.453125, "learning_rate": 1.4376242164252585e-05, "loss": 0.4761, "step": 6229 }, { "epoch": 1.0619413483389244, "grad_norm": 1.7265625, "learning_rate": 1.4374602156464241e-05, "loss": 0.4726, "step": 6230 }, { "epoch": 1.062114489773834, "grad_norm": 1.46875, "learning_rate": 1.437296200315685e-05, "loss": 0.5836, "step": 6231 }, { "epoch": 1.0622876312087437, "grad_norm": 1.6796875, "learning_rate": 1.4371321704384964e-05, "loss": 0.4996, "step": 6232 }, { "epoch": 1.0624607726436532, "grad_norm": 1.34375, "learning_rate": 1.4369681260203157e-05, "loss": 0.4328, "step": 6233 }, { "epoch": 1.062633914078563, "grad_norm": 1.359375, "learning_rate": 1.436804067066599e-05, "loss": 0.4193, "step": 6234 }, { "epoch": 1.0628070555134725, "grad_norm": 1.3203125, "learning_rate": 1.4366399935828044e-05, "loss": 0.443, "step": 6235 }, { "epoch": 1.0629801969483823, "grad_norm": 1.375, "learning_rate": 1.4364759055743888e-05, "loss": 0.4793, "step": 6236 }, { "epoch": 1.0631533383832918, "grad_norm": 1.328125, "learning_rate": 1.4363118030468113e-05, "loss": 0.6027, "step": 6237 }, { "epoch": 1.0633264798182016, "grad_norm": 1.484375, "learning_rate": 1.4361476860055302e-05, "loss": 0.4471, "step": 6238 }, { "epoch": 1.0634996212531111, "grad_norm": 1.3203125, "learning_rate": 1.4359835544560048e-05, "loss": 0.4423, "step": 6239 }, { "epoch": 1.0636727626880207, "grad_norm": 1.3125, "learning_rate": 1.4358194084036954e-05, "loss": 0.4443, "step": 6240 }, { "epoch": 1.0638459041229305, "grad_norm": 1.4296875, "learning_rate": 1.4356552478540616e-05, "loss": 0.5061, "step": 6241 }, { "epoch": 1.06401904555784, "grad_norm": 1.3046875, "learning_rate": 1.4354910728125643e-05, "loss": 0.4797, "step": 6242 }, { "epoch": 1.0641921869927498, "grad_norm": 1.484375, "learning_rate": 1.4353268832846647e-05, "loss": 0.4919, "step": 6243 }, { "epoch": 1.0643653284276593, "grad_norm": 1.421875, "learning_rate": 1.4351626792758247e-05, "loss": 0.4247, "step": 6244 }, { "epoch": 1.064538469862569, "grad_norm": 1.453125, "learning_rate": 1.4349984607915065e-05, "loss": 0.415, "step": 6245 }, { "epoch": 1.0647116112974786, "grad_norm": 1.46875, "learning_rate": 1.4348342278371723e-05, "loss": 0.478, "step": 6246 }, { "epoch": 1.0648847527323884, "grad_norm": 1.2890625, "learning_rate": 1.4346699804182858e-05, "loss": 0.4331, "step": 6247 }, { "epoch": 1.065057894167298, "grad_norm": 1.3046875, "learning_rate": 1.43450571854031e-05, "loss": 0.4744, "step": 6248 }, { "epoch": 1.0652310356022077, "grad_norm": 1.421875, "learning_rate": 1.4343414422087093e-05, "loss": 0.4718, "step": 6249 }, { "epoch": 1.0654041770371172, "grad_norm": 1.5234375, "learning_rate": 1.4341771514289482e-05, "loss": 0.4883, "step": 6250 }, { "epoch": 1.0655773184720267, "grad_norm": 1.2578125, "learning_rate": 1.434012846206492e-05, "loss": 0.44, "step": 6251 }, { "epoch": 1.0657504599069365, "grad_norm": 1.2734375, "learning_rate": 1.4338485265468063e-05, "loss": 0.4435, "step": 6252 }, { "epoch": 1.065923601341846, "grad_norm": 1.3203125, "learning_rate": 1.4336841924553565e-05, "loss": 0.4461, "step": 6253 }, { "epoch": 1.0660967427767558, "grad_norm": 1.4375, "learning_rate": 1.4335198439376097e-05, "loss": 0.4899, "step": 6254 }, { "epoch": 1.0662698842116654, "grad_norm": 1.3828125, "learning_rate": 1.4333554809990325e-05, "loss": 0.4278, "step": 6255 }, { "epoch": 1.0664430256465751, "grad_norm": 1.390625, "learning_rate": 1.4331911036450927e-05, "loss": 0.4209, "step": 6256 }, { "epoch": 1.0666161670814847, "grad_norm": 1.5703125, "learning_rate": 1.4330267118812577e-05, "loss": 0.4456, "step": 6257 }, { "epoch": 1.0667893085163944, "grad_norm": 1.2734375, "learning_rate": 1.4328623057129965e-05, "loss": 0.3613, "step": 6258 }, { "epoch": 1.066962449951304, "grad_norm": 1.4375, "learning_rate": 1.4326978851457778e-05, "loss": 0.4597, "step": 6259 }, { "epoch": 1.0671355913862137, "grad_norm": 1.390625, "learning_rate": 1.4325334501850707e-05, "loss": 0.4904, "step": 6260 }, { "epoch": 1.0673087328211233, "grad_norm": 1.5390625, "learning_rate": 1.4323690008363455e-05, "loss": 0.487, "step": 6261 }, { "epoch": 1.0674818742560328, "grad_norm": 1.5, "learning_rate": 1.4322045371050722e-05, "loss": 0.4778, "step": 6262 }, { "epoch": 1.0676550156909426, "grad_norm": 1.359375, "learning_rate": 1.4320400589967216e-05, "loss": 0.458, "step": 6263 }, { "epoch": 1.067828157125852, "grad_norm": 1.390625, "learning_rate": 1.4318755665167655e-05, "loss": 0.502, "step": 6264 }, { "epoch": 1.0680012985607619, "grad_norm": 1.375, "learning_rate": 1.4317110596706747e-05, "loss": 0.4936, "step": 6265 }, { "epoch": 1.0681744399956714, "grad_norm": 1.3984375, "learning_rate": 1.4315465384639225e-05, "loss": 0.4675, "step": 6266 }, { "epoch": 1.0683475814305812, "grad_norm": 1.4765625, "learning_rate": 1.4313820029019808e-05, "loss": 0.5153, "step": 6267 }, { "epoch": 1.0685207228654907, "grad_norm": 1.3984375, "learning_rate": 1.4312174529903231e-05, "loss": 0.469, "step": 6268 }, { "epoch": 1.0686938643004005, "grad_norm": 1.5234375, "learning_rate": 1.4310528887344232e-05, "loss": 0.5318, "step": 6269 }, { "epoch": 1.06886700573531, "grad_norm": 1.4609375, "learning_rate": 1.4308883101397548e-05, "loss": 0.486, "step": 6270 }, { "epoch": 1.0690401471702198, "grad_norm": 1.4140625, "learning_rate": 1.4307237172117934e-05, "loss": 0.449, "step": 6271 }, { "epoch": 1.0692132886051293, "grad_norm": 1.3125, "learning_rate": 1.4305591099560132e-05, "loss": 0.4802, "step": 6272 }, { "epoch": 1.0693864300400389, "grad_norm": 1.3515625, "learning_rate": 1.4303944883778904e-05, "loss": 0.4123, "step": 6273 }, { "epoch": 1.0695595714749486, "grad_norm": 1.453125, "learning_rate": 1.4302298524829005e-05, "loss": 0.4784, "step": 6274 }, { "epoch": 1.0697327129098582, "grad_norm": 1.4921875, "learning_rate": 1.4300652022765207e-05, "loss": 0.5522, "step": 6275 }, { "epoch": 1.069905854344768, "grad_norm": 1.328125, "learning_rate": 1.4299005377642274e-05, "loss": 0.4476, "step": 6276 }, { "epoch": 1.0700789957796775, "grad_norm": 1.34375, "learning_rate": 1.4297358589514982e-05, "loss": 0.4699, "step": 6277 }, { "epoch": 1.0702521372145872, "grad_norm": 1.609375, "learning_rate": 1.4295711658438117e-05, "loss": 0.5348, "step": 6278 }, { "epoch": 1.0704252786494968, "grad_norm": 1.4140625, "learning_rate": 1.4294064584466455e-05, "loss": 0.5667, "step": 6279 }, { "epoch": 1.0705984200844065, "grad_norm": 1.34375, "learning_rate": 1.4292417367654792e-05, "loss": 0.4728, "step": 6280 }, { "epoch": 1.070771561519316, "grad_norm": 1.390625, "learning_rate": 1.4290770008057911e-05, "loss": 0.5172, "step": 6281 }, { "epoch": 1.0709447029542258, "grad_norm": 1.2890625, "learning_rate": 1.4289122505730623e-05, "loss": 0.4606, "step": 6282 }, { "epoch": 1.0711178443891354, "grad_norm": 1.4453125, "learning_rate": 1.4287474860727727e-05, "loss": 0.474, "step": 6283 }, { "epoch": 1.071290985824045, "grad_norm": 1.3828125, "learning_rate": 1.4285827073104027e-05, "loss": 0.467, "step": 6284 }, { "epoch": 1.0714641272589547, "grad_norm": 1.46875, "learning_rate": 1.4284179142914345e-05, "loss": 0.5369, "step": 6285 }, { "epoch": 1.0716372686938642, "grad_norm": 1.46875, "learning_rate": 1.428253107021349e-05, "loss": 0.5094, "step": 6286 }, { "epoch": 1.071810410128774, "grad_norm": 1.5, "learning_rate": 1.4280882855056285e-05, "loss": 0.5294, "step": 6287 }, { "epoch": 1.0719835515636835, "grad_norm": 1.3828125, "learning_rate": 1.4279234497497563e-05, "loss": 0.4239, "step": 6288 }, { "epoch": 1.0721566929985933, "grad_norm": 1.625, "learning_rate": 1.4277585997592152e-05, "loss": 0.4382, "step": 6289 }, { "epoch": 1.0723298344335028, "grad_norm": 1.421875, "learning_rate": 1.4275937355394889e-05, "loss": 0.4204, "step": 6290 }, { "epoch": 1.0725029758684126, "grad_norm": 1.5234375, "learning_rate": 1.4274288570960612e-05, "loss": 0.49, "step": 6291 }, { "epoch": 1.0726761173033221, "grad_norm": 1.3125, "learning_rate": 1.4272639644344173e-05, "loss": 0.4852, "step": 6292 }, { "epoch": 1.072849258738232, "grad_norm": 1.3515625, "learning_rate": 1.4270990575600416e-05, "loss": 0.4624, "step": 6293 }, { "epoch": 1.0730224001731414, "grad_norm": 1.578125, "learning_rate": 1.4269341364784206e-05, "loss": 0.5102, "step": 6294 }, { "epoch": 1.073195541608051, "grad_norm": 1.34375, "learning_rate": 1.4267692011950395e-05, "loss": 0.4772, "step": 6295 }, { "epoch": 1.0733686830429607, "grad_norm": 1.3984375, "learning_rate": 1.4266042517153849e-05, "loss": 0.4917, "step": 6296 }, { "epoch": 1.0735418244778703, "grad_norm": 1.3046875, "learning_rate": 1.426439288044944e-05, "loss": 0.5114, "step": 6297 }, { "epoch": 1.07371496591278, "grad_norm": 1.2578125, "learning_rate": 1.4262743101892042e-05, "loss": 0.4628, "step": 6298 }, { "epoch": 1.0738881073476896, "grad_norm": 1.3515625, "learning_rate": 1.4261093181536536e-05, "loss": 0.4558, "step": 6299 }, { "epoch": 1.0740612487825993, "grad_norm": 1.4140625, "learning_rate": 1.4259443119437802e-05, "loss": 0.4686, "step": 6300 }, { "epoch": 1.0742343902175089, "grad_norm": 1.375, "learning_rate": 1.4257792915650728e-05, "loss": 0.4438, "step": 6301 }, { "epoch": 1.0744075316524186, "grad_norm": 1.515625, "learning_rate": 1.425614257023021e-05, "loss": 0.4975, "step": 6302 }, { "epoch": 1.0745806730873282, "grad_norm": 1.65625, "learning_rate": 1.4254492083231143e-05, "loss": 0.5339, "step": 6303 }, { "epoch": 1.074753814522238, "grad_norm": 1.5078125, "learning_rate": 1.4252841454708439e-05, "loss": 0.5537, "step": 6304 }, { "epoch": 1.0749269559571475, "grad_norm": 1.40625, "learning_rate": 1.4251190684716992e-05, "loss": 0.498, "step": 6305 }, { "epoch": 1.075100097392057, "grad_norm": 1.3671875, "learning_rate": 1.4249539773311724e-05, "loss": 0.4162, "step": 6306 }, { "epoch": 1.0752732388269668, "grad_norm": 1.234375, "learning_rate": 1.4247888720547547e-05, "loss": 0.4246, "step": 6307 }, { "epoch": 1.0754463802618763, "grad_norm": 1.3828125, "learning_rate": 1.4246237526479378e-05, "loss": 0.4807, "step": 6308 }, { "epoch": 1.075619521696786, "grad_norm": 1.3515625, "learning_rate": 1.4244586191162157e-05, "loss": 0.4654, "step": 6309 }, { "epoch": 1.0757926631316956, "grad_norm": 1.3984375, "learning_rate": 1.4242934714650802e-05, "loss": 0.4454, "step": 6310 }, { "epoch": 1.0759658045666054, "grad_norm": 1.46875, "learning_rate": 1.4241283097000255e-05, "loss": 0.5049, "step": 6311 }, { "epoch": 1.076138946001515, "grad_norm": 1.359375, "learning_rate": 1.4239631338265453e-05, "loss": 0.4815, "step": 6312 }, { "epoch": 1.0763120874364247, "grad_norm": 1.390625, "learning_rate": 1.4237979438501344e-05, "loss": 0.4689, "step": 6313 }, { "epoch": 1.0764852288713342, "grad_norm": 1.4296875, "learning_rate": 1.4236327397762874e-05, "loss": 0.4253, "step": 6314 }, { "epoch": 1.076658370306244, "grad_norm": 1.3984375, "learning_rate": 1.4234675216105001e-05, "loss": 0.5045, "step": 6315 }, { "epoch": 1.0768315117411535, "grad_norm": 1.4765625, "learning_rate": 1.4233022893582682e-05, "loss": 0.455, "step": 6316 }, { "epoch": 1.077004653176063, "grad_norm": 1.3046875, "learning_rate": 1.4231370430250882e-05, "loss": 0.4324, "step": 6317 }, { "epoch": 1.0771777946109728, "grad_norm": 1.34375, "learning_rate": 1.4229717826164568e-05, "loss": 0.4699, "step": 6318 }, { "epoch": 1.0773509360458824, "grad_norm": 1.546875, "learning_rate": 1.4228065081378712e-05, "loss": 0.4909, "step": 6319 }, { "epoch": 1.0775240774807922, "grad_norm": 1.3828125, "learning_rate": 1.4226412195948293e-05, "loss": 0.4566, "step": 6320 }, { "epoch": 1.0776972189157017, "grad_norm": 1.4765625, "learning_rate": 1.42247591699283e-05, "loss": 0.4325, "step": 6321 }, { "epoch": 1.0778703603506115, "grad_norm": 1.328125, "learning_rate": 1.4223106003373708e-05, "loss": 0.4884, "step": 6322 }, { "epoch": 1.078043501785521, "grad_norm": 1.296875, "learning_rate": 1.4221452696339518e-05, "loss": 0.412, "step": 6323 }, { "epoch": 1.0782166432204308, "grad_norm": 1.328125, "learning_rate": 1.421979924888072e-05, "loss": 0.4922, "step": 6324 }, { "epoch": 1.0783897846553403, "grad_norm": 1.453125, "learning_rate": 1.421814566105232e-05, "loss": 0.5091, "step": 6325 }, { "epoch": 1.07856292609025, "grad_norm": 1.3359375, "learning_rate": 1.4216491932909322e-05, "loss": 0.4034, "step": 6326 }, { "epoch": 1.0787360675251596, "grad_norm": 1.4765625, "learning_rate": 1.4214838064506738e-05, "loss": 0.4606, "step": 6327 }, { "epoch": 1.0789092089600691, "grad_norm": 1.375, "learning_rate": 1.4213184055899583e-05, "loss": 0.4777, "step": 6328 }, { "epoch": 1.079082350394979, "grad_norm": 1.3828125, "learning_rate": 1.4211529907142871e-05, "loss": 0.5141, "step": 6329 }, { "epoch": 1.0792554918298884, "grad_norm": 1.4453125, "learning_rate": 1.4209875618291635e-05, "loss": 0.4694, "step": 6330 }, { "epoch": 1.0794286332647982, "grad_norm": 1.5, "learning_rate": 1.4208221189400897e-05, "loss": 0.4843, "step": 6331 }, { "epoch": 1.0796017746997078, "grad_norm": 2.140625, "learning_rate": 1.4206566620525698e-05, "loss": 0.4596, "step": 6332 }, { "epoch": 1.0797749161346175, "grad_norm": 1.2890625, "learning_rate": 1.4204911911721069e-05, "loss": 0.4943, "step": 6333 }, { "epoch": 1.079948057569527, "grad_norm": 1.3828125, "learning_rate": 1.4203257063042058e-05, "loss": 0.4917, "step": 6334 }, { "epoch": 1.0801211990044368, "grad_norm": 1.3046875, "learning_rate": 1.420160207454371e-05, "loss": 0.4681, "step": 6335 }, { "epoch": 1.0802943404393464, "grad_norm": 1.359375, "learning_rate": 1.419994694628108e-05, "loss": 0.4504, "step": 6336 }, { "epoch": 1.0804674818742561, "grad_norm": 1.2890625, "learning_rate": 1.4198291678309224e-05, "loss": 0.4173, "step": 6337 }, { "epoch": 1.0806406233091657, "grad_norm": 1.3984375, "learning_rate": 1.4196636270683204e-05, "loss": 0.5349, "step": 6338 }, { "epoch": 1.0808137647440752, "grad_norm": 1.390625, "learning_rate": 1.4194980723458083e-05, "loss": 0.4809, "step": 6339 }, { "epoch": 1.080986906178985, "grad_norm": 1.40625, "learning_rate": 1.419332503668894e-05, "loss": 0.5407, "step": 6340 }, { "epoch": 1.0811600476138945, "grad_norm": 1.4140625, "learning_rate": 1.4191669210430838e-05, "loss": 0.4727, "step": 6341 }, { "epoch": 1.0813331890488043, "grad_norm": 1.34375, "learning_rate": 1.419001324473887e-05, "loss": 0.4692, "step": 6342 }, { "epoch": 1.0815063304837138, "grad_norm": 1.40625, "learning_rate": 1.4188357139668112e-05, "loss": 0.489, "step": 6343 }, { "epoch": 1.0816794719186236, "grad_norm": 1.2578125, "learning_rate": 1.4186700895273662e-05, "loss": 0.4624, "step": 6344 }, { "epoch": 1.081852613353533, "grad_norm": 1.46875, "learning_rate": 1.4185044511610606e-05, "loss": 0.4924, "step": 6345 }, { "epoch": 1.0820257547884429, "grad_norm": 1.421875, "learning_rate": 1.4183387988734043e-05, "loss": 0.5338, "step": 6346 }, { "epoch": 1.0821988962233524, "grad_norm": 1.3828125, "learning_rate": 1.4181731326699085e-05, "loss": 0.4566, "step": 6347 }, { "epoch": 1.0823720376582622, "grad_norm": 1.59375, "learning_rate": 1.4180074525560832e-05, "loss": 0.4668, "step": 6348 }, { "epoch": 1.0825451790931717, "grad_norm": 1.4765625, "learning_rate": 1.4178417585374401e-05, "loss": 0.4401, "step": 6349 }, { "epoch": 1.0827183205280815, "grad_norm": 1.46875, "learning_rate": 1.4176760506194906e-05, "loss": 0.4584, "step": 6350 }, { "epoch": 1.082891461962991, "grad_norm": 1.3515625, "learning_rate": 1.4175103288077471e-05, "loss": 0.4681, "step": 6351 }, { "epoch": 1.0830646033979006, "grad_norm": 1.34375, "learning_rate": 1.4173445931077224e-05, "loss": 0.5018, "step": 6352 }, { "epoch": 1.0832377448328103, "grad_norm": 1.453125, "learning_rate": 1.417178843524929e-05, "loss": 0.5265, "step": 6353 }, { "epoch": 1.0834108862677199, "grad_norm": 1.7421875, "learning_rate": 1.4170130800648814e-05, "loss": 0.5683, "step": 6354 }, { "epoch": 1.0835840277026296, "grad_norm": 1.4921875, "learning_rate": 1.4168473027330929e-05, "loss": 0.4822, "step": 6355 }, { "epoch": 1.0837571691375392, "grad_norm": 1.3828125, "learning_rate": 1.4166815115350785e-05, "loss": 0.4008, "step": 6356 }, { "epoch": 1.083930310572449, "grad_norm": 1.4140625, "learning_rate": 1.4165157064763524e-05, "loss": 0.4745, "step": 6357 }, { "epoch": 1.0841034520073585, "grad_norm": 1.3046875, "learning_rate": 1.416349887562431e-05, "loss": 0.4782, "step": 6358 }, { "epoch": 1.0842765934422682, "grad_norm": 1.3125, "learning_rate": 1.4161840547988297e-05, "loss": 0.4181, "step": 6359 }, { "epoch": 1.0844497348771778, "grad_norm": 1.3828125, "learning_rate": 1.4160182081910647e-05, "loss": 0.5006, "step": 6360 }, { "epoch": 1.0846228763120875, "grad_norm": 1.359375, "learning_rate": 1.4158523477446532e-05, "loss": 0.4497, "step": 6361 }, { "epoch": 1.084796017746997, "grad_norm": 1.4296875, "learning_rate": 1.415686473465112e-05, "loss": 0.4635, "step": 6362 }, { "epoch": 1.0849691591819066, "grad_norm": 1.3515625, "learning_rate": 1.4155205853579592e-05, "loss": 0.4311, "step": 6363 }, { "epoch": 1.0851423006168164, "grad_norm": 1.3984375, "learning_rate": 1.4153546834287131e-05, "loss": 0.4801, "step": 6364 }, { "epoch": 1.085315442051726, "grad_norm": 1.4375, "learning_rate": 1.4151887676828917e-05, "loss": 0.5181, "step": 6365 }, { "epoch": 1.0854885834866357, "grad_norm": 1.3359375, "learning_rate": 1.415022838126015e-05, "loss": 0.4472, "step": 6366 }, { "epoch": 1.0856617249215452, "grad_norm": 1.4921875, "learning_rate": 1.4148568947636019e-05, "loss": 0.4566, "step": 6367 }, { "epoch": 1.085834866356455, "grad_norm": 1.40625, "learning_rate": 1.4146909376011726e-05, "loss": 0.4651, "step": 6368 }, { "epoch": 1.0860080077913645, "grad_norm": 1.453125, "learning_rate": 1.4145249666442476e-05, "loss": 0.4717, "step": 6369 }, { "epoch": 1.0861811492262743, "grad_norm": 1.25, "learning_rate": 1.4143589818983476e-05, "loss": 0.4308, "step": 6370 }, { "epoch": 1.0863542906611838, "grad_norm": 1.375, "learning_rate": 1.4141929833689947e-05, "loss": 0.4404, "step": 6371 }, { "epoch": 1.0865274320960936, "grad_norm": 1.359375, "learning_rate": 1.4140269710617103e-05, "loss": 0.4729, "step": 6372 }, { "epoch": 1.0867005735310031, "grad_norm": 1.4765625, "learning_rate": 1.4138609449820164e-05, "loss": 0.4414, "step": 6373 }, { "epoch": 1.086873714965913, "grad_norm": 1.53125, "learning_rate": 1.4136949051354365e-05, "loss": 0.4955, "step": 6374 }, { "epoch": 1.0870468564008224, "grad_norm": 1.4375, "learning_rate": 1.4135288515274935e-05, "loss": 0.4476, "step": 6375 }, { "epoch": 1.087219997835732, "grad_norm": 1.3671875, "learning_rate": 1.4133627841637107e-05, "loss": 0.42, "step": 6376 }, { "epoch": 1.0873931392706417, "grad_norm": 1.40625, "learning_rate": 1.4131967030496125e-05, "loss": 0.518, "step": 6377 }, { "epoch": 1.0875662807055513, "grad_norm": 1.5, "learning_rate": 1.4130306081907241e-05, "loss": 0.4714, "step": 6378 }, { "epoch": 1.087739422140461, "grad_norm": 1.390625, "learning_rate": 1.4128644995925696e-05, "loss": 0.4598, "step": 6379 }, { "epoch": 1.0879125635753706, "grad_norm": 1.3671875, "learning_rate": 1.4126983772606755e-05, "loss": 0.4527, "step": 6380 }, { "epoch": 1.0880857050102803, "grad_norm": 1.328125, "learning_rate": 1.4125322412005669e-05, "loss": 0.4491, "step": 6381 }, { "epoch": 1.0882588464451899, "grad_norm": 1.4921875, "learning_rate": 1.4123660914177709e-05, "loss": 0.3992, "step": 6382 }, { "epoch": 1.0884319878800996, "grad_norm": 1.515625, "learning_rate": 1.412199927917814e-05, "loss": 0.5015, "step": 6383 }, { "epoch": 1.0886051293150092, "grad_norm": 1.515625, "learning_rate": 1.4120337507062232e-05, "loss": 0.5137, "step": 6384 }, { "epoch": 1.088778270749919, "grad_norm": 1.2890625, "learning_rate": 1.4118675597885274e-05, "loss": 0.4171, "step": 6385 }, { "epoch": 1.0889514121848285, "grad_norm": 1.3671875, "learning_rate": 1.4117013551702538e-05, "loss": 0.5005, "step": 6386 }, { "epoch": 1.089124553619738, "grad_norm": 1.3203125, "learning_rate": 1.4115351368569319e-05, "loss": 0.4965, "step": 6387 }, { "epoch": 1.0892976950546478, "grad_norm": 1.4375, "learning_rate": 1.4113689048540904e-05, "loss": 0.4881, "step": 6388 }, { "epoch": 1.0894708364895573, "grad_norm": 1.2578125, "learning_rate": 1.411202659167259e-05, "loss": 0.4517, "step": 6389 }, { "epoch": 1.089643977924467, "grad_norm": 1.390625, "learning_rate": 1.4110363998019679e-05, "loss": 0.4521, "step": 6390 }, { "epoch": 1.0898171193593766, "grad_norm": 1.421875, "learning_rate": 1.4108701267637472e-05, "loss": 0.476, "step": 6391 }, { "epoch": 1.0899902607942864, "grad_norm": 1.46875, "learning_rate": 1.4107038400581288e-05, "loss": 0.4696, "step": 6392 }, { "epoch": 1.090163402229196, "grad_norm": 1.3984375, "learning_rate": 1.4105375396906433e-05, "loss": 0.4683, "step": 6393 }, { "epoch": 1.0903365436641057, "grad_norm": 1.390625, "learning_rate": 1.4103712256668232e-05, "loss": 0.4509, "step": 6394 }, { "epoch": 1.0905096850990152, "grad_norm": 1.375, "learning_rate": 1.4102048979922001e-05, "loss": 0.501, "step": 6395 }, { "epoch": 1.090682826533925, "grad_norm": 1.4453125, "learning_rate": 1.4100385566723074e-05, "loss": 0.4736, "step": 6396 }, { "epoch": 1.0908559679688346, "grad_norm": 1.453125, "learning_rate": 1.4098722017126787e-05, "loss": 0.4835, "step": 6397 }, { "epoch": 1.091029109403744, "grad_norm": 1.359375, "learning_rate": 1.4097058331188467e-05, "loss": 0.4294, "step": 6398 }, { "epoch": 1.0912022508386539, "grad_norm": 1.40625, "learning_rate": 1.4095394508963464e-05, "loss": 0.4566, "step": 6399 }, { "epoch": 1.0913753922735634, "grad_norm": 1.484375, "learning_rate": 1.4093730550507122e-05, "loss": 0.4406, "step": 6400 }, { "epoch": 1.0915485337084732, "grad_norm": 1.25, "learning_rate": 1.409206645587479e-05, "loss": 0.4011, "step": 6401 }, { "epoch": 1.0917216751433827, "grad_norm": 1.40625, "learning_rate": 1.4090402225121823e-05, "loss": 0.4479, "step": 6402 }, { "epoch": 1.0918948165782925, "grad_norm": 1.4453125, "learning_rate": 1.4088737858303585e-05, "loss": 0.4243, "step": 6403 }, { "epoch": 1.092067958013202, "grad_norm": 1.453125, "learning_rate": 1.4087073355475437e-05, "loss": 0.5193, "step": 6404 }, { "epoch": 1.0922410994481118, "grad_norm": 1.2734375, "learning_rate": 1.408540871669275e-05, "loss": 0.4402, "step": 6405 }, { "epoch": 1.0924142408830213, "grad_norm": 1.3828125, "learning_rate": 1.4083743942010895e-05, "loss": 0.54, "step": 6406 }, { "epoch": 1.092587382317931, "grad_norm": 1.46875, "learning_rate": 1.4082079031485253e-05, "loss": 0.5382, "step": 6407 }, { "epoch": 1.0927605237528406, "grad_norm": 1.4296875, "learning_rate": 1.4080413985171202e-05, "loss": 0.4126, "step": 6408 }, { "epoch": 1.0929336651877501, "grad_norm": 1.4296875, "learning_rate": 1.4078748803124134e-05, "loss": 0.4822, "step": 6409 }, { "epoch": 1.09310680662266, "grad_norm": 1.390625, "learning_rate": 1.4077083485399435e-05, "loss": 0.5424, "step": 6410 }, { "epoch": 1.0932799480575695, "grad_norm": 1.8671875, "learning_rate": 1.4075418032052506e-05, "loss": 0.4558, "step": 6411 }, { "epoch": 1.0934530894924792, "grad_norm": 1.515625, "learning_rate": 1.4073752443138742e-05, "loss": 0.5447, "step": 6412 }, { "epoch": 1.0936262309273888, "grad_norm": 1.3515625, "learning_rate": 1.4072086718713558e-05, "loss": 0.4465, "step": 6413 }, { "epoch": 1.0937993723622985, "grad_norm": 1.5078125, "learning_rate": 1.4070420858832351e-05, "loss": 0.4457, "step": 6414 }, { "epoch": 1.093972513797208, "grad_norm": 1.421875, "learning_rate": 1.4068754863550544e-05, "loss": 0.4608, "step": 6415 }, { "epoch": 1.0941456552321178, "grad_norm": 1.4140625, "learning_rate": 1.4067088732923555e-05, "loss": 0.4829, "step": 6416 }, { "epoch": 1.0943187966670274, "grad_norm": 1.390625, "learning_rate": 1.4065422467006797e-05, "loss": 0.4724, "step": 6417 }, { "epoch": 1.0944919381019371, "grad_norm": 1.453125, "learning_rate": 1.4063756065855714e-05, "loss": 0.5284, "step": 6418 }, { "epoch": 1.0946650795368467, "grad_norm": 1.4140625, "learning_rate": 1.4062089529525724e-05, "loss": 0.5024, "step": 6419 }, { "epoch": 1.0948382209717562, "grad_norm": 1.4140625, "learning_rate": 1.4060422858072272e-05, "loss": 0.4702, "step": 6420 }, { "epoch": 1.095011362406666, "grad_norm": 1.4765625, "learning_rate": 1.4058756051550792e-05, "loss": 0.4174, "step": 6421 }, { "epoch": 1.0951845038415755, "grad_norm": 1.3515625, "learning_rate": 1.4057089110016733e-05, "loss": 0.4673, "step": 6422 }, { "epoch": 1.0953576452764853, "grad_norm": 1.421875, "learning_rate": 1.405542203352555e-05, "loss": 0.502, "step": 6423 }, { "epoch": 1.0955307867113948, "grad_norm": 1.421875, "learning_rate": 1.4053754822132688e-05, "loss": 0.4396, "step": 6424 }, { "epoch": 1.0957039281463046, "grad_norm": 1.4375, "learning_rate": 1.4052087475893616e-05, "loss": 0.5508, "step": 6425 }, { "epoch": 1.0958770695812141, "grad_norm": 1.484375, "learning_rate": 1.4050419994863789e-05, "loss": 0.4464, "step": 6426 }, { "epoch": 1.0960502110161239, "grad_norm": 1.28125, "learning_rate": 1.4048752379098678e-05, "loss": 0.4345, "step": 6427 }, { "epoch": 1.0962233524510334, "grad_norm": 1.328125, "learning_rate": 1.4047084628653755e-05, "loss": 0.4283, "step": 6428 }, { "epoch": 1.0963964938859432, "grad_norm": 1.375, "learning_rate": 1.40454167435845e-05, "loss": 0.4652, "step": 6429 }, { "epoch": 1.0965696353208527, "grad_norm": 1.3203125, "learning_rate": 1.404374872394639e-05, "loss": 0.4526, "step": 6430 }, { "epoch": 1.0967427767557623, "grad_norm": 1.515625, "learning_rate": 1.4042080569794916e-05, "loss": 0.4741, "step": 6431 }, { "epoch": 1.096915918190672, "grad_norm": 1.5546875, "learning_rate": 1.4040412281185565e-05, "loss": 0.4885, "step": 6432 }, { "epoch": 1.0970890596255816, "grad_norm": 1.2578125, "learning_rate": 1.4038743858173827e-05, "loss": 0.4244, "step": 6433 }, { "epoch": 1.0972622010604913, "grad_norm": 1.3984375, "learning_rate": 1.4037075300815208e-05, "loss": 0.4826, "step": 6434 }, { "epoch": 1.0974353424954009, "grad_norm": 1.3359375, "learning_rate": 1.4035406609165214e-05, "loss": 0.4612, "step": 6435 }, { "epoch": 1.0976084839303106, "grad_norm": 1.375, "learning_rate": 1.4033737783279346e-05, "loss": 0.4719, "step": 6436 }, { "epoch": 1.0977816253652202, "grad_norm": 1.4296875, "learning_rate": 1.4032068823213124e-05, "loss": 0.4764, "step": 6437 }, { "epoch": 1.09795476680013, "grad_norm": 1.3515625, "learning_rate": 1.4030399729022055e-05, "loss": 0.5073, "step": 6438 }, { "epoch": 1.0981279082350395, "grad_norm": 1.3125, "learning_rate": 1.402873050076167e-05, "loss": 0.4184, "step": 6439 }, { "epoch": 1.0983010496699492, "grad_norm": 1.5234375, "learning_rate": 1.4027061138487491e-05, "loss": 0.4647, "step": 6440 }, { "epoch": 1.0984741911048588, "grad_norm": 1.5390625, "learning_rate": 1.4025391642255053e-05, "loss": 0.496, "step": 6441 }, { "epoch": 1.0986473325397683, "grad_norm": 1.3515625, "learning_rate": 1.4023722012119886e-05, "loss": 0.4152, "step": 6442 }, { "epoch": 1.098820473974678, "grad_norm": 1.2890625, "learning_rate": 1.4022052248137527e-05, "loss": 0.4779, "step": 6443 }, { "epoch": 1.0989936154095876, "grad_norm": 1.3828125, "learning_rate": 1.4020382350363528e-05, "loss": 0.4916, "step": 6444 }, { "epoch": 1.0991667568444974, "grad_norm": 1.359375, "learning_rate": 1.4018712318853432e-05, "loss": 0.469, "step": 6445 }, { "epoch": 1.099339898279407, "grad_norm": 1.2890625, "learning_rate": 1.401704215366279e-05, "loss": 0.4631, "step": 6446 }, { "epoch": 1.0995130397143167, "grad_norm": 1.46875, "learning_rate": 1.4015371854847167e-05, "loss": 0.555, "step": 6447 }, { "epoch": 1.0996861811492262, "grad_norm": 1.4453125, "learning_rate": 1.4013701422462118e-05, "loss": 0.4508, "step": 6448 }, { "epoch": 1.099859322584136, "grad_norm": 1.28125, "learning_rate": 1.401203085656321e-05, "loss": 0.4344, "step": 6449 }, { "epoch": 1.1000324640190455, "grad_norm": 1.46875, "learning_rate": 1.4010360157206013e-05, "loss": 0.5099, "step": 6450 }, { "epoch": 1.1002056054539553, "grad_norm": 1.3828125, "learning_rate": 1.4008689324446108e-05, "loss": 0.5137, "step": 6451 }, { "epoch": 1.1003787468888648, "grad_norm": 1.3984375, "learning_rate": 1.4007018358339068e-05, "loss": 0.4692, "step": 6452 }, { "epoch": 1.1005518883237744, "grad_norm": 1.5390625, "learning_rate": 1.4005347258940479e-05, "loss": 0.5215, "step": 6453 }, { "epoch": 1.1007250297586841, "grad_norm": 1.359375, "learning_rate": 1.4003676026305928e-05, "loss": 0.4466, "step": 6454 }, { "epoch": 1.1008981711935937, "grad_norm": 1.40625, "learning_rate": 1.4002004660491008e-05, "loss": 0.462, "step": 6455 }, { "epoch": 1.1010713126285034, "grad_norm": 1.2265625, "learning_rate": 1.4000333161551324e-05, "loss": 0.4237, "step": 6456 }, { "epoch": 1.101244454063413, "grad_norm": 1.265625, "learning_rate": 1.3998661529542463e-05, "loss": 0.4296, "step": 6457 }, { "epoch": 1.1014175954983227, "grad_norm": 1.359375, "learning_rate": 1.3996989764520044e-05, "loss": 0.4478, "step": 6458 }, { "epoch": 1.1015907369332323, "grad_norm": 1.3125, "learning_rate": 1.399531786653967e-05, "loss": 0.4081, "step": 6459 }, { "epoch": 1.101763878368142, "grad_norm": 1.34375, "learning_rate": 1.3993645835656955e-05, "loss": 0.435, "step": 6460 }, { "epoch": 1.1019370198030516, "grad_norm": 1.3671875, "learning_rate": 1.3991973671927527e-05, "loss": 0.5049, "step": 6461 }, { "epoch": 1.1021101612379613, "grad_norm": 1.375, "learning_rate": 1.3990301375407001e-05, "loss": 0.4647, "step": 6462 }, { "epoch": 1.102283302672871, "grad_norm": 1.4609375, "learning_rate": 1.398862894615101e-05, "loss": 0.4857, "step": 6463 }, { "epoch": 1.1024564441077804, "grad_norm": 1.296875, "learning_rate": 1.3986956384215185e-05, "loss": 0.4363, "step": 6464 }, { "epoch": 1.1026295855426902, "grad_norm": 3.484375, "learning_rate": 1.3985283689655162e-05, "loss": 0.6303, "step": 6465 }, { "epoch": 1.1028027269775997, "grad_norm": 1.4921875, "learning_rate": 1.3983610862526584e-05, "loss": 0.5347, "step": 6466 }, { "epoch": 1.1029758684125095, "grad_norm": 1.2578125, "learning_rate": 1.3981937902885094e-05, "loss": 0.4217, "step": 6467 }, { "epoch": 1.103149009847419, "grad_norm": 1.515625, "learning_rate": 1.3980264810786348e-05, "loss": 0.4577, "step": 6468 }, { "epoch": 1.1033221512823288, "grad_norm": 1.4140625, "learning_rate": 1.3978591586285995e-05, "loss": 0.5029, "step": 6469 }, { "epoch": 1.1034952927172383, "grad_norm": 1.359375, "learning_rate": 1.3976918229439698e-05, "loss": 0.4367, "step": 6470 }, { "epoch": 1.103668434152148, "grad_norm": 1.390625, "learning_rate": 1.3975244740303113e-05, "loss": 0.4787, "step": 6471 }, { "epoch": 1.1038415755870576, "grad_norm": 1.484375, "learning_rate": 1.3973571118931917e-05, "loss": 0.509, "step": 6472 }, { "epoch": 1.1040147170219674, "grad_norm": 1.3671875, "learning_rate": 1.3971897365381776e-05, "loss": 0.4517, "step": 6473 }, { "epoch": 1.104187858456877, "grad_norm": 1.3671875, "learning_rate": 1.3970223479708373e-05, "loss": 0.4603, "step": 6474 }, { "epoch": 1.1043609998917865, "grad_norm": 1.40625, "learning_rate": 1.3968549461967383e-05, "loss": 0.4876, "step": 6475 }, { "epoch": 1.1045341413266963, "grad_norm": 1.6171875, "learning_rate": 1.3966875312214491e-05, "loss": 0.5311, "step": 6476 }, { "epoch": 1.1047072827616058, "grad_norm": 1.4296875, "learning_rate": 1.3965201030505393e-05, "loss": 0.4703, "step": 6477 }, { "epoch": 1.1048804241965156, "grad_norm": 1.4609375, "learning_rate": 1.3963526616895777e-05, "loss": 0.464, "step": 6478 }, { "epoch": 1.105053565631425, "grad_norm": 1.46875, "learning_rate": 1.3961852071441344e-05, "loss": 0.4773, "step": 6479 }, { "epoch": 1.1052267070663349, "grad_norm": 1.4375, "learning_rate": 1.3960177394197798e-05, "loss": 0.5069, "step": 6480 }, { "epoch": 1.1053998485012444, "grad_norm": 1.4375, "learning_rate": 1.395850258522084e-05, "loss": 0.4895, "step": 6481 }, { "epoch": 1.1055729899361542, "grad_norm": 1.3671875, "learning_rate": 1.3956827644566195e-05, "loss": 0.4517, "step": 6482 }, { "epoch": 1.1057461313710637, "grad_norm": 1.359375, "learning_rate": 1.3955152572289568e-05, "loss": 0.44, "step": 6483 }, { "epoch": 1.1059192728059735, "grad_norm": 1.59375, "learning_rate": 1.3953477368446679e-05, "loss": 0.7168, "step": 6484 }, { "epoch": 1.106092414240883, "grad_norm": 1.2734375, "learning_rate": 1.3951802033093262e-05, "loss": 0.531, "step": 6485 }, { "epoch": 1.1062655556757928, "grad_norm": 1.421875, "learning_rate": 1.3950126566285039e-05, "loss": 0.4409, "step": 6486 }, { "epoch": 1.1064386971107023, "grad_norm": 1.3984375, "learning_rate": 1.3948450968077742e-05, "loss": 0.3882, "step": 6487 }, { "epoch": 1.1066118385456118, "grad_norm": 1.4140625, "learning_rate": 1.3946775238527112e-05, "loss": 0.4799, "step": 6488 }, { "epoch": 1.1067849799805216, "grad_norm": 1.328125, "learning_rate": 1.3945099377688898e-05, "loss": 0.4802, "step": 6489 }, { "epoch": 1.1069581214154312, "grad_norm": 1.3125, "learning_rate": 1.3943423385618834e-05, "loss": 0.4373, "step": 6490 }, { "epoch": 1.107131262850341, "grad_norm": 1.3515625, "learning_rate": 1.3941747262372678e-05, "loss": 0.4924, "step": 6491 }, { "epoch": 1.1073044042852505, "grad_norm": 1.375, "learning_rate": 1.3940071008006185e-05, "loss": 0.471, "step": 6492 }, { "epoch": 1.1074775457201602, "grad_norm": 1.2890625, "learning_rate": 1.3938394622575115e-05, "loss": 0.423, "step": 6493 }, { "epoch": 1.1076506871550698, "grad_norm": 1.3671875, "learning_rate": 1.3936718106135235e-05, "loss": 0.4989, "step": 6494 }, { "epoch": 1.1078238285899795, "grad_norm": 1.296875, "learning_rate": 1.3935041458742307e-05, "loss": 0.482, "step": 6495 }, { "epoch": 1.107996970024889, "grad_norm": 1.4765625, "learning_rate": 1.3933364680452106e-05, "loss": 0.4519, "step": 6496 }, { "epoch": 1.1081701114597988, "grad_norm": 1.3359375, "learning_rate": 1.3931687771320413e-05, "loss": 0.4805, "step": 6497 }, { "epoch": 1.1083432528947084, "grad_norm": 1.4765625, "learning_rate": 1.3930010731403004e-05, "loss": 0.4989, "step": 6498 }, { "epoch": 1.108516394329618, "grad_norm": 1.328125, "learning_rate": 1.3928333560755671e-05, "loss": 0.4604, "step": 6499 }, { "epoch": 1.1086895357645277, "grad_norm": 1.4375, "learning_rate": 1.3926656259434199e-05, "loss": 0.5386, "step": 6500 }, { "epoch": 1.1088626771994372, "grad_norm": 1.40625, "learning_rate": 1.3924978827494387e-05, "loss": 0.4333, "step": 6501 }, { "epoch": 1.109035818634347, "grad_norm": 1.453125, "learning_rate": 1.3923301264992029e-05, "loss": 0.5013, "step": 6502 }, { "epoch": 1.1092089600692565, "grad_norm": 1.3203125, "learning_rate": 1.3921623571982932e-05, "loss": 0.4744, "step": 6503 }, { "epoch": 1.1093821015041663, "grad_norm": 1.4765625, "learning_rate": 1.39199457485229e-05, "loss": 0.5223, "step": 6504 }, { "epoch": 1.1095552429390758, "grad_norm": 1.3984375, "learning_rate": 1.3918267794667751e-05, "loss": 0.4609, "step": 6505 }, { "epoch": 1.1097283843739856, "grad_norm": 1.4921875, "learning_rate": 1.3916589710473297e-05, "loss": 0.4871, "step": 6506 }, { "epoch": 1.1099015258088951, "grad_norm": 1.3359375, "learning_rate": 1.391491149599536e-05, "loss": 0.4424, "step": 6507 }, { "epoch": 1.1100746672438049, "grad_norm": 1.375, "learning_rate": 1.3913233151289764e-05, "loss": 0.5959, "step": 6508 }, { "epoch": 1.1102478086787144, "grad_norm": 1.40625, "learning_rate": 1.391155467641234e-05, "loss": 0.4543, "step": 6509 }, { "epoch": 1.1104209501136242, "grad_norm": 1.5, "learning_rate": 1.390987607141892e-05, "loss": 0.4902, "step": 6510 }, { "epoch": 1.1105940915485337, "grad_norm": 1.546875, "learning_rate": 1.3908197336365344e-05, "loss": 0.5159, "step": 6511 }, { "epoch": 1.1107672329834433, "grad_norm": 1.375, "learning_rate": 1.390651847130745e-05, "loss": 0.4795, "step": 6512 }, { "epoch": 1.110940374418353, "grad_norm": 1.46875, "learning_rate": 1.3904839476301091e-05, "loss": 0.4738, "step": 6513 }, { "epoch": 1.1111135158532626, "grad_norm": 1.390625, "learning_rate": 1.3903160351402111e-05, "loss": 0.4558, "step": 6514 }, { "epoch": 1.1112866572881723, "grad_norm": 1.3984375, "learning_rate": 1.3901481096666372e-05, "loss": 0.4311, "step": 6515 }, { "epoch": 1.1114597987230819, "grad_norm": 1.3046875, "learning_rate": 1.389980171214973e-05, "loss": 0.4369, "step": 6516 }, { "epoch": 1.1116329401579916, "grad_norm": 1.2734375, "learning_rate": 1.3898122197908045e-05, "loss": 0.4412, "step": 6517 }, { "epoch": 1.1118060815929012, "grad_norm": 1.3828125, "learning_rate": 1.3896442553997196e-05, "loss": 0.4472, "step": 6518 }, { "epoch": 1.111979223027811, "grad_norm": 1.3515625, "learning_rate": 1.3894762780473043e-05, "loss": 0.4012, "step": 6519 }, { "epoch": 1.1121523644627205, "grad_norm": 1.3671875, "learning_rate": 1.3893082877391475e-05, "loss": 0.4887, "step": 6520 }, { "epoch": 1.1123255058976302, "grad_norm": 1.3125, "learning_rate": 1.3891402844808364e-05, "loss": 0.4719, "step": 6521 }, { "epoch": 1.1124986473325398, "grad_norm": 1.46875, "learning_rate": 1.3889722682779598e-05, "loss": 0.458, "step": 6522 }, { "epoch": 1.1126717887674493, "grad_norm": 1.4375, "learning_rate": 1.3888042391361071e-05, "loss": 0.5477, "step": 6523 }, { "epoch": 1.112844930202359, "grad_norm": 1.515625, "learning_rate": 1.388636197060867e-05, "loss": 0.5453, "step": 6524 }, { "epoch": 1.1130180716372686, "grad_norm": 1.3984375, "learning_rate": 1.3884681420578298e-05, "loss": 0.4438, "step": 6525 }, { "epoch": 1.1131912130721784, "grad_norm": 1.40625, "learning_rate": 1.3883000741325857e-05, "loss": 0.4543, "step": 6526 }, { "epoch": 1.113364354507088, "grad_norm": 1.3515625, "learning_rate": 1.3881319932907256e-05, "loss": 0.4889, "step": 6527 }, { "epoch": 1.1135374959419977, "grad_norm": 1.78125, "learning_rate": 1.3879638995378399e-05, "loss": 0.4827, "step": 6528 }, { "epoch": 1.1137106373769072, "grad_norm": 1.5234375, "learning_rate": 1.3877957928795209e-05, "loss": 0.5758, "step": 6529 }, { "epoch": 1.113883778811817, "grad_norm": 1.4140625, "learning_rate": 1.3876276733213602e-05, "loss": 0.447, "step": 6530 }, { "epoch": 1.1140569202467265, "grad_norm": 1.359375, "learning_rate": 1.3874595408689506e-05, "loss": 0.426, "step": 6531 }, { "epoch": 1.1142300616816363, "grad_norm": 1.421875, "learning_rate": 1.3872913955278848e-05, "loss": 0.4592, "step": 6532 }, { "epoch": 1.1144032031165458, "grad_norm": 1.4609375, "learning_rate": 1.3871232373037558e-05, "loss": 0.4218, "step": 6533 }, { "epoch": 1.1145763445514554, "grad_norm": 1.4140625, "learning_rate": 1.3869550662021573e-05, "loss": 0.4713, "step": 6534 }, { "epoch": 1.1147494859863651, "grad_norm": 1.5234375, "learning_rate": 1.3867868822286838e-05, "loss": 0.5203, "step": 6535 }, { "epoch": 1.1149226274212747, "grad_norm": 1.3203125, "learning_rate": 1.3866186853889296e-05, "loss": 0.4749, "step": 6536 }, { "epoch": 1.1150957688561844, "grad_norm": 1.3828125, "learning_rate": 1.38645047568849e-05, "loss": 0.5112, "step": 6537 }, { "epoch": 1.115268910291094, "grad_norm": 1.4921875, "learning_rate": 1.3862822531329598e-05, "loss": 0.452, "step": 6538 }, { "epoch": 1.1154420517260037, "grad_norm": 1.4140625, "learning_rate": 1.3861140177279355e-05, "loss": 0.4536, "step": 6539 }, { "epoch": 1.1156151931609133, "grad_norm": 1.3359375, "learning_rate": 1.3859457694790131e-05, "loss": 0.4639, "step": 6540 }, { "epoch": 1.115788334595823, "grad_norm": 1.453125, "learning_rate": 1.385777508391789e-05, "loss": 0.485, "step": 6541 }, { "epoch": 1.1159614760307326, "grad_norm": 1.3828125, "learning_rate": 1.3856092344718609e-05, "loss": 0.4149, "step": 6542 }, { "epoch": 1.1161346174656424, "grad_norm": 1.4140625, "learning_rate": 1.3854409477248256e-05, "loss": 0.4754, "step": 6543 }, { "epoch": 1.116307758900552, "grad_norm": 1.390625, "learning_rate": 1.3852726481562821e-05, "loss": 0.4911, "step": 6544 }, { "epoch": 1.1164809003354614, "grad_norm": 1.40625, "learning_rate": 1.385104335771828e-05, "loss": 0.4868, "step": 6545 }, { "epoch": 1.1166540417703712, "grad_norm": 1.4375, "learning_rate": 1.3849360105770622e-05, "loss": 0.4766, "step": 6546 }, { "epoch": 1.1168271832052807, "grad_norm": 1.421875, "learning_rate": 1.3847676725775843e-05, "loss": 0.4949, "step": 6547 }, { "epoch": 1.1170003246401905, "grad_norm": 1.453125, "learning_rate": 1.3845993217789937e-05, "loss": 0.5459, "step": 6548 }, { "epoch": 1.1171734660751, "grad_norm": 1.2421875, "learning_rate": 1.3844309581868909e-05, "loss": 0.489, "step": 6549 }, { "epoch": 1.1173466075100098, "grad_norm": 1.4296875, "learning_rate": 1.3842625818068758e-05, "loss": 0.5052, "step": 6550 }, { "epoch": 1.1175197489449193, "grad_norm": 1.3984375, "learning_rate": 1.3840941926445502e-05, "loss": 0.4948, "step": 6551 }, { "epoch": 1.117692890379829, "grad_norm": 1.5078125, "learning_rate": 1.3839257907055144e-05, "loss": 0.4704, "step": 6552 }, { "epoch": 1.1178660318147386, "grad_norm": 1.640625, "learning_rate": 1.3837573759953712e-05, "loss": 0.436, "step": 6553 }, { "epoch": 1.1180391732496484, "grad_norm": 1.3984375, "learning_rate": 1.3835889485197226e-05, "loss": 0.5204, "step": 6554 }, { "epoch": 1.118212314684558, "grad_norm": 1.4375, "learning_rate": 1.3834205082841705e-05, "loss": 0.5745, "step": 6555 }, { "epoch": 1.1183854561194675, "grad_norm": 1.4140625, "learning_rate": 1.383252055294319e-05, "loss": 0.5329, "step": 6556 }, { "epoch": 1.1185585975543773, "grad_norm": 1.3671875, "learning_rate": 1.383083589555771e-05, "loss": 0.5005, "step": 6557 }, { "epoch": 1.1187317389892868, "grad_norm": 1.4375, "learning_rate": 1.382915111074131e-05, "loss": 0.4866, "step": 6558 }, { "epoch": 1.1189048804241966, "grad_norm": 1.4140625, "learning_rate": 1.3827466198550027e-05, "loss": 0.4365, "step": 6559 }, { "epoch": 1.119078021859106, "grad_norm": 1.328125, "learning_rate": 1.3825781159039911e-05, "loss": 0.4649, "step": 6560 }, { "epoch": 1.1192511632940159, "grad_norm": 1.5, "learning_rate": 1.3824095992267017e-05, "loss": 0.4638, "step": 6561 }, { "epoch": 1.1194243047289254, "grad_norm": 1.5703125, "learning_rate": 1.3822410698287398e-05, "loss": 0.4975, "step": 6562 }, { "epoch": 1.1195974461638352, "grad_norm": 1.71875, "learning_rate": 1.3820725277157115e-05, "loss": 0.5753, "step": 6563 }, { "epoch": 1.1197705875987447, "grad_norm": 1.3359375, "learning_rate": 1.3819039728932234e-05, "loss": 0.4849, "step": 6564 }, { "epoch": 1.1199437290336545, "grad_norm": 1.3046875, "learning_rate": 1.3817354053668824e-05, "loss": 0.4775, "step": 6565 }, { "epoch": 1.120116870468564, "grad_norm": 1.3671875, "learning_rate": 1.3815668251422953e-05, "loss": 0.4611, "step": 6566 }, { "epoch": 1.1202900119034735, "grad_norm": 1.5859375, "learning_rate": 1.3813982322250708e-05, "loss": 0.4396, "step": 6567 }, { "epoch": 1.1204631533383833, "grad_norm": 1.3671875, "learning_rate": 1.381229626620816e-05, "loss": 0.4722, "step": 6568 }, { "epoch": 1.1206362947732929, "grad_norm": 1.3671875, "learning_rate": 1.3810610083351403e-05, "loss": 0.4538, "step": 6569 }, { "epoch": 1.1208094362082026, "grad_norm": 1.3984375, "learning_rate": 1.3808923773736527e-05, "loss": 0.4274, "step": 6570 }, { "epoch": 1.1209825776431122, "grad_norm": 1.5546875, "learning_rate": 1.3807237337419622e-05, "loss": 0.5259, "step": 6571 }, { "epoch": 1.121155719078022, "grad_norm": 1.5, "learning_rate": 1.3805550774456784e-05, "loss": 0.4643, "step": 6572 }, { "epoch": 1.1213288605129315, "grad_norm": 1.3828125, "learning_rate": 1.3803864084904124e-05, "loss": 0.455, "step": 6573 }, { "epoch": 1.1215020019478412, "grad_norm": 1.3359375, "learning_rate": 1.3802177268817742e-05, "loss": 0.4815, "step": 6574 }, { "epoch": 1.1216751433827508, "grad_norm": 1.5078125, "learning_rate": 1.3800490326253754e-05, "loss": 0.5435, "step": 6575 }, { "epoch": 1.1218482848176605, "grad_norm": 1.3828125, "learning_rate": 1.3798803257268272e-05, "loss": 0.4375, "step": 6576 }, { "epoch": 1.12202142625257, "grad_norm": 1.484375, "learning_rate": 1.379711606191742e-05, "loss": 0.4674, "step": 6577 }, { "epoch": 1.1221945676874796, "grad_norm": 1.4453125, "learning_rate": 1.3795428740257312e-05, "loss": 0.446, "step": 6578 }, { "epoch": 1.1223677091223894, "grad_norm": 1.4296875, "learning_rate": 1.3793741292344088e-05, "loss": 0.4335, "step": 6579 }, { "epoch": 1.122540850557299, "grad_norm": 1.46875, "learning_rate": 1.379205371823387e-05, "loss": 0.5465, "step": 6580 }, { "epoch": 1.1227139919922087, "grad_norm": 1.4296875, "learning_rate": 1.3790366017982802e-05, "loss": 0.4691, "step": 6581 }, { "epoch": 1.1228871334271182, "grad_norm": 1.390625, "learning_rate": 1.3788678191647022e-05, "loss": 0.4762, "step": 6582 }, { "epoch": 1.123060274862028, "grad_norm": 1.359375, "learning_rate": 1.3786990239282672e-05, "loss": 0.4562, "step": 6583 }, { "epoch": 1.1232334162969375, "grad_norm": 1.3046875, "learning_rate": 1.3785302160945903e-05, "loss": 0.4805, "step": 6584 }, { "epoch": 1.1234065577318473, "grad_norm": 1.4609375, "learning_rate": 1.378361395669287e-05, "loss": 0.5404, "step": 6585 }, { "epoch": 1.1235796991667568, "grad_norm": 1.4453125, "learning_rate": 1.3781925626579727e-05, "loss": 0.5048, "step": 6586 }, { "epoch": 1.1237528406016666, "grad_norm": 1.328125, "learning_rate": 1.3780237170662638e-05, "loss": 0.4657, "step": 6587 }, { "epoch": 1.1239259820365761, "grad_norm": 1.4765625, "learning_rate": 1.3778548588997767e-05, "loss": 0.4951, "step": 6588 }, { "epoch": 1.1240991234714857, "grad_norm": 1.421875, "learning_rate": 1.3776859881641285e-05, "loss": 0.5635, "step": 6589 }, { "epoch": 1.1242722649063954, "grad_norm": 1.296875, "learning_rate": 1.3775171048649363e-05, "loss": 0.4645, "step": 6590 }, { "epoch": 1.124445406341305, "grad_norm": 1.3359375, "learning_rate": 1.3773482090078185e-05, "loss": 0.4699, "step": 6591 }, { "epoch": 1.1246185477762147, "grad_norm": 1.40625, "learning_rate": 1.3771793005983929e-05, "loss": 0.3945, "step": 6592 }, { "epoch": 1.1247916892111243, "grad_norm": 1.53125, "learning_rate": 1.3770103796422782e-05, "loss": 0.5339, "step": 6593 }, { "epoch": 1.124964830646034, "grad_norm": 1.4921875, "learning_rate": 1.376841446145094e-05, "loss": 0.4353, "step": 6594 }, { "epoch": 1.1251379720809436, "grad_norm": 1.328125, "learning_rate": 1.3766725001124589e-05, "loss": 0.4497, "step": 6595 }, { "epoch": 1.1253111135158533, "grad_norm": 1.375, "learning_rate": 1.3765035415499932e-05, "loss": 0.486, "step": 6596 }, { "epoch": 1.1254842549507629, "grad_norm": 1.4140625, "learning_rate": 1.3763345704633176e-05, "loss": 0.5082, "step": 6597 }, { "epoch": 1.1256573963856726, "grad_norm": 1.328125, "learning_rate": 1.3761655868580521e-05, "loss": 0.4208, "step": 6598 }, { "epoch": 1.1258305378205822, "grad_norm": 1.2890625, "learning_rate": 1.3759965907398186e-05, "loss": 0.4438, "step": 6599 }, { "epoch": 1.1260036792554917, "grad_norm": 1.375, "learning_rate": 1.3758275821142382e-05, "loss": 0.4918, "step": 6600 }, { "epoch": 1.1261768206904015, "grad_norm": 1.4609375, "learning_rate": 1.3756585609869333e-05, "loss": 0.4762, "step": 6601 }, { "epoch": 1.126349962125311, "grad_norm": 1.46875, "learning_rate": 1.3754895273635258e-05, "loss": 0.4728, "step": 6602 }, { "epoch": 1.1265231035602208, "grad_norm": 1.53125, "learning_rate": 1.375320481249639e-05, "loss": 0.5106, "step": 6603 }, { "epoch": 1.1266962449951303, "grad_norm": 1.609375, "learning_rate": 1.3751514226508957e-05, "loss": 0.4132, "step": 6604 }, { "epoch": 1.12686938643004, "grad_norm": 1.4296875, "learning_rate": 1.3749823515729198e-05, "loss": 0.5115, "step": 6605 }, { "epoch": 1.1270425278649496, "grad_norm": 1.4453125, "learning_rate": 1.3748132680213353e-05, "loss": 0.4889, "step": 6606 }, { "epoch": 1.1272156692998594, "grad_norm": 1.40625, "learning_rate": 1.3746441720017667e-05, "loss": 0.5667, "step": 6607 }, { "epoch": 1.127388810734769, "grad_norm": 1.4375, "learning_rate": 1.3744750635198392e-05, "loss": 0.5057, "step": 6608 }, { "epoch": 1.1275619521696787, "grad_norm": 1.3125, "learning_rate": 1.3743059425811775e-05, "loss": 0.4194, "step": 6609 }, { "epoch": 1.1277350936045882, "grad_norm": 1.328125, "learning_rate": 1.3741368091914077e-05, "loss": 0.4406, "step": 6610 }, { "epoch": 1.1279082350394978, "grad_norm": 1.453125, "learning_rate": 1.373967663356156e-05, "loss": 0.5074, "step": 6611 }, { "epoch": 1.1280813764744075, "grad_norm": 1.3359375, "learning_rate": 1.373798505081049e-05, "loss": 0.5288, "step": 6612 }, { "epoch": 1.128254517909317, "grad_norm": 1.3359375, "learning_rate": 1.3736293343717134e-05, "loss": 0.4549, "step": 6613 }, { "epoch": 1.1284276593442268, "grad_norm": 1.3984375, "learning_rate": 1.3734601512337766e-05, "loss": 0.5458, "step": 6614 }, { "epoch": 1.1286008007791364, "grad_norm": 1.359375, "learning_rate": 1.3732909556728666e-05, "loss": 0.4623, "step": 6615 }, { "epoch": 1.1287739422140461, "grad_norm": 1.3046875, "learning_rate": 1.3731217476946116e-05, "loss": 0.452, "step": 6616 }, { "epoch": 1.1289470836489557, "grad_norm": 1.4453125, "learning_rate": 1.3729525273046398e-05, "loss": 0.4699, "step": 6617 }, { "epoch": 1.1291202250838654, "grad_norm": 1.484375, "learning_rate": 1.3727832945085808e-05, "loss": 0.4939, "step": 6618 }, { "epoch": 1.129293366518775, "grad_norm": 1.34375, "learning_rate": 1.3726140493120639e-05, "loss": 0.5384, "step": 6619 }, { "epoch": 1.1294665079536848, "grad_norm": 1.296875, "learning_rate": 1.3724447917207188e-05, "loss": 0.5131, "step": 6620 }, { "epoch": 1.1296396493885943, "grad_norm": 1.4296875, "learning_rate": 1.3722755217401757e-05, "loss": 0.5142, "step": 6621 }, { "epoch": 1.1298127908235038, "grad_norm": 1.6484375, "learning_rate": 1.3721062393760656e-05, "loss": 0.5074, "step": 6622 }, { "epoch": 1.1299859322584136, "grad_norm": 1.3515625, "learning_rate": 1.3719369446340194e-05, "loss": 0.4255, "step": 6623 }, { "epoch": 1.1301590736933234, "grad_norm": 1.34375, "learning_rate": 1.3717676375196685e-05, "loss": 0.4738, "step": 6624 }, { "epoch": 1.130332215128233, "grad_norm": 1.46875, "learning_rate": 1.3715983180386455e-05, "loss": 0.4658, "step": 6625 }, { "epoch": 1.1305053565631424, "grad_norm": 1.484375, "learning_rate": 1.3714289861965816e-05, "loss": 0.4614, "step": 6626 }, { "epoch": 1.1306784979980522, "grad_norm": 1.3671875, "learning_rate": 1.3712596419991103e-05, "loss": 0.5058, "step": 6627 }, { "epoch": 1.1308516394329617, "grad_norm": 1.4140625, "learning_rate": 1.3710902854518647e-05, "loss": 0.4458, "step": 6628 }, { "epoch": 1.1310247808678715, "grad_norm": 1.40625, "learning_rate": 1.3709209165604782e-05, "loss": 0.4661, "step": 6629 }, { "epoch": 1.131197922302781, "grad_norm": 1.3828125, "learning_rate": 1.3707515353305846e-05, "loss": 0.5167, "step": 6630 }, { "epoch": 1.1313710637376908, "grad_norm": 1.28125, "learning_rate": 1.3705821417678186e-05, "loss": 0.4837, "step": 6631 }, { "epoch": 1.1315442051726003, "grad_norm": 1.40625, "learning_rate": 1.3704127358778152e-05, "loss": 0.5069, "step": 6632 }, { "epoch": 1.1317173466075099, "grad_norm": 1.8515625, "learning_rate": 1.3702433176662089e-05, "loss": 0.6029, "step": 6633 }, { "epoch": 1.1318904880424197, "grad_norm": 1.5625, "learning_rate": 1.370073887138636e-05, "loss": 0.5462, "step": 6634 }, { "epoch": 1.1320636294773294, "grad_norm": 1.3046875, "learning_rate": 1.3699044443007323e-05, "loss": 0.4618, "step": 6635 }, { "epoch": 1.132236770912239, "grad_norm": 1.4453125, "learning_rate": 1.3697349891581342e-05, "loss": 0.6026, "step": 6636 }, { "epoch": 1.1324099123471485, "grad_norm": 1.3828125, "learning_rate": 1.3695655217164786e-05, "loss": 0.4207, "step": 6637 }, { "epoch": 1.1325830537820583, "grad_norm": 1.4609375, "learning_rate": 1.3693960419814023e-05, "loss": 0.4208, "step": 6638 }, { "epoch": 1.1327561952169678, "grad_norm": 1.34375, "learning_rate": 1.3692265499585438e-05, "loss": 0.4279, "step": 6639 }, { "epoch": 1.1329293366518776, "grad_norm": 1.3359375, "learning_rate": 1.3690570456535404e-05, "loss": 0.5194, "step": 6640 }, { "epoch": 1.133102478086787, "grad_norm": 1.34375, "learning_rate": 1.3688875290720312e-05, "loss": 0.4487, "step": 6641 }, { "epoch": 1.1332756195216969, "grad_norm": 1.359375, "learning_rate": 1.3687180002196545e-05, "loss": 0.4637, "step": 6642 }, { "epoch": 1.1334487609566064, "grad_norm": 1.46875, "learning_rate": 1.36854845910205e-05, "loss": 0.516, "step": 6643 }, { "epoch": 1.133621902391516, "grad_norm": 1.703125, "learning_rate": 1.3683789057248571e-05, "loss": 0.4839, "step": 6644 }, { "epoch": 1.1337950438264257, "grad_norm": 1.5859375, "learning_rate": 1.3682093400937162e-05, "loss": 0.574, "step": 6645 }, { "epoch": 1.1339681852613355, "grad_norm": 1.3203125, "learning_rate": 1.368039762214268e-05, "loss": 0.44, "step": 6646 }, { "epoch": 1.134141326696245, "grad_norm": 1.4921875, "learning_rate": 1.367870172092153e-05, "loss": 0.5219, "step": 6647 }, { "epoch": 1.1343144681311546, "grad_norm": 1.3671875, "learning_rate": 1.3677005697330126e-05, "loss": 0.5057, "step": 6648 }, { "epoch": 1.1344876095660643, "grad_norm": 1.2734375, "learning_rate": 1.3675309551424887e-05, "loss": 0.4795, "step": 6649 }, { "epoch": 1.1346607510009739, "grad_norm": 1.40625, "learning_rate": 1.3673613283262232e-05, "loss": 0.4111, "step": 6650 }, { "epoch": 1.1348338924358836, "grad_norm": 1.484375, "learning_rate": 1.3671916892898591e-05, "loss": 0.5506, "step": 6651 }, { "epoch": 1.1350070338707932, "grad_norm": 1.453125, "learning_rate": 1.3670220380390389e-05, "loss": 0.5041, "step": 6652 }, { "epoch": 1.135180175305703, "grad_norm": 1.453125, "learning_rate": 1.3668523745794064e-05, "loss": 0.4424, "step": 6653 }, { "epoch": 1.1353533167406125, "grad_norm": 1.3828125, "learning_rate": 1.3666826989166049e-05, "loss": 0.4995, "step": 6654 }, { "epoch": 1.1355264581755222, "grad_norm": 1.3125, "learning_rate": 1.3665130110562788e-05, "loss": 0.3915, "step": 6655 }, { "epoch": 1.1356995996104318, "grad_norm": 1.4765625, "learning_rate": 1.3663433110040727e-05, "loss": 0.5206, "step": 6656 }, { "epoch": 1.1358727410453415, "grad_norm": 1.484375, "learning_rate": 1.3661735987656318e-05, "loss": 0.495, "step": 6657 }, { "epoch": 1.136045882480251, "grad_norm": 1.609375, "learning_rate": 1.366003874346601e-05, "loss": 0.7055, "step": 6658 }, { "epoch": 1.1362190239151606, "grad_norm": 1.4765625, "learning_rate": 1.3658341377526266e-05, "loss": 0.5125, "step": 6659 }, { "epoch": 1.1363921653500704, "grad_norm": 1.7109375, "learning_rate": 1.3656643889893544e-05, "loss": 0.4835, "step": 6660 }, { "epoch": 1.13656530678498, "grad_norm": 1.359375, "learning_rate": 1.3654946280624312e-05, "loss": 0.4637, "step": 6661 }, { "epoch": 1.1367384482198897, "grad_norm": 1.3515625, "learning_rate": 1.3653248549775042e-05, "loss": 0.4634, "step": 6662 }, { "epoch": 1.1369115896547992, "grad_norm": 1.40625, "learning_rate": 1.3651550697402207e-05, "loss": 0.4819, "step": 6663 }, { "epoch": 1.137084731089709, "grad_norm": 1.3828125, "learning_rate": 1.3649852723562283e-05, "loss": 0.475, "step": 6664 }, { "epoch": 1.1372578725246185, "grad_norm": 1.375, "learning_rate": 1.3648154628311754e-05, "loss": 0.4316, "step": 6665 }, { "epoch": 1.1374310139595283, "grad_norm": 1.421875, "learning_rate": 1.3646456411707105e-05, "loss": 0.4351, "step": 6666 }, { "epoch": 1.1376041553944378, "grad_norm": 1.4609375, "learning_rate": 1.3644758073804832e-05, "loss": 0.4364, "step": 6667 }, { "epoch": 1.1377772968293476, "grad_norm": 1.390625, "learning_rate": 1.3643059614661421e-05, "loss": 0.5158, "step": 6668 }, { "epoch": 1.1379504382642571, "grad_norm": 1.34375, "learning_rate": 1.3641361034333375e-05, "loss": 0.409, "step": 6669 }, { "epoch": 1.1381235796991667, "grad_norm": 1.5, "learning_rate": 1.36396623328772e-05, "loss": 0.4626, "step": 6670 }, { "epoch": 1.1382967211340764, "grad_norm": 1.40625, "learning_rate": 1.363796351034939e-05, "loss": 0.5085, "step": 6671 }, { "epoch": 1.138469862568986, "grad_norm": 1.375, "learning_rate": 1.3636264566806473e-05, "loss": 0.5383, "step": 6672 }, { "epoch": 1.1386430040038957, "grad_norm": 1.359375, "learning_rate": 1.3634565502304948e-05, "loss": 0.4501, "step": 6673 }, { "epoch": 1.1388161454388053, "grad_norm": 1.5234375, "learning_rate": 1.3632866316901341e-05, "loss": 0.5197, "step": 6674 }, { "epoch": 1.138989286873715, "grad_norm": 1.5703125, "learning_rate": 1.3631167010652177e-05, "loss": 0.4832, "step": 6675 }, { "epoch": 1.1391624283086246, "grad_norm": 1.34375, "learning_rate": 1.3629467583613976e-05, "loss": 0.4359, "step": 6676 }, { "epoch": 1.1393355697435343, "grad_norm": 1.2421875, "learning_rate": 1.3627768035843274e-05, "loss": 0.4315, "step": 6677 }, { "epoch": 1.1395087111784439, "grad_norm": 1.34375, "learning_rate": 1.3626068367396603e-05, "loss": 0.4354, "step": 6678 }, { "epoch": 1.1396818526133536, "grad_norm": 1.6640625, "learning_rate": 1.3624368578330502e-05, "loss": 0.4919, "step": 6679 }, { "epoch": 1.1398549940482632, "grad_norm": 1.3828125, "learning_rate": 1.3622668668701514e-05, "loss": 0.5343, "step": 6680 }, { "epoch": 1.1400281354831727, "grad_norm": 1.3203125, "learning_rate": 1.3620968638566185e-05, "loss": 0.5053, "step": 6681 }, { "epoch": 1.1402012769180825, "grad_norm": 1.421875, "learning_rate": 1.3619268487981066e-05, "loss": 0.4932, "step": 6682 }, { "epoch": 1.140374418352992, "grad_norm": 1.3828125, "learning_rate": 1.3617568217002713e-05, "loss": 0.4312, "step": 6683 }, { "epoch": 1.1405475597879018, "grad_norm": 1.3359375, "learning_rate": 1.3615867825687682e-05, "loss": 0.4171, "step": 6684 }, { "epoch": 1.1407207012228113, "grad_norm": 1.328125, "learning_rate": 1.3614167314092538e-05, "loss": 0.4114, "step": 6685 }, { "epoch": 1.140893842657721, "grad_norm": 1.40625, "learning_rate": 1.3612466682273845e-05, "loss": 0.4601, "step": 6686 }, { "epoch": 1.1410669840926306, "grad_norm": 1.421875, "learning_rate": 1.361076593028818e-05, "loss": 0.5493, "step": 6687 }, { "epoch": 1.1412401255275404, "grad_norm": 1.546875, "learning_rate": 1.360906505819211e-05, "loss": 0.5338, "step": 6688 }, { "epoch": 1.14141326696245, "grad_norm": 1.453125, "learning_rate": 1.3607364066042221e-05, "loss": 0.4663, "step": 6689 }, { "epoch": 1.1415864083973597, "grad_norm": 1.3984375, "learning_rate": 1.3605662953895088e-05, "loss": 0.4888, "step": 6690 }, { "epoch": 1.1417595498322692, "grad_norm": 1.453125, "learning_rate": 1.3603961721807304e-05, "loss": 0.4877, "step": 6691 }, { "epoch": 1.1419326912671788, "grad_norm": 1.3359375, "learning_rate": 1.3602260369835453e-05, "loss": 0.513, "step": 6692 }, { "epoch": 1.1421058327020885, "grad_norm": 1.3671875, "learning_rate": 1.360055889803614e-05, "loss": 0.4302, "step": 6693 }, { "epoch": 1.142278974136998, "grad_norm": 1.3125, "learning_rate": 1.3598857306465953e-05, "loss": 0.4298, "step": 6694 }, { "epoch": 1.1424521155719078, "grad_norm": 1.3359375, "learning_rate": 1.35971555951815e-05, "loss": 0.4154, "step": 6695 }, { "epoch": 1.1426252570068174, "grad_norm": 1.34375, "learning_rate": 1.3595453764239389e-05, "loss": 0.4407, "step": 6696 }, { "epoch": 1.1427983984417271, "grad_norm": 1.359375, "learning_rate": 1.3593751813696225e-05, "loss": 0.4644, "step": 6697 }, { "epoch": 1.1429715398766367, "grad_norm": 1.296875, "learning_rate": 1.3592049743608626e-05, "loss": 0.4318, "step": 6698 }, { "epoch": 1.1431446813115465, "grad_norm": 1.515625, "learning_rate": 1.3590347554033212e-05, "loss": 0.4724, "step": 6699 }, { "epoch": 1.143317822746456, "grad_norm": 1.59375, "learning_rate": 1.3588645245026603e-05, "loss": 0.5475, "step": 6700 }, { "epoch": 1.1434909641813658, "grad_norm": 1.515625, "learning_rate": 1.3586942816645427e-05, "loss": 0.418, "step": 6701 }, { "epoch": 1.1436641056162753, "grad_norm": 1.5, "learning_rate": 1.3585240268946316e-05, "loss": 0.5308, "step": 6702 }, { "epoch": 1.1438372470511848, "grad_norm": 1.3359375, "learning_rate": 1.3583537601985898e-05, "loss": 0.4794, "step": 6703 }, { "epoch": 1.1440103884860946, "grad_norm": 1.34375, "learning_rate": 1.3581834815820817e-05, "loss": 0.4979, "step": 6704 }, { "epoch": 1.1441835299210041, "grad_norm": 1.34375, "learning_rate": 1.358013191050772e-05, "loss": 0.4826, "step": 6705 }, { "epoch": 1.144356671355914, "grad_norm": 1.59375, "learning_rate": 1.3578428886103238e-05, "loss": 0.6136, "step": 6706 }, { "epoch": 1.1445298127908234, "grad_norm": 1.6953125, "learning_rate": 1.3576725742664036e-05, "loss": 0.5054, "step": 6707 }, { "epoch": 1.1447029542257332, "grad_norm": 1.328125, "learning_rate": 1.3575022480246764e-05, "loss": 0.4256, "step": 6708 }, { "epoch": 1.1448760956606427, "grad_norm": 1.328125, "learning_rate": 1.3573319098908076e-05, "loss": 0.4579, "step": 6709 }, { "epoch": 1.1450492370955525, "grad_norm": 1.3984375, "learning_rate": 1.3571615598704639e-05, "loss": 0.502, "step": 6710 }, { "epoch": 1.145222378530462, "grad_norm": 1.3828125, "learning_rate": 1.3569911979693118e-05, "loss": 0.4726, "step": 6711 }, { "epoch": 1.1453955199653718, "grad_norm": 1.4765625, "learning_rate": 1.356820824193018e-05, "loss": 0.482, "step": 6712 }, { "epoch": 1.1455686614002814, "grad_norm": 1.40625, "learning_rate": 1.3566504385472506e-05, "loss": 0.4387, "step": 6713 }, { "epoch": 1.145741802835191, "grad_norm": 1.3359375, "learning_rate": 1.3564800410376764e-05, "loss": 0.4702, "step": 6714 }, { "epoch": 1.1459149442701007, "grad_norm": 1.34375, "learning_rate": 1.3563096316699644e-05, "loss": 0.4946, "step": 6715 }, { "epoch": 1.1460880857050102, "grad_norm": 1.3984375, "learning_rate": 1.356139210449783e-05, "loss": 0.4732, "step": 6716 }, { "epoch": 1.14626122713992, "grad_norm": 1.4296875, "learning_rate": 1.3559687773828012e-05, "loss": 0.4644, "step": 6717 }, { "epoch": 1.1464343685748295, "grad_norm": 1.4453125, "learning_rate": 1.3557983324746882e-05, "loss": 0.4954, "step": 6718 }, { "epoch": 1.1466075100097393, "grad_norm": 1.484375, "learning_rate": 1.3556278757311137e-05, "loss": 0.4862, "step": 6719 }, { "epoch": 1.1467806514446488, "grad_norm": 1.453125, "learning_rate": 1.3554574071577482e-05, "loss": 0.427, "step": 6720 }, { "epoch": 1.1469537928795586, "grad_norm": 1.46875, "learning_rate": 1.355286926760262e-05, "loss": 0.5006, "step": 6721 }, { "epoch": 1.147126934314468, "grad_norm": 1.421875, "learning_rate": 1.3551164345443261e-05, "loss": 0.4468, "step": 6722 }, { "epoch": 1.1473000757493779, "grad_norm": 1.3671875, "learning_rate": 1.3549459305156119e-05, "loss": 0.4498, "step": 6723 }, { "epoch": 1.1474732171842874, "grad_norm": 1.4375, "learning_rate": 1.3547754146797911e-05, "loss": 0.5067, "step": 6724 }, { "epoch": 1.147646358619197, "grad_norm": 1.375, "learning_rate": 1.3546048870425356e-05, "loss": 0.4958, "step": 6725 }, { "epoch": 1.1478195000541067, "grad_norm": 1.34375, "learning_rate": 1.3544343476095186e-05, "loss": 0.4676, "step": 6726 }, { "epoch": 1.1479926414890163, "grad_norm": 1.3046875, "learning_rate": 1.3542637963864123e-05, "loss": 0.4419, "step": 6727 }, { "epoch": 1.148165782923926, "grad_norm": 1.4921875, "learning_rate": 1.3540932333788903e-05, "loss": 0.5086, "step": 6728 }, { "epoch": 1.1483389243588356, "grad_norm": 1.4921875, "learning_rate": 1.3539226585926261e-05, "loss": 0.3991, "step": 6729 }, { "epoch": 1.1485120657937453, "grad_norm": 1.3046875, "learning_rate": 1.3537520720332943e-05, "loss": 0.4756, "step": 6730 }, { "epoch": 1.1486852072286549, "grad_norm": 1.578125, "learning_rate": 1.3535814737065687e-05, "loss": 0.5225, "step": 6731 }, { "epoch": 1.1488583486635646, "grad_norm": 1.3671875, "learning_rate": 1.3534108636181244e-05, "loss": 0.4789, "step": 6732 }, { "epoch": 1.1490314900984742, "grad_norm": 1.3984375, "learning_rate": 1.3532402417736371e-05, "loss": 0.5472, "step": 6733 }, { "epoch": 1.149204631533384, "grad_norm": 1.3515625, "learning_rate": 1.3530696081787822e-05, "loss": 0.4685, "step": 6734 }, { "epoch": 1.1493777729682935, "grad_norm": 1.4140625, "learning_rate": 1.3528989628392353e-05, "loss": 0.4836, "step": 6735 }, { "epoch": 1.149550914403203, "grad_norm": 1.2890625, "learning_rate": 1.3527283057606733e-05, "loss": 0.4345, "step": 6736 }, { "epoch": 1.1497240558381128, "grad_norm": 1.3359375, "learning_rate": 1.3525576369487729e-05, "loss": 0.4381, "step": 6737 }, { "epoch": 1.1498971972730223, "grad_norm": 1.53125, "learning_rate": 1.3523869564092116e-05, "loss": 0.4755, "step": 6738 }, { "epoch": 1.150070338707932, "grad_norm": 1.375, "learning_rate": 1.3522162641476665e-05, "loss": 0.5019, "step": 6739 }, { "epoch": 1.1502434801428416, "grad_norm": 1.40625, "learning_rate": 1.3520455601698159e-05, "loss": 0.4861, "step": 6740 }, { "epoch": 1.1504166215777514, "grad_norm": 1.390625, "learning_rate": 1.3518748444813384e-05, "loss": 0.4488, "step": 6741 }, { "epoch": 1.150589763012661, "grad_norm": 1.28125, "learning_rate": 1.3517041170879122e-05, "loss": 0.3938, "step": 6742 }, { "epoch": 1.1507629044475707, "grad_norm": 1.484375, "learning_rate": 1.3515333779952169e-05, "loss": 0.4536, "step": 6743 }, { "epoch": 1.1509360458824802, "grad_norm": 1.5234375, "learning_rate": 1.3513626272089317e-05, "loss": 0.4971, "step": 6744 }, { "epoch": 1.15110918731739, "grad_norm": 1.421875, "learning_rate": 1.3511918647347369e-05, "loss": 0.4993, "step": 6745 }, { "epoch": 1.1512823287522995, "grad_norm": 1.40625, "learning_rate": 1.3510210905783128e-05, "loss": 0.4381, "step": 6746 }, { "epoch": 1.151455470187209, "grad_norm": 1.3984375, "learning_rate": 1.3508503047453396e-05, "loss": 0.4715, "step": 6747 }, { "epoch": 1.1516286116221188, "grad_norm": 1.40625, "learning_rate": 1.3506795072414995e-05, "loss": 0.4825, "step": 6748 }, { "epoch": 1.1518017530570284, "grad_norm": 1.359375, "learning_rate": 1.3505086980724728e-05, "loss": 0.4245, "step": 6749 }, { "epoch": 1.1519748944919381, "grad_norm": 1.390625, "learning_rate": 1.3503378772439421e-05, "loss": 0.4678, "step": 6750 }, { "epoch": 1.1521480359268477, "grad_norm": 1.421875, "learning_rate": 1.3501670447615897e-05, "loss": 0.5197, "step": 6751 }, { "epoch": 1.1523211773617574, "grad_norm": 1.3671875, "learning_rate": 1.3499962006310975e-05, "loss": 0.468, "step": 6752 }, { "epoch": 1.152494318796667, "grad_norm": 1.375, "learning_rate": 1.3498253448581497e-05, "loss": 0.424, "step": 6753 }, { "epoch": 1.1526674602315767, "grad_norm": 1.4375, "learning_rate": 1.3496544774484288e-05, "loss": 0.492, "step": 6754 }, { "epoch": 1.1528406016664863, "grad_norm": 1.4140625, "learning_rate": 1.3494835984076194e-05, "loss": 0.4908, "step": 6755 }, { "epoch": 1.153013743101396, "grad_norm": 1.359375, "learning_rate": 1.3493127077414046e-05, "loss": 0.4647, "step": 6756 }, { "epoch": 1.1531868845363056, "grad_norm": 1.3828125, "learning_rate": 1.3491418054554699e-05, "loss": 0.5356, "step": 6757 }, { "epoch": 1.1533600259712151, "grad_norm": 1.390625, "learning_rate": 1.3489708915555001e-05, "loss": 0.4658, "step": 6758 }, { "epoch": 1.1535331674061249, "grad_norm": 1.3359375, "learning_rate": 1.3487999660471804e-05, "loss": 0.4323, "step": 6759 }, { "epoch": 1.1537063088410346, "grad_norm": 1.3125, "learning_rate": 1.348629028936197e-05, "loss": 0.467, "step": 6760 }, { "epoch": 1.1538794502759442, "grad_norm": 1.375, "learning_rate": 1.3484580802282355e-05, "loss": 0.4315, "step": 6761 }, { "epoch": 1.1540525917108537, "grad_norm": 1.5078125, "learning_rate": 1.3482871199289824e-05, "loss": 0.5213, "step": 6762 }, { "epoch": 1.1542257331457635, "grad_norm": 1.3828125, "learning_rate": 1.3481161480441252e-05, "loss": 0.4531, "step": 6763 }, { "epoch": 1.154398874580673, "grad_norm": 1.5546875, "learning_rate": 1.3479451645793504e-05, "loss": 0.4935, "step": 6764 }, { "epoch": 1.1545720160155828, "grad_norm": 1.53125, "learning_rate": 1.3477741695403467e-05, "loss": 0.4922, "step": 6765 }, { "epoch": 1.1547451574504923, "grad_norm": 1.3671875, "learning_rate": 1.3476031629328013e-05, "loss": 0.4286, "step": 6766 }, { "epoch": 1.154918298885402, "grad_norm": 1.328125, "learning_rate": 1.347432144762403e-05, "loss": 0.4492, "step": 6767 }, { "epoch": 1.1550914403203116, "grad_norm": 1.4140625, "learning_rate": 1.3472611150348405e-05, "loss": 0.5137, "step": 6768 }, { "epoch": 1.1552645817552212, "grad_norm": 1.359375, "learning_rate": 1.3470900737558032e-05, "loss": 0.503, "step": 6769 }, { "epoch": 1.155437723190131, "grad_norm": 1.3828125, "learning_rate": 1.3469190209309806e-05, "loss": 0.4465, "step": 6770 }, { "epoch": 1.1556108646250407, "grad_norm": 1.3515625, "learning_rate": 1.3467479565660625e-05, "loss": 0.4423, "step": 6771 }, { "epoch": 1.1557840060599502, "grad_norm": 1.34375, "learning_rate": 1.3465768806667399e-05, "loss": 0.4845, "step": 6772 }, { "epoch": 1.1559571474948598, "grad_norm": 1.4296875, "learning_rate": 1.3464057932387029e-05, "loss": 0.4847, "step": 6773 }, { "epoch": 1.1561302889297695, "grad_norm": 1.3671875, "learning_rate": 1.346234694287643e-05, "loss": 0.4632, "step": 6774 }, { "epoch": 1.156303430364679, "grad_norm": 1.4140625, "learning_rate": 1.3460635838192512e-05, "loss": 0.5148, "step": 6775 }, { "epoch": 1.1564765717995888, "grad_norm": 1.421875, "learning_rate": 1.3458924618392202e-05, "loss": 0.4156, "step": 6776 }, { "epoch": 1.1566497132344984, "grad_norm": 1.3515625, "learning_rate": 1.345721328353242e-05, "loss": 0.5141, "step": 6777 }, { "epoch": 1.1568228546694082, "grad_norm": 1.4296875, "learning_rate": 1.3455501833670089e-05, "loss": 0.4897, "step": 6778 }, { "epoch": 1.1569959961043177, "grad_norm": 1.3671875, "learning_rate": 1.3453790268862141e-05, "loss": 0.4442, "step": 6779 }, { "epoch": 1.1571691375392272, "grad_norm": 1.34375, "learning_rate": 1.3452078589165516e-05, "loss": 0.4306, "step": 6780 }, { "epoch": 1.157342278974137, "grad_norm": 1.4609375, "learning_rate": 1.3450366794637147e-05, "loss": 0.5051, "step": 6781 }, { "epoch": 1.1575154204090468, "grad_norm": 1.5, "learning_rate": 1.3448654885333974e-05, "loss": 0.4788, "step": 6782 }, { "epoch": 1.1576885618439563, "grad_norm": 1.375, "learning_rate": 1.3446942861312948e-05, "loss": 0.4471, "step": 6783 }, { "epoch": 1.1578617032788658, "grad_norm": 1.2890625, "learning_rate": 1.3445230722631016e-05, "loss": 0.4483, "step": 6784 }, { "epoch": 1.1580348447137756, "grad_norm": 1.3671875, "learning_rate": 1.3443518469345132e-05, "loss": 0.4676, "step": 6785 }, { "epoch": 1.1582079861486851, "grad_norm": 1.3125, "learning_rate": 1.3441806101512254e-05, "loss": 0.4514, "step": 6786 }, { "epoch": 1.158381127583595, "grad_norm": 1.4453125, "learning_rate": 1.344009361918934e-05, "loss": 0.4673, "step": 6787 }, { "epoch": 1.1585542690185044, "grad_norm": 1.4921875, "learning_rate": 1.3438381022433357e-05, "loss": 0.4798, "step": 6788 }, { "epoch": 1.1587274104534142, "grad_norm": 1.390625, "learning_rate": 1.3436668311301278e-05, "loss": 0.44, "step": 6789 }, { "epoch": 1.1589005518883237, "grad_norm": 1.359375, "learning_rate": 1.3434955485850068e-05, "loss": 0.4101, "step": 6790 }, { "epoch": 1.1590736933232335, "grad_norm": 1.4453125, "learning_rate": 1.3433242546136709e-05, "loss": 0.5043, "step": 6791 }, { "epoch": 1.159246834758143, "grad_norm": 1.546875, "learning_rate": 1.3431529492218175e-05, "loss": 0.4614, "step": 6792 }, { "epoch": 1.1594199761930528, "grad_norm": 1.4921875, "learning_rate": 1.3429816324151459e-05, "loss": 0.4951, "step": 6793 }, { "epoch": 1.1595931176279624, "grad_norm": 1.328125, "learning_rate": 1.3428103041993541e-05, "loss": 0.4375, "step": 6794 }, { "epoch": 1.159766259062872, "grad_norm": 1.359375, "learning_rate": 1.3426389645801415e-05, "loss": 0.448, "step": 6795 }, { "epoch": 1.1599394004977817, "grad_norm": 1.3125, "learning_rate": 1.3424676135632075e-05, "loss": 0.4627, "step": 6796 }, { "epoch": 1.1601125419326912, "grad_norm": 1.46875, "learning_rate": 1.3422962511542524e-05, "loss": 0.4508, "step": 6797 }, { "epoch": 1.160285683367601, "grad_norm": 1.4921875, "learning_rate": 1.342124877358976e-05, "loss": 0.4995, "step": 6798 }, { "epoch": 1.1604588248025105, "grad_norm": 1.390625, "learning_rate": 1.3419534921830795e-05, "loss": 0.4743, "step": 6799 }, { "epoch": 1.1606319662374203, "grad_norm": 1.484375, "learning_rate": 1.3417820956322634e-05, "loss": 0.5036, "step": 6800 }, { "epoch": 1.1608051076723298, "grad_norm": 1.625, "learning_rate": 1.3416106877122292e-05, "loss": 0.5699, "step": 6801 }, { "epoch": 1.1609782491072396, "grad_norm": 1.40625, "learning_rate": 1.3414392684286793e-05, "loss": 0.4935, "step": 6802 }, { "epoch": 1.161151390542149, "grad_norm": 1.3671875, "learning_rate": 1.3412678377873152e-05, "loss": 0.4581, "step": 6803 }, { "epoch": 1.1613245319770589, "grad_norm": 1.40625, "learning_rate": 1.34109639579384e-05, "loss": 0.4579, "step": 6804 }, { "epoch": 1.1614976734119684, "grad_norm": 1.34375, "learning_rate": 1.3409249424539562e-05, "loss": 0.5448, "step": 6805 }, { "epoch": 1.161670814846878, "grad_norm": 1.4453125, "learning_rate": 1.3407534777733666e-05, "loss": 0.4766, "step": 6806 }, { "epoch": 1.1618439562817877, "grad_norm": 1.5390625, "learning_rate": 1.3405820017577764e-05, "loss": 0.5314, "step": 6807 }, { "epoch": 1.1620170977166973, "grad_norm": 1.40625, "learning_rate": 1.3404105144128885e-05, "loss": 0.4799, "step": 6808 }, { "epoch": 1.162190239151607, "grad_norm": 1.3046875, "learning_rate": 1.3402390157444077e-05, "loss": 0.4588, "step": 6809 }, { "epoch": 1.1623633805865166, "grad_norm": 1.4453125, "learning_rate": 1.3400675057580389e-05, "loss": 0.5416, "step": 6810 }, { "epoch": 1.1625365220214263, "grad_norm": 1.3125, "learning_rate": 1.3398959844594867e-05, "loss": 0.4018, "step": 6811 }, { "epoch": 1.1627096634563359, "grad_norm": 1.84375, "learning_rate": 1.3397244518544576e-05, "loss": 0.4718, "step": 6812 }, { "epoch": 1.1628828048912456, "grad_norm": 1.4140625, "learning_rate": 1.3395529079486571e-05, "loss": 0.4836, "step": 6813 }, { "epoch": 1.1630559463261552, "grad_norm": 1.2421875, "learning_rate": 1.3393813527477914e-05, "loss": 0.4661, "step": 6814 }, { "epoch": 1.163229087761065, "grad_norm": 1.4375, "learning_rate": 1.3392097862575675e-05, "loss": 0.4387, "step": 6815 }, { "epoch": 1.1634022291959745, "grad_norm": 1.546875, "learning_rate": 1.3390382084836921e-05, "loss": 0.5271, "step": 6816 }, { "epoch": 1.163575370630884, "grad_norm": 1.3515625, "learning_rate": 1.338866619431873e-05, "loss": 0.4356, "step": 6817 }, { "epoch": 1.1637485120657938, "grad_norm": 1.359375, "learning_rate": 1.3386950191078177e-05, "loss": 0.4727, "step": 6818 }, { "epoch": 1.1639216535007033, "grad_norm": 1.3125, "learning_rate": 1.338523407517235e-05, "loss": 0.4665, "step": 6819 }, { "epoch": 1.164094794935613, "grad_norm": 1.390625, "learning_rate": 1.3383517846658326e-05, "loss": 0.4958, "step": 6820 }, { "epoch": 1.1642679363705226, "grad_norm": 1.328125, "learning_rate": 1.3381801505593201e-05, "loss": 0.4366, "step": 6821 }, { "epoch": 1.1644410778054324, "grad_norm": 1.328125, "learning_rate": 1.3380085052034066e-05, "loss": 0.4109, "step": 6822 }, { "epoch": 1.164614219240342, "grad_norm": 1.359375, "learning_rate": 1.3378368486038018e-05, "loss": 0.4934, "step": 6823 }, { "epoch": 1.1647873606752517, "grad_norm": 1.4296875, "learning_rate": 1.3376651807662162e-05, "loss": 0.445, "step": 6824 }, { "epoch": 1.1649605021101612, "grad_norm": 1.453125, "learning_rate": 1.3374935016963595e-05, "loss": 0.4529, "step": 6825 }, { "epoch": 1.165133643545071, "grad_norm": 1.40625, "learning_rate": 1.337321811399943e-05, "loss": 0.4094, "step": 6826 }, { "epoch": 1.1653067849799805, "grad_norm": 1.2734375, "learning_rate": 1.337150109882678e-05, "loss": 0.4538, "step": 6827 }, { "epoch": 1.16547992641489, "grad_norm": 1.4609375, "learning_rate": 1.3369783971502754e-05, "loss": 0.4917, "step": 6828 }, { "epoch": 1.1656530678497998, "grad_norm": 1.546875, "learning_rate": 1.3368066732084481e-05, "loss": 0.4832, "step": 6829 }, { "epoch": 1.1658262092847094, "grad_norm": 1.4140625, "learning_rate": 1.3366349380629076e-05, "loss": 0.5728, "step": 6830 }, { "epoch": 1.1659993507196191, "grad_norm": 1.34375, "learning_rate": 1.3364631917193671e-05, "loss": 0.4744, "step": 6831 }, { "epoch": 1.1661724921545287, "grad_norm": 1.3125, "learning_rate": 1.3362914341835393e-05, "loss": 0.4397, "step": 6832 }, { "epoch": 1.1663456335894384, "grad_norm": 1.46875, "learning_rate": 1.3361196654611379e-05, "loss": 0.5487, "step": 6833 }, { "epoch": 1.166518775024348, "grad_norm": 1.40625, "learning_rate": 1.3359478855578764e-05, "loss": 0.4917, "step": 6834 }, { "epoch": 1.1666919164592577, "grad_norm": 1.4921875, "learning_rate": 1.3357760944794693e-05, "loss": 0.5384, "step": 6835 }, { "epoch": 1.1668650578941673, "grad_norm": 1.3515625, "learning_rate": 1.3356042922316313e-05, "loss": 0.4691, "step": 6836 }, { "epoch": 1.167038199329077, "grad_norm": 1.421875, "learning_rate": 1.3354324788200765e-05, "loss": 0.4879, "step": 6837 }, { "epoch": 1.1672113407639866, "grad_norm": 1.296875, "learning_rate": 1.335260654250521e-05, "loss": 0.4384, "step": 6838 }, { "epoch": 1.1673844821988961, "grad_norm": 1.421875, "learning_rate": 1.3350888185286804e-05, "loss": 0.4869, "step": 6839 }, { "epoch": 1.1675576236338059, "grad_norm": 1.5, "learning_rate": 1.3349169716602704e-05, "loss": 0.5102, "step": 6840 }, { "epoch": 1.1677307650687154, "grad_norm": 1.34375, "learning_rate": 1.3347451136510074e-05, "loss": 0.443, "step": 6841 }, { "epoch": 1.1679039065036252, "grad_norm": 1.359375, "learning_rate": 1.3345732445066084e-05, "loss": 0.4216, "step": 6842 }, { "epoch": 1.1680770479385347, "grad_norm": 1.5078125, "learning_rate": 1.3344013642327907e-05, "loss": 0.4808, "step": 6843 }, { "epoch": 1.1682501893734445, "grad_norm": 1.3671875, "learning_rate": 1.334229472835271e-05, "loss": 0.4952, "step": 6844 }, { "epoch": 1.168423330808354, "grad_norm": 1.3515625, "learning_rate": 1.3340575703197682e-05, "loss": 0.4636, "step": 6845 }, { "epoch": 1.1685964722432638, "grad_norm": 1.390625, "learning_rate": 1.333885656692e-05, "loss": 0.4398, "step": 6846 }, { "epoch": 1.1687696136781733, "grad_norm": 1.484375, "learning_rate": 1.333713731957685e-05, "loss": 0.479, "step": 6847 }, { "epoch": 1.168942755113083, "grad_norm": 1.5078125, "learning_rate": 1.3335417961225426e-05, "loss": 0.5064, "step": 6848 }, { "epoch": 1.1691158965479926, "grad_norm": 1.4296875, "learning_rate": 1.3333698491922916e-05, "loss": 0.5321, "step": 6849 }, { "epoch": 1.1692890379829022, "grad_norm": 1.328125, "learning_rate": 1.3331978911726522e-05, "loss": 0.464, "step": 6850 }, { "epoch": 1.169462179417812, "grad_norm": 1.5546875, "learning_rate": 1.3330259220693443e-05, "loss": 0.4424, "step": 6851 }, { "epoch": 1.1696353208527215, "grad_norm": 1.3984375, "learning_rate": 1.3328539418880882e-05, "loss": 0.4229, "step": 6852 }, { "epoch": 1.1698084622876312, "grad_norm": 1.3203125, "learning_rate": 1.3326819506346053e-05, "loss": 0.4633, "step": 6853 }, { "epoch": 1.1699816037225408, "grad_norm": 1.3203125, "learning_rate": 1.3325099483146163e-05, "loss": 0.4348, "step": 6854 }, { "epoch": 1.1701547451574505, "grad_norm": 1.390625, "learning_rate": 1.3323379349338428e-05, "loss": 0.4412, "step": 6855 }, { "epoch": 1.17032788659236, "grad_norm": 1.40625, "learning_rate": 1.3321659104980067e-05, "loss": 0.4207, "step": 6856 }, { "epoch": 1.1705010280272699, "grad_norm": 1.484375, "learning_rate": 1.3319938750128309e-05, "loss": 0.4378, "step": 6857 }, { "epoch": 1.1706741694621794, "grad_norm": 1.421875, "learning_rate": 1.3318218284840373e-05, "loss": 0.5313, "step": 6858 }, { "epoch": 1.1708473108970892, "grad_norm": 1.484375, "learning_rate": 1.3316497709173496e-05, "loss": 0.4709, "step": 6859 }, { "epoch": 1.1710204523319987, "grad_norm": 1.46875, "learning_rate": 1.3314777023184907e-05, "loss": 0.4897, "step": 6860 }, { "epoch": 1.1711935937669082, "grad_norm": 1.296875, "learning_rate": 1.3313056226931844e-05, "loss": 0.4283, "step": 6861 }, { "epoch": 1.171366735201818, "grad_norm": 1.5234375, "learning_rate": 1.3311335320471554e-05, "loss": 0.4916, "step": 6862 }, { "epoch": 1.1715398766367275, "grad_norm": 1.421875, "learning_rate": 1.3309614303861276e-05, "loss": 0.4485, "step": 6863 }, { "epoch": 1.1717130180716373, "grad_norm": 1.2421875, "learning_rate": 1.3307893177158262e-05, "loss": 0.4218, "step": 6864 }, { "epoch": 1.1718861595065468, "grad_norm": 1.375, "learning_rate": 1.3306171940419765e-05, "loss": 0.4829, "step": 6865 }, { "epoch": 1.1720593009414566, "grad_norm": 1.4453125, "learning_rate": 1.3304450593703036e-05, "loss": 0.4933, "step": 6866 }, { "epoch": 1.1722324423763661, "grad_norm": 1.4140625, "learning_rate": 1.3302729137065342e-05, "loss": 0.4838, "step": 6867 }, { "epoch": 1.172405583811276, "grad_norm": 1.421875, "learning_rate": 1.3301007570563942e-05, "loss": 0.4549, "step": 6868 }, { "epoch": 1.1725787252461854, "grad_norm": 1.515625, "learning_rate": 1.3299285894256107e-05, "loss": 0.4827, "step": 6869 }, { "epoch": 1.1727518666810952, "grad_norm": 1.375, "learning_rate": 1.3297564108199102e-05, "loss": 0.4601, "step": 6870 }, { "epoch": 1.1729250081160048, "grad_norm": 1.3671875, "learning_rate": 1.3295842212450202e-05, "loss": 0.4651, "step": 6871 }, { "epoch": 1.1730981495509143, "grad_norm": 1.484375, "learning_rate": 1.3294120207066689e-05, "loss": 0.492, "step": 6872 }, { "epoch": 1.173271290985824, "grad_norm": 1.3359375, "learning_rate": 1.3292398092105842e-05, "loss": 0.4935, "step": 6873 }, { "epoch": 1.1734444324207336, "grad_norm": 1.4453125, "learning_rate": 1.3290675867624951e-05, "loss": 0.4944, "step": 6874 }, { "epoch": 1.1736175738556434, "grad_norm": 1.4375, "learning_rate": 1.3288953533681298e-05, "loss": 0.4608, "step": 6875 }, { "epoch": 1.173790715290553, "grad_norm": 1.640625, "learning_rate": 1.3287231090332183e-05, "loss": 0.5139, "step": 6876 }, { "epoch": 1.1739638567254627, "grad_norm": 1.4609375, "learning_rate": 1.3285508537634892e-05, "loss": 0.4682, "step": 6877 }, { "epoch": 1.1741369981603722, "grad_norm": 1.53125, "learning_rate": 1.3283785875646734e-05, "loss": 0.4927, "step": 6878 }, { "epoch": 1.174310139595282, "grad_norm": 1.375, "learning_rate": 1.3282063104425012e-05, "loss": 0.4801, "step": 6879 }, { "epoch": 1.1744832810301915, "grad_norm": 1.3359375, "learning_rate": 1.3280340224027026e-05, "loss": 0.4512, "step": 6880 }, { "epoch": 1.1746564224651013, "grad_norm": 1.3828125, "learning_rate": 1.3278617234510096e-05, "loss": 0.4387, "step": 6881 }, { "epoch": 1.1748295639000108, "grad_norm": 1.4375, "learning_rate": 1.3276894135931526e-05, "loss": 0.5104, "step": 6882 }, { "epoch": 1.1750027053349203, "grad_norm": 1.4140625, "learning_rate": 1.3275170928348646e-05, "loss": 0.514, "step": 6883 }, { "epoch": 1.1751758467698301, "grad_norm": 1.5078125, "learning_rate": 1.3273447611818768e-05, "loss": 0.4978, "step": 6884 }, { "epoch": 1.1753489882047397, "grad_norm": 1.5078125, "learning_rate": 1.3271724186399222e-05, "loss": 0.4954, "step": 6885 }, { "epoch": 1.1755221296396494, "grad_norm": 1.515625, "learning_rate": 1.3270000652147339e-05, "loss": 0.4899, "step": 6886 }, { "epoch": 1.175695271074559, "grad_norm": 1.3125, "learning_rate": 1.3268277009120443e-05, "loss": 0.4337, "step": 6887 }, { "epoch": 1.1758684125094687, "grad_norm": 1.421875, "learning_rate": 1.3266553257375878e-05, "loss": 0.4412, "step": 6888 }, { "epoch": 1.1760415539443783, "grad_norm": 1.375, "learning_rate": 1.3264829396970983e-05, "loss": 0.542, "step": 6889 }, { "epoch": 1.176214695379288, "grad_norm": 1.328125, "learning_rate": 1.3263105427963097e-05, "loss": 0.482, "step": 6890 }, { "epoch": 1.1763878368141976, "grad_norm": 1.3203125, "learning_rate": 1.326138135040957e-05, "loss": 0.4281, "step": 6891 }, { "epoch": 1.1765609782491073, "grad_norm": 1.4375, "learning_rate": 1.3259657164367753e-05, "loss": 0.5299, "step": 6892 }, { "epoch": 1.1767341196840169, "grad_norm": 1.3984375, "learning_rate": 1.3257932869895e-05, "loss": 0.4571, "step": 6893 }, { "epoch": 1.1769072611189264, "grad_norm": 1.390625, "learning_rate": 1.3256208467048669e-05, "loss": 0.4683, "step": 6894 }, { "epoch": 1.1770804025538362, "grad_norm": 1.328125, "learning_rate": 1.3254483955886119e-05, "loss": 0.4856, "step": 6895 }, { "epoch": 1.1772535439887457, "grad_norm": 1.359375, "learning_rate": 1.3252759336464718e-05, "loss": 0.5382, "step": 6896 }, { "epoch": 1.1774266854236555, "grad_norm": 1.3515625, "learning_rate": 1.3251034608841833e-05, "loss": 0.4735, "step": 6897 }, { "epoch": 1.177599826858565, "grad_norm": 1.3125, "learning_rate": 1.3249309773074836e-05, "loss": 0.4882, "step": 6898 }, { "epoch": 1.1777729682934748, "grad_norm": 1.3515625, "learning_rate": 1.3247584829221104e-05, "loss": 0.4743, "step": 6899 }, { "epoch": 1.1779461097283843, "grad_norm": 1.40625, "learning_rate": 1.324585977733802e-05, "loss": 0.5184, "step": 6900 }, { "epoch": 1.178119251163294, "grad_norm": 1.40625, "learning_rate": 1.324413461748296e-05, "loss": 0.3995, "step": 6901 }, { "epoch": 1.1782923925982036, "grad_norm": 1.390625, "learning_rate": 1.3242409349713312e-05, "loss": 0.4522, "step": 6902 }, { "epoch": 1.1784655340331134, "grad_norm": 1.3671875, "learning_rate": 1.324068397408647e-05, "loss": 0.5062, "step": 6903 }, { "epoch": 1.178638675468023, "grad_norm": 1.4921875, "learning_rate": 1.3238958490659823e-05, "loss": 0.4936, "step": 6904 }, { "epoch": 1.1788118169029325, "grad_norm": 1.3984375, "learning_rate": 1.3237232899490775e-05, "loss": 0.4402, "step": 6905 }, { "epoch": 1.1789849583378422, "grad_norm": 1.3984375, "learning_rate": 1.3235507200636721e-05, "loss": 0.4888, "step": 6906 }, { "epoch": 1.179158099772752, "grad_norm": 1.3046875, "learning_rate": 1.323378139415507e-05, "loss": 0.5732, "step": 6907 }, { "epoch": 1.1793312412076615, "grad_norm": 1.4140625, "learning_rate": 1.3232055480103223e-05, "loss": 0.4997, "step": 6908 }, { "epoch": 1.179504382642571, "grad_norm": 1.3359375, "learning_rate": 1.32303294585386e-05, "loss": 0.4499, "step": 6909 }, { "epoch": 1.1796775240774808, "grad_norm": 1.3828125, "learning_rate": 1.322860332951861e-05, "loss": 0.4552, "step": 6910 }, { "epoch": 1.1798506655123904, "grad_norm": 1.5, "learning_rate": 1.3226877093100677e-05, "loss": 0.4944, "step": 6911 }, { "epoch": 1.1800238069473001, "grad_norm": 1.4453125, "learning_rate": 1.3225150749342222e-05, "loss": 0.4925, "step": 6912 }, { "epoch": 1.1801969483822097, "grad_norm": 1.4765625, "learning_rate": 1.3223424298300667e-05, "loss": 0.4469, "step": 6913 }, { "epoch": 1.1803700898171194, "grad_norm": 1.4921875, "learning_rate": 1.3221697740033444e-05, "loss": 0.4664, "step": 6914 }, { "epoch": 1.180543231252029, "grad_norm": 1.4609375, "learning_rate": 1.3219971074597988e-05, "loss": 0.5474, "step": 6915 }, { "epoch": 1.1807163726869385, "grad_norm": 1.5390625, "learning_rate": 1.3218244302051732e-05, "loss": 0.4357, "step": 6916 }, { "epoch": 1.1808895141218483, "grad_norm": 1.5, "learning_rate": 1.3216517422452124e-05, "loss": 0.4588, "step": 6917 }, { "epoch": 1.181062655556758, "grad_norm": 1.4375, "learning_rate": 1.3214790435856599e-05, "loss": 0.5169, "step": 6918 }, { "epoch": 1.1812357969916676, "grad_norm": 1.375, "learning_rate": 1.321306334232261e-05, "loss": 0.4974, "step": 6919 }, { "epoch": 1.1814089384265771, "grad_norm": 1.453125, "learning_rate": 1.3211336141907602e-05, "loss": 0.4318, "step": 6920 }, { "epoch": 1.181582079861487, "grad_norm": 1.4609375, "learning_rate": 1.3209608834669036e-05, "loss": 0.4895, "step": 6921 }, { "epoch": 1.1817552212963964, "grad_norm": 1.421875, "learning_rate": 1.3207881420664369e-05, "loss": 0.4696, "step": 6922 }, { "epoch": 1.1819283627313062, "grad_norm": 1.3203125, "learning_rate": 1.3206153899951057e-05, "loss": 0.5201, "step": 6923 }, { "epoch": 1.1821015041662157, "grad_norm": 1.4296875, "learning_rate": 1.3204426272586573e-05, "loss": 0.5057, "step": 6924 }, { "epoch": 1.1822746456011255, "grad_norm": 1.4609375, "learning_rate": 1.3202698538628376e-05, "loss": 0.4998, "step": 6925 }, { "epoch": 1.182447787036035, "grad_norm": 1.296875, "learning_rate": 1.320097069813395e-05, "loss": 0.4327, "step": 6926 }, { "epoch": 1.1826209284709448, "grad_norm": 1.4375, "learning_rate": 1.3199242751160761e-05, "loss": 0.4791, "step": 6927 }, { "epoch": 1.1827940699058543, "grad_norm": 1.421875, "learning_rate": 1.3197514697766294e-05, "loss": 0.448, "step": 6928 }, { "epoch": 1.182967211340764, "grad_norm": 1.4765625, "learning_rate": 1.3195786538008032e-05, "loss": 0.4983, "step": 6929 }, { "epoch": 1.1831403527756736, "grad_norm": 1.3359375, "learning_rate": 1.3194058271943453e-05, "loss": 0.48, "step": 6930 }, { "epoch": 1.1833134942105832, "grad_norm": 1.390625, "learning_rate": 1.3192329899630058e-05, "loss": 0.4632, "step": 6931 }, { "epoch": 1.183486635645493, "grad_norm": 1.421875, "learning_rate": 1.3190601421125335e-05, "loss": 0.5168, "step": 6932 }, { "epoch": 1.1836597770804025, "grad_norm": 1.359375, "learning_rate": 1.3188872836486786e-05, "loss": 0.4563, "step": 6933 }, { "epoch": 1.1838329185153122, "grad_norm": 1.5390625, "learning_rate": 1.3187144145771902e-05, "loss": 0.4682, "step": 6934 }, { "epoch": 1.1840060599502218, "grad_norm": 1.2734375, "learning_rate": 1.3185415349038193e-05, "loss": 0.3972, "step": 6935 }, { "epoch": 1.1841792013851316, "grad_norm": 1.34375, "learning_rate": 1.3183686446343167e-05, "loss": 0.4454, "step": 6936 }, { "epoch": 1.184352342820041, "grad_norm": 1.3984375, "learning_rate": 1.3181957437744333e-05, "loss": 0.5119, "step": 6937 }, { "epoch": 1.1845254842549509, "grad_norm": 1.484375, "learning_rate": 1.318022832329921e-05, "loss": 0.4849, "step": 6938 }, { "epoch": 1.1846986256898604, "grad_norm": 1.4140625, "learning_rate": 1.317849910306531e-05, "loss": 0.5089, "step": 6939 }, { "epoch": 1.1848717671247702, "grad_norm": 1.390625, "learning_rate": 1.3176769777100158e-05, "loss": 0.4992, "step": 6940 }, { "epoch": 1.1850449085596797, "grad_norm": 1.3203125, "learning_rate": 1.3175040345461279e-05, "loss": 0.4285, "step": 6941 }, { "epoch": 1.1852180499945892, "grad_norm": 1.3984375, "learning_rate": 1.31733108082062e-05, "loss": 0.4575, "step": 6942 }, { "epoch": 1.185391191429499, "grad_norm": 1.4921875, "learning_rate": 1.317158116539246e-05, "loss": 0.4793, "step": 6943 }, { "epoch": 1.1855643328644085, "grad_norm": 1.390625, "learning_rate": 1.3169851417077587e-05, "loss": 0.4531, "step": 6944 }, { "epoch": 1.1857374742993183, "grad_norm": 1.4765625, "learning_rate": 1.3168121563319124e-05, "loss": 0.4867, "step": 6945 }, { "epoch": 1.1859106157342278, "grad_norm": 1.453125, "learning_rate": 1.3166391604174609e-05, "loss": 0.5558, "step": 6946 }, { "epoch": 1.1860837571691376, "grad_norm": 1.390625, "learning_rate": 1.3164661539701594e-05, "loss": 0.4187, "step": 6947 }, { "epoch": 1.1862568986040471, "grad_norm": 1.421875, "learning_rate": 1.3162931369957624e-05, "loss": 0.4955, "step": 6948 }, { "epoch": 1.186430040038957, "grad_norm": 1.359375, "learning_rate": 1.3161201095000257e-05, "loss": 0.4314, "step": 6949 }, { "epoch": 1.1866031814738665, "grad_norm": 1.40625, "learning_rate": 1.3159470714887049e-05, "loss": 0.5021, "step": 6950 }, { "epoch": 1.1867763229087762, "grad_norm": 1.3671875, "learning_rate": 1.3157740229675557e-05, "loss": 0.3987, "step": 6951 }, { "epoch": 1.1869494643436858, "grad_norm": 1.296875, "learning_rate": 1.3156009639423346e-05, "loss": 0.4505, "step": 6952 }, { "epoch": 1.1871226057785953, "grad_norm": 1.4140625, "learning_rate": 1.3154278944187986e-05, "loss": 0.4326, "step": 6953 }, { "epoch": 1.187295747213505, "grad_norm": 1.34375, "learning_rate": 1.3152548144027044e-05, "loss": 0.4248, "step": 6954 }, { "epoch": 1.1874688886484146, "grad_norm": 1.328125, "learning_rate": 1.3150817238998097e-05, "loss": 0.4677, "step": 6955 }, { "epoch": 1.1876420300833244, "grad_norm": 1.296875, "learning_rate": 1.3149086229158724e-05, "loss": 0.3775, "step": 6956 }, { "epoch": 1.187815171518234, "grad_norm": 1.453125, "learning_rate": 1.3147355114566497e-05, "loss": 0.489, "step": 6957 }, { "epoch": 1.1879883129531437, "grad_norm": 1.4921875, "learning_rate": 1.3145623895279012e-05, "loss": 0.5064, "step": 6958 }, { "epoch": 1.1881614543880532, "grad_norm": 1.3359375, "learning_rate": 1.3143892571353853e-05, "loss": 0.4757, "step": 6959 }, { "epoch": 1.188334595822963, "grad_norm": 1.3515625, "learning_rate": 1.3142161142848613e-05, "loss": 0.4342, "step": 6960 }, { "epoch": 1.1885077372578725, "grad_norm": 1.421875, "learning_rate": 1.3140429609820882e-05, "loss": 0.4838, "step": 6961 }, { "epoch": 1.1886808786927823, "grad_norm": 1.453125, "learning_rate": 1.3138697972328265e-05, "loss": 0.4527, "step": 6962 }, { "epoch": 1.1888540201276918, "grad_norm": 1.359375, "learning_rate": 1.3136966230428356e-05, "loss": 0.4409, "step": 6963 }, { "epoch": 1.1890271615626014, "grad_norm": 1.3828125, "learning_rate": 1.3135234384178772e-05, "loss": 0.4504, "step": 6964 }, { "epoch": 1.1892003029975111, "grad_norm": 1.4140625, "learning_rate": 1.3133502433637112e-05, "loss": 0.4956, "step": 6965 }, { "epoch": 1.1893734444324207, "grad_norm": 1.4375, "learning_rate": 1.3131770378860993e-05, "loss": 0.4684, "step": 6966 }, { "epoch": 1.1895465858673304, "grad_norm": 1.3359375, "learning_rate": 1.3130038219908034e-05, "loss": 0.4792, "step": 6967 }, { "epoch": 1.18971972730224, "grad_norm": 1.2578125, "learning_rate": 1.3128305956835845e-05, "loss": 0.4375, "step": 6968 }, { "epoch": 1.1898928687371497, "grad_norm": 1.3359375, "learning_rate": 1.3126573589702058e-05, "loss": 0.4939, "step": 6969 }, { "epoch": 1.1900660101720593, "grad_norm": 1.4609375, "learning_rate": 1.3124841118564295e-05, "loss": 0.5124, "step": 6970 }, { "epoch": 1.190239151606969, "grad_norm": 1.5078125, "learning_rate": 1.312310854348019e-05, "loss": 0.492, "step": 6971 }, { "epoch": 1.1904122930418786, "grad_norm": 1.4296875, "learning_rate": 1.312137586450737e-05, "loss": 0.4665, "step": 6972 }, { "epoch": 1.1905854344767883, "grad_norm": 1.421875, "learning_rate": 1.3119643081703475e-05, "loss": 0.4919, "step": 6973 }, { "epoch": 1.1907585759116979, "grad_norm": 1.4375, "learning_rate": 1.3117910195126144e-05, "loss": 0.4535, "step": 6974 }, { "epoch": 1.1909317173466074, "grad_norm": 1.453125, "learning_rate": 1.3116177204833024e-05, "loss": 0.4494, "step": 6975 }, { "epoch": 1.1911048587815172, "grad_norm": 1.40625, "learning_rate": 1.311444411088176e-05, "loss": 0.5438, "step": 6976 }, { "epoch": 1.1912780002164267, "grad_norm": 1.3828125, "learning_rate": 1.311271091333e-05, "loss": 0.4883, "step": 6977 }, { "epoch": 1.1914511416513365, "grad_norm": 1.53125, "learning_rate": 1.3110977612235403e-05, "loss": 0.548, "step": 6978 }, { "epoch": 1.191624283086246, "grad_norm": 1.3359375, "learning_rate": 1.3109244207655621e-05, "loss": 0.4684, "step": 6979 }, { "epoch": 1.1917974245211558, "grad_norm": 1.3125, "learning_rate": 1.310751069964832e-05, "loss": 0.4697, "step": 6980 }, { "epoch": 1.1919705659560653, "grad_norm": 1.375, "learning_rate": 1.3105777088271161e-05, "loss": 0.457, "step": 6981 }, { "epoch": 1.192143707390975, "grad_norm": 1.3984375, "learning_rate": 1.3104043373581812e-05, "loss": 0.4607, "step": 6982 }, { "epoch": 1.1923168488258846, "grad_norm": 1.3671875, "learning_rate": 1.3102309555637948e-05, "loss": 0.5106, "step": 6983 }, { "epoch": 1.1924899902607944, "grad_norm": 1.453125, "learning_rate": 1.3100575634497239e-05, "loss": 0.4786, "step": 6984 }, { "epoch": 1.192663131695704, "grad_norm": 1.421875, "learning_rate": 1.3098841610217363e-05, "loss": 0.475, "step": 6985 }, { "epoch": 1.1928362731306135, "grad_norm": 1.3046875, "learning_rate": 1.3097107482856002e-05, "loss": 0.4385, "step": 6986 }, { "epoch": 1.1930094145655232, "grad_norm": 1.4296875, "learning_rate": 1.3095373252470843e-05, "loss": 0.4908, "step": 6987 }, { "epoch": 1.1931825560004328, "grad_norm": 1.4609375, "learning_rate": 1.3093638919119576e-05, "loss": 0.5105, "step": 6988 }, { "epoch": 1.1933556974353425, "grad_norm": 1.390625, "learning_rate": 1.3091904482859887e-05, "loss": 0.4187, "step": 6989 }, { "epoch": 1.193528838870252, "grad_norm": 1.4296875, "learning_rate": 1.3090169943749475e-05, "loss": 0.4843, "step": 6990 }, { "epoch": 1.1937019803051618, "grad_norm": 1.4296875, "learning_rate": 1.308843530184604e-05, "loss": 0.508, "step": 6991 }, { "epoch": 1.1938751217400714, "grad_norm": 1.4140625, "learning_rate": 1.308670055720728e-05, "loss": 0.4578, "step": 6992 }, { "epoch": 1.1940482631749811, "grad_norm": 1.4921875, "learning_rate": 1.3084965709890902e-05, "loss": 0.4607, "step": 6993 }, { "epoch": 1.1942214046098907, "grad_norm": 1.4921875, "learning_rate": 1.3083230759954617e-05, "loss": 0.5247, "step": 6994 }, { "epoch": 1.1943945460448004, "grad_norm": 1.375, "learning_rate": 1.3081495707456134e-05, "loss": 0.4531, "step": 6995 }, { "epoch": 1.19456768747971, "grad_norm": 1.3671875, "learning_rate": 1.3079760552453169e-05, "loss": 0.4256, "step": 6996 }, { "epoch": 1.1947408289146195, "grad_norm": 1.53125, "learning_rate": 1.3078025295003447e-05, "loss": 0.4135, "step": 6997 }, { "epoch": 1.1949139703495293, "grad_norm": 1.421875, "learning_rate": 1.3076289935164681e-05, "loss": 0.4654, "step": 6998 }, { "epoch": 1.1950871117844388, "grad_norm": 1.3203125, "learning_rate": 1.3074554472994603e-05, "loss": 0.4875, "step": 6999 }, { "epoch": 1.1952602532193486, "grad_norm": 1.6015625, "learning_rate": 1.3072818908550943e-05, "loss": 0.5152, "step": 7000 }, { "epoch": 1.1954333946542581, "grad_norm": 1.4140625, "learning_rate": 1.3071083241891428e-05, "loss": 0.4546, "step": 7001 }, { "epoch": 1.195606536089168, "grad_norm": 1.3125, "learning_rate": 1.3069347473073802e-05, "loss": 0.4578, "step": 7002 }, { "epoch": 1.1957796775240774, "grad_norm": 1.453125, "learning_rate": 1.3067611602155799e-05, "loss": 0.5058, "step": 7003 }, { "epoch": 1.1959528189589872, "grad_norm": 1.25, "learning_rate": 1.3065875629195162e-05, "loss": 0.462, "step": 7004 }, { "epoch": 1.1961259603938967, "grad_norm": 1.3984375, "learning_rate": 1.3064139554249642e-05, "loss": 0.4551, "step": 7005 }, { "epoch": 1.1962991018288065, "grad_norm": 1.390625, "learning_rate": 1.306240337737698e-05, "loss": 0.4757, "step": 7006 }, { "epoch": 1.196472243263716, "grad_norm": 1.328125, "learning_rate": 1.3060667098634938e-05, "loss": 0.4839, "step": 7007 }, { "epoch": 1.1966453846986256, "grad_norm": 1.3828125, "learning_rate": 1.3058930718081268e-05, "loss": 0.4661, "step": 7008 }, { "epoch": 1.1968185261335353, "grad_norm": 1.3671875, "learning_rate": 1.3057194235773733e-05, "loss": 0.4608, "step": 7009 }, { "epoch": 1.1969916675684449, "grad_norm": 1.375, "learning_rate": 1.3055457651770094e-05, "loss": 0.4469, "step": 7010 }, { "epoch": 1.1971648090033546, "grad_norm": 1.34375, "learning_rate": 1.3053720966128114e-05, "loss": 0.4373, "step": 7011 }, { "epoch": 1.1973379504382642, "grad_norm": 1.6171875, "learning_rate": 1.3051984178905569e-05, "loss": 0.5896, "step": 7012 }, { "epoch": 1.197511091873174, "grad_norm": 1.5234375, "learning_rate": 1.305024729016023e-05, "loss": 0.463, "step": 7013 }, { "epoch": 1.1976842333080835, "grad_norm": 1.4765625, "learning_rate": 1.3048510299949875e-05, "loss": 0.5572, "step": 7014 }, { "epoch": 1.1978573747429933, "grad_norm": 1.484375, "learning_rate": 1.3046773208332281e-05, "loss": 0.528, "step": 7015 }, { "epoch": 1.1980305161779028, "grad_norm": 1.2265625, "learning_rate": 1.3045036015365233e-05, "loss": 0.4739, "step": 7016 }, { "epoch": 1.1982036576128126, "grad_norm": 1.3125, "learning_rate": 1.3043298721106521e-05, "loss": 0.4466, "step": 7017 }, { "epoch": 1.198376799047722, "grad_norm": 1.46875, "learning_rate": 1.304156132561393e-05, "loss": 0.5344, "step": 7018 }, { "epoch": 1.1985499404826316, "grad_norm": 1.28125, "learning_rate": 1.303982382894526e-05, "loss": 0.4926, "step": 7019 }, { "epoch": 1.1987230819175414, "grad_norm": 1.328125, "learning_rate": 1.3038086231158303e-05, "loss": 0.4474, "step": 7020 }, { "epoch": 1.198896223352451, "grad_norm": 1.4296875, "learning_rate": 1.3036348532310861e-05, "loss": 0.5042, "step": 7021 }, { "epoch": 1.1990693647873607, "grad_norm": 1.34375, "learning_rate": 1.3034610732460733e-05, "loss": 0.4205, "step": 7022 }, { "epoch": 1.1992425062222702, "grad_norm": 1.328125, "learning_rate": 1.3032872831665735e-05, "loss": 0.4639, "step": 7023 }, { "epoch": 1.19941564765718, "grad_norm": 1.390625, "learning_rate": 1.3031134829983671e-05, "loss": 0.4602, "step": 7024 }, { "epoch": 1.1995887890920895, "grad_norm": 1.3125, "learning_rate": 1.3029396727472356e-05, "loss": 0.4305, "step": 7025 }, { "epoch": 1.1997619305269993, "grad_norm": 1.4296875, "learning_rate": 1.3027658524189613e-05, "loss": 0.5205, "step": 7026 }, { "epoch": 1.1999350719619089, "grad_norm": 1.4453125, "learning_rate": 1.3025920220193252e-05, "loss": 0.4477, "step": 7027 }, { "epoch": 1.2001082133968186, "grad_norm": 1.3671875, "learning_rate": 1.3024181815541104e-05, "loss": 0.5133, "step": 7028 }, { "epoch": 1.2002813548317282, "grad_norm": 1.3125, "learning_rate": 1.3022443310290993e-05, "loss": 0.4461, "step": 7029 }, { "epoch": 1.2004544962666377, "grad_norm": 1.4140625, "learning_rate": 1.3020704704500751e-05, "loss": 0.575, "step": 7030 }, { "epoch": 1.2006276377015475, "grad_norm": 1.5234375, "learning_rate": 1.3018965998228214e-05, "loss": 0.5458, "step": 7031 }, { "epoch": 1.200800779136457, "grad_norm": 1.3828125, "learning_rate": 1.3017227191531217e-05, "loss": 0.474, "step": 7032 }, { "epoch": 1.2009739205713668, "grad_norm": 1.3203125, "learning_rate": 1.3015488284467599e-05, "loss": 0.45, "step": 7033 }, { "epoch": 1.2011470620062763, "grad_norm": 1.53125, "learning_rate": 1.3013749277095207e-05, "loss": 0.4781, "step": 7034 }, { "epoch": 1.201320203441186, "grad_norm": 1.390625, "learning_rate": 1.3012010169471886e-05, "loss": 0.4724, "step": 7035 }, { "epoch": 1.2014933448760956, "grad_norm": 1.5234375, "learning_rate": 1.301027096165549e-05, "loss": 0.4998, "step": 7036 }, { "epoch": 1.2016664863110054, "grad_norm": 1.390625, "learning_rate": 1.3008531653703864e-05, "loss": 0.4854, "step": 7037 }, { "epoch": 1.201839627745915, "grad_norm": 1.375, "learning_rate": 1.300679224567488e-05, "loss": 0.471, "step": 7038 }, { "epoch": 1.2020127691808247, "grad_norm": 1.2109375, "learning_rate": 1.3005052737626383e-05, "loss": 0.4239, "step": 7039 }, { "epoch": 1.2021859106157342, "grad_norm": 1.40625, "learning_rate": 1.3003313129616246e-05, "loss": 0.4747, "step": 7040 }, { "epoch": 1.2023590520506438, "grad_norm": 1.3203125, "learning_rate": 1.3001573421702336e-05, "loss": 0.4173, "step": 7041 }, { "epoch": 1.2025321934855535, "grad_norm": 1.390625, "learning_rate": 1.299983361394252e-05, "loss": 0.5015, "step": 7042 }, { "epoch": 1.2027053349204633, "grad_norm": 1.4375, "learning_rate": 1.2998093706394676e-05, "loss": 0.4666, "step": 7043 }, { "epoch": 1.2028784763553728, "grad_norm": 1.3671875, "learning_rate": 1.2996353699116674e-05, "loss": 0.4455, "step": 7044 }, { "epoch": 1.2030516177902824, "grad_norm": 1.3359375, "learning_rate": 1.2994613592166405e-05, "loss": 0.4875, "step": 7045 }, { "epoch": 1.2032247592251921, "grad_norm": 1.390625, "learning_rate": 1.2992873385601746e-05, "loss": 0.4606, "step": 7046 }, { "epoch": 1.2033979006601017, "grad_norm": 1.578125, "learning_rate": 1.2991133079480585e-05, "loss": 0.4625, "step": 7047 }, { "epoch": 1.2035710420950114, "grad_norm": 1.4140625, "learning_rate": 1.2989392673860813e-05, "loss": 0.4282, "step": 7048 }, { "epoch": 1.203744183529921, "grad_norm": 1.453125, "learning_rate": 1.2987652168800322e-05, "loss": 0.4816, "step": 7049 }, { "epoch": 1.2039173249648307, "grad_norm": 1.4140625, "learning_rate": 1.298591156435701e-05, "loss": 0.4831, "step": 7050 }, { "epoch": 1.2040904663997403, "grad_norm": 1.453125, "learning_rate": 1.2984170860588781e-05, "loss": 0.4474, "step": 7051 }, { "epoch": 1.2042636078346498, "grad_norm": 1.3671875, "learning_rate": 1.2982430057553534e-05, "loss": 0.5401, "step": 7052 }, { "epoch": 1.2044367492695596, "grad_norm": 1.359375, "learning_rate": 1.2980689155309177e-05, "loss": 0.4783, "step": 7053 }, { "epoch": 1.2046098907044693, "grad_norm": 1.3203125, "learning_rate": 1.2978948153913622e-05, "loss": 0.4753, "step": 7054 }, { "epoch": 1.2047830321393789, "grad_norm": 1.5, "learning_rate": 1.2977207053424781e-05, "loss": 0.4411, "step": 7055 }, { "epoch": 1.2049561735742884, "grad_norm": 1.421875, "learning_rate": 1.297546585390057e-05, "loss": 0.5456, "step": 7056 }, { "epoch": 1.2051293150091982, "grad_norm": 1.3515625, "learning_rate": 1.2973724555398914e-05, "loss": 0.4711, "step": 7057 }, { "epoch": 1.2053024564441077, "grad_norm": 1.4765625, "learning_rate": 1.2971983157977732e-05, "loss": 0.4964, "step": 7058 }, { "epoch": 1.2054755978790175, "grad_norm": 1.2578125, "learning_rate": 1.2970241661694953e-05, "loss": 0.5093, "step": 7059 }, { "epoch": 1.205648739313927, "grad_norm": 1.4296875, "learning_rate": 1.2968500066608502e-05, "loss": 0.428, "step": 7060 }, { "epoch": 1.2058218807488368, "grad_norm": 1.4921875, "learning_rate": 1.296675837277632e-05, "loss": 0.4616, "step": 7061 }, { "epoch": 1.2059950221837463, "grad_norm": 1.3984375, "learning_rate": 1.2965016580256338e-05, "loss": 0.4375, "step": 7062 }, { "epoch": 1.206168163618656, "grad_norm": 1.390625, "learning_rate": 1.2963274689106497e-05, "loss": 0.5413, "step": 7063 }, { "epoch": 1.2063413050535656, "grad_norm": 1.4609375, "learning_rate": 1.2961532699384742e-05, "loss": 0.5044, "step": 7064 }, { "epoch": 1.2065144464884754, "grad_norm": 1.359375, "learning_rate": 1.2959790611149017e-05, "loss": 0.4271, "step": 7065 }, { "epoch": 1.206687587923385, "grad_norm": 1.3828125, "learning_rate": 1.2958048424457275e-05, "loss": 0.4852, "step": 7066 }, { "epoch": 1.2068607293582945, "grad_norm": 1.3984375, "learning_rate": 1.2956306139367465e-05, "loss": 0.4385, "step": 7067 }, { "epoch": 1.2070338707932042, "grad_norm": 1.328125, "learning_rate": 1.2954563755937546e-05, "loss": 0.4699, "step": 7068 }, { "epoch": 1.2072070122281138, "grad_norm": 1.2890625, "learning_rate": 1.2952821274225478e-05, "loss": 0.4579, "step": 7069 }, { "epoch": 1.2073801536630235, "grad_norm": 1.390625, "learning_rate": 1.295107869428922e-05, "loss": 0.5271, "step": 7070 }, { "epoch": 1.207553295097933, "grad_norm": 1.484375, "learning_rate": 1.2949336016186744e-05, "loss": 0.5197, "step": 7071 }, { "epoch": 1.2077264365328428, "grad_norm": 1.3828125, "learning_rate": 1.2947593239976013e-05, "loss": 0.4623, "step": 7072 }, { "epoch": 1.2078995779677524, "grad_norm": 1.5078125, "learning_rate": 1.2945850365715007e-05, "loss": 0.4673, "step": 7073 }, { "epoch": 1.2080727194026621, "grad_norm": 1.3828125, "learning_rate": 1.2944107393461696e-05, "loss": 0.4628, "step": 7074 }, { "epoch": 1.2082458608375717, "grad_norm": 1.4609375, "learning_rate": 1.294236432327406e-05, "loss": 0.4587, "step": 7075 }, { "epoch": 1.2084190022724814, "grad_norm": 1.34375, "learning_rate": 1.2940621155210082e-05, "loss": 0.4668, "step": 7076 }, { "epoch": 1.208592143707391, "grad_norm": 1.3828125, "learning_rate": 1.2938877889327746e-05, "loss": 0.4224, "step": 7077 }, { "epoch": 1.2087652851423005, "grad_norm": 1.453125, "learning_rate": 1.2937134525685044e-05, "loss": 0.5092, "step": 7078 }, { "epoch": 1.2089384265772103, "grad_norm": 1.453125, "learning_rate": 1.2935391064339969e-05, "loss": 0.4714, "step": 7079 }, { "epoch": 1.2091115680121198, "grad_norm": 1.3515625, "learning_rate": 1.2933647505350513e-05, "loss": 0.4468, "step": 7080 }, { "epoch": 1.2092847094470296, "grad_norm": 1.453125, "learning_rate": 1.2931903848774676e-05, "loss": 0.5206, "step": 7081 }, { "epoch": 1.2094578508819391, "grad_norm": 1.390625, "learning_rate": 1.2930160094670456e-05, "loss": 0.4358, "step": 7082 }, { "epoch": 1.209630992316849, "grad_norm": 1.4609375, "learning_rate": 1.292841624309587e-05, "loss": 0.4759, "step": 7083 }, { "epoch": 1.2098041337517584, "grad_norm": 1.4609375, "learning_rate": 1.2926672294108912e-05, "loss": 0.461, "step": 7084 }, { "epoch": 1.2099772751866682, "grad_norm": 1.2734375, "learning_rate": 1.2924928247767605e-05, "loss": 0.4359, "step": 7085 }, { "epoch": 1.2101504166215777, "grad_norm": 1.328125, "learning_rate": 1.2923184104129956e-05, "loss": 0.4417, "step": 7086 }, { "epoch": 1.2103235580564875, "grad_norm": 1.3671875, "learning_rate": 1.2921439863253984e-05, "loss": 0.4418, "step": 7087 }, { "epoch": 1.210496699491397, "grad_norm": 1.3359375, "learning_rate": 1.2919695525197713e-05, "loss": 0.481, "step": 7088 }, { "epoch": 1.2106698409263066, "grad_norm": 1.4453125, "learning_rate": 1.2917951090019167e-05, "loss": 0.4629, "step": 7089 }, { "epoch": 1.2108429823612163, "grad_norm": 1.4609375, "learning_rate": 1.2916206557776376e-05, "loss": 0.6145, "step": 7090 }, { "epoch": 1.2110161237961259, "grad_norm": 1.4453125, "learning_rate": 1.291446192852737e-05, "loss": 0.4711, "step": 7091 }, { "epoch": 1.2111892652310356, "grad_norm": 1.3828125, "learning_rate": 1.2912717202330178e-05, "loss": 0.4684, "step": 7092 }, { "epoch": 1.2113624066659452, "grad_norm": 1.3828125, "learning_rate": 1.2910972379242841e-05, "loss": 0.5473, "step": 7093 }, { "epoch": 1.211535548100855, "grad_norm": 1.3046875, "learning_rate": 1.2909227459323403e-05, "loss": 0.4827, "step": 7094 }, { "epoch": 1.2117086895357645, "grad_norm": 1.3515625, "learning_rate": 1.2907482442629906e-05, "loss": 0.4848, "step": 7095 }, { "epoch": 1.2118818309706743, "grad_norm": 1.390625, "learning_rate": 1.2905737329220394e-05, "loss": 0.4984, "step": 7096 }, { "epoch": 1.2120549724055838, "grad_norm": 1.3125, "learning_rate": 1.2903992119152921e-05, "loss": 0.4401, "step": 7097 }, { "epoch": 1.2122281138404936, "grad_norm": 1.4375, "learning_rate": 1.2902246812485535e-05, "loss": 0.5076, "step": 7098 }, { "epoch": 1.212401255275403, "grad_norm": 1.4765625, "learning_rate": 1.2900501409276302e-05, "loss": 0.4553, "step": 7099 }, { "epoch": 1.2125743967103126, "grad_norm": 1.359375, "learning_rate": 1.2898755909583275e-05, "loss": 0.4415, "step": 7100 }, { "epoch": 1.2127475381452224, "grad_norm": 1.4140625, "learning_rate": 1.289701031346452e-05, "loss": 0.4612, "step": 7101 }, { "epoch": 1.212920679580132, "grad_norm": 1.484375, "learning_rate": 1.2895264620978104e-05, "loss": 0.4788, "step": 7102 }, { "epoch": 1.2130938210150417, "grad_norm": 1.4453125, "learning_rate": 1.2893518832182092e-05, "loss": 0.5056, "step": 7103 }, { "epoch": 1.2132669624499512, "grad_norm": 1.4296875, "learning_rate": 1.289177294713456e-05, "loss": 0.4472, "step": 7104 }, { "epoch": 1.213440103884861, "grad_norm": 1.28125, "learning_rate": 1.2890026965893586e-05, "loss": 0.3953, "step": 7105 }, { "epoch": 1.2136132453197706, "grad_norm": 1.4453125, "learning_rate": 1.2888280888517247e-05, "loss": 0.4781, "step": 7106 }, { "epoch": 1.2137863867546803, "grad_norm": 1.40625, "learning_rate": 1.2886534715063626e-05, "loss": 0.5081, "step": 7107 }, { "epoch": 1.2139595281895899, "grad_norm": 1.3359375, "learning_rate": 1.2884788445590807e-05, "loss": 0.4758, "step": 7108 }, { "epoch": 1.2141326696244996, "grad_norm": 1.3359375, "learning_rate": 1.288304208015688e-05, "loss": 0.4924, "step": 7109 }, { "epoch": 1.2143058110594092, "grad_norm": 1.3359375, "learning_rate": 1.288129561881994e-05, "loss": 0.4678, "step": 7110 }, { "epoch": 1.2144789524943187, "grad_norm": 1.2890625, "learning_rate": 1.287954906163808e-05, "loss": 0.4503, "step": 7111 }, { "epoch": 1.2146520939292285, "grad_norm": 1.484375, "learning_rate": 1.2877802408669393e-05, "loss": 0.5003, "step": 7112 }, { "epoch": 1.214825235364138, "grad_norm": 1.421875, "learning_rate": 1.2876055659971986e-05, "loss": 0.4993, "step": 7113 }, { "epoch": 1.2149983767990478, "grad_norm": 1.3125, "learning_rate": 1.2874308815603966e-05, "loss": 0.4163, "step": 7114 }, { "epoch": 1.2151715182339573, "grad_norm": 1.5390625, "learning_rate": 1.2872561875623438e-05, "loss": 0.4703, "step": 7115 }, { "epoch": 1.215344659668867, "grad_norm": 1.453125, "learning_rate": 1.2870814840088513e-05, "loss": 0.4604, "step": 7116 }, { "epoch": 1.2155178011037766, "grad_norm": 1.4453125, "learning_rate": 1.2869067709057304e-05, "loss": 0.4663, "step": 7117 }, { "epoch": 1.2156909425386864, "grad_norm": 1.5, "learning_rate": 1.286732048258793e-05, "loss": 0.4469, "step": 7118 }, { "epoch": 1.215864083973596, "grad_norm": 1.5, "learning_rate": 1.2865573160738514e-05, "loss": 0.5685, "step": 7119 }, { "epoch": 1.2160372254085057, "grad_norm": 1.453125, "learning_rate": 1.2863825743567174e-05, "loss": 0.4656, "step": 7120 }, { "epoch": 1.2162103668434152, "grad_norm": 1.4765625, "learning_rate": 1.2862078231132045e-05, "loss": 0.4681, "step": 7121 }, { "epoch": 1.2163835082783248, "grad_norm": 1.46875, "learning_rate": 1.2860330623491249e-05, "loss": 0.493, "step": 7122 }, { "epoch": 1.2165566497132345, "grad_norm": 1.3671875, "learning_rate": 1.2858582920702925e-05, "loss": 0.4858, "step": 7123 }, { "epoch": 1.216729791148144, "grad_norm": 1.4609375, "learning_rate": 1.2856835122825206e-05, "loss": 0.512, "step": 7124 }, { "epoch": 1.2169029325830538, "grad_norm": 1.421875, "learning_rate": 1.2855087229916232e-05, "loss": 0.4993, "step": 7125 }, { "epoch": 1.2170760740179634, "grad_norm": 1.4453125, "learning_rate": 1.2853339242034148e-05, "loss": 0.4191, "step": 7126 }, { "epoch": 1.2172492154528731, "grad_norm": 1.4140625, "learning_rate": 1.28515911592371e-05, "loss": 0.5085, "step": 7127 }, { "epoch": 1.2174223568877827, "grad_norm": 1.421875, "learning_rate": 1.2849842981583237e-05, "loss": 0.4207, "step": 7128 }, { "epoch": 1.2175954983226924, "grad_norm": 1.375, "learning_rate": 1.2848094709130707e-05, "loss": 0.4399, "step": 7129 }, { "epoch": 1.217768639757602, "grad_norm": 1.421875, "learning_rate": 1.284634634193767e-05, "loss": 0.5113, "step": 7130 }, { "epoch": 1.2179417811925117, "grad_norm": 1.4375, "learning_rate": 1.2844597880062285e-05, "loss": 0.4806, "step": 7131 }, { "epoch": 1.2181149226274213, "grad_norm": 1.3203125, "learning_rate": 1.2842849323562709e-05, "loss": 0.5183, "step": 7132 }, { "epoch": 1.2182880640623308, "grad_norm": 1.28125, "learning_rate": 1.2841100672497116e-05, "loss": 0.3877, "step": 7133 }, { "epoch": 1.2184612054972406, "grad_norm": 1.421875, "learning_rate": 1.2839351926923662e-05, "loss": 0.5648, "step": 7134 }, { "epoch": 1.2186343469321501, "grad_norm": 1.3828125, "learning_rate": 1.2837603086900527e-05, "loss": 0.5241, "step": 7135 }, { "epoch": 1.2188074883670599, "grad_norm": 1.46875, "learning_rate": 1.2835854152485881e-05, "loss": 0.4782, "step": 7136 }, { "epoch": 1.2189806298019694, "grad_norm": 1.453125, "learning_rate": 1.2834105123737905e-05, "loss": 0.4656, "step": 7137 }, { "epoch": 1.2191537712368792, "grad_norm": 1.328125, "learning_rate": 1.2832356000714776e-05, "loss": 0.4561, "step": 7138 }, { "epoch": 1.2193269126717887, "grad_norm": 1.4765625, "learning_rate": 1.283060678347468e-05, "loss": 0.4882, "step": 7139 }, { "epoch": 1.2195000541066985, "grad_norm": 1.375, "learning_rate": 1.2828857472075806e-05, "loss": 0.5159, "step": 7140 }, { "epoch": 1.219673195541608, "grad_norm": 1.4140625, "learning_rate": 1.2827108066576335e-05, "loss": 0.4834, "step": 7141 }, { "epoch": 1.2198463369765178, "grad_norm": 1.3359375, "learning_rate": 1.2825358567034471e-05, "loss": 0.4498, "step": 7142 }, { "epoch": 1.2200194784114273, "grad_norm": 1.4765625, "learning_rate": 1.2823608973508406e-05, "loss": 0.5332, "step": 7143 }, { "epoch": 1.2201926198463369, "grad_norm": 1.3515625, "learning_rate": 1.2821859286056338e-05, "loss": 0.4994, "step": 7144 }, { "epoch": 1.2203657612812466, "grad_norm": 1.3515625, "learning_rate": 1.2820109504736471e-05, "loss": 0.4687, "step": 7145 }, { "epoch": 1.2205389027161562, "grad_norm": 1.40625, "learning_rate": 1.2818359629607008e-05, "loss": 0.4721, "step": 7146 }, { "epoch": 1.220712044151066, "grad_norm": 1.3671875, "learning_rate": 1.2816609660726163e-05, "loss": 0.4366, "step": 7147 }, { "epoch": 1.2208851855859755, "grad_norm": 1.3828125, "learning_rate": 1.2814859598152142e-05, "loss": 0.4691, "step": 7148 }, { "epoch": 1.2210583270208852, "grad_norm": 1.5234375, "learning_rate": 1.2813109441943166e-05, "loss": 0.6931, "step": 7149 }, { "epoch": 1.2212314684557948, "grad_norm": 1.328125, "learning_rate": 1.2811359192157449e-05, "loss": 0.4906, "step": 7150 }, { "epoch": 1.2214046098907045, "grad_norm": 1.421875, "learning_rate": 1.2809608848853213e-05, "loss": 0.5175, "step": 7151 }, { "epoch": 1.221577751325614, "grad_norm": 1.34375, "learning_rate": 1.2807858412088681e-05, "loss": 0.4936, "step": 7152 }, { "epoch": 1.2217508927605238, "grad_norm": 1.3125, "learning_rate": 1.2806107881922084e-05, "loss": 0.4259, "step": 7153 }, { "epoch": 1.2219240341954334, "grad_norm": 1.375, "learning_rate": 1.2804357258411649e-05, "loss": 0.4309, "step": 7154 }, { "epoch": 1.222097175630343, "grad_norm": 1.359375, "learning_rate": 1.2802606541615612e-05, "loss": 0.4835, "step": 7155 }, { "epoch": 1.2222703170652527, "grad_norm": 1.421875, "learning_rate": 1.2800855731592207e-05, "loss": 0.4241, "step": 7156 }, { "epoch": 1.2224434585001622, "grad_norm": 1.4140625, "learning_rate": 1.279910482839968e-05, "loss": 0.4647, "step": 7157 }, { "epoch": 1.222616599935072, "grad_norm": 1.515625, "learning_rate": 1.2797353832096263e-05, "loss": 0.5061, "step": 7158 }, { "epoch": 1.2227897413699815, "grad_norm": 1.40625, "learning_rate": 1.2795602742740217e-05, "loss": 0.4789, "step": 7159 }, { "epoch": 1.2229628828048913, "grad_norm": 1.46875, "learning_rate": 1.2793851560389778e-05, "loss": 0.4848, "step": 7160 }, { "epoch": 1.2231360242398008, "grad_norm": 1.5625, "learning_rate": 1.2792100285103203e-05, "loss": 0.5805, "step": 7161 }, { "epoch": 1.2233091656747106, "grad_norm": 1.28125, "learning_rate": 1.279034891693875e-05, "loss": 0.4178, "step": 7162 }, { "epoch": 1.2234823071096201, "grad_norm": 1.40625, "learning_rate": 1.2788597455954674e-05, "loss": 0.4782, "step": 7163 }, { "epoch": 1.22365544854453, "grad_norm": 1.4140625, "learning_rate": 1.2786845902209235e-05, "loss": 0.4987, "step": 7164 }, { "epoch": 1.2238285899794394, "grad_norm": 1.328125, "learning_rate": 1.2785094255760701e-05, "loss": 0.4085, "step": 7165 }, { "epoch": 1.224001731414349, "grad_norm": 1.53125, "learning_rate": 1.2783342516667343e-05, "loss": 0.5022, "step": 7166 }, { "epoch": 1.2241748728492587, "grad_norm": 1.5078125, "learning_rate": 1.2781590684987424e-05, "loss": 0.4664, "step": 7167 }, { "epoch": 1.2243480142841683, "grad_norm": 1.3359375, "learning_rate": 1.2779838760779221e-05, "loss": 0.4293, "step": 7168 }, { "epoch": 1.224521155719078, "grad_norm": 1.359375, "learning_rate": 1.2778086744101011e-05, "loss": 0.4633, "step": 7169 }, { "epoch": 1.2246942971539876, "grad_norm": 1.3515625, "learning_rate": 1.2776334635011076e-05, "loss": 0.3888, "step": 7170 }, { "epoch": 1.2248674385888974, "grad_norm": 1.3984375, "learning_rate": 1.2774582433567697e-05, "loss": 0.4916, "step": 7171 }, { "epoch": 1.225040580023807, "grad_norm": 1.375, "learning_rate": 1.277283013982916e-05, "loss": 0.4661, "step": 7172 }, { "epoch": 1.2252137214587167, "grad_norm": 1.3515625, "learning_rate": 1.2771077753853756e-05, "loss": 0.4829, "step": 7173 }, { "epoch": 1.2253868628936262, "grad_norm": 1.375, "learning_rate": 1.2769325275699773e-05, "loss": 0.4822, "step": 7174 }, { "epoch": 1.225560004328536, "grad_norm": 1.40625, "learning_rate": 1.2767572705425513e-05, "loss": 0.4956, "step": 7175 }, { "epoch": 1.2257331457634455, "grad_norm": 1.4609375, "learning_rate": 1.2765820043089268e-05, "loss": 0.5106, "step": 7176 }, { "epoch": 1.225906287198355, "grad_norm": 1.4453125, "learning_rate": 1.2764067288749342e-05, "loss": 0.4788, "step": 7177 }, { "epoch": 1.2260794286332648, "grad_norm": 1.390625, "learning_rate": 1.2762314442464043e-05, "loss": 0.4318, "step": 7178 }, { "epoch": 1.2262525700681746, "grad_norm": 1.6796875, "learning_rate": 1.2760561504291671e-05, "loss": 0.5756, "step": 7179 }, { "epoch": 1.226425711503084, "grad_norm": 1.40625, "learning_rate": 1.2758808474290543e-05, "loss": 0.4988, "step": 7180 }, { "epoch": 1.2265988529379936, "grad_norm": 1.4453125, "learning_rate": 1.275705535251897e-05, "loss": 0.4772, "step": 7181 }, { "epoch": 1.2267719943729034, "grad_norm": 1.3203125, "learning_rate": 1.2755302139035271e-05, "loss": 0.4343, "step": 7182 }, { "epoch": 1.226945135807813, "grad_norm": 1.3515625, "learning_rate": 1.2753548833897764e-05, "loss": 0.4594, "step": 7183 }, { "epoch": 1.2271182772427227, "grad_norm": 1.2734375, "learning_rate": 1.2751795437164772e-05, "loss": 0.4237, "step": 7184 }, { "epoch": 1.2272914186776323, "grad_norm": 1.359375, "learning_rate": 1.2750041948894621e-05, "loss": 0.4382, "step": 7185 }, { "epoch": 1.227464560112542, "grad_norm": 1.3515625, "learning_rate": 1.2748288369145639e-05, "loss": 0.5084, "step": 7186 }, { "epoch": 1.2276377015474516, "grad_norm": 1.5, "learning_rate": 1.2746534697976159e-05, "loss": 0.4429, "step": 7187 }, { "epoch": 1.227810842982361, "grad_norm": 1.40625, "learning_rate": 1.2744780935444516e-05, "loss": 0.4929, "step": 7188 }, { "epoch": 1.2279839844172709, "grad_norm": 1.5546875, "learning_rate": 1.2743027081609048e-05, "loss": 0.5133, "step": 7189 }, { "epoch": 1.2281571258521806, "grad_norm": 1.515625, "learning_rate": 1.2741273136528097e-05, "loss": 0.5228, "step": 7190 }, { "epoch": 1.2283302672870902, "grad_norm": 3.03125, "learning_rate": 1.2739519100260004e-05, "loss": 0.4689, "step": 7191 }, { "epoch": 1.2285034087219997, "grad_norm": 1.5234375, "learning_rate": 1.2737764972863122e-05, "loss": 0.5505, "step": 7192 }, { "epoch": 1.2286765501569095, "grad_norm": 1.3671875, "learning_rate": 1.2736010754395799e-05, "loss": 0.5256, "step": 7193 }, { "epoch": 1.228849691591819, "grad_norm": 1.546875, "learning_rate": 1.2734256444916382e-05, "loss": 0.444, "step": 7194 }, { "epoch": 1.2290228330267288, "grad_norm": 1.390625, "learning_rate": 1.2732502044483236e-05, "loss": 0.4404, "step": 7195 }, { "epoch": 1.2291959744616383, "grad_norm": 1.390625, "learning_rate": 1.2730747553154712e-05, "loss": 0.496, "step": 7196 }, { "epoch": 1.229369115896548, "grad_norm": 1.6328125, "learning_rate": 1.272899297098918e-05, "loss": 0.4961, "step": 7197 }, { "epoch": 1.2295422573314576, "grad_norm": 1.3828125, "learning_rate": 1.2727238298045002e-05, "loss": 0.4167, "step": 7198 }, { "epoch": 1.2297153987663672, "grad_norm": 1.5078125, "learning_rate": 1.2725483534380548e-05, "loss": 0.4909, "step": 7199 }, { "epoch": 1.229888540201277, "grad_norm": 1.4765625, "learning_rate": 1.2723728680054186e-05, "loss": 0.5193, "step": 7200 }, { "epoch": 1.2300616816361867, "grad_norm": 1.3828125, "learning_rate": 1.2721973735124292e-05, "loss": 0.4781, "step": 7201 }, { "epoch": 1.2302348230710962, "grad_norm": 1.3671875, "learning_rate": 1.2720218699649243e-05, "loss": 0.3992, "step": 7202 }, { "epoch": 1.2304079645060058, "grad_norm": 1.609375, "learning_rate": 1.271846357368742e-05, "loss": 0.471, "step": 7203 }, { "epoch": 1.2305811059409155, "grad_norm": 1.4921875, "learning_rate": 1.271670835729721e-05, "loss": 0.4806, "step": 7204 }, { "epoch": 1.230754247375825, "grad_norm": 1.4375, "learning_rate": 1.2714953050536993e-05, "loss": 0.4396, "step": 7205 }, { "epoch": 1.2309273888107348, "grad_norm": 1.3359375, "learning_rate": 1.2713197653465162e-05, "loss": 0.4248, "step": 7206 }, { "epoch": 1.2311005302456444, "grad_norm": 1.5390625, "learning_rate": 1.2711442166140108e-05, "loss": 0.5348, "step": 7207 }, { "epoch": 1.2312736716805541, "grad_norm": 1.359375, "learning_rate": 1.2709686588620227e-05, "loss": 0.485, "step": 7208 }, { "epoch": 1.2314468131154637, "grad_norm": 1.421875, "learning_rate": 1.270793092096392e-05, "loss": 0.4974, "step": 7209 }, { "epoch": 1.2316199545503734, "grad_norm": 1.421875, "learning_rate": 1.2706175163229583e-05, "loss": 0.5528, "step": 7210 }, { "epoch": 1.231793095985283, "grad_norm": 1.375, "learning_rate": 1.2704419315475629e-05, "loss": 0.4534, "step": 7211 }, { "epoch": 1.2319662374201927, "grad_norm": 1.3984375, "learning_rate": 1.2702663377760453e-05, "loss": 0.4568, "step": 7212 }, { "epoch": 1.2321393788551023, "grad_norm": 1.296875, "learning_rate": 1.270090735014248e-05, "loss": 0.4859, "step": 7213 }, { "epoch": 1.2323125202900118, "grad_norm": 1.3359375, "learning_rate": 1.2699151232680111e-05, "loss": 0.4706, "step": 7214 }, { "epoch": 1.2324856617249216, "grad_norm": 1.34375, "learning_rate": 1.2697395025431767e-05, "loss": 0.4508, "step": 7215 }, { "epoch": 1.2326588031598311, "grad_norm": 1.4140625, "learning_rate": 1.269563872845587e-05, "loss": 0.5493, "step": 7216 }, { "epoch": 1.2328319445947409, "grad_norm": 1.3203125, "learning_rate": 1.2693882341810837e-05, "loss": 0.4554, "step": 7217 }, { "epoch": 1.2330050860296504, "grad_norm": 1.375, "learning_rate": 1.26921258655551e-05, "loss": 0.5363, "step": 7218 }, { "epoch": 1.2331782274645602, "grad_norm": 1.4296875, "learning_rate": 1.2690369299747082e-05, "loss": 0.5041, "step": 7219 }, { "epoch": 1.2333513688994697, "grad_norm": 1.4609375, "learning_rate": 1.2688612644445215e-05, "loss": 0.5048, "step": 7220 }, { "epoch": 1.2335245103343795, "grad_norm": 1.421875, "learning_rate": 1.2686855899707936e-05, "loss": 0.4485, "step": 7221 }, { "epoch": 1.233697651769289, "grad_norm": 1.34375, "learning_rate": 1.2685099065593683e-05, "loss": 0.4149, "step": 7222 }, { "epoch": 1.2338707932041988, "grad_norm": 1.4140625, "learning_rate": 1.2683342142160888e-05, "loss": 0.5115, "step": 7223 }, { "epoch": 1.2340439346391083, "grad_norm": 1.375, "learning_rate": 1.2681585129468003e-05, "loss": 0.4971, "step": 7224 }, { "epoch": 1.2342170760740179, "grad_norm": 1.4609375, "learning_rate": 1.2679828027573474e-05, "loss": 0.4974, "step": 7225 }, { "epoch": 1.2343902175089276, "grad_norm": 1.390625, "learning_rate": 1.2678070836535745e-05, "loss": 0.4973, "step": 7226 }, { "epoch": 1.2345633589438372, "grad_norm": 1.421875, "learning_rate": 1.2676313556413271e-05, "loss": 0.455, "step": 7227 }, { "epoch": 1.234736500378747, "grad_norm": 1.3671875, "learning_rate": 1.2674556187264506e-05, "loss": 0.502, "step": 7228 }, { "epoch": 1.2349096418136565, "grad_norm": 1.375, "learning_rate": 1.2672798729147909e-05, "loss": 0.5257, "step": 7229 }, { "epoch": 1.2350827832485662, "grad_norm": 1.390625, "learning_rate": 1.2671041182121941e-05, "loss": 0.5881, "step": 7230 }, { "epoch": 1.2352559246834758, "grad_norm": 1.3515625, "learning_rate": 1.2669283546245065e-05, "loss": 0.4912, "step": 7231 }, { "epoch": 1.2354290661183855, "grad_norm": 1.5546875, "learning_rate": 1.266752582157575e-05, "loss": 0.5622, "step": 7232 }, { "epoch": 1.235602207553295, "grad_norm": 1.2265625, "learning_rate": 1.2665768008172461e-05, "loss": 0.4405, "step": 7233 }, { "epoch": 1.2357753489882048, "grad_norm": 1.5, "learning_rate": 1.2664010106093679e-05, "loss": 0.5155, "step": 7234 }, { "epoch": 1.2359484904231144, "grad_norm": 1.390625, "learning_rate": 1.2662252115397872e-05, "loss": 0.4622, "step": 7235 }, { "epoch": 1.236121631858024, "grad_norm": 1.390625, "learning_rate": 1.2660494036143525e-05, "loss": 0.476, "step": 7236 }, { "epoch": 1.2362947732929337, "grad_norm": 1.5859375, "learning_rate": 1.2658735868389113e-05, "loss": 0.5135, "step": 7237 }, { "epoch": 1.2364679147278432, "grad_norm": 1.3515625, "learning_rate": 1.2656977612193127e-05, "loss": 0.462, "step": 7238 }, { "epoch": 1.236641056162753, "grad_norm": 1.3984375, "learning_rate": 1.2655219267614046e-05, "loss": 0.4473, "step": 7239 }, { "epoch": 1.2368141975976625, "grad_norm": 1.359375, "learning_rate": 1.2653460834710372e-05, "loss": 0.4243, "step": 7240 }, { "epoch": 1.2369873390325723, "grad_norm": 1.390625, "learning_rate": 1.265170231354059e-05, "loss": 0.4916, "step": 7241 }, { "epoch": 1.2371604804674818, "grad_norm": 1.4375, "learning_rate": 1.2649943704163202e-05, "loss": 0.4494, "step": 7242 }, { "epoch": 1.2373336219023916, "grad_norm": 1.3359375, "learning_rate": 1.2648185006636699e-05, "loss": 0.3856, "step": 7243 }, { "epoch": 1.2375067633373011, "grad_norm": 1.53125, "learning_rate": 1.2646426221019593e-05, "loss": 0.5126, "step": 7244 }, { "epoch": 1.237679904772211, "grad_norm": 1.4453125, "learning_rate": 1.2644667347370381e-05, "loss": 0.4637, "step": 7245 }, { "epoch": 1.2378530462071204, "grad_norm": 1.3515625, "learning_rate": 1.2642908385747575e-05, "loss": 0.4801, "step": 7246 }, { "epoch": 1.23802618764203, "grad_norm": 1.4765625, "learning_rate": 1.2641149336209688e-05, "loss": 0.4776, "step": 7247 }, { "epoch": 1.2381993290769397, "grad_norm": 1.359375, "learning_rate": 1.2639390198815232e-05, "loss": 0.4603, "step": 7248 }, { "epoch": 1.2383724705118493, "grad_norm": 1.453125, "learning_rate": 1.263763097362272e-05, "loss": 0.4691, "step": 7249 }, { "epoch": 1.238545611946759, "grad_norm": 1.453125, "learning_rate": 1.2635871660690677e-05, "loss": 0.4741, "step": 7250 }, { "epoch": 1.2387187533816686, "grad_norm": 1.390625, "learning_rate": 1.2634112260077627e-05, "loss": 0.4804, "step": 7251 }, { "epoch": 1.2388918948165784, "grad_norm": 1.4609375, "learning_rate": 1.2632352771842088e-05, "loss": 0.454, "step": 7252 }, { "epoch": 1.239065036251488, "grad_norm": 1.5, "learning_rate": 1.2630593196042596e-05, "loss": 0.5192, "step": 7253 }, { "epoch": 1.2392381776863977, "grad_norm": 1.375, "learning_rate": 1.262883353273768e-05, "loss": 0.5036, "step": 7254 }, { "epoch": 1.2394113191213072, "grad_norm": 1.34375, "learning_rate": 1.262707378198587e-05, "loss": 0.4446, "step": 7255 }, { "epoch": 1.239584460556217, "grad_norm": 1.484375, "learning_rate": 1.2625313943845711e-05, "loss": 0.4727, "step": 7256 }, { "epoch": 1.2397576019911265, "grad_norm": 1.4453125, "learning_rate": 1.262355401837574e-05, "loss": 0.4897, "step": 7257 }, { "epoch": 1.239930743426036, "grad_norm": 1.3828125, "learning_rate": 1.2621794005634496e-05, "loss": 0.4633, "step": 7258 }, { "epoch": 1.2401038848609458, "grad_norm": 1.390625, "learning_rate": 1.262003390568053e-05, "loss": 0.5828, "step": 7259 }, { "epoch": 1.2402770262958553, "grad_norm": 1.453125, "learning_rate": 1.2618273718572386e-05, "loss": 0.5241, "step": 7260 }, { "epoch": 1.240450167730765, "grad_norm": 1.4140625, "learning_rate": 1.2616513444368625e-05, "loss": 0.486, "step": 7261 }, { "epoch": 1.2406233091656746, "grad_norm": 1.3515625, "learning_rate": 1.2614753083127793e-05, "loss": 0.4671, "step": 7262 }, { "epoch": 1.2407964506005844, "grad_norm": 1.28125, "learning_rate": 1.2612992634908454e-05, "loss": 0.4252, "step": 7263 }, { "epoch": 1.240969592035494, "grad_norm": 1.3984375, "learning_rate": 1.261123209976916e-05, "loss": 0.5299, "step": 7264 }, { "epoch": 1.2411427334704037, "grad_norm": 1.3828125, "learning_rate": 1.260947147776848e-05, "loss": 0.4203, "step": 7265 }, { "epoch": 1.2413158749053133, "grad_norm": 1.4453125, "learning_rate": 1.260771076896498e-05, "loss": 0.4573, "step": 7266 }, { "epoch": 1.241489016340223, "grad_norm": 1.40625, "learning_rate": 1.2605949973417229e-05, "loss": 0.4755, "step": 7267 }, { "epoch": 1.2416621577751326, "grad_norm": 1.390625, "learning_rate": 1.26041890911838e-05, "loss": 0.4452, "step": 7268 }, { "epoch": 1.241835299210042, "grad_norm": 1.3046875, "learning_rate": 1.2602428122323265e-05, "loss": 0.4371, "step": 7269 }, { "epoch": 1.2420084406449519, "grad_norm": 1.3984375, "learning_rate": 1.2600667066894201e-05, "loss": 0.4494, "step": 7270 }, { "epoch": 1.2421815820798614, "grad_norm": 1.4296875, "learning_rate": 1.2598905924955194e-05, "loss": 0.5111, "step": 7271 }, { "epoch": 1.2423547235147712, "grad_norm": 1.4921875, "learning_rate": 1.2597144696564822e-05, "loss": 0.5087, "step": 7272 }, { "epoch": 1.2425278649496807, "grad_norm": 1.4765625, "learning_rate": 1.2595383381781678e-05, "loss": 0.5236, "step": 7273 }, { "epoch": 1.2427010063845905, "grad_norm": 1.3359375, "learning_rate": 1.2593621980664343e-05, "loss": 0.4483, "step": 7274 }, { "epoch": 1.2428741478195, "grad_norm": 1.3984375, "learning_rate": 1.2591860493271417e-05, "loss": 0.4943, "step": 7275 }, { "epoch": 1.2430472892544098, "grad_norm": 1.328125, "learning_rate": 1.259009891966149e-05, "loss": 0.4139, "step": 7276 }, { "epoch": 1.2432204306893193, "grad_norm": 1.3515625, "learning_rate": 1.2588337259893161e-05, "loss": 0.464, "step": 7277 }, { "epoch": 1.243393572124229, "grad_norm": 1.40625, "learning_rate": 1.258657551402503e-05, "loss": 0.4585, "step": 7278 }, { "epoch": 1.2435667135591386, "grad_norm": 1.4296875, "learning_rate": 1.2584813682115704e-05, "loss": 0.4995, "step": 7279 }, { "epoch": 1.2437398549940482, "grad_norm": 1.3359375, "learning_rate": 1.2583051764223787e-05, "loss": 0.4839, "step": 7280 }, { "epoch": 1.243912996428958, "grad_norm": 1.25, "learning_rate": 1.2581289760407886e-05, "loss": 0.3916, "step": 7281 }, { "epoch": 1.2440861378638675, "grad_norm": 1.3046875, "learning_rate": 1.2579527670726618e-05, "loss": 0.4702, "step": 7282 }, { "epoch": 1.2442592792987772, "grad_norm": 1.5625, "learning_rate": 1.2577765495238595e-05, "loss": 0.5419, "step": 7283 }, { "epoch": 1.2444324207336868, "grad_norm": 1.4375, "learning_rate": 1.2576003234002436e-05, "loss": 0.4941, "step": 7284 }, { "epoch": 1.2446055621685965, "grad_norm": 1.390625, "learning_rate": 1.2574240887076764e-05, "loss": 0.4616, "step": 7285 }, { "epoch": 1.244778703603506, "grad_norm": 1.359375, "learning_rate": 1.2572478454520195e-05, "loss": 0.4516, "step": 7286 }, { "epoch": 1.2449518450384158, "grad_norm": 1.4140625, "learning_rate": 1.2570715936391366e-05, "loss": 0.5289, "step": 7287 }, { "epoch": 1.2451249864733254, "grad_norm": 1.46875, "learning_rate": 1.2568953332748897e-05, "loss": 0.4926, "step": 7288 }, { "epoch": 1.2452981279082351, "grad_norm": 1.40625, "learning_rate": 1.2567190643651426e-05, "loss": 0.4578, "step": 7289 }, { "epoch": 1.2454712693431447, "grad_norm": 1.4375, "learning_rate": 1.2565427869157586e-05, "loss": 0.4333, "step": 7290 }, { "epoch": 1.2456444107780542, "grad_norm": 1.3125, "learning_rate": 1.2563665009326013e-05, "loss": 0.4204, "step": 7291 }, { "epoch": 1.245817552212964, "grad_norm": 1.375, "learning_rate": 1.2561902064215355e-05, "loss": 0.4519, "step": 7292 }, { "epoch": 1.2459906936478735, "grad_norm": 1.4140625, "learning_rate": 1.2560139033884244e-05, "loss": 0.5264, "step": 7293 }, { "epoch": 1.2461638350827833, "grad_norm": 1.3359375, "learning_rate": 1.2558375918391336e-05, "loss": 0.4963, "step": 7294 }, { "epoch": 1.2463369765176928, "grad_norm": 1.3984375, "learning_rate": 1.2556612717795276e-05, "loss": 0.4592, "step": 7295 }, { "epoch": 1.2465101179526026, "grad_norm": 1.4296875, "learning_rate": 1.2554849432154717e-05, "loss": 0.4754, "step": 7296 }, { "epoch": 1.2466832593875121, "grad_norm": 1.328125, "learning_rate": 1.2553086061528314e-05, "loss": 0.4397, "step": 7297 }, { "epoch": 1.2468564008224219, "grad_norm": 1.4765625, "learning_rate": 1.2551322605974722e-05, "loss": 0.5008, "step": 7298 }, { "epoch": 1.2470295422573314, "grad_norm": 1.3359375, "learning_rate": 1.2549559065552609e-05, "loss": 0.4461, "step": 7299 }, { "epoch": 1.2472026836922412, "grad_norm": 1.4140625, "learning_rate": 1.2547795440320626e-05, "loss": 0.5384, "step": 7300 }, { "epoch": 1.2473758251271507, "grad_norm": 1.4609375, "learning_rate": 1.2546031730337453e-05, "loss": 0.5012, "step": 7301 }, { "epoch": 1.2475489665620603, "grad_norm": 1.4765625, "learning_rate": 1.2544267935661751e-05, "loss": 0.4215, "step": 7302 }, { "epoch": 1.24772210799697, "grad_norm": 1.515625, "learning_rate": 1.2542504056352192e-05, "loss": 0.4667, "step": 7303 }, { "epoch": 1.2478952494318796, "grad_norm": 1.3515625, "learning_rate": 1.2540740092467449e-05, "loss": 0.4842, "step": 7304 }, { "epoch": 1.2480683908667893, "grad_norm": 1.390625, "learning_rate": 1.2538976044066204e-05, "loss": 0.4479, "step": 7305 }, { "epoch": 1.2482415323016989, "grad_norm": 1.390625, "learning_rate": 1.2537211911207139e-05, "loss": 0.4733, "step": 7306 }, { "epoch": 1.2484146737366086, "grad_norm": 1.3984375, "learning_rate": 1.253544769394893e-05, "loss": 0.4965, "step": 7307 }, { "epoch": 1.2485878151715182, "grad_norm": 1.6640625, "learning_rate": 1.2533683392350264e-05, "loss": 0.4901, "step": 7308 }, { "epoch": 1.248760956606428, "grad_norm": 1.3359375, "learning_rate": 1.2531919006469836e-05, "loss": 0.4729, "step": 7309 }, { "epoch": 1.2489340980413375, "grad_norm": 1.2890625, "learning_rate": 1.2530154536366331e-05, "loss": 0.3969, "step": 7310 }, { "epoch": 1.2491072394762472, "grad_norm": 1.4765625, "learning_rate": 1.2528389982098448e-05, "loss": 0.5, "step": 7311 }, { "epoch": 1.2492803809111568, "grad_norm": 1.5625, "learning_rate": 1.2526625343724879e-05, "loss": 0.4722, "step": 7312 }, { "epoch": 1.2494535223460663, "grad_norm": 1.625, "learning_rate": 1.2524860621304327e-05, "loss": 0.4785, "step": 7313 }, { "epoch": 1.249626663780976, "grad_norm": 1.5703125, "learning_rate": 1.2523095814895494e-05, "loss": 0.4764, "step": 7314 }, { "epoch": 1.2497998052158859, "grad_norm": 1.6953125, "learning_rate": 1.2521330924557087e-05, "loss": 0.5105, "step": 7315 }, { "epoch": 1.2499729466507954, "grad_norm": 1.3984375, "learning_rate": 1.251956595034781e-05, "loss": 0.4574, "step": 7316 }, { "epoch": 1.250146088085705, "grad_norm": 1.359375, "learning_rate": 1.251780089232638e-05, "loss": 0.4455, "step": 7317 }, { "epoch": 1.2503192295206147, "grad_norm": 1.421875, "learning_rate": 1.2516035750551504e-05, "loss": 0.4975, "step": 7318 }, { "epoch": 1.2504923709555242, "grad_norm": 1.4140625, "learning_rate": 1.2514270525081907e-05, "loss": 0.4751, "step": 7319 }, { "epoch": 1.250665512390434, "grad_norm": 1.375, "learning_rate": 1.2512505215976298e-05, "loss": 0.5033, "step": 7320 }, { "epoch": 1.2508386538253435, "grad_norm": 1.4296875, "learning_rate": 1.251073982329341e-05, "loss": 0.4899, "step": 7321 }, { "epoch": 1.2510117952602533, "grad_norm": 1.4140625, "learning_rate": 1.250897434709196e-05, "loss": 0.4581, "step": 7322 }, { "epoch": 1.2511849366951628, "grad_norm": 1.4609375, "learning_rate": 1.250720878743068e-05, "loss": 0.4318, "step": 7323 }, { "epoch": 1.2513580781300724, "grad_norm": 1.46875, "learning_rate": 1.2505443144368297e-05, "loss": 0.4493, "step": 7324 }, { "epoch": 1.2515312195649821, "grad_norm": 1.390625, "learning_rate": 1.2503677417963547e-05, "loss": 0.4546, "step": 7325 }, { "epoch": 1.251704360999892, "grad_norm": 1.3984375, "learning_rate": 1.2501911608275164e-05, "loss": 0.4952, "step": 7326 }, { "epoch": 1.2518775024348014, "grad_norm": 1.4609375, "learning_rate": 1.2500145715361891e-05, "loss": 0.4145, "step": 7327 }, { "epoch": 1.252050643869711, "grad_norm": 1.46875, "learning_rate": 1.2498379739282465e-05, "loss": 0.4428, "step": 7328 }, { "epoch": 1.2522237853046208, "grad_norm": 1.3984375, "learning_rate": 1.249661368009563e-05, "loss": 0.4744, "step": 7329 }, { "epoch": 1.2523969267395303, "grad_norm": 1.3671875, "learning_rate": 1.249484753786014e-05, "loss": 0.4707, "step": 7330 }, { "epoch": 1.25257006817444, "grad_norm": 1.3671875, "learning_rate": 1.2493081312634733e-05, "loss": 0.4705, "step": 7331 }, { "epoch": 1.2527432096093496, "grad_norm": 1.390625, "learning_rate": 1.2491315004478175e-05, "loss": 0.4563, "step": 7332 }, { "epoch": 1.2529163510442594, "grad_norm": 1.40625, "learning_rate": 1.2489548613449212e-05, "loss": 0.4599, "step": 7333 }, { "epoch": 1.253089492479169, "grad_norm": 1.421875, "learning_rate": 1.2487782139606605e-05, "loss": 0.4406, "step": 7334 }, { "epoch": 1.2532626339140784, "grad_norm": 1.390625, "learning_rate": 1.2486015583009118e-05, "loss": 0.4835, "step": 7335 }, { "epoch": 1.2534357753489882, "grad_norm": 1.3828125, "learning_rate": 1.2484248943715507e-05, "loss": 0.5271, "step": 7336 }, { "epoch": 1.253608916783898, "grad_norm": 1.375, "learning_rate": 1.2482482221784546e-05, "loss": 0.4596, "step": 7337 }, { "epoch": 1.2537820582188075, "grad_norm": 1.2890625, "learning_rate": 1.2480715417275003e-05, "loss": 0.4244, "step": 7338 }, { "epoch": 1.253955199653717, "grad_norm": 1.34375, "learning_rate": 1.2478948530245647e-05, "loss": 0.5062, "step": 7339 }, { "epoch": 1.2541283410886268, "grad_norm": 1.3828125, "learning_rate": 1.2477181560755253e-05, "loss": 0.4908, "step": 7340 }, { "epoch": 1.2543014825235363, "grad_norm": 1.3359375, "learning_rate": 1.2475414508862598e-05, "loss": 0.5248, "step": 7341 }, { "epoch": 1.2544746239584461, "grad_norm": 1.375, "learning_rate": 1.2473647374626466e-05, "loss": 0.4434, "step": 7342 }, { "epoch": 1.2546477653933557, "grad_norm": 1.3203125, "learning_rate": 1.2471880158105634e-05, "loss": 0.4654, "step": 7343 }, { "epoch": 1.2548209068282654, "grad_norm": 1.484375, "learning_rate": 1.2470112859358897e-05, "loss": 0.5071, "step": 7344 }, { "epoch": 1.254994048263175, "grad_norm": 1.4453125, "learning_rate": 1.2468345478445029e-05, "loss": 0.4495, "step": 7345 }, { "epoch": 1.2551671896980845, "grad_norm": 1.453125, "learning_rate": 1.2466578015422835e-05, "loss": 0.4178, "step": 7346 }, { "epoch": 1.2553403311329943, "grad_norm": 1.3515625, "learning_rate": 1.24648104703511e-05, "loss": 0.4208, "step": 7347 }, { "epoch": 1.255513472567904, "grad_norm": 1.4140625, "learning_rate": 1.2463042843288627e-05, "loss": 0.4939, "step": 7348 }, { "epoch": 1.2556866140028136, "grad_norm": 1.484375, "learning_rate": 1.2461275134294211e-05, "loss": 0.4674, "step": 7349 }, { "epoch": 1.255859755437723, "grad_norm": 1.46875, "learning_rate": 1.2459507343426653e-05, "loss": 0.4776, "step": 7350 }, { "epoch": 1.2560328968726329, "grad_norm": 1.3203125, "learning_rate": 1.2457739470744763e-05, "loss": 0.4744, "step": 7351 }, { "epoch": 1.2562060383075424, "grad_norm": 1.3828125, "learning_rate": 1.2455971516307339e-05, "loss": 0.4698, "step": 7352 }, { "epoch": 1.2563791797424522, "grad_norm": 1.34375, "learning_rate": 1.2454203480173203e-05, "loss": 0.4748, "step": 7353 }, { "epoch": 1.2565523211773617, "grad_norm": 1.4921875, "learning_rate": 1.2452435362401161e-05, "loss": 0.5059, "step": 7354 }, { "epoch": 1.2567254626122715, "grad_norm": 1.4375, "learning_rate": 1.2450667163050028e-05, "loss": 0.4823, "step": 7355 }, { "epoch": 1.256898604047181, "grad_norm": 1.4921875, "learning_rate": 1.2448898882178626e-05, "loss": 0.5339, "step": 7356 }, { "epoch": 1.2570717454820906, "grad_norm": 1.421875, "learning_rate": 1.2447130519845773e-05, "loss": 0.4398, "step": 7357 }, { "epoch": 1.2572448869170003, "grad_norm": 1.328125, "learning_rate": 1.2445362076110295e-05, "loss": 0.5125, "step": 7358 }, { "epoch": 1.25741802835191, "grad_norm": 1.359375, "learning_rate": 1.2443593551031014e-05, "loss": 0.454, "step": 7359 }, { "epoch": 1.2575911697868196, "grad_norm": 1.3046875, "learning_rate": 1.2441824944666763e-05, "loss": 0.4202, "step": 7360 }, { "epoch": 1.2577643112217292, "grad_norm": 1.4296875, "learning_rate": 1.2440056257076376e-05, "loss": 0.4432, "step": 7361 }, { "epoch": 1.257937452656639, "grad_norm": 1.3203125, "learning_rate": 1.2438287488318684e-05, "loss": 0.4049, "step": 7362 }, { "epoch": 1.2581105940915485, "grad_norm": 1.3984375, "learning_rate": 1.2436518638452523e-05, "loss": 0.4256, "step": 7363 }, { "epoch": 1.2582837355264582, "grad_norm": 1.28125, "learning_rate": 1.2434749707536735e-05, "loss": 0.4602, "step": 7364 }, { "epoch": 1.2584568769613678, "grad_norm": 1.34375, "learning_rate": 1.2432980695630166e-05, "loss": 0.4162, "step": 7365 }, { "epoch": 1.2586300183962775, "grad_norm": 1.4453125, "learning_rate": 1.2431211602791655e-05, "loss": 0.4679, "step": 7366 }, { "epoch": 1.258803159831187, "grad_norm": 1.2890625, "learning_rate": 1.2429442429080054e-05, "loss": 0.4815, "step": 7367 }, { "epoch": 1.2589763012660966, "grad_norm": 1.4140625, "learning_rate": 1.2427673174554213e-05, "loss": 0.4485, "step": 7368 }, { "epoch": 1.2591494427010064, "grad_norm": 1.46875, "learning_rate": 1.2425903839272981e-05, "loss": 0.4895, "step": 7369 }, { "epoch": 1.2593225841359161, "grad_norm": 1.390625, "learning_rate": 1.2424134423295225e-05, "loss": 0.4462, "step": 7370 }, { "epoch": 1.2594957255708257, "grad_norm": 1.3359375, "learning_rate": 1.242236492667979e-05, "loss": 0.4561, "step": 7371 }, { "epoch": 1.2596688670057352, "grad_norm": 1.3515625, "learning_rate": 1.2420595349485547e-05, "loss": 0.4723, "step": 7372 }, { "epoch": 1.259842008440645, "grad_norm": 1.5234375, "learning_rate": 1.2418825691771361e-05, "loss": 0.5083, "step": 7373 }, { "epoch": 1.2600151498755545, "grad_norm": 1.3046875, "learning_rate": 1.2417055953596089e-05, "loss": 0.4473, "step": 7374 }, { "epoch": 1.2601882913104643, "grad_norm": 1.3515625, "learning_rate": 1.2415286135018612e-05, "loss": 0.4821, "step": 7375 }, { "epoch": 1.2603614327453738, "grad_norm": 1.390625, "learning_rate": 1.2413516236097796e-05, "loss": 0.415, "step": 7376 }, { "epoch": 1.2605345741802836, "grad_norm": 1.484375, "learning_rate": 1.2411746256892515e-05, "loss": 0.4471, "step": 7377 }, { "epoch": 1.2607077156151931, "grad_norm": 1.4609375, "learning_rate": 1.2409976197461651e-05, "loss": 0.4696, "step": 7378 }, { "epoch": 1.2608808570501027, "grad_norm": 1.5859375, "learning_rate": 1.2408206057864079e-05, "loss": 0.5176, "step": 7379 }, { "epoch": 1.2610539984850124, "grad_norm": 1.375, "learning_rate": 1.2406435838158686e-05, "loss": 0.4966, "step": 7380 }, { "epoch": 1.2612271399199222, "grad_norm": 1.4921875, "learning_rate": 1.2404665538404354e-05, "loss": 0.551, "step": 7381 }, { "epoch": 1.2614002813548317, "grad_norm": 1.625, "learning_rate": 1.2402895158659975e-05, "loss": 0.4677, "step": 7382 }, { "epoch": 1.2615734227897413, "grad_norm": 1.3203125, "learning_rate": 1.2401124698984437e-05, "loss": 0.4593, "step": 7383 }, { "epoch": 1.261746564224651, "grad_norm": 1.3125, "learning_rate": 1.2399354159436633e-05, "loss": 0.5281, "step": 7384 }, { "epoch": 1.2619197056595606, "grad_norm": 1.4375, "learning_rate": 1.2397583540075461e-05, "loss": 0.4806, "step": 7385 }, { "epoch": 1.2620928470944703, "grad_norm": 1.375, "learning_rate": 1.2395812840959823e-05, "loss": 0.49, "step": 7386 }, { "epoch": 1.2622659885293799, "grad_norm": 1.3515625, "learning_rate": 1.2394042062148615e-05, "loss": 0.4772, "step": 7387 }, { "epoch": 1.2624391299642896, "grad_norm": 1.6171875, "learning_rate": 1.2392271203700742e-05, "loss": 0.5559, "step": 7388 }, { "epoch": 1.2626122713991992, "grad_norm": 1.3671875, "learning_rate": 1.2390500265675112e-05, "loss": 0.4122, "step": 7389 }, { "epoch": 1.262785412834109, "grad_norm": 1.3359375, "learning_rate": 1.2388729248130633e-05, "loss": 0.4666, "step": 7390 }, { "epoch": 1.2629585542690185, "grad_norm": 1.375, "learning_rate": 1.2386958151126221e-05, "loss": 0.4828, "step": 7391 }, { "epoch": 1.2631316957039282, "grad_norm": 1.3203125, "learning_rate": 1.2385186974720784e-05, "loss": 0.4144, "step": 7392 }, { "epoch": 1.2633048371388378, "grad_norm": 1.3515625, "learning_rate": 1.2383415718973245e-05, "loss": 0.4803, "step": 7393 }, { "epoch": 1.2634779785737473, "grad_norm": 1.3515625, "learning_rate": 1.2381644383942524e-05, "loss": 0.4425, "step": 7394 }, { "epoch": 1.263651120008657, "grad_norm": 1.484375, "learning_rate": 1.2379872969687538e-05, "loss": 0.474, "step": 7395 }, { "epoch": 1.2638242614435666, "grad_norm": 1.4140625, "learning_rate": 1.2378101476267218e-05, "loss": 0.4983, "step": 7396 }, { "epoch": 1.2639974028784764, "grad_norm": 1.265625, "learning_rate": 1.237632990374049e-05, "loss": 0.4146, "step": 7397 }, { "epoch": 1.264170544313386, "grad_norm": 1.46875, "learning_rate": 1.2374558252166283e-05, "loss": 0.4925, "step": 7398 }, { "epoch": 1.2643436857482957, "grad_norm": 1.3984375, "learning_rate": 1.2372786521603536e-05, "loss": 0.4418, "step": 7399 }, { "epoch": 1.2645168271832052, "grad_norm": 1.2734375, "learning_rate": 1.2371014712111174e-05, "loss": 0.453, "step": 7400 }, { "epoch": 1.264689968618115, "grad_norm": 1.2578125, "learning_rate": 1.2369242823748144e-05, "loss": 0.4171, "step": 7401 }, { "epoch": 1.2648631100530245, "grad_norm": 1.40625, "learning_rate": 1.2367470856573385e-05, "loss": 0.5203, "step": 7402 }, { "epoch": 1.2650362514879343, "grad_norm": 1.4609375, "learning_rate": 1.2365698810645844e-05, "loss": 0.5641, "step": 7403 }, { "epoch": 1.2652093929228438, "grad_norm": 1.4609375, "learning_rate": 1.2363926686024459e-05, "loss": 0.4745, "step": 7404 }, { "epoch": 1.2653825343577534, "grad_norm": 1.4140625, "learning_rate": 1.2362154482768184e-05, "loss": 0.4266, "step": 7405 }, { "epoch": 1.2655556757926631, "grad_norm": 1.28125, "learning_rate": 1.2360382200935971e-05, "loss": 0.4411, "step": 7406 }, { "epoch": 1.265728817227573, "grad_norm": 1.46875, "learning_rate": 1.2358609840586773e-05, "loss": 0.4958, "step": 7407 }, { "epoch": 1.2659019586624825, "grad_norm": 1.546875, "learning_rate": 1.2356837401779546e-05, "loss": 0.4906, "step": 7408 }, { "epoch": 1.266075100097392, "grad_norm": 1.3828125, "learning_rate": 1.235506488457325e-05, "loss": 0.4254, "step": 7409 }, { "epoch": 1.2662482415323018, "grad_norm": 1.3828125, "learning_rate": 1.235329228902685e-05, "loss": 0.475, "step": 7410 }, { "epoch": 1.2664213829672113, "grad_norm": 1.515625, "learning_rate": 1.2351519615199305e-05, "loss": 0.4478, "step": 7411 }, { "epoch": 1.266594524402121, "grad_norm": 1.421875, "learning_rate": 1.2349746863149586e-05, "loss": 0.5049, "step": 7412 }, { "epoch": 1.2667676658370306, "grad_norm": 1.4296875, "learning_rate": 1.2347974032936661e-05, "loss": 0.4919, "step": 7413 }, { "epoch": 1.2669408072719404, "grad_norm": 1.3203125, "learning_rate": 1.2346201124619502e-05, "loss": 0.4037, "step": 7414 }, { "epoch": 1.26711394870685, "grad_norm": 1.4140625, "learning_rate": 1.2344428138257088e-05, "loss": 0.5178, "step": 7415 }, { "epoch": 1.2672870901417594, "grad_norm": 1.375, "learning_rate": 1.234265507390839e-05, "loss": 0.4909, "step": 7416 }, { "epoch": 1.2674602315766692, "grad_norm": 1.3984375, "learning_rate": 1.2340881931632391e-05, "loss": 0.4829, "step": 7417 }, { "epoch": 1.267633373011579, "grad_norm": 1.328125, "learning_rate": 1.2339108711488077e-05, "loss": 0.4344, "step": 7418 }, { "epoch": 1.2678065144464885, "grad_norm": 1.4765625, "learning_rate": 1.2337335413534428e-05, "loss": 0.4898, "step": 7419 }, { "epoch": 1.267979655881398, "grad_norm": 1.3359375, "learning_rate": 1.2335562037830438e-05, "loss": 0.4173, "step": 7420 }, { "epoch": 1.2681527973163078, "grad_norm": 1.4375, "learning_rate": 1.2333788584435089e-05, "loss": 0.509, "step": 7421 }, { "epoch": 1.2683259387512174, "grad_norm": 1.421875, "learning_rate": 1.2332015053407383e-05, "loss": 0.4988, "step": 7422 }, { "epoch": 1.2684990801861271, "grad_norm": 1.5078125, "learning_rate": 1.233024144480631e-05, "loss": 0.4931, "step": 7423 }, { "epoch": 1.2686722216210367, "grad_norm": 1.40625, "learning_rate": 1.2328467758690869e-05, "loss": 0.4075, "step": 7424 }, { "epoch": 1.2688453630559464, "grad_norm": 1.3125, "learning_rate": 1.2326693995120066e-05, "loss": 0.4899, "step": 7425 }, { "epoch": 1.269018504490856, "grad_norm": 1.5078125, "learning_rate": 1.2324920154152896e-05, "loss": 0.4704, "step": 7426 }, { "epoch": 1.2691916459257655, "grad_norm": 1.4296875, "learning_rate": 1.232314623584837e-05, "loss": 0.4645, "step": 7427 }, { "epoch": 1.2693647873606753, "grad_norm": 1.421875, "learning_rate": 1.2321372240265495e-05, "loss": 0.503, "step": 7428 }, { "epoch": 1.269537928795585, "grad_norm": 1.5390625, "learning_rate": 1.2319598167463285e-05, "loss": 0.4714, "step": 7429 }, { "epoch": 1.2697110702304946, "grad_norm": 1.453125, "learning_rate": 1.2317824017500751e-05, "loss": 0.4811, "step": 7430 }, { "epoch": 1.269884211665404, "grad_norm": 1.3359375, "learning_rate": 1.2316049790436905e-05, "loss": 0.4332, "step": 7431 }, { "epoch": 1.2700573531003139, "grad_norm": 1.421875, "learning_rate": 1.2314275486330778e-05, "loss": 0.4847, "step": 7432 }, { "epoch": 1.2702304945352234, "grad_norm": 1.4140625, "learning_rate": 1.2312501105241381e-05, "loss": 0.5007, "step": 7433 }, { "epoch": 1.2704036359701332, "grad_norm": 1.546875, "learning_rate": 1.231072664722774e-05, "loss": 0.5186, "step": 7434 }, { "epoch": 1.2705767774050427, "grad_norm": 1.3515625, "learning_rate": 1.2308952112348882e-05, "loss": 0.4644, "step": 7435 }, { "epoch": 1.2707499188399525, "grad_norm": 1.515625, "learning_rate": 1.2307177500663836e-05, "loss": 0.4996, "step": 7436 }, { "epoch": 1.270923060274862, "grad_norm": 1.296875, "learning_rate": 1.2305402812231634e-05, "loss": 0.3974, "step": 7437 }, { "epoch": 1.2710962017097716, "grad_norm": 1.34375, "learning_rate": 1.2303628047111312e-05, "loss": 0.442, "step": 7438 }, { "epoch": 1.2712693431446813, "grad_norm": 1.3203125, "learning_rate": 1.2301853205361904e-05, "loss": 0.4239, "step": 7439 }, { "epoch": 1.271442484579591, "grad_norm": 1.375, "learning_rate": 1.230007828704245e-05, "loss": 0.4851, "step": 7440 }, { "epoch": 1.2716156260145006, "grad_norm": 1.46875, "learning_rate": 1.2298303292211993e-05, "loss": 0.5252, "step": 7441 }, { "epoch": 1.2717887674494102, "grad_norm": 1.3046875, "learning_rate": 1.2296528220929576e-05, "loss": 0.3859, "step": 7442 }, { "epoch": 1.27196190888432, "grad_norm": 1.3515625, "learning_rate": 1.2294753073254244e-05, "loss": 0.4542, "step": 7443 }, { "epoch": 1.2721350503192295, "grad_norm": 1.390625, "learning_rate": 1.229297784924505e-05, "loss": 0.4992, "step": 7444 }, { "epoch": 1.2723081917541392, "grad_norm": 1.4609375, "learning_rate": 1.2291202548961042e-05, "loss": 0.4759, "step": 7445 }, { "epoch": 1.2724813331890488, "grad_norm": 1.609375, "learning_rate": 1.2289427172461283e-05, "loss": 0.5492, "step": 7446 }, { "epoch": 1.2726544746239585, "grad_norm": 1.5, "learning_rate": 1.2287651719804821e-05, "loss": 0.4741, "step": 7447 }, { "epoch": 1.272827616058868, "grad_norm": 1.46875, "learning_rate": 1.2285876191050719e-05, "loss": 0.4746, "step": 7448 }, { "epoch": 1.2730007574937776, "grad_norm": 1.5078125, "learning_rate": 1.228410058625804e-05, "loss": 0.5436, "step": 7449 }, { "epoch": 1.2731738989286874, "grad_norm": 1.4453125, "learning_rate": 1.2282324905485842e-05, "loss": 0.4806, "step": 7450 }, { "epoch": 1.2733470403635971, "grad_norm": 1.4375, "learning_rate": 1.2280549148793205e-05, "loss": 0.5372, "step": 7451 }, { "epoch": 1.2735201817985067, "grad_norm": 1.359375, "learning_rate": 1.2278773316239188e-05, "loss": 0.4258, "step": 7452 }, { "epoch": 1.2736933232334162, "grad_norm": 1.4921875, "learning_rate": 1.227699740788287e-05, "loss": 0.521, "step": 7453 }, { "epoch": 1.273866464668326, "grad_norm": 1.296875, "learning_rate": 1.2275221423783318e-05, "loss": 0.4538, "step": 7454 }, { "epoch": 1.2740396061032355, "grad_norm": 1.390625, "learning_rate": 1.2273445363999614e-05, "loss": 0.4172, "step": 7455 }, { "epoch": 1.2742127475381453, "grad_norm": 1.4453125, "learning_rate": 1.227166922859084e-05, "loss": 0.4718, "step": 7456 }, { "epoch": 1.2743858889730548, "grad_norm": 1.3125, "learning_rate": 1.226989301761607e-05, "loss": 0.3942, "step": 7457 }, { "epoch": 1.2745590304079646, "grad_norm": 1.4140625, "learning_rate": 1.22681167311344e-05, "loss": 0.4464, "step": 7458 }, { "epoch": 1.2747321718428741, "grad_norm": 1.578125, "learning_rate": 1.2266340369204912e-05, "loss": 0.5117, "step": 7459 }, { "epoch": 1.2749053132777837, "grad_norm": 1.4296875, "learning_rate": 1.2264563931886692e-05, "loss": 0.47, "step": 7460 }, { "epoch": 1.2750784547126934, "grad_norm": 1.390625, "learning_rate": 1.2262787419238837e-05, "loss": 0.4647, "step": 7461 }, { "epoch": 1.2752515961476032, "grad_norm": 1.375, "learning_rate": 1.2261010831320443e-05, "loss": 0.4689, "step": 7462 }, { "epoch": 1.2754247375825127, "grad_norm": 1.4140625, "learning_rate": 1.2259234168190605e-05, "loss": 0.5264, "step": 7463 }, { "epoch": 1.2755978790174223, "grad_norm": 1.375, "learning_rate": 1.2257457429908421e-05, "loss": 0.4385, "step": 7464 }, { "epoch": 1.275771020452332, "grad_norm": 1.4453125, "learning_rate": 1.2255680616532999e-05, "loss": 0.497, "step": 7465 }, { "epoch": 1.2759441618872416, "grad_norm": 1.328125, "learning_rate": 1.2253903728123434e-05, "loss": 0.4591, "step": 7466 }, { "epoch": 1.2761173033221513, "grad_norm": 1.265625, "learning_rate": 1.2252126764738845e-05, "loss": 0.424, "step": 7467 }, { "epoch": 1.2762904447570609, "grad_norm": 1.3515625, "learning_rate": 1.2250349726438335e-05, "loss": 0.5003, "step": 7468 }, { "epoch": 1.2764635861919706, "grad_norm": 1.4453125, "learning_rate": 1.2248572613281015e-05, "loss": 0.4894, "step": 7469 }, { "epoch": 1.2766367276268802, "grad_norm": 1.4453125, "learning_rate": 1.2246795425326008e-05, "loss": 0.4249, "step": 7470 }, { "epoch": 1.2768098690617897, "grad_norm": 1.515625, "learning_rate": 1.2245018162632421e-05, "loss": 0.5369, "step": 7471 }, { "epoch": 1.2769830104966995, "grad_norm": 1.359375, "learning_rate": 1.2243240825259382e-05, "loss": 0.4379, "step": 7472 }, { "epoch": 1.2771561519316093, "grad_norm": 1.359375, "learning_rate": 1.2241463413266006e-05, "loss": 0.5172, "step": 7473 }, { "epoch": 1.2773292933665188, "grad_norm": 1.4453125, "learning_rate": 1.2239685926711424e-05, "loss": 0.466, "step": 7474 }, { "epoch": 1.2775024348014283, "grad_norm": 1.546875, "learning_rate": 1.2237908365654762e-05, "loss": 0.4475, "step": 7475 }, { "epoch": 1.277675576236338, "grad_norm": 1.46875, "learning_rate": 1.2236130730155147e-05, "loss": 0.4979, "step": 7476 }, { "epoch": 1.2778487176712476, "grad_norm": 1.34375, "learning_rate": 1.2234353020271715e-05, "loss": 0.4169, "step": 7477 }, { "epoch": 1.2780218591061574, "grad_norm": 1.453125, "learning_rate": 1.2232575236063595e-05, "loss": 0.4979, "step": 7478 }, { "epoch": 1.278195000541067, "grad_norm": 1.4140625, "learning_rate": 1.2230797377589932e-05, "loss": 0.4519, "step": 7479 }, { "epoch": 1.2783681419759767, "grad_norm": 1.375, "learning_rate": 1.2229019444909857e-05, "loss": 0.4762, "step": 7480 }, { "epoch": 1.2785412834108862, "grad_norm": 1.3828125, "learning_rate": 1.2227241438082517e-05, "loss": 0.599, "step": 7481 }, { "epoch": 1.2787144248457958, "grad_norm": 1.3671875, "learning_rate": 1.2225463357167057e-05, "loss": 0.5345, "step": 7482 }, { "epoch": 1.2788875662807055, "grad_norm": 1.375, "learning_rate": 1.2223685202222623e-05, "loss": 0.5609, "step": 7483 }, { "epoch": 1.2790607077156153, "grad_norm": 1.3828125, "learning_rate": 1.2221906973308365e-05, "loss": 0.4375, "step": 7484 }, { "epoch": 1.2792338491505248, "grad_norm": 1.4375, "learning_rate": 1.2220128670483434e-05, "loss": 0.4399, "step": 7485 }, { "epoch": 1.2794069905854344, "grad_norm": 1.3671875, "learning_rate": 1.2218350293806984e-05, "loss": 0.4525, "step": 7486 }, { "epoch": 1.2795801320203442, "grad_norm": 1.40625, "learning_rate": 1.2216571843338174e-05, "loss": 0.4943, "step": 7487 }, { "epoch": 1.2797532734552537, "grad_norm": 1.3359375, "learning_rate": 1.2214793319136158e-05, "loss": 0.4653, "step": 7488 }, { "epoch": 1.2799264148901635, "grad_norm": 1.4140625, "learning_rate": 1.2213014721260106e-05, "loss": 0.4494, "step": 7489 }, { "epoch": 1.280099556325073, "grad_norm": 1.3515625, "learning_rate": 1.2211236049769175e-05, "loss": 0.4994, "step": 7490 }, { "epoch": 1.2802726977599828, "grad_norm": 1.703125, "learning_rate": 1.2209457304722538e-05, "loss": 0.4651, "step": 7491 }, { "epoch": 1.2804458391948923, "grad_norm": 1.5625, "learning_rate": 1.2207678486179358e-05, "loss": 0.481, "step": 7492 }, { "epoch": 1.2806189806298018, "grad_norm": 1.34375, "learning_rate": 1.2205899594198808e-05, "loss": 0.4297, "step": 7493 }, { "epoch": 1.2807921220647116, "grad_norm": 1.453125, "learning_rate": 1.2204120628840066e-05, "loss": 0.5195, "step": 7494 }, { "epoch": 1.2809652634996214, "grad_norm": 1.59375, "learning_rate": 1.22023415901623e-05, "loss": 0.4811, "step": 7495 }, { "epoch": 1.281138404934531, "grad_norm": 1.3515625, "learning_rate": 1.2200562478224703e-05, "loss": 0.4689, "step": 7496 }, { "epoch": 1.2813115463694404, "grad_norm": 1.3671875, "learning_rate": 1.2198783293086442e-05, "loss": 0.5172, "step": 7497 }, { "epoch": 1.2814846878043502, "grad_norm": 1.4453125, "learning_rate": 1.2197004034806708e-05, "loss": 0.4805, "step": 7498 }, { "epoch": 1.2816578292392597, "grad_norm": 1.5703125, "learning_rate": 1.2195224703444683e-05, "loss": 0.4586, "step": 7499 }, { "epoch": 1.2818309706741695, "grad_norm": 1.421875, "learning_rate": 1.219344529905956e-05, "loss": 0.4565, "step": 7500 }, { "epoch": 1.282004112109079, "grad_norm": 1.3125, "learning_rate": 1.2191665821710533e-05, "loss": 0.5185, "step": 7501 }, { "epoch": 1.2821772535439888, "grad_norm": 1.4296875, "learning_rate": 1.2189886271456785e-05, "loss": 0.5115, "step": 7502 }, { "epoch": 1.2823503949788984, "grad_norm": 1.421875, "learning_rate": 1.2188106648357521e-05, "loss": 0.5296, "step": 7503 }, { "epoch": 1.282523536413808, "grad_norm": 1.3359375, "learning_rate": 1.2186326952471932e-05, "loss": 0.4922, "step": 7504 }, { "epoch": 1.2826966778487177, "grad_norm": 1.40625, "learning_rate": 1.2184547183859225e-05, "loss": 0.4598, "step": 7505 }, { "epoch": 1.2828698192836274, "grad_norm": 1.6015625, "learning_rate": 1.21827673425786e-05, "loss": 0.5569, "step": 7506 }, { "epoch": 1.283042960718537, "grad_norm": 1.265625, "learning_rate": 1.2180987428689263e-05, "loss": 0.3693, "step": 7507 }, { "epoch": 1.2832161021534465, "grad_norm": 1.3125, "learning_rate": 1.2179207442250428e-05, "loss": 0.4324, "step": 7508 }, { "epoch": 1.2833892435883563, "grad_norm": 1.515625, "learning_rate": 1.217742738332129e-05, "loss": 0.4698, "step": 7509 }, { "epoch": 1.2835623850232658, "grad_norm": 1.390625, "learning_rate": 1.217564725196108e-05, "loss": 0.4005, "step": 7510 }, { "epoch": 1.2837355264581756, "grad_norm": 1.4140625, "learning_rate": 1.2173867048229e-05, "loss": 0.4534, "step": 7511 }, { "epoch": 1.283908667893085, "grad_norm": 1.3671875, "learning_rate": 1.2172086772184273e-05, "loss": 0.5269, "step": 7512 }, { "epoch": 1.2840818093279949, "grad_norm": 1.34375, "learning_rate": 1.2170306423886123e-05, "loss": 0.4364, "step": 7513 }, { "epoch": 1.2842549507629044, "grad_norm": 1.515625, "learning_rate": 1.2168526003393763e-05, "loss": 0.4936, "step": 7514 }, { "epoch": 1.284428092197814, "grad_norm": 1.375, "learning_rate": 1.2166745510766425e-05, "loss": 0.4827, "step": 7515 }, { "epoch": 1.2846012336327237, "grad_norm": 1.59375, "learning_rate": 1.2164964946063331e-05, "loss": 0.5024, "step": 7516 }, { "epoch": 1.2847743750676335, "grad_norm": 1.453125, "learning_rate": 1.2163184309343719e-05, "loss": 0.4402, "step": 7517 }, { "epoch": 1.284947516502543, "grad_norm": 1.3515625, "learning_rate": 1.216140360066681e-05, "loss": 0.4394, "step": 7518 }, { "epoch": 1.2851206579374526, "grad_norm": 1.375, "learning_rate": 1.2159622820091848e-05, "loss": 0.4982, "step": 7519 }, { "epoch": 1.2852937993723623, "grad_norm": 1.4453125, "learning_rate": 1.2157841967678064e-05, "loss": 0.4722, "step": 7520 }, { "epoch": 1.2854669408072719, "grad_norm": 1.3828125, "learning_rate": 1.2156061043484697e-05, "loss": 0.433, "step": 7521 }, { "epoch": 1.2856400822421816, "grad_norm": 1.375, "learning_rate": 1.2154280047570995e-05, "loss": 0.4908, "step": 7522 }, { "epoch": 1.2858132236770912, "grad_norm": 1.3828125, "learning_rate": 1.2152498979996195e-05, "loss": 0.4312, "step": 7523 }, { "epoch": 1.285986365112001, "grad_norm": 1.3125, "learning_rate": 1.2150717840819546e-05, "loss": 0.4838, "step": 7524 }, { "epoch": 1.2861595065469105, "grad_norm": 1.4609375, "learning_rate": 1.2148936630100301e-05, "loss": 0.5234, "step": 7525 }, { "epoch": 1.2863326479818202, "grad_norm": 1.5078125, "learning_rate": 1.2147155347897698e-05, "loss": 0.4766, "step": 7526 }, { "epoch": 1.2865057894167298, "grad_norm": 1.359375, "learning_rate": 1.2145373994271007e-05, "loss": 0.435, "step": 7527 }, { "epoch": 1.2866789308516395, "grad_norm": 1.390625, "learning_rate": 1.2143592569279475e-05, "loss": 0.4806, "step": 7528 }, { "epoch": 1.286852072286549, "grad_norm": 1.3984375, "learning_rate": 1.214181107298236e-05, "loss": 0.5392, "step": 7529 }, { "epoch": 1.2870252137214586, "grad_norm": 1.3046875, "learning_rate": 1.2140029505438925e-05, "loss": 0.399, "step": 7530 }, { "epoch": 1.2871983551563684, "grad_norm": 1.390625, "learning_rate": 1.2138247866708432e-05, "loss": 0.5091, "step": 7531 }, { "epoch": 1.287371496591278, "grad_norm": 1.5, "learning_rate": 1.2136466156850146e-05, "loss": 0.5121, "step": 7532 }, { "epoch": 1.2875446380261877, "grad_norm": 1.40625, "learning_rate": 1.2134684375923334e-05, "loss": 0.556, "step": 7533 }, { "epoch": 1.2877177794610972, "grad_norm": 1.5, "learning_rate": 1.213290252398727e-05, "loss": 0.4381, "step": 7534 }, { "epoch": 1.287890920896007, "grad_norm": 1.46875, "learning_rate": 1.2131120601101223e-05, "loss": 0.4492, "step": 7535 }, { "epoch": 1.2880640623309165, "grad_norm": 1.359375, "learning_rate": 1.2129338607324468e-05, "loss": 0.4466, "step": 7536 }, { "epoch": 1.2882372037658263, "grad_norm": 1.3984375, "learning_rate": 1.2127556542716281e-05, "loss": 0.4972, "step": 7537 }, { "epoch": 1.2884103452007358, "grad_norm": 1.40625, "learning_rate": 1.2125774407335944e-05, "loss": 0.5146, "step": 7538 }, { "epoch": 1.2885834866356456, "grad_norm": 1.421875, "learning_rate": 1.2123992201242742e-05, "loss": 0.4633, "step": 7539 }, { "epoch": 1.2887566280705551, "grad_norm": 1.3515625, "learning_rate": 1.2122209924495952e-05, "loss": 0.474, "step": 7540 }, { "epoch": 1.2889297695054647, "grad_norm": 1.2890625, "learning_rate": 1.2120427577154866e-05, "loss": 0.4678, "step": 7541 }, { "epoch": 1.2891029109403744, "grad_norm": 1.4765625, "learning_rate": 1.2118645159278767e-05, "loss": 0.4548, "step": 7542 }, { "epoch": 1.2892760523752842, "grad_norm": 1.4453125, "learning_rate": 1.2116862670926954e-05, "loss": 0.4589, "step": 7543 }, { "epoch": 1.2894491938101937, "grad_norm": 1.3828125, "learning_rate": 1.2115080112158715e-05, "loss": 0.4354, "step": 7544 }, { "epoch": 1.2896223352451033, "grad_norm": 1.4453125, "learning_rate": 1.2113297483033347e-05, "loss": 0.4981, "step": 7545 }, { "epoch": 1.289795476680013, "grad_norm": 1.4140625, "learning_rate": 1.2111514783610152e-05, "loss": 0.478, "step": 7546 }, { "epoch": 1.2899686181149226, "grad_norm": 1.421875, "learning_rate": 1.2109732013948423e-05, "loss": 0.4584, "step": 7547 }, { "epoch": 1.2901417595498323, "grad_norm": 1.3671875, "learning_rate": 1.2107949174107472e-05, "loss": 0.4434, "step": 7548 }, { "epoch": 1.2903149009847419, "grad_norm": 1.4296875, "learning_rate": 1.2106166264146598e-05, "loss": 0.4996, "step": 7549 }, { "epoch": 1.2904880424196516, "grad_norm": 1.375, "learning_rate": 1.2104383284125109e-05, "loss": 0.5111, "step": 7550 }, { "epoch": 1.2906611838545612, "grad_norm": 1.34375, "learning_rate": 1.2102600234102322e-05, "loss": 0.4401, "step": 7551 }, { "epoch": 1.2908343252894707, "grad_norm": 1.4375, "learning_rate": 1.210081711413754e-05, "loss": 0.4588, "step": 7552 }, { "epoch": 1.2910074667243805, "grad_norm": 1.3359375, "learning_rate": 1.209903392429008e-05, "loss": 0.459, "step": 7553 }, { "epoch": 1.2911806081592903, "grad_norm": 1.421875, "learning_rate": 1.2097250664619262e-05, "loss": 0.4971, "step": 7554 }, { "epoch": 1.2913537495941998, "grad_norm": 1.453125, "learning_rate": 1.2095467335184408e-05, "loss": 0.4685, "step": 7555 }, { "epoch": 1.2915268910291093, "grad_norm": 1.34375, "learning_rate": 1.2093683936044832e-05, "loss": 0.4483, "step": 7556 }, { "epoch": 1.291700032464019, "grad_norm": 1.3828125, "learning_rate": 1.2091900467259859e-05, "loss": 0.5114, "step": 7557 }, { "epoch": 1.2918731738989286, "grad_norm": 1.3046875, "learning_rate": 1.209011692888882e-05, "loss": 0.4616, "step": 7558 }, { "epoch": 1.2920463153338384, "grad_norm": 1.3984375, "learning_rate": 1.208833332099104e-05, "loss": 0.4363, "step": 7559 }, { "epoch": 1.292219456768748, "grad_norm": 1.4140625, "learning_rate": 1.2086549643625853e-05, "loss": 0.4991, "step": 7560 }, { "epoch": 1.2923925982036577, "grad_norm": 1.4453125, "learning_rate": 1.2084765896852588e-05, "loss": 0.5335, "step": 7561 }, { "epoch": 1.2925657396385672, "grad_norm": 1.390625, "learning_rate": 1.2082982080730583e-05, "loss": 0.4964, "step": 7562 }, { "epoch": 1.2927388810734768, "grad_norm": 1.3515625, "learning_rate": 1.2081198195319176e-05, "loss": 0.4513, "step": 7563 }, { "epoch": 1.2929120225083865, "grad_norm": 1.3828125, "learning_rate": 1.2079414240677705e-05, "loss": 0.4453, "step": 7564 }, { "epoch": 1.2930851639432963, "grad_norm": 1.3671875, "learning_rate": 1.2077630216865517e-05, "loss": 0.4833, "step": 7565 }, { "epoch": 1.2932583053782059, "grad_norm": 1.359375, "learning_rate": 1.2075846123941952e-05, "loss": 0.5272, "step": 7566 }, { "epoch": 1.2934314468131154, "grad_norm": 1.3984375, "learning_rate": 1.2074061961966362e-05, "loss": 0.482, "step": 7567 }, { "epoch": 1.2936045882480252, "grad_norm": 1.3515625, "learning_rate": 1.2072277730998088e-05, "loss": 0.5446, "step": 7568 }, { "epoch": 1.2937777296829347, "grad_norm": 1.3046875, "learning_rate": 1.2070493431096487e-05, "loss": 0.4616, "step": 7569 }, { "epoch": 1.2939508711178445, "grad_norm": 1.375, "learning_rate": 1.2068709062320911e-05, "loss": 0.5062, "step": 7570 }, { "epoch": 1.294124012552754, "grad_norm": 1.4375, "learning_rate": 1.2066924624730721e-05, "loss": 0.4796, "step": 7571 }, { "epoch": 1.2942971539876638, "grad_norm": 1.359375, "learning_rate": 1.2065140118385272e-05, "loss": 0.4503, "step": 7572 }, { "epoch": 1.2944702954225733, "grad_norm": 1.421875, "learning_rate": 1.2063355543343925e-05, "loss": 0.4343, "step": 7573 }, { "epoch": 1.2946434368574828, "grad_norm": 1.4921875, "learning_rate": 1.206157089966604e-05, "loss": 0.5252, "step": 7574 }, { "epoch": 1.2948165782923926, "grad_norm": 1.515625, "learning_rate": 1.2059786187410984e-05, "loss": 0.5037, "step": 7575 }, { "epoch": 1.2949897197273024, "grad_norm": 1.3828125, "learning_rate": 1.2058001406638129e-05, "loss": 0.492, "step": 7576 }, { "epoch": 1.295162861162212, "grad_norm": 1.3359375, "learning_rate": 1.2056216557406841e-05, "loss": 0.4401, "step": 7577 }, { "epoch": 1.2953360025971214, "grad_norm": 1.375, "learning_rate": 1.2054431639776493e-05, "loss": 0.4605, "step": 7578 }, { "epoch": 1.2955091440320312, "grad_norm": 1.4296875, "learning_rate": 1.205264665380646e-05, "loss": 0.4506, "step": 7579 }, { "epoch": 1.2956822854669408, "grad_norm": 1.3125, "learning_rate": 1.2050861599556114e-05, "loss": 0.432, "step": 7580 }, { "epoch": 1.2958554269018505, "grad_norm": 1.390625, "learning_rate": 1.2049076477084842e-05, "loss": 0.5144, "step": 7581 }, { "epoch": 1.29602856833676, "grad_norm": 1.4375, "learning_rate": 1.2047291286452019e-05, "loss": 0.4858, "step": 7582 }, { "epoch": 1.2962017097716698, "grad_norm": 1.5703125, "learning_rate": 1.204550602771703e-05, "loss": 0.5975, "step": 7583 }, { "epoch": 1.2963748512065794, "grad_norm": 1.3671875, "learning_rate": 1.2043720700939265e-05, "loss": 0.4847, "step": 7584 }, { "epoch": 1.296547992641489, "grad_norm": 1.5, "learning_rate": 1.2041935306178104e-05, "loss": 0.488, "step": 7585 }, { "epoch": 1.2967211340763987, "grad_norm": 1.390625, "learning_rate": 1.2040149843492945e-05, "loss": 0.5193, "step": 7586 }, { "epoch": 1.2968942755113084, "grad_norm": 1.578125, "learning_rate": 1.2038364312943176e-05, "loss": 0.4489, "step": 7587 }, { "epoch": 1.297067416946218, "grad_norm": 1.515625, "learning_rate": 1.2036578714588191e-05, "loss": 0.5222, "step": 7588 }, { "epoch": 1.2972405583811275, "grad_norm": 1.4609375, "learning_rate": 1.2034793048487394e-05, "loss": 0.4833, "step": 7589 }, { "epoch": 1.2974136998160373, "grad_norm": 1.421875, "learning_rate": 1.2033007314700174e-05, "loss": 0.4594, "step": 7590 }, { "epoch": 1.2975868412509468, "grad_norm": 1.375, "learning_rate": 1.2031221513285942e-05, "loss": 0.4731, "step": 7591 }, { "epoch": 1.2977599826858566, "grad_norm": 1.4609375, "learning_rate": 1.2029435644304095e-05, "loss": 0.5199, "step": 7592 }, { "epoch": 1.2979331241207661, "grad_norm": 1.3203125, "learning_rate": 1.2027649707814047e-05, "loss": 0.4869, "step": 7593 }, { "epoch": 1.2981062655556759, "grad_norm": 1.3828125, "learning_rate": 1.2025863703875197e-05, "loss": 0.4751, "step": 7594 }, { "epoch": 1.2982794069905854, "grad_norm": 1.5078125, "learning_rate": 1.202407763254696e-05, "loss": 0.5321, "step": 7595 }, { "epoch": 1.298452548425495, "grad_norm": 1.3203125, "learning_rate": 1.2022291493888749e-05, "loss": 0.4585, "step": 7596 }, { "epoch": 1.2986256898604047, "grad_norm": 1.296875, "learning_rate": 1.202050528795998e-05, "loss": 0.4685, "step": 7597 }, { "epoch": 1.2987988312953145, "grad_norm": 1.34375, "learning_rate": 1.201871901482007e-05, "loss": 0.4707, "step": 7598 }, { "epoch": 1.298971972730224, "grad_norm": 1.390625, "learning_rate": 1.2016932674528436e-05, "loss": 0.4457, "step": 7599 }, { "epoch": 1.2991451141651336, "grad_norm": 1.453125, "learning_rate": 1.20151462671445e-05, "loss": 0.4878, "step": 7600 }, { "epoch": 1.2993182556000433, "grad_norm": 1.4609375, "learning_rate": 1.2013359792727688e-05, "loss": 0.4431, "step": 7601 }, { "epoch": 1.2994913970349529, "grad_norm": 1.40625, "learning_rate": 1.2011573251337427e-05, "loss": 0.5068, "step": 7602 }, { "epoch": 1.2996645384698626, "grad_norm": 1.34375, "learning_rate": 1.2009786643033144e-05, "loss": 0.4682, "step": 7603 }, { "epoch": 1.2998376799047722, "grad_norm": 1.484375, "learning_rate": 1.2007999967874269e-05, "loss": 0.4608, "step": 7604 }, { "epoch": 1.300010821339682, "grad_norm": 1.3828125, "learning_rate": 1.2006213225920236e-05, "loss": 0.4917, "step": 7605 }, { "epoch": 1.3001839627745915, "grad_norm": 1.4296875, "learning_rate": 1.2004426417230479e-05, "loss": 0.4871, "step": 7606 }, { "epoch": 1.300357104209501, "grad_norm": 1.328125, "learning_rate": 1.2002639541864436e-05, "loss": 0.4997, "step": 7607 }, { "epoch": 1.3005302456444108, "grad_norm": 1.359375, "learning_rate": 1.2000852599881545e-05, "loss": 0.4871, "step": 7608 }, { "epoch": 1.3007033870793205, "grad_norm": 1.34375, "learning_rate": 1.199906559134125e-05, "loss": 0.4555, "step": 7609 }, { "epoch": 1.30087652851423, "grad_norm": 1.4921875, "learning_rate": 1.1997278516302996e-05, "loss": 0.449, "step": 7610 }, { "epoch": 1.3010496699491396, "grad_norm": 1.4375, "learning_rate": 1.1995491374826227e-05, "loss": 0.4789, "step": 7611 }, { "epoch": 1.3012228113840494, "grad_norm": 1.421875, "learning_rate": 1.1993704166970393e-05, "loss": 0.4525, "step": 7612 }, { "epoch": 1.301395952818959, "grad_norm": 1.390625, "learning_rate": 1.1991916892794939e-05, "loss": 0.463, "step": 7613 }, { "epoch": 1.3015690942538687, "grad_norm": 1.40625, "learning_rate": 1.1990129552359326e-05, "loss": 0.4554, "step": 7614 }, { "epoch": 1.3017422356887782, "grad_norm": 1.546875, "learning_rate": 1.1988342145723006e-05, "loss": 0.4796, "step": 7615 }, { "epoch": 1.301915377123688, "grad_norm": 1.46875, "learning_rate": 1.1986554672945434e-05, "loss": 0.4222, "step": 7616 }, { "epoch": 1.3020885185585975, "grad_norm": 1.4140625, "learning_rate": 1.1984767134086073e-05, "loss": 0.4183, "step": 7617 }, { "epoch": 1.302261659993507, "grad_norm": 1.3515625, "learning_rate": 1.1982979529204377e-05, "loss": 0.4919, "step": 7618 }, { "epoch": 1.3024348014284168, "grad_norm": 1.421875, "learning_rate": 1.1981191858359823e-05, "loss": 0.4981, "step": 7619 }, { "epoch": 1.3026079428633266, "grad_norm": 1.3671875, "learning_rate": 1.1979404121611864e-05, "loss": 0.4791, "step": 7620 }, { "epoch": 1.3027810842982361, "grad_norm": 1.2734375, "learning_rate": 1.1977616319019976e-05, "loss": 0.4394, "step": 7621 }, { "epoch": 1.3029542257331457, "grad_norm": 1.5625, "learning_rate": 1.1975828450643627e-05, "loss": 0.4819, "step": 7622 }, { "epoch": 1.3031273671680554, "grad_norm": 1.421875, "learning_rate": 1.1974040516542287e-05, "loss": 0.4567, "step": 7623 }, { "epoch": 1.303300508602965, "grad_norm": 1.375, "learning_rate": 1.1972252516775437e-05, "loss": 0.4687, "step": 7624 }, { "epoch": 1.3034736500378747, "grad_norm": 1.421875, "learning_rate": 1.1970464451402548e-05, "loss": 0.4462, "step": 7625 }, { "epoch": 1.3036467914727843, "grad_norm": 1.3203125, "learning_rate": 1.1968676320483103e-05, "loss": 0.4848, "step": 7626 }, { "epoch": 1.303819932907694, "grad_norm": 1.3125, "learning_rate": 1.1966888124076584e-05, "loss": 0.4371, "step": 7627 }, { "epoch": 1.3039930743426036, "grad_norm": 1.3125, "learning_rate": 1.1965099862242466e-05, "loss": 0.4179, "step": 7628 }, { "epoch": 1.3041662157775131, "grad_norm": 1.28125, "learning_rate": 1.1963311535040248e-05, "loss": 0.4354, "step": 7629 }, { "epoch": 1.304339357212423, "grad_norm": 1.5078125, "learning_rate": 1.1961523142529406e-05, "loss": 0.5517, "step": 7630 }, { "epoch": 1.3045124986473327, "grad_norm": 1.2890625, "learning_rate": 1.1959734684769437e-05, "loss": 0.4719, "step": 7631 }, { "epoch": 1.3046856400822422, "grad_norm": 1.375, "learning_rate": 1.1957946161819831e-05, "loss": 0.4766, "step": 7632 }, { "epoch": 1.3048587815171517, "grad_norm": 1.390625, "learning_rate": 1.195615757374008e-05, "loss": 0.4636, "step": 7633 }, { "epoch": 1.3050319229520615, "grad_norm": 1.4453125, "learning_rate": 1.1954368920589686e-05, "loss": 0.506, "step": 7634 }, { "epoch": 1.305205064386971, "grad_norm": 1.3828125, "learning_rate": 1.1952580202428143e-05, "loss": 0.5079, "step": 7635 }, { "epoch": 1.3053782058218808, "grad_norm": 1.4453125, "learning_rate": 1.1950791419314956e-05, "loss": 0.5105, "step": 7636 }, { "epoch": 1.3055513472567903, "grad_norm": 1.3046875, "learning_rate": 1.1949002571309624e-05, "loss": 0.4725, "step": 7637 }, { "epoch": 1.3057244886917, "grad_norm": 1.2890625, "learning_rate": 1.1947213658471653e-05, "loss": 0.4085, "step": 7638 }, { "epoch": 1.3058976301266096, "grad_norm": 1.53125, "learning_rate": 1.1945424680860551e-05, "loss": 0.457, "step": 7639 }, { "epoch": 1.3060707715615192, "grad_norm": 1.2890625, "learning_rate": 1.1943635638535827e-05, "loss": 0.431, "step": 7640 }, { "epoch": 1.306243912996429, "grad_norm": 1.375, "learning_rate": 1.1941846531556995e-05, "loss": 0.4645, "step": 7641 }, { "epoch": 1.3064170544313387, "grad_norm": 1.4375, "learning_rate": 1.1940057359983566e-05, "loss": 0.4575, "step": 7642 }, { "epoch": 1.3065901958662482, "grad_norm": 1.7265625, "learning_rate": 1.1938268123875058e-05, "loss": 0.5468, "step": 7643 }, { "epoch": 1.3067633373011578, "grad_norm": 1.421875, "learning_rate": 1.1936478823290986e-05, "loss": 0.509, "step": 7644 }, { "epoch": 1.3069364787360676, "grad_norm": 1.53125, "learning_rate": 1.1934689458290874e-05, "loss": 0.51, "step": 7645 }, { "epoch": 1.307109620170977, "grad_norm": 1.5078125, "learning_rate": 1.1932900028934243e-05, "loss": 0.5223, "step": 7646 }, { "epoch": 1.3072827616058869, "grad_norm": 1.46875, "learning_rate": 1.1931110535280615e-05, "loss": 0.5414, "step": 7647 }, { "epoch": 1.3074559030407964, "grad_norm": 1.25, "learning_rate": 1.192932097738952e-05, "loss": 0.4634, "step": 7648 }, { "epoch": 1.3076290444757062, "grad_norm": 1.484375, "learning_rate": 1.1927531355320486e-05, "loss": 0.495, "step": 7649 }, { "epoch": 1.3078021859106157, "grad_norm": 1.3125, "learning_rate": 1.1925741669133042e-05, "loss": 0.4417, "step": 7650 }, { "epoch": 1.3079753273455252, "grad_norm": 1.2734375, "learning_rate": 1.192395191888672e-05, "loss": 0.4591, "step": 7651 }, { "epoch": 1.308148468780435, "grad_norm": 1.40625, "learning_rate": 1.1922162104641061e-05, "loss": 0.4811, "step": 7652 }, { "epoch": 1.3083216102153448, "grad_norm": 1.3203125, "learning_rate": 1.19203722264556e-05, "loss": 0.4731, "step": 7653 }, { "epoch": 1.3084947516502543, "grad_norm": 1.3671875, "learning_rate": 1.1918582284389874e-05, "loss": 0.4282, "step": 7654 }, { "epoch": 1.3086678930851638, "grad_norm": 1.5, "learning_rate": 1.1916792278503426e-05, "loss": 0.5377, "step": 7655 }, { "epoch": 1.3088410345200736, "grad_norm": 1.46875, "learning_rate": 1.19150022088558e-05, "loss": 0.499, "step": 7656 }, { "epoch": 1.3090141759549832, "grad_norm": 1.4375, "learning_rate": 1.1913212075506542e-05, "loss": 0.4662, "step": 7657 }, { "epoch": 1.309187317389893, "grad_norm": 1.5390625, "learning_rate": 1.1911421878515198e-05, "loss": 0.4836, "step": 7658 }, { "epoch": 1.3093604588248025, "grad_norm": 1.5859375, "learning_rate": 1.190963161794132e-05, "loss": 0.5186, "step": 7659 }, { "epoch": 1.3095336002597122, "grad_norm": 1.421875, "learning_rate": 1.1907841293844463e-05, "loss": 0.4973, "step": 7660 }, { "epoch": 1.3097067416946218, "grad_norm": 1.40625, "learning_rate": 1.190605090628417e-05, "loss": 0.4221, "step": 7661 }, { "epoch": 1.3098798831295313, "grad_norm": 1.2578125, "learning_rate": 1.190426045532001e-05, "loss": 0.4151, "step": 7662 }, { "epoch": 1.310053024564441, "grad_norm": 1.359375, "learning_rate": 1.190246994101154e-05, "loss": 0.488, "step": 7663 }, { "epoch": 1.3102261659993508, "grad_norm": 1.3828125, "learning_rate": 1.1900679363418312e-05, "loss": 0.4576, "step": 7664 }, { "epoch": 1.3103993074342604, "grad_norm": 1.3203125, "learning_rate": 1.1898888722599897e-05, "loss": 0.4888, "step": 7665 }, { "epoch": 1.31057244886917, "grad_norm": 1.453125, "learning_rate": 1.1897098018615854e-05, "loss": 0.5, "step": 7666 }, { "epoch": 1.3107455903040797, "grad_norm": 1.4765625, "learning_rate": 1.1895307251525758e-05, "loss": 0.5274, "step": 7667 }, { "epoch": 1.3109187317389892, "grad_norm": 1.296875, "learning_rate": 1.1893516421389168e-05, "loss": 0.3773, "step": 7668 }, { "epoch": 1.311091873173899, "grad_norm": 1.421875, "learning_rate": 1.1891725528265665e-05, "loss": 0.4563, "step": 7669 }, { "epoch": 1.3112650146088085, "grad_norm": 1.5390625, "learning_rate": 1.1889934572214813e-05, "loss": 0.5031, "step": 7670 }, { "epoch": 1.3114381560437183, "grad_norm": 1.3671875, "learning_rate": 1.1888143553296192e-05, "loss": 0.4444, "step": 7671 }, { "epoch": 1.3116112974786278, "grad_norm": 1.3125, "learning_rate": 1.1886352471569378e-05, "loss": 0.5108, "step": 7672 }, { "epoch": 1.3117844389135376, "grad_norm": 1.46875, "learning_rate": 1.1884561327093953e-05, "loss": 0.4701, "step": 7673 }, { "epoch": 1.3119575803484471, "grad_norm": 1.3203125, "learning_rate": 1.1882770119929496e-05, "loss": 0.4661, "step": 7674 }, { "epoch": 1.3121307217833569, "grad_norm": 1.46875, "learning_rate": 1.1880978850135593e-05, "loss": 0.5354, "step": 7675 }, { "epoch": 1.3123038632182664, "grad_norm": 1.4140625, "learning_rate": 1.1879187517771826e-05, "loss": 0.4216, "step": 7676 }, { "epoch": 1.312477004653176, "grad_norm": 1.3828125, "learning_rate": 1.1877396122897786e-05, "loss": 0.4331, "step": 7677 }, { "epoch": 1.3126501460880857, "grad_norm": 1.421875, "learning_rate": 1.1875604665573062e-05, "loss": 0.4747, "step": 7678 }, { "epoch": 1.3128232875229953, "grad_norm": 1.421875, "learning_rate": 1.187381314585725e-05, "loss": 0.4551, "step": 7679 }, { "epoch": 1.312996428957905, "grad_norm": 1.421875, "learning_rate": 1.1872021563809934e-05, "loss": 0.4622, "step": 7680 }, { "epoch": 1.3131695703928146, "grad_norm": 1.40625, "learning_rate": 1.1870229919490718e-05, "loss": 0.492, "step": 7681 }, { "epoch": 1.3133427118277243, "grad_norm": 1.375, "learning_rate": 1.1868438212959196e-05, "loss": 0.4928, "step": 7682 }, { "epoch": 1.3135158532626339, "grad_norm": 1.40625, "learning_rate": 1.1866646444274976e-05, "loss": 0.404, "step": 7683 }, { "epoch": 1.3136889946975436, "grad_norm": 1.3125, "learning_rate": 1.1864854613497652e-05, "loss": 0.4669, "step": 7684 }, { "epoch": 1.3138621361324532, "grad_norm": 1.4609375, "learning_rate": 1.186306272068683e-05, "loss": 0.4842, "step": 7685 }, { "epoch": 1.314035277567363, "grad_norm": 1.4140625, "learning_rate": 1.1861270765902121e-05, "loss": 0.4344, "step": 7686 }, { "epoch": 1.3142084190022725, "grad_norm": 1.3046875, "learning_rate": 1.1859478749203127e-05, "loss": 0.466, "step": 7687 }, { "epoch": 1.314381560437182, "grad_norm": 1.4140625, "learning_rate": 1.1857686670649464e-05, "loss": 0.4318, "step": 7688 }, { "epoch": 1.3145547018720918, "grad_norm": 1.3671875, "learning_rate": 1.185589453030074e-05, "loss": 0.4886, "step": 7689 }, { "epoch": 1.3147278433070015, "grad_norm": 1.4296875, "learning_rate": 1.1854102328216574e-05, "loss": 0.4403, "step": 7690 }, { "epoch": 1.314900984741911, "grad_norm": 1.5703125, "learning_rate": 1.1852310064456582e-05, "loss": 0.5193, "step": 7691 }, { "epoch": 1.3150741261768206, "grad_norm": 1.3984375, "learning_rate": 1.1850517739080381e-05, "loss": 0.4506, "step": 7692 }, { "epoch": 1.3152472676117304, "grad_norm": 1.40625, "learning_rate": 1.184872535214759e-05, "loss": 0.4765, "step": 7693 }, { "epoch": 1.31542040904664, "grad_norm": 1.5078125, "learning_rate": 1.1846932903717836e-05, "loss": 0.5183, "step": 7694 }, { "epoch": 1.3155935504815497, "grad_norm": 1.3046875, "learning_rate": 1.1845140393850744e-05, "loss": 0.4443, "step": 7695 }, { "epoch": 1.3157666919164592, "grad_norm": 1.359375, "learning_rate": 1.1843347822605938e-05, "loss": 0.4597, "step": 7696 }, { "epoch": 1.315939833351369, "grad_norm": 1.3359375, "learning_rate": 1.1841555190043048e-05, "loss": 0.4857, "step": 7697 }, { "epoch": 1.3161129747862785, "grad_norm": 1.5390625, "learning_rate": 1.1839762496221707e-05, "loss": 0.4603, "step": 7698 }, { "epoch": 1.316286116221188, "grad_norm": 1.2890625, "learning_rate": 1.1837969741201543e-05, "loss": 0.3659, "step": 7699 }, { "epoch": 1.3164592576560978, "grad_norm": 1.34375, "learning_rate": 1.1836176925042198e-05, "loss": 0.468, "step": 7700 }, { "epoch": 1.3166323990910076, "grad_norm": 1.359375, "learning_rate": 1.1834384047803305e-05, "loss": 0.4442, "step": 7701 }, { "epoch": 1.3168055405259171, "grad_norm": 1.34375, "learning_rate": 1.1832591109544502e-05, "loss": 0.4666, "step": 7702 }, { "epoch": 1.3169786819608267, "grad_norm": 1.3828125, "learning_rate": 1.1830798110325436e-05, "loss": 0.4907, "step": 7703 }, { "epoch": 1.3171518233957364, "grad_norm": 1.5859375, "learning_rate": 1.1829005050205744e-05, "loss": 0.528, "step": 7704 }, { "epoch": 1.317324964830646, "grad_norm": 1.421875, "learning_rate": 1.1827211929245075e-05, "loss": 0.4652, "step": 7705 }, { "epoch": 1.3174981062655557, "grad_norm": 1.375, "learning_rate": 1.1825418747503074e-05, "loss": 0.4937, "step": 7706 }, { "epoch": 1.3176712477004653, "grad_norm": 1.4140625, "learning_rate": 1.1823625505039395e-05, "loss": 0.4432, "step": 7707 }, { "epoch": 1.317844389135375, "grad_norm": 1.375, "learning_rate": 1.1821832201913683e-05, "loss": 0.4699, "step": 7708 }, { "epoch": 1.3180175305702846, "grad_norm": 1.390625, "learning_rate": 1.1820038838185594e-05, "loss": 0.4857, "step": 7709 }, { "epoch": 1.3181906720051941, "grad_norm": 1.4921875, "learning_rate": 1.1818245413914784e-05, "loss": 0.4069, "step": 7710 }, { "epoch": 1.318363813440104, "grad_norm": 1.3203125, "learning_rate": 1.1816451929160908e-05, "loss": 0.4371, "step": 7711 }, { "epoch": 1.3185369548750137, "grad_norm": 1.4921875, "learning_rate": 1.1814658383983632e-05, "loss": 0.4968, "step": 7712 }, { "epoch": 1.3187100963099232, "grad_norm": 1.4765625, "learning_rate": 1.181286477844261e-05, "loss": 0.5185, "step": 7713 }, { "epoch": 1.3188832377448327, "grad_norm": 1.375, "learning_rate": 1.1811071112597508e-05, "loss": 0.5111, "step": 7714 }, { "epoch": 1.3190563791797425, "grad_norm": 1.2734375, "learning_rate": 1.180927738650799e-05, "loss": 0.4321, "step": 7715 }, { "epoch": 1.319229520614652, "grad_norm": 1.4140625, "learning_rate": 1.1807483600233728e-05, "loss": 0.5152, "step": 7716 }, { "epoch": 1.3194026620495618, "grad_norm": 1.46875, "learning_rate": 1.1805689753834391e-05, "loss": 0.4545, "step": 7717 }, { "epoch": 1.3195758034844713, "grad_norm": 1.4296875, "learning_rate": 1.1803895847369645e-05, "loss": 0.4871, "step": 7718 }, { "epoch": 1.319748944919381, "grad_norm": 1.609375, "learning_rate": 1.1802101880899165e-05, "loss": 0.5285, "step": 7719 }, { "epoch": 1.3199220863542906, "grad_norm": 1.3828125, "learning_rate": 1.180030785448263e-05, "loss": 0.4429, "step": 7720 }, { "epoch": 1.3200952277892002, "grad_norm": 1.3359375, "learning_rate": 1.1798513768179716e-05, "loss": 0.4614, "step": 7721 }, { "epoch": 1.32026836922411, "grad_norm": 1.40625, "learning_rate": 1.1796719622050099e-05, "loss": 0.4204, "step": 7722 }, { "epoch": 1.3204415106590197, "grad_norm": 1.3671875, "learning_rate": 1.1794925416153465e-05, "loss": 0.4728, "step": 7723 }, { "epoch": 1.3206146520939293, "grad_norm": 1.4296875, "learning_rate": 1.1793131150549497e-05, "loss": 0.408, "step": 7724 }, { "epoch": 1.3207877935288388, "grad_norm": 1.3828125, "learning_rate": 1.1791336825297877e-05, "loss": 0.4796, "step": 7725 }, { "epoch": 1.3209609349637486, "grad_norm": 1.5390625, "learning_rate": 1.1789542440458292e-05, "loss": 0.5091, "step": 7726 }, { "epoch": 1.321134076398658, "grad_norm": 1.4140625, "learning_rate": 1.1787747996090437e-05, "loss": 0.6058, "step": 7727 }, { "epoch": 1.3213072178335679, "grad_norm": 1.375, "learning_rate": 1.1785953492253999e-05, "loss": 0.441, "step": 7728 }, { "epoch": 1.3214803592684774, "grad_norm": 1.5, "learning_rate": 1.1784158929008674e-05, "loss": 0.4857, "step": 7729 }, { "epoch": 1.3216535007033872, "grad_norm": 1.421875, "learning_rate": 1.1782364306414154e-05, "loss": 0.4446, "step": 7730 }, { "epoch": 1.3218266421382967, "grad_norm": 1.375, "learning_rate": 1.1780569624530135e-05, "loss": 0.4389, "step": 7731 }, { "epoch": 1.3219997835732062, "grad_norm": 1.390625, "learning_rate": 1.1778774883416325e-05, "loss": 0.4618, "step": 7732 }, { "epoch": 1.322172925008116, "grad_norm": 1.296875, "learning_rate": 1.1776980083132417e-05, "loss": 0.4398, "step": 7733 }, { "epoch": 1.3223460664430258, "grad_norm": 1.3828125, "learning_rate": 1.1775185223738114e-05, "loss": 0.4673, "step": 7734 }, { "epoch": 1.3225192078779353, "grad_norm": 1.4375, "learning_rate": 1.1773390305293126e-05, "loss": 0.4913, "step": 7735 }, { "epoch": 1.3226923493128449, "grad_norm": 1.3046875, "learning_rate": 1.1771595327857159e-05, "loss": 0.4702, "step": 7736 }, { "epoch": 1.3228654907477546, "grad_norm": 1.453125, "learning_rate": 1.1769800291489916e-05, "loss": 0.4727, "step": 7737 }, { "epoch": 1.3230386321826642, "grad_norm": 1.640625, "learning_rate": 1.176800519625112e-05, "loss": 0.5784, "step": 7738 }, { "epoch": 1.323211773617574, "grad_norm": 1.4375, "learning_rate": 1.176621004220047e-05, "loss": 0.4576, "step": 7739 }, { "epoch": 1.3233849150524835, "grad_norm": 1.3671875, "learning_rate": 1.1764414829397693e-05, "loss": 0.4263, "step": 7740 }, { "epoch": 1.3235580564873932, "grad_norm": 1.40625, "learning_rate": 1.1762619557902498e-05, "loss": 0.4991, "step": 7741 }, { "epoch": 1.3237311979223028, "grad_norm": 1.40625, "learning_rate": 1.1760824227774603e-05, "loss": 0.4961, "step": 7742 }, { "epoch": 1.3239043393572123, "grad_norm": 1.53125, "learning_rate": 1.1759028839073738e-05, "loss": 0.4967, "step": 7743 }, { "epoch": 1.324077480792122, "grad_norm": 1.375, "learning_rate": 1.1757233391859617e-05, "loss": 0.5014, "step": 7744 }, { "epoch": 1.3242506222270318, "grad_norm": 1.3125, "learning_rate": 1.1755437886191972e-05, "loss": 0.4663, "step": 7745 }, { "epoch": 1.3244237636619414, "grad_norm": 1.3828125, "learning_rate": 1.1753642322130521e-05, "loss": 0.5552, "step": 7746 }, { "epoch": 1.324596905096851, "grad_norm": 1.453125, "learning_rate": 1.1751846699734996e-05, "loss": 0.4952, "step": 7747 }, { "epoch": 1.3247700465317607, "grad_norm": 1.28125, "learning_rate": 1.1750051019065132e-05, "loss": 0.4896, "step": 7748 }, { "epoch": 1.3249431879666702, "grad_norm": 1.375, "learning_rate": 1.1748255280180656e-05, "loss": 0.4903, "step": 7749 }, { "epoch": 1.32511632940158, "grad_norm": 1.4453125, "learning_rate": 1.1746459483141306e-05, "loss": 0.5038, "step": 7750 }, { "epoch": 1.3252894708364895, "grad_norm": 1.34375, "learning_rate": 1.1744663628006815e-05, "loss": 0.4633, "step": 7751 }, { "epoch": 1.3254626122713993, "grad_norm": 1.4296875, "learning_rate": 1.1742867714836922e-05, "loss": 0.5728, "step": 7752 }, { "epoch": 1.3256357537063088, "grad_norm": 1.4296875, "learning_rate": 1.1741071743691371e-05, "loss": 0.4375, "step": 7753 }, { "epoch": 1.3258088951412184, "grad_norm": 1.4921875, "learning_rate": 1.1739275714629898e-05, "loss": 0.4797, "step": 7754 }, { "epoch": 1.3259820365761281, "grad_norm": 1.3515625, "learning_rate": 1.1737479627712255e-05, "loss": 0.5235, "step": 7755 }, { "epoch": 1.3261551780110379, "grad_norm": 1.34375, "learning_rate": 1.1735683482998179e-05, "loss": 0.4991, "step": 7756 }, { "epoch": 1.3263283194459474, "grad_norm": 1.515625, "learning_rate": 1.1733887280547425e-05, "loss": 0.4672, "step": 7757 }, { "epoch": 1.326501460880857, "grad_norm": 1.4453125, "learning_rate": 1.1732091020419736e-05, "loss": 0.427, "step": 7758 }, { "epoch": 1.3266746023157667, "grad_norm": 1.5625, "learning_rate": 1.173029470267487e-05, "loss": 0.464, "step": 7759 }, { "epoch": 1.3268477437506763, "grad_norm": 1.453125, "learning_rate": 1.172849832737258e-05, "loss": 0.4702, "step": 7760 }, { "epoch": 1.327020885185586, "grad_norm": 1.5390625, "learning_rate": 1.1726701894572619e-05, "loss": 0.4493, "step": 7761 }, { "epoch": 1.3271940266204956, "grad_norm": 1.390625, "learning_rate": 1.1724905404334746e-05, "loss": 0.4817, "step": 7762 }, { "epoch": 1.3273671680554053, "grad_norm": 1.421875, "learning_rate": 1.1723108856718716e-05, "loss": 0.541, "step": 7763 }, { "epoch": 1.3275403094903149, "grad_norm": 1.3671875, "learning_rate": 1.1721312251784298e-05, "loss": 0.4557, "step": 7764 }, { "epoch": 1.3277134509252244, "grad_norm": 1.4375, "learning_rate": 1.1719515589591248e-05, "loss": 0.4476, "step": 7765 }, { "epoch": 1.3278865923601342, "grad_norm": 1.4296875, "learning_rate": 1.1717718870199338e-05, "loss": 0.5086, "step": 7766 }, { "epoch": 1.328059733795044, "grad_norm": 1.3359375, "learning_rate": 1.171592209366833e-05, "loss": 0.4439, "step": 7767 }, { "epoch": 1.3282328752299535, "grad_norm": 1.4609375, "learning_rate": 1.1714125260057994e-05, "loss": 0.4914, "step": 7768 }, { "epoch": 1.328406016664863, "grad_norm": 1.3515625, "learning_rate": 1.1712328369428098e-05, "loss": 0.4367, "step": 7769 }, { "epoch": 1.3285791580997728, "grad_norm": 1.359375, "learning_rate": 1.1710531421838422e-05, "loss": 0.5001, "step": 7770 }, { "epoch": 1.3287522995346823, "grad_norm": 1.5, "learning_rate": 1.1708734417348737e-05, "loss": 0.5643, "step": 7771 }, { "epoch": 1.328925440969592, "grad_norm": 1.4375, "learning_rate": 1.1706937356018818e-05, "loss": 0.5125, "step": 7772 }, { "epoch": 1.3290985824045016, "grad_norm": 1.4921875, "learning_rate": 1.1705140237908441e-05, "loss": 0.5233, "step": 7773 }, { "epoch": 1.3292717238394114, "grad_norm": 1.3359375, "learning_rate": 1.1703343063077393e-05, "loss": 0.4643, "step": 7774 }, { "epoch": 1.329444865274321, "grad_norm": 1.4375, "learning_rate": 1.1701545831585451e-05, "loss": 0.4329, "step": 7775 }, { "epoch": 1.3296180067092305, "grad_norm": 1.40625, "learning_rate": 1.1699748543492403e-05, "loss": 0.5558, "step": 7776 }, { "epoch": 1.3297911481441402, "grad_norm": 1.515625, "learning_rate": 1.1697951198858032e-05, "loss": 0.4958, "step": 7777 }, { "epoch": 1.32996428957905, "grad_norm": 1.4140625, "learning_rate": 1.1696153797742124e-05, "loss": 0.6363, "step": 7778 }, { "epoch": 1.3301374310139595, "grad_norm": 1.421875, "learning_rate": 1.1694356340204475e-05, "loss": 0.4532, "step": 7779 }, { "epoch": 1.330310572448869, "grad_norm": 1.4296875, "learning_rate": 1.1692558826304867e-05, "loss": 0.4728, "step": 7780 }, { "epoch": 1.3304837138837788, "grad_norm": 1.46875, "learning_rate": 1.1690761256103107e-05, "loss": 0.5084, "step": 7781 }, { "epoch": 1.3306568553186884, "grad_norm": 1.78125, "learning_rate": 1.1688963629658978e-05, "loss": 0.5082, "step": 7782 }, { "epoch": 1.3308299967535981, "grad_norm": 1.3125, "learning_rate": 1.1687165947032285e-05, "loss": 0.4518, "step": 7783 }, { "epoch": 1.3310031381885077, "grad_norm": 1.3828125, "learning_rate": 1.1685368208282819e-05, "loss": 0.4773, "step": 7784 }, { "epoch": 1.3311762796234174, "grad_norm": 1.5234375, "learning_rate": 1.1683570413470384e-05, "loss": 0.524, "step": 7785 }, { "epoch": 1.331349421058327, "grad_norm": 1.3984375, "learning_rate": 1.1681772562654792e-05, "loss": 0.4778, "step": 7786 }, { "epoch": 1.3315225624932365, "grad_norm": 1.4296875, "learning_rate": 1.1679974655895836e-05, "loss": 0.5269, "step": 7787 }, { "epoch": 1.3316957039281463, "grad_norm": 1.40625, "learning_rate": 1.1678176693253327e-05, "loss": 0.4748, "step": 7788 }, { "epoch": 1.331868845363056, "grad_norm": 1.40625, "learning_rate": 1.1676378674787072e-05, "loss": 0.4627, "step": 7789 }, { "epoch": 1.3320419867979656, "grad_norm": 1.3359375, "learning_rate": 1.1674580600556883e-05, "loss": 0.4517, "step": 7790 }, { "epoch": 1.3322151282328751, "grad_norm": 1.40625, "learning_rate": 1.1672782470622573e-05, "loss": 0.4619, "step": 7791 }, { "epoch": 1.332388269667785, "grad_norm": 1.484375, "learning_rate": 1.1670984285043951e-05, "loss": 0.4877, "step": 7792 }, { "epoch": 1.3325614111026944, "grad_norm": 1.453125, "learning_rate": 1.166918604388084e-05, "loss": 0.4452, "step": 7793 }, { "epoch": 1.3327345525376042, "grad_norm": 1.5234375, "learning_rate": 1.166738774719305e-05, "loss": 0.5045, "step": 7794 }, { "epoch": 1.3329076939725137, "grad_norm": 1.3828125, "learning_rate": 1.1665589395040409e-05, "loss": 0.4329, "step": 7795 }, { "epoch": 1.3330808354074235, "grad_norm": 1.8515625, "learning_rate": 1.1663790987482729e-05, "loss": 0.5153, "step": 7796 }, { "epoch": 1.333253976842333, "grad_norm": 1.546875, "learning_rate": 1.1661992524579836e-05, "loss": 0.4498, "step": 7797 }, { "epoch": 1.3334271182772426, "grad_norm": 1.4296875, "learning_rate": 1.166019400639156e-05, "loss": 0.4905, "step": 7798 }, { "epoch": 1.3336002597121523, "grad_norm": 1.40625, "learning_rate": 1.1658395432977725e-05, "loss": 0.4932, "step": 7799 }, { "epoch": 1.333773401147062, "grad_norm": 1.40625, "learning_rate": 1.1656596804398159e-05, "loss": 0.5808, "step": 7800 }, { "epoch": 1.3339465425819717, "grad_norm": 1.328125, "learning_rate": 1.165479812071269e-05, "loss": 0.4548, "step": 7801 }, { "epoch": 1.3341196840168812, "grad_norm": 1.4375, "learning_rate": 1.1652999381981157e-05, "loss": 0.4745, "step": 7802 }, { "epoch": 1.334292825451791, "grad_norm": 1.40625, "learning_rate": 1.1651200588263385e-05, "loss": 0.4537, "step": 7803 }, { "epoch": 1.3344659668867005, "grad_norm": 1.375, "learning_rate": 1.1649401739619219e-05, "loss": 0.4844, "step": 7804 }, { "epoch": 1.3346391083216103, "grad_norm": 1.4296875, "learning_rate": 1.1647602836108493e-05, "loss": 0.4697, "step": 7805 }, { "epoch": 1.3348122497565198, "grad_norm": 1.21875, "learning_rate": 1.1645803877791046e-05, "loss": 0.4183, "step": 7806 }, { "epoch": 1.3349853911914296, "grad_norm": 1.25, "learning_rate": 1.1644004864726718e-05, "loss": 0.4083, "step": 7807 }, { "epoch": 1.335158532626339, "grad_norm": 2.03125, "learning_rate": 1.1642205796975357e-05, "loss": 0.4218, "step": 7808 }, { "epoch": 1.3353316740612489, "grad_norm": 1.4609375, "learning_rate": 1.1640406674596807e-05, "loss": 0.5052, "step": 7809 }, { "epoch": 1.3355048154961584, "grad_norm": 1.5390625, "learning_rate": 1.1638607497650911e-05, "loss": 0.509, "step": 7810 }, { "epoch": 1.3356779569310682, "grad_norm": 1.5234375, "learning_rate": 1.163680826619752e-05, "loss": 0.5375, "step": 7811 }, { "epoch": 1.3358510983659777, "grad_norm": 1.609375, "learning_rate": 1.1635008980296486e-05, "loss": 0.49, "step": 7812 }, { "epoch": 1.3360242398008872, "grad_norm": 1.40625, "learning_rate": 1.1633209640007662e-05, "loss": 0.4982, "step": 7813 }, { "epoch": 1.336197381235797, "grad_norm": 1.4296875, "learning_rate": 1.1631410245390899e-05, "loss": 0.5117, "step": 7814 }, { "epoch": 1.3363705226707066, "grad_norm": 1.4375, "learning_rate": 1.1629610796506055e-05, "loss": 0.487, "step": 7815 }, { "epoch": 1.3365436641056163, "grad_norm": 1.5, "learning_rate": 1.1627811293412988e-05, "loss": 0.4956, "step": 7816 }, { "epoch": 1.3367168055405259, "grad_norm": 1.3046875, "learning_rate": 1.1626011736171556e-05, "loss": 0.3731, "step": 7817 }, { "epoch": 1.3368899469754356, "grad_norm": 1.4296875, "learning_rate": 1.1624212124841619e-05, "loss": 0.5722, "step": 7818 }, { "epoch": 1.3370630884103452, "grad_norm": 1.3125, "learning_rate": 1.1622412459483046e-05, "loss": 0.4557, "step": 7819 }, { "epoch": 1.337236229845255, "grad_norm": 1.453125, "learning_rate": 1.1620612740155697e-05, "loss": 0.5176, "step": 7820 }, { "epoch": 1.3374093712801645, "grad_norm": 1.484375, "learning_rate": 1.1618812966919442e-05, "loss": 0.4826, "step": 7821 }, { "epoch": 1.3375825127150742, "grad_norm": 1.4453125, "learning_rate": 1.1617013139834148e-05, "loss": 0.4548, "step": 7822 }, { "epoch": 1.3377556541499838, "grad_norm": 1.4609375, "learning_rate": 1.161521325895968e-05, "loss": 0.5035, "step": 7823 }, { "epoch": 1.3379287955848933, "grad_norm": 1.53125, "learning_rate": 1.1613413324355923e-05, "loss": 0.4775, "step": 7824 }, { "epoch": 1.338101937019803, "grad_norm": 1.3359375, "learning_rate": 1.1611613336082739e-05, "loss": 0.4799, "step": 7825 }, { "epoch": 1.3382750784547128, "grad_norm": 1.46875, "learning_rate": 1.160981329420001e-05, "loss": 0.4937, "step": 7826 }, { "epoch": 1.3384482198896224, "grad_norm": 1.515625, "learning_rate": 1.160801319876761e-05, "loss": 0.4829, "step": 7827 }, { "epoch": 1.338621361324532, "grad_norm": 1.3203125, "learning_rate": 1.160621304984542e-05, "loss": 0.4048, "step": 7828 }, { "epoch": 1.3387945027594417, "grad_norm": 1.3359375, "learning_rate": 1.1604412847493323e-05, "loss": 0.4484, "step": 7829 }, { "epoch": 1.3389676441943512, "grad_norm": 1.3515625, "learning_rate": 1.1602612591771199e-05, "loss": 0.4893, "step": 7830 }, { "epoch": 1.339140785629261, "grad_norm": 1.2578125, "learning_rate": 1.1600812282738936e-05, "loss": 0.4572, "step": 7831 }, { "epoch": 1.3393139270641705, "grad_norm": 1.359375, "learning_rate": 1.1599011920456416e-05, "loss": 0.449, "step": 7832 }, { "epoch": 1.3394870684990803, "grad_norm": 1.4921875, "learning_rate": 1.159721150498353e-05, "loss": 0.5193, "step": 7833 }, { "epoch": 1.3396602099339898, "grad_norm": 1.5859375, "learning_rate": 1.1595411036380164e-05, "loss": 0.497, "step": 7834 }, { "epoch": 1.3398333513688994, "grad_norm": 1.4453125, "learning_rate": 1.1593610514706217e-05, "loss": 0.4879, "step": 7835 }, { "epoch": 1.3400064928038091, "grad_norm": 1.3984375, "learning_rate": 1.1591809940021577e-05, "loss": 0.4769, "step": 7836 }, { "epoch": 1.3401796342387189, "grad_norm": 1.4765625, "learning_rate": 1.1590009312386141e-05, "loss": 0.5199, "step": 7837 }, { "epoch": 1.3403527756736284, "grad_norm": 1.4140625, "learning_rate": 1.1588208631859808e-05, "loss": 0.4387, "step": 7838 }, { "epoch": 1.340525917108538, "grad_norm": 1.5546875, "learning_rate": 1.1586407898502468e-05, "loss": 0.4953, "step": 7839 }, { "epoch": 1.3406990585434477, "grad_norm": 1.4453125, "learning_rate": 1.1584607112374033e-05, "loss": 0.5041, "step": 7840 }, { "epoch": 1.3408721999783573, "grad_norm": 1.5, "learning_rate": 1.15828062735344e-05, "loss": 0.5404, "step": 7841 }, { "epoch": 1.341045341413267, "grad_norm": 1.4140625, "learning_rate": 1.1581005382043474e-05, "loss": 0.4962, "step": 7842 }, { "epoch": 1.3412184828481766, "grad_norm": 1.40625, "learning_rate": 1.157920443796116e-05, "loss": 0.4972, "step": 7843 }, { "epoch": 1.3413916242830863, "grad_norm": 1.3671875, "learning_rate": 1.1577403441347363e-05, "loss": 0.4452, "step": 7844 }, { "epoch": 1.3415647657179959, "grad_norm": 1.4140625, "learning_rate": 1.1575602392261998e-05, "loss": 0.4901, "step": 7845 }, { "epoch": 1.3417379071529054, "grad_norm": 1.3046875, "learning_rate": 1.157380129076497e-05, "loss": 0.4432, "step": 7846 }, { "epoch": 1.3419110485878152, "grad_norm": 1.390625, "learning_rate": 1.15720001369162e-05, "loss": 0.5035, "step": 7847 }, { "epoch": 1.342084190022725, "grad_norm": 1.40625, "learning_rate": 1.1570198930775594e-05, "loss": 0.4396, "step": 7848 }, { "epoch": 1.3422573314576345, "grad_norm": 1.4140625, "learning_rate": 1.1568397672403071e-05, "loss": 0.5, "step": 7849 }, { "epoch": 1.342430472892544, "grad_norm": 1.484375, "learning_rate": 1.156659636185855e-05, "loss": 0.5332, "step": 7850 }, { "epoch": 1.3426036143274538, "grad_norm": 1.453125, "learning_rate": 1.1564794999201949e-05, "loss": 0.4737, "step": 7851 }, { "epoch": 1.3427767557623633, "grad_norm": 1.4296875, "learning_rate": 1.1562993584493195e-05, "loss": 0.4495, "step": 7852 }, { "epoch": 1.342949897197273, "grad_norm": 1.3671875, "learning_rate": 1.1561192117792203e-05, "loss": 0.4441, "step": 7853 }, { "epoch": 1.3431230386321826, "grad_norm": 1.4375, "learning_rate": 1.1559390599158904e-05, "loss": 0.4474, "step": 7854 }, { "epoch": 1.3432961800670924, "grad_norm": 1.4453125, "learning_rate": 1.1557589028653225e-05, "loss": 0.4937, "step": 7855 }, { "epoch": 1.343469321502002, "grad_norm": 1.375, "learning_rate": 1.1555787406335086e-05, "loss": 0.431, "step": 7856 }, { "epoch": 1.3436424629369115, "grad_norm": 1.3203125, "learning_rate": 1.1553985732264429e-05, "loss": 0.4177, "step": 7857 }, { "epoch": 1.3438156043718212, "grad_norm": 1.4765625, "learning_rate": 1.1552184006501175e-05, "loss": 0.5132, "step": 7858 }, { "epoch": 1.343988745806731, "grad_norm": 1.3984375, "learning_rate": 1.1550382229105266e-05, "loss": 0.4947, "step": 7859 }, { "epoch": 1.3441618872416405, "grad_norm": 1.390625, "learning_rate": 1.1548580400136631e-05, "loss": 0.477, "step": 7860 }, { "epoch": 1.34433502867655, "grad_norm": 1.4609375, "learning_rate": 1.1546778519655209e-05, "loss": 0.4549, "step": 7861 }, { "epoch": 1.3445081701114598, "grad_norm": 1.4921875, "learning_rate": 1.1544976587720944e-05, "loss": 0.4795, "step": 7862 }, { "epoch": 1.3446813115463694, "grad_norm": 1.3515625, "learning_rate": 1.1543174604393768e-05, "loss": 0.4403, "step": 7863 }, { "epoch": 1.3448544529812791, "grad_norm": 1.3125, "learning_rate": 1.1541372569733628e-05, "loss": 0.4469, "step": 7864 }, { "epoch": 1.3450275944161887, "grad_norm": 1.3359375, "learning_rate": 1.153957048380047e-05, "loss": 0.4719, "step": 7865 }, { "epoch": 1.3452007358510985, "grad_norm": 1.4765625, "learning_rate": 1.153776834665423e-05, "loss": 0.4563, "step": 7866 }, { "epoch": 1.345373877286008, "grad_norm": 1.328125, "learning_rate": 1.1535966158354866e-05, "loss": 0.4708, "step": 7867 }, { "epoch": 1.3455470187209175, "grad_norm": 1.3984375, "learning_rate": 1.153416391896232e-05, "loss": 0.4495, "step": 7868 }, { "epoch": 1.3457201601558273, "grad_norm": 1.5078125, "learning_rate": 1.1532361628536546e-05, "loss": 0.4903, "step": 7869 }, { "epoch": 1.345893301590737, "grad_norm": 1.328125, "learning_rate": 1.1530559287137495e-05, "loss": 0.4679, "step": 7870 }, { "epoch": 1.3460664430256466, "grad_norm": 1.3671875, "learning_rate": 1.1528756894825125e-05, "loss": 0.472, "step": 7871 }, { "epoch": 1.3462395844605561, "grad_norm": 1.484375, "learning_rate": 1.1526954451659383e-05, "loss": 0.5124, "step": 7872 }, { "epoch": 1.346412725895466, "grad_norm": 1.484375, "learning_rate": 1.1525151957700231e-05, "loss": 0.5075, "step": 7873 }, { "epoch": 1.3465858673303754, "grad_norm": 1.3515625, "learning_rate": 1.1523349413007633e-05, "loss": 0.4469, "step": 7874 }, { "epoch": 1.3467590087652852, "grad_norm": 1.390625, "learning_rate": 1.1521546817641545e-05, "loss": 0.4583, "step": 7875 }, { "epoch": 1.3469321502001947, "grad_norm": 1.3671875, "learning_rate": 1.1519744171661928e-05, "loss": 0.4846, "step": 7876 }, { "epoch": 1.3471052916351045, "grad_norm": 1.3359375, "learning_rate": 1.1517941475128747e-05, "loss": 0.4394, "step": 7877 }, { "epoch": 1.347278433070014, "grad_norm": 1.515625, "learning_rate": 1.1516138728101971e-05, "loss": 0.5125, "step": 7878 }, { "epoch": 1.3474515745049236, "grad_norm": 1.3828125, "learning_rate": 1.1514335930641565e-05, "loss": 0.44, "step": 7879 }, { "epoch": 1.3476247159398334, "grad_norm": 1.34375, "learning_rate": 1.1512533082807498e-05, "loss": 0.4668, "step": 7880 }, { "epoch": 1.3477978573747431, "grad_norm": 1.4296875, "learning_rate": 1.1510730184659744e-05, "loss": 0.4884, "step": 7881 }, { "epoch": 1.3479709988096527, "grad_norm": 1.3515625, "learning_rate": 1.150892723625827e-05, "loss": 0.4822, "step": 7882 }, { "epoch": 1.3481441402445622, "grad_norm": 1.3515625, "learning_rate": 1.1507124237663052e-05, "loss": 0.4165, "step": 7883 }, { "epoch": 1.348317281679472, "grad_norm": 1.3828125, "learning_rate": 1.1505321188934069e-05, "loss": 0.5399, "step": 7884 }, { "epoch": 1.3484904231143815, "grad_norm": 1.3359375, "learning_rate": 1.15035180901313e-05, "loss": 0.4219, "step": 7885 }, { "epoch": 1.3486635645492913, "grad_norm": 1.5390625, "learning_rate": 1.1501714941314716e-05, "loss": 0.4479, "step": 7886 }, { "epoch": 1.3488367059842008, "grad_norm": 1.515625, "learning_rate": 1.1499911742544304e-05, "loss": 0.4932, "step": 7887 }, { "epoch": 1.3490098474191106, "grad_norm": 1.4375, "learning_rate": 1.1498108493880047e-05, "loss": 0.4886, "step": 7888 }, { "epoch": 1.34918298885402, "grad_norm": 1.421875, "learning_rate": 1.1496305195381924e-05, "loss": 0.6566, "step": 7889 }, { "epoch": 1.3493561302889296, "grad_norm": 1.3515625, "learning_rate": 1.1494501847109928e-05, "loss": 0.4411, "step": 7890 }, { "epoch": 1.3495292717238394, "grad_norm": 2.34375, "learning_rate": 1.1492698449124042e-05, "loss": 0.5054, "step": 7891 }, { "epoch": 1.3497024131587492, "grad_norm": 1.28125, "learning_rate": 1.1490895001484258e-05, "loss": 0.4034, "step": 7892 }, { "epoch": 1.3498755545936587, "grad_norm": 1.4375, "learning_rate": 1.148909150425056e-05, "loss": 0.4322, "step": 7893 }, { "epoch": 1.3500486960285683, "grad_norm": 1.484375, "learning_rate": 1.1487287957482947e-05, "loss": 0.4641, "step": 7894 }, { "epoch": 1.350221837463478, "grad_norm": 1.3671875, "learning_rate": 1.1485484361241415e-05, "loss": 0.43, "step": 7895 }, { "epoch": 1.3503949788983876, "grad_norm": 1.296875, "learning_rate": 1.1483680715585951e-05, "loss": 0.4951, "step": 7896 }, { "epoch": 1.3505681203332973, "grad_norm": 1.484375, "learning_rate": 1.1481877020576565e-05, "loss": 0.4805, "step": 7897 }, { "epoch": 1.3507412617682069, "grad_norm": 1.4140625, "learning_rate": 1.1480073276273243e-05, "loss": 0.4371, "step": 7898 }, { "epoch": 1.3509144032031166, "grad_norm": 1.5234375, "learning_rate": 1.1478269482735991e-05, "loss": 0.5336, "step": 7899 }, { "epoch": 1.3510875446380262, "grad_norm": 1.4375, "learning_rate": 1.1476465640024814e-05, "loss": 0.4947, "step": 7900 }, { "epoch": 1.3512606860729357, "grad_norm": 1.375, "learning_rate": 1.1474661748199712e-05, "loss": 0.4515, "step": 7901 }, { "epoch": 1.3514338275078455, "grad_norm": 1.4296875, "learning_rate": 1.1472857807320694e-05, "loss": 0.5055, "step": 7902 }, { "epoch": 1.3516069689427552, "grad_norm": 1.34375, "learning_rate": 1.1471053817447766e-05, "loss": 0.4938, "step": 7903 }, { "epoch": 1.3517801103776648, "grad_norm": 1.5, "learning_rate": 1.1469249778640935e-05, "loss": 0.5587, "step": 7904 }, { "epoch": 1.3519532518125743, "grad_norm": 1.375, "learning_rate": 1.1467445690960213e-05, "loss": 0.5165, "step": 7905 }, { "epoch": 1.352126393247484, "grad_norm": 1.375, "learning_rate": 1.146564155446561e-05, "loss": 0.5039, "step": 7906 }, { "epoch": 1.3522995346823936, "grad_norm": 1.4296875, "learning_rate": 1.1463837369217145e-05, "loss": 0.5826, "step": 7907 }, { "epoch": 1.3524726761173034, "grad_norm": 1.390625, "learning_rate": 1.1462033135274831e-05, "loss": 0.47, "step": 7908 }, { "epoch": 1.352645817552213, "grad_norm": 1.28125, "learning_rate": 1.1460228852698682e-05, "loss": 0.427, "step": 7909 }, { "epoch": 1.3528189589871227, "grad_norm": 1.3828125, "learning_rate": 1.1458424521548717e-05, "loss": 0.4769, "step": 7910 }, { "epoch": 1.3529921004220322, "grad_norm": 1.3671875, "learning_rate": 1.1456620141884957e-05, "loss": 0.4797, "step": 7911 }, { "epoch": 1.3531652418569418, "grad_norm": 1.28125, "learning_rate": 1.1454815713767431e-05, "loss": 0.4646, "step": 7912 }, { "epoch": 1.3533383832918515, "grad_norm": 1.3046875, "learning_rate": 1.1453011237256152e-05, "loss": 0.484, "step": 7913 }, { "epoch": 1.3535115247267613, "grad_norm": 1.5546875, "learning_rate": 1.1451206712411151e-05, "loss": 0.4571, "step": 7914 }, { "epoch": 1.3536846661616708, "grad_norm": 1.421875, "learning_rate": 1.1449402139292447e-05, "loss": 0.5244, "step": 7915 }, { "epoch": 1.3538578075965804, "grad_norm": 1.375, "learning_rate": 1.144759751796008e-05, "loss": 0.4775, "step": 7916 }, { "epoch": 1.3540309490314901, "grad_norm": 1.5546875, "learning_rate": 1.1445792848474073e-05, "loss": 0.4118, "step": 7917 }, { "epoch": 1.3542040904663997, "grad_norm": 1.3046875, "learning_rate": 1.1443988130894457e-05, "loss": 0.4301, "step": 7918 }, { "epoch": 1.3543772319013094, "grad_norm": 1.515625, "learning_rate": 1.144218336528127e-05, "loss": 0.5086, "step": 7919 }, { "epoch": 1.354550373336219, "grad_norm": 1.5625, "learning_rate": 1.1440378551694537e-05, "loss": 0.4979, "step": 7920 }, { "epoch": 1.3547235147711287, "grad_norm": 1.3203125, "learning_rate": 1.1438573690194306e-05, "loss": 0.4844, "step": 7921 }, { "epoch": 1.3548966562060383, "grad_norm": 1.4453125, "learning_rate": 1.1436768780840608e-05, "loss": 0.6219, "step": 7922 }, { "epoch": 1.3550697976409478, "grad_norm": 1.375, "learning_rate": 1.1434963823693483e-05, "loss": 0.4166, "step": 7923 }, { "epoch": 1.3552429390758576, "grad_norm": 1.4140625, "learning_rate": 1.1433158818812973e-05, "loss": 0.5316, "step": 7924 }, { "epoch": 1.3554160805107673, "grad_norm": 1.390625, "learning_rate": 1.1431353766259122e-05, "loss": 0.4498, "step": 7925 }, { "epoch": 1.3555892219456769, "grad_norm": 1.4921875, "learning_rate": 1.1429548666091969e-05, "loss": 0.4669, "step": 7926 }, { "epoch": 1.3557623633805864, "grad_norm": 1.4375, "learning_rate": 1.1427743518371563e-05, "loss": 0.4942, "step": 7927 }, { "epoch": 1.3559355048154962, "grad_norm": 1.4140625, "learning_rate": 1.1425938323157958e-05, "loss": 0.4502, "step": 7928 }, { "epoch": 1.3561086462504057, "grad_norm": 1.359375, "learning_rate": 1.1424133080511191e-05, "loss": 0.4496, "step": 7929 }, { "epoch": 1.3562817876853155, "grad_norm": 1.6171875, "learning_rate": 1.1422327790491319e-05, "loss": 0.4745, "step": 7930 }, { "epoch": 1.356454929120225, "grad_norm": 1.34375, "learning_rate": 1.1420522453158395e-05, "loss": 0.5486, "step": 7931 }, { "epoch": 1.3566280705551348, "grad_norm": 1.4296875, "learning_rate": 1.141871706857247e-05, "loss": 0.4933, "step": 7932 }, { "epoch": 1.3568012119900443, "grad_norm": 1.90625, "learning_rate": 1.1416911636793602e-05, "loss": 0.4673, "step": 7933 }, { "epoch": 1.3569743534249539, "grad_norm": 1.3671875, "learning_rate": 1.1415106157881846e-05, "loss": 0.4745, "step": 7934 }, { "epoch": 1.3571474948598636, "grad_norm": 1.453125, "learning_rate": 1.141330063189726e-05, "loss": 0.4923, "step": 7935 }, { "epoch": 1.3573206362947734, "grad_norm": 1.5546875, "learning_rate": 1.1411495058899903e-05, "loss": 0.4803, "step": 7936 }, { "epoch": 1.357493777729683, "grad_norm": 1.3671875, "learning_rate": 1.1409689438949836e-05, "loss": 0.4684, "step": 7937 }, { "epoch": 1.3576669191645925, "grad_norm": 1.3203125, "learning_rate": 1.1407883772107128e-05, "loss": 0.4395, "step": 7938 }, { "epoch": 1.3578400605995022, "grad_norm": 1.375, "learning_rate": 1.140607805843184e-05, "loss": 0.4421, "step": 7939 }, { "epoch": 1.3580132020344118, "grad_norm": 1.390625, "learning_rate": 1.1404272297984038e-05, "loss": 0.5228, "step": 7940 }, { "epoch": 1.3581863434693215, "grad_norm": 1.4140625, "learning_rate": 1.1402466490823787e-05, "loss": 0.4982, "step": 7941 }, { "epoch": 1.358359484904231, "grad_norm": 1.4296875, "learning_rate": 1.140066063701116e-05, "loss": 0.6183, "step": 7942 }, { "epoch": 1.3585326263391408, "grad_norm": 1.359375, "learning_rate": 1.1398854736606229e-05, "loss": 0.4015, "step": 7943 }, { "epoch": 1.3587057677740504, "grad_norm": 1.46875, "learning_rate": 1.1397048789669061e-05, "loss": 0.4642, "step": 7944 }, { "epoch": 1.3588789092089602, "grad_norm": 1.4609375, "learning_rate": 1.1395242796259736e-05, "loss": 0.4922, "step": 7945 }, { "epoch": 1.3590520506438697, "grad_norm": 1.3359375, "learning_rate": 1.1393436756438325e-05, "loss": 0.4588, "step": 7946 }, { "epoch": 1.3592251920787795, "grad_norm": 1.53125, "learning_rate": 1.1391630670264908e-05, "loss": 0.4653, "step": 7947 }, { "epoch": 1.359398333513689, "grad_norm": 1.578125, "learning_rate": 1.1389824537799561e-05, "loss": 0.4501, "step": 7948 }, { "epoch": 1.3595714749485985, "grad_norm": 1.5234375, "learning_rate": 1.1388018359102363e-05, "loss": 0.4523, "step": 7949 }, { "epoch": 1.3597446163835083, "grad_norm": 1.4453125, "learning_rate": 1.1386212134233405e-05, "loss": 0.4813, "step": 7950 }, { "epoch": 1.3599177578184178, "grad_norm": 1.4453125, "learning_rate": 1.1384405863252758e-05, "loss": 0.5275, "step": 7951 }, { "epoch": 1.3600908992533276, "grad_norm": 1.4296875, "learning_rate": 1.1382599546220516e-05, "loss": 0.4321, "step": 7952 }, { "epoch": 1.3602640406882371, "grad_norm": 1.3359375, "learning_rate": 1.1380793183196759e-05, "loss": 0.4521, "step": 7953 }, { "epoch": 1.360437182123147, "grad_norm": 1.5234375, "learning_rate": 1.1378986774241578e-05, "loss": 0.466, "step": 7954 }, { "epoch": 1.3606103235580564, "grad_norm": 1.40625, "learning_rate": 1.137718031941506e-05, "loss": 0.4652, "step": 7955 }, { "epoch": 1.3607834649929662, "grad_norm": 1.5234375, "learning_rate": 1.13753738187773e-05, "loss": 0.4484, "step": 7956 }, { "epoch": 1.3609566064278757, "grad_norm": 1.34375, "learning_rate": 1.1373567272388386e-05, "loss": 0.5084, "step": 7957 }, { "epoch": 1.3611297478627855, "grad_norm": 1.3359375, "learning_rate": 1.1371760680308412e-05, "loss": 0.4813, "step": 7958 }, { "epoch": 1.361302889297695, "grad_norm": 1.421875, "learning_rate": 1.136995404259748e-05, "loss": 0.4774, "step": 7959 }, { "epoch": 1.3614760307326046, "grad_norm": 1.453125, "learning_rate": 1.1368147359315678e-05, "loss": 0.5248, "step": 7960 }, { "epoch": 1.3616491721675144, "grad_norm": 1.3671875, "learning_rate": 1.1366340630523112e-05, "loss": 0.4788, "step": 7961 }, { "epoch": 1.3618223136024241, "grad_norm": 1.3828125, "learning_rate": 1.1364533856279875e-05, "loss": 0.4849, "step": 7962 }, { "epoch": 1.3619954550373337, "grad_norm": 1.296875, "learning_rate": 1.1362727036646073e-05, "loss": 0.4365, "step": 7963 }, { "epoch": 1.3621685964722432, "grad_norm": 1.34375, "learning_rate": 1.1360920171681808e-05, "loss": 0.4709, "step": 7964 }, { "epoch": 1.362341737907153, "grad_norm": 1.421875, "learning_rate": 1.1359113261447183e-05, "loss": 0.4914, "step": 7965 }, { "epoch": 1.3625148793420625, "grad_norm": 1.3671875, "learning_rate": 1.135730630600231e-05, "loss": 0.4285, "step": 7966 }, { "epoch": 1.3626880207769723, "grad_norm": 1.359375, "learning_rate": 1.1355499305407288e-05, "loss": 0.4644, "step": 7967 }, { "epoch": 1.3628611622118818, "grad_norm": 1.40625, "learning_rate": 1.1353692259722229e-05, "loss": 0.4853, "step": 7968 }, { "epoch": 1.3630343036467916, "grad_norm": 1.703125, "learning_rate": 1.1351885169007247e-05, "loss": 0.5296, "step": 7969 }, { "epoch": 1.363207445081701, "grad_norm": 1.7578125, "learning_rate": 1.1350078033322449e-05, "loss": 0.4867, "step": 7970 }, { "epoch": 1.3633805865166106, "grad_norm": 2.078125, "learning_rate": 1.1348270852727955e-05, "loss": 0.4838, "step": 7971 }, { "epoch": 1.3635537279515204, "grad_norm": 1.375, "learning_rate": 1.1346463627283875e-05, "loss": 0.4697, "step": 7972 }, { "epoch": 1.3637268693864302, "grad_norm": 1.4140625, "learning_rate": 1.1344656357050325e-05, "loss": 0.4744, "step": 7973 }, { "epoch": 1.3639000108213397, "grad_norm": 1.5, "learning_rate": 1.1342849042087426e-05, "loss": 0.5239, "step": 7974 }, { "epoch": 1.3640731522562493, "grad_norm": 1.390625, "learning_rate": 1.1341041682455292e-05, "loss": 0.5168, "step": 7975 }, { "epoch": 1.364246293691159, "grad_norm": 1.4765625, "learning_rate": 1.1339234278214051e-05, "loss": 0.5505, "step": 7976 }, { "epoch": 1.3644194351260686, "grad_norm": 1.484375, "learning_rate": 1.133742682942382e-05, "loss": 0.469, "step": 7977 }, { "epoch": 1.3645925765609783, "grad_norm": 1.4375, "learning_rate": 1.133561933614473e-05, "loss": 0.503, "step": 7978 }, { "epoch": 1.3647657179958879, "grad_norm": 1.453125, "learning_rate": 1.1333811798436898e-05, "loss": 0.5484, "step": 7979 }, { "epoch": 1.3649388594307976, "grad_norm": 1.4140625, "learning_rate": 1.1332004216360455e-05, "loss": 0.4777, "step": 7980 }, { "epoch": 1.3651120008657072, "grad_norm": 1.390625, "learning_rate": 1.1330196589975528e-05, "loss": 0.4604, "step": 7981 }, { "epoch": 1.3652851423006167, "grad_norm": 1.5546875, "learning_rate": 1.1328388919342248e-05, "loss": 0.4529, "step": 7982 }, { "epoch": 1.3654582837355265, "grad_norm": 1.515625, "learning_rate": 1.1326581204520751e-05, "loss": 0.465, "step": 7983 }, { "epoch": 1.3656314251704362, "grad_norm": 1.546875, "learning_rate": 1.1324773445571158e-05, "loss": 0.4822, "step": 7984 }, { "epoch": 1.3658045666053458, "grad_norm": 1.390625, "learning_rate": 1.1322965642553611e-05, "loss": 0.4491, "step": 7985 }, { "epoch": 1.3659777080402553, "grad_norm": 1.296875, "learning_rate": 1.1321157795528248e-05, "loss": 0.4469, "step": 7986 }, { "epoch": 1.366150849475165, "grad_norm": 1.34375, "learning_rate": 1.13193499045552e-05, "loss": 0.4265, "step": 7987 }, { "epoch": 1.3663239909100746, "grad_norm": 1.4296875, "learning_rate": 1.1317541969694612e-05, "loss": 0.4648, "step": 7988 }, { "epoch": 1.3664971323449844, "grad_norm": 1.4453125, "learning_rate": 1.1315733991006616e-05, "loss": 0.4306, "step": 7989 }, { "epoch": 1.366670273779894, "grad_norm": 1.5, "learning_rate": 1.1313925968551362e-05, "loss": 0.3972, "step": 7990 }, { "epoch": 1.3668434152148037, "grad_norm": 1.359375, "learning_rate": 1.1312117902388986e-05, "loss": 0.5, "step": 7991 }, { "epoch": 1.3670165566497132, "grad_norm": 1.359375, "learning_rate": 1.1310309792579637e-05, "loss": 0.4881, "step": 7992 }, { "epoch": 1.3671896980846228, "grad_norm": 1.3828125, "learning_rate": 1.130850163918346e-05, "loss": 0.4566, "step": 7993 }, { "epoch": 1.3673628395195325, "grad_norm": 1.515625, "learning_rate": 1.1306693442260602e-05, "loss": 0.5857, "step": 7994 }, { "epoch": 1.3675359809544423, "grad_norm": 1.4921875, "learning_rate": 1.1304885201871212e-05, "loss": 0.4449, "step": 7995 }, { "epoch": 1.3677091223893518, "grad_norm": 1.5, "learning_rate": 1.1303076918075435e-05, "loss": 0.4729, "step": 7996 }, { "epoch": 1.3678822638242614, "grad_norm": 1.4296875, "learning_rate": 1.1301268590933434e-05, "loss": 0.6578, "step": 7997 }, { "epoch": 1.3680554052591711, "grad_norm": 1.3515625, "learning_rate": 1.1299460220505351e-05, "loss": 0.4392, "step": 7998 }, { "epoch": 1.3682285466940807, "grad_norm": 1.5, "learning_rate": 1.1297651806851349e-05, "loss": 0.57, "step": 7999 }, { "epoch": 1.3684016881289904, "grad_norm": 1.46875, "learning_rate": 1.1295843350031582e-05, "loss": 0.5088, "step": 8000 }, { "epoch": 1.3685748295639, "grad_norm": 1.3984375, "learning_rate": 1.1294034850106202e-05, "loss": 0.5097, "step": 8001 }, { "epoch": 1.3687479709988097, "grad_norm": 1.4609375, "learning_rate": 1.129222630713537e-05, "loss": 0.4453, "step": 8002 }, { "epoch": 1.3689211124337193, "grad_norm": 1.3359375, "learning_rate": 1.129041772117925e-05, "loss": 0.4125, "step": 8003 }, { "epoch": 1.3690942538686288, "grad_norm": 1.3828125, "learning_rate": 1.1288609092298004e-05, "loss": 0.51, "step": 8004 }, { "epoch": 1.3692673953035386, "grad_norm": 1.3359375, "learning_rate": 1.128680042055179e-05, "loss": 0.4303, "step": 8005 }, { "epoch": 1.3694405367384483, "grad_norm": 1.4921875, "learning_rate": 1.1284991706000776e-05, "loss": 0.4849, "step": 8006 }, { "epoch": 1.3696136781733579, "grad_norm": 1.484375, "learning_rate": 1.1283182948705127e-05, "loss": 0.4868, "step": 8007 }, { "epoch": 1.3697868196082674, "grad_norm": 1.3515625, "learning_rate": 1.1281374148725014e-05, "loss": 0.451, "step": 8008 }, { "epoch": 1.3699599610431772, "grad_norm": 1.3359375, "learning_rate": 1.1279565306120601e-05, "loss": 0.4851, "step": 8009 }, { "epoch": 1.3701331024780867, "grad_norm": 1.5078125, "learning_rate": 1.127775642095206e-05, "loss": 0.4947, "step": 8010 }, { "epoch": 1.3703062439129965, "grad_norm": 1.4375, "learning_rate": 1.1275947493279564e-05, "loss": 0.4785, "step": 8011 }, { "epoch": 1.370479385347906, "grad_norm": 1.375, "learning_rate": 1.1274138523163281e-05, "loss": 0.4801, "step": 8012 }, { "epoch": 1.3706525267828158, "grad_norm": 1.40625, "learning_rate": 1.127232951066339e-05, "loss": 0.521, "step": 8013 }, { "epoch": 1.3708256682177253, "grad_norm": 1.4140625, "learning_rate": 1.1270520455840072e-05, "loss": 0.4516, "step": 8014 }, { "epoch": 1.3709988096526349, "grad_norm": 1.453125, "learning_rate": 1.1268711358753494e-05, "loss": 0.4193, "step": 8015 }, { "epoch": 1.3711719510875446, "grad_norm": 1.7265625, "learning_rate": 1.1266902219463844e-05, "loss": 0.5087, "step": 8016 }, { "epoch": 1.3713450925224544, "grad_norm": 1.4609375, "learning_rate": 1.1265093038031294e-05, "loss": 0.5276, "step": 8017 }, { "epoch": 1.371518233957364, "grad_norm": 1.4765625, "learning_rate": 1.1263283814516028e-05, "loss": 0.5171, "step": 8018 }, { "epoch": 1.3716913753922735, "grad_norm": 1.5390625, "learning_rate": 1.1261474548978233e-05, "loss": 0.4402, "step": 8019 }, { "epoch": 1.3718645168271832, "grad_norm": 1.3203125, "learning_rate": 1.1259665241478088e-05, "loss": 0.4691, "step": 8020 }, { "epoch": 1.3720376582620928, "grad_norm": 1.4765625, "learning_rate": 1.1257855892075784e-05, "loss": 0.4718, "step": 8021 }, { "epoch": 1.3722107996970025, "grad_norm": 1.4453125, "learning_rate": 1.1256046500831503e-05, "loss": 0.4746, "step": 8022 }, { "epoch": 1.372383941131912, "grad_norm": 1.34375, "learning_rate": 1.1254237067805437e-05, "loss": 0.5729, "step": 8023 }, { "epoch": 1.3725570825668219, "grad_norm": 1.3984375, "learning_rate": 1.1252427593057775e-05, "loss": 0.5126, "step": 8024 }, { "epoch": 1.3727302240017314, "grad_norm": 1.5546875, "learning_rate": 1.1250618076648707e-05, "loss": 0.5015, "step": 8025 }, { "epoch": 1.372903365436641, "grad_norm": 1.3046875, "learning_rate": 1.124880851863843e-05, "loss": 0.4536, "step": 8026 }, { "epoch": 1.3730765068715507, "grad_norm": 1.4921875, "learning_rate": 1.1246998919087133e-05, "loss": 0.4709, "step": 8027 }, { "epoch": 1.3732496483064605, "grad_norm": 1.328125, "learning_rate": 1.1245189278055013e-05, "loss": 0.4215, "step": 8028 }, { "epoch": 1.37342278974137, "grad_norm": 1.375, "learning_rate": 1.1243379595602266e-05, "loss": 0.4458, "step": 8029 }, { "epoch": 1.3735959311762795, "grad_norm": 1.3984375, "learning_rate": 1.1241569871789096e-05, "loss": 0.466, "step": 8030 }, { "epoch": 1.3737690726111893, "grad_norm": 1.3515625, "learning_rate": 1.1239760106675693e-05, "loss": 0.4185, "step": 8031 }, { "epoch": 1.3739422140460988, "grad_norm": 1.375, "learning_rate": 1.1237950300322265e-05, "loss": 0.4715, "step": 8032 }, { "epoch": 1.3741153554810086, "grad_norm": 1.359375, "learning_rate": 1.1236140452789014e-05, "loss": 0.4874, "step": 8033 }, { "epoch": 1.3742884969159181, "grad_norm": 1.3203125, "learning_rate": 1.1234330564136137e-05, "loss": 0.3887, "step": 8034 }, { "epoch": 1.374461638350828, "grad_norm": 1.4296875, "learning_rate": 1.1232520634423852e-05, "loss": 0.5134, "step": 8035 }, { "epoch": 1.3746347797857374, "grad_norm": 1.3359375, "learning_rate": 1.1230710663712352e-05, "loss": 0.4355, "step": 8036 }, { "epoch": 1.374807921220647, "grad_norm": 1.4765625, "learning_rate": 1.1228900652061852e-05, "loss": 0.4462, "step": 8037 }, { "epoch": 1.3749810626555568, "grad_norm": 1.3984375, "learning_rate": 1.122709059953256e-05, "loss": 0.5171, "step": 8038 }, { "epoch": 1.3751542040904665, "grad_norm": 1.390625, "learning_rate": 1.1225280506184683e-05, "loss": 0.44, "step": 8039 }, { "epoch": 1.375327345525376, "grad_norm": 1.4296875, "learning_rate": 1.122347037207844e-05, "loss": 0.5094, "step": 8040 }, { "epoch": 1.3755004869602856, "grad_norm": 1.5234375, "learning_rate": 1.1221660197274038e-05, "loss": 0.4832, "step": 8041 }, { "epoch": 1.3756736283951954, "grad_norm": 1.296875, "learning_rate": 1.1219849981831696e-05, "loss": 0.3877, "step": 8042 }, { "epoch": 1.375846769830105, "grad_norm": 1.3984375, "learning_rate": 1.1218039725811626e-05, "loss": 0.4162, "step": 8043 }, { "epoch": 1.3760199112650147, "grad_norm": 1.4375, "learning_rate": 1.1216229429274048e-05, "loss": 0.4404, "step": 8044 }, { "epoch": 1.3761930526999242, "grad_norm": 1.5234375, "learning_rate": 1.121441909227918e-05, "loss": 0.4951, "step": 8045 }, { "epoch": 1.376366194134834, "grad_norm": 1.4296875, "learning_rate": 1.121260871488724e-05, "loss": 0.5162, "step": 8046 }, { "epoch": 1.3765393355697435, "grad_norm": 1.4609375, "learning_rate": 1.1210798297158454e-05, "loss": 0.4179, "step": 8047 }, { "epoch": 1.376712477004653, "grad_norm": 1.4453125, "learning_rate": 1.120898783915304e-05, "loss": 0.4476, "step": 8048 }, { "epoch": 1.3768856184395628, "grad_norm": 1.4140625, "learning_rate": 1.1207177340931226e-05, "loss": 0.5089, "step": 8049 }, { "epoch": 1.3770587598744726, "grad_norm": 1.3671875, "learning_rate": 1.1205366802553231e-05, "loss": 0.4826, "step": 8050 }, { "epoch": 1.3772319013093821, "grad_norm": 1.3984375, "learning_rate": 1.1203556224079288e-05, "loss": 0.5092, "step": 8051 }, { "epoch": 1.3774050427442917, "grad_norm": 1.4453125, "learning_rate": 1.1201745605569625e-05, "loss": 0.4336, "step": 8052 }, { "epoch": 1.3775781841792014, "grad_norm": 1.2734375, "learning_rate": 1.1199934947084466e-05, "loss": 0.4057, "step": 8053 }, { "epoch": 1.377751325614111, "grad_norm": 1.53125, "learning_rate": 1.1198124248684048e-05, "loss": 0.5417, "step": 8054 }, { "epoch": 1.3779244670490207, "grad_norm": 1.3671875, "learning_rate": 1.1196313510428599e-05, "loss": 0.4286, "step": 8055 }, { "epoch": 1.3780976084839303, "grad_norm": 1.5078125, "learning_rate": 1.1194502732378349e-05, "loss": 0.48, "step": 8056 }, { "epoch": 1.37827074991884, "grad_norm": 1.4375, "learning_rate": 1.1192691914593541e-05, "loss": 0.4621, "step": 8057 }, { "epoch": 1.3784438913537496, "grad_norm": 1.453125, "learning_rate": 1.1190881057134406e-05, "loss": 0.4854, "step": 8058 }, { "epoch": 1.378617032788659, "grad_norm": 1.4296875, "learning_rate": 1.1189070160061184e-05, "loss": 0.4474, "step": 8059 }, { "epoch": 1.3787901742235689, "grad_norm": 1.3515625, "learning_rate": 1.118725922343411e-05, "loss": 0.4549, "step": 8060 }, { "epoch": 1.3789633156584786, "grad_norm": 1.6171875, "learning_rate": 1.1185448247313427e-05, "loss": 0.5378, "step": 8061 }, { "epoch": 1.3791364570933882, "grad_norm": 1.3671875, "learning_rate": 1.1183637231759372e-05, "loss": 0.4774, "step": 8062 }, { "epoch": 1.3793095985282977, "grad_norm": 1.5, "learning_rate": 1.1181826176832193e-05, "loss": 0.5053, "step": 8063 }, { "epoch": 1.3794827399632075, "grad_norm": 1.2890625, "learning_rate": 1.1180015082592132e-05, "loss": 0.4216, "step": 8064 }, { "epoch": 1.379655881398117, "grad_norm": 1.46875, "learning_rate": 1.1178203949099434e-05, "loss": 0.5013, "step": 8065 }, { "epoch": 1.3798290228330268, "grad_norm": 1.3359375, "learning_rate": 1.1176392776414347e-05, "loss": 0.4596, "step": 8066 }, { "epoch": 1.3800021642679363, "grad_norm": 1.578125, "learning_rate": 1.1174581564597112e-05, "loss": 0.5358, "step": 8067 }, { "epoch": 1.380175305702846, "grad_norm": 1.6171875, "learning_rate": 1.1172770313707985e-05, "loss": 0.4944, "step": 8068 }, { "epoch": 1.3803484471377556, "grad_norm": 1.4453125, "learning_rate": 1.1170959023807216e-05, "loss": 0.4996, "step": 8069 }, { "epoch": 1.3805215885726652, "grad_norm": 1.3671875, "learning_rate": 1.1169147694955054e-05, "loss": 0.5383, "step": 8070 }, { "epoch": 1.380694730007575, "grad_norm": 1.40625, "learning_rate": 1.1167336327211752e-05, "loss": 0.5377, "step": 8071 }, { "epoch": 1.3808678714424847, "grad_norm": 1.3984375, "learning_rate": 1.1165524920637565e-05, "loss": 0.4883, "step": 8072 }, { "epoch": 1.3810410128773942, "grad_norm": 1.46875, "learning_rate": 1.1163713475292752e-05, "loss": 0.4983, "step": 8073 }, { "epoch": 1.3812141543123038, "grad_norm": 1.4609375, "learning_rate": 1.1161901991237567e-05, "loss": 0.5132, "step": 8074 }, { "epoch": 1.3813872957472135, "grad_norm": 1.4375, "learning_rate": 1.1160090468532266e-05, "loss": 0.4843, "step": 8075 }, { "epoch": 1.381560437182123, "grad_norm": 1.3359375, "learning_rate": 1.1158278907237113e-05, "loss": 0.4562, "step": 8076 }, { "epoch": 1.3817335786170328, "grad_norm": 1.4609375, "learning_rate": 1.115646730741236e-05, "loss": 0.4431, "step": 8077 }, { "epoch": 1.3819067200519424, "grad_norm": 1.53125, "learning_rate": 1.1154655669118282e-05, "loss": 0.4504, "step": 8078 }, { "epoch": 1.3820798614868521, "grad_norm": 1.4140625, "learning_rate": 1.1152843992415131e-05, "loss": 0.493, "step": 8079 }, { "epoch": 1.3822530029217617, "grad_norm": 1.5078125, "learning_rate": 1.1151032277363181e-05, "loss": 0.4755, "step": 8080 }, { "epoch": 1.3824261443566714, "grad_norm": 1.4375, "learning_rate": 1.1149220524022689e-05, "loss": 0.5121, "step": 8081 }, { "epoch": 1.382599285791581, "grad_norm": 1.359375, "learning_rate": 1.1147408732453926e-05, "loss": 0.4865, "step": 8082 }, { "epoch": 1.3827724272264907, "grad_norm": 1.28125, "learning_rate": 1.1145596902717159e-05, "loss": 0.4378, "step": 8083 }, { "epoch": 1.3829455686614003, "grad_norm": 1.4921875, "learning_rate": 1.114378503487266e-05, "loss": 0.5162, "step": 8084 }, { "epoch": 1.3831187100963098, "grad_norm": 1.40625, "learning_rate": 1.1141973128980703e-05, "loss": 0.4466, "step": 8085 }, { "epoch": 1.3832918515312196, "grad_norm": 1.484375, "learning_rate": 1.1140161185101553e-05, "loss": 0.517, "step": 8086 }, { "epoch": 1.3834649929661291, "grad_norm": 1.5390625, "learning_rate": 1.1138349203295487e-05, "loss": 0.4591, "step": 8087 }, { "epoch": 1.3836381344010389, "grad_norm": 1.375, "learning_rate": 1.1136537183622777e-05, "loss": 0.456, "step": 8088 }, { "epoch": 1.3838112758359484, "grad_norm": 1.453125, "learning_rate": 1.1134725126143701e-05, "loss": 0.4853, "step": 8089 }, { "epoch": 1.3839844172708582, "grad_norm": 1.359375, "learning_rate": 1.1132913030918543e-05, "loss": 0.4871, "step": 8090 }, { "epoch": 1.3841575587057677, "grad_norm": 1.453125, "learning_rate": 1.1131100898007567e-05, "loss": 0.4664, "step": 8091 }, { "epoch": 1.3843307001406775, "grad_norm": 1.390625, "learning_rate": 1.1129288727471066e-05, "loss": 0.5133, "step": 8092 }, { "epoch": 1.384503841575587, "grad_norm": 1.359375, "learning_rate": 1.1127476519369313e-05, "loss": 0.4844, "step": 8093 }, { "epoch": 1.3846769830104968, "grad_norm": 1.421875, "learning_rate": 1.1125664273762593e-05, "loss": 0.445, "step": 8094 }, { "epoch": 1.3848501244454063, "grad_norm": 1.3359375, "learning_rate": 1.112385199071119e-05, "loss": 0.5224, "step": 8095 }, { "epoch": 1.3850232658803159, "grad_norm": 1.46875, "learning_rate": 1.1122039670275387e-05, "loss": 0.4366, "step": 8096 }, { "epoch": 1.3851964073152256, "grad_norm": 1.3046875, "learning_rate": 1.1120227312515475e-05, "loss": 0.4363, "step": 8097 }, { "epoch": 1.3853695487501354, "grad_norm": 1.3515625, "learning_rate": 1.1118414917491733e-05, "loss": 0.4704, "step": 8098 }, { "epoch": 1.385542690185045, "grad_norm": 1.375, "learning_rate": 1.1116602485264454e-05, "loss": 0.4382, "step": 8099 }, { "epoch": 1.3857158316199545, "grad_norm": 1.40625, "learning_rate": 1.1114790015893928e-05, "loss": 0.4779, "step": 8100 }, { "epoch": 1.3858889730548642, "grad_norm": 1.390625, "learning_rate": 1.1112977509440445e-05, "loss": 0.5114, "step": 8101 }, { "epoch": 1.3860621144897738, "grad_norm": 1.4375, "learning_rate": 1.11111649659643e-05, "loss": 0.4259, "step": 8102 }, { "epoch": 1.3862352559246836, "grad_norm": 1.421875, "learning_rate": 1.1109352385525782e-05, "loss": 0.4532, "step": 8103 }, { "epoch": 1.386408397359593, "grad_norm": 1.375, "learning_rate": 1.1107539768185188e-05, "loss": 0.4208, "step": 8104 }, { "epoch": 1.3865815387945029, "grad_norm": 1.3828125, "learning_rate": 1.1105727114002811e-05, "loss": 0.4999, "step": 8105 }, { "epoch": 1.3867546802294124, "grad_norm": 1.484375, "learning_rate": 1.1103914423038954e-05, "loss": 0.4969, "step": 8106 }, { "epoch": 1.386927821664322, "grad_norm": 1.3671875, "learning_rate": 1.1102101695353911e-05, "loss": 0.4971, "step": 8107 }, { "epoch": 1.3871009630992317, "grad_norm": 1.328125, "learning_rate": 1.1100288931007982e-05, "loss": 0.4733, "step": 8108 }, { "epoch": 1.3872741045341415, "grad_norm": 1.546875, "learning_rate": 1.1098476130061467e-05, "loss": 0.5127, "step": 8109 }, { "epoch": 1.387447245969051, "grad_norm": 1.2578125, "learning_rate": 1.1096663292574667e-05, "loss": 0.3916, "step": 8110 }, { "epoch": 1.3876203874039605, "grad_norm": 1.4609375, "learning_rate": 1.1094850418607892e-05, "loss": 0.4357, "step": 8111 }, { "epoch": 1.3877935288388703, "grad_norm": 1.4453125, "learning_rate": 1.1093037508221439e-05, "loss": 0.565, "step": 8112 }, { "epoch": 1.3879666702737798, "grad_norm": 1.5859375, "learning_rate": 1.1091224561475615e-05, "loss": 0.4402, "step": 8113 }, { "epoch": 1.3881398117086896, "grad_norm": 1.3671875, "learning_rate": 1.1089411578430732e-05, "loss": 0.4252, "step": 8114 }, { "epoch": 1.3883129531435991, "grad_norm": 1.3671875, "learning_rate": 1.1087598559147085e-05, "loss": 0.4613, "step": 8115 }, { "epoch": 1.388486094578509, "grad_norm": 1.4140625, "learning_rate": 1.1085785503685e-05, "loss": 0.4391, "step": 8116 }, { "epoch": 1.3886592360134185, "grad_norm": 1.484375, "learning_rate": 1.1083972412104778e-05, "loss": 0.4624, "step": 8117 }, { "epoch": 1.388832377448328, "grad_norm": 1.4453125, "learning_rate": 1.1082159284466733e-05, "loss": 0.4731, "step": 8118 }, { "epoch": 1.3890055188832378, "grad_norm": 1.53125, "learning_rate": 1.1080346120831177e-05, "loss": 0.5827, "step": 8119 }, { "epoch": 1.3891786603181475, "grad_norm": 1.6953125, "learning_rate": 1.1078532921258422e-05, "loss": 0.5024, "step": 8120 }, { "epoch": 1.389351801753057, "grad_norm": 1.4921875, "learning_rate": 1.1076719685808786e-05, "loss": 0.4751, "step": 8121 }, { "epoch": 1.3895249431879666, "grad_norm": 1.5078125, "learning_rate": 1.1074906414542584e-05, "loss": 0.4438, "step": 8122 }, { "epoch": 1.3896980846228764, "grad_norm": 1.40625, "learning_rate": 1.1073093107520136e-05, "loss": 0.5518, "step": 8123 }, { "epoch": 1.389871226057786, "grad_norm": 1.375, "learning_rate": 1.1071279764801758e-05, "loss": 0.4261, "step": 8124 }, { "epoch": 1.3900443674926957, "grad_norm": 1.484375, "learning_rate": 1.106946638644777e-05, "loss": 0.4696, "step": 8125 }, { "epoch": 1.3902175089276052, "grad_norm": 1.3828125, "learning_rate": 1.1067652972518496e-05, "loss": 0.5069, "step": 8126 }, { "epoch": 1.390390650362515, "grad_norm": 1.390625, "learning_rate": 1.1065839523074255e-05, "loss": 0.4611, "step": 8127 }, { "epoch": 1.3905637917974245, "grad_norm": 1.3671875, "learning_rate": 1.1064026038175376e-05, "loss": 0.4907, "step": 8128 }, { "epoch": 1.390736933232334, "grad_norm": 1.484375, "learning_rate": 1.1062212517882176e-05, "loss": 0.446, "step": 8129 }, { "epoch": 1.3909100746672438, "grad_norm": 1.4375, "learning_rate": 1.1060398962254988e-05, "loss": 0.5664, "step": 8130 }, { "epoch": 1.3910832161021536, "grad_norm": 1.359375, "learning_rate": 1.105858537135413e-05, "loss": 0.4711, "step": 8131 }, { "epoch": 1.3912563575370631, "grad_norm": 1.3984375, "learning_rate": 1.105677174523994e-05, "loss": 0.4235, "step": 8132 }, { "epoch": 1.3914294989719727, "grad_norm": 1.3203125, "learning_rate": 1.1054958083972743e-05, "loss": 0.4395, "step": 8133 }, { "epoch": 1.3916026404068824, "grad_norm": 1.3359375, "learning_rate": 1.105314438761287e-05, "loss": 0.4553, "step": 8134 }, { "epoch": 1.391775781841792, "grad_norm": 1.4140625, "learning_rate": 1.1051330656220652e-05, "loss": 0.4841, "step": 8135 }, { "epoch": 1.3919489232767017, "grad_norm": 1.3984375, "learning_rate": 1.1049516889856423e-05, "loss": 0.4799, "step": 8136 }, { "epoch": 1.3921220647116113, "grad_norm": 1.390625, "learning_rate": 1.1047703088580514e-05, "loss": 0.48, "step": 8137 }, { "epoch": 1.392295206146521, "grad_norm": 1.4453125, "learning_rate": 1.1045889252453266e-05, "loss": 0.4785, "step": 8138 }, { "epoch": 1.3924683475814306, "grad_norm": 1.34375, "learning_rate": 1.1044075381535012e-05, "loss": 0.4425, "step": 8139 }, { "epoch": 1.39264148901634, "grad_norm": 1.390625, "learning_rate": 1.104226147588609e-05, "loss": 0.4675, "step": 8140 }, { "epoch": 1.3928146304512499, "grad_norm": 1.484375, "learning_rate": 1.1040447535566839e-05, "loss": 0.5026, "step": 8141 }, { "epoch": 1.3929877718861596, "grad_norm": 1.46875, "learning_rate": 1.1038633560637594e-05, "loss": 0.4334, "step": 8142 }, { "epoch": 1.3931609133210692, "grad_norm": 1.2890625, "learning_rate": 1.1036819551158704e-05, "loss": 0.4533, "step": 8143 }, { "epoch": 1.3933340547559787, "grad_norm": 1.40625, "learning_rate": 1.103500550719051e-05, "loss": 0.4881, "step": 8144 }, { "epoch": 1.3935071961908885, "grad_norm": 1.3359375, "learning_rate": 1.103319142879335e-05, "loss": 0.4308, "step": 8145 }, { "epoch": 1.393680337625798, "grad_norm": 1.4375, "learning_rate": 1.1031377316027569e-05, "loss": 0.4572, "step": 8146 }, { "epoch": 1.3938534790607078, "grad_norm": 1.421875, "learning_rate": 1.102956316895352e-05, "loss": 0.4863, "step": 8147 }, { "epoch": 1.3940266204956173, "grad_norm": 1.6171875, "learning_rate": 1.1027748987631539e-05, "loss": 0.4782, "step": 8148 }, { "epoch": 1.394199761930527, "grad_norm": 1.4765625, "learning_rate": 1.1025934772121984e-05, "loss": 0.546, "step": 8149 }, { "epoch": 1.3943729033654366, "grad_norm": 1.4765625, "learning_rate": 1.10241205224852e-05, "loss": 0.5542, "step": 8150 }, { "epoch": 1.3945460448003462, "grad_norm": 1.34375, "learning_rate": 1.1022306238781533e-05, "loss": 0.4496, "step": 8151 }, { "epoch": 1.394719186235256, "grad_norm": 1.3203125, "learning_rate": 1.1020491921071341e-05, "loss": 0.4511, "step": 8152 }, { "epoch": 1.3948923276701657, "grad_norm": 1.359375, "learning_rate": 1.101867756941497e-05, "loss": 0.463, "step": 8153 }, { "epoch": 1.3950654691050752, "grad_norm": 1.53125, "learning_rate": 1.101686318387278e-05, "loss": 0.4814, "step": 8154 }, { "epoch": 1.3952386105399848, "grad_norm": 1.3828125, "learning_rate": 1.1015048764505121e-05, "loss": 0.5034, "step": 8155 }, { "epoch": 1.3954117519748945, "grad_norm": 1.4296875, "learning_rate": 1.1013234311372353e-05, "loss": 0.5014, "step": 8156 }, { "epoch": 1.395584893409804, "grad_norm": 1.3828125, "learning_rate": 1.1011419824534829e-05, "loss": 0.4806, "step": 8157 }, { "epoch": 1.3957580348447138, "grad_norm": 1.390625, "learning_rate": 1.1009605304052906e-05, "loss": 0.4257, "step": 8158 }, { "epoch": 1.3959311762796234, "grad_norm": 1.3671875, "learning_rate": 1.1007790749986947e-05, "loss": 0.4533, "step": 8159 }, { "epoch": 1.3961043177145331, "grad_norm": 1.4375, "learning_rate": 1.1005976162397309e-05, "loss": 0.44, "step": 8160 }, { "epoch": 1.3962774591494427, "grad_norm": 1.390625, "learning_rate": 1.1004161541344358e-05, "loss": 0.562, "step": 8161 }, { "epoch": 1.3964506005843522, "grad_norm": 1.390625, "learning_rate": 1.1002346886888453e-05, "loss": 0.4695, "step": 8162 }, { "epoch": 1.396623742019262, "grad_norm": 1.53125, "learning_rate": 1.1000532199089958e-05, "loss": 0.4512, "step": 8163 }, { "epoch": 1.3967968834541717, "grad_norm": 1.34375, "learning_rate": 1.0998717478009237e-05, "loss": 0.4325, "step": 8164 }, { "epoch": 1.3969700248890813, "grad_norm": 1.4296875, "learning_rate": 1.0996902723706658e-05, "loss": 0.4165, "step": 8165 }, { "epoch": 1.3971431663239908, "grad_norm": 1.59375, "learning_rate": 1.0995087936242586e-05, "loss": 0.482, "step": 8166 }, { "epoch": 1.3973163077589006, "grad_norm": 1.34375, "learning_rate": 1.099327311567739e-05, "loss": 0.4924, "step": 8167 }, { "epoch": 1.3974894491938101, "grad_norm": 1.46875, "learning_rate": 1.0991458262071441e-05, "loss": 0.5226, "step": 8168 }, { "epoch": 1.39766259062872, "grad_norm": 1.6640625, "learning_rate": 1.0989643375485101e-05, "loss": 0.4997, "step": 8169 }, { "epoch": 1.3978357320636294, "grad_norm": 1.3125, "learning_rate": 1.0987828455978756e-05, "loss": 0.446, "step": 8170 }, { "epoch": 1.3980088734985392, "grad_norm": 1.5546875, "learning_rate": 1.0986013503612763e-05, "loss": 0.4657, "step": 8171 }, { "epoch": 1.3981820149334487, "grad_norm": 1.28125, "learning_rate": 1.0984198518447505e-05, "loss": 0.4577, "step": 8172 }, { "epoch": 1.3983551563683583, "grad_norm": 1.375, "learning_rate": 1.0982383500543355e-05, "loss": 0.4917, "step": 8173 }, { "epoch": 1.398528297803268, "grad_norm": 1.546875, "learning_rate": 1.0980568449960687e-05, "loss": 0.462, "step": 8174 }, { "epoch": 1.3987014392381778, "grad_norm": 1.4140625, "learning_rate": 1.0978753366759878e-05, "loss": 0.4162, "step": 8175 }, { "epoch": 1.3988745806730873, "grad_norm": 1.390625, "learning_rate": 1.0976938251001306e-05, "loss": 0.4675, "step": 8176 }, { "epoch": 1.3990477221079969, "grad_norm": 1.46875, "learning_rate": 1.0975123102745348e-05, "loss": 0.4395, "step": 8177 }, { "epoch": 1.3992208635429066, "grad_norm": 1.3125, "learning_rate": 1.0973307922052392e-05, "loss": 0.4319, "step": 8178 }, { "epoch": 1.3993940049778162, "grad_norm": 1.4765625, "learning_rate": 1.097149270898281e-05, "loss": 0.5041, "step": 8179 }, { "epoch": 1.399567146412726, "grad_norm": 1.34375, "learning_rate": 1.0969677463596988e-05, "loss": 0.4694, "step": 8180 }, { "epoch": 1.3997402878476355, "grad_norm": 1.5546875, "learning_rate": 1.096786218595531e-05, "loss": 0.4983, "step": 8181 }, { "epoch": 1.3999134292825453, "grad_norm": 1.453125, "learning_rate": 1.0966046876118162e-05, "loss": 0.5158, "step": 8182 }, { "epoch": 1.4000865707174548, "grad_norm": 1.3359375, "learning_rate": 1.0964231534145921e-05, "loss": 0.4626, "step": 8183 }, { "epoch": 1.4002597121523643, "grad_norm": 1.453125, "learning_rate": 1.0962416160098984e-05, "loss": 0.5047, "step": 8184 }, { "epoch": 1.400432853587274, "grad_norm": 1.359375, "learning_rate": 1.0960600754037732e-05, "loss": 0.5023, "step": 8185 }, { "epoch": 1.4006059950221839, "grad_norm": 1.3515625, "learning_rate": 1.0958785316022551e-05, "loss": 0.4414, "step": 8186 }, { "epoch": 1.4007791364570934, "grad_norm": 1.421875, "learning_rate": 1.0956969846113842e-05, "loss": 0.4317, "step": 8187 }, { "epoch": 1.400952277892003, "grad_norm": 1.375, "learning_rate": 1.0955154344371986e-05, "loss": 0.4519, "step": 8188 }, { "epoch": 1.4011254193269127, "grad_norm": 1.515625, "learning_rate": 1.0953338810857378e-05, "loss": 0.4865, "step": 8189 }, { "epoch": 1.4012985607618222, "grad_norm": 1.4296875, "learning_rate": 1.0951523245630411e-05, "loss": 0.4548, "step": 8190 }, { "epoch": 1.401471702196732, "grad_norm": 1.4140625, "learning_rate": 1.0949707648751473e-05, "loss": 0.5001, "step": 8191 }, { "epoch": 1.4016448436316415, "grad_norm": 1.5, "learning_rate": 1.094789202028097e-05, "loss": 0.4697, "step": 8192 }, { "epoch": 1.4018179850665513, "grad_norm": 1.296875, "learning_rate": 1.0946076360279292e-05, "loss": 0.4057, "step": 8193 }, { "epoch": 1.4019911265014608, "grad_norm": 1.4296875, "learning_rate": 1.0944260668806836e-05, "loss": 0.489, "step": 8194 }, { "epoch": 1.4021642679363704, "grad_norm": 1.515625, "learning_rate": 1.0942444945924e-05, "loss": 0.4697, "step": 8195 }, { "epoch": 1.4023374093712802, "grad_norm": 1.3984375, "learning_rate": 1.0940629191691184e-05, "loss": 0.4174, "step": 8196 }, { "epoch": 1.40251055080619, "grad_norm": 1.546875, "learning_rate": 1.0938813406168786e-05, "loss": 0.4707, "step": 8197 }, { "epoch": 1.4026836922410995, "grad_norm": 1.6953125, "learning_rate": 1.0936997589417211e-05, "loss": 0.4696, "step": 8198 }, { "epoch": 1.402856833676009, "grad_norm": 1.3984375, "learning_rate": 1.0935181741496858e-05, "loss": 0.4358, "step": 8199 }, { "epoch": 1.4030299751109188, "grad_norm": 1.4375, "learning_rate": 1.0933365862468132e-05, "loss": 0.5677, "step": 8200 }, { "epoch": 1.4032031165458283, "grad_norm": 1.4140625, "learning_rate": 1.0931549952391438e-05, "loss": 0.4195, "step": 8201 }, { "epoch": 1.403376257980738, "grad_norm": 1.453125, "learning_rate": 1.092973401132718e-05, "loss": 0.4224, "step": 8202 }, { "epoch": 1.4035493994156476, "grad_norm": 1.46875, "learning_rate": 1.0927918039335765e-05, "loss": 0.4949, "step": 8203 }, { "epoch": 1.4037225408505574, "grad_norm": 1.5234375, "learning_rate": 1.0926102036477602e-05, "loss": 0.5205, "step": 8204 }, { "epoch": 1.403895682285467, "grad_norm": 1.3984375, "learning_rate": 1.0924286002813096e-05, "loss": 0.4377, "step": 8205 }, { "epoch": 1.4040688237203764, "grad_norm": 1.6015625, "learning_rate": 1.0922469938402658e-05, "loss": 0.5902, "step": 8206 }, { "epoch": 1.4042419651552862, "grad_norm": 1.4453125, "learning_rate": 1.0920653843306697e-05, "loss": 0.4458, "step": 8207 }, { "epoch": 1.404415106590196, "grad_norm": 1.3984375, "learning_rate": 1.0918837717585629e-05, "loss": 0.5435, "step": 8208 }, { "epoch": 1.4045882480251055, "grad_norm": 1.3828125, "learning_rate": 1.0917021561299864e-05, "loss": 0.4413, "step": 8209 }, { "epoch": 1.404761389460015, "grad_norm": 1.4375, "learning_rate": 1.0915205374509813e-05, "loss": 0.5529, "step": 8210 }, { "epoch": 1.4049345308949248, "grad_norm": 1.4765625, "learning_rate": 1.0913389157275895e-05, "loss": 0.4557, "step": 8211 }, { "epoch": 1.4051076723298344, "grad_norm": 1.4609375, "learning_rate": 1.0911572909658524e-05, "loss": 0.4318, "step": 8212 }, { "epoch": 1.4052808137647441, "grad_norm": 1.4296875, "learning_rate": 1.0909756631718114e-05, "loss": 0.4498, "step": 8213 }, { "epoch": 1.4054539551996537, "grad_norm": 1.40625, "learning_rate": 1.0907940323515085e-05, "loss": 0.4382, "step": 8214 }, { "epoch": 1.4056270966345634, "grad_norm": 1.3203125, "learning_rate": 1.0906123985109855e-05, "loss": 0.4556, "step": 8215 }, { "epoch": 1.405800238069473, "grad_norm": 1.4296875, "learning_rate": 1.0904307616562849e-05, "loss": 0.4334, "step": 8216 }, { "epoch": 1.4059733795043827, "grad_norm": 1.40625, "learning_rate": 1.0902491217934477e-05, "loss": 0.4608, "step": 8217 }, { "epoch": 1.4061465209392923, "grad_norm": 1.421875, "learning_rate": 1.0900674789285168e-05, "loss": 0.4194, "step": 8218 }, { "epoch": 1.406319662374202, "grad_norm": 1.25, "learning_rate": 1.0898858330675342e-05, "loss": 0.4139, "step": 8219 }, { "epoch": 1.4064928038091116, "grad_norm": 1.46875, "learning_rate": 1.0897041842165426e-05, "loss": 0.5187, "step": 8220 }, { "epoch": 1.406665945244021, "grad_norm": 1.3046875, "learning_rate": 1.0895225323815841e-05, "loss": 0.4689, "step": 8221 }, { "epoch": 1.4068390866789309, "grad_norm": 1.4609375, "learning_rate": 1.0893408775687013e-05, "loss": 0.4966, "step": 8222 }, { "epoch": 1.4070122281138404, "grad_norm": 1.4296875, "learning_rate": 1.0891592197839369e-05, "loss": 0.4352, "step": 8223 }, { "epoch": 1.4071853695487502, "grad_norm": 1.3984375, "learning_rate": 1.0889775590333338e-05, "loss": 0.5207, "step": 8224 }, { "epoch": 1.4073585109836597, "grad_norm": 1.515625, "learning_rate": 1.0887958953229349e-05, "loss": 0.5331, "step": 8225 }, { "epoch": 1.4075316524185695, "grad_norm": 1.5078125, "learning_rate": 1.0886142286587828e-05, "loss": 0.5465, "step": 8226 }, { "epoch": 1.407704793853479, "grad_norm": 1.34375, "learning_rate": 1.0884325590469209e-05, "loss": 0.4224, "step": 8227 }, { "epoch": 1.4078779352883888, "grad_norm": 1.4375, "learning_rate": 1.0882508864933925e-05, "loss": 0.4672, "step": 8228 }, { "epoch": 1.4080510767232983, "grad_norm": 1.375, "learning_rate": 1.08806921100424e-05, "loss": 0.4519, "step": 8229 }, { "epoch": 1.408224218158208, "grad_norm": 1.421875, "learning_rate": 1.0878875325855076e-05, "loss": 0.513, "step": 8230 }, { "epoch": 1.4083973595931176, "grad_norm": 1.4296875, "learning_rate": 1.0877058512432383e-05, "loss": 0.4364, "step": 8231 }, { "epoch": 1.4085705010280272, "grad_norm": 1.359375, "learning_rate": 1.0875241669834762e-05, "loss": 0.4503, "step": 8232 }, { "epoch": 1.408743642462937, "grad_norm": 1.3828125, "learning_rate": 1.0873424798122643e-05, "loss": 0.436, "step": 8233 }, { "epoch": 1.4089167838978467, "grad_norm": 1.5078125, "learning_rate": 1.0871607897356464e-05, "loss": 0.4346, "step": 8234 }, { "epoch": 1.4090899253327562, "grad_norm": 1.46875, "learning_rate": 1.0869790967596667e-05, "loss": 0.5078, "step": 8235 }, { "epoch": 1.4092630667676658, "grad_norm": 1.40625, "learning_rate": 1.086797400890369e-05, "loss": 0.431, "step": 8236 }, { "epoch": 1.4094362082025755, "grad_norm": 1.3125, "learning_rate": 1.0866157021337973e-05, "loss": 0.4687, "step": 8237 }, { "epoch": 1.409609349637485, "grad_norm": 1.40625, "learning_rate": 1.0864340004959957e-05, "loss": 0.5005, "step": 8238 }, { "epoch": 1.4097824910723948, "grad_norm": 1.28125, "learning_rate": 1.0862522959830082e-05, "loss": 0.4364, "step": 8239 }, { "epoch": 1.4099556325073044, "grad_norm": 1.4765625, "learning_rate": 1.0860705886008795e-05, "loss": 0.4943, "step": 8240 }, { "epoch": 1.4101287739422141, "grad_norm": 1.4140625, "learning_rate": 1.0858888783556538e-05, "loss": 0.407, "step": 8241 }, { "epoch": 1.4103019153771237, "grad_norm": 1.515625, "learning_rate": 1.0857071652533758e-05, "loss": 0.4944, "step": 8242 }, { "epoch": 1.4104750568120332, "grad_norm": 1.4453125, "learning_rate": 1.0855254493000897e-05, "loss": 0.5291, "step": 8243 }, { "epoch": 1.410648198246943, "grad_norm": 1.515625, "learning_rate": 1.0853437305018409e-05, "loss": 0.4565, "step": 8244 }, { "epoch": 1.4108213396818527, "grad_norm": 1.3046875, "learning_rate": 1.0851620088646729e-05, "loss": 0.4647, "step": 8245 }, { "epoch": 1.4109944811167623, "grad_norm": 1.375, "learning_rate": 1.084980284394632e-05, "loss": 0.4591, "step": 8246 }, { "epoch": 1.4111676225516718, "grad_norm": 1.453125, "learning_rate": 1.0847985570977624e-05, "loss": 0.4784, "step": 8247 }, { "epoch": 1.4113407639865816, "grad_norm": 1.4375, "learning_rate": 1.0846168269801094e-05, "loss": 0.4892, "step": 8248 }, { "epoch": 1.4115139054214911, "grad_norm": 1.71875, "learning_rate": 1.0844350940477184e-05, "loss": 0.5193, "step": 8249 }, { "epoch": 1.411687046856401, "grad_norm": 1.3203125, "learning_rate": 1.0842533583066339e-05, "loss": 0.4686, "step": 8250 }, { "epoch": 1.4118601882913104, "grad_norm": 1.2890625, "learning_rate": 1.084071619762902e-05, "loss": 0.3961, "step": 8251 }, { "epoch": 1.4120333297262202, "grad_norm": 1.359375, "learning_rate": 1.0838898784225678e-05, "loss": 0.4659, "step": 8252 }, { "epoch": 1.4122064711611297, "grad_norm": 1.59375, "learning_rate": 1.0837081342916769e-05, "loss": 0.5309, "step": 8253 }, { "epoch": 1.4123796125960393, "grad_norm": 1.359375, "learning_rate": 1.0835263873762754e-05, "loss": 0.4838, "step": 8254 }, { "epoch": 1.412552754030949, "grad_norm": 1.421875, "learning_rate": 1.083344637682408e-05, "loss": 0.487, "step": 8255 }, { "epoch": 1.4127258954658588, "grad_norm": 1.3828125, "learning_rate": 1.0831628852161213e-05, "loss": 0.3898, "step": 8256 }, { "epoch": 1.4128990369007683, "grad_norm": 1.5546875, "learning_rate": 1.082981129983461e-05, "loss": 0.4371, "step": 8257 }, { "epoch": 1.4130721783356779, "grad_norm": 1.4765625, "learning_rate": 1.0827993719904733e-05, "loss": 0.4249, "step": 8258 }, { "epoch": 1.4132453197705876, "grad_norm": 1.3359375, "learning_rate": 1.0826176112432037e-05, "loss": 0.459, "step": 8259 }, { "epoch": 1.4134184612054972, "grad_norm": 1.2578125, "learning_rate": 1.0824358477476988e-05, "loss": 0.3797, "step": 8260 }, { "epoch": 1.413591602640407, "grad_norm": 1.4140625, "learning_rate": 1.082254081510005e-05, "loss": 0.4171, "step": 8261 }, { "epoch": 1.4137647440753165, "grad_norm": 1.421875, "learning_rate": 1.0820723125361685e-05, "loss": 0.4501, "step": 8262 }, { "epoch": 1.4139378855102263, "grad_norm": 1.40625, "learning_rate": 1.081890540832236e-05, "loss": 0.4367, "step": 8263 }, { "epoch": 1.4141110269451358, "grad_norm": 1.6015625, "learning_rate": 1.0817087664042536e-05, "loss": 0.5247, "step": 8264 }, { "epoch": 1.4142841683800453, "grad_norm": 1.46875, "learning_rate": 1.0815269892582683e-05, "loss": 0.44, "step": 8265 }, { "epoch": 1.414457309814955, "grad_norm": 1.390625, "learning_rate": 1.0813452094003267e-05, "loss": 0.4367, "step": 8266 }, { "epoch": 1.4146304512498649, "grad_norm": 1.375, "learning_rate": 1.0811634268364752e-05, "loss": 0.4892, "step": 8267 }, { "epoch": 1.4148035926847744, "grad_norm": 1.4140625, "learning_rate": 1.0809816415727616e-05, "loss": 0.4427, "step": 8268 }, { "epoch": 1.414976734119684, "grad_norm": 1.484375, "learning_rate": 1.0807998536152325e-05, "loss": 0.509, "step": 8269 }, { "epoch": 1.4151498755545937, "grad_norm": 1.515625, "learning_rate": 1.0806180629699348e-05, "loss": 0.462, "step": 8270 }, { "epoch": 1.4153230169895032, "grad_norm": 1.3671875, "learning_rate": 1.0804362696429156e-05, "loss": 0.449, "step": 8271 }, { "epoch": 1.415496158424413, "grad_norm": 1.453125, "learning_rate": 1.0802544736402224e-05, "loss": 0.572, "step": 8272 }, { "epoch": 1.4156692998593225, "grad_norm": 1.390625, "learning_rate": 1.0800726749679025e-05, "loss": 0.4189, "step": 8273 }, { "epoch": 1.4158424412942323, "grad_norm": 1.2890625, "learning_rate": 1.0798908736320035e-05, "loss": 0.4009, "step": 8274 }, { "epoch": 1.4160155827291419, "grad_norm": 1.3828125, "learning_rate": 1.0797090696385728e-05, "loss": 0.4364, "step": 8275 }, { "epoch": 1.4161887241640514, "grad_norm": 1.4296875, "learning_rate": 1.079527262993658e-05, "loss": 0.5517, "step": 8276 }, { "epoch": 1.4163618655989612, "grad_norm": 1.390625, "learning_rate": 1.0793454537033068e-05, "loss": 0.4311, "step": 8277 }, { "epoch": 1.416535007033871, "grad_norm": 1.4609375, "learning_rate": 1.0791636417735669e-05, "loss": 0.5227, "step": 8278 }, { "epoch": 1.4167081484687805, "grad_norm": 1.4453125, "learning_rate": 1.0789818272104866e-05, "loss": 0.4913, "step": 8279 }, { "epoch": 1.41688128990369, "grad_norm": 1.375, "learning_rate": 1.0788000100201135e-05, "loss": 0.4704, "step": 8280 }, { "epoch": 1.4170544313385998, "grad_norm": 1.484375, "learning_rate": 1.0786181902084957e-05, "loss": 0.5802, "step": 8281 }, { "epoch": 1.4172275727735093, "grad_norm": 1.34375, "learning_rate": 1.0784363677816817e-05, "loss": 0.4339, "step": 8282 }, { "epoch": 1.417400714208419, "grad_norm": 1.5078125, "learning_rate": 1.078254542745719e-05, "loss": 0.4602, "step": 8283 }, { "epoch": 1.4175738556433286, "grad_norm": 1.4296875, "learning_rate": 1.0780727151066565e-05, "loss": 0.4395, "step": 8284 }, { "epoch": 1.4177469970782384, "grad_norm": 1.4921875, "learning_rate": 1.0778908848705425e-05, "loss": 0.5206, "step": 8285 }, { "epoch": 1.417920138513148, "grad_norm": 1.3515625, "learning_rate": 1.0777090520434254e-05, "loss": 0.4653, "step": 8286 }, { "epoch": 1.4180932799480575, "grad_norm": 1.6640625, "learning_rate": 1.0775272166313542e-05, "loss": 0.517, "step": 8287 }, { "epoch": 1.4182664213829672, "grad_norm": 1.6015625, "learning_rate": 1.0773453786403766e-05, "loss": 0.4898, "step": 8288 }, { "epoch": 1.418439562817877, "grad_norm": 1.359375, "learning_rate": 1.0771635380765426e-05, "loss": 0.4735, "step": 8289 }, { "epoch": 1.4186127042527865, "grad_norm": 1.5546875, "learning_rate": 1.0769816949459002e-05, "loss": 0.4399, "step": 8290 }, { "epoch": 1.418785845687696, "grad_norm": 1.421875, "learning_rate": 1.0767998492544984e-05, "loss": 0.4391, "step": 8291 }, { "epoch": 1.4189589871226058, "grad_norm": 1.515625, "learning_rate": 1.0766180010083867e-05, "loss": 0.5011, "step": 8292 }, { "epoch": 1.4191321285575154, "grad_norm": 1.375, "learning_rate": 1.0764361502136134e-05, "loss": 0.4678, "step": 8293 }, { "epoch": 1.4193052699924251, "grad_norm": 1.453125, "learning_rate": 1.0762542968762282e-05, "loss": 0.4738, "step": 8294 }, { "epoch": 1.4194784114273347, "grad_norm": 1.5390625, "learning_rate": 1.0760724410022804e-05, "loss": 0.47, "step": 8295 }, { "epoch": 1.4196515528622444, "grad_norm": 1.3828125, "learning_rate": 1.0758905825978195e-05, "loss": 0.4283, "step": 8296 }, { "epoch": 1.419824694297154, "grad_norm": 1.3359375, "learning_rate": 1.0757087216688945e-05, "loss": 0.4491, "step": 8297 }, { "epoch": 1.4199978357320635, "grad_norm": 1.359375, "learning_rate": 1.0755268582215548e-05, "loss": 0.4946, "step": 8298 }, { "epoch": 1.4201709771669733, "grad_norm": 1.421875, "learning_rate": 1.0753449922618503e-05, "loss": 0.4612, "step": 8299 }, { "epoch": 1.420344118601883, "grad_norm": 1.5078125, "learning_rate": 1.075163123795831e-05, "loss": 0.4472, "step": 8300 }, { "epoch": 1.4205172600367926, "grad_norm": 1.484375, "learning_rate": 1.0749812528295462e-05, "loss": 0.4686, "step": 8301 }, { "epoch": 1.4206904014717021, "grad_norm": 1.421875, "learning_rate": 1.0747993793690458e-05, "loss": 0.4921, "step": 8302 }, { "epoch": 1.4208635429066119, "grad_norm": 1.34375, "learning_rate": 1.0746175034203799e-05, "loss": 0.4377, "step": 8303 }, { "epoch": 1.4210366843415214, "grad_norm": 1.5859375, "learning_rate": 1.0744356249895986e-05, "loss": 0.462, "step": 8304 }, { "epoch": 1.4212098257764312, "grad_norm": 1.421875, "learning_rate": 1.0742537440827513e-05, "loss": 0.5059, "step": 8305 }, { "epoch": 1.4213829672113407, "grad_norm": 1.3359375, "learning_rate": 1.0740718607058896e-05, "loss": 0.434, "step": 8306 }, { "epoch": 1.4215561086462505, "grad_norm": 1.3359375, "learning_rate": 1.0738899748650621e-05, "loss": 0.4192, "step": 8307 }, { "epoch": 1.42172925008116, "grad_norm": 1.390625, "learning_rate": 1.0737080865663206e-05, "loss": 0.5323, "step": 8308 }, { "epoch": 1.4219023915160696, "grad_norm": 1.375, "learning_rate": 1.0735261958157143e-05, "loss": 0.4878, "step": 8309 }, { "epoch": 1.4220755329509793, "grad_norm": 1.4765625, "learning_rate": 1.0733443026192944e-05, "loss": 0.4524, "step": 8310 }, { "epoch": 1.422248674385889, "grad_norm": 1.3515625, "learning_rate": 1.0731624069831112e-05, "loss": 0.4638, "step": 8311 }, { "epoch": 1.4224218158207986, "grad_norm": 1.4765625, "learning_rate": 1.0729805089132158e-05, "loss": 0.4619, "step": 8312 }, { "epoch": 1.4225949572557082, "grad_norm": 1.5234375, "learning_rate": 1.0727986084156587e-05, "loss": 0.4492, "step": 8313 }, { "epoch": 1.422768098690618, "grad_norm": 1.5625, "learning_rate": 1.0726167054964907e-05, "loss": 0.533, "step": 8314 }, { "epoch": 1.4229412401255275, "grad_norm": 1.4609375, "learning_rate": 1.0724348001617626e-05, "loss": 0.4448, "step": 8315 }, { "epoch": 1.4231143815604372, "grad_norm": 1.3984375, "learning_rate": 1.0722528924175254e-05, "loss": 0.4391, "step": 8316 }, { "epoch": 1.4232875229953468, "grad_norm": 1.40625, "learning_rate": 1.0720709822698302e-05, "loss": 0.4966, "step": 8317 }, { "epoch": 1.4234606644302565, "grad_norm": 1.390625, "learning_rate": 1.071889069724729e-05, "loss": 0.4315, "step": 8318 }, { "epoch": 1.423633805865166, "grad_norm": 1.4296875, "learning_rate": 1.0717071547882716e-05, "loss": 0.4749, "step": 8319 }, { "epoch": 1.4238069473000756, "grad_norm": 1.546875, "learning_rate": 1.0715252374665105e-05, "loss": 0.4799, "step": 8320 }, { "epoch": 1.4239800887349854, "grad_norm": 1.3828125, "learning_rate": 1.0713433177654957e-05, "loss": 0.44, "step": 8321 }, { "epoch": 1.4241532301698951, "grad_norm": 1.453125, "learning_rate": 1.0711613956912804e-05, "loss": 0.4919, "step": 8322 }, { "epoch": 1.4243263716048047, "grad_norm": 1.671875, "learning_rate": 1.070979471249915e-05, "loss": 0.5149, "step": 8323 }, { "epoch": 1.4244995130397142, "grad_norm": 1.5859375, "learning_rate": 1.0707975444474515e-05, "loss": 0.5123, "step": 8324 }, { "epoch": 1.424672654474624, "grad_norm": 1.5078125, "learning_rate": 1.0706156152899415e-05, "loss": 0.42, "step": 8325 }, { "epoch": 1.4248457959095335, "grad_norm": 1.4453125, "learning_rate": 1.0704336837834366e-05, "loss": 0.542, "step": 8326 }, { "epoch": 1.4250189373444433, "grad_norm": 1.5, "learning_rate": 1.0702517499339893e-05, "loss": 0.5859, "step": 8327 }, { "epoch": 1.4251920787793528, "grad_norm": 1.3984375, "learning_rate": 1.070069813747651e-05, "loss": 0.534, "step": 8328 }, { "epoch": 1.4253652202142626, "grad_norm": 1.453125, "learning_rate": 1.0698878752304738e-05, "loss": 0.4787, "step": 8329 }, { "epoch": 1.4255383616491721, "grad_norm": 1.421875, "learning_rate": 1.0697059343885101e-05, "loss": 0.4305, "step": 8330 }, { "epoch": 1.4257115030840817, "grad_norm": 1.34375, "learning_rate": 1.0695239912278117e-05, "loss": 0.4368, "step": 8331 }, { "epoch": 1.4258846445189914, "grad_norm": 1.7109375, "learning_rate": 1.0693420457544308e-05, "loss": 0.6162, "step": 8332 }, { "epoch": 1.4260577859539012, "grad_norm": 1.3984375, "learning_rate": 1.0691600979744199e-05, "loss": 0.4415, "step": 8333 }, { "epoch": 1.4262309273888107, "grad_norm": 1.46875, "learning_rate": 1.0689781478938317e-05, "loss": 0.5002, "step": 8334 }, { "epoch": 1.4264040688237203, "grad_norm": 1.34375, "learning_rate": 1.0687961955187183e-05, "loss": 0.4171, "step": 8335 }, { "epoch": 1.42657721025863, "grad_norm": 1.265625, "learning_rate": 1.0686142408551323e-05, "loss": 0.4112, "step": 8336 }, { "epoch": 1.4267503516935396, "grad_norm": 1.421875, "learning_rate": 1.0684322839091263e-05, "loss": 0.5083, "step": 8337 }, { "epoch": 1.4269234931284493, "grad_norm": 1.46875, "learning_rate": 1.0682503246867532e-05, "loss": 0.4917, "step": 8338 }, { "epoch": 1.427096634563359, "grad_norm": 1.2578125, "learning_rate": 1.0680683631940661e-05, "loss": 0.4836, "step": 8339 }, { "epoch": 1.4272697759982687, "grad_norm": 1.40625, "learning_rate": 1.067886399437117e-05, "loss": 0.4975, "step": 8340 }, { "epoch": 1.4274429174331782, "grad_norm": 1.53125, "learning_rate": 1.0677044334219592e-05, "loss": 0.4698, "step": 8341 }, { "epoch": 1.4276160588680877, "grad_norm": 1.2890625, "learning_rate": 1.0675224651546459e-05, "loss": 0.3654, "step": 8342 }, { "epoch": 1.4277892003029975, "grad_norm": 1.46875, "learning_rate": 1.0673404946412302e-05, "loss": 0.4799, "step": 8343 }, { "epoch": 1.4279623417379073, "grad_norm": 1.3984375, "learning_rate": 1.0671585218877654e-05, "loss": 0.4479, "step": 8344 }, { "epoch": 1.4281354831728168, "grad_norm": 1.4765625, "learning_rate": 1.0669765469003042e-05, "loss": 0.4866, "step": 8345 }, { "epoch": 1.4283086246077263, "grad_norm": 1.453125, "learning_rate": 1.0667945696849003e-05, "loss": 0.5911, "step": 8346 }, { "epoch": 1.428481766042636, "grad_norm": 1.4296875, "learning_rate": 1.0666125902476068e-05, "loss": 0.5193, "step": 8347 }, { "epoch": 1.4286549074775456, "grad_norm": 1.4296875, "learning_rate": 1.0664306085944776e-05, "loss": 0.469, "step": 8348 }, { "epoch": 1.4288280489124554, "grad_norm": 1.3359375, "learning_rate": 1.0662486247315658e-05, "loss": 0.4189, "step": 8349 }, { "epoch": 1.429001190347365, "grad_norm": 1.3515625, "learning_rate": 1.066066638664925e-05, "loss": 0.4556, "step": 8350 }, { "epoch": 1.4291743317822747, "grad_norm": 1.6171875, "learning_rate": 1.0658846504006096e-05, "loss": 0.5914, "step": 8351 }, { "epoch": 1.4293474732171843, "grad_norm": 1.4296875, "learning_rate": 1.0657026599446725e-05, "loss": 0.4564, "step": 8352 }, { "epoch": 1.4295206146520938, "grad_norm": 1.59375, "learning_rate": 1.0655206673031677e-05, "loss": 0.4994, "step": 8353 }, { "epoch": 1.4296937560870036, "grad_norm": 1.4765625, "learning_rate": 1.0653386724821492e-05, "loss": 0.5019, "step": 8354 }, { "epoch": 1.4298668975219133, "grad_norm": 2.09375, "learning_rate": 1.0651566754876715e-05, "loss": 0.4852, "step": 8355 }, { "epoch": 1.4300400389568229, "grad_norm": 1.4765625, "learning_rate": 1.064974676325788e-05, "loss": 0.4714, "step": 8356 }, { "epoch": 1.4302131803917324, "grad_norm": 1.3984375, "learning_rate": 1.0647926750025528e-05, "loss": 0.4701, "step": 8357 }, { "epoch": 1.4303863218266422, "grad_norm": 1.28125, "learning_rate": 1.0646106715240204e-05, "loss": 0.4467, "step": 8358 }, { "epoch": 1.4305594632615517, "grad_norm": 1.5, "learning_rate": 1.0644286658962447e-05, "loss": 0.4391, "step": 8359 }, { "epoch": 1.4307326046964615, "grad_norm": 1.265625, "learning_rate": 1.0642466581252805e-05, "loss": 0.4466, "step": 8360 }, { "epoch": 1.430905746131371, "grad_norm": 1.34375, "learning_rate": 1.0640646482171816e-05, "loss": 0.4862, "step": 8361 }, { "epoch": 1.4310788875662808, "grad_norm": 1.34375, "learning_rate": 1.063882636178003e-05, "loss": 0.4595, "step": 8362 }, { "epoch": 1.4312520290011903, "grad_norm": 1.453125, "learning_rate": 1.0637006220137993e-05, "loss": 0.497, "step": 8363 }, { "epoch": 1.4314251704361, "grad_norm": 1.421875, "learning_rate": 1.0635186057306244e-05, "loss": 0.48, "step": 8364 }, { "epoch": 1.4315983118710096, "grad_norm": 1.3984375, "learning_rate": 1.0633365873345338e-05, "loss": 0.5026, "step": 8365 }, { "epoch": 1.4317714533059194, "grad_norm": 1.34375, "learning_rate": 1.0631545668315819e-05, "loss": 0.4389, "step": 8366 }, { "epoch": 1.431944594740829, "grad_norm": 1.3828125, "learning_rate": 1.0629725442278234e-05, "loss": 0.4904, "step": 8367 }, { "epoch": 1.4321177361757385, "grad_norm": 1.5, "learning_rate": 1.0627905195293135e-05, "loss": 0.5002, "step": 8368 }, { "epoch": 1.4322908776106482, "grad_norm": 1.3828125, "learning_rate": 1.0626084927421068e-05, "loss": 0.4543, "step": 8369 }, { "epoch": 1.4324640190455578, "grad_norm": 1.296875, "learning_rate": 1.0624264638722588e-05, "loss": 0.446, "step": 8370 }, { "epoch": 1.4326371604804675, "grad_norm": 1.375, "learning_rate": 1.0622444329258241e-05, "loss": 0.5229, "step": 8371 }, { "epoch": 1.432810301915377, "grad_norm": 1.4296875, "learning_rate": 1.0620623999088584e-05, "loss": 0.4839, "step": 8372 }, { "epoch": 1.4329834433502868, "grad_norm": 1.5390625, "learning_rate": 1.0618803648274165e-05, "loss": 0.4544, "step": 8373 }, { "epoch": 1.4331565847851964, "grad_norm": 1.4921875, "learning_rate": 1.0616983276875538e-05, "loss": 0.5438, "step": 8374 }, { "epoch": 1.4333297262201061, "grad_norm": 1.390625, "learning_rate": 1.0615162884953256e-05, "loss": 0.3975, "step": 8375 }, { "epoch": 1.4335028676550157, "grad_norm": 1.53125, "learning_rate": 1.0613342472567878e-05, "loss": 0.4442, "step": 8376 }, { "epoch": 1.4336760090899254, "grad_norm": 1.3515625, "learning_rate": 1.0611522039779956e-05, "loss": 0.4753, "step": 8377 }, { "epoch": 1.433849150524835, "grad_norm": 1.4921875, "learning_rate": 1.0609701586650043e-05, "loss": 0.5197, "step": 8378 }, { "epoch": 1.4340222919597445, "grad_norm": 1.453125, "learning_rate": 1.06078811132387e-05, "loss": 0.4612, "step": 8379 }, { "epoch": 1.4341954333946543, "grad_norm": 1.359375, "learning_rate": 1.0606060619606482e-05, "loss": 0.4565, "step": 8380 }, { "epoch": 1.434368574829564, "grad_norm": 1.3515625, "learning_rate": 1.0604240105813948e-05, "loss": 0.4949, "step": 8381 }, { "epoch": 1.4345417162644736, "grad_norm": 1.3671875, "learning_rate": 1.0602419571921656e-05, "loss": 0.433, "step": 8382 }, { "epoch": 1.4347148576993831, "grad_norm": 1.3359375, "learning_rate": 1.0600599017990165e-05, "loss": 0.4541, "step": 8383 }, { "epoch": 1.4348879991342929, "grad_norm": 1.3671875, "learning_rate": 1.0598778444080036e-05, "loss": 0.5132, "step": 8384 }, { "epoch": 1.4350611405692024, "grad_norm": 1.3671875, "learning_rate": 1.0596957850251826e-05, "loss": 0.4716, "step": 8385 }, { "epoch": 1.4352342820041122, "grad_norm": 1.421875, "learning_rate": 1.05951372365661e-05, "loss": 0.3931, "step": 8386 }, { "epoch": 1.4354074234390217, "grad_norm": 1.421875, "learning_rate": 1.0593316603083416e-05, "loss": 0.4898, "step": 8387 }, { "epoch": 1.4355805648739315, "grad_norm": 1.515625, "learning_rate": 1.0591495949864343e-05, "loss": 0.4725, "step": 8388 }, { "epoch": 1.435753706308841, "grad_norm": 1.4140625, "learning_rate": 1.058967527696944e-05, "loss": 0.492, "step": 8389 }, { "epoch": 1.4359268477437506, "grad_norm": 1.4453125, "learning_rate": 1.0587854584459268e-05, "loss": 0.4519, "step": 8390 }, { "epoch": 1.4360999891786603, "grad_norm": 1.46875, "learning_rate": 1.0586033872394395e-05, "loss": 0.4851, "step": 8391 }, { "epoch": 1.43627313061357, "grad_norm": 1.3828125, "learning_rate": 1.0584213140835387e-05, "loss": 0.467, "step": 8392 }, { "epoch": 1.4364462720484796, "grad_norm": 1.4765625, "learning_rate": 1.0582392389842806e-05, "loss": 0.4648, "step": 8393 }, { "epoch": 1.4366194134833892, "grad_norm": 1.4375, "learning_rate": 1.0580571619477225e-05, "loss": 0.5407, "step": 8394 }, { "epoch": 1.436792554918299, "grad_norm": 1.296875, "learning_rate": 1.0578750829799202e-05, "loss": 0.4528, "step": 8395 }, { "epoch": 1.4369656963532085, "grad_norm": 1.5078125, "learning_rate": 1.0576930020869314e-05, "loss": 0.4917, "step": 8396 }, { "epoch": 1.4371388377881182, "grad_norm": 1.4921875, "learning_rate": 1.0575109192748122e-05, "loss": 0.4524, "step": 8397 }, { "epoch": 1.4373119792230278, "grad_norm": 1.375, "learning_rate": 1.0573288345496197e-05, "loss": 0.4829, "step": 8398 }, { "epoch": 1.4374851206579375, "grad_norm": 1.3125, "learning_rate": 1.057146747917411e-05, "loss": 0.4292, "step": 8399 }, { "epoch": 1.437658262092847, "grad_norm": 1.359375, "learning_rate": 1.0569646593842433e-05, "loss": 0.4489, "step": 8400 }, { "epoch": 1.4378314035277566, "grad_norm": 1.390625, "learning_rate": 1.0567825689561735e-05, "loss": 0.4636, "step": 8401 }, { "epoch": 1.4380045449626664, "grad_norm": 1.5390625, "learning_rate": 1.0566004766392581e-05, "loss": 0.4955, "step": 8402 }, { "epoch": 1.4381776863975761, "grad_norm": 1.4609375, "learning_rate": 1.0564183824395554e-05, "loss": 0.5031, "step": 8403 }, { "epoch": 1.4383508278324857, "grad_norm": 1.4140625, "learning_rate": 1.0562362863631222e-05, "loss": 0.4357, "step": 8404 }, { "epoch": 1.4385239692673952, "grad_norm": 1.390625, "learning_rate": 1.0560541884160155e-05, "loss": 0.4241, "step": 8405 }, { "epoch": 1.438697110702305, "grad_norm": 1.421875, "learning_rate": 1.0558720886042935e-05, "loss": 0.4282, "step": 8406 }, { "epoch": 1.4388702521372145, "grad_norm": 1.390625, "learning_rate": 1.0556899869340127e-05, "loss": 0.4541, "step": 8407 }, { "epoch": 1.4390433935721243, "grad_norm": 1.4609375, "learning_rate": 1.0555078834112315e-05, "loss": 0.5235, "step": 8408 }, { "epoch": 1.4392165350070338, "grad_norm": 1.4375, "learning_rate": 1.0553257780420069e-05, "loss": 0.4604, "step": 8409 }, { "epoch": 1.4393896764419436, "grad_norm": 1.3203125, "learning_rate": 1.0551436708323972e-05, "loss": 0.4786, "step": 8410 }, { "epoch": 1.4395628178768531, "grad_norm": 1.3984375, "learning_rate": 1.0549615617884593e-05, "loss": 0.5514, "step": 8411 }, { "epoch": 1.4397359593117627, "grad_norm": 1.21875, "learning_rate": 1.0547794509162512e-05, "loss": 0.4257, "step": 8412 }, { "epoch": 1.4399091007466724, "grad_norm": 1.390625, "learning_rate": 1.0545973382218308e-05, "loss": 0.4623, "step": 8413 }, { "epoch": 1.4400822421815822, "grad_norm": 1.4296875, "learning_rate": 1.0544152237112564e-05, "loss": 0.4774, "step": 8414 }, { "epoch": 1.4402553836164917, "grad_norm": 1.4140625, "learning_rate": 1.0542331073905856e-05, "loss": 0.4093, "step": 8415 }, { "epoch": 1.4404285250514013, "grad_norm": 1.3515625, "learning_rate": 1.0540509892658763e-05, "loss": 0.4529, "step": 8416 }, { "epoch": 1.440601666486311, "grad_norm": 1.3828125, "learning_rate": 1.0538688693431867e-05, "loss": 0.5184, "step": 8417 }, { "epoch": 1.4407748079212206, "grad_norm": 1.3515625, "learning_rate": 1.0536867476285747e-05, "loss": 0.4438, "step": 8418 }, { "epoch": 1.4409479493561304, "grad_norm": 1.453125, "learning_rate": 1.053504624128099e-05, "loss": 0.533, "step": 8419 }, { "epoch": 1.44112109079104, "grad_norm": 1.359375, "learning_rate": 1.0533224988478176e-05, "loss": 0.4735, "step": 8420 }, { "epoch": 1.4412942322259497, "grad_norm": 1.453125, "learning_rate": 1.0531403717937888e-05, "loss": 0.4663, "step": 8421 }, { "epoch": 1.4414673736608592, "grad_norm": 1.3203125, "learning_rate": 1.0529582429720711e-05, "loss": 0.4807, "step": 8422 }, { "epoch": 1.4416405150957687, "grad_norm": 1.421875, "learning_rate": 1.0527761123887223e-05, "loss": 0.4895, "step": 8423 }, { "epoch": 1.4418136565306785, "grad_norm": 1.453125, "learning_rate": 1.0525939800498018e-05, "loss": 0.5225, "step": 8424 }, { "epoch": 1.4419867979655883, "grad_norm": 1.40625, "learning_rate": 1.0524118459613673e-05, "loss": 0.4685, "step": 8425 }, { "epoch": 1.4421599394004978, "grad_norm": 1.359375, "learning_rate": 1.052229710129478e-05, "loss": 0.4439, "step": 8426 }, { "epoch": 1.4423330808354073, "grad_norm": 1.578125, "learning_rate": 1.0520475725601926e-05, "loss": 0.5411, "step": 8427 }, { "epoch": 1.442506222270317, "grad_norm": 1.3359375, "learning_rate": 1.0518654332595692e-05, "loss": 0.5151, "step": 8428 }, { "epoch": 1.4426793637052266, "grad_norm": 1.4296875, "learning_rate": 1.0516832922336673e-05, "loss": 0.472, "step": 8429 }, { "epoch": 1.4428525051401364, "grad_norm": 1.484375, "learning_rate": 1.0515011494885452e-05, "loss": 0.566, "step": 8430 }, { "epoch": 1.443025646575046, "grad_norm": 1.5625, "learning_rate": 1.0513190050302619e-05, "loss": 0.4934, "step": 8431 }, { "epoch": 1.4431987880099557, "grad_norm": 1.375, "learning_rate": 1.0511368588648767e-05, "loss": 0.5055, "step": 8432 }, { "epoch": 1.4433719294448653, "grad_norm": 1.4609375, "learning_rate": 1.0509547109984484e-05, "loss": 0.4618, "step": 8433 }, { "epoch": 1.4435450708797748, "grad_norm": 1.3125, "learning_rate": 1.0507725614370357e-05, "loss": 0.3979, "step": 8434 }, { "epoch": 1.4437182123146846, "grad_norm": 1.34375, "learning_rate": 1.0505904101866982e-05, "loss": 0.5186, "step": 8435 }, { "epoch": 1.4438913537495943, "grad_norm": 1.4453125, "learning_rate": 1.050408257253495e-05, "loss": 0.5133, "step": 8436 }, { "epoch": 1.4440644951845039, "grad_norm": 1.4296875, "learning_rate": 1.0502261026434852e-05, "loss": 0.4759, "step": 8437 }, { "epoch": 1.4442376366194134, "grad_norm": 1.3125, "learning_rate": 1.0500439463627279e-05, "loss": 0.4568, "step": 8438 }, { "epoch": 1.4444107780543232, "grad_norm": 1.3125, "learning_rate": 1.0498617884172831e-05, "loss": 0.4373, "step": 8439 }, { "epoch": 1.4445839194892327, "grad_norm": 1.3515625, "learning_rate": 1.0496796288132092e-05, "loss": 0.4836, "step": 8440 }, { "epoch": 1.4447570609241425, "grad_norm": 1.3125, "learning_rate": 1.0494974675565666e-05, "loss": 0.4272, "step": 8441 }, { "epoch": 1.444930202359052, "grad_norm": 1.4140625, "learning_rate": 1.0493153046534141e-05, "loss": 0.4454, "step": 8442 }, { "epoch": 1.4451033437939618, "grad_norm": 1.421875, "learning_rate": 1.0491331401098119e-05, "loss": 0.4564, "step": 8443 }, { "epoch": 1.4452764852288713, "grad_norm": 1.2734375, "learning_rate": 1.0489509739318193e-05, "loss": 0.4841, "step": 8444 }, { "epoch": 1.4454496266637809, "grad_norm": 1.40625, "learning_rate": 1.0487688061254955e-05, "loss": 0.4329, "step": 8445 }, { "epoch": 1.4456227680986906, "grad_norm": 1.4140625, "learning_rate": 1.0485866366969012e-05, "loss": 0.457, "step": 8446 }, { "epoch": 1.4457959095336004, "grad_norm": 1.3046875, "learning_rate": 1.0484044656520955e-05, "loss": 0.4432, "step": 8447 }, { "epoch": 1.44596905096851, "grad_norm": 1.296875, "learning_rate": 1.0482222929971386e-05, "loss": 0.4186, "step": 8448 }, { "epoch": 1.4461421924034195, "grad_norm": 1.4609375, "learning_rate": 1.04804011873809e-05, "loss": 0.4969, "step": 8449 }, { "epoch": 1.4463153338383292, "grad_norm": 1.4453125, "learning_rate": 1.0478579428810097e-05, "loss": 0.4785, "step": 8450 }, { "epoch": 1.4464884752732388, "grad_norm": 1.4375, "learning_rate": 1.0476757654319579e-05, "loss": 0.4849, "step": 8451 }, { "epoch": 1.4466616167081485, "grad_norm": 1.3671875, "learning_rate": 1.0474935863969946e-05, "loss": 0.4705, "step": 8452 }, { "epoch": 1.446834758143058, "grad_norm": 1.359375, "learning_rate": 1.0473114057821801e-05, "loss": 0.4524, "step": 8453 }, { "epoch": 1.4470078995779678, "grad_norm": 1.34375, "learning_rate": 1.047129223593574e-05, "loss": 0.4589, "step": 8454 }, { "epoch": 1.4471810410128774, "grad_norm": 1.3828125, "learning_rate": 1.0469470398372368e-05, "loss": 0.4118, "step": 8455 }, { "epoch": 1.447354182447787, "grad_norm": 1.4609375, "learning_rate": 1.0467648545192288e-05, "loss": 0.5148, "step": 8456 }, { "epoch": 1.4475273238826967, "grad_norm": 1.390625, "learning_rate": 1.0465826676456104e-05, "loss": 0.5018, "step": 8457 }, { "epoch": 1.4477004653176064, "grad_norm": 1.3671875, "learning_rate": 1.0464004792224421e-05, "loss": 0.4652, "step": 8458 }, { "epoch": 1.447873606752516, "grad_norm": 1.3984375, "learning_rate": 1.0462182892557834e-05, "loss": 0.4495, "step": 8459 }, { "epoch": 1.4480467481874255, "grad_norm": 1.328125, "learning_rate": 1.0460360977516961e-05, "loss": 0.4892, "step": 8460 }, { "epoch": 1.4482198896223353, "grad_norm": 1.4921875, "learning_rate": 1.0458539047162395e-05, "loss": 0.4891, "step": 8461 }, { "epoch": 1.4483930310572448, "grad_norm": 1.53125, "learning_rate": 1.045671710155475e-05, "loss": 0.4661, "step": 8462 }, { "epoch": 1.4485661724921546, "grad_norm": 1.46875, "learning_rate": 1.0454895140754628e-05, "loss": 0.4299, "step": 8463 }, { "epoch": 1.4487393139270641, "grad_norm": 1.4140625, "learning_rate": 1.0453073164822636e-05, "loss": 0.3953, "step": 8464 }, { "epoch": 1.4489124553619739, "grad_norm": 1.421875, "learning_rate": 1.0451251173819386e-05, "loss": 0.4964, "step": 8465 }, { "epoch": 1.4490855967968834, "grad_norm": 1.5078125, "learning_rate": 1.0449429167805477e-05, "loss": 0.5236, "step": 8466 }, { "epoch": 1.449258738231793, "grad_norm": 1.3203125, "learning_rate": 1.044760714684152e-05, "loss": 0.4236, "step": 8467 }, { "epoch": 1.4494318796667027, "grad_norm": 1.3125, "learning_rate": 1.044578511098813e-05, "loss": 0.392, "step": 8468 }, { "epoch": 1.4496050211016125, "grad_norm": 1.375, "learning_rate": 1.0443963060305909e-05, "loss": 0.4779, "step": 8469 }, { "epoch": 1.449778162536522, "grad_norm": 1.421875, "learning_rate": 1.0442140994855473e-05, "loss": 0.4495, "step": 8470 }, { "epoch": 1.4499513039714316, "grad_norm": 1.359375, "learning_rate": 1.0440318914697422e-05, "loss": 0.4813, "step": 8471 }, { "epoch": 1.4501244454063413, "grad_norm": 1.34375, "learning_rate": 1.0438496819892376e-05, "loss": 0.4751, "step": 8472 }, { "epoch": 1.4502975868412509, "grad_norm": 1.4453125, "learning_rate": 1.0436674710500943e-05, "loss": 0.4171, "step": 8473 }, { "epoch": 1.4504707282761606, "grad_norm": 1.5859375, "learning_rate": 1.0434852586583737e-05, "loss": 0.546, "step": 8474 }, { "epoch": 1.4506438697110702, "grad_norm": 1.328125, "learning_rate": 1.0433030448201364e-05, "loss": 0.4297, "step": 8475 }, { "epoch": 1.45081701114598, "grad_norm": 1.34375, "learning_rate": 1.0431208295414443e-05, "loss": 0.4145, "step": 8476 }, { "epoch": 1.4509901525808895, "grad_norm": 1.421875, "learning_rate": 1.0429386128283586e-05, "loss": 0.4974, "step": 8477 }, { "epoch": 1.451163294015799, "grad_norm": 1.421875, "learning_rate": 1.0427563946869397e-05, "loss": 0.4093, "step": 8478 }, { "epoch": 1.4513364354507088, "grad_norm": 1.4453125, "learning_rate": 1.0425741751232506e-05, "loss": 0.551, "step": 8479 }, { "epoch": 1.4515095768856185, "grad_norm": 1.3125, "learning_rate": 1.0423919541433513e-05, "loss": 0.4759, "step": 8480 }, { "epoch": 1.451682718320528, "grad_norm": 1.4296875, "learning_rate": 1.0422097317533041e-05, "loss": 0.4779, "step": 8481 }, { "epoch": 1.4518558597554376, "grad_norm": 1.4609375, "learning_rate": 1.042027507959171e-05, "loss": 0.4901, "step": 8482 }, { "epoch": 1.4520290011903474, "grad_norm": 1.421875, "learning_rate": 1.041845282767012e-05, "loss": 0.4426, "step": 8483 }, { "epoch": 1.452202142625257, "grad_norm": 1.3515625, "learning_rate": 1.0416630561828902e-05, "loss": 0.4652, "step": 8484 }, { "epoch": 1.4523752840601667, "grad_norm": 1.5234375, "learning_rate": 1.0414808282128668e-05, "loss": 0.4725, "step": 8485 }, { "epoch": 1.4525484254950762, "grad_norm": 1.34375, "learning_rate": 1.0412985988630036e-05, "loss": 0.4557, "step": 8486 }, { "epoch": 1.452721566929986, "grad_norm": 1.4453125, "learning_rate": 1.041116368139362e-05, "loss": 0.5116, "step": 8487 }, { "epoch": 1.4528947083648955, "grad_norm": 1.390625, "learning_rate": 1.0409341360480039e-05, "loss": 0.4575, "step": 8488 }, { "epoch": 1.453067849799805, "grad_norm": 1.3671875, "learning_rate": 1.0407519025949915e-05, "loss": 0.4492, "step": 8489 }, { "epoch": 1.4532409912347148, "grad_norm": 1.46875, "learning_rate": 1.0405696677863866e-05, "loss": 0.4461, "step": 8490 }, { "epoch": 1.4534141326696246, "grad_norm": 1.3984375, "learning_rate": 1.0403874316282513e-05, "loss": 0.4295, "step": 8491 }, { "epoch": 1.4535872741045341, "grad_norm": 1.40625, "learning_rate": 1.0402051941266471e-05, "loss": 0.4271, "step": 8492 }, { "epoch": 1.4537604155394437, "grad_norm": 1.3984375, "learning_rate": 1.0400229552876364e-05, "loss": 0.5096, "step": 8493 }, { "epoch": 1.4539335569743534, "grad_norm": 1.4296875, "learning_rate": 1.0398407151172814e-05, "loss": 0.5025, "step": 8494 }, { "epoch": 1.454106698409263, "grad_norm": 1.3984375, "learning_rate": 1.039658473621644e-05, "loss": 0.468, "step": 8495 }, { "epoch": 1.4542798398441728, "grad_norm": 1.3671875, "learning_rate": 1.0394762308067865e-05, "loss": 0.4746, "step": 8496 }, { "epoch": 1.4544529812790823, "grad_norm": 1.453125, "learning_rate": 1.039293986678771e-05, "loss": 0.4547, "step": 8497 }, { "epoch": 1.454626122713992, "grad_norm": 1.453125, "learning_rate": 1.03911174124366e-05, "loss": 0.4768, "step": 8498 }, { "epoch": 1.4547992641489016, "grad_norm": 1.328125, "learning_rate": 1.038929494507515e-05, "loss": 0.3995, "step": 8499 }, { "epoch": 1.4549724055838114, "grad_norm": 1.40625, "learning_rate": 1.0387472464763997e-05, "loss": 0.5046, "step": 8500 }, { "epoch": 1.455145547018721, "grad_norm": 1.828125, "learning_rate": 1.0385649971563753e-05, "loss": 0.5464, "step": 8501 }, { "epoch": 1.4553186884536307, "grad_norm": 1.3125, "learning_rate": 1.038382746553505e-05, "loss": 0.4621, "step": 8502 }, { "epoch": 1.4554918298885402, "grad_norm": 1.25, "learning_rate": 1.0382004946738511e-05, "loss": 0.4259, "step": 8503 }, { "epoch": 1.4556649713234497, "grad_norm": 1.4140625, "learning_rate": 1.0380182415234758e-05, "loss": 0.4832, "step": 8504 }, { "epoch": 1.4558381127583595, "grad_norm": 1.3671875, "learning_rate": 1.037835987108442e-05, "loss": 0.4422, "step": 8505 }, { "epoch": 1.456011254193269, "grad_norm": 1.4140625, "learning_rate": 1.037653731434812e-05, "loss": 0.4669, "step": 8506 }, { "epoch": 1.4561843956281788, "grad_norm": 1.4921875, "learning_rate": 1.0374714745086487e-05, "loss": 0.5025, "step": 8507 }, { "epoch": 1.4563575370630883, "grad_norm": 1.4140625, "learning_rate": 1.0372892163360148e-05, "loss": 0.4716, "step": 8508 }, { "epoch": 1.456530678497998, "grad_norm": 1.3125, "learning_rate": 1.037106956922973e-05, "loss": 0.4833, "step": 8509 }, { "epoch": 1.4567038199329077, "grad_norm": 1.6015625, "learning_rate": 1.0369246962755859e-05, "loss": 0.6253, "step": 8510 }, { "epoch": 1.4568769613678174, "grad_norm": 1.375, "learning_rate": 1.0367424343999164e-05, "loss": 0.4582, "step": 8511 }, { "epoch": 1.457050102802727, "grad_norm": 1.5078125, "learning_rate": 1.0365601713020277e-05, "loss": 0.5287, "step": 8512 }, { "epoch": 1.4572232442376367, "grad_norm": 1.3828125, "learning_rate": 1.036377906987982e-05, "loss": 0.5398, "step": 8513 }, { "epoch": 1.4573963856725463, "grad_norm": 1.234375, "learning_rate": 1.0361956414638428e-05, "loss": 0.427, "step": 8514 }, { "epoch": 1.4575695271074558, "grad_norm": 1.3515625, "learning_rate": 1.036013374735673e-05, "loss": 0.4853, "step": 8515 }, { "epoch": 1.4577426685423656, "grad_norm": 1.3515625, "learning_rate": 1.0358311068095353e-05, "loss": 0.4925, "step": 8516 }, { "epoch": 1.4579158099772753, "grad_norm": 1.3359375, "learning_rate": 1.0356488376914932e-05, "loss": 0.4385, "step": 8517 }, { "epoch": 1.4580889514121849, "grad_norm": 1.3359375, "learning_rate": 1.0354665673876094e-05, "loss": 0.4419, "step": 8518 }, { "epoch": 1.4582620928470944, "grad_norm": 1.40625, "learning_rate": 1.0352842959039473e-05, "loss": 0.4735, "step": 8519 }, { "epoch": 1.4584352342820042, "grad_norm": 1.4609375, "learning_rate": 1.03510202324657e-05, "loss": 0.4561, "step": 8520 }, { "epoch": 1.4586083757169137, "grad_norm": 1.5546875, "learning_rate": 1.0349197494215404e-05, "loss": 0.5003, "step": 8521 }, { "epoch": 1.4587815171518235, "grad_norm": 1.5078125, "learning_rate": 1.0347374744349224e-05, "loss": 0.4776, "step": 8522 }, { "epoch": 1.458954658586733, "grad_norm": 1.5859375, "learning_rate": 1.0345551982927787e-05, "loss": 0.4801, "step": 8523 }, { "epoch": 1.4591278000216428, "grad_norm": 1.453125, "learning_rate": 1.0343729210011731e-05, "loss": 0.5016, "step": 8524 }, { "epoch": 1.4593009414565523, "grad_norm": 1.5, "learning_rate": 1.0341906425661686e-05, "loss": 0.4685, "step": 8525 }, { "epoch": 1.4594740828914619, "grad_norm": 1.28125, "learning_rate": 1.0340083629938284e-05, "loss": 0.4542, "step": 8526 }, { "epoch": 1.4596472243263716, "grad_norm": 1.484375, "learning_rate": 1.0338260822902166e-05, "loss": 0.4695, "step": 8527 }, { "epoch": 1.4598203657612814, "grad_norm": 1.3515625, "learning_rate": 1.0336438004613962e-05, "loss": 0.446, "step": 8528 }, { "epoch": 1.459993507196191, "grad_norm": 1.328125, "learning_rate": 1.0334615175134313e-05, "loss": 0.4571, "step": 8529 }, { "epoch": 1.4601666486311005, "grad_norm": 1.3515625, "learning_rate": 1.0332792334523847e-05, "loss": 0.4312, "step": 8530 }, { "epoch": 1.4603397900660102, "grad_norm": 1.3515625, "learning_rate": 1.0330969482843203e-05, "loss": 0.5002, "step": 8531 }, { "epoch": 1.4605129315009198, "grad_norm": 1.578125, "learning_rate": 1.0329146620153017e-05, "loss": 0.4779, "step": 8532 }, { "epoch": 1.4606860729358295, "grad_norm": 1.4765625, "learning_rate": 1.0327323746513924e-05, "loss": 0.4455, "step": 8533 }, { "epoch": 1.460859214370739, "grad_norm": 1.4296875, "learning_rate": 1.0325500861986568e-05, "loss": 0.4746, "step": 8534 }, { "epoch": 1.4610323558056488, "grad_norm": 1.390625, "learning_rate": 1.032367796663158e-05, "loss": 0.4688, "step": 8535 }, { "epoch": 1.4612054972405584, "grad_norm": 1.46875, "learning_rate": 1.03218550605096e-05, "loss": 0.5459, "step": 8536 }, { "epoch": 1.461378638675468, "grad_norm": 1.4609375, "learning_rate": 1.0320032143681262e-05, "loss": 0.4859, "step": 8537 }, { "epoch": 1.4615517801103777, "grad_norm": 1.4296875, "learning_rate": 1.0318209216207211e-05, "loss": 0.4404, "step": 8538 }, { "epoch": 1.4617249215452874, "grad_norm": 1.3828125, "learning_rate": 1.0316386278148081e-05, "loss": 0.4781, "step": 8539 }, { "epoch": 1.461898062980197, "grad_norm": 1.421875, "learning_rate": 1.0314563329564512e-05, "loss": 0.462, "step": 8540 }, { "epoch": 1.4620712044151065, "grad_norm": 1.328125, "learning_rate": 1.031274037051715e-05, "loss": 0.4845, "step": 8541 }, { "epoch": 1.4622443458500163, "grad_norm": 1.328125, "learning_rate": 1.0310917401066622e-05, "loss": 0.4182, "step": 8542 }, { "epoch": 1.4624174872849258, "grad_norm": 1.53125, "learning_rate": 1.0309094421273577e-05, "loss": 0.5008, "step": 8543 }, { "epoch": 1.4625906287198356, "grad_norm": 1.4609375, "learning_rate": 1.0307271431198655e-05, "loss": 0.4915, "step": 8544 }, { "epoch": 1.4627637701547451, "grad_norm": 1.3671875, "learning_rate": 1.0305448430902497e-05, "loss": 0.4577, "step": 8545 }, { "epoch": 1.4629369115896549, "grad_norm": 1.4765625, "learning_rate": 1.0303625420445741e-05, "loss": 0.4814, "step": 8546 }, { "epoch": 1.4631100530245644, "grad_norm": 1.4921875, "learning_rate": 1.0301802399889034e-05, "loss": 0.4902, "step": 8547 }, { "epoch": 1.463283194459474, "grad_norm": 1.375, "learning_rate": 1.029997936929301e-05, "loss": 0.4257, "step": 8548 }, { "epoch": 1.4634563358943837, "grad_norm": 1.3203125, "learning_rate": 1.0298156328718319e-05, "loss": 0.4107, "step": 8549 }, { "epoch": 1.4636294773292935, "grad_norm": 1.40625, "learning_rate": 1.0296333278225599e-05, "loss": 0.5192, "step": 8550 }, { "epoch": 1.463802618764203, "grad_norm": 1.4140625, "learning_rate": 1.0294510217875495e-05, "loss": 0.4227, "step": 8551 }, { "epoch": 1.4639757601991126, "grad_norm": 1.5, "learning_rate": 1.0292687147728648e-05, "loss": 0.5766, "step": 8552 }, { "epoch": 1.4641489016340223, "grad_norm": 1.5, "learning_rate": 1.0290864067845702e-05, "loss": 0.5624, "step": 8553 }, { "epoch": 1.4643220430689319, "grad_norm": 1.3671875, "learning_rate": 1.0289040978287306e-05, "loss": 0.4229, "step": 8554 }, { "epoch": 1.4644951845038416, "grad_norm": 1.3203125, "learning_rate": 1.0287217879114097e-05, "loss": 0.4602, "step": 8555 }, { "epoch": 1.4646683259387512, "grad_norm": 1.5234375, "learning_rate": 1.0285394770386725e-05, "loss": 0.4474, "step": 8556 }, { "epoch": 1.464841467373661, "grad_norm": 1.53125, "learning_rate": 1.028357165216583e-05, "loss": 0.4723, "step": 8557 }, { "epoch": 1.4650146088085705, "grad_norm": 1.4296875, "learning_rate": 1.0281748524512062e-05, "loss": 0.4663, "step": 8558 }, { "epoch": 1.46518775024348, "grad_norm": 1.421875, "learning_rate": 1.027992538748606e-05, "loss": 0.4614, "step": 8559 }, { "epoch": 1.4653608916783898, "grad_norm": 1.3984375, "learning_rate": 1.027810224114848e-05, "loss": 0.4483, "step": 8560 }, { "epoch": 1.4655340331132996, "grad_norm": 1.4375, "learning_rate": 1.0276279085559958e-05, "loss": 0.4511, "step": 8561 }, { "epoch": 1.465707174548209, "grad_norm": 1.4375, "learning_rate": 1.0274455920781146e-05, "loss": 0.494, "step": 8562 }, { "epoch": 1.4658803159831186, "grad_norm": 1.375, "learning_rate": 1.0272632746872687e-05, "loss": 0.4464, "step": 8563 }, { "epoch": 1.4660534574180284, "grad_norm": 1.3671875, "learning_rate": 1.027080956389523e-05, "loss": 0.4905, "step": 8564 }, { "epoch": 1.466226598852938, "grad_norm": 1.390625, "learning_rate": 1.0268986371909424e-05, "loss": 0.4865, "step": 8565 }, { "epoch": 1.4663997402878477, "grad_norm": 1.5234375, "learning_rate": 1.0267163170975914e-05, "loss": 0.4871, "step": 8566 }, { "epoch": 1.4665728817227572, "grad_norm": 1.3984375, "learning_rate": 1.0265339961155351e-05, "loss": 0.4122, "step": 8567 }, { "epoch": 1.466746023157667, "grad_norm": 1.4296875, "learning_rate": 1.026351674250838e-05, "loss": 0.4474, "step": 8568 }, { "epoch": 1.4669191645925765, "grad_norm": 1.34375, "learning_rate": 1.0261693515095648e-05, "loss": 0.4703, "step": 8569 }, { "epoch": 1.467092306027486, "grad_norm": 1.3671875, "learning_rate": 1.0259870278977809e-05, "loss": 0.451, "step": 8570 }, { "epoch": 1.4672654474623958, "grad_norm": 1.4296875, "learning_rate": 1.0258047034215507e-05, "loss": 0.4627, "step": 8571 }, { "epoch": 1.4674385888973056, "grad_norm": 1.3828125, "learning_rate": 1.02562237808694e-05, "loss": 0.5026, "step": 8572 }, { "epoch": 1.4676117303322151, "grad_norm": 1.5, "learning_rate": 1.0254400519000126e-05, "loss": 0.4703, "step": 8573 }, { "epoch": 1.4677848717671247, "grad_norm": 1.4140625, "learning_rate": 1.0252577248668342e-05, "loss": 0.499, "step": 8574 }, { "epoch": 1.4679580132020345, "grad_norm": 1.4296875, "learning_rate": 1.0250753969934695e-05, "loss": 0.4838, "step": 8575 }, { "epoch": 1.468131154636944, "grad_norm": 1.4140625, "learning_rate": 1.0248930682859839e-05, "loss": 0.4667, "step": 8576 }, { "epoch": 1.4683042960718538, "grad_norm": 1.3984375, "learning_rate": 1.0247107387504422e-05, "loss": 0.4701, "step": 8577 }, { "epoch": 1.4684774375067633, "grad_norm": 1.390625, "learning_rate": 1.0245284083929096e-05, "loss": 0.4632, "step": 8578 }, { "epoch": 1.468650578941673, "grad_norm": 1.34375, "learning_rate": 1.0243460772194515e-05, "loss": 0.4602, "step": 8579 }, { "epoch": 1.4688237203765826, "grad_norm": 1.3515625, "learning_rate": 1.0241637452361323e-05, "loss": 0.4191, "step": 8580 }, { "epoch": 1.4689968618114921, "grad_norm": 1.4609375, "learning_rate": 1.0239814124490182e-05, "loss": 0.4969, "step": 8581 }, { "epoch": 1.469170003246402, "grad_norm": 1.40625, "learning_rate": 1.0237990788641733e-05, "loss": 0.4686, "step": 8582 }, { "epoch": 1.4693431446813117, "grad_norm": 1.4765625, "learning_rate": 1.0236167444876639e-05, "loss": 0.5497, "step": 8583 }, { "epoch": 1.4695162861162212, "grad_norm": 1.3828125, "learning_rate": 1.0234344093255546e-05, "loss": 0.4646, "step": 8584 }, { "epoch": 1.4696894275511307, "grad_norm": 1.4921875, "learning_rate": 1.0232520733839109e-05, "loss": 0.5376, "step": 8585 }, { "epoch": 1.4698625689860405, "grad_norm": 1.3203125, "learning_rate": 1.023069736668798e-05, "loss": 0.4561, "step": 8586 }, { "epoch": 1.47003571042095, "grad_norm": 1.5859375, "learning_rate": 1.0228873991862816e-05, "loss": 0.4994, "step": 8587 }, { "epoch": 1.4702088518558598, "grad_norm": 1.3828125, "learning_rate": 1.0227050609424267e-05, "loss": 0.4278, "step": 8588 }, { "epoch": 1.4703819932907694, "grad_norm": 1.5390625, "learning_rate": 1.0225227219432988e-05, "loss": 0.48, "step": 8589 }, { "epoch": 1.4705551347256791, "grad_norm": 1.46875, "learning_rate": 1.0223403821949633e-05, "loss": 0.4912, "step": 8590 }, { "epoch": 1.4707282761605887, "grad_norm": 1.3125, "learning_rate": 1.0221580417034855e-05, "loss": 0.4566, "step": 8591 }, { "epoch": 1.4709014175954982, "grad_norm": 1.4140625, "learning_rate": 1.0219757004749313e-05, "loss": 0.4685, "step": 8592 }, { "epoch": 1.471074559030408, "grad_norm": 1.4609375, "learning_rate": 1.0217933585153658e-05, "loss": 0.4324, "step": 8593 }, { "epoch": 1.4712477004653177, "grad_norm": 1.4453125, "learning_rate": 1.0216110158308548e-05, "loss": 0.5025, "step": 8594 }, { "epoch": 1.4714208419002273, "grad_norm": 1.3125, "learning_rate": 1.0214286724274635e-05, "loss": 0.4807, "step": 8595 }, { "epoch": 1.4715939833351368, "grad_norm": 1.3359375, "learning_rate": 1.021246328311258e-05, "loss": 0.4611, "step": 8596 }, { "epoch": 1.4717671247700466, "grad_norm": 1.40625, "learning_rate": 1.0210639834883029e-05, "loss": 0.4571, "step": 8597 }, { "epoch": 1.471940266204956, "grad_norm": 1.4296875, "learning_rate": 1.0208816379646652e-05, "loss": 0.4322, "step": 8598 }, { "epoch": 1.4721134076398659, "grad_norm": 1.515625, "learning_rate": 1.0206992917464093e-05, "loss": 0.4964, "step": 8599 }, { "epoch": 1.4722865490747754, "grad_norm": 1.3828125, "learning_rate": 1.0205169448396017e-05, "loss": 0.5004, "step": 8600 }, { "epoch": 1.4724596905096852, "grad_norm": 1.3515625, "learning_rate": 1.0203345972503076e-05, "loss": 0.4635, "step": 8601 }, { "epoch": 1.4726328319445947, "grad_norm": 1.3671875, "learning_rate": 1.0201522489845927e-05, "loss": 0.5285, "step": 8602 }, { "epoch": 1.4728059733795043, "grad_norm": 1.375, "learning_rate": 1.019969900048523e-05, "loss": 0.4194, "step": 8603 }, { "epoch": 1.472979114814414, "grad_norm": 1.421875, "learning_rate": 1.0197875504481642e-05, "loss": 0.4417, "step": 8604 }, { "epoch": 1.4731522562493238, "grad_norm": 1.4765625, "learning_rate": 1.019605200189582e-05, "loss": 0.5312, "step": 8605 }, { "epoch": 1.4733253976842333, "grad_norm": 1.515625, "learning_rate": 1.019422849278842e-05, "loss": 0.4455, "step": 8606 }, { "epoch": 1.4734985391191429, "grad_norm": 1.3828125, "learning_rate": 1.0192404977220106e-05, "loss": 0.4557, "step": 8607 }, { "epoch": 1.4736716805540526, "grad_norm": 1.390625, "learning_rate": 1.0190581455251527e-05, "loss": 0.5031, "step": 8608 }, { "epoch": 1.4738448219889622, "grad_norm": 1.4375, "learning_rate": 1.0188757926943349e-05, "loss": 0.4841, "step": 8609 }, { "epoch": 1.474017963423872, "grad_norm": 1.5, "learning_rate": 1.018693439235623e-05, "loss": 0.531, "step": 8610 }, { "epoch": 1.4741911048587815, "grad_norm": 1.46875, "learning_rate": 1.018511085155083e-05, "loss": 0.4538, "step": 8611 }, { "epoch": 1.4743642462936912, "grad_norm": 1.375, "learning_rate": 1.0183287304587805e-05, "loss": 0.4551, "step": 8612 }, { "epoch": 1.4745373877286008, "grad_norm": 1.3203125, "learning_rate": 1.0181463751527812e-05, "loss": 0.3889, "step": 8613 }, { "epoch": 1.4747105291635103, "grad_norm": 1.421875, "learning_rate": 1.0179640192431518e-05, "loss": 0.4468, "step": 8614 }, { "epoch": 1.47488367059842, "grad_norm": 1.453125, "learning_rate": 1.0177816627359575e-05, "loss": 0.4621, "step": 8615 }, { "epoch": 1.4750568120333298, "grad_norm": 1.3515625, "learning_rate": 1.0175993056372648e-05, "loss": 0.4852, "step": 8616 }, { "epoch": 1.4752299534682394, "grad_norm": 1.4140625, "learning_rate": 1.01741694795314e-05, "loss": 0.4952, "step": 8617 }, { "epoch": 1.475403094903149, "grad_norm": 1.59375, "learning_rate": 1.0172345896896483e-05, "loss": 0.5119, "step": 8618 }, { "epoch": 1.4755762363380587, "grad_norm": 1.3671875, "learning_rate": 1.0170522308528565e-05, "loss": 0.4362, "step": 8619 }, { "epoch": 1.4757493777729682, "grad_norm": 1.609375, "learning_rate": 1.0168698714488303e-05, "loss": 0.4805, "step": 8620 }, { "epoch": 1.475922519207878, "grad_norm": 1.3984375, "learning_rate": 1.0166875114836357e-05, "loss": 0.4541, "step": 8621 }, { "epoch": 1.4760956606427875, "grad_norm": 1.4453125, "learning_rate": 1.0165051509633395e-05, "loss": 0.4741, "step": 8622 }, { "epoch": 1.4762688020776973, "grad_norm": 1.2890625, "learning_rate": 1.0163227898940068e-05, "loss": 0.4479, "step": 8623 }, { "epoch": 1.4764419435126068, "grad_norm": 1.4296875, "learning_rate": 1.0161404282817047e-05, "loss": 0.486, "step": 8624 }, { "epoch": 1.4766150849475164, "grad_norm": 1.4453125, "learning_rate": 1.0159580661324989e-05, "loss": 0.5039, "step": 8625 }, { "epoch": 1.4767882263824261, "grad_norm": 1.5, "learning_rate": 1.0157757034524555e-05, "loss": 0.5091, "step": 8626 }, { "epoch": 1.476961367817336, "grad_norm": 1.3515625, "learning_rate": 1.015593340247641e-05, "loss": 0.4381, "step": 8627 }, { "epoch": 1.4771345092522454, "grad_norm": 1.3515625, "learning_rate": 1.0154109765241214e-05, "loss": 0.4832, "step": 8628 }, { "epoch": 1.477307650687155, "grad_norm": 1.4921875, "learning_rate": 1.0152286122879631e-05, "loss": 0.5569, "step": 8629 }, { "epoch": 1.4774807921220647, "grad_norm": 1.421875, "learning_rate": 1.015046247545232e-05, "loss": 0.4834, "step": 8630 }, { "epoch": 1.4776539335569743, "grad_norm": 1.296875, "learning_rate": 1.0148638823019953e-05, "loss": 0.4333, "step": 8631 }, { "epoch": 1.477827074991884, "grad_norm": 1.515625, "learning_rate": 1.0146815165643182e-05, "loss": 0.4904, "step": 8632 }, { "epoch": 1.4780002164267936, "grad_norm": 1.453125, "learning_rate": 1.0144991503382676e-05, "loss": 0.4445, "step": 8633 }, { "epoch": 1.4781733578617033, "grad_norm": 1.375, "learning_rate": 1.01431678362991e-05, "loss": 0.5349, "step": 8634 }, { "epoch": 1.4783464992966129, "grad_norm": 1.453125, "learning_rate": 1.0141344164453108e-05, "loss": 0.4645, "step": 8635 }, { "epoch": 1.4785196407315226, "grad_norm": 1.2890625, "learning_rate": 1.0139520487905374e-05, "loss": 0.4325, "step": 8636 }, { "epoch": 1.4786927821664322, "grad_norm": 1.4375, "learning_rate": 1.0137696806716558e-05, "loss": 0.4743, "step": 8637 }, { "epoch": 1.478865923601342, "grad_norm": 1.34375, "learning_rate": 1.0135873120947323e-05, "loss": 0.4377, "step": 8638 }, { "epoch": 1.4790390650362515, "grad_norm": 1.5, "learning_rate": 1.0134049430658332e-05, "loss": 0.5264, "step": 8639 }, { "epoch": 1.479212206471161, "grad_norm": 1.4609375, "learning_rate": 1.0132225735910252e-05, "loss": 0.4382, "step": 8640 }, { "epoch": 1.4793853479060708, "grad_norm": 1.453125, "learning_rate": 1.0130402036763747e-05, "loss": 0.4669, "step": 8641 }, { "epoch": 1.4795584893409803, "grad_norm": 1.4765625, "learning_rate": 1.012857833327948e-05, "loss": 0.4612, "step": 8642 }, { "epoch": 1.47973163077589, "grad_norm": 1.4765625, "learning_rate": 1.0126754625518116e-05, "loss": 0.461, "step": 8643 }, { "epoch": 1.4799047722107996, "grad_norm": 1.390625, "learning_rate": 1.012493091354032e-05, "loss": 0.47, "step": 8644 }, { "epoch": 1.4800779136457094, "grad_norm": 1.453125, "learning_rate": 1.0123107197406757e-05, "loss": 0.5763, "step": 8645 }, { "epoch": 1.480251055080619, "grad_norm": 1.3203125, "learning_rate": 1.0121283477178092e-05, "loss": 0.4621, "step": 8646 }, { "epoch": 1.4804241965155287, "grad_norm": 1.3828125, "learning_rate": 1.0119459752914988e-05, "loss": 0.4648, "step": 8647 }, { "epoch": 1.4805973379504382, "grad_norm": 1.4375, "learning_rate": 1.0117636024678117e-05, "loss": 0.5121, "step": 8648 }, { "epoch": 1.480770479385348, "grad_norm": 1.3828125, "learning_rate": 1.0115812292528138e-05, "loss": 0.4696, "step": 8649 }, { "epoch": 1.4809436208202575, "grad_norm": 1.375, "learning_rate": 1.0113988556525717e-05, "loss": 0.4536, "step": 8650 }, { "epoch": 1.481116762255167, "grad_norm": 1.34375, "learning_rate": 1.0112164816731522e-05, "loss": 0.446, "step": 8651 }, { "epoch": 1.4812899036900768, "grad_norm": 1.3125, "learning_rate": 1.0110341073206221e-05, "loss": 0.41, "step": 8652 }, { "epoch": 1.4814630451249866, "grad_norm": 1.421875, "learning_rate": 1.0108517326010472e-05, "loss": 0.4835, "step": 8653 }, { "epoch": 1.4816361865598962, "grad_norm": 1.390625, "learning_rate": 1.0106693575204947e-05, "loss": 0.4373, "step": 8654 }, { "epoch": 1.4818093279948057, "grad_norm": 1.2578125, "learning_rate": 1.0104869820850314e-05, "loss": 0.4285, "step": 8655 }, { "epoch": 1.4819824694297155, "grad_norm": 1.4921875, "learning_rate": 1.0103046063007234e-05, "loss": 0.4544, "step": 8656 }, { "epoch": 1.482155610864625, "grad_norm": 1.5, "learning_rate": 1.0101222301736378e-05, "loss": 0.4621, "step": 8657 }, { "epoch": 1.4823287522995348, "grad_norm": 1.3671875, "learning_rate": 1.0099398537098408e-05, "loss": 0.4344, "step": 8658 }, { "epoch": 1.4825018937344443, "grad_norm": 1.359375, "learning_rate": 1.0097574769153996e-05, "loss": 0.4007, "step": 8659 }, { "epoch": 1.482675035169354, "grad_norm": 1.40625, "learning_rate": 1.0095750997963804e-05, "loss": 0.4663, "step": 8660 }, { "epoch": 1.4828481766042636, "grad_norm": 1.4609375, "learning_rate": 1.0093927223588503e-05, "loss": 0.4792, "step": 8661 }, { "epoch": 1.4830213180391731, "grad_norm": 1.5078125, "learning_rate": 1.0092103446088758e-05, "loss": 0.4614, "step": 8662 }, { "epoch": 1.483194459474083, "grad_norm": 1.3984375, "learning_rate": 1.0090279665525232e-05, "loss": 0.461, "step": 8663 }, { "epoch": 1.4833676009089927, "grad_norm": 1.4140625, "learning_rate": 1.0088455881958601e-05, "loss": 0.4606, "step": 8664 }, { "epoch": 1.4833676009089927, "eval_loss": 0.6770570874214172, "eval_runtime": 2676.8153, "eval_samples_per_second": 18.708, "eval_steps_per_second": 18.708, "step": 8664 }, { "epoch": 1.4835407423439022, "grad_norm": 1.4921875, "learning_rate": 1.0086632095449522e-05, "loss": 0.4323, "step": 8665 }, { "epoch": 1.4837138837788117, "grad_norm": 1.46875, "learning_rate": 1.0084808306058671e-05, "loss": 0.4762, "step": 8666 }, { "epoch": 1.4838870252137215, "grad_norm": 1.359375, "learning_rate": 1.0082984513846713e-05, "loss": 0.4568, "step": 8667 }, { "epoch": 1.484060166648631, "grad_norm": 1.4375, "learning_rate": 1.0081160718874313e-05, "loss": 0.4489, "step": 8668 }, { "epoch": 1.4842333080835408, "grad_norm": 1.4375, "learning_rate": 1.0079336921202143e-05, "loss": 0.5004, "step": 8669 }, { "epoch": 1.4844064495184504, "grad_norm": 1.4453125, "learning_rate": 1.0077513120890865e-05, "loss": 0.5055, "step": 8670 }, { "epoch": 1.4845795909533601, "grad_norm": 1.375, "learning_rate": 1.0075689318001151e-05, "loss": 0.4225, "step": 8671 }, { "epoch": 1.4847527323882697, "grad_norm": 1.484375, "learning_rate": 1.0073865512593668e-05, "loss": 0.4628, "step": 8672 }, { "epoch": 1.4849258738231792, "grad_norm": 1.484375, "learning_rate": 1.0072041704729086e-05, "loss": 0.4676, "step": 8673 }, { "epoch": 1.485099015258089, "grad_norm": 1.3046875, "learning_rate": 1.007021789446807e-05, "loss": 0.4229, "step": 8674 }, { "epoch": 1.4852721566929987, "grad_norm": 1.453125, "learning_rate": 1.006839408187129e-05, "loss": 0.4728, "step": 8675 }, { "epoch": 1.4854452981279083, "grad_norm": 1.34375, "learning_rate": 1.0066570266999414e-05, "loss": 0.414, "step": 8676 }, { "epoch": 1.4856184395628178, "grad_norm": 1.4375, "learning_rate": 1.0064746449913108e-05, "loss": 0.4951, "step": 8677 }, { "epoch": 1.4857915809977276, "grad_norm": 1.3515625, "learning_rate": 1.0062922630673041e-05, "loss": 0.5068, "step": 8678 }, { "epoch": 1.485964722432637, "grad_norm": 1.4921875, "learning_rate": 1.0061098809339887e-05, "loss": 0.4518, "step": 8679 }, { "epoch": 1.4861378638675469, "grad_norm": 1.4140625, "learning_rate": 1.0059274985974305e-05, "loss": 0.5044, "step": 8680 }, { "epoch": 1.4863110053024564, "grad_norm": 1.4609375, "learning_rate": 1.0057451160636975e-05, "loss": 0.4816, "step": 8681 }, { "epoch": 1.4864841467373662, "grad_norm": 1.375, "learning_rate": 1.0055627333388556e-05, "loss": 0.5006, "step": 8682 }, { "epoch": 1.4866572881722757, "grad_norm": 1.4375, "learning_rate": 1.005380350428972e-05, "loss": 0.4973, "step": 8683 }, { "epoch": 1.4868304296071853, "grad_norm": 1.34375, "learning_rate": 1.0051979673401138e-05, "loss": 0.4496, "step": 8684 }, { "epoch": 1.487003571042095, "grad_norm": 1.3671875, "learning_rate": 1.0050155840783475e-05, "loss": 0.4609, "step": 8685 }, { "epoch": 1.4871767124770048, "grad_norm": 1.7578125, "learning_rate": 1.0048332006497406e-05, "loss": 0.464, "step": 8686 }, { "epoch": 1.4873498539119143, "grad_norm": 1.40625, "learning_rate": 1.0046508170603592e-05, "loss": 0.4699, "step": 8687 }, { "epoch": 1.4875229953468239, "grad_norm": 1.4140625, "learning_rate": 1.0044684333162709e-05, "loss": 0.4839, "step": 8688 }, { "epoch": 1.4876961367817336, "grad_norm": 1.546875, "learning_rate": 1.0042860494235418e-05, "loss": 0.5234, "step": 8689 }, { "epoch": 1.4878692782166432, "grad_norm": 1.3828125, "learning_rate": 1.0041036653882399e-05, "loss": 0.4774, "step": 8690 }, { "epoch": 1.488042419651553, "grad_norm": 1.4296875, "learning_rate": 1.0039212812164312e-05, "loss": 0.4239, "step": 8691 }, { "epoch": 1.4882155610864625, "grad_norm": 1.375, "learning_rate": 1.003738896914183e-05, "loss": 0.4699, "step": 8692 }, { "epoch": 1.4883887025213722, "grad_norm": 1.4296875, "learning_rate": 1.0035565124875623e-05, "loss": 0.4797, "step": 8693 }, { "epoch": 1.4885618439562818, "grad_norm": 1.40625, "learning_rate": 1.0033741279426355e-05, "loss": 0.5152, "step": 8694 }, { "epoch": 1.4887349853911913, "grad_norm": 1.3515625, "learning_rate": 1.0031917432854705e-05, "loss": 0.4597, "step": 8695 }, { "epoch": 1.488908126826101, "grad_norm": 1.4375, "learning_rate": 1.0030093585221335e-05, "loss": 0.422, "step": 8696 }, { "epoch": 1.4890812682610108, "grad_norm": 1.453125, "learning_rate": 1.0028269736586914e-05, "loss": 0.4605, "step": 8697 }, { "epoch": 1.4892544096959204, "grad_norm": 1.375, "learning_rate": 1.0026445887012117e-05, "loss": 0.4534, "step": 8698 }, { "epoch": 1.48942755113083, "grad_norm": 1.4609375, "learning_rate": 1.0024622036557606e-05, "loss": 0.439, "step": 8699 }, { "epoch": 1.4896006925657397, "grad_norm": 1.4140625, "learning_rate": 1.0022798185284059e-05, "loss": 0.4454, "step": 8700 }, { "epoch": 1.4897738340006492, "grad_norm": 1.40625, "learning_rate": 1.002097433325214e-05, "loss": 0.4657, "step": 8701 }, { "epoch": 1.489946975435559, "grad_norm": 1.5234375, "learning_rate": 1.0019150480522522e-05, "loss": 0.4988, "step": 8702 }, { "epoch": 1.4901201168704685, "grad_norm": 1.5078125, "learning_rate": 1.0017326627155868e-05, "loss": 0.4543, "step": 8703 }, { "epoch": 1.4902932583053783, "grad_norm": 1.4921875, "learning_rate": 1.0015502773212853e-05, "loss": 0.5013, "step": 8704 }, { "epoch": 1.4904663997402878, "grad_norm": 1.4296875, "learning_rate": 1.0013678918754146e-05, "loss": 0.5033, "step": 8705 }, { "epoch": 1.4906395411751974, "grad_norm": 1.3828125, "learning_rate": 1.0011855063840416e-05, "loss": 0.4591, "step": 8706 }, { "epoch": 1.4908126826101071, "grad_norm": 1.4296875, "learning_rate": 1.0010031208532338e-05, "loss": 0.4753, "step": 8707 }, { "epoch": 1.490985824045017, "grad_norm": 1.375, "learning_rate": 1.0008207352890571e-05, "loss": 0.4124, "step": 8708 }, { "epoch": 1.4911589654799264, "grad_norm": 1.375, "learning_rate": 1.000638349697579e-05, "loss": 0.4921, "step": 8709 }, { "epoch": 1.491332106914836, "grad_norm": 1.3828125, "learning_rate": 1.0004559640848669e-05, "loss": 0.4624, "step": 8710 }, { "epoch": 1.4915052483497457, "grad_norm": 1.4609375, "learning_rate": 1.000273578456987e-05, "loss": 0.5361, "step": 8711 }, { "epoch": 1.4916783897846553, "grad_norm": 1.4140625, "learning_rate": 1.000091192820007e-05, "loss": 0.5274, "step": 8712 }, { "epoch": 1.491851531219565, "grad_norm": 1.515625, "learning_rate": 9.999088071799933e-06, "loss": 0.4817, "step": 8713 }, { "epoch": 1.4920246726544746, "grad_norm": 1.3125, "learning_rate": 9.997264215430133e-06, "loss": 0.4891, "step": 8714 }, { "epoch": 1.4921978140893843, "grad_norm": 1.578125, "learning_rate": 9.995440359151335e-06, "loss": 0.5247, "step": 8715 }, { "epoch": 1.4923709555242939, "grad_norm": 1.34375, "learning_rate": 9.993616503024211e-06, "loss": 0.4362, "step": 8716 }, { "epoch": 1.4925440969592034, "grad_norm": 1.46875, "learning_rate": 9.991792647109434e-06, "loss": 0.4624, "step": 8717 }, { "epoch": 1.4927172383941132, "grad_norm": 1.4375, "learning_rate": 9.989968791467666e-06, "loss": 0.4927, "step": 8718 }, { "epoch": 1.492890379829023, "grad_norm": 1.4140625, "learning_rate": 9.988144936159582e-06, "loss": 0.4739, "step": 8719 }, { "epoch": 1.4930635212639325, "grad_norm": 1.4296875, "learning_rate": 9.986321081245854e-06, "loss": 0.4775, "step": 8720 }, { "epoch": 1.493236662698842, "grad_norm": 1.4375, "learning_rate": 9.984497226787148e-06, "loss": 0.4561, "step": 8721 }, { "epoch": 1.4934098041337518, "grad_norm": 1.40625, "learning_rate": 9.982673372844135e-06, "loss": 0.446, "step": 8722 }, { "epoch": 1.4935829455686613, "grad_norm": 1.5234375, "learning_rate": 9.980849519477482e-06, "loss": 0.4747, "step": 8723 }, { "epoch": 1.493756087003571, "grad_norm": 1.421875, "learning_rate": 9.979025666747863e-06, "loss": 0.4266, "step": 8724 }, { "epoch": 1.4939292284384806, "grad_norm": 1.375, "learning_rate": 9.977201814715943e-06, "loss": 0.4804, "step": 8725 }, { "epoch": 1.4941023698733904, "grad_norm": 1.359375, "learning_rate": 9.975377963442396e-06, "loss": 0.451, "step": 8726 }, { "epoch": 1.4942755113083, "grad_norm": 1.4140625, "learning_rate": 9.973554112987888e-06, "loss": 0.4567, "step": 8727 }, { "epoch": 1.4944486527432095, "grad_norm": 1.3046875, "learning_rate": 9.971730263413088e-06, "loss": 0.4337, "step": 8728 }, { "epoch": 1.4946217941781192, "grad_norm": 1.5234375, "learning_rate": 9.969906414778667e-06, "loss": 0.4651, "step": 8729 }, { "epoch": 1.494794935613029, "grad_norm": 1.4375, "learning_rate": 9.968082567145299e-06, "loss": 0.4937, "step": 8730 }, { "epoch": 1.4949680770479385, "grad_norm": 1.4765625, "learning_rate": 9.966258720573645e-06, "loss": 0.4689, "step": 8731 }, { "epoch": 1.495141218482848, "grad_norm": 1.296875, "learning_rate": 9.96443487512438e-06, "loss": 0.4581, "step": 8732 }, { "epoch": 1.4953143599177579, "grad_norm": 1.3671875, "learning_rate": 9.962611030858173e-06, "loss": 0.4546, "step": 8733 }, { "epoch": 1.4954875013526674, "grad_norm": 1.4140625, "learning_rate": 9.960787187835691e-06, "loss": 0.5044, "step": 8734 }, { "epoch": 1.4956606427875772, "grad_norm": 1.4375, "learning_rate": 9.958963346117604e-06, "loss": 0.4755, "step": 8735 }, { "epoch": 1.4958337842224867, "grad_norm": 1.421875, "learning_rate": 9.957139505764584e-06, "loss": 0.4284, "step": 8736 }, { "epoch": 1.4960069256573965, "grad_norm": 1.328125, "learning_rate": 9.955315666837296e-06, "loss": 0.4336, "step": 8737 }, { "epoch": 1.496180067092306, "grad_norm": 1.484375, "learning_rate": 9.953491829396413e-06, "loss": 0.5362, "step": 8738 }, { "epoch": 1.4963532085272155, "grad_norm": 1.578125, "learning_rate": 9.951667993502599e-06, "loss": 0.4412, "step": 8739 }, { "epoch": 1.4965263499621253, "grad_norm": 1.46875, "learning_rate": 9.949844159216525e-06, "loss": 0.4484, "step": 8740 }, { "epoch": 1.496699491397035, "grad_norm": 1.46875, "learning_rate": 9.948020326598862e-06, "loss": 0.4826, "step": 8741 }, { "epoch": 1.4968726328319446, "grad_norm": 1.3046875, "learning_rate": 9.946196495710281e-06, "loss": 0.4427, "step": 8742 }, { "epoch": 1.4970457742668541, "grad_norm": 1.390625, "learning_rate": 9.944372666611446e-06, "loss": 0.4548, "step": 8743 }, { "epoch": 1.497218915701764, "grad_norm": 1.4609375, "learning_rate": 9.942548839363028e-06, "loss": 0.4971, "step": 8744 }, { "epoch": 1.4973920571366734, "grad_norm": 1.40625, "learning_rate": 9.940725014025696e-06, "loss": 0.5015, "step": 8745 }, { "epoch": 1.4975651985715832, "grad_norm": 1.4609375, "learning_rate": 9.938901190660117e-06, "loss": 0.4026, "step": 8746 }, { "epoch": 1.4977383400064928, "grad_norm": 1.375, "learning_rate": 9.93707736932696e-06, "loss": 0.4975, "step": 8747 }, { "epoch": 1.4979114814414025, "grad_norm": 1.3984375, "learning_rate": 9.935253550086897e-06, "loss": 0.5153, "step": 8748 }, { "epoch": 1.498084622876312, "grad_norm": 1.40625, "learning_rate": 9.933429733000591e-06, "loss": 0.4513, "step": 8749 }, { "epoch": 1.4982577643112216, "grad_norm": 1.4140625, "learning_rate": 9.931605918128716e-06, "loss": 0.5305, "step": 8750 }, { "epoch": 1.4984309057461314, "grad_norm": 1.5234375, "learning_rate": 9.92978210553193e-06, "loss": 0.4978, "step": 8751 }, { "epoch": 1.4986040471810411, "grad_norm": 1.53125, "learning_rate": 9.927958295270916e-06, "loss": 0.476, "step": 8752 }, { "epoch": 1.4987771886159507, "grad_norm": 1.3828125, "learning_rate": 9.926134487406332e-06, "loss": 0.472, "step": 8753 }, { "epoch": 1.4989503300508602, "grad_norm": 1.484375, "learning_rate": 9.924310681998852e-06, "loss": 0.5234, "step": 8754 }, { "epoch": 1.49912347148577, "grad_norm": 1.3203125, "learning_rate": 9.922486879109138e-06, "loss": 0.4504, "step": 8755 }, { "epoch": 1.4992966129206795, "grad_norm": 1.5078125, "learning_rate": 9.92066307879786e-06, "loss": 0.4997, "step": 8756 }, { "epoch": 1.4994697543555893, "grad_norm": 1.390625, "learning_rate": 9.91883928112569e-06, "loss": 0.5766, "step": 8757 }, { "epoch": 1.4996428957904988, "grad_norm": 1.3828125, "learning_rate": 9.91701548615329e-06, "loss": 0.4582, "step": 8758 }, { "epoch": 1.4998160372254086, "grad_norm": 1.4140625, "learning_rate": 9.915191693941332e-06, "loss": 0.4392, "step": 8759 }, { "epoch": 1.4999891786603181, "grad_norm": 1.4140625, "learning_rate": 9.913367904550481e-06, "loss": 0.4951, "step": 8760 }, { "epoch": 1.5001623200952277, "grad_norm": 1.3515625, "learning_rate": 9.911544118041406e-06, "loss": 0.4662, "step": 8761 }, { "epoch": 1.5003354615301374, "grad_norm": 1.46875, "learning_rate": 9.90972033447477e-06, "loss": 0.4841, "step": 8762 }, { "epoch": 1.5005086029650472, "grad_norm": 1.53125, "learning_rate": 9.907896553911244e-06, "loss": 0.4418, "step": 8763 }, { "epoch": 1.5006817443999567, "grad_norm": 1.609375, "learning_rate": 9.906072776411499e-06, "loss": 0.5324, "step": 8764 }, { "epoch": 1.5008548858348663, "grad_norm": 1.3125, "learning_rate": 9.904249002036198e-06, "loss": 0.4253, "step": 8765 }, { "epoch": 1.501028027269776, "grad_norm": 1.5625, "learning_rate": 9.902425230846008e-06, "loss": 0.4893, "step": 8766 }, { "epoch": 1.5012011687046858, "grad_norm": 1.3046875, "learning_rate": 9.900601462901594e-06, "loss": 0.4791, "step": 8767 }, { "epoch": 1.5013743101395953, "grad_norm": 1.4296875, "learning_rate": 9.898777698263626e-06, "loss": 0.4689, "step": 8768 }, { "epoch": 1.5015474515745049, "grad_norm": 1.40625, "learning_rate": 9.896953936992771e-06, "loss": 0.4861, "step": 8769 }, { "epoch": 1.5017205930094146, "grad_norm": 1.4609375, "learning_rate": 9.895130179149691e-06, "loss": 0.4909, "step": 8770 }, { "epoch": 1.5018937344443242, "grad_norm": 1.375, "learning_rate": 9.893306424795055e-06, "loss": 0.4752, "step": 8771 }, { "epoch": 1.5020668758792337, "grad_norm": 1.375, "learning_rate": 9.891482673989533e-06, "loss": 0.4469, "step": 8772 }, { "epoch": 1.5022400173141435, "grad_norm": 1.3828125, "learning_rate": 9.889658926793784e-06, "loss": 0.5114, "step": 8773 }, { "epoch": 1.5024131587490532, "grad_norm": 1.328125, "learning_rate": 9.88783518326848e-06, "loss": 0.4563, "step": 8774 }, { "epoch": 1.5025863001839628, "grad_norm": 1.484375, "learning_rate": 9.886011443474284e-06, "loss": 0.48, "step": 8775 }, { "epoch": 1.5027594416188723, "grad_norm": 1.421875, "learning_rate": 9.884187707471866e-06, "loss": 0.4399, "step": 8776 }, { "epoch": 1.502932583053782, "grad_norm": 1.3671875, "learning_rate": 9.882363975321885e-06, "loss": 0.4653, "step": 8777 }, { "epoch": 1.5031057244886918, "grad_norm": 1.375, "learning_rate": 9.880540247085014e-06, "loss": 0.4355, "step": 8778 }, { "epoch": 1.5032788659236014, "grad_norm": 1.359375, "learning_rate": 9.878716522821911e-06, "loss": 0.4616, "step": 8779 }, { "epoch": 1.503452007358511, "grad_norm": 1.3984375, "learning_rate": 9.876892802593247e-06, "loss": 0.4737, "step": 8780 }, { "epoch": 1.5036251487934207, "grad_norm": 1.3671875, "learning_rate": 9.875069086459684e-06, "loss": 0.4596, "step": 8781 }, { "epoch": 1.5037982902283302, "grad_norm": 1.3984375, "learning_rate": 9.873245374481887e-06, "loss": 0.4921, "step": 8782 }, { "epoch": 1.5039714316632398, "grad_norm": 1.546875, "learning_rate": 9.871421666720521e-06, "loss": 0.5042, "step": 8783 }, { "epoch": 1.5041445730981495, "grad_norm": 1.4453125, "learning_rate": 9.869597963236253e-06, "loss": 0.5516, "step": 8784 }, { "epoch": 1.5043177145330593, "grad_norm": 1.609375, "learning_rate": 9.86777426408975e-06, "loss": 0.5087, "step": 8785 }, { "epoch": 1.5044908559679688, "grad_norm": 1.34375, "learning_rate": 9.86595056934167e-06, "loss": 0.4568, "step": 8786 }, { "epoch": 1.5046639974028784, "grad_norm": 1.3828125, "learning_rate": 9.864126879052679e-06, "loss": 0.4692, "step": 8787 }, { "epoch": 1.5048371388377881, "grad_norm": 1.328125, "learning_rate": 9.862303193283446e-06, "loss": 0.453, "step": 8788 }, { "epoch": 1.505010280272698, "grad_norm": 1.390625, "learning_rate": 9.860479512094628e-06, "loss": 0.4318, "step": 8789 }, { "epoch": 1.5051834217076074, "grad_norm": 1.640625, "learning_rate": 9.858655835546895e-06, "loss": 0.5501, "step": 8790 }, { "epoch": 1.505356563142517, "grad_norm": 1.3984375, "learning_rate": 9.856832163700907e-06, "loss": 0.4523, "step": 8791 }, { "epoch": 1.5055297045774267, "grad_norm": 1.4921875, "learning_rate": 9.855008496617326e-06, "loss": 0.4922, "step": 8792 }, { "epoch": 1.5057028460123363, "grad_norm": 1.4765625, "learning_rate": 9.853184834356823e-06, "loss": 0.5557, "step": 8793 }, { "epoch": 1.5058759874472458, "grad_norm": 1.421875, "learning_rate": 9.85136117698005e-06, "loss": 0.4765, "step": 8794 }, { "epoch": 1.5060491288821556, "grad_norm": 1.28125, "learning_rate": 9.84953752454768e-06, "loss": 0.4032, "step": 8795 }, { "epoch": 1.5062222703170653, "grad_norm": 1.359375, "learning_rate": 9.84771387712037e-06, "loss": 0.4622, "step": 8796 }, { "epoch": 1.5063954117519749, "grad_norm": 1.53125, "learning_rate": 9.845890234758789e-06, "loss": 0.5261, "step": 8797 }, { "epoch": 1.5065685531868844, "grad_norm": 1.328125, "learning_rate": 9.844066597523592e-06, "loss": 0.4108, "step": 8798 }, { "epoch": 1.5067416946217942, "grad_norm": 1.3828125, "learning_rate": 9.842242965475447e-06, "loss": 0.4973, "step": 8799 }, { "epoch": 1.506914836056704, "grad_norm": 1.4453125, "learning_rate": 9.840419338675016e-06, "loss": 0.4775, "step": 8800 }, { "epoch": 1.5070879774916135, "grad_norm": 1.46875, "learning_rate": 9.838595717182956e-06, "loss": 0.4499, "step": 8801 }, { "epoch": 1.507261118926523, "grad_norm": 1.3515625, "learning_rate": 9.836772101059937e-06, "loss": 0.4215, "step": 8802 }, { "epoch": 1.5074342603614328, "grad_norm": 1.3125, "learning_rate": 9.83494849036661e-06, "loss": 0.4495, "step": 8803 }, { "epoch": 1.5076074017963423, "grad_norm": 1.3671875, "learning_rate": 9.833124885163645e-06, "loss": 0.487, "step": 8804 }, { "epoch": 1.5077805432312519, "grad_norm": 1.4921875, "learning_rate": 9.831301285511699e-06, "loss": 0.5137, "step": 8805 }, { "epoch": 1.5079536846661616, "grad_norm": 1.3515625, "learning_rate": 9.829477691471438e-06, "loss": 0.4744, "step": 8806 }, { "epoch": 1.5081268261010714, "grad_norm": 1.4765625, "learning_rate": 9.827654103103518e-06, "loss": 0.5233, "step": 8807 }, { "epoch": 1.508299967535981, "grad_norm": 1.390625, "learning_rate": 9.825830520468604e-06, "loss": 0.4656, "step": 8808 }, { "epoch": 1.5084731089708905, "grad_norm": 1.34375, "learning_rate": 9.824006943627354e-06, "loss": 0.4658, "step": 8809 }, { "epoch": 1.5086462504058002, "grad_norm": 1.390625, "learning_rate": 9.822183372640426e-06, "loss": 0.462, "step": 8810 }, { "epoch": 1.50881939184071, "grad_norm": 1.4609375, "learning_rate": 9.820359807568486e-06, "loss": 0.4555, "step": 8811 }, { "epoch": 1.5089925332756196, "grad_norm": 2.96875, "learning_rate": 9.818536248472193e-06, "loss": 0.5013, "step": 8812 }, { "epoch": 1.509165674710529, "grad_norm": 1.375, "learning_rate": 9.816712695412201e-06, "loss": 0.445, "step": 8813 }, { "epoch": 1.5093388161454389, "grad_norm": 1.4375, "learning_rate": 9.814889148449176e-06, "loss": 0.5023, "step": 8814 }, { "epoch": 1.5095119575803484, "grad_norm": 1.2890625, "learning_rate": 9.813065607643773e-06, "loss": 0.4545, "step": 8815 }, { "epoch": 1.509685099015258, "grad_norm": 1.3671875, "learning_rate": 9.811242073056651e-06, "loss": 0.4744, "step": 8816 }, { "epoch": 1.5098582404501677, "grad_norm": 1.359375, "learning_rate": 9.809418544748473e-06, "loss": 0.479, "step": 8817 }, { "epoch": 1.5100313818850775, "grad_norm": 1.4921875, "learning_rate": 9.8075950227799e-06, "loss": 0.5166, "step": 8818 }, { "epoch": 1.510204523319987, "grad_norm": 1.4375, "learning_rate": 9.805771507211581e-06, "loss": 0.4645, "step": 8819 }, { "epoch": 1.5103776647548965, "grad_norm": 1.421875, "learning_rate": 9.803947998104182e-06, "loss": 0.4362, "step": 8820 }, { "epoch": 1.5105508061898063, "grad_norm": 1.2890625, "learning_rate": 9.802124495518361e-06, "loss": 0.4477, "step": 8821 }, { "epoch": 1.510723947624716, "grad_norm": 1.7109375, "learning_rate": 9.800300999514773e-06, "loss": 0.6313, "step": 8822 }, { "epoch": 1.5108970890596256, "grad_norm": 1.3984375, "learning_rate": 9.798477510154075e-06, "loss": 0.4766, "step": 8823 }, { "epoch": 1.5110702304945351, "grad_norm": 1.453125, "learning_rate": 9.796654027496929e-06, "loss": 0.4342, "step": 8824 }, { "epoch": 1.511243371929445, "grad_norm": 1.359375, "learning_rate": 9.794830551603988e-06, "loss": 0.5168, "step": 8825 }, { "epoch": 1.5114165133643545, "grad_norm": 1.3671875, "learning_rate": 9.793007082535912e-06, "loss": 0.4457, "step": 8826 }, { "epoch": 1.511589654799264, "grad_norm": 1.3515625, "learning_rate": 9.79118362035335e-06, "loss": 0.4459, "step": 8827 }, { "epoch": 1.5117627962341738, "grad_norm": 1.40625, "learning_rate": 9.78936016511697e-06, "loss": 0.4835, "step": 8828 }, { "epoch": 1.5119359376690835, "grad_norm": 1.4453125, "learning_rate": 9.787536716887423e-06, "loss": 0.4481, "step": 8829 }, { "epoch": 1.512109079103993, "grad_norm": 1.4140625, "learning_rate": 9.785713275725368e-06, "loss": 0.4807, "step": 8830 }, { "epoch": 1.5122822205389026, "grad_norm": 1.4140625, "learning_rate": 9.783889841691455e-06, "loss": 0.4443, "step": 8831 }, { "epoch": 1.5124553619738124, "grad_norm": 1.4609375, "learning_rate": 9.782066414846344e-06, "loss": 0.5272, "step": 8832 }, { "epoch": 1.5126285034087221, "grad_norm": 1.453125, "learning_rate": 9.78024299525069e-06, "loss": 0.4875, "step": 8833 }, { "epoch": 1.5128016448436317, "grad_norm": 1.4609375, "learning_rate": 9.778419582965147e-06, "loss": 0.5084, "step": 8834 }, { "epoch": 1.5129747862785412, "grad_norm": 1.390625, "learning_rate": 9.77659617805037e-06, "loss": 0.4353, "step": 8835 }, { "epoch": 1.513147927713451, "grad_norm": 1.4453125, "learning_rate": 9.774772780567017e-06, "loss": 0.4731, "step": 8836 }, { "epoch": 1.5133210691483607, "grad_norm": 1.390625, "learning_rate": 9.772949390575735e-06, "loss": 0.4464, "step": 8837 }, { "epoch": 1.51349421058327, "grad_norm": 1.4140625, "learning_rate": 9.771126008137184e-06, "loss": 0.4073, "step": 8838 }, { "epoch": 1.5136673520181798, "grad_norm": 1.40625, "learning_rate": 9.769302633312018e-06, "loss": 0.5122, "step": 8839 }, { "epoch": 1.5138404934530896, "grad_norm": 1.5, "learning_rate": 9.767479266160893e-06, "loss": 0.4738, "step": 8840 }, { "epoch": 1.5140136348879991, "grad_norm": 1.453125, "learning_rate": 9.765655906744456e-06, "loss": 0.4645, "step": 8841 }, { "epoch": 1.5141867763229087, "grad_norm": 1.453125, "learning_rate": 9.763832555123364e-06, "loss": 0.4647, "step": 8842 }, { "epoch": 1.5143599177578184, "grad_norm": 1.3671875, "learning_rate": 9.762009211358268e-06, "loss": 0.4732, "step": 8843 }, { "epoch": 1.5145330591927282, "grad_norm": 1.4375, "learning_rate": 9.760185875509823e-06, "loss": 0.5187, "step": 8844 }, { "epoch": 1.5147062006276377, "grad_norm": 1.34375, "learning_rate": 9.75836254763868e-06, "loss": 0.4296, "step": 8845 }, { "epoch": 1.5148793420625473, "grad_norm": 1.3359375, "learning_rate": 9.75653922780549e-06, "loss": 0.4438, "step": 8846 }, { "epoch": 1.515052483497457, "grad_norm": 1.4140625, "learning_rate": 9.754715916070906e-06, "loss": 0.4887, "step": 8847 }, { "epoch": 1.5152256249323668, "grad_norm": 1.3359375, "learning_rate": 9.752892612495578e-06, "loss": 0.428, "step": 8848 }, { "epoch": 1.515398766367276, "grad_norm": 1.46875, "learning_rate": 9.751069317140163e-06, "loss": 0.4837, "step": 8849 }, { "epoch": 1.5155719078021859, "grad_norm": 1.2890625, "learning_rate": 9.749246030065306e-06, "loss": 0.4138, "step": 8850 }, { "epoch": 1.5157450492370956, "grad_norm": 1.3671875, "learning_rate": 9.74742275133166e-06, "loss": 0.4941, "step": 8851 }, { "epoch": 1.5159181906720052, "grad_norm": 1.46875, "learning_rate": 9.745599480999878e-06, "loss": 0.5138, "step": 8852 }, { "epoch": 1.5160913321069147, "grad_norm": 1.359375, "learning_rate": 9.743776219130604e-06, "loss": 0.4687, "step": 8853 }, { "epoch": 1.5162644735418245, "grad_norm": 1.4140625, "learning_rate": 9.741952965784494e-06, "loss": 0.4619, "step": 8854 }, { "epoch": 1.5164376149767342, "grad_norm": 1.4296875, "learning_rate": 9.740129721022195e-06, "loss": 0.4774, "step": 8855 }, { "epoch": 1.5166107564116438, "grad_norm": 1.390625, "learning_rate": 9.738306484904354e-06, "loss": 0.454, "step": 8856 }, { "epoch": 1.5167838978465533, "grad_norm": 1.3125, "learning_rate": 9.736483257491625e-06, "loss": 0.4448, "step": 8857 }, { "epoch": 1.516957039281463, "grad_norm": 1.375, "learning_rate": 9.734660038844654e-06, "loss": 0.4709, "step": 8858 }, { "epoch": 1.5171301807163728, "grad_norm": 1.4375, "learning_rate": 9.732836829024088e-06, "loss": 0.4838, "step": 8859 }, { "epoch": 1.5173033221512822, "grad_norm": 1.390625, "learning_rate": 9.731013628090576e-06, "loss": 0.4947, "step": 8860 }, { "epoch": 1.517476463586192, "grad_norm": 1.3671875, "learning_rate": 9.729190436104772e-06, "loss": 0.437, "step": 8861 }, { "epoch": 1.5176496050211017, "grad_norm": 1.5234375, "learning_rate": 9.727367253127315e-06, "loss": 0.5286, "step": 8862 }, { "epoch": 1.5178227464560112, "grad_norm": 1.3984375, "learning_rate": 9.725544079218857e-06, "loss": 0.4745, "step": 8863 }, { "epoch": 1.5179958878909208, "grad_norm": 1.3671875, "learning_rate": 9.723720914440045e-06, "loss": 0.423, "step": 8864 }, { "epoch": 1.5181690293258305, "grad_norm": 1.546875, "learning_rate": 9.721897758851525e-06, "loss": 0.4941, "step": 8865 }, { "epoch": 1.5183421707607403, "grad_norm": 1.59375, "learning_rate": 9.720074612513942e-06, "loss": 0.4636, "step": 8866 }, { "epoch": 1.5185153121956498, "grad_norm": 1.359375, "learning_rate": 9.718251475487943e-06, "loss": 0.4472, "step": 8867 }, { "epoch": 1.5186884536305594, "grad_norm": 1.40625, "learning_rate": 9.716428347834172e-06, "loss": 0.4387, "step": 8868 }, { "epoch": 1.5188615950654691, "grad_norm": 1.46875, "learning_rate": 9.71460522961328e-06, "loss": 0.4934, "step": 8869 }, { "epoch": 1.519034736500379, "grad_norm": 1.5078125, "learning_rate": 9.712782120885905e-06, "loss": 0.4956, "step": 8870 }, { "epoch": 1.5192078779352884, "grad_norm": 1.3359375, "learning_rate": 9.710959021712695e-06, "loss": 0.4903, "step": 8871 }, { "epoch": 1.519381019370198, "grad_norm": 1.4609375, "learning_rate": 9.709135932154297e-06, "loss": 0.4167, "step": 8872 }, { "epoch": 1.5195541608051077, "grad_norm": 1.3515625, "learning_rate": 9.707312852271354e-06, "loss": 0.4833, "step": 8873 }, { "epoch": 1.5197273022400173, "grad_norm": 1.4296875, "learning_rate": 9.705489782124508e-06, "loss": 0.432, "step": 8874 }, { "epoch": 1.5199004436749268, "grad_norm": 1.53125, "learning_rate": 9.703666721774403e-06, "loss": 0.4424, "step": 8875 }, { "epoch": 1.5200735851098366, "grad_norm": 1.3515625, "learning_rate": 9.701843671281685e-06, "loss": 0.4599, "step": 8876 }, { "epoch": 1.5202467265447464, "grad_norm": 1.453125, "learning_rate": 9.700020630706992e-06, "loss": 0.5104, "step": 8877 }, { "epoch": 1.520419867979656, "grad_norm": 1.3515625, "learning_rate": 9.698197600110973e-06, "loss": 0.5056, "step": 8878 }, { "epoch": 1.5205930094145654, "grad_norm": 1.359375, "learning_rate": 9.696374579554262e-06, "loss": 0.4653, "step": 8879 }, { "epoch": 1.5207661508494752, "grad_norm": 1.3046875, "learning_rate": 9.694551569097506e-06, "loss": 0.4341, "step": 8880 }, { "epoch": 1.520939292284385, "grad_norm": 1.4765625, "learning_rate": 9.692728568801345e-06, "loss": 0.4348, "step": 8881 }, { "epoch": 1.5211124337192945, "grad_norm": 1.515625, "learning_rate": 9.690905578726424e-06, "loss": 0.4759, "step": 8882 }, { "epoch": 1.521285575154204, "grad_norm": 1.4296875, "learning_rate": 9.689082598933381e-06, "loss": 0.493, "step": 8883 }, { "epoch": 1.5214587165891138, "grad_norm": 1.6484375, "learning_rate": 9.687259629482854e-06, "loss": 0.49, "step": 8884 }, { "epoch": 1.5216318580240233, "grad_norm": 1.3984375, "learning_rate": 9.68543667043549e-06, "loss": 0.5698, "step": 8885 }, { "epoch": 1.5218049994589329, "grad_norm": 1.4296875, "learning_rate": 9.68361372185192e-06, "loss": 0.5358, "step": 8886 }, { "epoch": 1.5219781408938426, "grad_norm": 1.484375, "learning_rate": 9.681790783792792e-06, "loss": 0.5545, "step": 8887 }, { "epoch": 1.5221512823287524, "grad_norm": 1.4296875, "learning_rate": 9.679967856318741e-06, "loss": 0.4814, "step": 8888 }, { "epoch": 1.522324423763662, "grad_norm": 1.515625, "learning_rate": 9.678144939490405e-06, "loss": 0.4918, "step": 8889 }, { "epoch": 1.5224975651985715, "grad_norm": 1.390625, "learning_rate": 9.676322033368425e-06, "loss": 0.433, "step": 8890 }, { "epoch": 1.5226707066334813, "grad_norm": 1.359375, "learning_rate": 9.674499138013435e-06, "loss": 0.4928, "step": 8891 }, { "epoch": 1.522843848068391, "grad_norm": 1.3828125, "learning_rate": 9.672676253486076e-06, "loss": 0.4341, "step": 8892 }, { "epoch": 1.5230169895033006, "grad_norm": 1.375, "learning_rate": 9.670853379846984e-06, "loss": 0.4522, "step": 8893 }, { "epoch": 1.52319013093821, "grad_norm": 1.390625, "learning_rate": 9.669030517156802e-06, "loss": 0.4788, "step": 8894 }, { "epoch": 1.5233632723731199, "grad_norm": 1.3828125, "learning_rate": 9.667207665476157e-06, "loss": 0.4874, "step": 8895 }, { "epoch": 1.5235364138080294, "grad_norm": 1.3125, "learning_rate": 9.66538482486569e-06, "loss": 0.4071, "step": 8896 }, { "epoch": 1.523709555242939, "grad_norm": 1.3203125, "learning_rate": 9.66356199538604e-06, "loss": 0.4091, "step": 8897 }, { "epoch": 1.5238826966778487, "grad_norm": 1.4140625, "learning_rate": 9.661739177097836e-06, "loss": 0.4784, "step": 8898 }, { "epoch": 1.5240558381127585, "grad_norm": 1.3828125, "learning_rate": 9.659916370061718e-06, "loss": 0.4258, "step": 8899 }, { "epoch": 1.524228979547668, "grad_norm": 1.4609375, "learning_rate": 9.658093574338319e-06, "loss": 0.4997, "step": 8900 }, { "epoch": 1.5244021209825775, "grad_norm": 1.3125, "learning_rate": 9.656270789988274e-06, "loss": 0.4227, "step": 8901 }, { "epoch": 1.5245752624174873, "grad_norm": 1.4375, "learning_rate": 9.654448017072213e-06, "loss": 0.4482, "step": 8902 }, { "epoch": 1.524748403852397, "grad_norm": 1.53125, "learning_rate": 9.652625255650777e-06, "loss": 0.5062, "step": 8903 }, { "epoch": 1.5249215452873066, "grad_norm": 1.3671875, "learning_rate": 9.650802505784597e-06, "loss": 0.4756, "step": 8904 }, { "epoch": 1.5250946867222162, "grad_norm": 1.46875, "learning_rate": 9.648979767534304e-06, "loss": 0.4772, "step": 8905 }, { "epoch": 1.525267828157126, "grad_norm": 1.53125, "learning_rate": 9.64715704096053e-06, "loss": 0.445, "step": 8906 }, { "epoch": 1.5254409695920355, "grad_norm": 1.4765625, "learning_rate": 9.645334326123909e-06, "loss": 0.5076, "step": 8907 }, { "epoch": 1.525614111026945, "grad_norm": 1.4140625, "learning_rate": 9.643511623085071e-06, "loss": 0.4942, "step": 8908 }, { "epoch": 1.5257872524618548, "grad_norm": 1.4609375, "learning_rate": 9.64168893190465e-06, "loss": 0.4614, "step": 8909 }, { "epoch": 1.5259603938967645, "grad_norm": 1.34375, "learning_rate": 9.639866252643275e-06, "loss": 0.4799, "step": 8910 }, { "epoch": 1.526133535331674, "grad_norm": 1.2734375, "learning_rate": 9.638043585361574e-06, "loss": 0.3708, "step": 8911 }, { "epoch": 1.5263066767665836, "grad_norm": 1.6328125, "learning_rate": 9.636220930120183e-06, "loss": 0.5712, "step": 8912 }, { "epoch": 1.5264798182014934, "grad_norm": 1.390625, "learning_rate": 9.634398286979726e-06, "loss": 0.4058, "step": 8913 }, { "epoch": 1.5266529596364031, "grad_norm": 1.359375, "learning_rate": 9.632575656000837e-06, "loss": 0.48, "step": 8914 }, { "epoch": 1.5268261010713127, "grad_norm": 1.328125, "learning_rate": 9.630753037244141e-06, "loss": 0.45, "step": 8915 }, { "epoch": 1.5269992425062222, "grad_norm": 1.3984375, "learning_rate": 9.628930430770273e-06, "loss": 0.5004, "step": 8916 }, { "epoch": 1.527172383941132, "grad_norm": 1.4765625, "learning_rate": 9.627107836639854e-06, "loss": 0.4597, "step": 8917 }, { "epoch": 1.5273455253760415, "grad_norm": 1.4375, "learning_rate": 9.625285254913515e-06, "loss": 0.4313, "step": 8918 }, { "epoch": 1.527518666810951, "grad_norm": 1.3828125, "learning_rate": 9.623462685651883e-06, "loss": 0.4942, "step": 8919 }, { "epoch": 1.5276918082458608, "grad_norm": 1.4453125, "learning_rate": 9.621640128915584e-06, "loss": 0.4892, "step": 8920 }, { "epoch": 1.5278649496807706, "grad_norm": 1.3203125, "learning_rate": 9.619817584765247e-06, "loss": 0.4699, "step": 8921 }, { "epoch": 1.5280380911156801, "grad_norm": 1.3828125, "learning_rate": 9.617995053261492e-06, "loss": 0.4466, "step": 8922 }, { "epoch": 1.5282112325505897, "grad_norm": 1.3671875, "learning_rate": 9.616172534464953e-06, "loss": 0.4319, "step": 8923 }, { "epoch": 1.5283843739854994, "grad_norm": 1.3515625, "learning_rate": 9.614350028436245e-06, "loss": 0.4496, "step": 8924 }, { "epoch": 1.5285575154204092, "grad_norm": 1.3828125, "learning_rate": 9.612527535236007e-06, "loss": 0.4989, "step": 8925 }, { "epoch": 1.5287306568553187, "grad_norm": 1.3828125, "learning_rate": 9.61070505492485e-06, "loss": 0.4673, "step": 8926 }, { "epoch": 1.5289037982902283, "grad_norm": 1.4453125, "learning_rate": 9.608882587563404e-06, "loss": 0.4703, "step": 8927 }, { "epoch": 1.529076939725138, "grad_norm": 1.546875, "learning_rate": 9.607060133212294e-06, "loss": 0.4868, "step": 8928 }, { "epoch": 1.5292500811600476, "grad_norm": 1.359375, "learning_rate": 9.605237691932138e-06, "loss": 0.4935, "step": 8929 }, { "epoch": 1.529423222594957, "grad_norm": 1.3125, "learning_rate": 9.603415263783564e-06, "loss": 0.4395, "step": 8930 }, { "epoch": 1.5295963640298669, "grad_norm": 1.3203125, "learning_rate": 9.60159284882719e-06, "loss": 0.4242, "step": 8931 }, { "epoch": 1.5297695054647766, "grad_norm": 1.546875, "learning_rate": 9.599770447123638e-06, "loss": 0.4774, "step": 8932 }, { "epoch": 1.5299426468996862, "grad_norm": 1.4765625, "learning_rate": 9.597948058733534e-06, "loss": 0.4598, "step": 8933 }, { "epoch": 1.5301157883345957, "grad_norm": 1.421875, "learning_rate": 9.596125683717492e-06, "loss": 0.4444, "step": 8934 }, { "epoch": 1.5302889297695055, "grad_norm": 1.3828125, "learning_rate": 9.594303322136134e-06, "loss": 0.4623, "step": 8935 }, { "epoch": 1.5304620712044152, "grad_norm": 1.5078125, "learning_rate": 9.592480974050085e-06, "loss": 0.4239, "step": 8936 }, { "epoch": 1.5306352126393248, "grad_norm": 1.390625, "learning_rate": 9.590658639519963e-06, "loss": 0.4721, "step": 8937 }, { "epoch": 1.5308083540742343, "grad_norm": 1.3984375, "learning_rate": 9.588836318606383e-06, "loss": 0.4798, "step": 8938 }, { "epoch": 1.530981495509144, "grad_norm": 1.4140625, "learning_rate": 9.587014011369969e-06, "loss": 0.4902, "step": 8939 }, { "epoch": 1.5311546369440536, "grad_norm": 1.3515625, "learning_rate": 9.585191717871336e-06, "loss": 0.5156, "step": 8940 }, { "epoch": 1.5313277783789632, "grad_norm": 1.40625, "learning_rate": 9.5833694381711e-06, "loss": 0.4763, "step": 8941 }, { "epoch": 1.531500919813873, "grad_norm": 1.7109375, "learning_rate": 9.581547172329883e-06, "loss": 0.4807, "step": 8942 }, { "epoch": 1.5316740612487827, "grad_norm": 1.40625, "learning_rate": 9.579724920408296e-06, "loss": 0.4745, "step": 8943 }, { "epoch": 1.5318472026836922, "grad_norm": 1.5, "learning_rate": 9.57790268246696e-06, "loss": 0.4899, "step": 8944 }, { "epoch": 1.5320203441186018, "grad_norm": 1.4453125, "learning_rate": 9.576080458566492e-06, "loss": 0.4627, "step": 8945 }, { "epoch": 1.5321934855535115, "grad_norm": 1.4609375, "learning_rate": 9.5742582487675e-06, "loss": 0.4836, "step": 8946 }, { "epoch": 1.5323666269884213, "grad_norm": 1.4921875, "learning_rate": 9.572436053130604e-06, "loss": 0.464, "step": 8947 }, { "epoch": 1.5325397684233308, "grad_norm": 1.3671875, "learning_rate": 9.570613871716419e-06, "loss": 0.5007, "step": 8948 }, { "epoch": 1.5327129098582404, "grad_norm": 1.3203125, "learning_rate": 9.56879170458556e-06, "loss": 0.4121, "step": 8949 }, { "epoch": 1.5328860512931501, "grad_norm": 1.4453125, "learning_rate": 9.566969551798638e-06, "loss": 0.4384, "step": 8950 }, { "epoch": 1.5330591927280597, "grad_norm": 1.3828125, "learning_rate": 9.565147413416266e-06, "loss": 0.451, "step": 8951 }, { "epoch": 1.5332323341629692, "grad_norm": 1.4375, "learning_rate": 9.563325289499058e-06, "loss": 0.502, "step": 8952 }, { "epoch": 1.533405475597879, "grad_norm": 1.4140625, "learning_rate": 9.561503180107626e-06, "loss": 0.436, "step": 8953 }, { "epoch": 1.5335786170327887, "grad_norm": 1.5078125, "learning_rate": 9.559681085302583e-06, "loss": 0.4952, "step": 8954 }, { "epoch": 1.5337517584676983, "grad_norm": 1.375, "learning_rate": 9.557859005144534e-06, "loss": 0.4968, "step": 8955 }, { "epoch": 1.5339248999026078, "grad_norm": 1.46875, "learning_rate": 9.556036939694093e-06, "loss": 0.4955, "step": 8956 }, { "epoch": 1.5340980413375176, "grad_norm": 1.4375, "learning_rate": 9.554214889011872e-06, "loss": 0.4452, "step": 8957 }, { "epoch": 1.5342711827724274, "grad_norm": 1.46875, "learning_rate": 9.552392853158481e-06, "loss": 0.5356, "step": 8958 }, { "epoch": 1.534444324207337, "grad_norm": 1.3984375, "learning_rate": 9.550570832194527e-06, "loss": 0.4997, "step": 8959 }, { "epoch": 1.5346174656422464, "grad_norm": 1.5078125, "learning_rate": 9.548748826180618e-06, "loss": 0.5028, "step": 8960 }, { "epoch": 1.5347906070771562, "grad_norm": 1.4140625, "learning_rate": 9.546926835177367e-06, "loss": 0.5231, "step": 8961 }, { "epoch": 1.5349637485120657, "grad_norm": 1.3828125, "learning_rate": 9.545104859245376e-06, "loss": 0.4327, "step": 8962 }, { "epoch": 1.5351368899469753, "grad_norm": 1.4765625, "learning_rate": 9.543282898445253e-06, "loss": 0.4943, "step": 8963 }, { "epoch": 1.535310031381885, "grad_norm": 1.375, "learning_rate": 9.541460952837609e-06, "loss": 0.3991, "step": 8964 }, { "epoch": 1.5354831728167948, "grad_norm": 1.3359375, "learning_rate": 9.539639022483044e-06, "loss": 0.4331, "step": 8965 }, { "epoch": 1.5356563142517043, "grad_norm": 1.453125, "learning_rate": 9.53781710744217e-06, "loss": 0.4982, "step": 8966 }, { "epoch": 1.5358294556866139, "grad_norm": 1.296875, "learning_rate": 9.53599520777558e-06, "loss": 0.4346, "step": 8967 }, { "epoch": 1.5360025971215236, "grad_norm": 1.4921875, "learning_rate": 9.534173323543898e-06, "loss": 0.4972, "step": 8968 }, { "epoch": 1.5361757385564334, "grad_norm": 1.453125, "learning_rate": 9.532351454807712e-06, "loss": 0.4744, "step": 8969 }, { "epoch": 1.536348879991343, "grad_norm": 1.4296875, "learning_rate": 9.530529601627635e-06, "loss": 0.4819, "step": 8970 }, { "epoch": 1.5365220214262525, "grad_norm": 1.3828125, "learning_rate": 9.528707764064264e-06, "loss": 0.4847, "step": 8971 }, { "epoch": 1.5366951628611623, "grad_norm": 1.3828125, "learning_rate": 9.526885942178202e-06, "loss": 0.4694, "step": 8972 }, { "epoch": 1.536868304296072, "grad_norm": 1.4375, "learning_rate": 9.525064136030057e-06, "loss": 0.5258, "step": 8973 }, { "epoch": 1.5370414457309813, "grad_norm": 1.34375, "learning_rate": 9.523242345680423e-06, "loss": 0.4568, "step": 8974 }, { "epoch": 1.537214587165891, "grad_norm": 1.3828125, "learning_rate": 9.521420571189905e-06, "loss": 0.4545, "step": 8975 }, { "epoch": 1.5373877286008009, "grad_norm": 1.3671875, "learning_rate": 9.519598812619106e-06, "loss": 0.4285, "step": 8976 }, { "epoch": 1.5375608700357104, "grad_norm": 1.34375, "learning_rate": 9.51777707002862e-06, "loss": 0.502, "step": 8977 }, { "epoch": 1.53773401147062, "grad_norm": 1.4375, "learning_rate": 9.515955343479045e-06, "loss": 0.4962, "step": 8978 }, { "epoch": 1.5379071529055297, "grad_norm": 1.3515625, "learning_rate": 9.514133633030987e-06, "loss": 0.4818, "step": 8979 }, { "epoch": 1.5380802943404395, "grad_norm": 1.53125, "learning_rate": 9.512311938745045e-06, "loss": 0.4788, "step": 8980 }, { "epoch": 1.538253435775349, "grad_norm": 1.46875, "learning_rate": 9.510490260681809e-06, "loss": 0.4998, "step": 8981 }, { "epoch": 1.5384265772102586, "grad_norm": 1.34375, "learning_rate": 9.508668598901884e-06, "loss": 0.4965, "step": 8982 }, { "epoch": 1.5385997186451683, "grad_norm": 1.40625, "learning_rate": 9.50684695346586e-06, "loss": 0.3996, "step": 8983 }, { "epoch": 1.538772860080078, "grad_norm": 1.4609375, "learning_rate": 9.505025324434337e-06, "loss": 0.5025, "step": 8984 }, { "epoch": 1.5389460015149874, "grad_norm": 1.4453125, "learning_rate": 9.503203711867913e-06, "loss": 0.4371, "step": 8985 }, { "epoch": 1.5391191429498972, "grad_norm": 1.4765625, "learning_rate": 9.501382115827174e-06, "loss": 0.4924, "step": 8986 }, { "epoch": 1.539292284384807, "grad_norm": 1.421875, "learning_rate": 9.499560536372725e-06, "loss": 0.429, "step": 8987 }, { "epoch": 1.5394654258197165, "grad_norm": 1.421875, "learning_rate": 9.497738973565154e-06, "loss": 0.4739, "step": 8988 }, { "epoch": 1.539638567254626, "grad_norm": 1.4453125, "learning_rate": 9.495917427465053e-06, "loss": 0.4869, "step": 8989 }, { "epoch": 1.5398117086895358, "grad_norm": 1.390625, "learning_rate": 9.49409589813302e-06, "loss": 0.4231, "step": 8990 }, { "epoch": 1.5399848501244455, "grad_norm": 1.6640625, "learning_rate": 9.492274385629643e-06, "loss": 0.5035, "step": 8991 }, { "epoch": 1.540157991559355, "grad_norm": 1.4296875, "learning_rate": 9.49045289001552e-06, "loss": 0.4786, "step": 8992 }, { "epoch": 1.5403311329942646, "grad_norm": 1.390625, "learning_rate": 9.488631411351235e-06, "loss": 0.4442, "step": 8993 }, { "epoch": 1.5405042744291744, "grad_norm": 1.3984375, "learning_rate": 9.486809949697382e-06, "loss": 0.4665, "step": 8994 }, { "epoch": 1.5406774158640841, "grad_norm": 1.3125, "learning_rate": 9.484988505114551e-06, "loss": 0.4302, "step": 8995 }, { "epoch": 1.5408505572989935, "grad_norm": 1.3828125, "learning_rate": 9.483167077663332e-06, "loss": 0.4601, "step": 8996 }, { "epoch": 1.5410236987339032, "grad_norm": 1.40625, "learning_rate": 9.481345667404313e-06, "loss": 0.4674, "step": 8997 }, { "epoch": 1.541196840168813, "grad_norm": 1.4609375, "learning_rate": 9.47952427439808e-06, "loss": 0.4868, "step": 8998 }, { "epoch": 1.5413699816037225, "grad_norm": 1.6484375, "learning_rate": 9.477702898705223e-06, "loss": 0.5008, "step": 8999 }, { "epoch": 1.541543123038632, "grad_norm": 1.40625, "learning_rate": 9.475881540386327e-06, "loss": 0.4892, "step": 9000 }, { "epoch": 1.5417162644735418, "grad_norm": 1.40625, "learning_rate": 9.474060199501987e-06, "loss": 0.4514, "step": 9001 }, { "epoch": 1.5418894059084516, "grad_norm": 1.4375, "learning_rate": 9.472238876112779e-06, "loss": 0.4414, "step": 9002 }, { "epoch": 1.5420625473433611, "grad_norm": 1.4453125, "learning_rate": 9.470417570279292e-06, "loss": 0.4947, "step": 9003 }, { "epoch": 1.5422356887782707, "grad_norm": 1.4765625, "learning_rate": 9.468596282062114e-06, "loss": 0.4709, "step": 9004 }, { "epoch": 1.5424088302131804, "grad_norm": 1.375, "learning_rate": 9.466775011521825e-06, "loss": 0.4905, "step": 9005 }, { "epoch": 1.5425819716480902, "grad_norm": 1.3203125, "learning_rate": 9.464953758719012e-06, "loss": 0.4116, "step": 9006 }, { "epoch": 1.5427551130829997, "grad_norm": 1.34375, "learning_rate": 9.463132523714254e-06, "loss": 0.4865, "step": 9007 }, { "epoch": 1.5429282545179093, "grad_norm": 1.3828125, "learning_rate": 9.461311306568136e-06, "loss": 0.4509, "step": 9008 }, { "epoch": 1.543101395952819, "grad_norm": 1.359375, "learning_rate": 9.459490107341242e-06, "loss": 0.4906, "step": 9009 }, { "epoch": 1.5432745373877286, "grad_norm": 1.3203125, "learning_rate": 9.457668926094149e-06, "loss": 0.4727, "step": 9010 }, { "epoch": 1.5434476788226381, "grad_norm": 1.4140625, "learning_rate": 9.455847762887436e-06, "loss": 0.4735, "step": 9011 }, { "epoch": 1.5436208202575479, "grad_norm": 1.3984375, "learning_rate": 9.45402661778169e-06, "loss": 0.4156, "step": 9012 }, { "epoch": 1.5437939616924576, "grad_norm": 1.3515625, "learning_rate": 9.452205490837491e-06, "loss": 0.432, "step": 9013 }, { "epoch": 1.5439671031273672, "grad_norm": 1.515625, "learning_rate": 9.45038438211541e-06, "loss": 0.4732, "step": 9014 }, { "epoch": 1.5441402445622767, "grad_norm": 1.421875, "learning_rate": 9.448563291676031e-06, "loss": 0.5591, "step": 9015 }, { "epoch": 1.5443133859971865, "grad_norm": 1.453125, "learning_rate": 9.446742219579933e-06, "loss": 0.4503, "step": 9016 }, { "epoch": 1.5444865274320962, "grad_norm": 1.28125, "learning_rate": 9.444921165887687e-06, "loss": 0.3986, "step": 9017 }, { "epoch": 1.5446596688670058, "grad_norm": 1.4140625, "learning_rate": 9.443100130659876e-06, "loss": 0.4283, "step": 9018 }, { "epoch": 1.5448328103019153, "grad_norm": 1.4609375, "learning_rate": 9.44127911395707e-06, "loss": 0.4885, "step": 9019 }, { "epoch": 1.545005951736825, "grad_norm": 1.40625, "learning_rate": 9.439458115839847e-06, "loss": 0.4764, "step": 9020 }, { "epoch": 1.5451790931717346, "grad_norm": 1.5, "learning_rate": 9.43763713636878e-06, "loss": 0.4908, "step": 9021 }, { "epoch": 1.5453522346066442, "grad_norm": 1.4296875, "learning_rate": 9.435816175604449e-06, "loss": 0.5555, "step": 9022 }, { "epoch": 1.545525376041554, "grad_norm": 1.4296875, "learning_rate": 9.433995233607419e-06, "loss": 0.5031, "step": 9023 }, { "epoch": 1.5456985174764637, "grad_norm": 1.4375, "learning_rate": 9.43217431043827e-06, "loss": 0.4483, "step": 9024 }, { "epoch": 1.5458716589113732, "grad_norm": 1.34375, "learning_rate": 9.43035340615757e-06, "loss": 0.5514, "step": 9025 }, { "epoch": 1.5460448003462828, "grad_norm": 1.4765625, "learning_rate": 9.42853252082589e-06, "loss": 0.481, "step": 9026 }, { "epoch": 1.5462179417811925, "grad_norm": 1.3515625, "learning_rate": 9.426711654503804e-06, "loss": 0.4276, "step": 9027 }, { "epoch": 1.5463910832161023, "grad_norm": 1.515625, "learning_rate": 9.424890807251882e-06, "loss": 0.4499, "step": 9028 }, { "epoch": 1.5465642246510118, "grad_norm": 1.421875, "learning_rate": 9.42306997913069e-06, "loss": 0.4465, "step": 9029 }, { "epoch": 1.5467373660859214, "grad_norm": 1.4765625, "learning_rate": 9.421249170200801e-06, "loss": 0.4587, "step": 9030 }, { "epoch": 1.5469105075208311, "grad_norm": 1.3515625, "learning_rate": 9.41942838052278e-06, "loss": 0.4304, "step": 9031 }, { "epoch": 1.5470836489557407, "grad_norm": 1.40625, "learning_rate": 9.417607610157194e-06, "loss": 0.4493, "step": 9032 }, { "epoch": 1.5472567903906502, "grad_norm": 1.328125, "learning_rate": 9.415786859164614e-06, "loss": 0.456, "step": 9033 }, { "epoch": 1.54742993182556, "grad_norm": 1.3671875, "learning_rate": 9.413966127605607e-06, "loss": 0.4923, "step": 9034 }, { "epoch": 1.5476030732604698, "grad_norm": 1.453125, "learning_rate": 9.412145415540734e-06, "loss": 0.4533, "step": 9035 }, { "epoch": 1.5477762146953793, "grad_norm": 1.34375, "learning_rate": 9.410324723030564e-06, "loss": 0.4543, "step": 9036 }, { "epoch": 1.5479493561302888, "grad_norm": 1.3046875, "learning_rate": 9.40850405013566e-06, "loss": 0.428, "step": 9037 }, { "epoch": 1.5481224975651986, "grad_norm": 1.359375, "learning_rate": 9.406683396916586e-06, "loss": 0.4681, "step": 9038 }, { "epoch": 1.5482956390001084, "grad_norm": 1.359375, "learning_rate": 9.404862763433904e-06, "loss": 0.4089, "step": 9039 }, { "epoch": 1.548468780435018, "grad_norm": 1.5859375, "learning_rate": 9.403042149748179e-06, "loss": 0.4691, "step": 9040 }, { "epoch": 1.5486419218699274, "grad_norm": 1.328125, "learning_rate": 9.40122155591997e-06, "loss": 0.4399, "step": 9041 }, { "epoch": 1.5488150633048372, "grad_norm": 1.46875, "learning_rate": 9.399400982009841e-06, "loss": 0.4666, "step": 9042 }, { "epoch": 1.5489882047397467, "grad_norm": 1.453125, "learning_rate": 9.397580428078344e-06, "loss": 0.5212, "step": 9043 }, { "epoch": 1.5491613461746563, "grad_norm": 1.53125, "learning_rate": 9.395759894186054e-06, "loss": 0.5082, "step": 9044 }, { "epoch": 1.549334487609566, "grad_norm": 1.6015625, "learning_rate": 9.393939380393518e-06, "loss": 0.4895, "step": 9045 }, { "epoch": 1.5495076290444758, "grad_norm": 1.4453125, "learning_rate": 9.392118886761303e-06, "loss": 0.4928, "step": 9046 }, { "epoch": 1.5496807704793853, "grad_norm": 1.484375, "learning_rate": 9.390298413349959e-06, "loss": 0.5009, "step": 9047 }, { "epoch": 1.549853911914295, "grad_norm": 1.453125, "learning_rate": 9.388477960220048e-06, "loss": 0.4685, "step": 9048 }, { "epoch": 1.5500270533492047, "grad_norm": 1.3203125, "learning_rate": 9.386657527432126e-06, "loss": 0.4712, "step": 9049 }, { "epoch": 1.5502001947841144, "grad_norm": 1.375, "learning_rate": 9.384837115046746e-06, "loss": 0.4198, "step": 9050 }, { "epoch": 1.550373336219024, "grad_norm": 1.4296875, "learning_rate": 9.383016723124465e-06, "loss": 0.5278, "step": 9051 }, { "epoch": 1.5505464776539335, "grad_norm": 1.3046875, "learning_rate": 9.38119635172584e-06, "loss": 0.5215, "step": 9052 }, { "epoch": 1.5507196190888433, "grad_norm": 1.34375, "learning_rate": 9.37937600091142e-06, "loss": 0.4529, "step": 9053 }, { "epoch": 1.5508927605237528, "grad_norm": 1.4140625, "learning_rate": 9.377555670741759e-06, "loss": 0.4318, "step": 9054 }, { "epoch": 1.5510659019586623, "grad_norm": 1.515625, "learning_rate": 9.375735361277412e-06, "loss": 0.5341, "step": 9055 }, { "epoch": 1.551239043393572, "grad_norm": 1.3671875, "learning_rate": 9.373915072578932e-06, "loss": 0.5076, "step": 9056 }, { "epoch": 1.5514121848284819, "grad_norm": 1.40625, "learning_rate": 9.372094804706867e-06, "loss": 0.4933, "step": 9057 }, { "epoch": 1.5515853262633914, "grad_norm": 1.2734375, "learning_rate": 9.37027455772177e-06, "loss": 0.4697, "step": 9058 }, { "epoch": 1.551758467698301, "grad_norm": 1.4375, "learning_rate": 9.368454331684185e-06, "loss": 0.441, "step": 9059 }, { "epoch": 1.5519316091332107, "grad_norm": 1.453125, "learning_rate": 9.366634126654664e-06, "loss": 0.5854, "step": 9060 }, { "epoch": 1.5521047505681205, "grad_norm": 1.4140625, "learning_rate": 9.36481394269376e-06, "loss": 0.5, "step": 9061 }, { "epoch": 1.55227789200303, "grad_norm": 1.390625, "learning_rate": 9.362993779862012e-06, "loss": 0.4241, "step": 9062 }, { "epoch": 1.5524510334379396, "grad_norm": 1.3203125, "learning_rate": 9.361173638219973e-06, "loss": 0.4814, "step": 9063 }, { "epoch": 1.5526241748728493, "grad_norm": 1.5078125, "learning_rate": 9.359353517828189e-06, "loss": 0.5066, "step": 9064 }, { "epoch": 1.5527973163077589, "grad_norm": 1.390625, "learning_rate": 9.3575334187472e-06, "loss": 0.4735, "step": 9065 }, { "epoch": 1.5529704577426684, "grad_norm": 1.421875, "learning_rate": 9.355713341037555e-06, "loss": 0.4594, "step": 9066 }, { "epoch": 1.5531435991775782, "grad_norm": 1.3046875, "learning_rate": 9.3538932847598e-06, "loss": 0.4663, "step": 9067 }, { "epoch": 1.553316740612488, "grad_norm": 1.453125, "learning_rate": 9.352073249974475e-06, "loss": 0.5171, "step": 9068 }, { "epoch": 1.5534898820473975, "grad_norm": 1.328125, "learning_rate": 9.350253236742123e-06, "loss": 0.4378, "step": 9069 }, { "epoch": 1.553663023482307, "grad_norm": 1.3828125, "learning_rate": 9.34843324512329e-06, "loss": 0.4903, "step": 9070 }, { "epoch": 1.5538361649172168, "grad_norm": 1.4375, "learning_rate": 9.34661327517851e-06, "loss": 0.4939, "step": 9071 }, { "epoch": 1.5540093063521265, "grad_norm": 1.4453125, "learning_rate": 9.344793326968326e-06, "loss": 0.492, "step": 9072 }, { "epoch": 1.554182447787036, "grad_norm": 1.359375, "learning_rate": 9.342973400553281e-06, "loss": 0.4599, "step": 9073 }, { "epoch": 1.5543555892219456, "grad_norm": 1.4609375, "learning_rate": 9.34115349599391e-06, "loss": 0.5495, "step": 9074 }, { "epoch": 1.5545287306568554, "grad_norm": 1.484375, "learning_rate": 9.339333613350749e-06, "loss": 0.4417, "step": 9075 }, { "epoch": 1.554701872091765, "grad_norm": 1.2421875, "learning_rate": 9.337513752684343e-06, "loss": 0.4436, "step": 9076 }, { "epoch": 1.5548750135266745, "grad_norm": 1.515625, "learning_rate": 9.335693914055229e-06, "loss": 0.4832, "step": 9077 }, { "epoch": 1.5550481549615842, "grad_norm": 1.53125, "learning_rate": 9.333874097523934e-06, "loss": 0.528, "step": 9078 }, { "epoch": 1.555221296396494, "grad_norm": 1.4296875, "learning_rate": 9.332054303151e-06, "loss": 0.4825, "step": 9079 }, { "epoch": 1.5553944378314035, "grad_norm": 1.53125, "learning_rate": 9.330234530996963e-06, "loss": 0.4455, "step": 9080 }, { "epoch": 1.555567579266313, "grad_norm": 1.3984375, "learning_rate": 9.328414781122351e-06, "loss": 0.4648, "step": 9081 }, { "epoch": 1.5557407207012228, "grad_norm": 1.3515625, "learning_rate": 9.3265950535877e-06, "loss": 0.4543, "step": 9082 }, { "epoch": 1.5559138621361326, "grad_norm": 1.328125, "learning_rate": 9.324775348453543e-06, "loss": 0.4629, "step": 9083 }, { "epoch": 1.5560870035710421, "grad_norm": 1.40625, "learning_rate": 9.322955665780411e-06, "loss": 0.4713, "step": 9084 }, { "epoch": 1.5562601450059517, "grad_norm": 1.515625, "learning_rate": 9.321136005628835e-06, "loss": 0.4446, "step": 9085 }, { "epoch": 1.5564332864408614, "grad_norm": 1.3984375, "learning_rate": 9.31931636805934e-06, "loss": 0.4909, "step": 9086 }, { "epoch": 1.556606427875771, "grad_norm": 1.4296875, "learning_rate": 9.317496753132468e-06, "loss": 0.42, "step": 9087 }, { "epoch": 1.5567795693106805, "grad_norm": 1.3203125, "learning_rate": 9.315677160908737e-06, "loss": 0.4152, "step": 9088 }, { "epoch": 1.5569527107455903, "grad_norm": 1.4609375, "learning_rate": 9.31385759144868e-06, "loss": 0.5228, "step": 9089 }, { "epoch": 1.5571258521805, "grad_norm": 1.3671875, "learning_rate": 9.31203804481282e-06, "loss": 0.4776, "step": 9090 }, { "epoch": 1.5572989936154096, "grad_norm": 1.5859375, "learning_rate": 9.310218521061684e-06, "loss": 0.468, "step": 9091 }, { "epoch": 1.5574721350503191, "grad_norm": 1.46875, "learning_rate": 9.308399020255803e-06, "loss": 0.5343, "step": 9092 }, { "epoch": 1.5576452764852289, "grad_norm": 1.375, "learning_rate": 9.306579542455694e-06, "loss": 0.4524, "step": 9093 }, { "epoch": 1.5578184179201386, "grad_norm": 1.484375, "learning_rate": 9.30476008772189e-06, "loss": 0.4331, "step": 9094 }, { "epoch": 1.5579915593550482, "grad_norm": 1.328125, "learning_rate": 9.302940656114904e-06, "loss": 0.4726, "step": 9095 }, { "epoch": 1.5581647007899577, "grad_norm": 1.359375, "learning_rate": 9.301121247695265e-06, "loss": 0.4172, "step": 9096 }, { "epoch": 1.5583378422248675, "grad_norm": 1.34375, "learning_rate": 9.299301862523491e-06, "loss": 0.4195, "step": 9097 }, { "epoch": 1.558510983659777, "grad_norm": 1.390625, "learning_rate": 9.297482500660109e-06, "loss": 0.4088, "step": 9098 }, { "epoch": 1.5586841250946866, "grad_norm": 1.3984375, "learning_rate": 9.295663162165634e-06, "loss": 0.5205, "step": 9099 }, { "epoch": 1.5588572665295963, "grad_norm": 1.3515625, "learning_rate": 9.293843847100588e-06, "loss": 0.5315, "step": 9100 }, { "epoch": 1.559030407964506, "grad_norm": 1.3828125, "learning_rate": 9.292024555525489e-06, "loss": 0.45, "step": 9101 }, { "epoch": 1.5592035493994156, "grad_norm": 1.5, "learning_rate": 9.290205287500853e-06, "loss": 0.4574, "step": 9102 }, { "epoch": 1.5593766908343252, "grad_norm": 1.4765625, "learning_rate": 9.288386043087198e-06, "loss": 0.4853, "step": 9103 }, { "epoch": 1.559549832269235, "grad_norm": 1.265625, "learning_rate": 9.286566822345044e-06, "loss": 0.4093, "step": 9104 }, { "epoch": 1.5597229737041447, "grad_norm": 1.390625, "learning_rate": 9.284747625334902e-06, "loss": 0.4641, "step": 9105 }, { "epoch": 1.5598961151390542, "grad_norm": 1.3984375, "learning_rate": 9.282928452117289e-06, "loss": 0.4748, "step": 9106 }, { "epoch": 1.5600692565739638, "grad_norm": 1.484375, "learning_rate": 9.281109302752716e-06, "loss": 0.4386, "step": 9107 }, { "epoch": 1.5602423980088735, "grad_norm": 1.40625, "learning_rate": 9.279290177301696e-06, "loss": 0.4504, "step": 9108 }, { "epoch": 1.5604155394437833, "grad_norm": 1.4453125, "learning_rate": 9.277471075824747e-06, "loss": 0.4325, "step": 9109 }, { "epoch": 1.5605886808786926, "grad_norm": 1.4375, "learning_rate": 9.275651998382377e-06, "loss": 0.4951, "step": 9110 }, { "epoch": 1.5607618223136024, "grad_norm": 1.5859375, "learning_rate": 9.273832945035096e-06, "loss": 0.4371, "step": 9111 }, { "epoch": 1.5609349637485121, "grad_norm": 1.3359375, "learning_rate": 9.272013915843415e-06, "loss": 0.4549, "step": 9112 }, { "epoch": 1.5611081051834217, "grad_norm": 1.3515625, "learning_rate": 9.270194910867844e-06, "loss": 0.4543, "step": 9113 }, { "epoch": 1.5612812466183312, "grad_norm": 1.3984375, "learning_rate": 9.26837593016889e-06, "loss": 0.4503, "step": 9114 }, { "epoch": 1.561454388053241, "grad_norm": 1.40625, "learning_rate": 9.266556973807059e-06, "loss": 0.5055, "step": 9115 }, { "epoch": 1.5616275294881508, "grad_norm": 1.390625, "learning_rate": 9.264738041842862e-06, "loss": 0.4442, "step": 9116 }, { "epoch": 1.5618006709230603, "grad_norm": 1.4140625, "learning_rate": 9.2629191343368e-06, "loss": 0.4811, "step": 9117 }, { "epoch": 1.5619738123579698, "grad_norm": 1.4140625, "learning_rate": 9.261100251349384e-06, "loss": 0.4331, "step": 9118 }, { "epoch": 1.5621469537928796, "grad_norm": 1.4296875, "learning_rate": 9.259281392941106e-06, "loss": 0.5161, "step": 9119 }, { "epoch": 1.5623200952277894, "grad_norm": 1.375, "learning_rate": 9.257462559172485e-06, "loss": 0.5123, "step": 9120 }, { "epoch": 1.5624932366626987, "grad_norm": 1.3125, "learning_rate": 9.255643750104017e-06, "loss": 0.4063, "step": 9121 }, { "epoch": 1.5626663780976084, "grad_norm": 1.3203125, "learning_rate": 9.253824965796203e-06, "loss": 0.5254, "step": 9122 }, { "epoch": 1.5628395195325182, "grad_norm": 1.328125, "learning_rate": 9.252006206309543e-06, "loss": 0.4382, "step": 9123 }, { "epoch": 1.5630126609674277, "grad_norm": 1.4140625, "learning_rate": 9.25018747170454e-06, "loss": 0.4448, "step": 9124 }, { "epoch": 1.5631858024023373, "grad_norm": 1.4140625, "learning_rate": 9.248368762041694e-06, "loss": 0.3779, "step": 9125 }, { "epoch": 1.563358943837247, "grad_norm": 1.515625, "learning_rate": 9.246550077381498e-06, "loss": 0.4921, "step": 9126 }, { "epoch": 1.5635320852721568, "grad_norm": 1.34375, "learning_rate": 9.244731417784456e-06, "loss": 0.5993, "step": 9127 }, { "epoch": 1.5637052267070664, "grad_norm": 1.3203125, "learning_rate": 9.242912783311062e-06, "loss": 0.4522, "step": 9128 }, { "epoch": 1.563878368141976, "grad_norm": 1.4609375, "learning_rate": 9.24109417402181e-06, "loss": 0.4736, "step": 9129 }, { "epoch": 1.5640515095768857, "grad_norm": 1.453125, "learning_rate": 9.239275589977197e-06, "loss": 0.4591, "step": 9130 }, { "epoch": 1.5642246510117954, "grad_norm": 1.4140625, "learning_rate": 9.237457031237718e-06, "loss": 0.4076, "step": 9131 }, { "epoch": 1.5643977924467047, "grad_norm": 1.359375, "learning_rate": 9.235638497863868e-06, "loss": 0.4122, "step": 9132 }, { "epoch": 1.5645709338816145, "grad_norm": 1.46875, "learning_rate": 9.233819989916138e-06, "loss": 0.4764, "step": 9133 }, { "epoch": 1.5647440753165243, "grad_norm": 1.34375, "learning_rate": 9.23200150745502e-06, "loss": 0.4573, "step": 9134 }, { "epoch": 1.5649172167514338, "grad_norm": 1.4375, "learning_rate": 9.230183050541001e-06, "loss": 0.4915, "step": 9135 }, { "epoch": 1.5650903581863433, "grad_norm": 1.34375, "learning_rate": 9.228364619234577e-06, "loss": 0.4519, "step": 9136 }, { "epoch": 1.565263499621253, "grad_norm": 1.4375, "learning_rate": 9.226546213596235e-06, "loss": 0.5002, "step": 9137 }, { "epoch": 1.5654366410561629, "grad_norm": 1.3671875, "learning_rate": 9.224727833686463e-06, "loss": 0.4776, "step": 9138 }, { "epoch": 1.5656097824910724, "grad_norm": 1.4296875, "learning_rate": 9.222909479565747e-06, "loss": 0.5143, "step": 9139 }, { "epoch": 1.565782923925982, "grad_norm": 1.4296875, "learning_rate": 9.221091151294576e-06, "loss": 0.4832, "step": 9140 }, { "epoch": 1.5659560653608917, "grad_norm": 1.359375, "learning_rate": 9.219272848933437e-06, "loss": 0.4353, "step": 9141 }, { "epoch": 1.5661292067958015, "grad_norm": 1.5078125, "learning_rate": 9.217454572542812e-06, "loss": 0.4844, "step": 9142 }, { "epoch": 1.566302348230711, "grad_norm": 1.4296875, "learning_rate": 9.215636322183187e-06, "loss": 0.4284, "step": 9143 }, { "epoch": 1.5664754896656206, "grad_norm": 1.484375, "learning_rate": 9.213818097915046e-06, "loss": 0.5311, "step": 9144 }, { "epoch": 1.5666486311005303, "grad_norm": 1.46875, "learning_rate": 9.211999899798866e-06, "loss": 0.4447, "step": 9145 }, { "epoch": 1.5668217725354399, "grad_norm": 1.4453125, "learning_rate": 9.210181727895137e-06, "loss": 0.5006, "step": 9146 }, { "epoch": 1.5669949139703494, "grad_norm": 1.453125, "learning_rate": 9.208363582264333e-06, "loss": 0.5143, "step": 9147 }, { "epoch": 1.5671680554052592, "grad_norm": 1.4375, "learning_rate": 9.206545462966935e-06, "loss": 0.4502, "step": 9148 }, { "epoch": 1.567341196840169, "grad_norm": 1.4140625, "learning_rate": 9.204727370063425e-06, "loss": 0.4713, "step": 9149 }, { "epoch": 1.5675143382750785, "grad_norm": 1.328125, "learning_rate": 9.202909303614275e-06, "loss": 0.451, "step": 9150 }, { "epoch": 1.567687479709988, "grad_norm": 1.515625, "learning_rate": 9.201091263679967e-06, "loss": 0.5775, "step": 9151 }, { "epoch": 1.5678606211448978, "grad_norm": 1.296875, "learning_rate": 9.199273250320975e-06, "loss": 0.4408, "step": 9152 }, { "epoch": 1.5680337625798075, "grad_norm": 1.3046875, "learning_rate": 9.197455263597778e-06, "loss": 0.4797, "step": 9153 }, { "epoch": 1.568206904014717, "grad_norm": 1.40625, "learning_rate": 9.195637303570847e-06, "loss": 0.5199, "step": 9154 }, { "epoch": 1.5683800454496266, "grad_norm": 1.5078125, "learning_rate": 9.193819370300657e-06, "loss": 0.5208, "step": 9155 }, { "epoch": 1.5685531868845364, "grad_norm": 1.2890625, "learning_rate": 9.19200146384768e-06, "loss": 0.4241, "step": 9156 }, { "epoch": 1.568726328319446, "grad_norm": 1.4375, "learning_rate": 9.190183584272386e-06, "loss": 0.497, "step": 9157 }, { "epoch": 1.5688994697543555, "grad_norm": 1.5, "learning_rate": 9.18836573163525e-06, "loss": 0.5331, "step": 9158 }, { "epoch": 1.5690726111892652, "grad_norm": 1.4609375, "learning_rate": 9.186547905996738e-06, "loss": 0.435, "step": 9159 }, { "epoch": 1.569245752624175, "grad_norm": 1.4765625, "learning_rate": 9.18473010741732e-06, "loss": 0.4654, "step": 9160 }, { "epoch": 1.5694188940590845, "grad_norm": 1.40625, "learning_rate": 9.18291233595747e-06, "loss": 0.4978, "step": 9161 }, { "epoch": 1.569592035493994, "grad_norm": 1.390625, "learning_rate": 9.181094591677641e-06, "loss": 0.4876, "step": 9162 }, { "epoch": 1.5697651769289038, "grad_norm": 1.4609375, "learning_rate": 9.179276874638315e-06, "loss": 0.4803, "step": 9163 }, { "epoch": 1.5699383183638136, "grad_norm": 1.3984375, "learning_rate": 9.177459184899951e-06, "loss": 0.4488, "step": 9164 }, { "epoch": 1.5701114597987231, "grad_norm": 1.3515625, "learning_rate": 9.175641522523015e-06, "loss": 0.4747, "step": 9165 }, { "epoch": 1.5702846012336327, "grad_norm": 1.453125, "learning_rate": 9.173823887567966e-06, "loss": 0.4795, "step": 9166 }, { "epoch": 1.5704577426685424, "grad_norm": 1.328125, "learning_rate": 9.172006280095272e-06, "loss": 0.4758, "step": 9167 }, { "epoch": 1.570630884103452, "grad_norm": 1.46875, "learning_rate": 9.170188700165394e-06, "loss": 0.4491, "step": 9168 }, { "epoch": 1.5708040255383615, "grad_norm": 1.4296875, "learning_rate": 9.16837114783879e-06, "loss": 0.4517, "step": 9169 }, { "epoch": 1.5709771669732713, "grad_norm": 1.34375, "learning_rate": 9.166553623175925e-06, "loss": 0.4702, "step": 9170 }, { "epoch": 1.571150308408181, "grad_norm": 1.3671875, "learning_rate": 9.164736126237253e-06, "loss": 0.4451, "step": 9171 }, { "epoch": 1.5713234498430906, "grad_norm": 1.4140625, "learning_rate": 9.162918657083233e-06, "loss": 0.4144, "step": 9172 }, { "epoch": 1.5714965912780001, "grad_norm": 1.484375, "learning_rate": 9.161101215774322e-06, "loss": 0.4577, "step": 9173 }, { "epoch": 1.5716697327129099, "grad_norm": 1.46875, "learning_rate": 9.159283802370981e-06, "loss": 0.4932, "step": 9174 }, { "epoch": 1.5718428741478196, "grad_norm": 1.3359375, "learning_rate": 9.157466416933663e-06, "loss": 0.4463, "step": 9175 }, { "epoch": 1.5720160155827292, "grad_norm": 1.421875, "learning_rate": 9.15564905952282e-06, "loss": 0.494, "step": 9176 }, { "epoch": 1.5721891570176387, "grad_norm": 1.3671875, "learning_rate": 9.15383173019891e-06, "loss": 0.4965, "step": 9177 }, { "epoch": 1.5723622984525485, "grad_norm": 1.4453125, "learning_rate": 9.152014429022377e-06, "loss": 0.4812, "step": 9178 }, { "epoch": 1.572535439887458, "grad_norm": 1.3671875, "learning_rate": 9.150197156053682e-06, "loss": 0.487, "step": 9179 }, { "epoch": 1.5727085813223676, "grad_norm": 1.4921875, "learning_rate": 9.148379911353273e-06, "loss": 0.4983, "step": 9180 }, { "epoch": 1.5728817227572773, "grad_norm": 1.3828125, "learning_rate": 9.146562694981598e-06, "loss": 0.4644, "step": 9181 }, { "epoch": 1.573054864192187, "grad_norm": 1.5234375, "learning_rate": 9.144745506999108e-06, "loss": 0.4727, "step": 9182 }, { "epoch": 1.5732280056270966, "grad_norm": 1.390625, "learning_rate": 9.142928347466247e-06, "loss": 0.4719, "step": 9183 }, { "epoch": 1.5734011470620062, "grad_norm": 1.453125, "learning_rate": 9.141111216443464e-06, "loss": 0.5122, "step": 9184 }, { "epoch": 1.573574288496916, "grad_norm": 1.5546875, "learning_rate": 9.139294113991206e-06, "loss": 0.5571, "step": 9185 }, { "epoch": 1.5737474299318257, "grad_norm": 1.421875, "learning_rate": 9.137477040169921e-06, "loss": 0.4261, "step": 9186 }, { "epoch": 1.5739205713667352, "grad_norm": 1.3984375, "learning_rate": 9.135659995040046e-06, "loss": 0.4273, "step": 9187 }, { "epoch": 1.5740937128016448, "grad_norm": 1.359375, "learning_rate": 9.133842978662029e-06, "loss": 0.4657, "step": 9188 }, { "epoch": 1.5742668542365545, "grad_norm": 1.5, "learning_rate": 9.132025991096313e-06, "loss": 0.4718, "step": 9189 }, { "epoch": 1.574439995671464, "grad_norm": 1.3203125, "learning_rate": 9.130209032403336e-06, "loss": 0.4564, "step": 9190 }, { "epoch": 1.5746131371063736, "grad_norm": 1.34375, "learning_rate": 9.128392102643538e-06, "loss": 0.4403, "step": 9191 }, { "epoch": 1.5747862785412834, "grad_norm": 1.3984375, "learning_rate": 9.126575201877362e-06, "loss": 0.4041, "step": 9192 }, { "epoch": 1.5749594199761932, "grad_norm": 1.4296875, "learning_rate": 9.124758330165243e-06, "loss": 0.4608, "step": 9193 }, { "epoch": 1.5751325614111027, "grad_norm": 1.4296875, "learning_rate": 9.122941487567617e-06, "loss": 0.4591, "step": 9194 }, { "epoch": 1.5753057028460122, "grad_norm": 1.4375, "learning_rate": 9.121124674144924e-06, "loss": 0.4944, "step": 9195 }, { "epoch": 1.575478844280922, "grad_norm": 1.671875, "learning_rate": 9.119307889957602e-06, "loss": 0.4666, "step": 9196 }, { "epoch": 1.5756519857158318, "grad_norm": 1.40625, "learning_rate": 9.117491135066079e-06, "loss": 0.4808, "step": 9197 }, { "epoch": 1.5758251271507413, "grad_norm": 1.484375, "learning_rate": 9.115674409530793e-06, "loss": 0.4457, "step": 9198 }, { "epoch": 1.5759982685856508, "grad_norm": 1.3671875, "learning_rate": 9.113857713412173e-06, "loss": 0.465, "step": 9199 }, { "epoch": 1.5761714100205606, "grad_norm": 1.3125, "learning_rate": 9.112041046770653e-06, "loss": 0.4459, "step": 9200 }, { "epoch": 1.5763445514554701, "grad_norm": 1.5078125, "learning_rate": 9.110224409666665e-06, "loss": 0.3848, "step": 9201 }, { "epoch": 1.5765176928903797, "grad_norm": 1.4453125, "learning_rate": 9.108407802160633e-06, "loss": 0.5382, "step": 9202 }, { "epoch": 1.5766908343252894, "grad_norm": 1.421875, "learning_rate": 9.10659122431299e-06, "loss": 0.4699, "step": 9203 }, { "epoch": 1.5768639757601992, "grad_norm": 2.265625, "learning_rate": 9.104774676184164e-06, "loss": 0.5921, "step": 9204 }, { "epoch": 1.5770371171951088, "grad_norm": 1.453125, "learning_rate": 9.102958157834575e-06, "loss": 0.4819, "step": 9205 }, { "epoch": 1.5772102586300183, "grad_norm": 1.5, "learning_rate": 9.10114166932466e-06, "loss": 0.4922, "step": 9206 }, { "epoch": 1.577383400064928, "grad_norm": 1.421875, "learning_rate": 9.099325210714834e-06, "loss": 0.4687, "step": 9207 }, { "epoch": 1.5775565414998378, "grad_norm": 1.34375, "learning_rate": 9.097508782065526e-06, "loss": 0.4584, "step": 9208 }, { "epoch": 1.5777296829347474, "grad_norm": 1.3828125, "learning_rate": 9.095692383437156e-06, "loss": 0.4795, "step": 9209 }, { "epoch": 1.577902824369657, "grad_norm": 1.421875, "learning_rate": 9.093876014890147e-06, "loss": 0.4229, "step": 9210 }, { "epoch": 1.5780759658045667, "grad_norm": 1.4140625, "learning_rate": 9.092059676484918e-06, "loss": 0.4163, "step": 9211 }, { "epoch": 1.5782491072394762, "grad_norm": 1.4765625, "learning_rate": 9.09024336828189e-06, "loss": 0.5659, "step": 9212 }, { "epoch": 1.5784222486743857, "grad_norm": 1.375, "learning_rate": 9.088427090341483e-06, "loss": 0.4378, "step": 9213 }, { "epoch": 1.5785953901092955, "grad_norm": 1.3671875, "learning_rate": 9.08661084272411e-06, "loss": 0.4513, "step": 9214 }, { "epoch": 1.5787685315442053, "grad_norm": 1.390625, "learning_rate": 9.08479462549019e-06, "loss": 0.4347, "step": 9215 }, { "epoch": 1.5789416729791148, "grad_norm": 1.484375, "learning_rate": 9.082978438700138e-06, "loss": 0.4672, "step": 9216 }, { "epoch": 1.5791148144140243, "grad_norm": 1.46875, "learning_rate": 9.081162282414372e-06, "loss": 0.5417, "step": 9217 }, { "epoch": 1.579287955848934, "grad_norm": 1.4375, "learning_rate": 9.079346156693305e-06, "loss": 0.5025, "step": 9218 }, { "epoch": 1.5794610972838439, "grad_norm": 1.3984375, "learning_rate": 9.077530061597343e-06, "loss": 0.5515, "step": 9219 }, { "epoch": 1.5796342387187534, "grad_norm": 1.3359375, "learning_rate": 9.075713997186907e-06, "loss": 0.4832, "step": 9220 }, { "epoch": 1.579807380153663, "grad_norm": 1.484375, "learning_rate": 9.073897963522402e-06, "loss": 0.5154, "step": 9221 }, { "epoch": 1.5799805215885727, "grad_norm": 1.40625, "learning_rate": 9.072081960664239e-06, "loss": 0.4675, "step": 9222 }, { "epoch": 1.5801536630234823, "grad_norm": 1.2890625, "learning_rate": 9.070265988672822e-06, "loss": 0.4546, "step": 9223 }, { "epoch": 1.5803268044583918, "grad_norm": 1.4140625, "learning_rate": 9.068450047608564e-06, "loss": 0.4977, "step": 9224 }, { "epoch": 1.5804999458933016, "grad_norm": 1.6171875, "learning_rate": 9.06663413753187e-06, "loss": 0.472, "step": 9225 }, { "epoch": 1.5806730873282113, "grad_norm": 1.4140625, "learning_rate": 9.064818258503145e-06, "loss": 0.4235, "step": 9226 }, { "epoch": 1.5808462287631209, "grad_norm": 1.3125, "learning_rate": 9.06300241058279e-06, "loss": 0.52, "step": 9227 }, { "epoch": 1.5810193701980304, "grad_norm": 1.390625, "learning_rate": 9.061186593831215e-06, "loss": 0.4714, "step": 9228 }, { "epoch": 1.5811925116329402, "grad_norm": 1.4609375, "learning_rate": 9.05937080830882e-06, "loss": 0.5048, "step": 9229 }, { "epoch": 1.58136565306785, "grad_norm": 1.3359375, "learning_rate": 9.057555054076003e-06, "loss": 0.4276, "step": 9230 }, { "epoch": 1.5815387945027595, "grad_norm": 1.40625, "learning_rate": 9.055739331193166e-06, "loss": 0.4773, "step": 9231 }, { "epoch": 1.581711935937669, "grad_norm": 1.4375, "learning_rate": 9.053923639720711e-06, "loss": 0.4803, "step": 9232 }, { "epoch": 1.5818850773725788, "grad_norm": 1.4609375, "learning_rate": 9.052107979719032e-06, "loss": 0.4907, "step": 9233 }, { "epoch": 1.5820582188074883, "grad_norm": 1.4921875, "learning_rate": 9.050292351248529e-06, "loss": 0.4324, "step": 9234 }, { "epoch": 1.5822313602423979, "grad_norm": 1.4453125, "learning_rate": 9.048476754369595e-06, "loss": 0.468, "step": 9235 }, { "epoch": 1.5824045016773076, "grad_norm": 1.3046875, "learning_rate": 9.046661189142627e-06, "loss": 0.4247, "step": 9236 }, { "epoch": 1.5825776431122174, "grad_norm": 1.484375, "learning_rate": 9.04484565562802e-06, "loss": 0.5446, "step": 9237 }, { "epoch": 1.582750784547127, "grad_norm": 1.34375, "learning_rate": 9.04303015388616e-06, "loss": 0.4388, "step": 9238 }, { "epoch": 1.5829239259820365, "grad_norm": 1.3671875, "learning_rate": 9.041214683977449e-06, "loss": 0.4657, "step": 9239 }, { "epoch": 1.5830970674169462, "grad_norm": 1.3671875, "learning_rate": 9.039399245962272e-06, "loss": 0.4525, "step": 9240 }, { "epoch": 1.583270208851856, "grad_norm": 1.4609375, "learning_rate": 9.037583839901021e-06, "loss": 0.4958, "step": 9241 }, { "epoch": 1.5834433502867655, "grad_norm": 1.3125, "learning_rate": 9.03576846585408e-06, "loss": 0.4373, "step": 9242 }, { "epoch": 1.583616491721675, "grad_norm": 1.375, "learning_rate": 9.033953123881841e-06, "loss": 0.452, "step": 9243 }, { "epoch": 1.5837896331565848, "grad_norm": 1.3984375, "learning_rate": 9.032137814044692e-06, "loss": 0.4888, "step": 9244 }, { "epoch": 1.5839627745914946, "grad_norm": 1.359375, "learning_rate": 9.030322536403013e-06, "loss": 0.4414, "step": 9245 }, { "epoch": 1.584135916026404, "grad_norm": 1.484375, "learning_rate": 9.028507291017194e-06, "loss": 0.4975, "step": 9246 }, { "epoch": 1.5843090574613137, "grad_norm": 1.3984375, "learning_rate": 9.026692077947612e-06, "loss": 0.4599, "step": 9247 }, { "epoch": 1.5844821988962234, "grad_norm": 1.421875, "learning_rate": 9.024876897254654e-06, "loss": 0.44, "step": 9248 }, { "epoch": 1.584655340331133, "grad_norm": 1.4296875, "learning_rate": 9.023061748998696e-06, "loss": 0.4691, "step": 9249 }, { "epoch": 1.5848284817660425, "grad_norm": 1.5703125, "learning_rate": 9.021246633240124e-06, "loss": 0.5442, "step": 9250 }, { "epoch": 1.5850016232009523, "grad_norm": 1.4921875, "learning_rate": 9.019431550039317e-06, "loss": 0.4914, "step": 9251 }, { "epoch": 1.585174764635862, "grad_norm": 1.421875, "learning_rate": 9.017616499456647e-06, "loss": 0.5285, "step": 9252 }, { "epoch": 1.5853479060707716, "grad_norm": 1.3984375, "learning_rate": 9.015801481552498e-06, "loss": 0.4229, "step": 9253 }, { "epoch": 1.5855210475056811, "grad_norm": 1.390625, "learning_rate": 9.013986496387239e-06, "loss": 0.4886, "step": 9254 }, { "epoch": 1.5856941889405909, "grad_norm": 1.3984375, "learning_rate": 9.01217154402125e-06, "loss": 0.4273, "step": 9255 }, { "epoch": 1.5858673303755007, "grad_norm": 1.3203125, "learning_rate": 9.0103566245149e-06, "loss": 0.4262, "step": 9256 }, { "epoch": 1.58604047181041, "grad_norm": 1.375, "learning_rate": 9.008541737928565e-06, "loss": 0.4872, "step": 9257 }, { "epoch": 1.5862136132453197, "grad_norm": 1.4296875, "learning_rate": 9.006726884322615e-06, "loss": 0.4544, "step": 9258 }, { "epoch": 1.5863867546802295, "grad_norm": 1.3671875, "learning_rate": 9.004912063757414e-06, "loss": 0.459, "step": 9259 }, { "epoch": 1.586559896115139, "grad_norm": 1.5625, "learning_rate": 9.003097276293344e-06, "loss": 0.4681, "step": 9260 }, { "epoch": 1.5867330375500486, "grad_norm": 1.421875, "learning_rate": 9.001282521990764e-06, "loss": 0.4571, "step": 9261 }, { "epoch": 1.5869061789849583, "grad_norm": 1.4609375, "learning_rate": 8.999467800910045e-06, "loss": 0.4441, "step": 9262 }, { "epoch": 1.587079320419868, "grad_norm": 1.3515625, "learning_rate": 8.99765311311155e-06, "loss": 0.4518, "step": 9263 }, { "epoch": 1.5872524618547776, "grad_norm": 1.4296875, "learning_rate": 8.995838458655644e-06, "loss": 0.4803, "step": 9264 }, { "epoch": 1.5874256032896872, "grad_norm": 1.4140625, "learning_rate": 8.994023837602694e-06, "loss": 0.4316, "step": 9265 }, { "epoch": 1.587598744724597, "grad_norm": 1.453125, "learning_rate": 8.992209250013057e-06, "loss": 0.4863, "step": 9266 }, { "epoch": 1.5877718861595067, "grad_norm": 1.3203125, "learning_rate": 8.990394695947097e-06, "loss": 0.4785, "step": 9267 }, { "epoch": 1.587945027594416, "grad_norm": 1.3515625, "learning_rate": 8.988580175465177e-06, "loss": 0.4527, "step": 9268 }, { "epoch": 1.5881181690293258, "grad_norm": 1.5546875, "learning_rate": 8.986765688627652e-06, "loss": 0.5348, "step": 9269 }, { "epoch": 1.5882913104642356, "grad_norm": 1.2734375, "learning_rate": 8.984951235494879e-06, "loss": 0.4295, "step": 9270 }, { "epoch": 1.588464451899145, "grad_norm": 1.4296875, "learning_rate": 8.983136816127221e-06, "loss": 0.4215, "step": 9271 }, { "epoch": 1.5886375933340546, "grad_norm": 1.4453125, "learning_rate": 8.981322430585031e-06, "loss": 0.4894, "step": 9272 }, { "epoch": 1.5888107347689644, "grad_norm": 1.46875, "learning_rate": 8.97950807892866e-06, "loss": 0.484, "step": 9273 }, { "epoch": 1.5889838762038742, "grad_norm": 1.640625, "learning_rate": 8.97769376121847e-06, "loss": 0.5345, "step": 9274 }, { "epoch": 1.5891570176387837, "grad_norm": 1.359375, "learning_rate": 8.975879477514804e-06, "loss": 0.4332, "step": 9275 }, { "epoch": 1.5893301590736932, "grad_norm": 1.578125, "learning_rate": 8.974065227878018e-06, "loss": 0.4666, "step": 9276 }, { "epoch": 1.589503300508603, "grad_norm": 1.3515625, "learning_rate": 8.972251012368463e-06, "loss": 0.4873, "step": 9277 }, { "epoch": 1.5896764419435128, "grad_norm": 1.421875, "learning_rate": 8.970436831046484e-06, "loss": 0.4468, "step": 9278 }, { "epoch": 1.5898495833784223, "grad_norm": 1.4375, "learning_rate": 8.968622683972433e-06, "loss": 0.4425, "step": 9279 }, { "epoch": 1.5900227248133318, "grad_norm": 1.7265625, "learning_rate": 8.966808571206655e-06, "loss": 0.4713, "step": 9280 }, { "epoch": 1.5901958662482416, "grad_norm": 1.40625, "learning_rate": 8.964994492809493e-06, "loss": 0.522, "step": 9281 }, { "epoch": 1.5903690076831511, "grad_norm": 1.484375, "learning_rate": 8.963180448841296e-06, "loss": 0.5313, "step": 9282 }, { "epoch": 1.5905421491180607, "grad_norm": 1.4765625, "learning_rate": 8.961366439362404e-06, "loss": 0.504, "step": 9283 }, { "epoch": 1.5907152905529705, "grad_norm": 1.390625, "learning_rate": 8.959552464433166e-06, "loss": 0.5098, "step": 9284 }, { "epoch": 1.5908884319878802, "grad_norm": 1.359375, "learning_rate": 8.957738524113913e-06, "loss": 0.422, "step": 9285 }, { "epoch": 1.5910615734227898, "grad_norm": 1.640625, "learning_rate": 8.955924618464992e-06, "loss": 0.4996, "step": 9286 }, { "epoch": 1.5912347148576993, "grad_norm": 1.390625, "learning_rate": 8.954110747546737e-06, "loss": 0.4442, "step": 9287 }, { "epoch": 1.591407856292609, "grad_norm": 1.3828125, "learning_rate": 8.952296911419487e-06, "loss": 0.5361, "step": 9288 }, { "epoch": 1.5915809977275188, "grad_norm": 1.5703125, "learning_rate": 8.950483110143582e-06, "loss": 0.5154, "step": 9289 }, { "epoch": 1.5917541391624284, "grad_norm": 1.375, "learning_rate": 8.948669343779353e-06, "loss": 0.4522, "step": 9290 }, { "epoch": 1.591927280597338, "grad_norm": 1.3671875, "learning_rate": 8.946855612387134e-06, "loss": 0.4315, "step": 9291 }, { "epoch": 1.5921004220322477, "grad_norm": 1.4765625, "learning_rate": 8.945041916027259e-06, "loss": 0.4729, "step": 9292 }, { "epoch": 1.5922735634671572, "grad_norm": 1.3046875, "learning_rate": 8.943228254760063e-06, "loss": 0.4765, "step": 9293 }, { "epoch": 1.5924467049020667, "grad_norm": 1.421875, "learning_rate": 8.941414628645872e-06, "loss": 0.4532, "step": 9294 }, { "epoch": 1.5926198463369765, "grad_norm": 1.40625, "learning_rate": 8.939601037745016e-06, "loss": 0.4447, "step": 9295 }, { "epoch": 1.5927929877718863, "grad_norm": 1.40625, "learning_rate": 8.937787482117827e-06, "loss": 0.5338, "step": 9296 }, { "epoch": 1.5929661292067958, "grad_norm": 1.421875, "learning_rate": 8.935973961824628e-06, "loss": 0.4716, "step": 9297 }, { "epoch": 1.5931392706417054, "grad_norm": 1.328125, "learning_rate": 8.934160476925747e-06, "loss": 0.4518, "step": 9298 }, { "epoch": 1.5933124120766151, "grad_norm": 1.40625, "learning_rate": 8.932347027481507e-06, "loss": 0.4528, "step": 9299 }, { "epoch": 1.5934855535115249, "grad_norm": 1.3984375, "learning_rate": 8.930533613552231e-06, "loss": 0.4772, "step": 9300 }, { "epoch": 1.5936586949464344, "grad_norm": 1.46875, "learning_rate": 8.928720235198247e-06, "loss": 0.4858, "step": 9301 }, { "epoch": 1.593831836381344, "grad_norm": 1.375, "learning_rate": 8.926906892479869e-06, "loss": 0.446, "step": 9302 }, { "epoch": 1.5940049778162537, "grad_norm": 1.4453125, "learning_rate": 8.925093585457416e-06, "loss": 0.5572, "step": 9303 }, { "epoch": 1.5941781192511633, "grad_norm": 1.5078125, "learning_rate": 8.923280314191215e-06, "loss": 0.4752, "step": 9304 }, { "epoch": 1.5943512606860728, "grad_norm": 1.4375, "learning_rate": 8.921467078741581e-06, "loss": 0.4465, "step": 9305 }, { "epoch": 1.5945244021209826, "grad_norm": 1.5390625, "learning_rate": 8.919653879168825e-06, "loss": 0.4318, "step": 9306 }, { "epoch": 1.5946975435558923, "grad_norm": 1.5625, "learning_rate": 8.91784071553327e-06, "loss": 0.4963, "step": 9307 }, { "epoch": 1.5948706849908019, "grad_norm": 1.4296875, "learning_rate": 8.916027587895224e-06, "loss": 0.4809, "step": 9308 }, { "epoch": 1.5950438264257114, "grad_norm": 1.4296875, "learning_rate": 8.914214496315002e-06, "loss": 0.4526, "step": 9309 }, { "epoch": 1.5952169678606212, "grad_norm": 1.34375, "learning_rate": 8.912401440852916e-06, "loss": 0.4643, "step": 9310 }, { "epoch": 1.595390109295531, "grad_norm": 1.4765625, "learning_rate": 8.910588421569275e-06, "loss": 0.4523, "step": 9311 }, { "epoch": 1.5955632507304405, "grad_norm": 1.3125, "learning_rate": 8.908775438524388e-06, "loss": 0.3974, "step": 9312 }, { "epoch": 1.59573639216535, "grad_norm": 1.359375, "learning_rate": 8.906962491778563e-06, "loss": 0.5235, "step": 9313 }, { "epoch": 1.5959095336002598, "grad_norm": 1.5078125, "learning_rate": 8.90514958139211e-06, "loss": 0.5259, "step": 9314 }, { "epoch": 1.5960826750351693, "grad_norm": 1.421875, "learning_rate": 8.903336707425334e-06, "loss": 0.4341, "step": 9315 }, { "epoch": 1.5962558164700789, "grad_norm": 1.390625, "learning_rate": 8.901523869938535e-06, "loss": 0.4342, "step": 9316 }, { "epoch": 1.5964289579049886, "grad_norm": 1.515625, "learning_rate": 8.899711068992023e-06, "loss": 0.4728, "step": 9317 }, { "epoch": 1.5966020993398984, "grad_norm": 1.4375, "learning_rate": 8.897898304646094e-06, "loss": 0.6344, "step": 9318 }, { "epoch": 1.596775240774808, "grad_norm": 1.3671875, "learning_rate": 8.896085576961049e-06, "loss": 0.4843, "step": 9319 }, { "epoch": 1.5969483822097175, "grad_norm": 1.4453125, "learning_rate": 8.894272885997192e-06, "loss": 0.4973, "step": 9320 }, { "epoch": 1.5971215236446272, "grad_norm": 1.3984375, "learning_rate": 8.892460231814815e-06, "loss": 0.4918, "step": 9321 }, { "epoch": 1.597294665079537, "grad_norm": 1.3828125, "learning_rate": 8.890647614474223e-06, "loss": 0.4733, "step": 9322 }, { "epoch": 1.5974678065144465, "grad_norm": 1.359375, "learning_rate": 8.888835034035706e-06, "loss": 0.4634, "step": 9323 }, { "epoch": 1.597640947949356, "grad_norm": 1.546875, "learning_rate": 8.887022490559557e-06, "loss": 0.4637, "step": 9324 }, { "epoch": 1.5978140893842658, "grad_norm": 1.453125, "learning_rate": 8.885209984106072e-06, "loss": 0.4464, "step": 9325 }, { "epoch": 1.5979872308191754, "grad_norm": 1.3046875, "learning_rate": 8.883397514735546e-06, "loss": 0.4328, "step": 9326 }, { "epoch": 1.598160372254085, "grad_norm": 1.4921875, "learning_rate": 8.88158508250827e-06, "loss": 0.4342, "step": 9327 }, { "epoch": 1.5983335136889947, "grad_norm": 1.453125, "learning_rate": 8.87977268748453e-06, "loss": 0.5341, "step": 9328 }, { "epoch": 1.5985066551239044, "grad_norm": 1.40625, "learning_rate": 8.877960329724615e-06, "loss": 0.4459, "step": 9329 }, { "epoch": 1.598679796558814, "grad_norm": 1.4140625, "learning_rate": 8.876148009288813e-06, "loss": 0.4723, "step": 9330 }, { "epoch": 1.5988529379937235, "grad_norm": 1.4609375, "learning_rate": 8.874335726237409e-06, "loss": 0.5101, "step": 9331 }, { "epoch": 1.5990260794286333, "grad_norm": 1.4765625, "learning_rate": 8.872523480630692e-06, "loss": 0.4586, "step": 9332 }, { "epoch": 1.599199220863543, "grad_norm": 1.46875, "learning_rate": 8.870711272528937e-06, "loss": 0.4541, "step": 9333 }, { "epoch": 1.5993723622984526, "grad_norm": 1.265625, "learning_rate": 8.868899101992438e-06, "loss": 0.475, "step": 9334 }, { "epoch": 1.5995455037333621, "grad_norm": 1.59375, "learning_rate": 8.867086969081459e-06, "loss": 0.4976, "step": 9335 }, { "epoch": 1.599718645168272, "grad_norm": 1.5078125, "learning_rate": 8.865274873856299e-06, "loss": 0.4358, "step": 9336 }, { "epoch": 1.5998917866031814, "grad_norm": 1.359375, "learning_rate": 8.863462816377224e-06, "loss": 0.4983, "step": 9337 }, { "epoch": 1.600064928038091, "grad_norm": 1.453125, "learning_rate": 8.861650796704515e-06, "loss": 0.4978, "step": 9338 }, { "epoch": 1.6002380694730007, "grad_norm": 1.4140625, "learning_rate": 8.85983881489845e-06, "loss": 0.4978, "step": 9339 }, { "epoch": 1.6004112109079105, "grad_norm": 1.390625, "learning_rate": 8.858026871019299e-06, "loss": 0.4427, "step": 9340 }, { "epoch": 1.60058435234282, "grad_norm": 1.421875, "learning_rate": 8.856214965127341e-06, "loss": 0.4227, "step": 9341 }, { "epoch": 1.6007574937777296, "grad_norm": 1.3828125, "learning_rate": 8.854403097282843e-06, "loss": 0.4818, "step": 9342 }, { "epoch": 1.6009306352126393, "grad_norm": 1.5625, "learning_rate": 8.852591267546077e-06, "loss": 0.5791, "step": 9343 }, { "epoch": 1.601103776647549, "grad_norm": 1.484375, "learning_rate": 8.850779475977316e-06, "loss": 0.4395, "step": 9344 }, { "epoch": 1.6012769180824586, "grad_norm": 1.40625, "learning_rate": 8.848967722636826e-06, "loss": 0.4901, "step": 9345 }, { "epoch": 1.6014500595173682, "grad_norm": 1.3984375, "learning_rate": 8.84715600758487e-06, "loss": 0.4474, "step": 9346 }, { "epoch": 1.601623200952278, "grad_norm": 1.6015625, "learning_rate": 8.84534433088172e-06, "loss": 0.5493, "step": 9347 }, { "epoch": 1.6017963423871875, "grad_norm": 1.3359375, "learning_rate": 8.843532692587642e-06, "loss": 0.4608, "step": 9348 }, { "epoch": 1.601969483822097, "grad_norm": 1.3984375, "learning_rate": 8.841721092762892e-06, "loss": 0.4652, "step": 9349 }, { "epoch": 1.6021426252570068, "grad_norm": 1.375, "learning_rate": 8.839909531467737e-06, "loss": 0.4502, "step": 9350 }, { "epoch": 1.6023157666919166, "grad_norm": 1.578125, "learning_rate": 8.838098008762436e-06, "loss": 0.5076, "step": 9351 }, { "epoch": 1.602488908126826, "grad_norm": 1.4453125, "learning_rate": 8.83628652470725e-06, "loss": 0.509, "step": 9352 }, { "epoch": 1.6026620495617356, "grad_norm": 1.3203125, "learning_rate": 8.834475079362437e-06, "loss": 0.454, "step": 9353 }, { "epoch": 1.6028351909966454, "grad_norm": 1.3515625, "learning_rate": 8.832663672788251e-06, "loss": 0.4173, "step": 9354 }, { "epoch": 1.6030083324315552, "grad_norm": 1.4375, "learning_rate": 8.830852305044951e-06, "loss": 0.4829, "step": 9355 }, { "epoch": 1.6031814738664647, "grad_norm": 1.3125, "learning_rate": 8.829040976192789e-06, "loss": 0.4879, "step": 9356 }, { "epoch": 1.6033546153013742, "grad_norm": 1.484375, "learning_rate": 8.827229686292017e-06, "loss": 0.4996, "step": 9357 }, { "epoch": 1.603527756736284, "grad_norm": 1.4609375, "learning_rate": 8.82541843540289e-06, "loss": 0.5187, "step": 9358 }, { "epoch": 1.6037008981711935, "grad_norm": 1.4375, "learning_rate": 8.823607223585657e-06, "loss": 0.4483, "step": 9359 }, { "epoch": 1.603874039606103, "grad_norm": 1.4765625, "learning_rate": 8.821796050900568e-06, "loss": 0.491, "step": 9360 }, { "epoch": 1.6040471810410128, "grad_norm": 1.3984375, "learning_rate": 8.81998491740787e-06, "loss": 0.4861, "step": 9361 }, { "epoch": 1.6042203224759226, "grad_norm": 1.359375, "learning_rate": 8.81817382316781e-06, "loss": 0.4442, "step": 9362 }, { "epoch": 1.6043934639108322, "grad_norm": 1.453125, "learning_rate": 8.81636276824063e-06, "loss": 0.4868, "step": 9363 }, { "epoch": 1.6045666053457417, "grad_norm": 1.453125, "learning_rate": 8.814551752686577e-06, "loss": 0.4392, "step": 9364 }, { "epoch": 1.6047397467806515, "grad_norm": 1.3984375, "learning_rate": 8.812740776565895e-06, "loss": 0.4375, "step": 9365 }, { "epoch": 1.6049128882155612, "grad_norm": 1.359375, "learning_rate": 8.810929839938821e-06, "loss": 0.5176, "step": 9366 }, { "epoch": 1.6050860296504708, "grad_norm": 1.3046875, "learning_rate": 8.809118942865597e-06, "loss": 0.4421, "step": 9367 }, { "epoch": 1.6052591710853803, "grad_norm": 1.3828125, "learning_rate": 8.807308085406459e-06, "loss": 0.4518, "step": 9368 }, { "epoch": 1.60543231252029, "grad_norm": 1.390625, "learning_rate": 8.805497267621653e-06, "loss": 0.4673, "step": 9369 }, { "epoch": 1.6056054539551996, "grad_norm": 1.421875, "learning_rate": 8.803686489571406e-06, "loss": 0.4987, "step": 9370 }, { "epoch": 1.6057785953901091, "grad_norm": 1.3359375, "learning_rate": 8.801875751315955e-06, "loss": 0.5121, "step": 9371 }, { "epoch": 1.605951736825019, "grad_norm": 1.359375, "learning_rate": 8.800065052915536e-06, "loss": 0.4119, "step": 9372 }, { "epoch": 1.6061248782599287, "grad_norm": 1.3359375, "learning_rate": 8.79825439443038e-06, "loss": 0.4589, "step": 9373 }, { "epoch": 1.6062980196948382, "grad_norm": 1.3515625, "learning_rate": 8.796443775920715e-06, "loss": 0.4341, "step": 9374 }, { "epoch": 1.6064711611297477, "grad_norm": 1.2890625, "learning_rate": 8.79463319744677e-06, "loss": 0.4561, "step": 9375 }, { "epoch": 1.6066443025646575, "grad_norm": 1.40625, "learning_rate": 8.792822659068777e-06, "loss": 0.4363, "step": 9376 }, { "epoch": 1.6068174439995673, "grad_norm": 1.359375, "learning_rate": 8.791012160846965e-06, "loss": 0.4334, "step": 9377 }, { "epoch": 1.6069905854344768, "grad_norm": 1.3828125, "learning_rate": 8.789201702841546e-06, "loss": 0.5148, "step": 9378 }, { "epoch": 1.6071637268693864, "grad_norm": 1.515625, "learning_rate": 8.787391285112759e-06, "loss": 0.4852, "step": 9379 }, { "epoch": 1.6073368683042961, "grad_norm": 1.6015625, "learning_rate": 8.785580907720821e-06, "loss": 0.4665, "step": 9380 }, { "epoch": 1.6075100097392057, "grad_norm": 1.5859375, "learning_rate": 8.783770570725953e-06, "loss": 0.4863, "step": 9381 }, { "epoch": 1.6076831511741152, "grad_norm": 1.4453125, "learning_rate": 8.781960274188376e-06, "loss": 0.5384, "step": 9382 }, { "epoch": 1.607856292609025, "grad_norm": 1.5234375, "learning_rate": 8.780150018168305e-06, "loss": 0.4949, "step": 9383 }, { "epoch": 1.6080294340439347, "grad_norm": 1.4453125, "learning_rate": 8.778339802725964e-06, "loss": 0.5042, "step": 9384 }, { "epoch": 1.6082025754788443, "grad_norm": 1.3984375, "learning_rate": 8.776529627921562e-06, "loss": 0.4884, "step": 9385 }, { "epoch": 1.6083757169137538, "grad_norm": 1.3671875, "learning_rate": 8.77471949381532e-06, "loss": 0.4057, "step": 9386 }, { "epoch": 1.6085488583486636, "grad_norm": 1.390625, "learning_rate": 8.772909400467445e-06, "loss": 0.4267, "step": 9387 }, { "epoch": 1.6087219997835733, "grad_norm": 1.6484375, "learning_rate": 8.771099347938152e-06, "loss": 0.4743, "step": 9388 }, { "epoch": 1.6088951412184829, "grad_norm": 1.4609375, "learning_rate": 8.76928933628765e-06, "loss": 0.4881, "step": 9389 }, { "epoch": 1.6090682826533924, "grad_norm": 1.5078125, "learning_rate": 8.76747936557615e-06, "loss": 0.476, "step": 9390 }, { "epoch": 1.6092414240883022, "grad_norm": 1.3046875, "learning_rate": 8.765669435863863e-06, "loss": 0.4718, "step": 9391 }, { "epoch": 1.609414565523212, "grad_norm": 1.4453125, "learning_rate": 8.763859547210989e-06, "loss": 0.4967, "step": 9392 }, { "epoch": 1.6095877069581213, "grad_norm": 1.4765625, "learning_rate": 8.762049699677738e-06, "loss": 0.458, "step": 9393 }, { "epoch": 1.609760848393031, "grad_norm": 1.3828125, "learning_rate": 8.760239893324309e-06, "loss": 0.5027, "step": 9394 }, { "epoch": 1.6099339898279408, "grad_norm": 1.453125, "learning_rate": 8.758430128210908e-06, "loss": 0.4723, "step": 9395 }, { "epoch": 1.6101071312628503, "grad_norm": 1.4296875, "learning_rate": 8.756620404397737e-06, "loss": 0.5636, "step": 9396 }, { "epoch": 1.6102802726977599, "grad_norm": 1.3203125, "learning_rate": 8.75481072194499e-06, "loss": 0.5102, "step": 9397 }, { "epoch": 1.6104534141326696, "grad_norm": 1.5234375, "learning_rate": 8.753001080912873e-06, "loss": 0.4885, "step": 9398 }, { "epoch": 1.6106265555675794, "grad_norm": 1.4453125, "learning_rate": 8.751191481361576e-06, "loss": 0.5006, "step": 9399 }, { "epoch": 1.610799697002489, "grad_norm": 1.4921875, "learning_rate": 8.749381923351293e-06, "loss": 0.4967, "step": 9400 }, { "epoch": 1.6109728384373985, "grad_norm": 1.4140625, "learning_rate": 8.747572406942227e-06, "loss": 0.5132, "step": 9401 }, { "epoch": 1.6111459798723082, "grad_norm": 1.484375, "learning_rate": 8.745762932194564e-06, "loss": 0.4904, "step": 9402 }, { "epoch": 1.611319121307218, "grad_norm": 1.3828125, "learning_rate": 8.743953499168499e-06, "loss": 0.4951, "step": 9403 }, { "epoch": 1.6114922627421273, "grad_norm": 1.5078125, "learning_rate": 8.74214410792422e-06, "loss": 0.4237, "step": 9404 }, { "epoch": 1.611665404177037, "grad_norm": 1.46875, "learning_rate": 8.740334758521916e-06, "loss": 0.444, "step": 9405 }, { "epoch": 1.6118385456119468, "grad_norm": 1.6171875, "learning_rate": 8.73852545102177e-06, "loss": 0.5481, "step": 9406 }, { "epoch": 1.6120116870468564, "grad_norm": 1.453125, "learning_rate": 8.736716185483974e-06, "loss": 0.4478, "step": 9407 }, { "epoch": 1.612184828481766, "grad_norm": 1.4609375, "learning_rate": 8.734906961968713e-06, "loss": 0.4814, "step": 9408 }, { "epoch": 1.6123579699166757, "grad_norm": 1.5703125, "learning_rate": 8.733097780536161e-06, "loss": 0.4579, "step": 9409 }, { "epoch": 1.6125311113515854, "grad_norm": 1.421875, "learning_rate": 8.73128864124651e-06, "loss": 0.4696, "step": 9410 }, { "epoch": 1.612704252786495, "grad_norm": 1.34375, "learning_rate": 8.72947954415993e-06, "loss": 0.4225, "step": 9411 }, { "epoch": 1.6128773942214045, "grad_norm": 1.34375, "learning_rate": 8.72767048933661e-06, "loss": 0.3868, "step": 9412 }, { "epoch": 1.6130505356563143, "grad_norm": 1.421875, "learning_rate": 8.72586147683672e-06, "loss": 0.482, "step": 9413 }, { "epoch": 1.613223677091224, "grad_norm": 1.3984375, "learning_rate": 8.724052506720439e-06, "loss": 0.4608, "step": 9414 }, { "epoch": 1.6133968185261334, "grad_norm": 1.3515625, "learning_rate": 8.722243579047944e-06, "loss": 0.4201, "step": 9415 }, { "epoch": 1.6135699599610431, "grad_norm": 1.3671875, "learning_rate": 8.720434693879402e-06, "loss": 0.5106, "step": 9416 }, { "epoch": 1.613743101395953, "grad_norm": 1.5234375, "learning_rate": 8.718625851274991e-06, "loss": 0.4839, "step": 9417 }, { "epoch": 1.6139162428308624, "grad_norm": 1.4765625, "learning_rate": 8.716817051294874e-06, "loss": 0.493, "step": 9418 }, { "epoch": 1.614089384265772, "grad_norm": 1.3125, "learning_rate": 8.715008293999226e-06, "loss": 0.4431, "step": 9419 }, { "epoch": 1.6142625257006817, "grad_norm": 1.4140625, "learning_rate": 8.713199579448214e-06, "loss": 0.4755, "step": 9420 }, { "epoch": 1.6144356671355915, "grad_norm": 1.4921875, "learning_rate": 8.711390907702001e-06, "loss": 0.4704, "step": 9421 }, { "epoch": 1.614608808570501, "grad_norm": 1.390625, "learning_rate": 8.70958227882075e-06, "loss": 0.4441, "step": 9422 }, { "epoch": 1.6147819500054106, "grad_norm": 1.359375, "learning_rate": 8.70777369286463e-06, "loss": 0.4571, "step": 9423 }, { "epoch": 1.6149550914403203, "grad_norm": 1.328125, "learning_rate": 8.705965149893802e-06, "loss": 0.4325, "step": 9424 }, { "epoch": 1.61512823287523, "grad_norm": 1.3828125, "learning_rate": 8.704156649968423e-06, "loss": 0.4426, "step": 9425 }, { "epoch": 1.6153013743101396, "grad_norm": 1.3984375, "learning_rate": 8.702348193148655e-06, "loss": 0.4713, "step": 9426 }, { "epoch": 1.6154745157450492, "grad_norm": 1.421875, "learning_rate": 8.70053977949465e-06, "loss": 0.4597, "step": 9427 }, { "epoch": 1.615647657179959, "grad_norm": 1.3515625, "learning_rate": 8.69873140906657e-06, "loss": 0.4333, "step": 9428 }, { "epoch": 1.6158207986148685, "grad_norm": 1.4375, "learning_rate": 8.696923081924567e-06, "loss": 0.435, "step": 9429 }, { "epoch": 1.615993940049778, "grad_norm": 1.421875, "learning_rate": 8.695114798128794e-06, "loss": 0.4463, "step": 9430 }, { "epoch": 1.6161670814846878, "grad_norm": 1.3984375, "learning_rate": 8.693306557739403e-06, "loss": 0.4103, "step": 9431 }, { "epoch": 1.6163402229195976, "grad_norm": 1.3515625, "learning_rate": 8.691498360816545e-06, "loss": 0.4579, "step": 9432 }, { "epoch": 1.616513364354507, "grad_norm": 1.453125, "learning_rate": 8.689690207420364e-06, "loss": 0.4528, "step": 9433 }, { "epoch": 1.6166865057894166, "grad_norm": 1.375, "learning_rate": 8.687882097611016e-06, "loss": 0.4513, "step": 9434 }, { "epoch": 1.6168596472243264, "grad_norm": 1.3671875, "learning_rate": 8.68607403144864e-06, "loss": 0.4571, "step": 9435 }, { "epoch": 1.6170327886592362, "grad_norm": 1.4375, "learning_rate": 8.684266008993385e-06, "loss": 0.4807, "step": 9436 }, { "epoch": 1.6172059300941457, "grad_norm": 1.46875, "learning_rate": 8.682458030305393e-06, "loss": 0.5348, "step": 9437 }, { "epoch": 1.6173790715290552, "grad_norm": 1.421875, "learning_rate": 8.680650095444802e-06, "loss": 0.4923, "step": 9438 }, { "epoch": 1.617552212963965, "grad_norm": 1.5234375, "learning_rate": 8.678842204471755e-06, "loss": 0.5172, "step": 9439 }, { "epoch": 1.6177253543988745, "grad_norm": 1.4921875, "learning_rate": 8.67703435744639e-06, "loss": 0.4735, "step": 9440 }, { "epoch": 1.617898495833784, "grad_norm": 1.40625, "learning_rate": 8.675226554428847e-06, "loss": 0.4441, "step": 9441 }, { "epoch": 1.6180716372686939, "grad_norm": 1.4140625, "learning_rate": 8.673418795479256e-06, "loss": 0.4178, "step": 9442 }, { "epoch": 1.6182447787036036, "grad_norm": 1.3515625, "learning_rate": 8.671611080657752e-06, "loss": 0.4075, "step": 9443 }, { "epoch": 1.6184179201385132, "grad_norm": 1.34375, "learning_rate": 8.669803410024471e-06, "loss": 0.4523, "step": 9444 }, { "epoch": 1.6185910615734227, "grad_norm": 1.3828125, "learning_rate": 8.667995783639548e-06, "loss": 0.4432, "step": 9445 }, { "epoch": 1.6187642030083325, "grad_norm": 1.53125, "learning_rate": 8.666188201563103e-06, "loss": 0.509, "step": 9446 }, { "epoch": 1.6189373444432422, "grad_norm": 1.359375, "learning_rate": 8.664380663855272e-06, "loss": 0.4599, "step": 9447 }, { "epoch": 1.6191104858781518, "grad_norm": 1.421875, "learning_rate": 8.662573170576181e-06, "loss": 0.4576, "step": 9448 }, { "epoch": 1.6192836273130613, "grad_norm": 1.359375, "learning_rate": 8.660765721785952e-06, "loss": 0.4504, "step": 9449 }, { "epoch": 1.619456768747971, "grad_norm": 1.3125, "learning_rate": 8.658958317544712e-06, "loss": 0.4376, "step": 9450 }, { "epoch": 1.6196299101828806, "grad_norm": 1.296875, "learning_rate": 8.65715095791258e-06, "loss": 0.4153, "step": 9451 }, { "epoch": 1.6198030516177901, "grad_norm": 1.484375, "learning_rate": 8.65534364294968e-06, "loss": 0.537, "step": 9452 }, { "epoch": 1.6199761930527, "grad_norm": 1.375, "learning_rate": 8.653536372716131e-06, "loss": 0.4635, "step": 9453 }, { "epoch": 1.6201493344876097, "grad_norm": 1.3984375, "learning_rate": 8.651729147272046e-06, "loss": 0.5151, "step": 9454 }, { "epoch": 1.6203224759225192, "grad_norm": 1.375, "learning_rate": 8.64992196667755e-06, "loss": 0.4184, "step": 9455 }, { "epoch": 1.6204956173574288, "grad_norm": 1.34375, "learning_rate": 8.648114830992755e-06, "loss": 0.4513, "step": 9456 }, { "epoch": 1.6206687587923385, "grad_norm": 1.40625, "learning_rate": 8.646307740277773e-06, "loss": 0.4318, "step": 9457 }, { "epoch": 1.6208419002272483, "grad_norm": 1.3359375, "learning_rate": 8.644500694592716e-06, "loss": 0.43, "step": 9458 }, { "epoch": 1.6210150416621578, "grad_norm": 1.4296875, "learning_rate": 8.642693693997694e-06, "loss": 0.4622, "step": 9459 }, { "epoch": 1.6211881830970674, "grad_norm": 1.4765625, "learning_rate": 8.64088673855282e-06, "loss": 0.4887, "step": 9460 }, { "epoch": 1.6213613245319771, "grad_norm": 1.421875, "learning_rate": 8.639079828318196e-06, "loss": 0.5093, "step": 9461 }, { "epoch": 1.6215344659668867, "grad_norm": 1.3515625, "learning_rate": 8.637272963353932e-06, "loss": 0.4672, "step": 9462 }, { "epoch": 1.6217076074017962, "grad_norm": 1.296875, "learning_rate": 8.63546614372013e-06, "loss": 0.4133, "step": 9463 }, { "epoch": 1.621880748836706, "grad_norm": 1.375, "learning_rate": 8.633659369476893e-06, "loss": 0.4946, "step": 9464 }, { "epoch": 1.6220538902716157, "grad_norm": 1.5078125, "learning_rate": 8.631852640684323e-06, "loss": 0.4913, "step": 9465 }, { "epoch": 1.6222270317065253, "grad_norm": 1.4609375, "learning_rate": 8.630045957402521e-06, "loss": 0.503, "step": 9466 }, { "epoch": 1.6224001731414348, "grad_norm": 1.4453125, "learning_rate": 8.628239319691588e-06, "loss": 0.4676, "step": 9467 }, { "epoch": 1.6225733145763446, "grad_norm": 1.4140625, "learning_rate": 8.626432727611615e-06, "loss": 0.5036, "step": 9468 }, { "epoch": 1.6227464560112543, "grad_norm": 1.34375, "learning_rate": 8.624626181222704e-06, "loss": 0.3712, "step": 9469 }, { "epoch": 1.6229195974461639, "grad_norm": 1.546875, "learning_rate": 8.622819680584942e-06, "loss": 0.5361, "step": 9470 }, { "epoch": 1.6230927388810734, "grad_norm": 1.375, "learning_rate": 8.621013225758424e-06, "loss": 0.475, "step": 9471 }, { "epoch": 1.6232658803159832, "grad_norm": 1.421875, "learning_rate": 8.619206816803244e-06, "loss": 0.5217, "step": 9472 }, { "epoch": 1.6234390217508927, "grad_norm": 1.4921875, "learning_rate": 8.617400453779487e-06, "loss": 0.5329, "step": 9473 }, { "epoch": 1.6236121631858023, "grad_norm": 1.4765625, "learning_rate": 8.615594136747245e-06, "loss": 0.438, "step": 9474 }, { "epoch": 1.623785304620712, "grad_norm": 1.453125, "learning_rate": 8.6137878657666e-06, "loss": 0.4824, "step": 9475 }, { "epoch": 1.6239584460556218, "grad_norm": 1.3671875, "learning_rate": 8.611981640897635e-06, "loss": 0.4531, "step": 9476 }, { "epoch": 1.6241315874905313, "grad_norm": 1.4609375, "learning_rate": 8.61017546220044e-06, "loss": 0.5038, "step": 9477 }, { "epoch": 1.6243047289254409, "grad_norm": 1.3828125, "learning_rate": 8.608369329735094e-06, "loss": 0.4579, "step": 9478 }, { "epoch": 1.6244778703603506, "grad_norm": 1.375, "learning_rate": 8.606563243561678e-06, "loss": 0.5066, "step": 9479 }, { "epoch": 1.6246510117952604, "grad_norm": 1.453125, "learning_rate": 8.604757203740267e-06, "loss": 0.5034, "step": 9480 }, { "epoch": 1.62482415323017, "grad_norm": 1.40625, "learning_rate": 8.602951210330942e-06, "loss": 0.486, "step": 9481 }, { "epoch": 1.6249972946650795, "grad_norm": 1.2578125, "learning_rate": 8.601145263393775e-06, "loss": 0.4378, "step": 9482 }, { "epoch": 1.6251704360999892, "grad_norm": 1.5, "learning_rate": 8.599339362988842e-06, "loss": 0.4548, "step": 9483 }, { "epoch": 1.6253435775348988, "grad_norm": 1.4375, "learning_rate": 8.597533509176216e-06, "loss": 0.494, "step": 9484 }, { "epoch": 1.6255167189698083, "grad_norm": 1.3984375, "learning_rate": 8.595727702015967e-06, "loss": 0.417, "step": 9485 }, { "epoch": 1.625689860404718, "grad_norm": 1.5234375, "learning_rate": 8.593921941568165e-06, "loss": 0.5694, "step": 9486 }, { "epoch": 1.6258630018396278, "grad_norm": 1.3515625, "learning_rate": 8.592116227892872e-06, "loss": 0.4088, "step": 9487 }, { "epoch": 1.6260361432745374, "grad_norm": 1.3046875, "learning_rate": 8.590310561050164e-06, "loss": 0.3875, "step": 9488 }, { "epoch": 1.626209284709447, "grad_norm": 1.8125, "learning_rate": 8.5885049411001e-06, "loss": 0.4324, "step": 9489 }, { "epoch": 1.6263824261443567, "grad_norm": 1.5, "learning_rate": 8.586699368102744e-06, "loss": 0.4646, "step": 9490 }, { "epoch": 1.6265555675792664, "grad_norm": 1.484375, "learning_rate": 8.584893842118158e-06, "loss": 0.515, "step": 9491 }, { "epoch": 1.626728709014176, "grad_norm": 1.484375, "learning_rate": 8.5830883632064e-06, "loss": 0.5219, "step": 9492 }, { "epoch": 1.6269018504490855, "grad_norm": 1.3671875, "learning_rate": 8.581282931427532e-06, "loss": 0.4987, "step": 9493 }, { "epoch": 1.6270749918839953, "grad_norm": 1.375, "learning_rate": 8.579477546841608e-06, "loss": 0.4924, "step": 9494 }, { "epoch": 1.6272481333189048, "grad_norm": 1.34375, "learning_rate": 8.577672209508683e-06, "loss": 0.4486, "step": 9495 }, { "epoch": 1.6274212747538144, "grad_norm": 1.515625, "learning_rate": 8.575866919488814e-06, "loss": 0.4822, "step": 9496 }, { "epoch": 1.6275944161887241, "grad_norm": 1.421875, "learning_rate": 8.574061676842046e-06, "loss": 0.5101, "step": 9497 }, { "epoch": 1.627767557623634, "grad_norm": 1.546875, "learning_rate": 8.572256481628437e-06, "loss": 0.4816, "step": 9498 }, { "epoch": 1.6279406990585434, "grad_norm": 1.40625, "learning_rate": 8.570451333908033e-06, "loss": 0.4844, "step": 9499 }, { "epoch": 1.628113840493453, "grad_norm": 1.4140625, "learning_rate": 8.568646233740882e-06, "loss": 0.4382, "step": 9500 }, { "epoch": 1.6282869819283627, "grad_norm": 1.4609375, "learning_rate": 8.566841181187028e-06, "loss": 0.4574, "step": 9501 }, { "epoch": 1.6284601233632725, "grad_norm": 1.4453125, "learning_rate": 8.565036176306518e-06, "loss": 0.5547, "step": 9502 }, { "epoch": 1.628633264798182, "grad_norm": 1.4375, "learning_rate": 8.563231219159397e-06, "loss": 0.5048, "step": 9503 }, { "epoch": 1.6288064062330916, "grad_norm": 1.5234375, "learning_rate": 8.561426309805696e-06, "loss": 0.5162, "step": 9504 }, { "epoch": 1.6289795476680013, "grad_norm": 1.5078125, "learning_rate": 8.559621448305465e-06, "loss": 0.4593, "step": 9505 }, { "epoch": 1.629152689102911, "grad_norm": 1.4296875, "learning_rate": 8.557816634718735e-06, "loss": 0.473, "step": 9506 }, { "epoch": 1.6293258305378204, "grad_norm": 1.7109375, "learning_rate": 8.556011869105544e-06, "loss": 0.4616, "step": 9507 }, { "epoch": 1.6294989719727302, "grad_norm": 1.4921875, "learning_rate": 8.554207151525927e-06, "loss": 0.4756, "step": 9508 }, { "epoch": 1.62967211340764, "grad_norm": 1.359375, "learning_rate": 8.552402482039921e-06, "loss": 0.497, "step": 9509 }, { "epoch": 1.6298452548425495, "grad_norm": 1.390625, "learning_rate": 8.550597860707553e-06, "loss": 0.4749, "step": 9510 }, { "epoch": 1.630018396277459, "grad_norm": 1.4296875, "learning_rate": 8.548793287588852e-06, "loss": 0.4784, "step": 9511 }, { "epoch": 1.6301915377123688, "grad_norm": 1.3984375, "learning_rate": 8.546988762743852e-06, "loss": 0.4431, "step": 9512 }, { "epoch": 1.6303646791472786, "grad_norm": 1.4375, "learning_rate": 8.545184286232572e-06, "loss": 0.4741, "step": 9513 }, { "epoch": 1.630537820582188, "grad_norm": 1.40625, "learning_rate": 8.543379858115045e-06, "loss": 0.4675, "step": 9514 }, { "epoch": 1.6307109620170976, "grad_norm": 1.46875, "learning_rate": 8.541575478451288e-06, "loss": 0.4542, "step": 9515 }, { "epoch": 1.6308841034520074, "grad_norm": 1.3515625, "learning_rate": 8.539771147301323e-06, "loss": 0.3902, "step": 9516 }, { "epoch": 1.631057244886917, "grad_norm": 1.421875, "learning_rate": 8.537966864725176e-06, "loss": 0.4998, "step": 9517 }, { "epoch": 1.6312303863218265, "grad_norm": 1.3828125, "learning_rate": 8.53616263078286e-06, "loss": 0.4441, "step": 9518 }, { "epoch": 1.6314035277567362, "grad_norm": 1.421875, "learning_rate": 8.53435844553439e-06, "loss": 0.4893, "step": 9519 }, { "epoch": 1.631576669191646, "grad_norm": 1.4140625, "learning_rate": 8.532554309039789e-06, "loss": 0.5314, "step": 9520 }, { "epoch": 1.6317498106265556, "grad_norm": 1.46875, "learning_rate": 8.530750221359069e-06, "loss": 0.5246, "step": 9521 }, { "epoch": 1.631922952061465, "grad_norm": 1.6953125, "learning_rate": 8.528946182552237e-06, "loss": 0.4205, "step": 9522 }, { "epoch": 1.6320960934963749, "grad_norm": 1.3828125, "learning_rate": 8.527142192679308e-06, "loss": 0.5072, "step": 9523 }, { "epoch": 1.6322692349312846, "grad_norm": 1.6328125, "learning_rate": 8.52533825180029e-06, "loss": 0.4706, "step": 9524 }, { "epoch": 1.6324423763661942, "grad_norm": 1.4375, "learning_rate": 8.52353435997519e-06, "loss": 0.4673, "step": 9525 }, { "epoch": 1.6326155178011037, "grad_norm": 1.421875, "learning_rate": 8.521730517264012e-06, "loss": 0.5236, "step": 9526 }, { "epoch": 1.6327886592360135, "grad_norm": 1.3671875, "learning_rate": 8.519926723726762e-06, "loss": 0.4327, "step": 9527 }, { "epoch": 1.6329618006709232, "grad_norm": 1.453125, "learning_rate": 8.51812297942344e-06, "loss": 0.4523, "step": 9528 }, { "epoch": 1.6331349421058325, "grad_norm": 1.5859375, "learning_rate": 8.516319284414052e-06, "loss": 0.5588, "step": 9529 }, { "epoch": 1.6333080835407423, "grad_norm": 1.4609375, "learning_rate": 8.514515638758586e-06, "loss": 0.541, "step": 9530 }, { "epoch": 1.633481224975652, "grad_norm": 1.4609375, "learning_rate": 8.512712042517053e-06, "loss": 0.4716, "step": 9531 }, { "epoch": 1.6336543664105616, "grad_norm": 1.4609375, "learning_rate": 8.51090849574944e-06, "loss": 0.4686, "step": 9532 }, { "epoch": 1.6338275078454711, "grad_norm": 1.515625, "learning_rate": 8.509104998515747e-06, "loss": 0.464, "step": 9533 }, { "epoch": 1.634000649280381, "grad_norm": 1.375, "learning_rate": 8.50730155087596e-06, "loss": 0.4879, "step": 9534 }, { "epoch": 1.6341737907152907, "grad_norm": 1.421875, "learning_rate": 8.505498152890073e-06, "loss": 0.5034, "step": 9535 }, { "epoch": 1.6343469321502002, "grad_norm": 1.46875, "learning_rate": 8.503694804618077e-06, "loss": 0.438, "step": 9536 }, { "epoch": 1.6345200735851098, "grad_norm": 1.4453125, "learning_rate": 8.501891506119956e-06, "loss": 0.489, "step": 9537 }, { "epoch": 1.6346932150200195, "grad_norm": 1.3984375, "learning_rate": 8.5000882574557e-06, "loss": 0.4199, "step": 9538 }, { "epoch": 1.6348663564549293, "grad_norm": 1.4609375, "learning_rate": 8.498285058685289e-06, "loss": 0.4572, "step": 9539 }, { "epoch": 1.6350394978898386, "grad_norm": 1.328125, "learning_rate": 8.496481909868706e-06, "loss": 0.4558, "step": 9540 }, { "epoch": 1.6352126393247484, "grad_norm": 1.3984375, "learning_rate": 8.494678811065931e-06, "loss": 0.412, "step": 9541 }, { "epoch": 1.6353857807596581, "grad_norm": 1.3984375, "learning_rate": 8.492875762336947e-06, "loss": 0.4811, "step": 9542 }, { "epoch": 1.6355589221945677, "grad_norm": 1.390625, "learning_rate": 8.491072763741734e-06, "loss": 0.4568, "step": 9543 }, { "epoch": 1.6357320636294772, "grad_norm": 1.453125, "learning_rate": 8.48926981534026e-06, "loss": 0.4416, "step": 9544 }, { "epoch": 1.635905205064387, "grad_norm": 1.375, "learning_rate": 8.487466917192505e-06, "loss": 0.4178, "step": 9545 }, { "epoch": 1.6360783464992967, "grad_norm": 1.4140625, "learning_rate": 8.485664069358438e-06, "loss": 0.5217, "step": 9546 }, { "epoch": 1.6362514879342063, "grad_norm": 1.4609375, "learning_rate": 8.483861271898032e-06, "loss": 0.5152, "step": 9547 }, { "epoch": 1.6364246293691158, "grad_norm": 1.3984375, "learning_rate": 8.482058524871257e-06, "loss": 0.4791, "step": 9548 }, { "epoch": 1.6365977708040256, "grad_norm": 1.34375, "learning_rate": 8.480255828338077e-06, "loss": 0.4302, "step": 9549 }, { "epoch": 1.6367709122389353, "grad_norm": 1.625, "learning_rate": 8.478453182358462e-06, "loss": 0.4446, "step": 9550 }, { "epoch": 1.6369440536738447, "grad_norm": 1.3984375, "learning_rate": 8.476650586992372e-06, "loss": 0.466, "step": 9551 }, { "epoch": 1.6371171951087544, "grad_norm": 1.5234375, "learning_rate": 8.474848042299769e-06, "loss": 0.4486, "step": 9552 }, { "epoch": 1.6372903365436642, "grad_norm": 1.484375, "learning_rate": 8.473045548340619e-06, "loss": 0.4762, "step": 9553 }, { "epoch": 1.6374634779785737, "grad_norm": 1.4921875, "learning_rate": 8.471243105174879e-06, "loss": 0.5004, "step": 9554 }, { "epoch": 1.6376366194134833, "grad_norm": 1.5078125, "learning_rate": 8.469440712862507e-06, "loss": 0.4998, "step": 9555 }, { "epoch": 1.637809760848393, "grad_norm": 1.40625, "learning_rate": 8.467638371463455e-06, "loss": 0.4721, "step": 9556 }, { "epoch": 1.6379829022833028, "grad_norm": 1.359375, "learning_rate": 8.465836081037683e-06, "loss": 0.4358, "step": 9557 }, { "epoch": 1.6381560437182123, "grad_norm": 1.3984375, "learning_rate": 8.464033841645138e-06, "loss": 0.495, "step": 9558 }, { "epoch": 1.6383291851531219, "grad_norm": 1.5625, "learning_rate": 8.462231653345772e-06, "loss": 0.5319, "step": 9559 }, { "epoch": 1.6385023265880316, "grad_norm": 1.34375, "learning_rate": 8.460429516199538e-06, "loss": 0.4611, "step": 9560 }, { "epoch": 1.6386754680229414, "grad_norm": 1.3671875, "learning_rate": 8.458627430266375e-06, "loss": 0.4418, "step": 9561 }, { "epoch": 1.638848609457851, "grad_norm": 1.53125, "learning_rate": 8.456825395606233e-06, "loss": 0.5027, "step": 9562 }, { "epoch": 1.6390217508927605, "grad_norm": 1.40625, "learning_rate": 8.455023412279058e-06, "loss": 0.4502, "step": 9563 }, { "epoch": 1.6391948923276702, "grad_norm": 1.3203125, "learning_rate": 8.453221480344791e-06, "loss": 0.4599, "step": 9564 }, { "epoch": 1.6393680337625798, "grad_norm": 1.515625, "learning_rate": 8.45141959986337e-06, "loss": 0.4557, "step": 9565 }, { "epoch": 1.6395411751974893, "grad_norm": 1.4375, "learning_rate": 8.449617770894737e-06, "loss": 0.5574, "step": 9566 }, { "epoch": 1.639714316632399, "grad_norm": 1.359375, "learning_rate": 8.447815993498828e-06, "loss": 0.4667, "step": 9567 }, { "epoch": 1.6398874580673088, "grad_norm": 1.3828125, "learning_rate": 8.446014267735576e-06, "loss": 0.4415, "step": 9568 }, { "epoch": 1.6400605995022184, "grad_norm": 1.46875, "learning_rate": 8.444212593664917e-06, "loss": 0.4997, "step": 9569 }, { "epoch": 1.640233740937128, "grad_norm": 1.3046875, "learning_rate": 8.442410971346782e-06, "loss": 0.4819, "step": 9570 }, { "epoch": 1.6404068823720377, "grad_norm": 1.2734375, "learning_rate": 8.440609400841098e-06, "loss": 0.4117, "step": 9571 }, { "epoch": 1.6405800238069475, "grad_norm": 1.4375, "learning_rate": 8.438807882207802e-06, "loss": 0.4351, "step": 9572 }, { "epoch": 1.640753165241857, "grad_norm": 1.390625, "learning_rate": 8.437006415506807e-06, "loss": 0.5473, "step": 9573 }, { "epoch": 1.6409263066767665, "grad_norm": 1.421875, "learning_rate": 8.435205000798051e-06, "loss": 0.437, "step": 9574 }, { "epoch": 1.6410994481116763, "grad_norm": 1.5, "learning_rate": 8.433403638141452e-06, "loss": 0.549, "step": 9575 }, { "epoch": 1.6412725895465858, "grad_norm": 1.375, "learning_rate": 8.431602327596932e-06, "loss": 0.4259, "step": 9576 }, { "epoch": 1.6414457309814954, "grad_norm": 1.4140625, "learning_rate": 8.429801069224411e-06, "loss": 0.5591, "step": 9577 }, { "epoch": 1.6416188724164051, "grad_norm": 1.4609375, "learning_rate": 8.427999863083803e-06, "loss": 0.4967, "step": 9578 }, { "epoch": 1.641792013851315, "grad_norm": 1.5234375, "learning_rate": 8.426198709235032e-06, "loss": 0.4685, "step": 9579 }, { "epoch": 1.6419651552862244, "grad_norm": 1.421875, "learning_rate": 8.424397607738007e-06, "loss": 0.5104, "step": 9580 }, { "epoch": 1.642138296721134, "grad_norm": 1.5234375, "learning_rate": 8.42259655865264e-06, "loss": 0.4862, "step": 9581 }, { "epoch": 1.6423114381560437, "grad_norm": 1.5, "learning_rate": 8.420795562038846e-06, "loss": 0.512, "step": 9582 }, { "epoch": 1.6424845795909535, "grad_norm": 1.390625, "learning_rate": 8.418994617956531e-06, "loss": 0.484, "step": 9583 }, { "epoch": 1.642657721025863, "grad_norm": 1.4375, "learning_rate": 8.4171937264656e-06, "loss": 0.4422, "step": 9584 }, { "epoch": 1.6428308624607726, "grad_norm": 1.4296875, "learning_rate": 8.415392887625968e-06, "loss": 0.49, "step": 9585 }, { "epoch": 1.6430040038956824, "grad_norm": 1.4296875, "learning_rate": 8.413592101497532e-06, "loss": 0.4588, "step": 9586 }, { "epoch": 1.643177145330592, "grad_norm": 1.4296875, "learning_rate": 8.411791368140197e-06, "loss": 0.4637, "step": 9587 }, { "epoch": 1.6433502867655014, "grad_norm": 1.3671875, "learning_rate": 8.409990687613862e-06, "loss": 0.4364, "step": 9588 }, { "epoch": 1.6435234282004112, "grad_norm": 1.4375, "learning_rate": 8.408190059978426e-06, "loss": 0.4932, "step": 9589 }, { "epoch": 1.643696569635321, "grad_norm": 1.5625, "learning_rate": 8.406389485293786e-06, "loss": 0.5165, "step": 9590 }, { "epoch": 1.6438697110702305, "grad_norm": 1.3828125, "learning_rate": 8.404588963619839e-06, "loss": 0.4397, "step": 9591 }, { "epoch": 1.64404285250514, "grad_norm": 1.3359375, "learning_rate": 8.402788495016475e-06, "loss": 0.5296, "step": 9592 }, { "epoch": 1.6442159939400498, "grad_norm": 1.421875, "learning_rate": 8.40098807954359e-06, "loss": 0.4994, "step": 9593 }, { "epoch": 1.6443891353749596, "grad_norm": 1.3515625, "learning_rate": 8.399187717261071e-06, "loss": 0.4563, "step": 9594 }, { "epoch": 1.644562276809869, "grad_norm": 1.453125, "learning_rate": 8.397387408228803e-06, "loss": 0.4421, "step": 9595 }, { "epoch": 1.6447354182447786, "grad_norm": 1.4453125, "learning_rate": 8.395587152506679e-06, "loss": 0.5079, "step": 9596 }, { "epoch": 1.6449085596796884, "grad_norm": 1.5390625, "learning_rate": 8.393786950154581e-06, "loss": 0.4403, "step": 9597 }, { "epoch": 1.645081701114598, "grad_norm": 1.4140625, "learning_rate": 8.391986801232392e-06, "loss": 0.4443, "step": 9598 }, { "epoch": 1.6452548425495075, "grad_norm": 1.4765625, "learning_rate": 8.390186705799993e-06, "loss": 0.4646, "step": 9599 }, { "epoch": 1.6454279839844173, "grad_norm": 1.4140625, "learning_rate": 8.388386663917264e-06, "loss": 0.4696, "step": 9600 }, { "epoch": 1.645601125419327, "grad_norm": 1.3203125, "learning_rate": 8.386586675644083e-06, "loss": 0.4239, "step": 9601 }, { "epoch": 1.6457742668542366, "grad_norm": 1.328125, "learning_rate": 8.384786741040323e-06, "loss": 0.4202, "step": 9602 }, { "epoch": 1.645947408289146, "grad_norm": 1.40625, "learning_rate": 8.382986860165859e-06, "loss": 0.4652, "step": 9603 }, { "epoch": 1.6461205497240559, "grad_norm": 1.359375, "learning_rate": 8.381187033080561e-06, "loss": 0.451, "step": 9604 }, { "epoch": 1.6462936911589656, "grad_norm": 1.3828125, "learning_rate": 8.379387259844307e-06, "loss": 0.4716, "step": 9605 }, { "epoch": 1.6464668325938752, "grad_norm": 1.3984375, "learning_rate": 8.377587540516954e-06, "loss": 0.4508, "step": 9606 }, { "epoch": 1.6466399740287847, "grad_norm": 1.3203125, "learning_rate": 8.375787875158381e-06, "loss": 0.4729, "step": 9607 }, { "epoch": 1.6468131154636945, "grad_norm": 1.5078125, "learning_rate": 8.373988263828446e-06, "loss": 0.4531, "step": 9608 }, { "epoch": 1.646986256898604, "grad_norm": 1.3515625, "learning_rate": 8.372188706587015e-06, "loss": 0.4078, "step": 9609 }, { "epoch": 1.6471593983335135, "grad_norm": 1.3515625, "learning_rate": 8.370389203493947e-06, "loss": 0.4411, "step": 9610 }, { "epoch": 1.6473325397684233, "grad_norm": 1.359375, "learning_rate": 8.368589754609103e-06, "loss": 0.4985, "step": 9611 }, { "epoch": 1.647505681203333, "grad_norm": 1.4140625, "learning_rate": 8.36679035999234e-06, "loss": 0.4916, "step": 9612 }, { "epoch": 1.6476788226382426, "grad_norm": 1.90625, "learning_rate": 8.364991019703515e-06, "loss": 0.4825, "step": 9613 }, { "epoch": 1.6478519640731522, "grad_norm": 1.375, "learning_rate": 8.363191733802483e-06, "loss": 0.5278, "step": 9614 }, { "epoch": 1.648025105508062, "grad_norm": 1.4453125, "learning_rate": 8.361392502349092e-06, "loss": 0.4292, "step": 9615 }, { "epoch": 1.6481982469429717, "grad_norm": 1.34375, "learning_rate": 8.359593325403195e-06, "loss": 0.4075, "step": 9616 }, { "epoch": 1.6483713883778812, "grad_norm": 1.4609375, "learning_rate": 8.357794203024643e-06, "loss": 0.4748, "step": 9617 }, { "epoch": 1.6485445298127908, "grad_norm": 1.3203125, "learning_rate": 8.355995135273282e-06, "loss": 0.4491, "step": 9618 }, { "epoch": 1.6487176712477005, "grad_norm": 1.5078125, "learning_rate": 8.354196122208958e-06, "loss": 0.4936, "step": 9619 }, { "epoch": 1.64889081268261, "grad_norm": 1.4375, "learning_rate": 8.352397163891509e-06, "loss": 0.4382, "step": 9620 }, { "epoch": 1.6490639541175196, "grad_norm": 1.40625, "learning_rate": 8.350598260380785e-06, "loss": 0.4458, "step": 9621 }, { "epoch": 1.6492370955524294, "grad_norm": 1.421875, "learning_rate": 8.348799411736616e-06, "loss": 0.4176, "step": 9622 }, { "epoch": 1.6494102369873391, "grad_norm": 1.46875, "learning_rate": 8.347000618018848e-06, "loss": 0.4753, "step": 9623 }, { "epoch": 1.6495833784222487, "grad_norm": 1.390625, "learning_rate": 8.345201879287313e-06, "loss": 0.4172, "step": 9624 }, { "epoch": 1.6497565198571582, "grad_norm": 1.4765625, "learning_rate": 8.343403195601846e-06, "loss": 0.5451, "step": 9625 }, { "epoch": 1.649929661292068, "grad_norm": 1.4296875, "learning_rate": 8.34160456702228e-06, "loss": 0.4675, "step": 9626 }, { "epoch": 1.6501028027269777, "grad_norm": 1.4375, "learning_rate": 8.33980599360844e-06, "loss": 0.5139, "step": 9627 }, { "epoch": 1.6502759441618873, "grad_norm": 1.6015625, "learning_rate": 8.338007475420165e-06, "loss": 0.4962, "step": 9628 }, { "epoch": 1.6504490855967968, "grad_norm": 1.5546875, "learning_rate": 8.336209012517273e-06, "loss": 0.4706, "step": 9629 }, { "epoch": 1.6506222270317066, "grad_norm": 1.4609375, "learning_rate": 8.334410604959594e-06, "loss": 0.4295, "step": 9630 }, { "epoch": 1.6507953684666161, "grad_norm": 1.3828125, "learning_rate": 8.332612252806953e-06, "loss": 0.4668, "step": 9631 }, { "epoch": 1.6509685099015257, "grad_norm": 1.3515625, "learning_rate": 8.330813956119163e-06, "loss": 0.4441, "step": 9632 }, { "epoch": 1.6511416513364354, "grad_norm": 1.4296875, "learning_rate": 8.32901571495605e-06, "loss": 0.4976, "step": 9633 }, { "epoch": 1.6513147927713452, "grad_norm": 1.40625, "learning_rate": 8.32721752937743e-06, "loss": 0.4758, "step": 9634 }, { "epoch": 1.6514879342062547, "grad_norm": 1.3515625, "learning_rate": 8.325419399443118e-06, "loss": 0.463, "step": 9635 }, { "epoch": 1.6516610756411643, "grad_norm": 1.375, "learning_rate": 8.323621325212932e-06, "loss": 0.414, "step": 9636 }, { "epoch": 1.651834217076074, "grad_norm": 1.5078125, "learning_rate": 8.321823306746677e-06, "loss": 0.5709, "step": 9637 }, { "epoch": 1.6520073585109838, "grad_norm": 1.375, "learning_rate": 8.320025344104165e-06, "loss": 0.4704, "step": 9638 }, { "epoch": 1.6521804999458933, "grad_norm": 1.3984375, "learning_rate": 8.31822743734521e-06, "loss": 0.4176, "step": 9639 }, { "epoch": 1.6523536413808029, "grad_norm": 1.4375, "learning_rate": 8.316429586529616e-06, "loss": 0.4749, "step": 9640 }, { "epoch": 1.6525267828157126, "grad_norm": 1.3046875, "learning_rate": 8.314631791717184e-06, "loss": 0.4188, "step": 9641 }, { "epoch": 1.6526999242506222, "grad_norm": 1.40625, "learning_rate": 8.31283405296772e-06, "loss": 0.4662, "step": 9642 }, { "epoch": 1.6528730656855317, "grad_norm": 1.4453125, "learning_rate": 8.311036370341025e-06, "loss": 0.5083, "step": 9643 }, { "epoch": 1.6530462071204415, "grad_norm": 1.3828125, "learning_rate": 8.309238743896897e-06, "loss": 0.5002, "step": 9644 }, { "epoch": 1.6532193485553512, "grad_norm": 1.3984375, "learning_rate": 8.307441173695134e-06, "loss": 0.4749, "step": 9645 }, { "epoch": 1.6533924899902608, "grad_norm": 1.3828125, "learning_rate": 8.30564365979553e-06, "loss": 0.5167, "step": 9646 }, { "epoch": 1.6535656314251703, "grad_norm": 1.4921875, "learning_rate": 8.303846202257879e-06, "loss": 0.4662, "step": 9647 }, { "epoch": 1.65373877286008, "grad_norm": 1.4609375, "learning_rate": 8.302048801141974e-06, "loss": 0.4805, "step": 9648 }, { "epoch": 1.6539119142949898, "grad_norm": 1.328125, "learning_rate": 8.300251456507599e-06, "loss": 0.4536, "step": 9649 }, { "epoch": 1.6540850557298994, "grad_norm": 1.3125, "learning_rate": 8.29845416841455e-06, "loss": 0.4152, "step": 9650 }, { "epoch": 1.654258197164809, "grad_norm": 1.4140625, "learning_rate": 8.296656936922609e-06, "loss": 0.4736, "step": 9651 }, { "epoch": 1.6544313385997187, "grad_norm": 1.34375, "learning_rate": 8.29485976209156e-06, "loss": 0.3987, "step": 9652 }, { "epoch": 1.6546044800346282, "grad_norm": 1.484375, "learning_rate": 8.293062643981187e-06, "loss": 0.4902, "step": 9653 }, { "epoch": 1.6547776214695378, "grad_norm": 1.390625, "learning_rate": 8.291265582651266e-06, "loss": 0.4243, "step": 9654 }, { "epoch": 1.6549507629044475, "grad_norm": 1.4375, "learning_rate": 8.289468578161581e-06, "loss": 0.439, "step": 9655 }, { "epoch": 1.6551239043393573, "grad_norm": 1.46875, "learning_rate": 8.287671630571903e-06, "loss": 0.5048, "step": 9656 }, { "epoch": 1.6552970457742668, "grad_norm": 1.3515625, "learning_rate": 8.285874739942013e-06, "loss": 0.4478, "step": 9657 }, { "epoch": 1.6554701872091764, "grad_norm": 1.3828125, "learning_rate": 8.284077906331675e-06, "loss": 0.4326, "step": 9658 }, { "epoch": 1.6556433286440861, "grad_norm": 1.4140625, "learning_rate": 8.282281129800667e-06, "loss": 0.4599, "step": 9659 }, { "epoch": 1.655816470078996, "grad_norm": 1.3671875, "learning_rate": 8.280484410408752e-06, "loss": 0.5339, "step": 9660 }, { "epoch": 1.6559896115139054, "grad_norm": 1.3515625, "learning_rate": 8.278687748215705e-06, "loss": 0.4255, "step": 9661 }, { "epoch": 1.656162752948815, "grad_norm": 1.6171875, "learning_rate": 8.276891143281285e-06, "loss": 0.4857, "step": 9662 }, { "epoch": 1.6563358943837247, "grad_norm": 1.3828125, "learning_rate": 8.275094595665258e-06, "loss": 0.4647, "step": 9663 }, { "epoch": 1.6565090358186345, "grad_norm": 1.2734375, "learning_rate": 8.273298105427386e-06, "loss": 0.4447, "step": 9664 }, { "epoch": 1.6566821772535438, "grad_norm": 1.3984375, "learning_rate": 8.271501672627423e-06, "loss": 0.4753, "step": 9665 }, { "epoch": 1.6568553186884536, "grad_norm": 1.546875, "learning_rate": 8.269705297325131e-06, "loss": 0.4567, "step": 9666 }, { "epoch": 1.6570284601233634, "grad_norm": 1.375, "learning_rate": 8.267908979580267e-06, "loss": 0.4909, "step": 9667 }, { "epoch": 1.657201601558273, "grad_norm": 1.34375, "learning_rate": 8.266112719452579e-06, "loss": 0.4123, "step": 9668 }, { "epoch": 1.6573747429931824, "grad_norm": 1.3828125, "learning_rate": 8.264316517001827e-06, "loss": 0.4281, "step": 9669 }, { "epoch": 1.6575478844280922, "grad_norm": 1.453125, "learning_rate": 8.26252037228775e-06, "loss": 0.4913, "step": 9670 }, { "epoch": 1.657721025863002, "grad_norm": 1.4453125, "learning_rate": 8.260724285370102e-06, "loss": 0.4916, "step": 9671 }, { "epoch": 1.6578941672979115, "grad_norm": 1.390625, "learning_rate": 8.25892825630863e-06, "loss": 0.4259, "step": 9672 }, { "epoch": 1.658067308732821, "grad_norm": 1.4609375, "learning_rate": 8.25713228516308e-06, "loss": 0.4691, "step": 9673 }, { "epoch": 1.6582404501677308, "grad_norm": 1.375, "learning_rate": 8.255336371993188e-06, "loss": 0.4543, "step": 9674 }, { "epoch": 1.6584135916026406, "grad_norm": 1.328125, "learning_rate": 8.253540516858696e-06, "loss": 0.4628, "step": 9675 }, { "epoch": 1.6585867330375499, "grad_norm": 1.3828125, "learning_rate": 8.251744719819347e-06, "loss": 0.4317, "step": 9676 }, { "epoch": 1.6587598744724597, "grad_norm": 1.421875, "learning_rate": 8.249948980934872e-06, "loss": 0.4993, "step": 9677 }, { "epoch": 1.6589330159073694, "grad_norm": 1.4921875, "learning_rate": 8.248153300265007e-06, "loss": 0.4892, "step": 9678 }, { "epoch": 1.659106157342279, "grad_norm": 1.359375, "learning_rate": 8.246357677869484e-06, "loss": 0.4321, "step": 9679 }, { "epoch": 1.6592792987771885, "grad_norm": 1.3046875, "learning_rate": 8.244562113808033e-06, "loss": 0.4522, "step": 9680 }, { "epoch": 1.6594524402120983, "grad_norm": 1.375, "learning_rate": 8.242766608140383e-06, "loss": 0.4354, "step": 9681 }, { "epoch": 1.659625581647008, "grad_norm": 1.3359375, "learning_rate": 8.240971160926262e-06, "loss": 0.4191, "step": 9682 }, { "epoch": 1.6597987230819176, "grad_norm": 1.453125, "learning_rate": 8.239175772225397e-06, "loss": 0.5051, "step": 9683 }, { "epoch": 1.659971864516827, "grad_norm": 1.5, "learning_rate": 8.237380442097504e-06, "loss": 0.5616, "step": 9684 }, { "epoch": 1.6601450059517369, "grad_norm": 1.453125, "learning_rate": 8.235585170602312e-06, "loss": 0.5378, "step": 9685 }, { "epoch": 1.6603181473866466, "grad_norm": 1.4375, "learning_rate": 8.233789957799531e-06, "loss": 0.4746, "step": 9686 }, { "epoch": 1.660491288821556, "grad_norm": 1.3671875, "learning_rate": 8.231994803748885e-06, "loss": 0.4441, "step": 9687 }, { "epoch": 1.6606644302564657, "grad_norm": 1.421875, "learning_rate": 8.230199708510086e-06, "loss": 0.5201, "step": 9688 }, { "epoch": 1.6608375716913755, "grad_norm": 1.40625, "learning_rate": 8.228404672142846e-06, "loss": 0.4481, "step": 9689 }, { "epoch": 1.661010713126285, "grad_norm": 1.453125, "learning_rate": 8.226609694706879e-06, "loss": 0.4098, "step": 9690 }, { "epoch": 1.6611838545611946, "grad_norm": 1.5078125, "learning_rate": 8.224814776261889e-06, "loss": 0.5147, "step": 9691 }, { "epoch": 1.6613569959961043, "grad_norm": 1.34375, "learning_rate": 8.223019916867586e-06, "loss": 0.4593, "step": 9692 }, { "epoch": 1.661530137431014, "grad_norm": 1.609375, "learning_rate": 8.221225116583677e-06, "loss": 0.4546, "step": 9693 }, { "epoch": 1.6617032788659236, "grad_norm": 1.421875, "learning_rate": 8.219430375469863e-06, "loss": 0.4547, "step": 9694 }, { "epoch": 1.6618764203008332, "grad_norm": 1.4375, "learning_rate": 8.21763569358585e-06, "loss": 0.4364, "step": 9695 }, { "epoch": 1.662049561735743, "grad_norm": 1.4296875, "learning_rate": 8.21584107099133e-06, "loss": 0.508, "step": 9696 }, { "epoch": 1.6622227031706527, "grad_norm": 1.3984375, "learning_rate": 8.214046507746005e-06, "loss": 0.465, "step": 9697 }, { "epoch": 1.6623958446055622, "grad_norm": 1.4140625, "learning_rate": 8.212252003909567e-06, "loss": 0.4832, "step": 9698 }, { "epoch": 1.6625689860404718, "grad_norm": 1.546875, "learning_rate": 8.210457559541711e-06, "loss": 0.5112, "step": 9699 }, { "epoch": 1.6627421274753815, "grad_norm": 1.4375, "learning_rate": 8.20866317470213e-06, "loss": 0.4809, "step": 9700 }, { "epoch": 1.662915268910291, "grad_norm": 1.4453125, "learning_rate": 8.206868849450508e-06, "loss": 0.5236, "step": 9701 }, { "epoch": 1.6630884103452006, "grad_norm": 1.375, "learning_rate": 8.20507458384654e-06, "loss": 0.4721, "step": 9702 }, { "epoch": 1.6632615517801104, "grad_norm": 1.421875, "learning_rate": 8.203280377949901e-06, "loss": 0.4667, "step": 9703 }, { "epoch": 1.6634346932150201, "grad_norm": 1.5390625, "learning_rate": 8.201486231820288e-06, "loss": 0.5642, "step": 9704 }, { "epoch": 1.6636078346499297, "grad_norm": 1.4140625, "learning_rate": 8.199692145517372e-06, "loss": 0.4653, "step": 9705 }, { "epoch": 1.6637809760848392, "grad_norm": 1.4453125, "learning_rate": 8.197898119100836e-06, "loss": 0.466, "step": 9706 }, { "epoch": 1.663954117519749, "grad_norm": 1.390625, "learning_rate": 8.19610415263036e-06, "loss": 0.4683, "step": 9707 }, { "epoch": 1.6641272589546587, "grad_norm": 1.40625, "learning_rate": 8.194310246165612e-06, "loss": 0.4514, "step": 9708 }, { "epoch": 1.6643004003895683, "grad_norm": 1.3984375, "learning_rate": 8.192516399766275e-06, "loss": 0.5137, "step": 9709 }, { "epoch": 1.6644735418244778, "grad_norm": 1.53125, "learning_rate": 8.190722613492011e-06, "loss": 0.4928, "step": 9710 }, { "epoch": 1.6646466832593876, "grad_norm": 1.4453125, "learning_rate": 8.188928887402496e-06, "loss": 0.4918, "step": 9711 }, { "epoch": 1.6648198246942971, "grad_norm": 1.59375, "learning_rate": 8.187135221557396e-06, "loss": 0.543, "step": 9712 }, { "epoch": 1.6649929661292067, "grad_norm": 1.390625, "learning_rate": 8.185341616016373e-06, "loss": 0.4751, "step": 9713 }, { "epoch": 1.6651661075641164, "grad_norm": 1.390625, "learning_rate": 8.183548070839092e-06, "loss": 0.4383, "step": 9714 }, { "epoch": 1.6653392489990262, "grad_norm": 1.484375, "learning_rate": 8.181754586085218e-06, "loss": 0.4646, "step": 9715 }, { "epoch": 1.6655123904339357, "grad_norm": 1.4140625, "learning_rate": 8.179961161814409e-06, "loss": 0.4857, "step": 9716 }, { "epoch": 1.6656855318688453, "grad_norm": 1.3203125, "learning_rate": 8.178167798086319e-06, "loss": 0.4147, "step": 9717 }, { "epoch": 1.665858673303755, "grad_norm": 1.2734375, "learning_rate": 8.176374494960607e-06, "loss": 0.408, "step": 9718 }, { "epoch": 1.6660318147386648, "grad_norm": 1.3671875, "learning_rate": 8.174581252496928e-06, "loss": 0.4199, "step": 9719 }, { "epoch": 1.6662049561735743, "grad_norm": 1.4296875, "learning_rate": 8.172788070754927e-06, "loss": 0.4428, "step": 9720 }, { "epoch": 1.6663780976084839, "grad_norm": 1.4296875, "learning_rate": 8.17099494979426e-06, "loss": 0.441, "step": 9721 }, { "epoch": 1.6665512390433936, "grad_norm": 1.3515625, "learning_rate": 8.169201889674567e-06, "loss": 0.4308, "step": 9722 }, { "epoch": 1.6667243804783032, "grad_norm": 1.3203125, "learning_rate": 8.1674088904555e-06, "loss": 0.3981, "step": 9723 }, { "epoch": 1.6668975219132127, "grad_norm": 1.453125, "learning_rate": 8.1656159521967e-06, "loss": 0.4466, "step": 9724 }, { "epoch": 1.6670706633481225, "grad_norm": 1.4140625, "learning_rate": 8.163823074957804e-06, "loss": 0.4771, "step": 9725 }, { "epoch": 1.6672438047830322, "grad_norm": 1.3359375, "learning_rate": 8.162030258798458e-06, "loss": 0.4498, "step": 9726 }, { "epoch": 1.6674169462179418, "grad_norm": 1.3671875, "learning_rate": 8.160237503778295e-06, "loss": 0.473, "step": 9727 }, { "epoch": 1.6675900876528513, "grad_norm": 1.3359375, "learning_rate": 8.158444809956957e-06, "loss": 0.4576, "step": 9728 }, { "epoch": 1.667763229087761, "grad_norm": 1.4375, "learning_rate": 8.156652177394066e-06, "loss": 0.4228, "step": 9729 }, { "epoch": 1.6679363705226709, "grad_norm": 1.5078125, "learning_rate": 8.154859606149259e-06, "loss": 0.5263, "step": 9730 }, { "epoch": 1.6681095119575804, "grad_norm": 1.5625, "learning_rate": 8.153067096282167e-06, "loss": 0.5009, "step": 9731 }, { "epoch": 1.66828265339249, "grad_norm": 1.34375, "learning_rate": 8.151274647852413e-06, "loss": 0.4289, "step": 9732 }, { "epoch": 1.6684557948273997, "grad_norm": 1.3125, "learning_rate": 8.149482260919625e-06, "loss": 0.4374, "step": 9733 }, { "epoch": 1.6686289362623092, "grad_norm": 1.421875, "learning_rate": 8.147689935543423e-06, "loss": 0.4484, "step": 9734 }, { "epoch": 1.6688020776972188, "grad_norm": 1.46875, "learning_rate": 8.145897671783427e-06, "loss": 0.4756, "step": 9735 }, { "epoch": 1.6689752191321285, "grad_norm": 1.4609375, "learning_rate": 8.144105469699259e-06, "loss": 0.598, "step": 9736 }, { "epoch": 1.6691483605670383, "grad_norm": 1.4609375, "learning_rate": 8.142313329350539e-06, "loss": 0.4909, "step": 9737 }, { "epoch": 1.6693215020019478, "grad_norm": 1.4375, "learning_rate": 8.140521250796874e-06, "loss": 0.4605, "step": 9738 }, { "epoch": 1.6694946434368574, "grad_norm": 1.421875, "learning_rate": 8.138729234097882e-06, "loss": 0.433, "step": 9739 }, { "epoch": 1.6696677848717671, "grad_norm": 1.328125, "learning_rate": 8.136937279313171e-06, "loss": 0.4232, "step": 9740 }, { "epoch": 1.669840926306677, "grad_norm": 1.359375, "learning_rate": 8.135145386502351e-06, "loss": 0.4792, "step": 9741 }, { "epoch": 1.6700140677415864, "grad_norm": 1.453125, "learning_rate": 8.133353555725027e-06, "loss": 0.4837, "step": 9742 }, { "epoch": 1.670187209176496, "grad_norm": 1.328125, "learning_rate": 8.131561787040806e-06, "loss": 0.5265, "step": 9743 }, { "epoch": 1.6703603506114058, "grad_norm": 1.4765625, "learning_rate": 8.129770080509285e-06, "loss": 0.4601, "step": 9744 }, { "epoch": 1.6705334920463153, "grad_norm": 1.6484375, "learning_rate": 8.127978436190071e-06, "loss": 0.4959, "step": 9745 }, { "epoch": 1.6707066334812248, "grad_norm": 1.9609375, "learning_rate": 8.126186854142752e-06, "loss": 0.514, "step": 9746 }, { "epoch": 1.6708797749161346, "grad_norm": 1.5234375, "learning_rate": 8.124395334426938e-06, "loss": 0.4891, "step": 9747 }, { "epoch": 1.6710529163510444, "grad_norm": 1.453125, "learning_rate": 8.122603877102214e-06, "loss": 0.5426, "step": 9748 }, { "epoch": 1.671226057785954, "grad_norm": 1.4453125, "learning_rate": 8.120812482228176e-06, "loss": 0.438, "step": 9749 }, { "epoch": 1.6713991992208634, "grad_norm": 1.390625, "learning_rate": 8.11902114986441e-06, "loss": 0.4436, "step": 9750 }, { "epoch": 1.6715723406557732, "grad_norm": 1.4765625, "learning_rate": 8.117229880070505e-06, "loss": 0.4741, "step": 9751 }, { "epoch": 1.671745482090683, "grad_norm": 1.3515625, "learning_rate": 8.11543867290605e-06, "loss": 0.4253, "step": 9752 }, { "epoch": 1.6719186235255925, "grad_norm": 1.375, "learning_rate": 8.113647528430623e-06, "loss": 0.4524, "step": 9753 }, { "epoch": 1.672091764960502, "grad_norm": 1.5234375, "learning_rate": 8.111856446703812e-06, "loss": 0.4641, "step": 9754 }, { "epoch": 1.6722649063954118, "grad_norm": 1.4296875, "learning_rate": 8.110065427785192e-06, "loss": 0.4334, "step": 9755 }, { "epoch": 1.6724380478303214, "grad_norm": 1.4921875, "learning_rate": 8.108274471734341e-06, "loss": 0.494, "step": 9756 }, { "epoch": 1.672611189265231, "grad_norm": 1.5625, "learning_rate": 8.106483578610832e-06, "loss": 0.4772, "step": 9757 }, { "epoch": 1.6727843307001407, "grad_norm": 1.46875, "learning_rate": 8.104692748474244e-06, "loss": 0.4361, "step": 9758 }, { "epoch": 1.6729574721350504, "grad_norm": 1.3984375, "learning_rate": 8.102901981384146e-06, "loss": 0.4924, "step": 9759 }, { "epoch": 1.67313061356996, "grad_norm": 1.7890625, "learning_rate": 8.101111277400105e-06, "loss": 0.523, "step": 9760 }, { "epoch": 1.6733037550048695, "grad_norm": 1.46875, "learning_rate": 8.099320636581691e-06, "loss": 0.5352, "step": 9761 }, { "epoch": 1.6734768964397793, "grad_norm": 1.3671875, "learning_rate": 8.097530058988464e-06, "loss": 0.405, "step": 9762 }, { "epoch": 1.673650037874689, "grad_norm": 1.375, "learning_rate": 8.095739544679991e-06, "loss": 0.4148, "step": 9763 }, { "epoch": 1.6738231793095986, "grad_norm": 1.3125, "learning_rate": 8.093949093715833e-06, "loss": 0.4368, "step": 9764 }, { "epoch": 1.673996320744508, "grad_norm": 1.46875, "learning_rate": 8.092158706155544e-06, "loss": 0.481, "step": 9765 }, { "epoch": 1.6741694621794179, "grad_norm": 1.390625, "learning_rate": 8.090368382058685e-06, "loss": 0.4988, "step": 9766 }, { "epoch": 1.6743426036143274, "grad_norm": 1.375, "learning_rate": 8.088578121484807e-06, "loss": 0.4436, "step": 9767 }, { "epoch": 1.674515745049237, "grad_norm": 1.4296875, "learning_rate": 8.086787924493461e-06, "loss": 0.5207, "step": 9768 }, { "epoch": 1.6746888864841467, "grad_norm": 1.40625, "learning_rate": 8.084997791144202e-06, "loss": 0.4507, "step": 9769 }, { "epoch": 1.6748620279190565, "grad_norm": 1.34375, "learning_rate": 8.083207721496576e-06, "loss": 0.4287, "step": 9770 }, { "epoch": 1.675035169353966, "grad_norm": 1.3984375, "learning_rate": 8.081417715610129e-06, "loss": 0.5024, "step": 9771 }, { "epoch": 1.6752083107888756, "grad_norm": 1.453125, "learning_rate": 8.079627773544403e-06, "loss": 0.5771, "step": 9772 }, { "epoch": 1.6753814522237853, "grad_norm": 1.3828125, "learning_rate": 8.077837895358942e-06, "loss": 0.4681, "step": 9773 }, { "epoch": 1.675554593658695, "grad_norm": 1.3828125, "learning_rate": 8.076048081113281e-06, "loss": 0.4159, "step": 9774 }, { "epoch": 1.6757277350936046, "grad_norm": 1.4296875, "learning_rate": 8.074258330866961e-06, "loss": 0.4249, "step": 9775 }, { "epoch": 1.6759008765285142, "grad_norm": 1.3359375, "learning_rate": 8.07246864467952e-06, "loss": 0.4435, "step": 9776 }, { "epoch": 1.676074017963424, "grad_norm": 1.4375, "learning_rate": 8.070679022610485e-06, "loss": 0.4561, "step": 9777 }, { "epoch": 1.6762471593983335, "grad_norm": 1.4765625, "learning_rate": 8.068889464719392e-06, "loss": 0.4733, "step": 9778 }, { "epoch": 1.676420300833243, "grad_norm": 1.375, "learning_rate": 8.067099971065759e-06, "loss": 0.5103, "step": 9779 }, { "epoch": 1.6765934422681528, "grad_norm": 1.390625, "learning_rate": 8.065310541709128e-06, "loss": 0.4188, "step": 9780 }, { "epoch": 1.6767665837030625, "grad_norm": 1.515625, "learning_rate": 8.063521176709015e-06, "loss": 0.4605, "step": 9781 }, { "epoch": 1.676939725137972, "grad_norm": 1.4921875, "learning_rate": 8.061731876124944e-06, "loss": 0.4849, "step": 9782 }, { "epoch": 1.6771128665728816, "grad_norm": 1.2890625, "learning_rate": 8.059942640016435e-06, "loss": 0.4183, "step": 9783 }, { "epoch": 1.6772860080077914, "grad_norm": 1.4375, "learning_rate": 8.058153468443006e-06, "loss": 0.4657, "step": 9784 }, { "epoch": 1.6774591494427011, "grad_norm": 1.59375, "learning_rate": 8.056364361464176e-06, "loss": 0.4695, "step": 9785 }, { "epoch": 1.6776322908776107, "grad_norm": 1.3515625, "learning_rate": 8.054575319139452e-06, "loss": 0.4631, "step": 9786 }, { "epoch": 1.6778054323125202, "grad_norm": 1.34375, "learning_rate": 8.05278634152835e-06, "loss": 0.4802, "step": 9787 }, { "epoch": 1.67797857374743, "grad_norm": 1.328125, "learning_rate": 8.050997428690381e-06, "loss": 0.4947, "step": 9788 }, { "epoch": 1.6781517151823395, "grad_norm": 1.4609375, "learning_rate": 8.049208580685049e-06, "loss": 0.4963, "step": 9789 }, { "epoch": 1.678324856617249, "grad_norm": 1.4765625, "learning_rate": 8.047419797571856e-06, "loss": 0.4993, "step": 9790 }, { "epoch": 1.6784979980521588, "grad_norm": 1.375, "learning_rate": 8.045631079410314e-06, "loss": 0.4716, "step": 9791 }, { "epoch": 1.6786711394870686, "grad_norm": 1.4140625, "learning_rate": 8.043842426259921e-06, "loss": 0.4605, "step": 9792 }, { "epoch": 1.6788442809219781, "grad_norm": 1.34375, "learning_rate": 8.04205383818017e-06, "loss": 0.4619, "step": 9793 }, { "epoch": 1.6790174223568877, "grad_norm": 1.4140625, "learning_rate": 8.040265315230564e-06, "loss": 0.468, "step": 9794 }, { "epoch": 1.6791905637917974, "grad_norm": 1.375, "learning_rate": 8.038476857470598e-06, "loss": 0.4597, "step": 9795 }, { "epoch": 1.6793637052267072, "grad_norm": 1.375, "learning_rate": 8.036688464959757e-06, "loss": 0.4369, "step": 9796 }, { "epoch": 1.6795368466616167, "grad_norm": 1.4296875, "learning_rate": 8.034900137757537e-06, "loss": 0.4625, "step": 9797 }, { "epoch": 1.6797099880965263, "grad_norm": 1.46875, "learning_rate": 8.033111875923421e-06, "loss": 0.4376, "step": 9798 }, { "epoch": 1.679883129531436, "grad_norm": 1.4375, "learning_rate": 8.0313236795169e-06, "loss": 0.4598, "step": 9799 }, { "epoch": 1.6800562709663458, "grad_norm": 1.4453125, "learning_rate": 8.029535548597452e-06, "loss": 0.45, "step": 9800 }, { "epoch": 1.6802294124012551, "grad_norm": 1.484375, "learning_rate": 8.027747483224566e-06, "loss": 0.5342, "step": 9801 }, { "epoch": 1.6804025538361649, "grad_norm": 1.359375, "learning_rate": 8.025959483457714e-06, "loss": 0.4476, "step": 9802 }, { "epoch": 1.6805756952710746, "grad_norm": 1.46875, "learning_rate": 8.024171549356375e-06, "loss": 0.4501, "step": 9803 }, { "epoch": 1.6807488367059842, "grad_norm": 1.4140625, "learning_rate": 8.022383680980029e-06, "loss": 0.548, "step": 9804 }, { "epoch": 1.6809219781408937, "grad_norm": 1.5078125, "learning_rate": 8.020595878388139e-06, "loss": 0.5165, "step": 9805 }, { "epoch": 1.6810951195758035, "grad_norm": 1.453125, "learning_rate": 8.01880814164018e-06, "loss": 0.5333, "step": 9806 }, { "epoch": 1.6812682610107132, "grad_norm": 1.71875, "learning_rate": 8.017020470795624e-06, "loss": 0.5117, "step": 9807 }, { "epoch": 1.6814414024456228, "grad_norm": 1.53125, "learning_rate": 8.015232865913932e-06, "loss": 0.4937, "step": 9808 }, { "epoch": 1.6816145438805323, "grad_norm": 1.3828125, "learning_rate": 8.013445327054571e-06, "loss": 0.452, "step": 9809 }, { "epoch": 1.681787685315442, "grad_norm": 1.3984375, "learning_rate": 8.011657854276999e-06, "loss": 0.5048, "step": 9810 }, { "epoch": 1.6819608267503519, "grad_norm": 1.359375, "learning_rate": 8.009870447640676e-06, "loss": 0.4628, "step": 9811 }, { "epoch": 1.6821339681852612, "grad_norm": 1.3359375, "learning_rate": 8.008083107205061e-06, "loss": 0.4489, "step": 9812 }, { "epoch": 1.682307109620171, "grad_norm": 1.53125, "learning_rate": 8.006295833029612e-06, "loss": 0.4671, "step": 9813 }, { "epoch": 1.6824802510550807, "grad_norm": 1.3046875, "learning_rate": 8.004508625173774e-06, "loss": 0.4726, "step": 9814 }, { "epoch": 1.6826533924899902, "grad_norm": 1.3359375, "learning_rate": 8.002721483697005e-06, "loss": 0.4255, "step": 9815 }, { "epoch": 1.6828265339248998, "grad_norm": 1.3515625, "learning_rate": 8.000934408658751e-06, "loss": 0.4156, "step": 9816 }, { "epoch": 1.6829996753598095, "grad_norm": 1.6171875, "learning_rate": 7.999147400118457e-06, "loss": 0.4442, "step": 9817 }, { "epoch": 1.6831728167947193, "grad_norm": 1.4609375, "learning_rate": 7.997360458135569e-06, "loss": 0.479, "step": 9818 }, { "epoch": 1.6833459582296288, "grad_norm": 1.4375, "learning_rate": 7.995573582769526e-06, "loss": 0.4552, "step": 9819 }, { "epoch": 1.6835190996645384, "grad_norm": 1.3203125, "learning_rate": 7.993786774079768e-06, "loss": 0.4609, "step": 9820 }, { "epoch": 1.6836922410994482, "grad_norm": 1.34375, "learning_rate": 7.992000032125736e-06, "loss": 0.4459, "step": 9821 }, { "epoch": 1.683865382534358, "grad_norm": 1.390625, "learning_rate": 7.990213356966856e-06, "loss": 0.5155, "step": 9822 }, { "epoch": 1.6840385239692672, "grad_norm": 1.46875, "learning_rate": 7.988426748662575e-06, "loss": 0.5054, "step": 9823 }, { "epoch": 1.684211665404177, "grad_norm": 1.3203125, "learning_rate": 7.986640207272312e-06, "loss": 0.476, "step": 9824 }, { "epoch": 1.6843848068390868, "grad_norm": 1.4296875, "learning_rate": 7.984853732855501e-06, "loss": 0.4733, "step": 9825 }, { "epoch": 1.6845579482739963, "grad_norm": 1.5234375, "learning_rate": 7.983067325471568e-06, "loss": 0.4362, "step": 9826 }, { "epoch": 1.6847310897089058, "grad_norm": 1.4609375, "learning_rate": 7.981280985179933e-06, "loss": 0.4826, "step": 9827 }, { "epoch": 1.6849042311438156, "grad_norm": 1.5546875, "learning_rate": 7.979494712040023e-06, "loss": 0.4556, "step": 9828 }, { "epoch": 1.6850773725787254, "grad_norm": 1.6015625, "learning_rate": 7.977708506111253e-06, "loss": 0.4681, "step": 9829 }, { "epoch": 1.685250514013635, "grad_norm": 1.421875, "learning_rate": 7.975922367453041e-06, "loss": 0.523, "step": 9830 }, { "epoch": 1.6854236554485444, "grad_norm": 1.4375, "learning_rate": 7.974136296124808e-06, "loss": 0.4643, "step": 9831 }, { "epoch": 1.6855967968834542, "grad_norm": 1.375, "learning_rate": 7.972350292185958e-06, "loss": 0.506, "step": 9832 }, { "epoch": 1.685769938318364, "grad_norm": 1.4921875, "learning_rate": 7.970564355695905e-06, "loss": 0.4618, "step": 9833 }, { "epoch": 1.6859430797532735, "grad_norm": 1.3984375, "learning_rate": 7.96877848671406e-06, "loss": 0.494, "step": 9834 }, { "epoch": 1.686116221188183, "grad_norm": 1.3515625, "learning_rate": 7.966992685299827e-06, "loss": 0.4647, "step": 9835 }, { "epoch": 1.6862893626230928, "grad_norm": 1.4453125, "learning_rate": 7.965206951512609e-06, "loss": 0.4721, "step": 9836 }, { "epoch": 1.6864625040580024, "grad_norm": 1.4140625, "learning_rate": 7.963421285411812e-06, "loss": 0.4713, "step": 9837 }, { "epoch": 1.686635645492912, "grad_norm": 1.4765625, "learning_rate": 7.961635687056827e-06, "loss": 0.4914, "step": 9838 }, { "epoch": 1.6868087869278217, "grad_norm": 1.515625, "learning_rate": 7.959850156507058e-06, "loss": 0.451, "step": 9839 }, { "epoch": 1.6869819283627314, "grad_norm": 1.5234375, "learning_rate": 7.9580646938219e-06, "loss": 0.5055, "step": 9840 }, { "epoch": 1.687155069797641, "grad_norm": 1.4140625, "learning_rate": 7.956279299060742e-06, "loss": 0.462, "step": 9841 }, { "epoch": 1.6873282112325505, "grad_norm": 1.453125, "learning_rate": 7.954493972282975e-06, "loss": 0.4858, "step": 9842 }, { "epoch": 1.6875013526674603, "grad_norm": 1.359375, "learning_rate": 7.952708713547984e-06, "loss": 0.4733, "step": 9843 }, { "epoch": 1.68767449410237, "grad_norm": 1.390625, "learning_rate": 7.95092352291516e-06, "loss": 0.4948, "step": 9844 }, { "epoch": 1.6878476355372796, "grad_norm": 1.3203125, "learning_rate": 7.949138400443886e-06, "loss": 0.4444, "step": 9845 }, { "epoch": 1.688020776972189, "grad_norm": 1.4375, "learning_rate": 7.947353346193542e-06, "loss": 0.5331, "step": 9846 }, { "epoch": 1.6881939184070989, "grad_norm": 1.4765625, "learning_rate": 7.94556836022351e-06, "loss": 0.5124, "step": 9847 }, { "epoch": 1.6883670598420084, "grad_norm": 1.4140625, "learning_rate": 7.94378344259316e-06, "loss": 0.4945, "step": 9848 }, { "epoch": 1.688540201276918, "grad_norm": 1.3828125, "learning_rate": 7.941998593361874e-06, "loss": 0.4217, "step": 9849 }, { "epoch": 1.6887133427118277, "grad_norm": 1.390625, "learning_rate": 7.940213812589018e-06, "loss": 0.4258, "step": 9850 }, { "epoch": 1.6888864841467375, "grad_norm": 1.4453125, "learning_rate": 7.938429100333964e-06, "loss": 0.4775, "step": 9851 }, { "epoch": 1.689059625581647, "grad_norm": 1.390625, "learning_rate": 7.936644456656082e-06, "loss": 0.5076, "step": 9852 }, { "epoch": 1.6892327670165566, "grad_norm": 1.4296875, "learning_rate": 7.934859881614733e-06, "loss": 0.4601, "step": 9853 }, { "epoch": 1.6894059084514663, "grad_norm": 1.390625, "learning_rate": 7.93307537526928e-06, "loss": 0.471, "step": 9854 }, { "epoch": 1.689579049886376, "grad_norm": 1.609375, "learning_rate": 7.931290937679089e-06, "loss": 0.5291, "step": 9855 }, { "epoch": 1.6897521913212856, "grad_norm": 1.5703125, "learning_rate": 7.929506568903517e-06, "loss": 0.5077, "step": 9856 }, { "epoch": 1.6899253327561952, "grad_norm": 1.4453125, "learning_rate": 7.927722269001917e-06, "loss": 0.4643, "step": 9857 }, { "epoch": 1.690098474191105, "grad_norm": 1.3984375, "learning_rate": 7.925938038033643e-06, "loss": 0.4772, "step": 9858 }, { "epoch": 1.6902716156260145, "grad_norm": 1.2890625, "learning_rate": 7.92415387605805e-06, "loss": 0.4071, "step": 9859 }, { "epoch": 1.690444757060924, "grad_norm": 1.4296875, "learning_rate": 7.922369783134485e-06, "loss": 0.4683, "step": 9860 }, { "epoch": 1.6906178984958338, "grad_norm": 1.390625, "learning_rate": 7.920585759322297e-06, "loss": 0.4333, "step": 9861 }, { "epoch": 1.6907910399307435, "grad_norm": 1.4609375, "learning_rate": 7.918801804680826e-06, "loss": 0.4586, "step": 9862 }, { "epoch": 1.690964181365653, "grad_norm": 1.4765625, "learning_rate": 7.91701791926942e-06, "loss": 0.4508, "step": 9863 }, { "epoch": 1.6911373228005626, "grad_norm": 1.40625, "learning_rate": 7.915234103147417e-06, "loss": 0.4529, "step": 9864 }, { "epoch": 1.6913104642354724, "grad_norm": 1.3984375, "learning_rate": 7.91345035637415e-06, "loss": 0.4594, "step": 9865 }, { "epoch": 1.6914836056703821, "grad_norm": 1.296875, "learning_rate": 7.91166667900896e-06, "loss": 0.4315, "step": 9866 }, { "epoch": 1.6916567471052917, "grad_norm": 1.34375, "learning_rate": 7.909883071111181e-06, "loss": 0.4591, "step": 9867 }, { "epoch": 1.6918298885402012, "grad_norm": 1.375, "learning_rate": 7.908099532740143e-06, "loss": 0.4659, "step": 9868 }, { "epoch": 1.692003029975111, "grad_norm": 1.390625, "learning_rate": 7.906316063955173e-06, "loss": 0.4444, "step": 9869 }, { "epoch": 1.6921761714100205, "grad_norm": 1.53125, "learning_rate": 7.904532664815596e-06, "loss": 0.5406, "step": 9870 }, { "epoch": 1.69234931284493, "grad_norm": 1.3828125, "learning_rate": 7.90274933538074e-06, "loss": 0.4808, "step": 9871 }, { "epoch": 1.6925224542798398, "grad_norm": 1.375, "learning_rate": 7.900966075709922e-06, "loss": 0.4599, "step": 9872 }, { "epoch": 1.6926955957147496, "grad_norm": 1.3359375, "learning_rate": 7.899182885862467e-06, "loss": 0.4879, "step": 9873 }, { "epoch": 1.6928687371496591, "grad_norm": 1.3515625, "learning_rate": 7.897399765897685e-06, "loss": 0.5598, "step": 9874 }, { "epoch": 1.6930418785845687, "grad_norm": 1.328125, "learning_rate": 7.895616715874893e-06, "loss": 0.407, "step": 9875 }, { "epoch": 1.6932150200194784, "grad_norm": 1.4453125, "learning_rate": 7.893833735853404e-06, "loss": 0.4402, "step": 9876 }, { "epoch": 1.6933881614543882, "grad_norm": 1.34375, "learning_rate": 7.892050825892531e-06, "loss": 0.44, "step": 9877 }, { "epoch": 1.6935613028892977, "grad_norm": 1.4375, "learning_rate": 7.890267986051579e-06, "loss": 0.4458, "step": 9878 }, { "epoch": 1.6937344443242073, "grad_norm": 1.484375, "learning_rate": 7.888485216389852e-06, "loss": 0.4899, "step": 9879 }, { "epoch": 1.693907585759117, "grad_norm": 1.359375, "learning_rate": 7.886702516966656e-06, "loss": 0.4676, "step": 9880 }, { "epoch": 1.6940807271940266, "grad_norm": 1.625, "learning_rate": 7.884919887841289e-06, "loss": 0.4696, "step": 9881 }, { "epoch": 1.6942538686289361, "grad_norm": 1.3828125, "learning_rate": 7.883137329073049e-06, "loss": 0.4739, "step": 9882 }, { "epoch": 1.6944270100638459, "grad_norm": 1.4453125, "learning_rate": 7.881354840721237e-06, "loss": 0.4366, "step": 9883 }, { "epoch": 1.6946001514987556, "grad_norm": 1.375, "learning_rate": 7.879572422845139e-06, "loss": 0.4355, "step": 9884 }, { "epoch": 1.6947732929336652, "grad_norm": 1.3671875, "learning_rate": 7.877790075504053e-06, "loss": 0.406, "step": 9885 }, { "epoch": 1.6949464343685747, "grad_norm": 1.453125, "learning_rate": 7.876007798757263e-06, "loss": 0.4822, "step": 9886 }, { "epoch": 1.6951195758034845, "grad_norm": 1.5703125, "learning_rate": 7.874225592664056e-06, "loss": 0.4906, "step": 9887 }, { "epoch": 1.6952927172383943, "grad_norm": 1.296875, "learning_rate": 7.872443457283719e-06, "loss": 0.4445, "step": 9888 }, { "epoch": 1.6954658586733038, "grad_norm": 1.4296875, "learning_rate": 7.870661392675535e-06, "loss": 0.5072, "step": 9889 }, { "epoch": 1.6956390001082133, "grad_norm": 1.34375, "learning_rate": 7.86887939889878e-06, "loss": 0.4213, "step": 9890 }, { "epoch": 1.695812141543123, "grad_norm": 1.3203125, "learning_rate": 7.867097476012733e-06, "loss": 0.4493, "step": 9891 }, { "epoch": 1.6959852829780326, "grad_norm": 1.4921875, "learning_rate": 7.865315624076667e-06, "loss": 0.45, "step": 9892 }, { "epoch": 1.6961584244129422, "grad_norm": 1.375, "learning_rate": 7.863533843149857e-06, "loss": 0.4404, "step": 9893 }, { "epoch": 1.696331565847852, "grad_norm": 1.4140625, "learning_rate": 7.861752133291571e-06, "loss": 0.4342, "step": 9894 }, { "epoch": 1.6965047072827617, "grad_norm": 1.40625, "learning_rate": 7.85997049456108e-06, "loss": 0.3876, "step": 9895 }, { "epoch": 1.6966778487176712, "grad_norm": 1.46875, "learning_rate": 7.858188927017644e-06, "loss": 0.4374, "step": 9896 }, { "epoch": 1.6968509901525808, "grad_norm": 1.3203125, "learning_rate": 7.85640743072053e-06, "loss": 0.4652, "step": 9897 }, { "epoch": 1.6970241315874905, "grad_norm": 1.3203125, "learning_rate": 7.854626005728992e-06, "loss": 0.436, "step": 9898 }, { "epoch": 1.6971972730224003, "grad_norm": 1.4921875, "learning_rate": 7.852844652102301e-06, "loss": 0.3941, "step": 9899 }, { "epoch": 1.6973704144573099, "grad_norm": 1.3515625, "learning_rate": 7.851063369899704e-06, "loss": 0.4252, "step": 9900 }, { "epoch": 1.6975435558922194, "grad_norm": 1.3671875, "learning_rate": 7.849282159180456e-06, "loss": 0.4528, "step": 9901 }, { "epoch": 1.6977166973271292, "grad_norm": 1.5078125, "learning_rate": 7.847501020003806e-06, "loss": 0.489, "step": 9902 }, { "epoch": 1.6978898387620387, "grad_norm": 1.4609375, "learning_rate": 7.845719952429007e-06, "loss": 0.4582, "step": 9903 }, { "epoch": 1.6980629801969482, "grad_norm": 1.5390625, "learning_rate": 7.843938956515305e-06, "loss": 0.4933, "step": 9904 }, { "epoch": 1.698236121631858, "grad_norm": 1.4453125, "learning_rate": 7.84215803232194e-06, "loss": 0.494, "step": 9905 }, { "epoch": 1.6984092630667678, "grad_norm": 1.34375, "learning_rate": 7.840377179908156e-06, "loss": 0.4931, "step": 9906 }, { "epoch": 1.6985824045016773, "grad_norm": 1.3828125, "learning_rate": 7.838596399333194e-06, "loss": 0.4453, "step": 9907 }, { "epoch": 1.6987555459365868, "grad_norm": 1.359375, "learning_rate": 7.836815690656288e-06, "loss": 0.5075, "step": 9908 }, { "epoch": 1.6989286873714966, "grad_norm": 1.3359375, "learning_rate": 7.835035053936669e-06, "loss": 0.4904, "step": 9909 }, { "epoch": 1.6991018288064064, "grad_norm": 1.4375, "learning_rate": 7.833254489233577e-06, "loss": 0.4655, "step": 9910 }, { "epoch": 1.699274970241316, "grad_norm": 1.5390625, "learning_rate": 7.83147399660624e-06, "loss": 0.4798, "step": 9911 }, { "epoch": 1.6994481116762254, "grad_norm": 1.5390625, "learning_rate": 7.829693576113882e-06, "loss": 0.4917, "step": 9912 }, { "epoch": 1.6996212531111352, "grad_norm": 1.4296875, "learning_rate": 7.827913227815729e-06, "loss": 0.4134, "step": 9913 }, { "epoch": 1.6997943945460448, "grad_norm": 1.421875, "learning_rate": 7.826132951771003e-06, "loss": 0.47, "step": 9914 }, { "epoch": 1.6999675359809543, "grad_norm": 1.328125, "learning_rate": 7.824352748038924e-06, "loss": 0.4027, "step": 9915 }, { "epoch": 1.700140677415864, "grad_norm": 1.5078125, "learning_rate": 7.822572616678712e-06, "loss": 0.4902, "step": 9916 }, { "epoch": 1.7003138188507738, "grad_norm": 1.3671875, "learning_rate": 7.820792557749579e-06, "loss": 0.4593, "step": 9917 }, { "epoch": 1.7004869602856834, "grad_norm": 1.4140625, "learning_rate": 7.819012571310738e-06, "loss": 0.4611, "step": 9918 }, { "epoch": 1.700660101720593, "grad_norm": 1.3984375, "learning_rate": 7.817232657421401e-06, "loss": 0.4453, "step": 9919 }, { "epoch": 1.7008332431555027, "grad_norm": 1.46875, "learning_rate": 7.815452816140778e-06, "loss": 0.4466, "step": 9920 }, { "epoch": 1.7010063845904124, "grad_norm": 1.3515625, "learning_rate": 7.81367304752807e-06, "loss": 0.4373, "step": 9921 }, { "epoch": 1.701179526025322, "grad_norm": 1.4609375, "learning_rate": 7.811893351642484e-06, "loss": 0.4513, "step": 9922 }, { "epoch": 1.7013526674602315, "grad_norm": 1.5703125, "learning_rate": 7.810113728543218e-06, "loss": 0.4882, "step": 9923 }, { "epoch": 1.7015258088951413, "grad_norm": 1.78125, "learning_rate": 7.808334178289472e-06, "loss": 0.4669, "step": 9924 }, { "epoch": 1.7016989503300508, "grad_norm": 1.2890625, "learning_rate": 7.806554700940442e-06, "loss": 0.4207, "step": 9925 }, { "epoch": 1.7018720917649603, "grad_norm": 1.3828125, "learning_rate": 7.804775296555319e-06, "loss": 0.4761, "step": 9926 }, { "epoch": 1.7020452331998701, "grad_norm": 1.40625, "learning_rate": 7.802995965193296e-06, "loss": 0.4416, "step": 9927 }, { "epoch": 1.7022183746347799, "grad_norm": 1.421875, "learning_rate": 7.801216706913563e-06, "loss": 0.4528, "step": 9928 }, { "epoch": 1.7023915160696894, "grad_norm": 1.375, "learning_rate": 7.799437521775302e-06, "loss": 0.5627, "step": 9929 }, { "epoch": 1.702564657504599, "grad_norm": 1.34375, "learning_rate": 7.797658409837698e-06, "loss": 0.4205, "step": 9930 }, { "epoch": 1.7027377989395087, "grad_norm": 1.484375, "learning_rate": 7.795879371159936e-06, "loss": 0.5307, "step": 9931 }, { "epoch": 1.7029109403744185, "grad_norm": 1.3671875, "learning_rate": 7.794100405801193e-06, "loss": 0.4441, "step": 9932 }, { "epoch": 1.703084081809328, "grad_norm": 1.46875, "learning_rate": 7.792321513820644e-06, "loss": 0.4178, "step": 9933 }, { "epoch": 1.7032572232442376, "grad_norm": 1.4296875, "learning_rate": 7.790542695277465e-06, "loss": 0.4532, "step": 9934 }, { "epoch": 1.7034303646791473, "grad_norm": 1.390625, "learning_rate": 7.788763950230826e-06, "loss": 0.4547, "step": 9935 }, { "epoch": 1.703603506114057, "grad_norm": 1.40625, "learning_rate": 7.786985278739895e-06, "loss": 0.4228, "step": 9936 }, { "epoch": 1.7037766475489664, "grad_norm": 1.375, "learning_rate": 7.785206680863844e-06, "loss": 0.4424, "step": 9937 }, { "epoch": 1.7039497889838762, "grad_norm": 1.4921875, "learning_rate": 7.78342815666183e-06, "loss": 0.4531, "step": 9938 }, { "epoch": 1.704122930418786, "grad_norm": 1.3984375, "learning_rate": 7.78164970619302e-06, "loss": 0.5173, "step": 9939 }, { "epoch": 1.7042960718536955, "grad_norm": 1.4375, "learning_rate": 7.779871329516571e-06, "loss": 0.4721, "step": 9940 }, { "epoch": 1.704469213288605, "grad_norm": 1.4921875, "learning_rate": 7.778093026691636e-06, "loss": 0.4891, "step": 9941 }, { "epoch": 1.7046423547235148, "grad_norm": 1.3984375, "learning_rate": 7.776314797777379e-06, "loss": 0.4419, "step": 9942 }, { "epoch": 1.7048154961584245, "grad_norm": 1.453125, "learning_rate": 7.774536642832943e-06, "loss": 0.4547, "step": 9943 }, { "epoch": 1.704988637593334, "grad_norm": 1.40625, "learning_rate": 7.772758561917484e-06, "loss": 0.5174, "step": 9944 }, { "epoch": 1.7051617790282436, "grad_norm": 1.4453125, "learning_rate": 7.770980555090144e-06, "loss": 0.4908, "step": 9945 }, { "epoch": 1.7053349204631534, "grad_norm": 1.625, "learning_rate": 7.769202622410071e-06, "loss": 0.5447, "step": 9946 }, { "epoch": 1.7055080618980631, "grad_norm": 1.3984375, "learning_rate": 7.767424763936408e-06, "loss": 0.4115, "step": 9947 }, { "epoch": 1.7056812033329725, "grad_norm": 1.34375, "learning_rate": 7.76564697972829e-06, "loss": 0.4145, "step": 9948 }, { "epoch": 1.7058543447678822, "grad_norm": 1.375, "learning_rate": 7.763869269844857e-06, "loss": 0.4402, "step": 9949 }, { "epoch": 1.706027486202792, "grad_norm": 1.46875, "learning_rate": 7.762091634345243e-06, "loss": 0.4588, "step": 9950 }, { "epoch": 1.7062006276377015, "grad_norm": 1.4140625, "learning_rate": 7.760314073288578e-06, "loss": 0.4698, "step": 9951 }, { "epoch": 1.706373769072611, "grad_norm": 1.4609375, "learning_rate": 7.758536586733994e-06, "loss": 0.5275, "step": 9952 }, { "epoch": 1.7065469105075208, "grad_norm": 1.375, "learning_rate": 7.756759174740623e-06, "loss": 0.4885, "step": 9953 }, { "epoch": 1.7067200519424306, "grad_norm": 1.375, "learning_rate": 7.75498183736758e-06, "loss": 0.4487, "step": 9954 }, { "epoch": 1.7068931933773401, "grad_norm": 1.3359375, "learning_rate": 7.753204574673996e-06, "loss": 0.4703, "step": 9955 }, { "epoch": 1.7070663348122497, "grad_norm": 1.5703125, "learning_rate": 7.751427386718986e-06, "loss": 0.5455, "step": 9956 }, { "epoch": 1.7072394762471594, "grad_norm": 1.46875, "learning_rate": 7.749650273561668e-06, "loss": 0.4331, "step": 9957 }, { "epoch": 1.7074126176820692, "grad_norm": 1.375, "learning_rate": 7.747873235261157e-06, "loss": 0.4132, "step": 9958 }, { "epoch": 1.7075857591169785, "grad_norm": 1.5, "learning_rate": 7.746096271876569e-06, "loss": 0.4962, "step": 9959 }, { "epoch": 1.7077589005518883, "grad_norm": 1.390625, "learning_rate": 7.744319383467006e-06, "loss": 0.5071, "step": 9960 }, { "epoch": 1.707932041986798, "grad_norm": 1.3984375, "learning_rate": 7.742542570091584e-06, "loss": 0.4996, "step": 9961 }, { "epoch": 1.7081051834217076, "grad_norm": 1.4140625, "learning_rate": 7.7407658318094e-06, "loss": 0.4519, "step": 9962 }, { "epoch": 1.7082783248566171, "grad_norm": 1.4375, "learning_rate": 7.738989168679559e-06, "loss": 0.4717, "step": 9963 }, { "epoch": 1.7084514662915269, "grad_norm": 1.3828125, "learning_rate": 7.737212580761161e-06, "loss": 0.4801, "step": 9964 }, { "epoch": 1.7086246077264367, "grad_norm": 1.5390625, "learning_rate": 7.73543606811331e-06, "loss": 0.5456, "step": 9965 }, { "epoch": 1.7087977491613462, "grad_norm": 1.3359375, "learning_rate": 7.733659630795092e-06, "loss": 0.4013, "step": 9966 }, { "epoch": 1.7089708905962557, "grad_norm": 1.6328125, "learning_rate": 7.731883268865601e-06, "loss": 0.4906, "step": 9967 }, { "epoch": 1.7091440320311655, "grad_norm": 1.3515625, "learning_rate": 7.730106982383932e-06, "loss": 0.4156, "step": 9968 }, { "epoch": 1.7093171734660753, "grad_norm": 1.4140625, "learning_rate": 7.728330771409165e-06, "loss": 0.4111, "step": 9969 }, { "epoch": 1.7094903149009846, "grad_norm": 1.5390625, "learning_rate": 7.726554636000388e-06, "loss": 0.5014, "step": 9970 }, { "epoch": 1.7096634563358943, "grad_norm": 1.296875, "learning_rate": 7.724778576216687e-06, "loss": 0.4241, "step": 9971 }, { "epoch": 1.709836597770804, "grad_norm": 1.4453125, "learning_rate": 7.723002592117136e-06, "loss": 0.4511, "step": 9972 }, { "epoch": 1.7100097392057136, "grad_norm": 1.40625, "learning_rate": 7.721226683760812e-06, "loss": 0.4676, "step": 9973 }, { "epoch": 1.7101828806406232, "grad_norm": 1.53125, "learning_rate": 7.719450851206796e-06, "loss": 0.4579, "step": 9974 }, { "epoch": 1.710356022075533, "grad_norm": 1.3828125, "learning_rate": 7.717675094514156e-06, "loss": 0.494, "step": 9975 }, { "epoch": 1.7105291635104427, "grad_norm": 1.5234375, "learning_rate": 7.715899413741963e-06, "loss": 0.4773, "step": 9976 }, { "epoch": 1.7107023049453522, "grad_norm": 1.3046875, "learning_rate": 7.714123808949285e-06, "loss": 0.456, "step": 9977 }, { "epoch": 1.7108754463802618, "grad_norm": 1.3828125, "learning_rate": 7.71234828019518e-06, "loss": 0.4362, "step": 9978 }, { "epoch": 1.7110485878151716, "grad_norm": 1.421875, "learning_rate": 7.71057282753872e-06, "loss": 0.4832, "step": 9979 }, { "epoch": 1.7112217292500813, "grad_norm": 1.609375, "learning_rate": 7.70879745103896e-06, "loss": 0.504, "step": 9980 }, { "epoch": 1.7113948706849909, "grad_norm": 1.3125, "learning_rate": 7.707022150754952e-06, "loss": 0.4228, "step": 9981 }, { "epoch": 1.7115680121199004, "grad_norm": 1.375, "learning_rate": 7.705246926745759e-06, "loss": 0.4086, "step": 9982 }, { "epoch": 1.7117411535548102, "grad_norm": 1.359375, "learning_rate": 7.703471779070431e-06, "loss": 0.4644, "step": 9983 }, { "epoch": 1.7119142949897197, "grad_norm": 1.3828125, "learning_rate": 7.70169670778801e-06, "loss": 0.4786, "step": 9984 }, { "epoch": 1.7120874364246292, "grad_norm": 1.3671875, "learning_rate": 7.69992171295755e-06, "loss": 0.468, "step": 9985 }, { "epoch": 1.712260577859539, "grad_norm": 1.4453125, "learning_rate": 7.698146794638097e-06, "loss": 0.458, "step": 9986 }, { "epoch": 1.7124337192944488, "grad_norm": 1.421875, "learning_rate": 7.69637195288869e-06, "loss": 0.399, "step": 9987 }, { "epoch": 1.7126068607293583, "grad_norm": 1.4921875, "learning_rate": 7.694597187768367e-06, "loss": 0.5292, "step": 9988 }, { "epoch": 1.7127800021642678, "grad_norm": 1.3984375, "learning_rate": 7.692822499336169e-06, "loss": 0.4538, "step": 9989 }, { "epoch": 1.7129531435991776, "grad_norm": 1.5234375, "learning_rate": 7.691047887651121e-06, "loss": 0.5251, "step": 9990 }, { "epoch": 1.7131262850340874, "grad_norm": 1.3828125, "learning_rate": 7.689273352772264e-06, "loss": 0.4362, "step": 9991 }, { "epoch": 1.713299426468997, "grad_norm": 1.3828125, "learning_rate": 7.687498894758626e-06, "loss": 0.4346, "step": 9992 }, { "epoch": 1.7134725679039065, "grad_norm": 1.3125, "learning_rate": 7.685724513669227e-06, "loss": 0.4606, "step": 9993 }, { "epoch": 1.7136457093388162, "grad_norm": 1.5234375, "learning_rate": 7.683950209563096e-06, "loss": 0.5197, "step": 9994 }, { "epoch": 1.7138188507737258, "grad_norm": 1.4921875, "learning_rate": 7.68217598249925e-06, "loss": 0.4255, "step": 9995 }, { "epoch": 1.7139919922086353, "grad_norm": 1.4140625, "learning_rate": 7.680401832536718e-06, "loss": 0.4556, "step": 9996 }, { "epoch": 1.714165133643545, "grad_norm": 1.328125, "learning_rate": 7.678627759734506e-06, "loss": 0.462, "step": 9997 }, { "epoch": 1.7143382750784548, "grad_norm": 1.390625, "learning_rate": 7.676853764151631e-06, "loss": 0.5257, "step": 9998 }, { "epoch": 1.7145114165133644, "grad_norm": 1.4296875, "learning_rate": 7.675079845847108e-06, "loss": 0.5299, "step": 9999 }, { "epoch": 1.714684557948274, "grad_norm": 1.4765625, "learning_rate": 7.673306004879938e-06, "loss": 0.5419, "step": 10000 }, { "epoch": 1.7148576993831837, "grad_norm": 1.4296875, "learning_rate": 7.671532241309135e-06, "loss": 0.473, "step": 10001 }, { "epoch": 1.7150308408180934, "grad_norm": 1.390625, "learning_rate": 7.669758555193694e-06, "loss": 0.5207, "step": 10002 }, { "epoch": 1.715203982253003, "grad_norm": 1.4765625, "learning_rate": 7.66798494659262e-06, "loss": 0.4868, "step": 10003 }, { "epoch": 1.7153771236879125, "grad_norm": 1.484375, "learning_rate": 7.666211415564914e-06, "loss": 0.4521, "step": 10004 }, { "epoch": 1.7155502651228223, "grad_norm": 1.484375, "learning_rate": 7.664437962169568e-06, "loss": 0.4135, "step": 10005 }, { "epoch": 1.7157234065577318, "grad_norm": 1.4609375, "learning_rate": 7.662664586465574e-06, "loss": 0.4843, "step": 10006 }, { "epoch": 1.7158965479926414, "grad_norm": 1.359375, "learning_rate": 7.660891288511924e-06, "loss": 0.4069, "step": 10007 }, { "epoch": 1.7160696894275511, "grad_norm": 1.5078125, "learning_rate": 7.65911806836761e-06, "loss": 0.4784, "step": 10008 }, { "epoch": 1.7162428308624609, "grad_norm": 1.3984375, "learning_rate": 7.657344926091613e-06, "loss": 0.4557, "step": 10009 }, { "epoch": 1.7164159722973704, "grad_norm": 1.3515625, "learning_rate": 7.655571861742915e-06, "loss": 0.4535, "step": 10010 }, { "epoch": 1.71658911373228, "grad_norm": 1.6328125, "learning_rate": 7.6537988753805e-06, "loss": 0.4885, "step": 10011 }, { "epoch": 1.7167622551671897, "grad_norm": 1.5, "learning_rate": 7.652025967063342e-06, "loss": 0.4288, "step": 10012 }, { "epoch": 1.7169353966020995, "grad_norm": 1.4921875, "learning_rate": 7.650253136850419e-06, "loss": 0.4948, "step": 10013 }, { "epoch": 1.717108538037009, "grad_norm": 1.5703125, "learning_rate": 7.648480384800698e-06, "loss": 0.5245, "step": 10014 }, { "epoch": 1.7172816794719186, "grad_norm": 1.4375, "learning_rate": 7.646707710973154e-06, "loss": 0.4477, "step": 10015 }, { "epoch": 1.7174548209068283, "grad_norm": 1.328125, "learning_rate": 7.644935115426753e-06, "loss": 0.4226, "step": 10016 }, { "epoch": 1.7176279623417379, "grad_norm": 1.609375, "learning_rate": 7.643162598220456e-06, "loss": 0.5218, "step": 10017 }, { "epoch": 1.7178011037766474, "grad_norm": 1.3046875, "learning_rate": 7.641390159413229e-06, "loss": 0.4213, "step": 10018 }, { "epoch": 1.7179742452115572, "grad_norm": 1.4375, "learning_rate": 7.63961779906403e-06, "loss": 0.4923, "step": 10019 }, { "epoch": 1.718147386646467, "grad_norm": 1.3984375, "learning_rate": 7.63784551723182e-06, "loss": 0.4142, "step": 10020 }, { "epoch": 1.7183205280813765, "grad_norm": 1.5234375, "learning_rate": 7.636073313975544e-06, "loss": 0.4743, "step": 10021 }, { "epoch": 1.718493669516286, "grad_norm": 1.3515625, "learning_rate": 7.63430118935416e-06, "loss": 0.4687, "step": 10022 }, { "epoch": 1.7186668109511958, "grad_norm": 1.3203125, "learning_rate": 7.632529143426616e-06, "loss": 0.4267, "step": 10023 }, { "epoch": 1.7188399523861055, "grad_norm": 1.3671875, "learning_rate": 7.630757176251858e-06, "loss": 0.4861, "step": 10024 }, { "epoch": 1.719013093821015, "grad_norm": 1.4375, "learning_rate": 7.6289852878888304e-06, "loss": 0.4663, "step": 10025 }, { "epoch": 1.7191862352559246, "grad_norm": 1.40625, "learning_rate": 7.627213478396471e-06, "loss": 0.457, "step": 10026 }, { "epoch": 1.7193593766908344, "grad_norm": 1.3359375, "learning_rate": 7.625441747833719e-06, "loss": 0.4683, "step": 10027 }, { "epoch": 1.719532518125744, "grad_norm": 1.4140625, "learning_rate": 7.623670096259511e-06, "loss": 0.4093, "step": 10028 }, { "epoch": 1.7197056595606535, "grad_norm": 1.4296875, "learning_rate": 7.621898523732785e-06, "loss": 0.5077, "step": 10029 }, { "epoch": 1.7198788009955632, "grad_norm": 1.453125, "learning_rate": 7.620127030312464e-06, "loss": 0.502, "step": 10030 }, { "epoch": 1.720051942430473, "grad_norm": 1.390625, "learning_rate": 7.61835561605748e-06, "loss": 0.4037, "step": 10031 }, { "epoch": 1.7202250838653825, "grad_norm": 1.609375, "learning_rate": 7.616584281026759e-06, "loss": 0.5217, "step": 10032 }, { "epoch": 1.720398225300292, "grad_norm": 1.4140625, "learning_rate": 7.614813025279218e-06, "loss": 0.5478, "step": 10033 }, { "epoch": 1.7205713667352018, "grad_norm": 1.546875, "learning_rate": 7.613041848873783e-06, "loss": 0.5695, "step": 10034 }, { "epoch": 1.7207445081701116, "grad_norm": 1.4765625, "learning_rate": 7.611270751869371e-06, "loss": 0.4872, "step": 10035 }, { "epoch": 1.7209176496050211, "grad_norm": 1.4765625, "learning_rate": 7.609499734324892e-06, "loss": 0.4767, "step": 10036 }, { "epoch": 1.7210907910399307, "grad_norm": 1.4375, "learning_rate": 7.607728796299264e-06, "loss": 0.4559, "step": 10037 }, { "epoch": 1.7212639324748404, "grad_norm": 1.484375, "learning_rate": 7.605957937851386e-06, "loss": 0.528, "step": 10038 }, { "epoch": 1.72143707390975, "grad_norm": 1.359375, "learning_rate": 7.6041871590401795e-06, "loss": 0.3965, "step": 10039 }, { "epoch": 1.7216102153446595, "grad_norm": 1.34375, "learning_rate": 7.602416459924537e-06, "loss": 0.4809, "step": 10040 }, { "epoch": 1.7217833567795693, "grad_norm": 1.46875, "learning_rate": 7.600645840563368e-06, "loss": 0.4996, "step": 10041 }, { "epoch": 1.721956498214479, "grad_norm": 1.3984375, "learning_rate": 7.598875301015566e-06, "loss": 0.4705, "step": 10042 }, { "epoch": 1.7221296396493886, "grad_norm": 1.4453125, "learning_rate": 7.5971048413400276e-06, "loss": 0.4254, "step": 10043 }, { "epoch": 1.7223027810842981, "grad_norm": 1.4140625, "learning_rate": 7.595334461595649e-06, "loss": 0.4181, "step": 10044 }, { "epoch": 1.722475922519208, "grad_norm": 1.4296875, "learning_rate": 7.593564161841318e-06, "loss": 0.4479, "step": 10045 }, { "epoch": 1.7226490639541177, "grad_norm": 1.375, "learning_rate": 7.5917939421359235e-06, "loss": 0.4695, "step": 10046 }, { "epoch": 1.7228222053890272, "grad_norm": 1.40625, "learning_rate": 7.5900238025383546e-06, "loss": 0.4926, "step": 10047 }, { "epoch": 1.7229953468239367, "grad_norm": 1.40625, "learning_rate": 7.588253743107488e-06, "loss": 0.4549, "step": 10048 }, { "epoch": 1.7231684882588465, "grad_norm": 1.6328125, "learning_rate": 7.586483763902206e-06, "loss": 0.5867, "step": 10049 }, { "epoch": 1.723341629693756, "grad_norm": 1.4453125, "learning_rate": 7.5847138649813875e-06, "loss": 0.4711, "step": 10050 }, { "epoch": 1.7235147711286656, "grad_norm": 1.3984375, "learning_rate": 7.582944046403911e-06, "loss": 0.4226, "step": 10051 }, { "epoch": 1.7236879125635753, "grad_norm": 1.5, "learning_rate": 7.581174308228643e-06, "loss": 0.5412, "step": 10052 }, { "epoch": 1.723861053998485, "grad_norm": 1.421875, "learning_rate": 7.579404650514455e-06, "loss": 0.5123, "step": 10053 }, { "epoch": 1.7240341954333946, "grad_norm": 1.3984375, "learning_rate": 7.577635073320212e-06, "loss": 0.486, "step": 10054 }, { "epoch": 1.7242073368683042, "grad_norm": 1.40625, "learning_rate": 7.57586557670478e-06, "loss": 0.4827, "step": 10055 }, { "epoch": 1.724380478303214, "grad_norm": 1.5625, "learning_rate": 7.574096160727022e-06, "loss": 0.4829, "step": 10056 }, { "epoch": 1.7245536197381237, "grad_norm": 1.4453125, "learning_rate": 7.572326825445792e-06, "loss": 0.4432, "step": 10057 }, { "epoch": 1.7247267611730333, "grad_norm": 1.40625, "learning_rate": 7.57055757091995e-06, "loss": 0.4691, "step": 10058 }, { "epoch": 1.7248999026079428, "grad_norm": 1.4375, "learning_rate": 7.56878839720835e-06, "loss": 0.4775, "step": 10059 }, { "epoch": 1.7250730440428526, "grad_norm": 1.3984375, "learning_rate": 7.567019304369837e-06, "loss": 0.4834, "step": 10060 }, { "epoch": 1.725246185477762, "grad_norm": 1.453125, "learning_rate": 7.565250292463265e-06, "loss": 0.4454, "step": 10061 }, { "epoch": 1.7254193269126716, "grad_norm": 1.4375, "learning_rate": 7.563481361547478e-06, "loss": 0.4394, "step": 10062 }, { "epoch": 1.7255924683475814, "grad_norm": 1.3671875, "learning_rate": 7.5617125116813195e-06, "loss": 0.4638, "step": 10063 }, { "epoch": 1.7257656097824912, "grad_norm": 1.390625, "learning_rate": 7.559943742923626e-06, "loss": 0.4508, "step": 10064 }, { "epoch": 1.7259387512174007, "grad_norm": 1.375, "learning_rate": 7.558175055333239e-06, "loss": 0.4732, "step": 10065 }, { "epoch": 1.7261118926523102, "grad_norm": 1.34375, "learning_rate": 7.5564064489689895e-06, "loss": 0.4189, "step": 10066 }, { "epoch": 1.72628503408722, "grad_norm": 1.484375, "learning_rate": 7.554637923889709e-06, "loss": 0.5071, "step": 10067 }, { "epoch": 1.7264581755221298, "grad_norm": 1.3359375, "learning_rate": 7.552869480154232e-06, "loss": 0.4469, "step": 10068 }, { "epoch": 1.7266313169570393, "grad_norm": 1.359375, "learning_rate": 7.551101117821378e-06, "loss": 0.4646, "step": 10069 }, { "epoch": 1.7268044583919488, "grad_norm": 1.453125, "learning_rate": 7.549332836949975e-06, "loss": 0.4929, "step": 10070 }, { "epoch": 1.7269775998268586, "grad_norm": 1.4296875, "learning_rate": 7.5475646375988395e-06, "loss": 0.4437, "step": 10071 }, { "epoch": 1.7271507412617682, "grad_norm": 1.3203125, "learning_rate": 7.5457965198267995e-06, "loss": 0.4259, "step": 10072 }, { "epoch": 1.7273238826966777, "grad_norm": 1.515625, "learning_rate": 7.5440284836926605e-06, "loss": 0.4639, "step": 10073 }, { "epoch": 1.7274970241315875, "grad_norm": 1.546875, "learning_rate": 7.54226052925524e-06, "loss": 0.5427, "step": 10074 }, { "epoch": 1.7276701655664972, "grad_norm": 1.375, "learning_rate": 7.5404926565733495e-06, "loss": 0.4443, "step": 10075 }, { "epoch": 1.7278433070014068, "grad_norm": 1.4140625, "learning_rate": 7.538724865705792e-06, "loss": 0.4846, "step": 10076 }, { "epoch": 1.7280164484363163, "grad_norm": 1.4296875, "learning_rate": 7.536957156711377e-06, "loss": 0.474, "step": 10077 }, { "epoch": 1.728189589871226, "grad_norm": 1.3203125, "learning_rate": 7.535189529648902e-06, "loss": 0.4353, "step": 10078 }, { "epoch": 1.7283627313061358, "grad_norm": 1.296875, "learning_rate": 7.533421984577168e-06, "loss": 0.4253, "step": 10079 }, { "epoch": 1.7285358727410454, "grad_norm": 1.40625, "learning_rate": 7.531654521554974e-06, "loss": 0.5161, "step": 10080 }, { "epoch": 1.728709014175955, "grad_norm": 1.484375, "learning_rate": 7.52988714064111e-06, "loss": 0.4849, "step": 10081 }, { "epoch": 1.7288821556108647, "grad_norm": 1.515625, "learning_rate": 7.528119841894365e-06, "loss": 0.4748, "step": 10082 }, { "epoch": 1.7290552970457744, "grad_norm": 1.3828125, "learning_rate": 7.526352625373536e-06, "loss": 0.5034, "step": 10083 }, { "epoch": 1.7292284384806837, "grad_norm": 1.3359375, "learning_rate": 7.524585491137404e-06, "loss": 0.4756, "step": 10084 }, { "epoch": 1.7294015799155935, "grad_norm": 1.46875, "learning_rate": 7.52281843924475e-06, "loss": 0.4798, "step": 10085 }, { "epoch": 1.7295747213505033, "grad_norm": 1.3046875, "learning_rate": 7.521051469754356e-06, "loss": 0.5088, "step": 10086 }, { "epoch": 1.7297478627854128, "grad_norm": 2.328125, "learning_rate": 7.519284582725e-06, "loss": 0.5229, "step": 10087 }, { "epoch": 1.7299210042203224, "grad_norm": 1.4453125, "learning_rate": 7.517517778215455e-06, "loss": 0.5448, "step": 10088 }, { "epoch": 1.7300941456552321, "grad_norm": 1.328125, "learning_rate": 7.515751056284496e-06, "loss": 0.47, "step": 10089 }, { "epoch": 1.7302672870901419, "grad_norm": 1.5078125, "learning_rate": 7.513984416990887e-06, "loss": 0.5281, "step": 10090 }, { "epoch": 1.7304404285250514, "grad_norm": 1.3359375, "learning_rate": 7.512217860393397e-06, "loss": 0.403, "step": 10091 }, { "epoch": 1.730613569959961, "grad_norm": 1.484375, "learning_rate": 7.5104513865507925e-06, "loss": 0.4755, "step": 10092 }, { "epoch": 1.7307867113948707, "grad_norm": 1.359375, "learning_rate": 7.508684995521828e-06, "loss": 0.4634, "step": 10093 }, { "epoch": 1.7309598528297805, "grad_norm": 1.328125, "learning_rate": 7.506918687365267e-06, "loss": 0.4329, "step": 10094 }, { "epoch": 1.7311329942646898, "grad_norm": 1.5, "learning_rate": 7.505152462139863e-06, "loss": 0.4367, "step": 10095 }, { "epoch": 1.7313061356995996, "grad_norm": 1.359375, "learning_rate": 7.503386319904372e-06, "loss": 0.4621, "step": 10096 }, { "epoch": 1.7314792771345093, "grad_norm": 1.3671875, "learning_rate": 7.501620260717538e-06, "loss": 0.4098, "step": 10097 }, { "epoch": 1.7316524185694189, "grad_norm": 1.375, "learning_rate": 7.499854284638112e-06, "loss": 0.4571, "step": 10098 }, { "epoch": 1.7318255600043284, "grad_norm": 1.4140625, "learning_rate": 7.4980883917248385e-06, "loss": 0.4919, "step": 10099 }, { "epoch": 1.7319987014392382, "grad_norm": 1.3671875, "learning_rate": 7.496322582036457e-06, "loss": 0.4564, "step": 10100 }, { "epoch": 1.732171842874148, "grad_norm": 1.359375, "learning_rate": 7.494556855631708e-06, "loss": 0.47, "step": 10101 }, { "epoch": 1.7323449843090575, "grad_norm": 1.4453125, "learning_rate": 7.492791212569326e-06, "loss": 0.4711, "step": 10102 }, { "epoch": 1.732518125743967, "grad_norm": 1.4921875, "learning_rate": 7.491025652908042e-06, "loss": 0.4832, "step": 10103 }, { "epoch": 1.7326912671788768, "grad_norm": 1.3984375, "learning_rate": 7.489260176706592e-06, "loss": 0.4759, "step": 10104 }, { "epoch": 1.7328644086137865, "grad_norm": 1.4140625, "learning_rate": 7.487494784023703e-06, "loss": 0.4573, "step": 10105 }, { "epoch": 1.7330375500486959, "grad_norm": 1.3046875, "learning_rate": 7.485729474918097e-06, "loss": 0.4148, "step": 10106 }, { "epoch": 1.7332106914836056, "grad_norm": 1.453125, "learning_rate": 7.483964249448496e-06, "loss": 0.4957, "step": 10107 }, { "epoch": 1.7333838329185154, "grad_norm": 1.3125, "learning_rate": 7.4821991076736245e-06, "loss": 0.4568, "step": 10108 }, { "epoch": 1.733556974353425, "grad_norm": 1.375, "learning_rate": 7.480434049652192e-06, "loss": 0.4038, "step": 10109 }, { "epoch": 1.7337301157883345, "grad_norm": 1.546875, "learning_rate": 7.478669075442917e-06, "loss": 0.5085, "step": 10110 }, { "epoch": 1.7339032572232442, "grad_norm": 1.390625, "learning_rate": 7.47690418510451e-06, "loss": 0.4262, "step": 10111 }, { "epoch": 1.734076398658154, "grad_norm": 1.40625, "learning_rate": 7.475139378695677e-06, "loss": 0.4902, "step": 10112 }, { "epoch": 1.7342495400930635, "grad_norm": 1.3828125, "learning_rate": 7.473374656275127e-06, "loss": 0.4889, "step": 10113 }, { "epoch": 1.734422681527973, "grad_norm": 1.4296875, "learning_rate": 7.471610017901554e-06, "loss": 0.496, "step": 10114 }, { "epoch": 1.7345958229628828, "grad_norm": 1.4375, "learning_rate": 7.469845463633672e-06, "loss": 0.4779, "step": 10115 }, { "epoch": 1.7347689643977926, "grad_norm": 1.421875, "learning_rate": 7.468080993530166e-06, "loss": 0.4444, "step": 10116 }, { "epoch": 1.7349421058327021, "grad_norm": 1.3671875, "learning_rate": 7.4663166076497376e-06, "loss": 0.4531, "step": 10117 }, { "epoch": 1.7351152472676117, "grad_norm": 1.4609375, "learning_rate": 7.464552306051074e-06, "loss": 0.5138, "step": 10118 }, { "epoch": 1.7352883887025214, "grad_norm": 1.4375, "learning_rate": 7.462788088792865e-06, "loss": 0.4883, "step": 10119 }, { "epoch": 1.735461530137431, "grad_norm": 1.34375, "learning_rate": 7.461023955933798e-06, "loss": 0.4575, "step": 10120 }, { "epoch": 1.7356346715723405, "grad_norm": 1.421875, "learning_rate": 7.4592599075325536e-06, "loss": 0.4562, "step": 10121 }, { "epoch": 1.7358078130072503, "grad_norm": 1.3984375, "learning_rate": 7.457495943647812e-06, "loss": 0.4551, "step": 10122 }, { "epoch": 1.73598095444216, "grad_norm": 1.328125, "learning_rate": 7.455732064338255e-06, "loss": 0.4188, "step": 10123 }, { "epoch": 1.7361540958770696, "grad_norm": 1.4609375, "learning_rate": 7.45396826966255e-06, "loss": 0.5008, "step": 10124 }, { "epoch": 1.7363272373119791, "grad_norm": 1.421875, "learning_rate": 7.452204559679373e-06, "loss": 0.5249, "step": 10125 }, { "epoch": 1.736500378746889, "grad_norm": 1.3671875, "learning_rate": 7.450440934447394e-06, "loss": 0.4269, "step": 10126 }, { "epoch": 1.7366735201817987, "grad_norm": 1.4765625, "learning_rate": 7.448677394025279e-06, "loss": 0.4939, "step": 10127 }, { "epoch": 1.7368466616167082, "grad_norm": 1.4140625, "learning_rate": 7.446913938471688e-06, "loss": 0.4128, "step": 10128 }, { "epoch": 1.7370198030516177, "grad_norm": 1.4921875, "learning_rate": 7.445150567845287e-06, "loss": 0.43, "step": 10129 }, { "epoch": 1.7371929444865275, "grad_norm": 1.421875, "learning_rate": 7.443387282204727e-06, "loss": 0.4394, "step": 10130 }, { "epoch": 1.737366085921437, "grad_norm": 1.359375, "learning_rate": 7.441624081608667e-06, "loss": 0.4794, "step": 10131 }, { "epoch": 1.7375392273563466, "grad_norm": 1.3984375, "learning_rate": 7.4398609661157595e-06, "loss": 0.4346, "step": 10132 }, { "epoch": 1.7377123687912563, "grad_norm": 1.359375, "learning_rate": 7.4380979357846505e-06, "loss": 0.4472, "step": 10133 }, { "epoch": 1.737885510226166, "grad_norm": 1.4296875, "learning_rate": 7.436334990673989e-06, "loss": 0.5605, "step": 10134 }, { "epoch": 1.7380586516610756, "grad_norm": 1.359375, "learning_rate": 7.4345721308424194e-06, "loss": 0.4943, "step": 10135 }, { "epoch": 1.7382317930959852, "grad_norm": 1.3125, "learning_rate": 7.432809356348576e-06, "loss": 0.4309, "step": 10136 }, { "epoch": 1.738404934530895, "grad_norm": 1.5234375, "learning_rate": 7.431046667251105e-06, "loss": 0.5139, "step": 10137 }, { "epoch": 1.7385780759658047, "grad_norm": 1.453125, "learning_rate": 7.429284063608637e-06, "loss": 0.4363, "step": 10138 }, { "epoch": 1.7387512174007143, "grad_norm": 1.375, "learning_rate": 7.427521545479807e-06, "loss": 0.4359, "step": 10139 }, { "epoch": 1.7389243588356238, "grad_norm": 1.3984375, "learning_rate": 7.4257591129232405e-06, "loss": 0.3982, "step": 10140 }, { "epoch": 1.7390975002705336, "grad_norm": 1.3515625, "learning_rate": 7.423996765997567e-06, "loss": 0.4947, "step": 10141 }, { "epoch": 1.739270641705443, "grad_norm": 1.4765625, "learning_rate": 7.422234504761408e-06, "loss": 0.4258, "step": 10142 }, { "epoch": 1.7394437831403526, "grad_norm": 1.34375, "learning_rate": 7.420472329273385e-06, "loss": 0.44, "step": 10143 }, { "epoch": 1.7396169245752624, "grad_norm": 1.5234375, "learning_rate": 7.418710239592118e-06, "loss": 0.5451, "step": 10144 }, { "epoch": 1.7397900660101722, "grad_norm": 1.40625, "learning_rate": 7.416948235776218e-06, "loss": 0.4718, "step": 10145 }, { "epoch": 1.7399632074450817, "grad_norm": 1.46875, "learning_rate": 7.4151863178843e-06, "loss": 0.5327, "step": 10146 }, { "epoch": 1.7401363488799912, "grad_norm": 1.46875, "learning_rate": 7.4134244859749695e-06, "loss": 0.4657, "step": 10147 }, { "epoch": 1.740309490314901, "grad_norm": 1.34375, "learning_rate": 7.411662740106842e-06, "loss": 0.4815, "step": 10148 }, { "epoch": 1.7404826317498108, "grad_norm": 1.4375, "learning_rate": 7.409901080338512e-06, "loss": 0.489, "step": 10149 }, { "epoch": 1.7406557731847203, "grad_norm": 1.4609375, "learning_rate": 7.408139506728584e-06, "loss": 0.4667, "step": 10150 }, { "epoch": 1.7408289146196299, "grad_norm": 1.5234375, "learning_rate": 7.406378019335658e-06, "loss": 0.4663, "step": 10151 }, { "epoch": 1.7410020560545396, "grad_norm": 1.4296875, "learning_rate": 7.404616618218325e-06, "loss": 0.4505, "step": 10152 }, { "epoch": 1.7411751974894492, "grad_norm": 1.3515625, "learning_rate": 7.402855303435179e-06, "loss": 0.4551, "step": 10153 }, { "epoch": 1.7413483389243587, "grad_norm": 1.3515625, "learning_rate": 7.401094075044809e-06, "loss": 0.4493, "step": 10154 }, { "epoch": 1.7415214803592685, "grad_norm": 1.40625, "learning_rate": 7.399332933105801e-06, "loss": 0.5386, "step": 10155 }, { "epoch": 1.7416946217941782, "grad_norm": 1.4375, "learning_rate": 7.397571877676742e-06, "loss": 0.4955, "step": 10156 }, { "epoch": 1.7418677632290878, "grad_norm": 1.375, "learning_rate": 7.395810908816203e-06, "loss": 0.4696, "step": 10157 }, { "epoch": 1.7420409046639973, "grad_norm": 1.46875, "learning_rate": 7.394050026582772e-06, "loss": 0.4905, "step": 10158 }, { "epoch": 1.742214046098907, "grad_norm": 1.4921875, "learning_rate": 7.3922892310350205e-06, "loss": 0.5587, "step": 10159 }, { "epoch": 1.7423871875338168, "grad_norm": 1.40625, "learning_rate": 7.390528522231522e-06, "loss": 0.4401, "step": 10160 }, { "epoch": 1.7425603289687264, "grad_norm": 1.3515625, "learning_rate": 7.388767900230842e-06, "loss": 0.4507, "step": 10161 }, { "epoch": 1.742733470403636, "grad_norm": 1.3671875, "learning_rate": 7.38700736509155e-06, "loss": 0.4699, "step": 10162 }, { "epoch": 1.7429066118385457, "grad_norm": 1.359375, "learning_rate": 7.385246916872209e-06, "loss": 0.4959, "step": 10163 }, { "epoch": 1.7430797532734552, "grad_norm": 1.421875, "learning_rate": 7.383486555631376e-06, "loss": 0.4348, "step": 10164 }, { "epoch": 1.7432528947083648, "grad_norm": 1.4375, "learning_rate": 7.381726281427614e-06, "loss": 0.4534, "step": 10165 }, { "epoch": 1.7434260361432745, "grad_norm": 1.46875, "learning_rate": 7.379966094319474e-06, "loss": 0.5682, "step": 10166 }, { "epoch": 1.7435991775781843, "grad_norm": 1.4296875, "learning_rate": 7.3782059943655086e-06, "loss": 0.4729, "step": 10167 }, { "epoch": 1.7437723190130938, "grad_norm": 1.328125, "learning_rate": 7.3764459816242625e-06, "loss": 0.4543, "step": 10168 }, { "epoch": 1.7439454604480034, "grad_norm": 1.5390625, "learning_rate": 7.374686056154292e-06, "loss": 0.4865, "step": 10169 }, { "epoch": 1.7441186018829131, "grad_norm": 1.5234375, "learning_rate": 7.372926218014131e-06, "loss": 0.4636, "step": 10170 }, { "epoch": 1.7442917433178229, "grad_norm": 1.4140625, "learning_rate": 7.371166467262323e-06, "loss": 0.4294, "step": 10171 }, { "epoch": 1.7444648847527324, "grad_norm": 1.265625, "learning_rate": 7.3694068039574085e-06, "loss": 0.4451, "step": 10172 }, { "epoch": 1.744638026187642, "grad_norm": 1.5, "learning_rate": 7.367647228157915e-06, "loss": 0.4663, "step": 10173 }, { "epoch": 1.7448111676225517, "grad_norm": 1.3359375, "learning_rate": 7.365887739922377e-06, "loss": 0.4218, "step": 10174 }, { "epoch": 1.7449843090574613, "grad_norm": 1.4609375, "learning_rate": 7.364128339309326e-06, "loss": 0.4661, "step": 10175 }, { "epoch": 1.7451574504923708, "grad_norm": 1.4453125, "learning_rate": 7.362369026377283e-06, "loss": 0.5591, "step": 10176 }, { "epoch": 1.7453305919272806, "grad_norm": 1.390625, "learning_rate": 7.360609801184775e-06, "loss": 0.5019, "step": 10177 }, { "epoch": 1.7455037333621903, "grad_norm": 1.3671875, "learning_rate": 7.358850663790316e-06, "loss": 0.4837, "step": 10178 }, { "epoch": 1.7456768747970999, "grad_norm": 1.734375, "learning_rate": 7.357091614252425e-06, "loss": 0.4134, "step": 10179 }, { "epoch": 1.7458500162320094, "grad_norm": 1.28125, "learning_rate": 7.355332652629621e-06, "loss": 0.3851, "step": 10180 }, { "epoch": 1.7460231576669192, "grad_norm": 1.484375, "learning_rate": 7.3535737789804116e-06, "loss": 0.4944, "step": 10181 }, { "epoch": 1.746196299101829, "grad_norm": 1.4140625, "learning_rate": 7.351814993363304e-06, "loss": 0.49, "step": 10182 }, { "epoch": 1.7463694405367385, "grad_norm": 1.4453125, "learning_rate": 7.350056295836803e-06, "loss": 0.4535, "step": 10183 }, { "epoch": 1.746542581971648, "grad_norm": 1.4453125, "learning_rate": 7.348297686459414e-06, "loss": 0.4566, "step": 10184 }, { "epoch": 1.7467157234065578, "grad_norm": 1.40625, "learning_rate": 7.3465391652896314e-06, "loss": 0.48, "step": 10185 }, { "epoch": 1.7468888648414673, "grad_norm": 1.3671875, "learning_rate": 7.344780732385954e-06, "loss": 0.4401, "step": 10186 }, { "epoch": 1.7470620062763769, "grad_norm": 1.4453125, "learning_rate": 7.343022387806879e-06, "loss": 0.4862, "step": 10187 }, { "epoch": 1.7472351477112866, "grad_norm": 1.328125, "learning_rate": 7.34126413161089e-06, "loss": 0.4633, "step": 10188 }, { "epoch": 1.7474082891461964, "grad_norm": 1.359375, "learning_rate": 7.339505963856481e-06, "loss": 0.5784, "step": 10189 }, { "epoch": 1.747581430581106, "grad_norm": 1.328125, "learning_rate": 7.3377478846021275e-06, "loss": 0.4264, "step": 10190 }, { "epoch": 1.7477545720160155, "grad_norm": 1.421875, "learning_rate": 7.335989893906324e-06, "loss": 0.4725, "step": 10191 }, { "epoch": 1.7479277134509252, "grad_norm": 1.3671875, "learning_rate": 7.334231991827538e-06, "loss": 0.4435, "step": 10192 }, { "epoch": 1.748100854885835, "grad_norm": 1.3125, "learning_rate": 7.332474178424254e-06, "loss": 0.4213, "step": 10193 }, { "epoch": 1.7482739963207445, "grad_norm": 1.4296875, "learning_rate": 7.330716453754938e-06, "loss": 0.5188, "step": 10194 }, { "epoch": 1.748447137755654, "grad_norm": 1.3125, "learning_rate": 7.328958817878061e-06, "loss": 0.4303, "step": 10195 }, { "epoch": 1.7486202791905638, "grad_norm": 1.4765625, "learning_rate": 7.327201270852095e-06, "loss": 0.4872, "step": 10196 }, { "epoch": 1.7487934206254734, "grad_norm": 1.4296875, "learning_rate": 7.325443812735498e-06, "loss": 0.4833, "step": 10197 }, { "epoch": 1.748966562060383, "grad_norm": 1.3515625, "learning_rate": 7.323686443586733e-06, "loss": 0.4985, "step": 10198 }, { "epoch": 1.7491397034952927, "grad_norm": 1.359375, "learning_rate": 7.32192916346426e-06, "loss": 0.5168, "step": 10199 }, { "epoch": 1.7493128449302024, "grad_norm": 1.3203125, "learning_rate": 7.3201719724265304e-06, "loss": 0.4449, "step": 10200 }, { "epoch": 1.749485986365112, "grad_norm": 1.390625, "learning_rate": 7.318414870531996e-06, "loss": 0.4925, "step": 10201 }, { "epoch": 1.7496591278000215, "grad_norm": 1.296875, "learning_rate": 7.316657857839111e-06, "loss": 0.4705, "step": 10202 }, { "epoch": 1.7498322692349313, "grad_norm": 1.3984375, "learning_rate": 7.314900934406322e-06, "loss": 0.5, "step": 10203 }, { "epoch": 1.750005410669841, "grad_norm": 1.4609375, "learning_rate": 7.313144100292064e-06, "loss": 0.5072, "step": 10204 }, { "epoch": 1.7501785521047506, "grad_norm": 1.53125, "learning_rate": 7.311387355554788e-06, "loss": 0.4776, "step": 10205 }, { "epoch": 1.7503516935396601, "grad_norm": 1.2890625, "learning_rate": 7.309630700252921e-06, "loss": 0.4616, "step": 10206 }, { "epoch": 1.75052483497457, "grad_norm": 1.375, "learning_rate": 7.3078741344449036e-06, "loss": 0.444, "step": 10207 }, { "epoch": 1.7506979764094794, "grad_norm": 1.5625, "learning_rate": 7.306117658189166e-06, "loss": 0.5342, "step": 10208 }, { "epoch": 1.750871117844389, "grad_norm": 1.3984375, "learning_rate": 7.304361271544134e-06, "loss": 0.4407, "step": 10209 }, { "epoch": 1.7510442592792987, "grad_norm": 1.453125, "learning_rate": 7.302604974568236e-06, "loss": 0.5273, "step": 10210 }, { "epoch": 1.7512174007142085, "grad_norm": 1.359375, "learning_rate": 7.300848767319895e-06, "loss": 0.4568, "step": 10211 }, { "epoch": 1.751390542149118, "grad_norm": 1.453125, "learning_rate": 7.299092649857524e-06, "loss": 0.4404, "step": 10212 }, { "epoch": 1.7515636835840276, "grad_norm": 1.4375, "learning_rate": 7.297336622239546e-06, "loss": 0.4179, "step": 10213 }, { "epoch": 1.7517368250189373, "grad_norm": 1.5078125, "learning_rate": 7.2955806845243734e-06, "loss": 0.4999, "step": 10214 }, { "epoch": 1.7519099664538471, "grad_norm": 1.53125, "learning_rate": 7.293824836770418e-06, "loss": 0.4272, "step": 10215 }, { "epoch": 1.7520831078887567, "grad_norm": 1.359375, "learning_rate": 7.292069079036081e-06, "loss": 0.4363, "step": 10216 }, { "epoch": 1.7522562493236662, "grad_norm": 1.5078125, "learning_rate": 7.290313411379776e-06, "loss": 0.4757, "step": 10217 }, { "epoch": 1.752429390758576, "grad_norm": 1.6015625, "learning_rate": 7.288557833859895e-06, "loss": 0.5084, "step": 10218 }, { "epoch": 1.7526025321934857, "grad_norm": 1.578125, "learning_rate": 7.286802346534841e-06, "loss": 0.5266, "step": 10219 }, { "epoch": 1.752775673628395, "grad_norm": 1.71875, "learning_rate": 7.2850469494630124e-06, "loss": 0.4187, "step": 10220 }, { "epoch": 1.7529488150633048, "grad_norm": 1.3671875, "learning_rate": 7.2832916427027944e-06, "loss": 0.5165, "step": 10221 }, { "epoch": 1.7531219564982146, "grad_norm": 1.3046875, "learning_rate": 7.28153642631258e-06, "loss": 0.4359, "step": 10222 }, { "epoch": 1.753295097933124, "grad_norm": 1.4765625, "learning_rate": 7.279781300350758e-06, "loss": 0.4319, "step": 10223 }, { "epoch": 1.7534682393680336, "grad_norm": 1.3046875, "learning_rate": 7.278026264875712e-06, "loss": 0.4309, "step": 10224 }, { "epoch": 1.7536413808029434, "grad_norm": 1.5390625, "learning_rate": 7.276271319945818e-06, "loss": 0.545, "step": 10225 }, { "epoch": 1.7538145222378532, "grad_norm": 1.421875, "learning_rate": 7.274516465619454e-06, "loss": 0.5513, "step": 10226 }, { "epoch": 1.7539876636727627, "grad_norm": 1.3828125, "learning_rate": 7.272761701955e-06, "loss": 0.468, "step": 10227 }, { "epoch": 1.7541608051076722, "grad_norm": 1.3359375, "learning_rate": 7.271007029010821e-06, "loss": 0.3743, "step": 10228 }, { "epoch": 1.754333946542582, "grad_norm": 1.390625, "learning_rate": 7.269252446845292e-06, "loss": 0.4367, "step": 10229 }, { "epoch": 1.7545070879774918, "grad_norm": 1.4140625, "learning_rate": 7.2674979555167695e-06, "loss": 0.4942, "step": 10230 }, { "epoch": 1.754680229412401, "grad_norm": 1.3828125, "learning_rate": 7.265743555083622e-06, "loss": 0.4463, "step": 10231 }, { "epoch": 1.7548533708473109, "grad_norm": 1.4296875, "learning_rate": 7.263989245604208e-06, "loss": 0.5007, "step": 10232 }, { "epoch": 1.7550265122822206, "grad_norm": 1.5703125, "learning_rate": 7.2622350271368795e-06, "loss": 0.5249, "step": 10233 }, { "epoch": 1.7551996537171302, "grad_norm": 1.578125, "learning_rate": 7.2604808997399956e-06, "loss": 0.4315, "step": 10234 }, { "epoch": 1.7553727951520397, "grad_norm": 1.4296875, "learning_rate": 7.2587268634719034e-06, "loss": 0.4805, "step": 10235 }, { "epoch": 1.7555459365869495, "grad_norm": 1.421875, "learning_rate": 7.256972918390954e-06, "loss": 0.4695, "step": 10236 }, { "epoch": 1.7557190780218592, "grad_norm": 1.3125, "learning_rate": 7.255219064555487e-06, "loss": 0.496, "step": 10237 }, { "epoch": 1.7558922194567688, "grad_norm": 1.3046875, "learning_rate": 7.253465302023843e-06, "loss": 0.4297, "step": 10238 }, { "epoch": 1.7560653608916783, "grad_norm": 1.453125, "learning_rate": 7.251711630854365e-06, "loss": 0.4885, "step": 10239 }, { "epoch": 1.756238502326588, "grad_norm": 1.296875, "learning_rate": 7.249958051105383e-06, "loss": 0.485, "step": 10240 }, { "epoch": 1.7564116437614978, "grad_norm": 1.421875, "learning_rate": 7.2482045628352335e-06, "loss": 0.498, "step": 10241 }, { "epoch": 1.7565847851964072, "grad_norm": 1.359375, "learning_rate": 7.24645116610224e-06, "loss": 0.4562, "step": 10242 }, { "epoch": 1.756757926631317, "grad_norm": 1.4765625, "learning_rate": 7.244697860964732e-06, "loss": 0.4379, "step": 10243 }, { "epoch": 1.7569310680662267, "grad_norm": 1.4296875, "learning_rate": 7.24294464748103e-06, "loss": 0.4703, "step": 10244 }, { "epoch": 1.7571042095011362, "grad_norm": 1.421875, "learning_rate": 7.2411915257094586e-06, "loss": 0.4623, "step": 10245 }, { "epoch": 1.7572773509360458, "grad_norm": 1.3515625, "learning_rate": 7.2394384957083305e-06, "loss": 0.4556, "step": 10246 }, { "epoch": 1.7574504923709555, "grad_norm": 1.34375, "learning_rate": 7.23768555753596e-06, "loss": 0.4769, "step": 10247 }, { "epoch": 1.7576236338058653, "grad_norm": 1.4140625, "learning_rate": 7.23593271125066e-06, "loss": 0.4545, "step": 10248 }, { "epoch": 1.7577967752407748, "grad_norm": 1.4609375, "learning_rate": 7.234179956910735e-06, "loss": 0.466, "step": 10249 }, { "epoch": 1.7579699166756844, "grad_norm": 1.3671875, "learning_rate": 7.232427294574491e-06, "loss": 0.4422, "step": 10250 }, { "epoch": 1.7581430581105941, "grad_norm": 1.5, "learning_rate": 7.23067472430023e-06, "loss": 0.4658, "step": 10251 }, { "epoch": 1.7583161995455039, "grad_norm": 1.4140625, "learning_rate": 7.228922246146249e-06, "loss": 0.4572, "step": 10252 }, { "epoch": 1.7584893409804134, "grad_norm": 1.53125, "learning_rate": 7.227169860170845e-06, "loss": 0.4901, "step": 10253 }, { "epoch": 1.758662482415323, "grad_norm": 1.34375, "learning_rate": 7.225417566432307e-06, "loss": 0.4919, "step": 10254 }, { "epoch": 1.7588356238502327, "grad_norm": 1.515625, "learning_rate": 7.2236653649889255e-06, "loss": 0.4558, "step": 10255 }, { "epoch": 1.7590087652851423, "grad_norm": 1.4140625, "learning_rate": 7.221913255898989e-06, "loss": 0.4076, "step": 10256 }, { "epoch": 1.7591819067200518, "grad_norm": 1.4765625, "learning_rate": 7.220161239220782e-06, "loss": 0.4502, "step": 10257 }, { "epoch": 1.7593550481549616, "grad_norm": 1.4296875, "learning_rate": 7.218409315012579e-06, "loss": 0.4865, "step": 10258 }, { "epoch": 1.7595281895898713, "grad_norm": 1.484375, "learning_rate": 7.216657483332659e-06, "loss": 0.4257, "step": 10259 }, { "epoch": 1.7597013310247809, "grad_norm": 1.359375, "learning_rate": 7.2149057442393e-06, "loss": 0.4109, "step": 10260 }, { "epoch": 1.7598744724596904, "grad_norm": 1.390625, "learning_rate": 7.213154097790767e-06, "loss": 0.4689, "step": 10261 }, { "epoch": 1.7600476138946002, "grad_norm": 1.4921875, "learning_rate": 7.21140254404533e-06, "loss": 0.5218, "step": 10262 }, { "epoch": 1.76022075532951, "grad_norm": 1.4296875, "learning_rate": 7.209651083061255e-06, "loss": 0.5155, "step": 10263 }, { "epoch": 1.7603938967644195, "grad_norm": 1.3828125, "learning_rate": 7.2078997148968e-06, "loss": 0.4967, "step": 10264 }, { "epoch": 1.760567038199329, "grad_norm": 1.4453125, "learning_rate": 7.2061484396102275e-06, "loss": 0.4765, "step": 10265 }, { "epoch": 1.7607401796342388, "grad_norm": 1.3671875, "learning_rate": 7.2043972572597855e-06, "loss": 0.4364, "step": 10266 }, { "epoch": 1.7609133210691483, "grad_norm": 1.4609375, "learning_rate": 7.202646167903737e-06, "loss": 0.4917, "step": 10267 }, { "epoch": 1.7610864625040579, "grad_norm": 1.40625, "learning_rate": 7.200895171600324e-06, "loss": 0.5405, "step": 10268 }, { "epoch": 1.7612596039389676, "grad_norm": 1.4375, "learning_rate": 7.199144268407796e-06, "loss": 0.4984, "step": 10269 }, { "epoch": 1.7614327453738774, "grad_norm": 1.3046875, "learning_rate": 7.197393458384392e-06, "loss": 0.3871, "step": 10270 }, { "epoch": 1.761605886808787, "grad_norm": 1.5, "learning_rate": 7.1956427415883534e-06, "loss": 0.4549, "step": 10271 }, { "epoch": 1.7617790282436965, "grad_norm": 1.4296875, "learning_rate": 7.1938921180779205e-06, "loss": 0.4684, "step": 10272 }, { "epoch": 1.7619521696786062, "grad_norm": 1.4296875, "learning_rate": 7.192141587911323e-06, "loss": 0.424, "step": 10273 }, { "epoch": 1.762125311113516, "grad_norm": 1.4453125, "learning_rate": 7.190391151146791e-06, "loss": 0.4961, "step": 10274 }, { "epoch": 1.7622984525484255, "grad_norm": 1.4296875, "learning_rate": 7.188640807842556e-06, "loss": 0.5317, "step": 10275 }, { "epoch": 1.762471593983335, "grad_norm": 1.46875, "learning_rate": 7.186890558056836e-06, "loss": 0.4959, "step": 10276 }, { "epoch": 1.7626447354182448, "grad_norm": 1.40625, "learning_rate": 7.185140401847859e-06, "loss": 0.504, "step": 10277 }, { "epoch": 1.7628178768531544, "grad_norm": 1.40625, "learning_rate": 7.183390339273838e-06, "loss": 0.4543, "step": 10278 }, { "epoch": 1.762991018288064, "grad_norm": 1.3828125, "learning_rate": 7.181640370392994e-06, "loss": 0.4595, "step": 10279 }, { "epoch": 1.7631641597229737, "grad_norm": 1.4765625, "learning_rate": 7.179890495263532e-06, "loss": 0.5325, "step": 10280 }, { "epoch": 1.7633373011578835, "grad_norm": 1.4140625, "learning_rate": 7.178140713943666e-06, "loss": 0.5061, "step": 10281 }, { "epoch": 1.763510442592793, "grad_norm": 1.3671875, "learning_rate": 7.176391026491597e-06, "loss": 0.4021, "step": 10282 }, { "epoch": 1.7636835840277025, "grad_norm": 1.3046875, "learning_rate": 7.17464143296553e-06, "loss": 0.4893, "step": 10283 }, { "epoch": 1.7638567254626123, "grad_norm": 1.59375, "learning_rate": 7.172891933423667e-06, "loss": 0.4351, "step": 10284 }, { "epoch": 1.764029866897522, "grad_norm": 1.5, "learning_rate": 7.1711425279242e-06, "loss": 0.4573, "step": 10285 }, { "epoch": 1.7642030083324316, "grad_norm": 1.5078125, "learning_rate": 7.1693932165253225e-06, "loss": 0.4934, "step": 10286 }, { "epoch": 1.7643761497673411, "grad_norm": 1.375, "learning_rate": 7.167643999285223e-06, "loss": 0.4758, "step": 10287 }, { "epoch": 1.764549291202251, "grad_norm": 1.53125, "learning_rate": 7.165894876262097e-06, "loss": 0.4822, "step": 10288 }, { "epoch": 1.7647224326371604, "grad_norm": 1.328125, "learning_rate": 7.164145847514119e-06, "loss": 0.4507, "step": 10289 }, { "epoch": 1.76489557407207, "grad_norm": 1.359375, "learning_rate": 7.162396913099474e-06, "loss": 0.5035, "step": 10290 }, { "epoch": 1.7650687155069797, "grad_norm": 1.3828125, "learning_rate": 7.1606480730763404e-06, "loss": 0.4379, "step": 10291 }, { "epoch": 1.7652418569418895, "grad_norm": 1.5, "learning_rate": 7.1588993275028885e-06, "loss": 0.4252, "step": 10292 }, { "epoch": 1.765414998376799, "grad_norm": 1.3203125, "learning_rate": 7.157150676437292e-06, "loss": 0.4586, "step": 10293 }, { "epoch": 1.7655881398117086, "grad_norm": 1.4296875, "learning_rate": 7.155402119937718e-06, "loss": 0.5002, "step": 10294 }, { "epoch": 1.7657612812466184, "grad_norm": 1.421875, "learning_rate": 7.1536536580623315e-06, "loss": 0.4697, "step": 10295 }, { "epoch": 1.7659344226815281, "grad_norm": 1.578125, "learning_rate": 7.151905290869297e-06, "loss": 0.4263, "step": 10296 }, { "epoch": 1.7661075641164377, "grad_norm": 1.375, "learning_rate": 7.150157018416768e-06, "loss": 0.4357, "step": 10297 }, { "epoch": 1.7662807055513472, "grad_norm": 1.515625, "learning_rate": 7.148408840762901e-06, "loss": 0.4788, "step": 10298 }, { "epoch": 1.766453846986257, "grad_norm": 1.3359375, "learning_rate": 7.146660757965852e-06, "loss": 0.4042, "step": 10299 }, { "epoch": 1.7666269884211665, "grad_norm": 1.40625, "learning_rate": 7.144912770083769e-06, "loss": 0.5091, "step": 10300 }, { "epoch": 1.766800129856076, "grad_norm": 1.3828125, "learning_rate": 7.143164877174797e-06, "loss": 0.4139, "step": 10301 }, { "epoch": 1.7669732712909858, "grad_norm": 1.359375, "learning_rate": 7.141417079297077e-06, "loss": 0.4569, "step": 10302 }, { "epoch": 1.7671464127258956, "grad_norm": 1.4375, "learning_rate": 7.1396693765087546e-06, "loss": 0.513, "step": 10303 }, { "epoch": 1.767319554160805, "grad_norm": 1.4140625, "learning_rate": 7.137921768867959e-06, "loss": 0.4808, "step": 10304 }, { "epoch": 1.7674926955957146, "grad_norm": 1.328125, "learning_rate": 7.136174256432828e-06, "loss": 0.4742, "step": 10305 }, { "epoch": 1.7676658370306244, "grad_norm": 1.3671875, "learning_rate": 7.13442683926149e-06, "loss": 0.4424, "step": 10306 }, { "epoch": 1.7678389784655342, "grad_norm": 1.3828125, "learning_rate": 7.132679517412072e-06, "loss": 0.4815, "step": 10307 }, { "epoch": 1.7680121199004437, "grad_norm": 1.34375, "learning_rate": 7.1309322909427e-06, "loss": 0.4591, "step": 10308 }, { "epoch": 1.7681852613353533, "grad_norm": 1.3984375, "learning_rate": 7.129185159911489e-06, "loss": 0.4125, "step": 10309 }, { "epoch": 1.768358402770263, "grad_norm": 1.3515625, "learning_rate": 7.127438124376564e-06, "loss": 0.4785, "step": 10310 }, { "epoch": 1.7685315442051726, "grad_norm": 1.3671875, "learning_rate": 7.125691184396034e-06, "loss": 0.4169, "step": 10311 }, { "epoch": 1.768704685640082, "grad_norm": 1.484375, "learning_rate": 7.123944340028015e-06, "loss": 0.4589, "step": 10312 }, { "epoch": 1.7688778270749919, "grad_norm": 1.375, "learning_rate": 7.122197591330609e-06, "loss": 0.4454, "step": 10313 }, { "epoch": 1.7690509685099016, "grad_norm": 1.3046875, "learning_rate": 7.120450938361925e-06, "loss": 0.4097, "step": 10314 }, { "epoch": 1.7692241099448112, "grad_norm": 1.2890625, "learning_rate": 7.118704381180064e-06, "loss": 0.4111, "step": 10315 }, { "epoch": 1.7693972513797207, "grad_norm": 1.40625, "learning_rate": 7.1169579198431215e-06, "loss": 0.4419, "step": 10316 }, { "epoch": 1.7695703928146305, "grad_norm": 1.578125, "learning_rate": 7.115211554409197e-06, "loss": 0.4692, "step": 10317 }, { "epoch": 1.7697435342495402, "grad_norm": 1.34375, "learning_rate": 7.113465284936378e-06, "loss": 0.4177, "step": 10318 }, { "epoch": 1.7699166756844498, "grad_norm": 1.4375, "learning_rate": 7.111719111482757e-06, "loss": 0.4833, "step": 10319 }, { "epoch": 1.7700898171193593, "grad_norm": 1.25, "learning_rate": 7.109973034106414e-06, "loss": 0.3743, "step": 10320 }, { "epoch": 1.770262958554269, "grad_norm": 1.4765625, "learning_rate": 7.108227052865442e-06, "loss": 0.4767, "step": 10321 }, { "epoch": 1.7704360999891786, "grad_norm": 1.4453125, "learning_rate": 7.1064811678179116e-06, "loss": 0.4628, "step": 10322 }, { "epoch": 1.7706092414240882, "grad_norm": 1.4140625, "learning_rate": 7.1047353790219e-06, "loss": 0.4636, "step": 10323 }, { "epoch": 1.770782382858998, "grad_norm": 1.359375, "learning_rate": 7.102989686535483e-06, "loss": 0.4515, "step": 10324 }, { "epoch": 1.7709555242939077, "grad_norm": 1.4609375, "learning_rate": 7.101244090416728e-06, "loss": 0.4843, "step": 10325 }, { "epoch": 1.7711286657288172, "grad_norm": 1.390625, "learning_rate": 7.0994985907237004e-06, "loss": 0.4498, "step": 10326 }, { "epoch": 1.7713018071637268, "grad_norm": 1.359375, "learning_rate": 7.097753187514466e-06, "loss": 0.467, "step": 10327 }, { "epoch": 1.7714749485986365, "grad_norm": 1.28125, "learning_rate": 7.096007880847083e-06, "loss": 0.4242, "step": 10328 }, { "epoch": 1.7716480900335463, "grad_norm": 1.4296875, "learning_rate": 7.094262670779611e-06, "loss": 0.5793, "step": 10329 }, { "epoch": 1.7718212314684558, "grad_norm": 1.421875, "learning_rate": 7.0925175573701e-06, "loss": 0.483, "step": 10330 }, { "epoch": 1.7719943729033654, "grad_norm": 1.3515625, "learning_rate": 7.090772540676598e-06, "loss": 0.4917, "step": 10331 }, { "epoch": 1.7721675143382751, "grad_norm": 1.4453125, "learning_rate": 7.0890276207571586e-06, "loss": 0.4467, "step": 10332 }, { "epoch": 1.7723406557731847, "grad_norm": 1.3671875, "learning_rate": 7.087282797669825e-06, "loss": 0.4308, "step": 10333 }, { "epoch": 1.7725137972080942, "grad_norm": 1.4921875, "learning_rate": 7.085538071472634e-06, "loss": 0.5408, "step": 10334 }, { "epoch": 1.772686938643004, "grad_norm": 1.4453125, "learning_rate": 7.0837934422236256e-06, "loss": 0.4772, "step": 10335 }, { "epoch": 1.7728600800779137, "grad_norm": 1.3046875, "learning_rate": 7.082048909980834e-06, "loss": 0.4137, "step": 10336 }, { "epoch": 1.7730332215128233, "grad_norm": 1.34375, "learning_rate": 7.080304474802289e-06, "loss": 0.4296, "step": 10337 }, { "epoch": 1.7732063629477328, "grad_norm": 1.578125, "learning_rate": 7.078560136746019e-06, "loss": 0.4803, "step": 10338 }, { "epoch": 1.7733795043826426, "grad_norm": 1.4765625, "learning_rate": 7.0768158958700505e-06, "loss": 0.4812, "step": 10339 }, { "epoch": 1.7735526458175523, "grad_norm": 1.3671875, "learning_rate": 7.075071752232401e-06, "loss": 0.4358, "step": 10340 }, { "epoch": 1.7737257872524619, "grad_norm": 1.46875, "learning_rate": 7.0733277058910885e-06, "loss": 0.5262, "step": 10341 }, { "epoch": 1.7738989286873714, "grad_norm": 1.421875, "learning_rate": 7.071583756904132e-06, "loss": 0.4592, "step": 10342 }, { "epoch": 1.7740720701222812, "grad_norm": 1.46875, "learning_rate": 7.069839905329543e-06, "loss": 0.4405, "step": 10343 }, { "epoch": 1.7742452115571907, "grad_norm": 1.515625, "learning_rate": 7.0680961512253254e-06, "loss": 0.4114, "step": 10344 }, { "epoch": 1.7744183529921003, "grad_norm": 1.3828125, "learning_rate": 7.066352494649491e-06, "loss": 0.4984, "step": 10345 }, { "epoch": 1.77459149442701, "grad_norm": 1.3828125, "learning_rate": 7.0646089356600335e-06, "loss": 0.4818, "step": 10346 }, { "epoch": 1.7747646358619198, "grad_norm": 1.3515625, "learning_rate": 7.062865474314957e-06, "loss": 0.4778, "step": 10347 }, { "epoch": 1.7749377772968293, "grad_norm": 1.40625, "learning_rate": 7.061122110672257e-06, "loss": 0.4513, "step": 10348 }, { "epoch": 1.7751109187317389, "grad_norm": 1.3984375, "learning_rate": 7.059378844789922e-06, "loss": 0.5076, "step": 10349 }, { "epoch": 1.7752840601666486, "grad_norm": 1.5078125, "learning_rate": 7.057635676725945e-06, "loss": 0.4832, "step": 10350 }, { "epoch": 1.7754572016015584, "grad_norm": 1.4140625, "learning_rate": 7.055892606538311e-06, "loss": 0.5271, "step": 10351 }, { "epoch": 1.775630343036468, "grad_norm": 1.3515625, "learning_rate": 7.0541496342849954e-06, "loss": 0.478, "step": 10352 }, { "epoch": 1.7758034844713775, "grad_norm": 1.4765625, "learning_rate": 7.052406760023987e-06, "loss": 0.446, "step": 10353 }, { "epoch": 1.7759766259062872, "grad_norm": 1.34375, "learning_rate": 7.050663983813257e-06, "loss": 0.5024, "step": 10354 }, { "epoch": 1.776149767341197, "grad_norm": 1.4140625, "learning_rate": 7.048921305710781e-06, "loss": 0.4887, "step": 10355 }, { "epoch": 1.7763229087761063, "grad_norm": 1.265625, "learning_rate": 7.047178725774524e-06, "loss": 0.4073, "step": 10356 }, { "epoch": 1.776496050211016, "grad_norm": 1.40625, "learning_rate": 7.045436244062458e-06, "loss": 0.4905, "step": 10357 }, { "epoch": 1.7766691916459258, "grad_norm": 1.3984375, "learning_rate": 7.043693860632538e-06, "loss": 0.532, "step": 10358 }, { "epoch": 1.7768423330808354, "grad_norm": 1.46875, "learning_rate": 7.0419515755427295e-06, "loss": 0.4223, "step": 10359 }, { "epoch": 1.777015474515745, "grad_norm": 1.5, "learning_rate": 7.0402093888509874e-06, "loss": 0.4961, "step": 10360 }, { "epoch": 1.7771886159506547, "grad_norm": 1.4453125, "learning_rate": 7.038467300615263e-06, "loss": 0.4245, "step": 10361 }, { "epoch": 1.7773617573855645, "grad_norm": 1.4453125, "learning_rate": 7.036725310893507e-06, "loss": 0.4133, "step": 10362 }, { "epoch": 1.777534898820474, "grad_norm": 1.296875, "learning_rate": 7.034983419743664e-06, "loss": 0.4438, "step": 10363 }, { "epoch": 1.7777080402553835, "grad_norm": 1.40625, "learning_rate": 7.033241627223684e-06, "loss": 0.4554, "step": 10364 }, { "epoch": 1.7778811816902933, "grad_norm": 1.53125, "learning_rate": 7.031499933391499e-06, "loss": 0.5037, "step": 10365 }, { "epoch": 1.778054323125203, "grad_norm": 1.34375, "learning_rate": 7.029758338305051e-06, "loss": 0.4462, "step": 10366 }, { "epoch": 1.7782274645601124, "grad_norm": 1.4140625, "learning_rate": 7.02801684202227e-06, "loss": 0.4764, "step": 10367 }, { "epoch": 1.7784006059950221, "grad_norm": 1.4609375, "learning_rate": 7.026275444601087e-06, "loss": 0.5056, "step": 10368 }, { "epoch": 1.778573747429932, "grad_norm": 1.4453125, "learning_rate": 7.024534146099431e-06, "loss": 0.5262, "step": 10369 }, { "epoch": 1.7787468888648414, "grad_norm": 1.3515625, "learning_rate": 7.022792946575222e-06, "loss": 0.4697, "step": 10370 }, { "epoch": 1.778920030299751, "grad_norm": 1.3828125, "learning_rate": 7.021051846086381e-06, "loss": 0.4798, "step": 10371 }, { "epoch": 1.7790931717346607, "grad_norm": 1.484375, "learning_rate": 7.019310844690827e-06, "loss": 0.5121, "step": 10372 }, { "epoch": 1.7792663131695705, "grad_norm": 1.421875, "learning_rate": 7.01756994244647e-06, "loss": 0.4968, "step": 10373 }, { "epoch": 1.77943945460448, "grad_norm": 1.390625, "learning_rate": 7.015829139411221e-06, "loss": 0.4156, "step": 10374 }, { "epoch": 1.7796125960393896, "grad_norm": 1.296875, "learning_rate": 7.01408843564299e-06, "loss": 0.4033, "step": 10375 }, { "epoch": 1.7797857374742994, "grad_norm": 1.5078125, "learning_rate": 7.012347831199681e-06, "loss": 0.5145, "step": 10376 }, { "epoch": 1.7799588789092091, "grad_norm": 1.4375, "learning_rate": 7.010607326139191e-06, "loss": 0.5353, "step": 10377 }, { "epoch": 1.7801320203441184, "grad_norm": 1.390625, "learning_rate": 7.008866920519417e-06, "loss": 0.4184, "step": 10378 }, { "epoch": 1.7803051617790282, "grad_norm": 1.421875, "learning_rate": 7.0071266143982575e-06, "loss": 0.5798, "step": 10379 }, { "epoch": 1.780478303213938, "grad_norm": 1.4765625, "learning_rate": 7.005386407833597e-06, "loss": 0.513, "step": 10380 }, { "epoch": 1.7806514446488475, "grad_norm": 1.3671875, "learning_rate": 7.0036463008833274e-06, "loss": 0.4491, "step": 10381 }, { "epoch": 1.780824586083757, "grad_norm": 1.53125, "learning_rate": 7.001906293605329e-06, "loss": 0.4697, "step": 10382 }, { "epoch": 1.7809977275186668, "grad_norm": 1.3984375, "learning_rate": 7.000166386057483e-06, "loss": 0.4187, "step": 10383 }, { "epoch": 1.7811708689535766, "grad_norm": 1.515625, "learning_rate": 6.998426578297669e-06, "loss": 0.4459, "step": 10384 }, { "epoch": 1.781344010388486, "grad_norm": 1.328125, "learning_rate": 6.996686870383755e-06, "loss": 0.425, "step": 10385 }, { "epoch": 1.7815171518233957, "grad_norm": 1.546875, "learning_rate": 6.994947262373619e-06, "loss": 0.5193, "step": 10386 }, { "epoch": 1.7816902932583054, "grad_norm": 1.3828125, "learning_rate": 6.9932077543251245e-06, "loss": 0.4347, "step": 10387 }, { "epoch": 1.7818634346932152, "grad_norm": 1.4453125, "learning_rate": 6.991468346296137e-06, "loss": 0.4924, "step": 10388 }, { "epoch": 1.7820365761281247, "grad_norm": 1.46875, "learning_rate": 6.989729038344515e-06, "loss": 0.4467, "step": 10389 }, { "epoch": 1.7822097175630343, "grad_norm": 1.375, "learning_rate": 6.987989830528116e-06, "loss": 0.4663, "step": 10390 }, { "epoch": 1.782382858997944, "grad_norm": 1.453125, "learning_rate": 6.986250722904797e-06, "loss": 0.4442, "step": 10391 }, { "epoch": 1.7825560004328536, "grad_norm": 1.46875, "learning_rate": 6.9845117155324034e-06, "loss": 0.5115, "step": 10392 }, { "epoch": 1.782729141867763, "grad_norm": 1.453125, "learning_rate": 6.982772808468788e-06, "loss": 0.5528, "step": 10393 }, { "epoch": 1.7829022833026729, "grad_norm": 1.4375, "learning_rate": 6.981034001771789e-06, "loss": 0.4006, "step": 10394 }, { "epoch": 1.7830754247375826, "grad_norm": 1.375, "learning_rate": 6.979295295499249e-06, "loss": 0.4189, "step": 10395 }, { "epoch": 1.7832485661724922, "grad_norm": 1.3203125, "learning_rate": 6.977556689709009e-06, "loss": 0.4434, "step": 10396 }, { "epoch": 1.7834217076074017, "grad_norm": 1.3984375, "learning_rate": 6.9758181844589005e-06, "loss": 0.4908, "step": 10397 }, { "epoch": 1.7835948490423115, "grad_norm": 1.3515625, "learning_rate": 6.974079779806751e-06, "loss": 0.4177, "step": 10398 }, { "epoch": 1.7837679904772212, "grad_norm": 1.3203125, "learning_rate": 6.972341475810391e-06, "loss": 0.4069, "step": 10399 }, { "epoch": 1.7839411319121308, "grad_norm": 1.375, "learning_rate": 6.970603272527646e-06, "loss": 0.489, "step": 10400 }, { "epoch": 1.7841142733470403, "grad_norm": 1.4609375, "learning_rate": 6.9688651700163325e-06, "loss": 0.4393, "step": 10401 }, { "epoch": 1.78428741478195, "grad_norm": 1.5234375, "learning_rate": 6.967127168334269e-06, "loss": 0.4866, "step": 10402 }, { "epoch": 1.7844605562168596, "grad_norm": 1.296875, "learning_rate": 6.965389267539271e-06, "loss": 0.4194, "step": 10403 }, { "epoch": 1.7846336976517692, "grad_norm": 1.40625, "learning_rate": 6.963651467689145e-06, "loss": 0.4863, "step": 10404 }, { "epoch": 1.784806839086679, "grad_norm": 1.3515625, "learning_rate": 6.961913768841704e-06, "loss": 0.4419, "step": 10405 }, { "epoch": 1.7849799805215887, "grad_norm": 1.3359375, "learning_rate": 6.960176171054741e-06, "loss": 0.4593, "step": 10406 }, { "epoch": 1.7851531219564982, "grad_norm": 1.484375, "learning_rate": 6.95843867438607e-06, "loss": 0.4689, "step": 10407 }, { "epoch": 1.7853262633914078, "grad_norm": 1.421875, "learning_rate": 6.9567012788934804e-06, "loss": 0.5229, "step": 10408 }, { "epoch": 1.7854994048263175, "grad_norm": 1.5703125, "learning_rate": 6.954963984634768e-06, "loss": 0.4765, "step": 10409 }, { "epoch": 1.7856725462612273, "grad_norm": 1.390625, "learning_rate": 6.953226791667722e-06, "loss": 0.4459, "step": 10410 }, { "epoch": 1.7858456876961368, "grad_norm": 1.34375, "learning_rate": 6.951489700050128e-06, "loss": 0.5715, "step": 10411 }, { "epoch": 1.7860188291310464, "grad_norm": 1.5859375, "learning_rate": 6.949752709839773e-06, "loss": 0.4806, "step": 10412 }, { "epoch": 1.7861919705659561, "grad_norm": 1.421875, "learning_rate": 6.948015821094434e-06, "loss": 0.4833, "step": 10413 }, { "epoch": 1.7863651120008657, "grad_norm": 1.3125, "learning_rate": 6.946279033871888e-06, "loss": 0.4833, "step": 10414 }, { "epoch": 1.7865382534357752, "grad_norm": 1.46875, "learning_rate": 6.944542348229913e-06, "loss": 0.4633, "step": 10415 }, { "epoch": 1.786711394870685, "grad_norm": 1.4296875, "learning_rate": 6.9428057642262704e-06, "loss": 0.478, "step": 10416 }, { "epoch": 1.7868845363055947, "grad_norm": 1.328125, "learning_rate": 6.941069281918731e-06, "loss": 0.4121, "step": 10417 }, { "epoch": 1.7870576777405043, "grad_norm": 1.40625, "learning_rate": 6.9393329013650615e-06, "loss": 0.4775, "step": 10418 }, { "epoch": 1.7872308191754138, "grad_norm": 1.4375, "learning_rate": 6.937596622623021e-06, "loss": 0.4981, "step": 10419 }, { "epoch": 1.7874039606103236, "grad_norm": 1.3828125, "learning_rate": 6.935860445750361e-06, "loss": 0.4805, "step": 10420 }, { "epoch": 1.7875771020452333, "grad_norm": 1.4140625, "learning_rate": 6.93412437080484e-06, "loss": 0.4592, "step": 10421 }, { "epoch": 1.7877502434801429, "grad_norm": 1.3828125, "learning_rate": 6.932388397844204e-06, "loss": 0.4588, "step": 10422 }, { "epoch": 1.7879233849150524, "grad_norm": 1.4375, "learning_rate": 6.9306525269262e-06, "loss": 0.5003, "step": 10423 }, { "epoch": 1.7880965263499622, "grad_norm": 1.3828125, "learning_rate": 6.928916758108575e-06, "loss": 0.506, "step": 10424 }, { "epoch": 1.7882696677848717, "grad_norm": 1.359375, "learning_rate": 6.927181091449061e-06, "loss": 0.4601, "step": 10425 }, { "epoch": 1.7884428092197813, "grad_norm": 1.375, "learning_rate": 6.9254455270053984e-06, "loss": 0.4454, "step": 10426 }, { "epoch": 1.788615950654691, "grad_norm": 1.4140625, "learning_rate": 6.923710064835324e-06, "loss": 0.4518, "step": 10427 }, { "epoch": 1.7887890920896008, "grad_norm": 1.4453125, "learning_rate": 6.921974704996557e-06, "loss": 0.5595, "step": 10428 }, { "epoch": 1.7889622335245103, "grad_norm": 1.2890625, "learning_rate": 6.920239447546832e-06, "loss": 0.4581, "step": 10429 }, { "epoch": 1.7891353749594199, "grad_norm": 1.359375, "learning_rate": 6.918504292543869e-06, "loss": 0.4148, "step": 10430 }, { "epoch": 1.7893085163943296, "grad_norm": 1.2890625, "learning_rate": 6.916769240045386e-06, "loss": 0.4072, "step": 10431 }, { "epoch": 1.7894816578292394, "grad_norm": 1.53125, "learning_rate": 6.915034290109099e-06, "loss": 0.4325, "step": 10432 }, { "epoch": 1.789654799264149, "grad_norm": 1.4609375, "learning_rate": 6.913299442792724e-06, "loss": 0.4295, "step": 10433 }, { "epoch": 1.7898279406990585, "grad_norm": 1.4140625, "learning_rate": 6.911564698153964e-06, "loss": 0.4533, "step": 10434 }, { "epoch": 1.7900010821339682, "grad_norm": 1.359375, "learning_rate": 6.909830056250527e-06, "loss": 0.4612, "step": 10435 }, { "epoch": 1.7901742235688778, "grad_norm": 1.3515625, "learning_rate": 6.908095517140117e-06, "loss": 0.437, "step": 10436 }, { "epoch": 1.7903473650037873, "grad_norm": 1.53125, "learning_rate": 6.906361080880428e-06, "loss": 0.4856, "step": 10437 }, { "epoch": 1.790520506438697, "grad_norm": 1.4609375, "learning_rate": 6.90462674752916e-06, "loss": 0.5095, "step": 10438 }, { "epoch": 1.7906936478736069, "grad_norm": 1.3125, "learning_rate": 6.9028925171439984e-06, "loss": 0.4232, "step": 10439 }, { "epoch": 1.7908667893085164, "grad_norm": 1.4609375, "learning_rate": 6.9011583897826405e-06, "loss": 0.4493, "step": 10440 }, { "epoch": 1.791039930743426, "grad_norm": 1.5, "learning_rate": 6.899424365502765e-06, "loss": 0.4451, "step": 10441 }, { "epoch": 1.7912130721783357, "grad_norm": 1.3671875, "learning_rate": 6.8976904443620554e-06, "loss": 0.4456, "step": 10442 }, { "epoch": 1.7913862136132455, "grad_norm": 1.4453125, "learning_rate": 6.89595662641819e-06, "loss": 0.451, "step": 10443 }, { "epoch": 1.791559355048155, "grad_norm": 1.5078125, "learning_rate": 6.89422291172884e-06, "loss": 0.5192, "step": 10444 }, { "epoch": 1.7917324964830645, "grad_norm": 1.3359375, "learning_rate": 6.892489300351683e-06, "loss": 0.4745, "step": 10445 }, { "epoch": 1.7919056379179743, "grad_norm": 1.40625, "learning_rate": 6.8907557923443815e-06, "loss": 0.4686, "step": 10446 }, { "epoch": 1.7920787793528838, "grad_norm": 1.3984375, "learning_rate": 6.8890223877646e-06, "loss": 0.497, "step": 10447 }, { "epoch": 1.7922519207877934, "grad_norm": 1.3203125, "learning_rate": 6.887289086670004e-06, "loss": 0.453, "step": 10448 }, { "epoch": 1.7924250622227031, "grad_norm": 1.4296875, "learning_rate": 6.885555889118245e-06, "loss": 0.5122, "step": 10449 }, { "epoch": 1.792598203657613, "grad_norm": 1.375, "learning_rate": 6.883822795166976e-06, "loss": 0.5202, "step": 10450 }, { "epoch": 1.7927713450925225, "grad_norm": 1.5078125, "learning_rate": 6.882089804873855e-06, "loss": 0.4631, "step": 10451 }, { "epoch": 1.792944486527432, "grad_norm": 1.40625, "learning_rate": 6.880356918296527e-06, "loss": 0.505, "step": 10452 }, { "epoch": 1.7931176279623418, "grad_norm": 1.3984375, "learning_rate": 6.878624135492633e-06, "loss": 0.4814, "step": 10453 }, { "epoch": 1.7932907693972515, "grad_norm": 1.390625, "learning_rate": 6.876891456519812e-06, "loss": 0.4576, "step": 10454 }, { "epoch": 1.793463910832161, "grad_norm": 1.3671875, "learning_rate": 6.875158881435706e-06, "loss": 0.4287, "step": 10455 }, { "epoch": 1.7936370522670706, "grad_norm": 1.375, "learning_rate": 6.873426410297943e-06, "loss": 0.4658, "step": 10456 }, { "epoch": 1.7938101937019804, "grad_norm": 1.359375, "learning_rate": 6.871694043164158e-06, "loss": 0.4855, "step": 10457 }, { "epoch": 1.79398333513689, "grad_norm": 1.4296875, "learning_rate": 6.869961780091972e-06, "loss": 0.4839, "step": 10458 }, { "epoch": 1.7941564765717994, "grad_norm": 1.5234375, "learning_rate": 6.868229621139009e-06, "loss": 0.4053, "step": 10459 }, { "epoch": 1.7943296180067092, "grad_norm": 1.34375, "learning_rate": 6.866497566362888e-06, "loss": 0.4004, "step": 10460 }, { "epoch": 1.794502759441619, "grad_norm": 1.453125, "learning_rate": 6.864765615821231e-06, "loss": 0.4257, "step": 10461 }, { "epoch": 1.7946759008765285, "grad_norm": 1.5, "learning_rate": 6.863033769571643e-06, "loss": 0.4728, "step": 10462 }, { "epoch": 1.794849042311438, "grad_norm": 1.375, "learning_rate": 6.861302027671738e-06, "loss": 0.4222, "step": 10463 }, { "epoch": 1.7950221837463478, "grad_norm": 1.375, "learning_rate": 6.859570390179121e-06, "loss": 0.4789, "step": 10464 }, { "epoch": 1.7951953251812576, "grad_norm": 1.53125, "learning_rate": 6.857838857151391e-06, "loss": 0.5219, "step": 10465 }, { "epoch": 1.7953684666161671, "grad_norm": 1.5234375, "learning_rate": 6.856107428646148e-06, "loss": 0.5055, "step": 10466 }, { "epoch": 1.7955416080510767, "grad_norm": 1.359375, "learning_rate": 6.85437610472099e-06, "loss": 0.4673, "step": 10467 }, { "epoch": 1.7957147494859864, "grad_norm": 1.4921875, "learning_rate": 6.852644885433505e-06, "loss": 0.4573, "step": 10468 }, { "epoch": 1.795887890920896, "grad_norm": 1.515625, "learning_rate": 6.850913770841283e-06, "loss": 0.4468, "step": 10469 }, { "epoch": 1.7960610323558055, "grad_norm": 1.4609375, "learning_rate": 6.849182761001907e-06, "loss": 0.4223, "step": 10470 }, { "epoch": 1.7962341737907153, "grad_norm": 1.40625, "learning_rate": 6.8474518559729576e-06, "loss": 0.4776, "step": 10471 }, { "epoch": 1.796407315225625, "grad_norm": 1.390625, "learning_rate": 6.845721055812016e-06, "loss": 0.4989, "step": 10472 }, { "epoch": 1.7965804566605346, "grad_norm": 1.390625, "learning_rate": 6.843990360576656e-06, "loss": 0.4026, "step": 10473 }, { "epoch": 1.796753598095444, "grad_norm": 1.4609375, "learning_rate": 6.842259770324447e-06, "loss": 0.4637, "step": 10474 }, { "epoch": 1.7969267395303539, "grad_norm": 1.3515625, "learning_rate": 6.840529285112954e-06, "loss": 0.4414, "step": 10475 }, { "epoch": 1.7970998809652636, "grad_norm": 1.3671875, "learning_rate": 6.838798904999746e-06, "loss": 0.4061, "step": 10476 }, { "epoch": 1.7972730224001732, "grad_norm": 1.4375, "learning_rate": 6.837068630042378e-06, "loss": 0.4434, "step": 10477 }, { "epoch": 1.7974461638350827, "grad_norm": 1.9453125, "learning_rate": 6.835338460298411e-06, "loss": 0.5386, "step": 10478 }, { "epoch": 1.7976193052699925, "grad_norm": 1.3828125, "learning_rate": 6.833608395825397e-06, "loss": 0.528, "step": 10479 }, { "epoch": 1.797792446704902, "grad_norm": 1.2734375, "learning_rate": 6.831878436680883e-06, "loss": 0.3855, "step": 10480 }, { "epoch": 1.7979655881398116, "grad_norm": 1.4453125, "learning_rate": 6.83014858292242e-06, "loss": 0.4719, "step": 10481 }, { "epoch": 1.7981387295747213, "grad_norm": 1.3359375, "learning_rate": 6.828418834607542e-06, "loss": 0.4382, "step": 10482 }, { "epoch": 1.798311871009631, "grad_norm": 1.4140625, "learning_rate": 6.826689191793799e-06, "loss": 0.4514, "step": 10483 }, { "epoch": 1.7984850124445406, "grad_norm": 1.3359375, "learning_rate": 6.824959654538722e-06, "loss": 0.4287, "step": 10484 }, { "epoch": 1.7986581538794502, "grad_norm": 1.3828125, "learning_rate": 6.823230222899846e-06, "loss": 0.5188, "step": 10485 }, { "epoch": 1.79883129531436, "grad_norm": 1.453125, "learning_rate": 6.821500896934693e-06, "loss": 0.4302, "step": 10486 }, { "epoch": 1.7990044367492697, "grad_norm": 1.2890625, "learning_rate": 6.819771676700794e-06, "loss": 0.4068, "step": 10487 }, { "epoch": 1.7991775781841792, "grad_norm": 1.453125, "learning_rate": 6.81804256225567e-06, "loss": 0.4868, "step": 10488 }, { "epoch": 1.7993507196190888, "grad_norm": 1.390625, "learning_rate": 6.816313553656837e-06, "loss": 0.4878, "step": 10489 }, { "epoch": 1.7995238610539985, "grad_norm": 1.390625, "learning_rate": 6.814584650961811e-06, "loss": 0.478, "step": 10490 }, { "epoch": 1.7996970024889083, "grad_norm": 1.4765625, "learning_rate": 6.812855854228104e-06, "loss": 0.6031, "step": 10491 }, { "epoch": 1.7998701439238176, "grad_norm": 1.3828125, "learning_rate": 6.81112716351322e-06, "loss": 0.4814, "step": 10492 }, { "epoch": 1.8000432853587274, "grad_norm": 1.34375, "learning_rate": 6.809398578874664e-06, "loss": 0.5014, "step": 10493 }, { "epoch": 1.8002164267936371, "grad_norm": 1.3359375, "learning_rate": 6.80767010036994e-06, "loss": 0.4213, "step": 10494 }, { "epoch": 1.8003895682285467, "grad_norm": 1.3828125, "learning_rate": 6.805941728056548e-06, "loss": 0.4575, "step": 10495 }, { "epoch": 1.8005627096634562, "grad_norm": 1.3515625, "learning_rate": 6.804213461991972e-06, "loss": 0.492, "step": 10496 }, { "epoch": 1.800735851098366, "grad_norm": 1.375, "learning_rate": 6.802485302233709e-06, "loss": 0.4549, "step": 10497 }, { "epoch": 1.8009089925332757, "grad_norm": 1.484375, "learning_rate": 6.80075724883924e-06, "loss": 0.5114, "step": 10498 }, { "epoch": 1.8010821339681853, "grad_norm": 1.515625, "learning_rate": 6.799029301866054e-06, "loss": 0.5354, "step": 10499 }, { "epoch": 1.8012552754030948, "grad_norm": 1.4453125, "learning_rate": 6.797301461371626e-06, "loss": 0.4221, "step": 10500 }, { "epoch": 1.8014284168380046, "grad_norm": 1.3515625, "learning_rate": 6.7955737274134324e-06, "loss": 0.4845, "step": 10501 }, { "epoch": 1.8016015582729143, "grad_norm": 1.4609375, "learning_rate": 6.793846100048947e-06, "loss": 0.4917, "step": 10502 }, { "epoch": 1.8017746997078237, "grad_norm": 1.4453125, "learning_rate": 6.792118579335637e-06, "loss": 0.4721, "step": 10503 }, { "epoch": 1.8019478411427334, "grad_norm": 1.46875, "learning_rate": 6.790391165330965e-06, "loss": 0.5043, "step": 10504 }, { "epoch": 1.8021209825776432, "grad_norm": 1.4140625, "learning_rate": 6.788663858092399e-06, "loss": 0.4864, "step": 10505 }, { "epoch": 1.8022941240125527, "grad_norm": 1.296875, "learning_rate": 6.786936657677392e-06, "loss": 0.4891, "step": 10506 }, { "epoch": 1.8024672654474623, "grad_norm": 1.4140625, "learning_rate": 6.785209564143402e-06, "loss": 0.425, "step": 10507 }, { "epoch": 1.802640406882372, "grad_norm": 1.4453125, "learning_rate": 6.783482577547877e-06, "loss": 0.5303, "step": 10508 }, { "epoch": 1.8028135483172818, "grad_norm": 1.359375, "learning_rate": 6.781755697948269e-06, "loss": 0.4248, "step": 10509 }, { "epoch": 1.8029866897521913, "grad_norm": 1.46875, "learning_rate": 6.7800289254020155e-06, "loss": 0.471, "step": 10510 }, { "epoch": 1.8031598311871009, "grad_norm": 1.3984375, "learning_rate": 6.778302259966558e-06, "loss": 0.5078, "step": 10511 }, { "epoch": 1.8033329726220106, "grad_norm": 1.4140625, "learning_rate": 6.7765757016993395e-06, "loss": 0.4495, "step": 10512 }, { "epoch": 1.8035061140569204, "grad_norm": 1.390625, "learning_rate": 6.774849250657784e-06, "loss": 0.4786, "step": 10513 }, { "epoch": 1.8036792554918297, "grad_norm": 1.2890625, "learning_rate": 6.773122906899326e-06, "loss": 0.431, "step": 10514 }, { "epoch": 1.8038523969267395, "grad_norm": 1.5078125, "learning_rate": 6.771396670481389e-06, "loss": 0.4838, "step": 10515 }, { "epoch": 1.8040255383616493, "grad_norm": 1.390625, "learning_rate": 6.769670541461404e-06, "loss": 0.4275, "step": 10516 }, { "epoch": 1.8041986797965588, "grad_norm": 1.3046875, "learning_rate": 6.767944519896779e-06, "loss": 0.4298, "step": 10517 }, { "epoch": 1.8043718212314683, "grad_norm": 1.3828125, "learning_rate": 6.766218605844934e-06, "loss": 0.4554, "step": 10518 }, { "epoch": 1.804544962666378, "grad_norm": 1.34375, "learning_rate": 6.764492799363282e-06, "loss": 0.4528, "step": 10519 }, { "epoch": 1.8047181041012879, "grad_norm": 1.4609375, "learning_rate": 6.762767100509226e-06, "loss": 0.4924, "step": 10520 }, { "epoch": 1.8048912455361974, "grad_norm": 1.484375, "learning_rate": 6.761041509340179e-06, "loss": 0.4809, "step": 10521 }, { "epoch": 1.805064386971107, "grad_norm": 1.3828125, "learning_rate": 6.759316025913534e-06, "loss": 0.4373, "step": 10522 }, { "epoch": 1.8052375284060167, "grad_norm": 1.3828125, "learning_rate": 6.75759065028669e-06, "loss": 0.4206, "step": 10523 }, { "epoch": 1.8054106698409265, "grad_norm": 1.3671875, "learning_rate": 6.755865382517046e-06, "loss": 0.4378, "step": 10524 }, { "epoch": 1.805583811275836, "grad_norm": 1.40625, "learning_rate": 6.754140222661982e-06, "loss": 0.4542, "step": 10525 }, { "epoch": 1.8057569527107455, "grad_norm": 1.34375, "learning_rate": 6.752415170778894e-06, "loss": 0.4356, "step": 10526 }, { "epoch": 1.8059300941456553, "grad_norm": 1.390625, "learning_rate": 6.750690226925163e-06, "loss": 0.4819, "step": 10527 }, { "epoch": 1.8061032355805648, "grad_norm": 1.34375, "learning_rate": 6.748965391158169e-06, "loss": 0.4779, "step": 10528 }, { "epoch": 1.8062763770154744, "grad_norm": 1.59375, "learning_rate": 6.7472406635352835e-06, "loss": 0.5521, "step": 10529 }, { "epoch": 1.8064495184503842, "grad_norm": 1.4375, "learning_rate": 6.745516044113882e-06, "loss": 0.4923, "step": 10530 }, { "epoch": 1.806622659885294, "grad_norm": 1.3984375, "learning_rate": 6.743791532951335e-06, "loss": 0.4861, "step": 10531 }, { "epoch": 1.8067958013202035, "grad_norm": 1.3359375, "learning_rate": 6.742067130105003e-06, "loss": 0.4521, "step": 10532 }, { "epoch": 1.806968942755113, "grad_norm": 1.453125, "learning_rate": 6.740342835632251e-06, "loss": 0.4847, "step": 10533 }, { "epoch": 1.8071420841900228, "grad_norm": 1.484375, "learning_rate": 6.738618649590434e-06, "loss": 0.4822, "step": 10534 }, { "epoch": 1.8073152256249325, "grad_norm": 1.3671875, "learning_rate": 6.736894572036907e-06, "loss": 0.4204, "step": 10535 }, { "epoch": 1.807488367059842, "grad_norm": 1.5546875, "learning_rate": 6.73517060302902e-06, "loss": 0.5008, "step": 10536 }, { "epoch": 1.8076615084947516, "grad_norm": 1.4140625, "learning_rate": 6.733446742624124e-06, "loss": 0.4825, "step": 10537 }, { "epoch": 1.8078346499296614, "grad_norm": 1.5078125, "learning_rate": 6.731722990879559e-06, "loss": 0.4434, "step": 10538 }, { "epoch": 1.808007791364571, "grad_norm": 2.25, "learning_rate": 6.729999347852665e-06, "loss": 0.4756, "step": 10539 }, { "epoch": 1.8081809327994804, "grad_norm": 1.3984375, "learning_rate": 6.72827581360078e-06, "loss": 0.5008, "step": 10540 }, { "epoch": 1.8083540742343902, "grad_norm": 1.4921875, "learning_rate": 6.7265523881812335e-06, "loss": 0.4944, "step": 10541 }, { "epoch": 1.8085272156693, "grad_norm": 1.453125, "learning_rate": 6.724829071651356e-06, "loss": 0.4436, "step": 10542 }, { "epoch": 1.8087003571042095, "grad_norm": 1.5625, "learning_rate": 6.723105864068475e-06, "loss": 0.5111, "step": 10543 }, { "epoch": 1.808873498539119, "grad_norm": 1.4453125, "learning_rate": 6.721382765489909e-06, "loss": 0.4734, "step": 10544 }, { "epoch": 1.8090466399740288, "grad_norm": 1.5, "learning_rate": 6.719659775972978e-06, "loss": 0.4877, "step": 10545 }, { "epoch": 1.8092197814089386, "grad_norm": 1.4375, "learning_rate": 6.717936895574995e-06, "loss": 0.4805, "step": 10546 }, { "epoch": 1.8093929228438481, "grad_norm": 1.4609375, "learning_rate": 6.716214124353266e-06, "loss": 0.4893, "step": 10547 }, { "epoch": 1.8095660642787577, "grad_norm": 1.4765625, "learning_rate": 6.714491462365108e-06, "loss": 0.4629, "step": 10548 }, { "epoch": 1.8097392057136674, "grad_norm": 1.3359375, "learning_rate": 6.712768909667822e-06, "loss": 0.4479, "step": 10549 }, { "epoch": 1.809912347148577, "grad_norm": 1.3984375, "learning_rate": 6.7110464663187025e-06, "loss": 0.435, "step": 10550 }, { "epoch": 1.8100854885834865, "grad_norm": 1.4921875, "learning_rate": 6.70932413237505e-06, "loss": 0.5505, "step": 10551 }, { "epoch": 1.8102586300183963, "grad_norm": 1.4296875, "learning_rate": 6.707601907894159e-06, "loss": 0.4781, "step": 10552 }, { "epoch": 1.810431771453306, "grad_norm": 1.46875, "learning_rate": 6.705879792933313e-06, "loss": 0.5169, "step": 10553 }, { "epoch": 1.8106049128882156, "grad_norm": 1.3671875, "learning_rate": 6.7041577875498e-06, "loss": 0.4708, "step": 10554 }, { "epoch": 1.810778054323125, "grad_norm": 1.4453125, "learning_rate": 6.702435891800905e-06, "loss": 0.4642, "step": 10555 }, { "epoch": 1.8109511957580349, "grad_norm": 1.5, "learning_rate": 6.700714105743898e-06, "loss": 0.4402, "step": 10556 }, { "epoch": 1.8111243371929446, "grad_norm": 1.4921875, "learning_rate": 6.698992429436062e-06, "loss": 0.4983, "step": 10557 }, { "epoch": 1.8112974786278542, "grad_norm": 1.34375, "learning_rate": 6.6972708629346574e-06, "loss": 0.4409, "step": 10558 }, { "epoch": 1.8114706200627637, "grad_norm": 1.4765625, "learning_rate": 6.695549406296964e-06, "loss": 0.4549, "step": 10559 }, { "epoch": 1.8116437614976735, "grad_norm": 1.375, "learning_rate": 6.6938280595802375e-06, "loss": 0.4306, "step": 10560 }, { "epoch": 1.811816902932583, "grad_norm": 1.4453125, "learning_rate": 6.692106822841741e-06, "loss": 0.4845, "step": 10561 }, { "epoch": 1.8119900443674926, "grad_norm": 1.3515625, "learning_rate": 6.6903856961387255e-06, "loss": 0.4569, "step": 10562 }, { "epoch": 1.8121631858024023, "grad_norm": 1.328125, "learning_rate": 6.688664679528449e-06, "loss": 0.4618, "step": 10563 }, { "epoch": 1.812336327237312, "grad_norm": 1.3046875, "learning_rate": 6.686943773068159e-06, "loss": 0.4509, "step": 10564 }, { "epoch": 1.8125094686722216, "grad_norm": 1.484375, "learning_rate": 6.6852229768150976e-06, "loss": 0.477, "step": 10565 }, { "epoch": 1.8126826101071312, "grad_norm": 1.453125, "learning_rate": 6.683502290826508e-06, "loss": 0.4856, "step": 10566 }, { "epoch": 1.812855751542041, "grad_norm": 1.4296875, "learning_rate": 6.681781715159631e-06, "loss": 0.5537, "step": 10567 }, { "epoch": 1.8130288929769507, "grad_norm": 1.28125, "learning_rate": 6.680061249871695e-06, "loss": 0.3862, "step": 10568 }, { "epoch": 1.8132020344118602, "grad_norm": 1.453125, "learning_rate": 6.678340895019932e-06, "loss": 0.446, "step": 10569 }, { "epoch": 1.8133751758467698, "grad_norm": 1.3046875, "learning_rate": 6.676620650661574e-06, "loss": 0.5039, "step": 10570 }, { "epoch": 1.8135483172816795, "grad_norm": 1.328125, "learning_rate": 6.674900516853841e-06, "loss": 0.4264, "step": 10571 }, { "epoch": 1.813721458716589, "grad_norm": 1.4765625, "learning_rate": 6.67318049365395e-06, "loss": 0.5165, "step": 10572 }, { "epoch": 1.8138946001514986, "grad_norm": 1.4609375, "learning_rate": 6.67146058111912e-06, "loss": 0.4984, "step": 10573 }, { "epoch": 1.8140677415864084, "grad_norm": 1.4453125, "learning_rate": 6.66974077930656e-06, "loss": 0.4245, "step": 10574 }, { "epoch": 1.8142408830213181, "grad_norm": 1.359375, "learning_rate": 6.6680210882734805e-06, "loss": 0.5175, "step": 10575 }, { "epoch": 1.8144140244562277, "grad_norm": 1.4296875, "learning_rate": 6.6663015080770885e-06, "loss": 0.4942, "step": 10576 }, { "epoch": 1.8145871658911372, "grad_norm": 1.40625, "learning_rate": 6.6645820387745786e-06, "loss": 0.4558, "step": 10577 }, { "epoch": 1.814760307326047, "grad_norm": 1.3359375, "learning_rate": 6.662862680423153e-06, "loss": 0.3973, "step": 10578 }, { "epoch": 1.8149334487609567, "grad_norm": 1.453125, "learning_rate": 6.661143433080001e-06, "loss": 0.4978, "step": 10579 }, { "epoch": 1.8151065901958663, "grad_norm": 1.34375, "learning_rate": 6.659424296802322e-06, "loss": 0.4554, "step": 10580 }, { "epoch": 1.8152797316307758, "grad_norm": 1.3828125, "learning_rate": 6.6577052716472915e-06, "loss": 0.4177, "step": 10581 }, { "epoch": 1.8154528730656856, "grad_norm": 1.46875, "learning_rate": 6.655986357672098e-06, "loss": 0.4465, "step": 10582 }, { "epoch": 1.8156260145005951, "grad_norm": 1.5390625, "learning_rate": 6.654267554933919e-06, "loss": 0.5186, "step": 10583 }, { "epoch": 1.8157991559355047, "grad_norm": 1.359375, "learning_rate": 6.652548863489928e-06, "loss": 0.4485, "step": 10584 }, { "epoch": 1.8159722973704144, "grad_norm": 1.46875, "learning_rate": 6.6508302833973006e-06, "loss": 0.4796, "step": 10585 }, { "epoch": 1.8161454388053242, "grad_norm": 1.40625, "learning_rate": 6.6491118147132e-06, "loss": 0.468, "step": 10586 }, { "epoch": 1.8163185802402337, "grad_norm": 1.375, "learning_rate": 6.6473934574947904e-06, "loss": 0.4562, "step": 10587 }, { "epoch": 1.8164917216751433, "grad_norm": 1.390625, "learning_rate": 6.645675211799239e-06, "loss": 0.4941, "step": 10588 }, { "epoch": 1.816664863110053, "grad_norm": 1.546875, "learning_rate": 6.643957077683692e-06, "loss": 0.4673, "step": 10589 }, { "epoch": 1.8168380045449628, "grad_norm": 1.421875, "learning_rate": 6.642239055205308e-06, "loss": 0.4965, "step": 10590 }, { "epoch": 1.8170111459798723, "grad_norm": 1.34375, "learning_rate": 6.640521144421237e-06, "loss": 0.4935, "step": 10591 }, { "epoch": 1.8171842874147819, "grad_norm": 1.328125, "learning_rate": 6.638803345388625e-06, "loss": 0.3904, "step": 10592 }, { "epoch": 1.8173574288496916, "grad_norm": 1.390625, "learning_rate": 6.63708565816461e-06, "loss": 0.4588, "step": 10593 }, { "epoch": 1.8175305702846012, "grad_norm": 1.4140625, "learning_rate": 6.6353680828063306e-06, "loss": 0.5262, "step": 10594 }, { "epoch": 1.8177037117195107, "grad_norm": 1.3671875, "learning_rate": 6.633650619370926e-06, "loss": 0.4292, "step": 10595 }, { "epoch": 1.8178768531544205, "grad_norm": 1.3515625, "learning_rate": 6.631933267915522e-06, "loss": 0.4862, "step": 10596 }, { "epoch": 1.8180499945893303, "grad_norm": 1.296875, "learning_rate": 6.630216028497248e-06, "loss": 0.404, "step": 10597 }, { "epoch": 1.8182231360242398, "grad_norm": 1.359375, "learning_rate": 6.628498901173224e-06, "loss": 0.4663, "step": 10598 }, { "epoch": 1.8183962774591493, "grad_norm": 1.4296875, "learning_rate": 6.626781886000573e-06, "loss": 0.4888, "step": 10599 }, { "epoch": 1.818569418894059, "grad_norm": 1.328125, "learning_rate": 6.625064983036409e-06, "loss": 0.4236, "step": 10600 }, { "epoch": 1.8187425603289689, "grad_norm": 1.46875, "learning_rate": 6.623348192337841e-06, "loss": 0.545, "step": 10601 }, { "epoch": 1.8189157017638784, "grad_norm": 1.4453125, "learning_rate": 6.621631513961981e-06, "loss": 0.4682, "step": 10602 }, { "epoch": 1.819088843198788, "grad_norm": 1.578125, "learning_rate": 6.619914947965934e-06, "loss": 0.5927, "step": 10603 }, { "epoch": 1.8192619846336977, "grad_norm": 1.4453125, "learning_rate": 6.618198494406802e-06, "loss": 0.4947, "step": 10604 }, { "epoch": 1.8194351260686072, "grad_norm": 1.3515625, "learning_rate": 6.616482153341677e-06, "loss": 0.428, "step": 10605 }, { "epoch": 1.8196082675035168, "grad_norm": 1.4140625, "learning_rate": 6.614765924827653e-06, "loss": 0.4398, "step": 10606 }, { "epoch": 1.8197814089384265, "grad_norm": 1.5234375, "learning_rate": 6.613049808921825e-06, "loss": 0.476, "step": 10607 }, { "epoch": 1.8199545503733363, "grad_norm": 1.515625, "learning_rate": 6.611333805681274e-06, "loss": 0.5322, "step": 10608 }, { "epoch": 1.8201276918082459, "grad_norm": 1.453125, "learning_rate": 6.6096179151630836e-06, "loss": 0.6498, "step": 10609 }, { "epoch": 1.8203008332431554, "grad_norm": 1.4765625, "learning_rate": 6.60790213742433e-06, "loss": 0.4032, "step": 10610 }, { "epoch": 1.8204739746780652, "grad_norm": 1.390625, "learning_rate": 6.6061864725220895e-06, "loss": 0.4893, "step": 10611 }, { "epoch": 1.820647116112975, "grad_norm": 1.5625, "learning_rate": 6.60447092051343e-06, "loss": 0.4472, "step": 10612 }, { "epoch": 1.8208202575478845, "grad_norm": 1.421875, "learning_rate": 6.602755481455426e-06, "loss": 0.4818, "step": 10613 }, { "epoch": 1.820993398982794, "grad_norm": 1.3515625, "learning_rate": 6.601040155405134e-06, "loss": 0.4382, "step": 10614 }, { "epoch": 1.8211665404177038, "grad_norm": 1.4921875, "learning_rate": 6.599324942419615e-06, "loss": 0.4291, "step": 10615 }, { "epoch": 1.8213396818526133, "grad_norm": 1.4296875, "learning_rate": 6.597609842555927e-06, "loss": 0.467, "step": 10616 }, { "epoch": 1.8215128232875228, "grad_norm": 1.3984375, "learning_rate": 6.595894855871119e-06, "loss": 0.4959, "step": 10617 }, { "epoch": 1.8216859647224326, "grad_norm": 1.3515625, "learning_rate": 6.59417998242224e-06, "loss": 0.4763, "step": 10618 }, { "epoch": 1.8218591061573424, "grad_norm": 1.34375, "learning_rate": 6.592465222266335e-06, "loss": 0.4969, "step": 10619 }, { "epoch": 1.822032247592252, "grad_norm": 1.3828125, "learning_rate": 6.590750575460445e-06, "loss": 0.444, "step": 10620 }, { "epoch": 1.8222053890271614, "grad_norm": 1.4453125, "learning_rate": 6.589036042061607e-06, "loss": 0.4932, "step": 10621 }, { "epoch": 1.8223785304620712, "grad_norm": 1.375, "learning_rate": 6.587321622126852e-06, "loss": 0.4334, "step": 10622 }, { "epoch": 1.822551671896981, "grad_norm": 1.3671875, "learning_rate": 6.58560731571321e-06, "loss": 0.4557, "step": 10623 }, { "epoch": 1.8227248133318905, "grad_norm": 1.34375, "learning_rate": 6.583893122877707e-06, "loss": 0.4489, "step": 10624 }, { "epoch": 1.8228979547668, "grad_norm": 1.40625, "learning_rate": 6.58217904367737e-06, "loss": 0.5083, "step": 10625 }, { "epoch": 1.8230710962017098, "grad_norm": 1.375, "learning_rate": 6.580465078169209e-06, "loss": 0.4909, "step": 10626 }, { "epoch": 1.8232442376366194, "grad_norm": 1.3125, "learning_rate": 6.57875122641024e-06, "loss": 0.4503, "step": 10627 }, { "epoch": 1.823417379071529, "grad_norm": 1.4296875, "learning_rate": 6.57703748845748e-06, "loss": 0.4786, "step": 10628 }, { "epoch": 1.8235905205064387, "grad_norm": 1.4140625, "learning_rate": 6.575323864367927e-06, "loss": 0.4185, "step": 10629 }, { "epoch": 1.8237636619413484, "grad_norm": 1.2890625, "learning_rate": 6.573610354198587e-06, "loss": 0.3979, "step": 10630 }, { "epoch": 1.823936803376258, "grad_norm": 1.359375, "learning_rate": 6.571896958006463e-06, "loss": 0.4418, "step": 10631 }, { "epoch": 1.8241099448111675, "grad_norm": 1.53125, "learning_rate": 6.570183675848545e-06, "loss": 0.4053, "step": 10632 }, { "epoch": 1.8242830862460773, "grad_norm": 1.4765625, "learning_rate": 6.568470507781828e-06, "loss": 0.4598, "step": 10633 }, { "epoch": 1.824456227680987, "grad_norm": 1.59375, "learning_rate": 6.566757453863293e-06, "loss": 0.5501, "step": 10634 }, { "epoch": 1.8246293691158966, "grad_norm": 1.4375, "learning_rate": 6.565044514149934e-06, "loss": 0.4951, "step": 10635 }, { "epoch": 1.8248025105508061, "grad_norm": 1.484375, "learning_rate": 6.563331688698724e-06, "loss": 0.4527, "step": 10636 }, { "epoch": 1.8249756519857159, "grad_norm": 1.359375, "learning_rate": 6.5616189775666436e-06, "loss": 0.4898, "step": 10637 }, { "epoch": 1.8251487934206256, "grad_norm": 1.546875, "learning_rate": 6.5599063808106625e-06, "loss": 0.4504, "step": 10638 }, { "epoch": 1.825321934855535, "grad_norm": 1.3828125, "learning_rate": 6.558193898487749e-06, "loss": 0.4445, "step": 10639 }, { "epoch": 1.8254950762904447, "grad_norm": 1.3671875, "learning_rate": 6.556481530654872e-06, "loss": 0.455, "step": 10640 }, { "epoch": 1.8256682177253545, "grad_norm": 1.5625, "learning_rate": 6.554769277368988e-06, "loss": 0.504, "step": 10641 }, { "epoch": 1.825841359160264, "grad_norm": 1.4375, "learning_rate": 6.553057138687056e-06, "loss": 0.5121, "step": 10642 }, { "epoch": 1.8260145005951736, "grad_norm": 1.390625, "learning_rate": 6.55134511466603e-06, "loss": 0.4496, "step": 10643 }, { "epoch": 1.8261876420300833, "grad_norm": 1.4296875, "learning_rate": 6.549633205362856e-06, "loss": 0.4442, "step": 10644 }, { "epoch": 1.826360783464993, "grad_norm": 1.3515625, "learning_rate": 6.547921410834486e-06, "loss": 0.4302, "step": 10645 }, { "epoch": 1.8265339248999026, "grad_norm": 1.453125, "learning_rate": 6.546209731137858e-06, "loss": 0.4705, "step": 10646 }, { "epoch": 1.8267070663348122, "grad_norm": 1.4765625, "learning_rate": 6.5444981663299135e-06, "loss": 0.4531, "step": 10647 }, { "epoch": 1.826880207769722, "grad_norm": 1.6640625, "learning_rate": 6.542786716467584e-06, "loss": 0.4693, "step": 10648 }, { "epoch": 1.8270533492046317, "grad_norm": 1.4375, "learning_rate": 6.541075381607801e-06, "loss": 0.4434, "step": 10649 }, { "epoch": 1.827226490639541, "grad_norm": 1.375, "learning_rate": 6.539364161807489e-06, "loss": 0.4905, "step": 10650 }, { "epoch": 1.8273996320744508, "grad_norm": 1.296875, "learning_rate": 6.537653057123574e-06, "loss": 0.4114, "step": 10651 }, { "epoch": 1.8275727735093605, "grad_norm": 1.4609375, "learning_rate": 6.535942067612975e-06, "loss": 0.4256, "step": 10652 }, { "epoch": 1.82774591494427, "grad_norm": 1.421875, "learning_rate": 6.534231193332605e-06, "loss": 0.4317, "step": 10653 }, { "epoch": 1.8279190563791796, "grad_norm": 1.3828125, "learning_rate": 6.5325204343393755e-06, "loss": 0.4568, "step": 10654 }, { "epoch": 1.8280921978140894, "grad_norm": 1.3125, "learning_rate": 6.530809790690195e-06, "loss": 0.4474, "step": 10655 }, { "epoch": 1.8282653392489991, "grad_norm": 1.4296875, "learning_rate": 6.52909926244197e-06, "loss": 0.4669, "step": 10656 }, { "epoch": 1.8284384806839087, "grad_norm": 1.5234375, "learning_rate": 6.527388849651597e-06, "loss": 0.424, "step": 10657 }, { "epoch": 1.8286116221188182, "grad_norm": 1.453125, "learning_rate": 6.525678552375971e-06, "loss": 0.474, "step": 10658 }, { "epoch": 1.828784763553728, "grad_norm": 1.515625, "learning_rate": 6.523968370671989e-06, "loss": 0.4437, "step": 10659 }, { "epoch": 1.8289579049886378, "grad_norm": 1.40625, "learning_rate": 6.522258304596536e-06, "loss": 0.5333, "step": 10660 }, { "epoch": 1.829131046423547, "grad_norm": 1.3828125, "learning_rate": 6.520548354206498e-06, "loss": 0.4135, "step": 10661 }, { "epoch": 1.8293041878584568, "grad_norm": 1.5234375, "learning_rate": 6.518838519558752e-06, "loss": 0.4622, "step": 10662 }, { "epoch": 1.8294773292933666, "grad_norm": 1.375, "learning_rate": 6.517128800710177e-06, "loss": 0.4913, "step": 10663 }, { "epoch": 1.8296504707282761, "grad_norm": 1.40625, "learning_rate": 6.515419197717651e-06, "loss": 0.4068, "step": 10664 }, { "epoch": 1.8298236121631857, "grad_norm": 1.3828125, "learning_rate": 6.513709710638034e-06, "loss": 0.4523, "step": 10665 }, { "epoch": 1.8299967535980954, "grad_norm": 1.375, "learning_rate": 6.512000339528196e-06, "loss": 0.4296, "step": 10666 }, { "epoch": 1.8301698950330052, "grad_norm": 1.3125, "learning_rate": 6.510291084444999e-06, "loss": 0.4199, "step": 10667 }, { "epoch": 1.8303430364679147, "grad_norm": 1.3984375, "learning_rate": 6.508581945445303e-06, "loss": 0.4295, "step": 10668 }, { "epoch": 1.8305161779028243, "grad_norm": 1.46875, "learning_rate": 6.506872922585956e-06, "loss": 0.4428, "step": 10669 }, { "epoch": 1.830689319337734, "grad_norm": 1.4296875, "learning_rate": 6.50516401592381e-06, "loss": 0.3981, "step": 10670 }, { "epoch": 1.8308624607726438, "grad_norm": 1.4609375, "learning_rate": 6.503455225515714e-06, "loss": 0.4909, "step": 10671 }, { "epoch": 1.8310356022075533, "grad_norm": 1.375, "learning_rate": 6.501746551418505e-06, "loss": 0.4483, "step": 10672 }, { "epoch": 1.8312087436424629, "grad_norm": 1.3828125, "learning_rate": 6.500037993689025e-06, "loss": 0.4455, "step": 10673 }, { "epoch": 1.8313818850773727, "grad_norm": 1.453125, "learning_rate": 6.498329552384107e-06, "loss": 0.4459, "step": 10674 }, { "epoch": 1.8315550265122822, "grad_norm": 1.390625, "learning_rate": 6.4966212275605814e-06, "loss": 0.4377, "step": 10675 }, { "epoch": 1.8317281679471917, "grad_norm": 1.5234375, "learning_rate": 6.494913019275275e-06, "loss": 0.5166, "step": 10676 }, { "epoch": 1.8319013093821015, "grad_norm": 1.3515625, "learning_rate": 6.493204927585006e-06, "loss": 0.4301, "step": 10677 }, { "epoch": 1.8320744508170113, "grad_norm": 1.390625, "learning_rate": 6.4914969525466035e-06, "loss": 0.4702, "step": 10678 }, { "epoch": 1.8322475922519208, "grad_norm": 1.4140625, "learning_rate": 6.489789094216874e-06, "loss": 0.4601, "step": 10679 }, { "epoch": 1.8324207336868303, "grad_norm": 1.3984375, "learning_rate": 6.488081352652633e-06, "loss": 0.4403, "step": 10680 }, { "epoch": 1.83259387512174, "grad_norm": 1.3671875, "learning_rate": 6.486373727910686e-06, "loss": 0.482, "step": 10681 }, { "epoch": 1.8327670165566499, "grad_norm": 1.46875, "learning_rate": 6.484666220047835e-06, "loss": 0.5307, "step": 10682 }, { "epoch": 1.8329401579915594, "grad_norm": 1.3671875, "learning_rate": 6.482958829120883e-06, "loss": 0.4189, "step": 10683 }, { "epoch": 1.833113299426469, "grad_norm": 1.421875, "learning_rate": 6.481251555186621e-06, "loss": 0.4863, "step": 10684 }, { "epoch": 1.8332864408613787, "grad_norm": 1.3046875, "learning_rate": 6.479544398301844e-06, "loss": 0.4597, "step": 10685 }, { "epoch": 1.8334595822962882, "grad_norm": 1.484375, "learning_rate": 6.477837358523338e-06, "loss": 0.5461, "step": 10686 }, { "epoch": 1.8336327237311978, "grad_norm": 1.3125, "learning_rate": 6.476130435907888e-06, "loss": 0.4269, "step": 10687 }, { "epoch": 1.8338058651661076, "grad_norm": 1.4453125, "learning_rate": 6.47442363051227e-06, "loss": 0.4661, "step": 10688 }, { "epoch": 1.8339790066010173, "grad_norm": 1.390625, "learning_rate": 6.472716942393269e-06, "loss": 0.4427, "step": 10689 }, { "epoch": 1.8341521480359269, "grad_norm": 1.3515625, "learning_rate": 6.471010371607649e-06, "loss": 0.4648, "step": 10690 }, { "epoch": 1.8343252894708364, "grad_norm": 1.390625, "learning_rate": 6.469303918212181e-06, "loss": 0.4334, "step": 10691 }, { "epoch": 1.8344984309057462, "grad_norm": 1.375, "learning_rate": 6.467597582263632e-06, "loss": 0.4718, "step": 10692 }, { "epoch": 1.834671572340656, "grad_norm": 1.328125, "learning_rate": 6.465891363818757e-06, "loss": 0.4993, "step": 10693 }, { "epoch": 1.8348447137755655, "grad_norm": 1.4375, "learning_rate": 6.464185262934315e-06, "loss": 0.468, "step": 10694 }, { "epoch": 1.835017855210475, "grad_norm": 1.59375, "learning_rate": 6.4624792796670624e-06, "loss": 0.5102, "step": 10695 }, { "epoch": 1.8351909966453848, "grad_norm": 1.4375, "learning_rate": 6.460773414073742e-06, "loss": 0.4825, "step": 10696 }, { "epoch": 1.8353641380802943, "grad_norm": 1.328125, "learning_rate": 6.4590676662111026e-06, "loss": 0.4231, "step": 10697 }, { "epoch": 1.8355372795152038, "grad_norm": 1.359375, "learning_rate": 6.457362036135878e-06, "loss": 0.4487, "step": 10698 }, { "epoch": 1.8357104209501136, "grad_norm": 1.5, "learning_rate": 6.4556565239048165e-06, "loss": 0.4787, "step": 10699 }, { "epoch": 1.8358835623850234, "grad_norm": 1.5, "learning_rate": 6.453951129574644e-06, "loss": 0.4652, "step": 10700 }, { "epoch": 1.836056703819933, "grad_norm": 1.5, "learning_rate": 6.452245853202093e-06, "loss": 0.6055, "step": 10701 }, { "epoch": 1.8362298452548425, "grad_norm": 1.3671875, "learning_rate": 6.450540694843883e-06, "loss": 0.4674, "step": 10702 }, { "epoch": 1.8364029866897522, "grad_norm": 1.3359375, "learning_rate": 6.448835654556741e-06, "loss": 0.4225, "step": 10703 }, { "epoch": 1.836576128124662, "grad_norm": 1.3984375, "learning_rate": 6.447130732397384e-06, "loss": 0.4485, "step": 10704 }, { "epoch": 1.8367492695595715, "grad_norm": 1.390625, "learning_rate": 6.445425928422521e-06, "loss": 0.4992, "step": 10705 }, { "epoch": 1.836922410994481, "grad_norm": 1.453125, "learning_rate": 6.443721242688865e-06, "loss": 0.5254, "step": 10706 }, { "epoch": 1.8370955524293908, "grad_norm": 1.375, "learning_rate": 6.442016675253122e-06, "loss": 0.4767, "step": 10707 }, { "epoch": 1.8372686938643004, "grad_norm": 1.4375, "learning_rate": 6.440312226171992e-06, "loss": 0.5057, "step": 10708 }, { "epoch": 1.83744183529921, "grad_norm": 1.421875, "learning_rate": 6.43860789550217e-06, "loss": 0.4674, "step": 10709 }, { "epoch": 1.8376149767341197, "grad_norm": 1.4765625, "learning_rate": 6.436903683300355e-06, "loss": 0.4208, "step": 10710 }, { "epoch": 1.8377881181690294, "grad_norm": 1.34375, "learning_rate": 6.4351995896232374e-06, "loss": 0.4462, "step": 10711 }, { "epoch": 1.837961259603939, "grad_norm": 1.5, "learning_rate": 6.433495614527497e-06, "loss": 0.4828, "step": 10712 }, { "epoch": 1.8381344010388485, "grad_norm": 1.5234375, "learning_rate": 6.431791758069823e-06, "loss": 0.5111, "step": 10713 }, { "epoch": 1.8383075424737583, "grad_norm": 1.484375, "learning_rate": 6.430088020306886e-06, "loss": 0.4994, "step": 10714 }, { "epoch": 1.838480683908668, "grad_norm": 1.3671875, "learning_rate": 6.4283844012953636e-06, "loss": 0.4476, "step": 10715 }, { "epoch": 1.8386538253435776, "grad_norm": 1.3984375, "learning_rate": 6.426680901091928e-06, "loss": 0.4787, "step": 10716 }, { "epoch": 1.8388269667784871, "grad_norm": 1.4609375, "learning_rate": 6.424977519753241e-06, "loss": 0.4947, "step": 10717 }, { "epoch": 1.8390001082133969, "grad_norm": 1.421875, "learning_rate": 6.423274257335966e-06, "loss": 0.4371, "step": 10718 }, { "epoch": 1.8391732496483064, "grad_norm": 1.2890625, "learning_rate": 6.421571113896765e-06, "loss": 0.4285, "step": 10719 }, { "epoch": 1.839346391083216, "grad_norm": 1.3984375, "learning_rate": 6.419868089492285e-06, "loss": 0.4832, "step": 10720 }, { "epoch": 1.8395195325181257, "grad_norm": 1.484375, "learning_rate": 6.418165184179183e-06, "loss": 0.533, "step": 10721 }, { "epoch": 1.8396926739530355, "grad_norm": 1.6171875, "learning_rate": 6.416462398014103e-06, "loss": 0.4455, "step": 10722 }, { "epoch": 1.839865815387945, "grad_norm": 1.34375, "learning_rate": 6.414759731053688e-06, "loss": 0.4373, "step": 10723 }, { "epoch": 1.8400389568228546, "grad_norm": 1.4140625, "learning_rate": 6.413057183354574e-06, "loss": 0.4538, "step": 10724 }, { "epoch": 1.8402120982577643, "grad_norm": 1.4921875, "learning_rate": 6.4113547549734e-06, "loss": 0.513, "step": 10725 }, { "epoch": 1.840385239692674, "grad_norm": 1.3203125, "learning_rate": 6.409652445966791e-06, "loss": 0.4594, "step": 10726 }, { "epoch": 1.8405583811275836, "grad_norm": 1.4140625, "learning_rate": 6.407950256391375e-06, "loss": 0.4487, "step": 10727 }, { "epoch": 1.8407315225624932, "grad_norm": 1.484375, "learning_rate": 6.40624818630378e-06, "loss": 0.4195, "step": 10728 }, { "epoch": 1.840904663997403, "grad_norm": 1.453125, "learning_rate": 6.404546235760616e-06, "loss": 0.4717, "step": 10729 }, { "epoch": 1.8410778054323125, "grad_norm": 1.6484375, "learning_rate": 6.402844404818502e-06, "loss": 0.4585, "step": 10730 }, { "epoch": 1.841250946867222, "grad_norm": 1.4140625, "learning_rate": 6.4011426935340485e-06, "loss": 0.4579, "step": 10731 }, { "epoch": 1.8414240883021318, "grad_norm": 1.4375, "learning_rate": 6.399441101963863e-06, "loss": 0.4374, "step": 10732 }, { "epoch": 1.8415972297370415, "grad_norm": 1.421875, "learning_rate": 6.397739630164547e-06, "loss": 0.5252, "step": 10733 }, { "epoch": 1.841770371171951, "grad_norm": 1.5, "learning_rate": 6.396038278192698e-06, "loss": 0.4296, "step": 10734 }, { "epoch": 1.8419435126068606, "grad_norm": 1.4609375, "learning_rate": 6.394337046104915e-06, "loss": 0.4648, "step": 10735 }, { "epoch": 1.8421166540417704, "grad_norm": 1.453125, "learning_rate": 6.392635933957783e-06, "loss": 0.4658, "step": 10736 }, { "epoch": 1.8422897954766801, "grad_norm": 1.375, "learning_rate": 6.390934941807893e-06, "loss": 0.4229, "step": 10737 }, { "epoch": 1.8424629369115897, "grad_norm": 1.4921875, "learning_rate": 6.389234069711822e-06, "loss": 0.4559, "step": 10738 }, { "epoch": 1.8426360783464992, "grad_norm": 1.515625, "learning_rate": 6.387533317726155e-06, "loss": 0.5247, "step": 10739 }, { "epoch": 1.842809219781409, "grad_norm": 1.3125, "learning_rate": 6.385832685907467e-06, "loss": 0.4463, "step": 10740 }, { "epoch": 1.8429823612163185, "grad_norm": 1.34375, "learning_rate": 6.384132174312322e-06, "loss": 0.4574, "step": 10741 }, { "epoch": 1.843155502651228, "grad_norm": 1.4921875, "learning_rate": 6.382431782997289e-06, "loss": 0.4705, "step": 10742 }, { "epoch": 1.8433286440861378, "grad_norm": 1.3359375, "learning_rate": 6.380731512018935e-06, "loss": 0.4346, "step": 10743 }, { "epoch": 1.8435017855210476, "grad_norm": 1.4609375, "learning_rate": 6.379031361433818e-06, "loss": 0.4644, "step": 10744 }, { "epoch": 1.8436749269559571, "grad_norm": 1.375, "learning_rate": 6.377331331298489e-06, "loss": 0.4587, "step": 10745 }, { "epoch": 1.8438480683908667, "grad_norm": 1.3828125, "learning_rate": 6.375631421669501e-06, "loss": 0.4505, "step": 10746 }, { "epoch": 1.8440212098257764, "grad_norm": 1.4375, "learning_rate": 6.3739316326034005e-06, "loss": 0.4572, "step": 10747 }, { "epoch": 1.8441943512606862, "grad_norm": 1.5390625, "learning_rate": 6.372231964156729e-06, "loss": 0.531, "step": 10748 }, { "epoch": 1.8443674926955957, "grad_norm": 1.5390625, "learning_rate": 6.370532416386027e-06, "loss": 0.4405, "step": 10749 }, { "epoch": 1.8445406341305053, "grad_norm": 1.2734375, "learning_rate": 6.368832989347827e-06, "loss": 0.4061, "step": 10750 }, { "epoch": 1.844713775565415, "grad_norm": 1.4296875, "learning_rate": 6.367133683098661e-06, "loss": 0.4657, "step": 10751 }, { "epoch": 1.8448869170003246, "grad_norm": 1.3359375, "learning_rate": 6.365434497695057e-06, "loss": 0.4525, "step": 10752 }, { "epoch": 1.8450600584352341, "grad_norm": 1.3984375, "learning_rate": 6.36373543319353e-06, "loss": 0.5374, "step": 10753 }, { "epoch": 1.845233199870144, "grad_norm": 1.4609375, "learning_rate": 6.362036489650609e-06, "loss": 0.445, "step": 10754 }, { "epoch": 1.8454063413050537, "grad_norm": 1.5078125, "learning_rate": 6.360337667122805e-06, "loss": 0.4733, "step": 10755 }, { "epoch": 1.8455794827399632, "grad_norm": 1.515625, "learning_rate": 6.358638965666628e-06, "loss": 0.5472, "step": 10756 }, { "epoch": 1.8457526241748727, "grad_norm": 1.328125, "learning_rate": 6.3569403853385814e-06, "loss": 0.4563, "step": 10757 }, { "epoch": 1.8459257656097825, "grad_norm": 1.625, "learning_rate": 6.355241926195171e-06, "loss": 0.516, "step": 10758 }, { "epoch": 1.8460989070446923, "grad_norm": 1.4296875, "learning_rate": 6.353543588292897e-06, "loss": 0.5017, "step": 10759 }, { "epoch": 1.8462720484796018, "grad_norm": 1.3671875, "learning_rate": 6.35184537168825e-06, "loss": 0.4892, "step": 10760 }, { "epoch": 1.8464451899145113, "grad_norm": 1.4453125, "learning_rate": 6.350147276437722e-06, "loss": 0.4285, "step": 10761 }, { "epoch": 1.846618331349421, "grad_norm": 1.453125, "learning_rate": 6.348449302597798e-06, "loss": 0.5395, "step": 10762 }, { "epoch": 1.8467914727843306, "grad_norm": 1.453125, "learning_rate": 6.346751450224959e-06, "loss": 0.5096, "step": 10763 }, { "epoch": 1.8469646142192402, "grad_norm": 1.3359375, "learning_rate": 6.345053719375688e-06, "loss": 0.4203, "step": 10764 }, { "epoch": 1.84713775565415, "grad_norm": 1.640625, "learning_rate": 6.343356110106458e-06, "loss": 0.4975, "step": 10765 }, { "epoch": 1.8473108970890597, "grad_norm": 1.359375, "learning_rate": 6.341658622473736e-06, "loss": 0.4517, "step": 10766 }, { "epoch": 1.8474840385239693, "grad_norm": 1.515625, "learning_rate": 6.339961256533992e-06, "loss": 0.4193, "step": 10767 }, { "epoch": 1.8476571799588788, "grad_norm": 1.3984375, "learning_rate": 6.3382640123436864e-06, "loss": 0.4073, "step": 10768 }, { "epoch": 1.8478303213937886, "grad_norm": 1.375, "learning_rate": 6.336566889959275e-06, "loss": 0.4437, "step": 10769 }, { "epoch": 1.8480034628286983, "grad_norm": 1.421875, "learning_rate": 6.334869889437214e-06, "loss": 0.4933, "step": 10770 }, { "epoch": 1.8481766042636079, "grad_norm": 1.5234375, "learning_rate": 6.333173010833955e-06, "loss": 0.4438, "step": 10771 }, { "epoch": 1.8483497456985174, "grad_norm": 1.53125, "learning_rate": 6.3314762542059416e-06, "loss": 0.4893, "step": 10772 }, { "epoch": 1.8485228871334272, "grad_norm": 1.375, "learning_rate": 6.329779619609615e-06, "loss": 0.4591, "step": 10773 }, { "epoch": 1.848696028568337, "grad_norm": 1.453125, "learning_rate": 6.32808310710141e-06, "loss": 0.5133, "step": 10774 }, { "epoch": 1.8488691700032462, "grad_norm": 1.421875, "learning_rate": 6.326386716737769e-06, "loss": 0.4673, "step": 10775 }, { "epoch": 1.849042311438156, "grad_norm": 1.4765625, "learning_rate": 6.324690448575115e-06, "loss": 0.4867, "step": 10776 }, { "epoch": 1.8492154528730658, "grad_norm": 1.4609375, "learning_rate": 6.322994302669877e-06, "loss": 0.4627, "step": 10777 }, { "epoch": 1.8493885943079753, "grad_norm": 1.359375, "learning_rate": 6.321298279078473e-06, "loss": 0.448, "step": 10778 }, { "epoch": 1.8495617357428848, "grad_norm": 1.484375, "learning_rate": 6.319602377857321e-06, "loss": 0.4564, "step": 10779 }, { "epoch": 1.8497348771777946, "grad_norm": 1.5859375, "learning_rate": 6.31790659906284e-06, "loss": 0.4123, "step": 10780 }, { "epoch": 1.8499080186127044, "grad_norm": 1.6796875, "learning_rate": 6.316210942751431e-06, "loss": 0.524, "step": 10781 }, { "epoch": 1.850081160047614, "grad_norm": 1.40625, "learning_rate": 6.314515408979503e-06, "loss": 0.4996, "step": 10782 }, { "epoch": 1.8502543014825235, "grad_norm": 1.4296875, "learning_rate": 6.31281999780346e-06, "loss": 0.4125, "step": 10783 }, { "epoch": 1.8504274429174332, "grad_norm": 1.4375, "learning_rate": 6.311124709279694e-06, "loss": 0.4337, "step": 10784 }, { "epoch": 1.850600584352343, "grad_norm": 1.484375, "learning_rate": 6.309429543464597e-06, "loss": 0.4332, "step": 10785 }, { "epoch": 1.8507737257872523, "grad_norm": 1.4765625, "learning_rate": 6.307734500414564e-06, "loss": 0.5092, "step": 10786 }, { "epoch": 1.850946867222162, "grad_norm": 1.34375, "learning_rate": 6.306039580185978e-06, "loss": 0.4737, "step": 10787 }, { "epoch": 1.8511200086570718, "grad_norm": 1.40625, "learning_rate": 6.304344782835217e-06, "loss": 0.4424, "step": 10788 }, { "epoch": 1.8512931500919814, "grad_norm": 1.5234375, "learning_rate": 6.302650108418662e-06, "loss": 0.4364, "step": 10789 }, { "epoch": 1.851466291526891, "grad_norm": 1.390625, "learning_rate": 6.300955556992678e-06, "loss": 0.4318, "step": 10790 }, { "epoch": 1.8516394329618007, "grad_norm": 1.5, "learning_rate": 6.299261128613641e-06, "loss": 0.5296, "step": 10791 }, { "epoch": 1.8518125743967104, "grad_norm": 1.3515625, "learning_rate": 6.297566823337913e-06, "loss": 0.439, "step": 10792 }, { "epoch": 1.85198571583162, "grad_norm": 1.2578125, "learning_rate": 6.295872641221852e-06, "loss": 0.4165, "step": 10793 }, { "epoch": 1.8521588572665295, "grad_norm": 1.3203125, "learning_rate": 6.294178582321815e-06, "loss": 0.4458, "step": 10794 }, { "epoch": 1.8523319987014393, "grad_norm": 1.4453125, "learning_rate": 6.292484646694158e-06, "loss": 0.3922, "step": 10795 }, { "epoch": 1.852505140136349, "grad_norm": 1.5390625, "learning_rate": 6.290790834395222e-06, "loss": 0.474, "step": 10796 }, { "epoch": 1.8526782815712584, "grad_norm": 1.34375, "learning_rate": 6.289097145481355e-06, "loss": 0.4245, "step": 10797 }, { "epoch": 1.8528514230061681, "grad_norm": 1.4921875, "learning_rate": 6.287403580008898e-06, "loss": 0.461, "step": 10798 }, { "epoch": 1.8530245644410779, "grad_norm": 1.4296875, "learning_rate": 6.285710138034187e-06, "loss": 0.457, "step": 10799 }, { "epoch": 1.8531977058759874, "grad_norm": 1.421875, "learning_rate": 6.28401681961355e-06, "loss": 0.4584, "step": 10800 }, { "epoch": 1.853370847310897, "grad_norm": 1.4375, "learning_rate": 6.282323624803317e-06, "loss": 0.426, "step": 10801 }, { "epoch": 1.8535439887458067, "grad_norm": 1.390625, "learning_rate": 6.280630553659809e-06, "loss": 0.4301, "step": 10802 }, { "epoch": 1.8537171301807165, "grad_norm": 1.3671875, "learning_rate": 6.278937606239346e-06, "loss": 0.4333, "step": 10803 }, { "epoch": 1.853890271615626, "grad_norm": 1.3984375, "learning_rate": 6.2772447825982465e-06, "loss": 0.4814, "step": 10804 }, { "epoch": 1.8540634130505356, "grad_norm": 1.484375, "learning_rate": 6.275552082792816e-06, "loss": 0.4931, "step": 10805 }, { "epoch": 1.8542365544854453, "grad_norm": 1.2890625, "learning_rate": 6.273859506879365e-06, "loss": 0.4086, "step": 10806 }, { "epoch": 1.854409695920355, "grad_norm": 1.3828125, "learning_rate": 6.2721670549141925e-06, "loss": 0.4472, "step": 10807 }, { "epoch": 1.8545828373552646, "grad_norm": 1.3984375, "learning_rate": 6.270474726953604e-06, "loss": 0.4712, "step": 10808 }, { "epoch": 1.8547559787901742, "grad_norm": 1.5078125, "learning_rate": 6.268782523053887e-06, "loss": 0.4975, "step": 10809 }, { "epoch": 1.854929120225084, "grad_norm": 1.40625, "learning_rate": 6.267090443271335e-06, "loss": 0.4609, "step": 10810 }, { "epoch": 1.8551022616599935, "grad_norm": 1.4375, "learning_rate": 6.265398487662238e-06, "loss": 0.457, "step": 10811 }, { "epoch": 1.855275403094903, "grad_norm": 1.3671875, "learning_rate": 6.263706656282869e-06, "loss": 0.4819, "step": 10812 }, { "epoch": 1.8554485445298128, "grad_norm": 1.359375, "learning_rate": 6.2620149491895146e-06, "loss": 0.3838, "step": 10813 }, { "epoch": 1.8556216859647225, "grad_norm": 1.4140625, "learning_rate": 6.260323366438442e-06, "loss": 0.4506, "step": 10814 }, { "epoch": 1.855794827399632, "grad_norm": 1.234375, "learning_rate": 6.2586319080859245e-06, "loss": 0.4129, "step": 10815 }, { "epoch": 1.8559679688345416, "grad_norm": 1.359375, "learning_rate": 6.256940574188229e-06, "loss": 0.4222, "step": 10816 }, { "epoch": 1.8561411102694514, "grad_norm": 1.3671875, "learning_rate": 6.255249364801609e-06, "loss": 0.4182, "step": 10817 }, { "epoch": 1.8563142517043612, "grad_norm": 1.359375, "learning_rate": 6.2535582799823334e-06, "loss": 0.4478, "step": 10818 }, { "epoch": 1.8564873931392707, "grad_norm": 1.546875, "learning_rate": 6.251867319786648e-06, "loss": 0.5108, "step": 10819 }, { "epoch": 1.8566605345741802, "grad_norm": 1.40625, "learning_rate": 6.250176484270805e-06, "loss": 0.469, "step": 10820 }, { "epoch": 1.85683367600909, "grad_norm": 1.3359375, "learning_rate": 6.2484857734910465e-06, "loss": 0.4353, "step": 10821 }, { "epoch": 1.8570068174439995, "grad_norm": 1.4609375, "learning_rate": 6.246795187503614e-06, "loss": 0.5173, "step": 10822 }, { "epoch": 1.857179958878909, "grad_norm": 1.375, "learning_rate": 6.245104726364746e-06, "loss": 0.3993, "step": 10823 }, { "epoch": 1.8573531003138188, "grad_norm": 1.6796875, "learning_rate": 6.243414390130671e-06, "loss": 0.4929, "step": 10824 }, { "epoch": 1.8575262417487286, "grad_norm": 1.3671875, "learning_rate": 6.241724178857621e-06, "loss": 0.4534, "step": 10825 }, { "epoch": 1.8576993831836381, "grad_norm": 1.328125, "learning_rate": 6.240034092601818e-06, "loss": 0.4241, "step": 10826 }, { "epoch": 1.8578725246185477, "grad_norm": 1.515625, "learning_rate": 6.238344131419481e-06, "loss": 0.5083, "step": 10827 }, { "epoch": 1.8580456660534574, "grad_norm": 1.59375, "learning_rate": 6.236654295366826e-06, "loss": 0.5365, "step": 10828 }, { "epoch": 1.8582188074883672, "grad_norm": 1.484375, "learning_rate": 6.234964584500067e-06, "loss": 0.546, "step": 10829 }, { "epoch": 1.8583919489232767, "grad_norm": 1.2734375, "learning_rate": 6.233274998875414e-06, "loss": 0.4216, "step": 10830 }, { "epoch": 1.8585650903581863, "grad_norm": 1.375, "learning_rate": 6.231585538549063e-06, "loss": 0.4567, "step": 10831 }, { "epoch": 1.858738231793096, "grad_norm": 1.421875, "learning_rate": 6.229896203577219e-06, "loss": 0.4101, "step": 10832 }, { "epoch": 1.8589113732280056, "grad_norm": 1.390625, "learning_rate": 6.228206994016073e-06, "loss": 0.4478, "step": 10833 }, { "epoch": 1.8590845146629151, "grad_norm": 1.4140625, "learning_rate": 6.226517909921817e-06, "loss": 0.5022, "step": 10834 }, { "epoch": 1.859257656097825, "grad_norm": 1.4375, "learning_rate": 6.224828951350639e-06, "loss": 0.4231, "step": 10835 }, { "epoch": 1.8594307975327347, "grad_norm": 1.421875, "learning_rate": 6.223140118358719e-06, "loss": 0.4653, "step": 10836 }, { "epoch": 1.8596039389676442, "grad_norm": 1.5234375, "learning_rate": 6.221451411002238e-06, "loss": 0.4979, "step": 10837 }, { "epoch": 1.8597770804025537, "grad_norm": 1.421875, "learning_rate": 6.219762829337367e-06, "loss": 0.397, "step": 10838 }, { "epoch": 1.8599502218374635, "grad_norm": 1.28125, "learning_rate": 6.218074373420276e-06, "loss": 0.4723, "step": 10839 }, { "epoch": 1.8601233632723733, "grad_norm": 1.4140625, "learning_rate": 6.216386043307132e-06, "loss": 0.43, "step": 10840 }, { "epoch": 1.8602965047072828, "grad_norm": 1.2578125, "learning_rate": 6.214697839054098e-06, "loss": 0.433, "step": 10841 }, { "epoch": 1.8604696461421923, "grad_norm": 1.328125, "learning_rate": 6.2130097607173305e-06, "loss": 0.4311, "step": 10842 }, { "epoch": 1.860642787577102, "grad_norm": 1.390625, "learning_rate": 6.211321808352981e-06, "loss": 0.4375, "step": 10843 }, { "epoch": 1.8608159290120116, "grad_norm": 1.46875, "learning_rate": 6.209633982017201e-06, "loss": 0.4883, "step": 10844 }, { "epoch": 1.8609890704469212, "grad_norm": 1.328125, "learning_rate": 6.207946281766132e-06, "loss": 0.4091, "step": 10845 }, { "epoch": 1.861162211881831, "grad_norm": 1.390625, "learning_rate": 6.206258707655916e-06, "loss": 0.4278, "step": 10846 }, { "epoch": 1.8613353533167407, "grad_norm": 1.4453125, "learning_rate": 6.204571259742691e-06, "loss": 0.475, "step": 10847 }, { "epoch": 1.8615084947516503, "grad_norm": 1.625, "learning_rate": 6.202883938082587e-06, "loss": 0.4548, "step": 10848 }, { "epoch": 1.8616816361865598, "grad_norm": 1.375, "learning_rate": 6.201196742731732e-06, "loss": 0.3844, "step": 10849 }, { "epoch": 1.8618547776214696, "grad_norm": 1.4296875, "learning_rate": 6.1995096737462466e-06, "loss": 0.4475, "step": 10850 }, { "epoch": 1.8620279190563793, "grad_norm": 1.3046875, "learning_rate": 6.197822731182259e-06, "loss": 0.4502, "step": 10851 }, { "epoch": 1.8622010604912889, "grad_norm": 1.4140625, "learning_rate": 6.196135915095878e-06, "loss": 0.4021, "step": 10852 }, { "epoch": 1.8623742019261984, "grad_norm": 1.46875, "learning_rate": 6.194449225543218e-06, "loss": 0.4992, "step": 10853 }, { "epoch": 1.8625473433611082, "grad_norm": 1.3984375, "learning_rate": 6.1927626625803824e-06, "loss": 0.4696, "step": 10854 }, { "epoch": 1.8627204847960177, "grad_norm": 1.46875, "learning_rate": 6.191076226263476e-06, "loss": 0.4369, "step": 10855 }, { "epoch": 1.8628936262309272, "grad_norm": 1.421875, "learning_rate": 6.189389916648598e-06, "loss": 0.5465, "step": 10856 }, { "epoch": 1.863066767665837, "grad_norm": 1.4453125, "learning_rate": 6.187703733791841e-06, "loss": 0.4382, "step": 10857 }, { "epoch": 1.8632399091007468, "grad_norm": 1.3515625, "learning_rate": 6.1860176777492966e-06, "loss": 0.4177, "step": 10858 }, { "epoch": 1.8634130505356563, "grad_norm": 1.5546875, "learning_rate": 6.18433174857705e-06, "loss": 0.4817, "step": 10859 }, { "epoch": 1.8635861919705659, "grad_norm": 1.484375, "learning_rate": 6.182645946331182e-06, "loss": 0.5222, "step": 10860 }, { "epoch": 1.8637593334054756, "grad_norm": 1.3671875, "learning_rate": 6.180960271067767e-06, "loss": 0.459, "step": 10861 }, { "epoch": 1.8639324748403854, "grad_norm": 1.34375, "learning_rate": 6.179274722842885e-06, "loss": 0.4101, "step": 10862 }, { "epoch": 1.864105616275295, "grad_norm": 1.3203125, "learning_rate": 6.1775893017126046e-06, "loss": 0.4524, "step": 10863 }, { "epoch": 1.8642787577102045, "grad_norm": 1.390625, "learning_rate": 6.1759040077329845e-06, "loss": 0.4043, "step": 10864 }, { "epoch": 1.8644518991451142, "grad_norm": 1.46875, "learning_rate": 6.174218840960091e-06, "loss": 0.5058, "step": 10865 }, { "epoch": 1.8646250405800238, "grad_norm": 1.453125, "learning_rate": 6.172533801449976e-06, "loss": 0.5065, "step": 10866 }, { "epoch": 1.8647981820149333, "grad_norm": 1.609375, "learning_rate": 6.170848889258694e-06, "loss": 0.5166, "step": 10867 }, { "epoch": 1.864971323449843, "grad_norm": 1.421875, "learning_rate": 6.169164104442292e-06, "loss": 0.5541, "step": 10868 }, { "epoch": 1.8651444648847528, "grad_norm": 1.421875, "learning_rate": 6.167479447056813e-06, "loss": 0.5119, "step": 10869 }, { "epoch": 1.8653176063196624, "grad_norm": 1.3671875, "learning_rate": 6.1657949171582965e-06, "loss": 0.4446, "step": 10870 }, { "epoch": 1.865490747754572, "grad_norm": 1.515625, "learning_rate": 6.164110514802781e-06, "loss": 0.5625, "step": 10871 }, { "epoch": 1.8656638891894817, "grad_norm": 1.3984375, "learning_rate": 6.162426240046291e-06, "loss": 0.446, "step": 10872 }, { "epoch": 1.8658370306243914, "grad_norm": 1.40625, "learning_rate": 6.160742092944857e-06, "loss": 0.4748, "step": 10873 }, { "epoch": 1.866010172059301, "grad_norm": 1.46875, "learning_rate": 6.1590580735545015e-06, "loss": 0.5005, "step": 10874 }, { "epoch": 1.8661833134942105, "grad_norm": 1.515625, "learning_rate": 6.157374181931244e-06, "loss": 0.4873, "step": 10875 }, { "epoch": 1.8663564549291203, "grad_norm": 1.359375, "learning_rate": 6.1556904181310936e-06, "loss": 0.449, "step": 10876 }, { "epoch": 1.8665295963640298, "grad_norm": 1.484375, "learning_rate": 6.154006782210066e-06, "loss": 0.4889, "step": 10877 }, { "epoch": 1.8667027377989394, "grad_norm": 1.40625, "learning_rate": 6.152323274224161e-06, "loss": 0.5165, "step": 10878 }, { "epoch": 1.8668758792338491, "grad_norm": 1.3515625, "learning_rate": 6.150639894229381e-06, "loss": 0.4426, "step": 10879 }, { "epoch": 1.8670490206687589, "grad_norm": 1.453125, "learning_rate": 6.148956642281726e-06, "loss": 0.4662, "step": 10880 }, { "epoch": 1.8672221621036684, "grad_norm": 1.34375, "learning_rate": 6.1472735184371845e-06, "loss": 0.404, "step": 10881 }, { "epoch": 1.867395303538578, "grad_norm": 1.3125, "learning_rate": 6.1455905227517445e-06, "loss": 0.483, "step": 10882 }, { "epoch": 1.8675684449734877, "grad_norm": 1.4296875, "learning_rate": 6.143907655281394e-06, "loss": 0.5141, "step": 10883 }, { "epoch": 1.8677415864083975, "grad_norm": 1.5234375, "learning_rate": 6.1422249160821135e-06, "loss": 0.4921, "step": 10884 }, { "epoch": 1.867914727843307, "grad_norm": 1.3828125, "learning_rate": 6.140542305209873e-06, "loss": 0.4631, "step": 10885 }, { "epoch": 1.8680878692782166, "grad_norm": 1.3671875, "learning_rate": 6.138859822720647e-06, "loss": 0.4673, "step": 10886 }, { "epoch": 1.8682610107131263, "grad_norm": 1.453125, "learning_rate": 6.137177468670404e-06, "loss": 0.4804, "step": 10887 }, { "epoch": 1.8684341521480359, "grad_norm": 1.3828125, "learning_rate": 6.135495243115104e-06, "loss": 0.4889, "step": 10888 }, { "epoch": 1.8686072935829454, "grad_norm": 1.484375, "learning_rate": 6.133813146110708e-06, "loss": 0.4466, "step": 10889 }, { "epoch": 1.8687804350178552, "grad_norm": 1.4375, "learning_rate": 6.132131177713165e-06, "loss": 0.4959, "step": 10890 }, { "epoch": 1.868953576452765, "grad_norm": 1.34375, "learning_rate": 6.130449337978429e-06, "loss": 0.4247, "step": 10891 }, { "epoch": 1.8691267178876745, "grad_norm": 1.4921875, "learning_rate": 6.128767626962448e-06, "loss": 0.5393, "step": 10892 }, { "epoch": 1.869299859322584, "grad_norm": 1.453125, "learning_rate": 6.127086044721154e-06, "loss": 0.5295, "step": 10893 }, { "epoch": 1.8694730007574938, "grad_norm": 1.2734375, "learning_rate": 6.125404591310494e-06, "loss": 0.3876, "step": 10894 }, { "epoch": 1.8696461421924035, "grad_norm": 1.4375, "learning_rate": 6.123723266786396e-06, "loss": 0.4276, "step": 10895 }, { "epoch": 1.869819283627313, "grad_norm": 1.4609375, "learning_rate": 6.122042071204792e-06, "loss": 0.4204, "step": 10896 }, { "epoch": 1.8699924250622226, "grad_norm": 1.4765625, "learning_rate": 6.120361004621602e-06, "loss": 0.4895, "step": 10897 }, { "epoch": 1.8701655664971324, "grad_norm": 1.3359375, "learning_rate": 6.118680067092747e-06, "loss": 0.4777, "step": 10898 }, { "epoch": 1.870338707932042, "grad_norm": 1.421875, "learning_rate": 6.116999258674146e-06, "loss": 0.5134, "step": 10899 }, { "epoch": 1.8705118493669515, "grad_norm": 1.328125, "learning_rate": 6.115318579421704e-06, "loss": 0.455, "step": 10900 }, { "epoch": 1.8706849908018612, "grad_norm": 1.453125, "learning_rate": 6.1136380293913335e-06, "loss": 0.4765, "step": 10901 }, { "epoch": 1.870858132236771, "grad_norm": 1.390625, "learning_rate": 6.111957608638934e-06, "loss": 0.5328, "step": 10902 }, { "epoch": 1.8710312736716805, "grad_norm": 1.46875, "learning_rate": 6.1102773172204034e-06, "loss": 0.447, "step": 10903 }, { "epoch": 1.87120441510659, "grad_norm": 1.4609375, "learning_rate": 6.108597155191637e-06, "loss": 0.4784, "step": 10904 }, { "epoch": 1.8713775565414998, "grad_norm": 1.359375, "learning_rate": 6.106917122608525e-06, "loss": 0.4843, "step": 10905 }, { "epoch": 1.8715506979764096, "grad_norm": 1.3203125, "learning_rate": 6.105237219526956e-06, "loss": 0.4441, "step": 10906 }, { "epoch": 1.8717238394113191, "grad_norm": 1.3125, "learning_rate": 6.103557446002806e-06, "loss": 0.4293, "step": 10907 }, { "epoch": 1.8718969808462287, "grad_norm": 1.359375, "learning_rate": 6.101877802091955e-06, "loss": 0.4668, "step": 10908 }, { "epoch": 1.8720701222811384, "grad_norm": 1.4375, "learning_rate": 6.100198287850274e-06, "loss": 0.4891, "step": 10909 }, { "epoch": 1.8722432637160482, "grad_norm": 1.3984375, "learning_rate": 6.098518903333631e-06, "loss": 0.4399, "step": 10910 }, { "epoch": 1.8724164051509575, "grad_norm": 1.375, "learning_rate": 6.096839648597892e-06, "loss": 0.4886, "step": 10911 }, { "epoch": 1.8725895465858673, "grad_norm": 1.359375, "learning_rate": 6.095160523698913e-06, "loss": 0.48, "step": 10912 }, { "epoch": 1.872762688020777, "grad_norm": 1.4296875, "learning_rate": 6.0934815286925546e-06, "loss": 0.4478, "step": 10913 }, { "epoch": 1.8729358294556866, "grad_norm": 1.5390625, "learning_rate": 6.091802663634661e-06, "loss": 0.4757, "step": 10914 }, { "epoch": 1.8731089708905961, "grad_norm": 1.3671875, "learning_rate": 6.090123928581082e-06, "loss": 0.4414, "step": 10915 }, { "epoch": 1.873282112325506, "grad_norm": 1.4140625, "learning_rate": 6.0884453235876615e-06, "loss": 0.4311, "step": 10916 }, { "epoch": 1.8734552537604157, "grad_norm": 1.4140625, "learning_rate": 6.086766848710236e-06, "loss": 0.4571, "step": 10917 }, { "epoch": 1.8736283951953252, "grad_norm": 1.3984375, "learning_rate": 6.085088504004641e-06, "loss": 0.4718, "step": 10918 }, { "epoch": 1.8738015366302347, "grad_norm": 1.6171875, "learning_rate": 6.083410289526704e-06, "loss": 0.4776, "step": 10919 }, { "epoch": 1.8739746780651445, "grad_norm": 1.3046875, "learning_rate": 6.081732205332252e-06, "loss": 0.4253, "step": 10920 }, { "epoch": 1.8741478195000543, "grad_norm": 1.4375, "learning_rate": 6.080054251477101e-06, "loss": 0.4923, "step": 10921 }, { "epoch": 1.8743209609349636, "grad_norm": 1.3828125, "learning_rate": 6.078376428017072e-06, "loss": 0.5012, "step": 10922 }, { "epoch": 1.8744941023698733, "grad_norm": 1.5, "learning_rate": 6.0766987350079765e-06, "loss": 0.6346, "step": 10923 }, { "epoch": 1.8746672438047831, "grad_norm": 1.4375, "learning_rate": 6.075021172505618e-06, "loss": 0.4201, "step": 10924 }, { "epoch": 1.8748403852396927, "grad_norm": 1.5234375, "learning_rate": 6.073343740565807e-06, "loss": 0.5292, "step": 10925 }, { "epoch": 1.8750135266746022, "grad_norm": 1.34375, "learning_rate": 6.071666439244331e-06, "loss": 0.4055, "step": 10926 }, { "epoch": 1.875186668109512, "grad_norm": 1.375, "learning_rate": 6.069989268596996e-06, "loss": 0.4589, "step": 10927 }, { "epoch": 1.8753598095444217, "grad_norm": 1.390625, "learning_rate": 6.06831222867959e-06, "loss": 0.476, "step": 10928 }, { "epoch": 1.8755329509793313, "grad_norm": 1.7890625, "learning_rate": 6.066635319547895e-06, "loss": 0.4688, "step": 10929 }, { "epoch": 1.8757060924142408, "grad_norm": 1.3359375, "learning_rate": 6.064958541257696e-06, "loss": 0.4327, "step": 10930 }, { "epoch": 1.8758792338491506, "grad_norm": 1.5078125, "learning_rate": 6.063281893864768e-06, "loss": 0.5356, "step": 10931 }, { "epoch": 1.8760523752840603, "grad_norm": 1.328125, "learning_rate": 6.061605377424886e-06, "loss": 0.4377, "step": 10932 }, { "epoch": 1.8762255167189696, "grad_norm": 1.375, "learning_rate": 6.059928991993816e-06, "loss": 0.4712, "step": 10933 }, { "epoch": 1.8763986581538794, "grad_norm": 1.546875, "learning_rate": 6.058252737627324e-06, "loss": 0.4907, "step": 10934 }, { "epoch": 1.8765717995887892, "grad_norm": 1.4296875, "learning_rate": 6.0565766143811714e-06, "loss": 0.4931, "step": 10935 }, { "epoch": 1.8767449410236987, "grad_norm": 1.34375, "learning_rate": 6.054900622311106e-06, "loss": 0.4274, "step": 10936 }, { "epoch": 1.8769180824586083, "grad_norm": 1.34375, "learning_rate": 6.053224761472887e-06, "loss": 0.5011, "step": 10937 }, { "epoch": 1.877091223893518, "grad_norm": 1.4375, "learning_rate": 6.051549031922259e-06, "loss": 0.4961, "step": 10938 }, { "epoch": 1.8772643653284278, "grad_norm": 1.5, "learning_rate": 6.049873433714965e-06, "loss": 0.4088, "step": 10939 }, { "epoch": 1.8774375067633373, "grad_norm": 1.4375, "learning_rate": 6.0481979669067416e-06, "loss": 0.4501, "step": 10940 }, { "epoch": 1.8776106481982469, "grad_norm": 1.453125, "learning_rate": 6.046522631553322e-06, "loss": 0.4265, "step": 10941 }, { "epoch": 1.8777837896331566, "grad_norm": 1.484375, "learning_rate": 6.0448474277104365e-06, "loss": 0.4868, "step": 10942 }, { "epoch": 1.8779569310680664, "grad_norm": 1.484375, "learning_rate": 6.0431723554338086e-06, "loss": 0.4904, "step": 10943 }, { "epoch": 1.878130072502976, "grad_norm": 1.3125, "learning_rate": 6.041497414779161e-06, "loss": 0.4592, "step": 10944 }, { "epoch": 1.8783032139378855, "grad_norm": 1.34375, "learning_rate": 6.039822605802206e-06, "loss": 0.4266, "step": 10945 }, { "epoch": 1.8784763553727952, "grad_norm": 1.4453125, "learning_rate": 6.03814792855866e-06, "loss": 0.4678, "step": 10946 }, { "epoch": 1.8786494968077048, "grad_norm": 1.40625, "learning_rate": 6.036473383104224e-06, "loss": 0.4548, "step": 10947 }, { "epoch": 1.8788226382426143, "grad_norm": 1.3984375, "learning_rate": 6.034798969494611e-06, "loss": 0.5137, "step": 10948 }, { "epoch": 1.878995779677524, "grad_norm": 1.34375, "learning_rate": 6.03312468778551e-06, "loss": 0.3894, "step": 10949 }, { "epoch": 1.8791689211124338, "grad_norm": 1.3125, "learning_rate": 6.03145053803262e-06, "loss": 0.4185, "step": 10950 }, { "epoch": 1.8793420625473434, "grad_norm": 1.34375, "learning_rate": 6.029776520291631e-06, "loss": 0.4207, "step": 10951 }, { "epoch": 1.879515203982253, "grad_norm": 1.359375, "learning_rate": 6.028102634618225e-06, "loss": 0.4162, "step": 10952 }, { "epoch": 1.8796883454171627, "grad_norm": 1.375, "learning_rate": 6.026428881068087e-06, "loss": 0.4639, "step": 10953 }, { "epoch": 1.8798614868520724, "grad_norm": 1.515625, "learning_rate": 6.0247552596968895e-06, "loss": 0.4969, "step": 10954 }, { "epoch": 1.880034628286982, "grad_norm": 1.3515625, "learning_rate": 6.023081770560307e-06, "loss": 0.4044, "step": 10955 }, { "epoch": 1.8802077697218915, "grad_norm": 1.4765625, "learning_rate": 6.021408413714009e-06, "loss": 0.4333, "step": 10956 }, { "epoch": 1.8803809111568013, "grad_norm": 1.46875, "learning_rate": 6.019735189213655e-06, "loss": 0.4467, "step": 10957 }, { "epoch": 1.8805540525917108, "grad_norm": 1.515625, "learning_rate": 6.018062097114905e-06, "loss": 0.4997, "step": 10958 }, { "epoch": 1.8807271940266204, "grad_norm": 1.484375, "learning_rate": 6.016389137473417e-06, "loss": 0.4459, "step": 10959 }, { "epoch": 1.8809003354615301, "grad_norm": 1.25, "learning_rate": 6.01471631034484e-06, "loss": 0.4141, "step": 10960 }, { "epoch": 1.8810734768964399, "grad_norm": 1.4765625, "learning_rate": 6.013043615784817e-06, "loss": 0.4878, "step": 10961 }, { "epoch": 1.8812466183313494, "grad_norm": 1.3984375, "learning_rate": 6.011371053848991e-06, "loss": 0.4012, "step": 10962 }, { "epoch": 1.881419759766259, "grad_norm": 1.4765625, "learning_rate": 6.009698624593001e-06, "loss": 0.4461, "step": 10963 }, { "epoch": 1.8815929012011687, "grad_norm": 1.453125, "learning_rate": 6.008026328072476e-06, "loss": 0.5579, "step": 10964 }, { "epoch": 1.8817660426360785, "grad_norm": 1.46875, "learning_rate": 6.006354164343047e-06, "loss": 0.4917, "step": 10965 }, { "epoch": 1.881939184070988, "grad_norm": 1.59375, "learning_rate": 6.004682133460336e-06, "loss": 0.4588, "step": 10966 }, { "epoch": 1.8821123255058976, "grad_norm": 1.4765625, "learning_rate": 6.00301023547996e-06, "loss": 0.473, "step": 10967 }, { "epoch": 1.8822854669408073, "grad_norm": 1.4140625, "learning_rate": 6.0013384704575406e-06, "loss": 0.5391, "step": 10968 }, { "epoch": 1.8824586083757169, "grad_norm": 1.46875, "learning_rate": 5.999666838448679e-06, "loss": 0.5007, "step": 10969 }, { "epoch": 1.8826317498106264, "grad_norm": 1.390625, "learning_rate": 5.9979953395089916e-06, "loss": 0.4572, "step": 10970 }, { "epoch": 1.8828048912455362, "grad_norm": 1.4296875, "learning_rate": 5.996323973694073e-06, "loss": 0.513, "step": 10971 }, { "epoch": 1.882978032680446, "grad_norm": 1.484375, "learning_rate": 5.994652741059524e-06, "loss": 0.4848, "step": 10972 }, { "epoch": 1.8831511741153555, "grad_norm": 1.4296875, "learning_rate": 5.992981641660935e-06, "loss": 0.4671, "step": 10973 }, { "epoch": 1.883324315550265, "grad_norm": 1.421875, "learning_rate": 5.991310675553894e-06, "loss": 0.4926, "step": 10974 }, { "epoch": 1.8834974569851748, "grad_norm": 1.3203125, "learning_rate": 5.989639842793988e-06, "loss": 0.5025, "step": 10975 }, { "epoch": 1.8836705984200846, "grad_norm": 1.3984375, "learning_rate": 5.987969143436793e-06, "loss": 0.4023, "step": 10976 }, { "epoch": 1.883843739854994, "grad_norm": 1.4375, "learning_rate": 5.986298577537888e-06, "loss": 0.435, "step": 10977 }, { "epoch": 1.8840168812899036, "grad_norm": 1.453125, "learning_rate": 5.984628145152838e-06, "loss": 0.4319, "step": 10978 }, { "epoch": 1.8841900227248134, "grad_norm": 1.3203125, "learning_rate": 5.982957846337211e-06, "loss": 0.4828, "step": 10979 }, { "epoch": 1.884363164159723, "grad_norm": 1.265625, "learning_rate": 5.98128768114657e-06, "loss": 0.4102, "step": 10980 }, { "epoch": 1.8845363055946325, "grad_norm": 1.390625, "learning_rate": 5.9796176496364735e-06, "loss": 0.4984, "step": 10981 }, { "epoch": 1.8847094470295422, "grad_norm": 1.390625, "learning_rate": 5.977947751862474e-06, "loss": 0.4535, "step": 10982 }, { "epoch": 1.884882588464452, "grad_norm": 1.65625, "learning_rate": 5.976277987880117e-06, "loss": 0.4324, "step": 10983 }, { "epoch": 1.8850557298993615, "grad_norm": 1.375, "learning_rate": 5.974608357744951e-06, "loss": 0.4269, "step": 10984 }, { "epoch": 1.885228871334271, "grad_norm": 1.34375, "learning_rate": 5.972938861512509e-06, "loss": 0.4314, "step": 10985 }, { "epoch": 1.8854020127691808, "grad_norm": 1.421875, "learning_rate": 5.971269499238332e-06, "loss": 0.4171, "step": 10986 }, { "epoch": 1.8855751542040906, "grad_norm": 1.359375, "learning_rate": 5.9696002709779486e-06, "loss": 0.4216, "step": 10987 }, { "epoch": 1.8857482956390001, "grad_norm": 1.40625, "learning_rate": 5.967931176786882e-06, "loss": 0.5161, "step": 10988 }, { "epoch": 1.8859214370739097, "grad_norm": 1.3984375, "learning_rate": 5.966262216720658e-06, "loss": 0.4755, "step": 10989 }, { "epoch": 1.8860945785088195, "grad_norm": 1.4921875, "learning_rate": 5.964593390834792e-06, "loss": 0.5228, "step": 10990 }, { "epoch": 1.886267719943729, "grad_norm": 1.5, "learning_rate": 5.962924699184792e-06, "loss": 0.4609, "step": 10991 }, { "epoch": 1.8864408613786385, "grad_norm": 1.421875, "learning_rate": 5.961256141826175e-06, "loss": 0.538, "step": 10992 }, { "epoch": 1.8866140028135483, "grad_norm": 1.3515625, "learning_rate": 5.959587718814439e-06, "loss": 0.4015, "step": 10993 }, { "epoch": 1.886787144248458, "grad_norm": 1.390625, "learning_rate": 5.957919430205088e-06, "loss": 0.4697, "step": 10994 }, { "epoch": 1.8869602856833676, "grad_norm": 1.296875, "learning_rate": 5.95625127605361e-06, "loss": 0.4246, "step": 10995 }, { "epoch": 1.8871334271182771, "grad_norm": 1.3515625, "learning_rate": 5.9545832564155035e-06, "loss": 0.452, "step": 10996 }, { "epoch": 1.887306568553187, "grad_norm": 1.4296875, "learning_rate": 5.952915371346247e-06, "loss": 0.4725, "step": 10997 }, { "epoch": 1.8874797099880967, "grad_norm": 1.4296875, "learning_rate": 5.951247620901326e-06, "loss": 0.4209, "step": 10998 }, { "epoch": 1.8876528514230062, "grad_norm": 1.390625, "learning_rate": 5.949580005136216e-06, "loss": 0.448, "step": 10999 }, { "epoch": 1.8878259928579157, "grad_norm": 1.4453125, "learning_rate": 5.947912524106389e-06, "loss": 0.4505, "step": 11000 }, { "epoch": 1.8879991342928255, "grad_norm": 1.3203125, "learning_rate": 5.9462451778673116e-06, "loss": 0.422, "step": 11001 }, { "epoch": 1.888172275727735, "grad_norm": 1.3046875, "learning_rate": 5.944577966474452e-06, "loss": 0.4051, "step": 11002 }, { "epoch": 1.8883454171626446, "grad_norm": 1.421875, "learning_rate": 5.9429108899832675e-06, "loss": 0.52, "step": 11003 }, { "epoch": 1.8885185585975544, "grad_norm": 1.46875, "learning_rate": 5.94124394844921e-06, "loss": 0.4295, "step": 11004 }, { "epoch": 1.8886917000324641, "grad_norm": 1.3359375, "learning_rate": 5.939577141927731e-06, "loss": 0.4209, "step": 11005 }, { "epoch": 1.8888648414673737, "grad_norm": 1.4609375, "learning_rate": 5.93791047047428e-06, "loss": 0.4502, "step": 11006 }, { "epoch": 1.8890379829022832, "grad_norm": 1.4296875, "learning_rate": 5.93624393414429e-06, "loss": 0.4836, "step": 11007 }, { "epoch": 1.889211124337193, "grad_norm": 1.3828125, "learning_rate": 5.934577532993204e-06, "loss": 0.4726, "step": 11008 }, { "epoch": 1.8893842657721027, "grad_norm": 1.421875, "learning_rate": 5.932911267076451e-06, "loss": 0.4873, "step": 11009 }, { "epoch": 1.8895574072070123, "grad_norm": 1.4609375, "learning_rate": 5.93124513644946e-06, "loss": 0.5116, "step": 11010 }, { "epoch": 1.8897305486419218, "grad_norm": 1.5546875, "learning_rate": 5.929579141167653e-06, "loss": 0.4399, "step": 11011 }, { "epoch": 1.8899036900768316, "grad_norm": 1.4453125, "learning_rate": 5.927913281286446e-06, "loss": 0.4563, "step": 11012 }, { "epoch": 1.890076831511741, "grad_norm": 1.5078125, "learning_rate": 5.926247556861257e-06, "loss": 0.4734, "step": 11013 }, { "epoch": 1.8902499729466506, "grad_norm": 1.3984375, "learning_rate": 5.924581967947496e-06, "loss": 0.3952, "step": 11014 }, { "epoch": 1.8904231143815604, "grad_norm": 1.609375, "learning_rate": 5.922916514600568e-06, "loss": 0.5204, "step": 11015 }, { "epoch": 1.8905962558164702, "grad_norm": 1.59375, "learning_rate": 5.92125119687587e-06, "loss": 0.4237, "step": 11016 }, { "epoch": 1.8907693972513797, "grad_norm": 1.546875, "learning_rate": 5.919586014828802e-06, "loss": 0.5046, "step": 11017 }, { "epoch": 1.8909425386862893, "grad_norm": 1.515625, "learning_rate": 5.9179209685147525e-06, "loss": 0.4299, "step": 11018 }, { "epoch": 1.891115680121199, "grad_norm": 1.3984375, "learning_rate": 5.916256057989107e-06, "loss": 0.4063, "step": 11019 }, { "epoch": 1.8912888215561088, "grad_norm": 1.5859375, "learning_rate": 5.9145912833072535e-06, "loss": 0.445, "step": 11020 }, { "epoch": 1.8914619629910183, "grad_norm": 1.390625, "learning_rate": 5.912926644524566e-06, "loss": 0.4773, "step": 11021 }, { "epoch": 1.8916351044259279, "grad_norm": 1.40625, "learning_rate": 5.911262141696418e-06, "loss": 0.4782, "step": 11022 }, { "epoch": 1.8918082458608376, "grad_norm": 1.546875, "learning_rate": 5.9095977748781766e-06, "loss": 0.4828, "step": 11023 }, { "epoch": 1.8919813872957472, "grad_norm": 1.3984375, "learning_rate": 5.907933544125213e-06, "loss": 0.4466, "step": 11024 }, { "epoch": 1.8921545287306567, "grad_norm": 1.421875, "learning_rate": 5.906269449492881e-06, "loss": 0.523, "step": 11025 }, { "epoch": 1.8923276701655665, "grad_norm": 1.421875, "learning_rate": 5.904605491036537e-06, "loss": 0.4563, "step": 11026 }, { "epoch": 1.8925008116004762, "grad_norm": 1.453125, "learning_rate": 5.9029416688115344e-06, "loss": 0.4392, "step": 11027 }, { "epoch": 1.8926739530353858, "grad_norm": 1.4296875, "learning_rate": 5.901277982873217e-06, "loss": 0.455, "step": 11028 }, { "epoch": 1.8928470944702953, "grad_norm": 1.4453125, "learning_rate": 5.899614433276928e-06, "loss": 0.4475, "step": 11029 }, { "epoch": 1.893020235905205, "grad_norm": 1.3671875, "learning_rate": 5.897951020078002e-06, "loss": 0.4848, "step": 11030 }, { "epoch": 1.8931933773401148, "grad_norm": 1.46875, "learning_rate": 5.8962877433317725e-06, "loss": 0.4719, "step": 11031 }, { "epoch": 1.8933665187750244, "grad_norm": 1.4609375, "learning_rate": 5.894624603093571e-06, "loss": 0.4375, "step": 11032 }, { "epoch": 1.893539660209934, "grad_norm": 1.34375, "learning_rate": 5.892961599418716e-06, "loss": 0.4525, "step": 11033 }, { "epoch": 1.8937128016448437, "grad_norm": 1.5078125, "learning_rate": 5.8912987323625274e-06, "loss": 0.4277, "step": 11034 }, { "epoch": 1.8938859430797532, "grad_norm": 1.4140625, "learning_rate": 5.889636001980324e-06, "loss": 0.4761, "step": 11035 }, { "epoch": 1.8940590845146628, "grad_norm": 1.453125, "learning_rate": 5.887973408327412e-06, "loss": 0.5051, "step": 11036 }, { "epoch": 1.8942322259495725, "grad_norm": 1.3671875, "learning_rate": 5.886310951459098e-06, "loss": 0.5125, "step": 11037 }, { "epoch": 1.8944053673844823, "grad_norm": 1.390625, "learning_rate": 5.884648631430683e-06, "loss": 0.4616, "step": 11038 }, { "epoch": 1.8945785088193918, "grad_norm": 1.3125, "learning_rate": 5.882986448297463e-06, "loss": 0.4033, "step": 11039 }, { "epoch": 1.8947516502543014, "grad_norm": 1.3515625, "learning_rate": 5.88132440211473e-06, "loss": 0.4417, "step": 11040 }, { "epoch": 1.8949247916892111, "grad_norm": 1.4296875, "learning_rate": 5.87966249293777e-06, "loss": 0.5613, "step": 11041 }, { "epoch": 1.895097933124121, "grad_norm": 1.515625, "learning_rate": 5.878000720821866e-06, "loss": 0.4282, "step": 11042 }, { "epoch": 1.8952710745590304, "grad_norm": 1.4140625, "learning_rate": 5.8763390858222955e-06, "loss": 0.4324, "step": 11043 }, { "epoch": 1.89544421599394, "grad_norm": 1.359375, "learning_rate": 5.874677587994335e-06, "loss": 0.4753, "step": 11044 }, { "epoch": 1.8956173574288497, "grad_norm": 1.4375, "learning_rate": 5.873016227393247e-06, "loss": 0.4512, "step": 11045 }, { "epoch": 1.8957904988637595, "grad_norm": 1.3203125, "learning_rate": 5.871355004074304e-06, "loss": 0.4393, "step": 11046 }, { "epoch": 1.8959636402986688, "grad_norm": 1.3984375, "learning_rate": 5.869693918092761e-06, "loss": 0.4604, "step": 11047 }, { "epoch": 1.8961367817335786, "grad_norm": 1.5703125, "learning_rate": 5.868032969503876e-06, "loss": 0.4683, "step": 11048 }, { "epoch": 1.8963099231684883, "grad_norm": 1.3359375, "learning_rate": 5.866372158362896e-06, "loss": 0.416, "step": 11049 }, { "epoch": 1.8964830646033979, "grad_norm": 1.390625, "learning_rate": 5.86471148472507e-06, "loss": 0.4349, "step": 11050 }, { "epoch": 1.8966562060383074, "grad_norm": 1.328125, "learning_rate": 5.863050948645639e-06, "loss": 0.4032, "step": 11051 }, { "epoch": 1.8968293474732172, "grad_norm": 1.4296875, "learning_rate": 5.861390550179837e-06, "loss": 0.4906, "step": 11052 }, { "epoch": 1.897002488908127, "grad_norm": 1.4453125, "learning_rate": 5.859730289382902e-06, "loss": 0.4688, "step": 11053 }, { "epoch": 1.8971756303430365, "grad_norm": 1.3359375, "learning_rate": 5.858070166310057e-06, "loss": 0.4287, "step": 11054 }, { "epoch": 1.897348771777946, "grad_norm": 1.328125, "learning_rate": 5.856410181016523e-06, "loss": 0.4861, "step": 11055 }, { "epoch": 1.8975219132128558, "grad_norm": 1.546875, "learning_rate": 5.854750333557526e-06, "loss": 0.4794, "step": 11056 }, { "epoch": 1.8976950546477656, "grad_norm": 1.375, "learning_rate": 5.853090623988276e-06, "loss": 0.5306, "step": 11057 }, { "epoch": 1.8978681960826749, "grad_norm": 1.4140625, "learning_rate": 5.851431052363985e-06, "loss": 0.4413, "step": 11058 }, { "epoch": 1.8980413375175846, "grad_norm": 1.3984375, "learning_rate": 5.849771618739852e-06, "loss": 0.4949, "step": 11059 }, { "epoch": 1.8982144789524944, "grad_norm": 1.546875, "learning_rate": 5.8481123231710844e-06, "loss": 0.4778, "step": 11060 }, { "epoch": 1.898387620387404, "grad_norm": 1.40625, "learning_rate": 5.846453165712873e-06, "loss": 0.4066, "step": 11061 }, { "epoch": 1.8985607618223135, "grad_norm": 1.453125, "learning_rate": 5.84479414642041e-06, "loss": 0.4882, "step": 11062 }, { "epoch": 1.8987339032572232, "grad_norm": 1.3671875, "learning_rate": 5.843135265348884e-06, "loss": 0.4189, "step": 11063 }, { "epoch": 1.898907044692133, "grad_norm": 1.5703125, "learning_rate": 5.841476522553473e-06, "loss": 0.5322, "step": 11064 }, { "epoch": 1.8990801861270425, "grad_norm": 1.5, "learning_rate": 5.839817918089359e-06, "loss": 0.4489, "step": 11065 }, { "epoch": 1.899253327561952, "grad_norm": 1.3515625, "learning_rate": 5.8381594520117045e-06, "loss": 0.4125, "step": 11066 }, { "epoch": 1.8994264689968618, "grad_norm": 1.4375, "learning_rate": 5.836501124375691e-06, "loss": 0.4921, "step": 11067 }, { "epoch": 1.8995996104317716, "grad_norm": 1.28125, "learning_rate": 5.834842935236478e-06, "loss": 0.4201, "step": 11068 }, { "epoch": 1.899772751866681, "grad_norm": 1.46875, "learning_rate": 5.83318488464922e-06, "loss": 0.4745, "step": 11069 }, { "epoch": 1.8999458933015907, "grad_norm": 1.28125, "learning_rate": 5.831526972669074e-06, "loss": 0.4113, "step": 11070 }, { "epoch": 1.9001190347365005, "grad_norm": 1.3828125, "learning_rate": 5.829869199351188e-06, "loss": 0.5123, "step": 11071 }, { "epoch": 1.90029217617141, "grad_norm": 1.4453125, "learning_rate": 5.82821156475071e-06, "loss": 0.442, "step": 11072 }, { "epoch": 1.9004653176063195, "grad_norm": 1.296875, "learning_rate": 5.826554068922783e-06, "loss": 0.4737, "step": 11073 }, { "epoch": 1.9006384590412293, "grad_norm": 1.3828125, "learning_rate": 5.824896711922533e-06, "loss": 0.5314, "step": 11074 }, { "epoch": 1.900811600476139, "grad_norm": 1.390625, "learning_rate": 5.823239493805098e-06, "loss": 0.4977, "step": 11075 }, { "epoch": 1.9009847419110486, "grad_norm": 1.484375, "learning_rate": 5.821582414625603e-06, "loss": 0.4786, "step": 11076 }, { "epoch": 1.9011578833459581, "grad_norm": 1.46875, "learning_rate": 5.819925474439166e-06, "loss": 0.4093, "step": 11077 }, { "epoch": 1.901331024780868, "grad_norm": 1.3203125, "learning_rate": 5.818268673300918e-06, "loss": 0.4662, "step": 11078 }, { "epoch": 1.9015041662157777, "grad_norm": 1.484375, "learning_rate": 5.816612011265957e-06, "loss": 0.4813, "step": 11079 }, { "epoch": 1.9016773076506872, "grad_norm": 1.4453125, "learning_rate": 5.814955488389398e-06, "loss": 0.4201, "step": 11080 }, { "epoch": 1.9018504490855968, "grad_norm": 1.3828125, "learning_rate": 5.813299104726341e-06, "loss": 0.4483, "step": 11081 }, { "epoch": 1.9020235905205065, "grad_norm": 1.3046875, "learning_rate": 5.8116428603318874e-06, "loss": 0.477, "step": 11082 }, { "epoch": 1.902196731955416, "grad_norm": 1.546875, "learning_rate": 5.809986755261136e-06, "loss": 0.4557, "step": 11083 }, { "epoch": 1.9023698733903256, "grad_norm": 1.625, "learning_rate": 5.808330789569165e-06, "loss": 0.4988, "step": 11084 }, { "epoch": 1.9025430148252354, "grad_norm": 1.4375, "learning_rate": 5.8066749633110675e-06, "loss": 0.471, "step": 11085 }, { "epoch": 1.9027161562601451, "grad_norm": 1.3828125, "learning_rate": 5.80501927654192e-06, "loss": 0.4373, "step": 11086 }, { "epoch": 1.9028892976950547, "grad_norm": 1.421875, "learning_rate": 5.8033637293168e-06, "loss": 0.52, "step": 11087 }, { "epoch": 1.9030624391299642, "grad_norm": 1.3671875, "learning_rate": 5.801708321690778e-06, "loss": 0.4677, "step": 11088 }, { "epoch": 1.903235580564874, "grad_norm": 1.546875, "learning_rate": 5.800053053718921e-06, "loss": 0.4856, "step": 11089 }, { "epoch": 1.9034087219997837, "grad_norm": 1.46875, "learning_rate": 5.798397925456291e-06, "loss": 0.4642, "step": 11090 }, { "epoch": 1.9035818634346933, "grad_norm": 1.375, "learning_rate": 5.796742936957943e-06, "loss": 0.4406, "step": 11091 }, { "epoch": 1.9037550048696028, "grad_norm": 1.4609375, "learning_rate": 5.795088088278935e-06, "loss": 0.4666, "step": 11092 }, { "epoch": 1.9039281463045126, "grad_norm": 1.3359375, "learning_rate": 5.793433379474307e-06, "loss": 0.4577, "step": 11093 }, { "epoch": 1.904101287739422, "grad_norm": 1.390625, "learning_rate": 5.791778810599105e-06, "loss": 0.5087, "step": 11094 }, { "epoch": 1.9042744291743317, "grad_norm": 1.3359375, "learning_rate": 5.790124381708369e-06, "loss": 0.4578, "step": 11095 }, { "epoch": 1.9044475706092414, "grad_norm": 1.390625, "learning_rate": 5.7884700928571305e-06, "loss": 0.4407, "step": 11096 }, { "epoch": 1.9046207120441512, "grad_norm": 1.40625, "learning_rate": 5.786815944100425e-06, "loss": 0.4599, "step": 11097 }, { "epoch": 1.9047938534790607, "grad_norm": 1.5, "learning_rate": 5.785161935493266e-06, "loss": 0.4333, "step": 11098 }, { "epoch": 1.9049669949139703, "grad_norm": 1.40625, "learning_rate": 5.783508067090678e-06, "loss": 0.444, "step": 11099 }, { "epoch": 1.90514013634888, "grad_norm": 1.6640625, "learning_rate": 5.781854338947679e-06, "loss": 0.4706, "step": 11100 }, { "epoch": 1.9053132777837898, "grad_norm": 1.4921875, "learning_rate": 5.780200751119279e-06, "loss": 0.5063, "step": 11101 }, { "epoch": 1.9054864192186993, "grad_norm": 1.46875, "learning_rate": 5.778547303660487e-06, "loss": 0.5789, "step": 11102 }, { "epoch": 1.9056595606536089, "grad_norm": 1.3359375, "learning_rate": 5.776893996626295e-06, "loss": 0.4567, "step": 11103 }, { "epoch": 1.9058327020885186, "grad_norm": 1.4296875, "learning_rate": 5.775240830071704e-06, "loss": 0.4513, "step": 11104 }, { "epoch": 1.9060058435234282, "grad_norm": 1.34375, "learning_rate": 5.773587804051706e-06, "loss": 0.4883, "step": 11105 }, { "epoch": 1.9061789849583377, "grad_norm": 1.546875, "learning_rate": 5.771934918621289e-06, "loss": 0.5188, "step": 11106 }, { "epoch": 1.9063521263932475, "grad_norm": 1.5390625, "learning_rate": 5.770282173835437e-06, "loss": 0.5308, "step": 11107 }, { "epoch": 1.9065252678281572, "grad_norm": 1.3515625, "learning_rate": 5.768629569749122e-06, "loss": 0.4878, "step": 11108 }, { "epoch": 1.9066984092630668, "grad_norm": 1.390625, "learning_rate": 5.766977106417322e-06, "loss": 0.4565, "step": 11109 }, { "epoch": 1.9068715506979763, "grad_norm": 1.5546875, "learning_rate": 5.765324783894999e-06, "loss": 0.4505, "step": 11110 }, { "epoch": 1.907044692132886, "grad_norm": 1.453125, "learning_rate": 5.763672602237129e-06, "loss": 0.434, "step": 11111 }, { "epoch": 1.9072178335677958, "grad_norm": 1.390625, "learning_rate": 5.762020561498659e-06, "loss": 0.4644, "step": 11112 }, { "epoch": 1.9073909750027054, "grad_norm": 1.3984375, "learning_rate": 5.760368661734549e-06, "loss": 0.5353, "step": 11113 }, { "epoch": 1.907564116437615, "grad_norm": 1.390625, "learning_rate": 5.758716902999747e-06, "loss": 0.4916, "step": 11114 }, { "epoch": 1.9077372578725247, "grad_norm": 1.4375, "learning_rate": 5.757065285349199e-06, "loss": 0.4318, "step": 11115 }, { "epoch": 1.9079103993074342, "grad_norm": 1.4140625, "learning_rate": 5.755413808837849e-06, "loss": 0.5047, "step": 11116 }, { "epoch": 1.9080835407423438, "grad_norm": 1.4140625, "learning_rate": 5.753762473520623e-06, "loss": 0.4554, "step": 11117 }, { "epoch": 1.9082566821772535, "grad_norm": 1.4375, "learning_rate": 5.752111279452459e-06, "loss": 0.4753, "step": 11118 }, { "epoch": 1.9084298236121633, "grad_norm": 1.40625, "learning_rate": 5.75046022668828e-06, "loss": 0.3956, "step": 11119 }, { "epoch": 1.9086029650470728, "grad_norm": 1.4375, "learning_rate": 5.748809315283011e-06, "loss": 0.3812, "step": 11120 }, { "epoch": 1.9087761064819824, "grad_norm": 1.3984375, "learning_rate": 5.7471585452915645e-06, "loss": 0.4717, "step": 11121 }, { "epoch": 1.9089492479168921, "grad_norm": 1.3828125, "learning_rate": 5.745507916768856e-06, "loss": 0.4435, "step": 11122 }, { "epoch": 1.909122389351802, "grad_norm": 1.375, "learning_rate": 5.74385742976979e-06, "loss": 0.4442, "step": 11123 }, { "epoch": 1.9092955307867114, "grad_norm": 1.5078125, "learning_rate": 5.742207084349274e-06, "loss": 0.4502, "step": 11124 }, { "epoch": 1.909468672221621, "grad_norm": 1.34375, "learning_rate": 5.7405568805622e-06, "loss": 0.4141, "step": 11125 }, { "epoch": 1.9096418136565307, "grad_norm": 1.59375, "learning_rate": 5.738906818463469e-06, "loss": 0.4708, "step": 11126 }, { "epoch": 1.9098149550914403, "grad_norm": 1.359375, "learning_rate": 5.7372568981079605e-06, "loss": 0.479, "step": 11127 }, { "epoch": 1.9099880965263498, "grad_norm": 1.4453125, "learning_rate": 5.735607119550561e-06, "loss": 0.492, "step": 11128 }, { "epoch": 1.9101612379612596, "grad_norm": 1.3515625, "learning_rate": 5.733957482846153e-06, "loss": 0.4057, "step": 11129 }, { "epoch": 1.9103343793961693, "grad_norm": 1.46875, "learning_rate": 5.732307988049608e-06, "loss": 0.4712, "step": 11130 }, { "epoch": 1.9105075208310789, "grad_norm": 1.4921875, "learning_rate": 5.730658635215797e-06, "loss": 0.4275, "step": 11131 }, { "epoch": 1.9106806622659884, "grad_norm": 1.34375, "learning_rate": 5.729009424399583e-06, "loss": 0.479, "step": 11132 }, { "epoch": 1.9108538037008982, "grad_norm": 1.421875, "learning_rate": 5.727360355655829e-06, "loss": 0.4812, "step": 11133 }, { "epoch": 1.911026945135808, "grad_norm": 1.46875, "learning_rate": 5.725711429039389e-06, "loss": 0.4604, "step": 11134 }, { "epoch": 1.9112000865707175, "grad_norm": 1.40625, "learning_rate": 5.724062644605117e-06, "loss": 0.4512, "step": 11135 }, { "epoch": 1.911373228005627, "grad_norm": 1.4375, "learning_rate": 5.7224140024078525e-06, "loss": 0.4258, "step": 11136 }, { "epoch": 1.9115463694405368, "grad_norm": 1.4765625, "learning_rate": 5.72076550250244e-06, "loss": 0.5064, "step": 11137 }, { "epoch": 1.9117195108754463, "grad_norm": 1.421875, "learning_rate": 5.719117144943715e-06, "loss": 0.4958, "step": 11138 }, { "epoch": 1.9118926523103559, "grad_norm": 1.4375, "learning_rate": 5.717468929786513e-06, "loss": 0.4804, "step": 11139 }, { "epoch": 1.9120657937452656, "grad_norm": 1.3046875, "learning_rate": 5.7158208570856614e-06, "loss": 0.4746, "step": 11140 }, { "epoch": 1.9122389351801754, "grad_norm": 1.4921875, "learning_rate": 5.714172926895976e-06, "loss": 0.5239, "step": 11141 }, { "epoch": 1.912412076615085, "grad_norm": 1.40625, "learning_rate": 5.712525139272273e-06, "loss": 0.4546, "step": 11142 }, { "epoch": 1.9125852180499945, "grad_norm": 1.421875, "learning_rate": 5.710877494269376e-06, "loss": 0.421, "step": 11143 }, { "epoch": 1.9127583594849042, "grad_norm": 1.46875, "learning_rate": 5.709229991942092e-06, "loss": 0.5011, "step": 11144 }, { "epoch": 1.912931500919814, "grad_norm": 1.3984375, "learning_rate": 5.707582632345214e-06, "loss": 0.4575, "step": 11145 }, { "epoch": 1.9131046423547236, "grad_norm": 1.4296875, "learning_rate": 5.705935415533548e-06, "loss": 0.4693, "step": 11146 }, { "epoch": 1.913277783789633, "grad_norm": 1.4296875, "learning_rate": 5.704288341561886e-06, "loss": 0.5021, "step": 11147 }, { "epoch": 1.9134509252245429, "grad_norm": 1.390625, "learning_rate": 5.702641410485017e-06, "loss": 0.4042, "step": 11148 }, { "epoch": 1.9136240666594524, "grad_norm": 1.296875, "learning_rate": 5.700994622357732e-06, "loss": 0.3615, "step": 11149 }, { "epoch": 1.913797208094362, "grad_norm": 1.484375, "learning_rate": 5.699347977234799e-06, "loss": 0.4764, "step": 11150 }, { "epoch": 1.9139703495292717, "grad_norm": 1.2734375, "learning_rate": 5.697701475170998e-06, "loss": 0.4309, "step": 11151 }, { "epoch": 1.9141434909641815, "grad_norm": 1.4609375, "learning_rate": 5.696055116221101e-06, "loss": 0.4513, "step": 11152 }, { "epoch": 1.914316632399091, "grad_norm": 1.4375, "learning_rate": 5.694408900439867e-06, "loss": 0.4298, "step": 11153 }, { "epoch": 1.9144897738340005, "grad_norm": 1.375, "learning_rate": 5.69276282788207e-06, "loss": 0.4647, "step": 11154 }, { "epoch": 1.9146629152689103, "grad_norm": 1.484375, "learning_rate": 5.691116898602454e-06, "loss": 0.4879, "step": 11155 }, { "epoch": 1.91483605670382, "grad_norm": 1.4140625, "learning_rate": 5.689471112655771e-06, "loss": 0.5395, "step": 11156 }, { "epoch": 1.9150091981387296, "grad_norm": 1.625, "learning_rate": 5.68782547009677e-06, "loss": 0.4834, "step": 11157 }, { "epoch": 1.9151823395736391, "grad_norm": 1.375, "learning_rate": 5.686179970980195e-06, "loss": 0.4694, "step": 11158 }, { "epoch": 1.915355481008549, "grad_norm": 1.5078125, "learning_rate": 5.684534615360781e-06, "loss": 0.5174, "step": 11159 }, { "epoch": 1.9155286224434585, "grad_norm": 1.484375, "learning_rate": 5.682889403293257e-06, "loss": 0.436, "step": 11160 }, { "epoch": 1.915701763878368, "grad_norm": 1.4453125, "learning_rate": 5.68124433483235e-06, "loss": 0.473, "step": 11161 }, { "epoch": 1.9158749053132778, "grad_norm": 1.375, "learning_rate": 5.679599410032785e-06, "loss": 0.4471, "step": 11162 }, { "epoch": 1.9160480467481875, "grad_norm": 1.328125, "learning_rate": 5.677954628949281e-06, "loss": 0.4444, "step": 11163 }, { "epoch": 1.916221188183097, "grad_norm": 1.4921875, "learning_rate": 5.6763099916365475e-06, "loss": 0.4794, "step": 11164 }, { "epoch": 1.9163943296180066, "grad_norm": 1.3203125, "learning_rate": 5.674665498149295e-06, "loss": 0.4385, "step": 11165 }, { "epoch": 1.9165674710529164, "grad_norm": 1.375, "learning_rate": 5.673021148542224e-06, "loss": 0.465, "step": 11166 }, { "epoch": 1.9167406124878261, "grad_norm": 1.3359375, "learning_rate": 5.671376942870035e-06, "loss": 0.4138, "step": 11167 }, { "epoch": 1.9169137539227357, "grad_norm": 1.4453125, "learning_rate": 5.669732881187427e-06, "loss": 0.5114, "step": 11168 }, { "epoch": 1.9170868953576452, "grad_norm": 1.4140625, "learning_rate": 5.668088963549079e-06, "loss": 0.4867, "step": 11169 }, { "epoch": 1.917260036792555, "grad_norm": 1.40625, "learning_rate": 5.6664451900096785e-06, "loss": 0.4318, "step": 11170 }, { "epoch": 1.9174331782274645, "grad_norm": 1.2890625, "learning_rate": 5.664801560623907e-06, "loss": 0.3953, "step": 11171 }, { "epoch": 1.917606319662374, "grad_norm": 1.5625, "learning_rate": 5.663158075446438e-06, "loss": 0.4714, "step": 11172 }, { "epoch": 1.9177794610972838, "grad_norm": 1.3828125, "learning_rate": 5.6615147345319445e-06, "loss": 0.4341, "step": 11173 }, { "epoch": 1.9179526025321936, "grad_norm": 1.4609375, "learning_rate": 5.65987153793508e-06, "loss": 0.4512, "step": 11174 }, { "epoch": 1.9181257439671031, "grad_norm": 1.4921875, "learning_rate": 5.658228485710518e-06, "loss": 0.497, "step": 11175 }, { "epoch": 1.9182988854020127, "grad_norm": 1.359375, "learning_rate": 5.656585577912908e-06, "loss": 0.4505, "step": 11176 }, { "epoch": 1.9184720268369224, "grad_norm": 1.4609375, "learning_rate": 5.654942814596902e-06, "loss": 0.4541, "step": 11177 }, { "epoch": 1.9186451682718322, "grad_norm": 1.453125, "learning_rate": 5.6533001958171485e-06, "loss": 0.4346, "step": 11178 }, { "epoch": 1.9188183097067417, "grad_norm": 1.4296875, "learning_rate": 5.651657721628281e-06, "loss": 0.4279, "step": 11179 }, { "epoch": 1.9189914511416513, "grad_norm": 1.4375, "learning_rate": 5.650015392084939e-06, "loss": 0.4416, "step": 11180 }, { "epoch": 1.919164592576561, "grad_norm": 1.3203125, "learning_rate": 5.648373207241754e-06, "loss": 0.4166, "step": 11181 }, { "epoch": 1.9193377340114708, "grad_norm": 1.4140625, "learning_rate": 5.646731167153354e-06, "loss": 0.4617, "step": 11182 }, { "epoch": 1.91951087544638, "grad_norm": 1.4765625, "learning_rate": 5.6450892718743626e-06, "loss": 0.4948, "step": 11183 }, { "epoch": 1.9196840168812899, "grad_norm": 1.375, "learning_rate": 5.64344752145939e-06, "loss": 0.4345, "step": 11184 }, { "epoch": 1.9198571583161996, "grad_norm": 1.4140625, "learning_rate": 5.641805915963048e-06, "loss": 0.4064, "step": 11185 }, { "epoch": 1.9200302997511092, "grad_norm": 1.3828125, "learning_rate": 5.640164455439951e-06, "loss": 0.4624, "step": 11186 }, { "epoch": 1.9202034411860187, "grad_norm": 1.3359375, "learning_rate": 5.6385231399447014e-06, "loss": 0.4468, "step": 11187 }, { "epoch": 1.9203765826209285, "grad_norm": 1.3046875, "learning_rate": 5.636881969531891e-06, "loss": 0.4278, "step": 11188 }, { "epoch": 1.9205497240558382, "grad_norm": 1.453125, "learning_rate": 5.635240944256113e-06, "loss": 0.5005, "step": 11189 }, { "epoch": 1.9207228654907478, "grad_norm": 1.4296875, "learning_rate": 5.633600064171959e-06, "loss": 0.509, "step": 11190 }, { "epoch": 1.9208960069256573, "grad_norm": 1.484375, "learning_rate": 5.6319593293340094e-06, "loss": 0.4849, "step": 11191 }, { "epoch": 1.921069148360567, "grad_norm": 1.390625, "learning_rate": 5.6303187397968475e-06, "loss": 0.6026, "step": 11192 }, { "epoch": 1.9212422897954768, "grad_norm": 1.4453125, "learning_rate": 5.62867829561504e-06, "loss": 0.4744, "step": 11193 }, { "epoch": 1.9214154312303862, "grad_norm": 1.421875, "learning_rate": 5.627037996843157e-06, "loss": 0.4614, "step": 11194 }, { "epoch": 1.921588572665296, "grad_norm": 1.3046875, "learning_rate": 5.625397843535762e-06, "loss": 0.5035, "step": 11195 }, { "epoch": 1.9217617141002057, "grad_norm": 1.4375, "learning_rate": 5.6237578357474175e-06, "loss": 0.4709, "step": 11196 }, { "epoch": 1.9219348555351152, "grad_norm": 1.390625, "learning_rate": 5.622117973532675e-06, "loss": 0.4973, "step": 11197 }, { "epoch": 1.9221079969700248, "grad_norm": 1.265625, "learning_rate": 5.620478256946082e-06, "loss": 0.4236, "step": 11198 }, { "epoch": 1.9222811384049345, "grad_norm": 1.46875, "learning_rate": 5.6188386860421876e-06, "loss": 0.4548, "step": 11199 }, { "epoch": 1.9224542798398443, "grad_norm": 1.4375, "learning_rate": 5.617199260875528e-06, "loss": 0.4623, "step": 11200 }, { "epoch": 1.9226274212747538, "grad_norm": 1.3515625, "learning_rate": 5.615559981500638e-06, "loss": 0.4632, "step": 11201 }, { "epoch": 1.9228005627096634, "grad_norm": 1.4375, "learning_rate": 5.613920847972052e-06, "loss": 0.4606, "step": 11202 }, { "epoch": 1.9229737041445731, "grad_norm": 1.4765625, "learning_rate": 5.612281860344289e-06, "loss": 0.534, "step": 11203 }, { "epoch": 1.923146845579483, "grad_norm": 1.34375, "learning_rate": 5.610643018671868e-06, "loss": 0.4344, "step": 11204 }, { "epoch": 1.9233199870143922, "grad_norm": 1.4765625, "learning_rate": 5.6090043230093094e-06, "loss": 0.4856, "step": 11205 }, { "epoch": 1.923493128449302, "grad_norm": 1.3828125, "learning_rate": 5.607365773411122e-06, "loss": 0.4424, "step": 11206 }, { "epoch": 1.9236662698842117, "grad_norm": 1.375, "learning_rate": 5.605727369931811e-06, "loss": 0.5136, "step": 11207 }, { "epoch": 1.9238394113191213, "grad_norm": 1.40625, "learning_rate": 5.604089112625876e-06, "loss": 0.5013, "step": 11208 }, { "epoch": 1.9240125527540308, "grad_norm": 1.328125, "learning_rate": 5.602451001547815e-06, "loss": 0.4324, "step": 11209 }, { "epoch": 1.9241856941889406, "grad_norm": 1.4296875, "learning_rate": 5.600813036752117e-06, "loss": 0.475, "step": 11210 }, { "epoch": 1.9243588356238504, "grad_norm": 1.4140625, "learning_rate": 5.5991752182932735e-06, "loss": 0.4489, "step": 11211 }, { "epoch": 1.92453197705876, "grad_norm": 1.375, "learning_rate": 5.597537546225758e-06, "loss": 0.4162, "step": 11212 }, { "epoch": 1.9247051184936694, "grad_norm": 1.53125, "learning_rate": 5.59590002060405e-06, "loss": 0.5947, "step": 11213 }, { "epoch": 1.9248782599285792, "grad_norm": 1.5078125, "learning_rate": 5.594262641482622e-06, "loss": 0.4932, "step": 11214 }, { "epoch": 1.925051401363489, "grad_norm": 1.4375, "learning_rate": 5.592625408915939e-06, "loss": 0.4651, "step": 11215 }, { "epoch": 1.9252245427983985, "grad_norm": 1.4296875, "learning_rate": 5.590988322958467e-06, "loss": 0.4019, "step": 11216 }, { "epoch": 1.925397684233308, "grad_norm": 1.453125, "learning_rate": 5.589351383664656e-06, "loss": 0.4119, "step": 11217 }, { "epoch": 1.9255708256682178, "grad_norm": 1.484375, "learning_rate": 5.587714591088959e-06, "loss": 0.4831, "step": 11218 }, { "epoch": 1.9257439671031273, "grad_norm": 1.578125, "learning_rate": 5.586077945285828e-06, "loss": 0.4932, "step": 11219 }, { "epoch": 1.9259171085380369, "grad_norm": 1.359375, "learning_rate": 5.5844414463097075e-06, "loss": 0.6468, "step": 11220 }, { "epoch": 1.9260902499729466, "grad_norm": 1.375, "learning_rate": 5.582805094215027e-06, "loss": 0.5232, "step": 11221 }, { "epoch": 1.9262633914078564, "grad_norm": 1.390625, "learning_rate": 5.581168889056221e-06, "loss": 0.4736, "step": 11222 }, { "epoch": 1.926436532842766, "grad_norm": 1.4296875, "learning_rate": 5.579532830887718e-06, "loss": 0.4784, "step": 11223 }, { "epoch": 1.9266096742776755, "grad_norm": 1.4296875, "learning_rate": 5.577896919763942e-06, "loss": 0.4315, "step": 11224 }, { "epoch": 1.9267828157125853, "grad_norm": 1.3203125, "learning_rate": 5.576261155739313e-06, "loss": 0.5008, "step": 11225 }, { "epoch": 1.926955957147495, "grad_norm": 1.34375, "learning_rate": 5.574625538868237e-06, "loss": 0.445, "step": 11226 }, { "epoch": 1.9271290985824046, "grad_norm": 1.546875, "learning_rate": 5.572990069205126e-06, "loss": 0.4539, "step": 11227 }, { "epoch": 1.927302240017314, "grad_norm": 1.5390625, "learning_rate": 5.571354746804383e-06, "loss": 0.5106, "step": 11228 }, { "epoch": 1.9274753814522239, "grad_norm": 1.3515625, "learning_rate": 5.5697195717204025e-06, "loss": 0.4362, "step": 11229 }, { "epoch": 1.9276485228871334, "grad_norm": 1.359375, "learning_rate": 5.5680845440075885e-06, "loss": 0.4686, "step": 11230 }, { "epoch": 1.927821664322043, "grad_norm": 1.546875, "learning_rate": 5.566449663720318e-06, "loss": 0.5093, "step": 11231 }, { "epoch": 1.9279948057569527, "grad_norm": 1.4140625, "learning_rate": 5.56481493091298e-06, "loss": 0.4187, "step": 11232 }, { "epoch": 1.9281679471918625, "grad_norm": 1.4609375, "learning_rate": 5.563180345639952e-06, "loss": 0.4164, "step": 11233 }, { "epoch": 1.928341088626772, "grad_norm": 1.3359375, "learning_rate": 5.5615459079556054e-06, "loss": 0.4501, "step": 11234 }, { "epoch": 1.9285142300616815, "grad_norm": 1.3984375, "learning_rate": 5.559911617914316e-06, "loss": 0.4942, "step": 11235 }, { "epoch": 1.9286873714965913, "grad_norm": 1.4375, "learning_rate": 5.55827747557044e-06, "loss": 0.5075, "step": 11236 }, { "epoch": 1.928860512931501, "grad_norm": 1.4609375, "learning_rate": 5.556643480978339e-06, "loss": 0.391, "step": 11237 }, { "epoch": 1.9290336543664106, "grad_norm": 1.375, "learning_rate": 5.5550096341923664e-06, "loss": 0.4496, "step": 11238 }, { "epoch": 1.9292067958013202, "grad_norm": 1.3984375, "learning_rate": 5.553375935266873e-06, "loss": 0.4388, "step": 11239 }, { "epoch": 1.92937993723623, "grad_norm": 1.4609375, "learning_rate": 5.551742384256201e-06, "loss": 0.4781, "step": 11240 }, { "epoch": 1.9295530786711395, "grad_norm": 1.375, "learning_rate": 5.550108981214692e-06, "loss": 0.5004, "step": 11241 }, { "epoch": 1.929726220106049, "grad_norm": 1.4921875, "learning_rate": 5.548475726196679e-06, "loss": 0.4585, "step": 11242 }, { "epoch": 1.9298993615409588, "grad_norm": 1.46875, "learning_rate": 5.5468426192564914e-06, "loss": 0.5205, "step": 11243 }, { "epoch": 1.9300725029758685, "grad_norm": 1.4609375, "learning_rate": 5.54520966044846e-06, "loss": 0.5055, "step": 11244 }, { "epoch": 1.930245644410778, "grad_norm": 1.3828125, "learning_rate": 5.543576849826893e-06, "loss": 0.4648, "step": 11245 }, { "epoch": 1.9304187858456876, "grad_norm": 1.4453125, "learning_rate": 5.54194418744611e-06, "loss": 0.4657, "step": 11246 }, { "epoch": 1.9305919272805974, "grad_norm": 1.4140625, "learning_rate": 5.5403116733604214e-06, "loss": 0.502, "step": 11247 }, { "epoch": 1.9307650687155071, "grad_norm": 1.4296875, "learning_rate": 5.538679307624132e-06, "loss": 0.496, "step": 11248 }, { "epoch": 1.9309382101504167, "grad_norm": 1.4453125, "learning_rate": 5.537047090291545e-06, "loss": 0.4998, "step": 11249 }, { "epoch": 1.9311113515853262, "grad_norm": 1.40625, "learning_rate": 5.535415021416946e-06, "loss": 0.4652, "step": 11250 }, { "epoch": 1.931284493020236, "grad_norm": 1.4375, "learning_rate": 5.533783101054633e-06, "loss": 0.4516, "step": 11251 }, { "epoch": 1.9314576344551455, "grad_norm": 1.4296875, "learning_rate": 5.532151329258888e-06, "loss": 0.4422, "step": 11252 }, { "epoch": 1.931630775890055, "grad_norm": 1.484375, "learning_rate": 5.530519706083994e-06, "loss": 0.4333, "step": 11253 }, { "epoch": 1.9318039173249648, "grad_norm": 1.3515625, "learning_rate": 5.5288882315842265e-06, "loss": 0.4416, "step": 11254 }, { "epoch": 1.9319770587598746, "grad_norm": 1.4296875, "learning_rate": 5.5272569058138515e-06, "loss": 0.4988, "step": 11255 }, { "epoch": 1.9321502001947841, "grad_norm": 1.40625, "learning_rate": 5.525625728827134e-06, "loss": 0.4981, "step": 11256 }, { "epoch": 1.9323233416296937, "grad_norm": 1.40625, "learning_rate": 5.523994700678337e-06, "loss": 0.4912, "step": 11257 }, { "epoch": 1.9324964830646034, "grad_norm": 1.359375, "learning_rate": 5.5223638214217155e-06, "loss": 0.4214, "step": 11258 }, { "epoch": 1.9326696244995132, "grad_norm": 1.4375, "learning_rate": 5.520733091111524e-06, "loss": 0.5237, "step": 11259 }, { "epoch": 1.9328427659344227, "grad_norm": 1.2890625, "learning_rate": 5.519102509801999e-06, "loss": 0.4397, "step": 11260 }, { "epoch": 1.9330159073693323, "grad_norm": 1.3515625, "learning_rate": 5.5174720775473835e-06, "loss": 0.45, "step": 11261 }, { "epoch": 1.933189048804242, "grad_norm": 1.4453125, "learning_rate": 5.5158417944019175e-06, "loss": 0.4818, "step": 11262 }, { "epoch": 1.9333621902391516, "grad_norm": 1.3515625, "learning_rate": 5.5142116604198344e-06, "loss": 0.4718, "step": 11263 }, { "epoch": 1.933535331674061, "grad_norm": 1.3359375, "learning_rate": 5.51258167565535e-06, "loss": 0.4326, "step": 11264 }, { "epoch": 1.9337084731089709, "grad_norm": 1.421875, "learning_rate": 5.5109518401626905e-06, "loss": 0.4458, "step": 11265 }, { "epoch": 1.9338816145438806, "grad_norm": 1.4375, "learning_rate": 5.50932215399607e-06, "loss": 0.4808, "step": 11266 }, { "epoch": 1.9340547559787902, "grad_norm": 1.3984375, "learning_rate": 5.507692617209701e-06, "loss": 0.5643, "step": 11267 }, { "epoch": 1.9342278974136997, "grad_norm": 1.40625, "learning_rate": 5.506063229857792e-06, "loss": 0.4592, "step": 11268 }, { "epoch": 1.9344010388486095, "grad_norm": 1.46875, "learning_rate": 5.504433991994536e-06, "loss": 0.5377, "step": 11269 }, { "epoch": 1.9345741802835192, "grad_norm": 1.421875, "learning_rate": 5.502804903674133e-06, "loss": 0.4851, "step": 11270 }, { "epoch": 1.9347473217184288, "grad_norm": 1.453125, "learning_rate": 5.501175964950774e-06, "loss": 0.4487, "step": 11271 }, { "epoch": 1.9349204631533383, "grad_norm": 2.390625, "learning_rate": 5.4995471758786435e-06, "loss": 0.5356, "step": 11272 }, { "epoch": 1.935093604588248, "grad_norm": 1.34375, "learning_rate": 5.497918536511924e-06, "loss": 0.486, "step": 11273 }, { "epoch": 1.9352667460231576, "grad_norm": 1.7265625, "learning_rate": 5.496290046904792e-06, "loss": 0.5043, "step": 11274 }, { "epoch": 1.9354398874580672, "grad_norm": 1.6015625, "learning_rate": 5.494661707111417e-06, "loss": 0.5328, "step": 11275 }, { "epoch": 1.935613028892977, "grad_norm": 1.4609375, "learning_rate": 5.493033517185964e-06, "loss": 0.4312, "step": 11276 }, { "epoch": 1.9357861703278867, "grad_norm": 1.390625, "learning_rate": 5.4914054771825965e-06, "loss": 0.441, "step": 11277 }, { "epoch": 1.9359593117627962, "grad_norm": 1.4765625, "learning_rate": 5.489777587155473e-06, "loss": 0.5147, "step": 11278 }, { "epoch": 1.9361324531977058, "grad_norm": 1.2734375, "learning_rate": 5.488149847158737e-06, "loss": 0.3974, "step": 11279 }, { "epoch": 1.9363055946326155, "grad_norm": 1.5234375, "learning_rate": 5.486522257246538e-06, "loss": 0.4941, "step": 11280 }, { "epoch": 1.9364787360675253, "grad_norm": 1.3671875, "learning_rate": 5.484894817473019e-06, "loss": 0.4432, "step": 11281 }, { "epoch": 1.9366518775024348, "grad_norm": 1.4609375, "learning_rate": 5.483267527892312e-06, "loss": 0.4342, "step": 11282 }, { "epoch": 1.9368250189373444, "grad_norm": 1.3671875, "learning_rate": 5.481640388558551e-06, "loss": 0.4771, "step": 11283 }, { "epoch": 1.9369981603722541, "grad_norm": 1.453125, "learning_rate": 5.480013399525861e-06, "loss": 0.4672, "step": 11284 }, { "epoch": 1.9371713018071637, "grad_norm": 1.421875, "learning_rate": 5.478386560848363e-06, "loss": 0.5199, "step": 11285 }, { "epoch": 1.9373444432420732, "grad_norm": 1.328125, "learning_rate": 5.476759872580174e-06, "loss": 0.4679, "step": 11286 }, { "epoch": 1.937517584676983, "grad_norm": 1.390625, "learning_rate": 5.475133334775409e-06, "loss": 0.4899, "step": 11287 }, { "epoch": 1.9376907261118927, "grad_norm": 1.390625, "learning_rate": 5.473506947488163e-06, "loss": 0.4549, "step": 11288 }, { "epoch": 1.9378638675468023, "grad_norm": 1.5234375, "learning_rate": 5.4718807107725455e-06, "loss": 0.5318, "step": 11289 }, { "epoch": 1.9380370089817118, "grad_norm": 1.28125, "learning_rate": 5.470254624682649e-06, "loss": 0.3728, "step": 11290 }, { "epoch": 1.9382101504166216, "grad_norm": 1.3828125, "learning_rate": 5.468628689272566e-06, "loss": 0.4754, "step": 11291 }, { "epoch": 1.9383832918515314, "grad_norm": 1.3828125, "learning_rate": 5.467002904596386e-06, "loss": 0.4647, "step": 11292 }, { "epoch": 1.938556433286441, "grad_norm": 1.3828125, "learning_rate": 5.465377270708183e-06, "loss": 0.436, "step": 11293 }, { "epoch": 1.9387295747213504, "grad_norm": 1.3828125, "learning_rate": 5.4637517876620314e-06, "loss": 0.4452, "step": 11294 }, { "epoch": 1.9389027161562602, "grad_norm": 1.296875, "learning_rate": 5.462126455512012e-06, "loss": 0.4173, "step": 11295 }, { "epoch": 1.9390758575911697, "grad_norm": 1.4375, "learning_rate": 5.460501274312189e-06, "loss": 0.4522, "step": 11296 }, { "epoch": 1.9392489990260793, "grad_norm": 1.3515625, "learning_rate": 5.458876244116615e-06, "loss": 0.4699, "step": 11297 }, { "epoch": 1.939422140460989, "grad_norm": 1.5390625, "learning_rate": 5.45725136497935e-06, "loss": 0.4931, "step": 11298 }, { "epoch": 1.9395952818958988, "grad_norm": 1.5546875, "learning_rate": 5.455626636954447e-06, "loss": 0.4532, "step": 11299 }, { "epoch": 1.9397684233308083, "grad_norm": 1.421875, "learning_rate": 5.454002060095949e-06, "loss": 0.4844, "step": 11300 }, { "epoch": 1.9399415647657179, "grad_norm": 1.453125, "learning_rate": 5.4523776344579e-06, "loss": 0.5101, "step": 11301 }, { "epoch": 1.9401147062006276, "grad_norm": 1.546875, "learning_rate": 5.450753360094336e-06, "loss": 0.4968, "step": 11302 }, { "epoch": 1.9402878476355374, "grad_norm": 1.3984375, "learning_rate": 5.449129237059283e-06, "loss": 0.4695, "step": 11303 }, { "epoch": 1.940460989070447, "grad_norm": 1.4453125, "learning_rate": 5.447505265406763e-06, "loss": 0.4528, "step": 11304 }, { "epoch": 1.9406341305053565, "grad_norm": 1.375, "learning_rate": 5.445881445190808e-06, "loss": 0.4474, "step": 11305 }, { "epoch": 1.9408072719402663, "grad_norm": 1.328125, "learning_rate": 5.4442577764654334e-06, "loss": 0.3954, "step": 11306 }, { "epoch": 1.9409804133751758, "grad_norm": 1.3125, "learning_rate": 5.44263425928464e-06, "loss": 0.4787, "step": 11307 }, { "epoch": 1.9411535548100853, "grad_norm": 1.3828125, "learning_rate": 5.441010893702437e-06, "loss": 0.4775, "step": 11308 }, { "epoch": 1.941326696244995, "grad_norm": 1.375, "learning_rate": 5.439387679772828e-06, "loss": 0.4303, "step": 11309 }, { "epoch": 1.9414998376799049, "grad_norm": 1.625, "learning_rate": 5.437764617549805e-06, "loss": 0.4932, "step": 11310 }, { "epoch": 1.9416729791148144, "grad_norm": 1.390625, "learning_rate": 5.436141707087363e-06, "loss": 0.5232, "step": 11311 }, { "epoch": 1.941846120549724, "grad_norm": 1.2890625, "learning_rate": 5.434518948439482e-06, "loss": 0.4367, "step": 11312 }, { "epoch": 1.9420192619846337, "grad_norm": 1.3359375, "learning_rate": 5.432896341660145e-06, "loss": 0.4871, "step": 11313 }, { "epoch": 1.9421924034195435, "grad_norm": 1.2890625, "learning_rate": 5.431273886803325e-06, "loss": 0.4347, "step": 11314 }, { "epoch": 1.942365544854453, "grad_norm": 1.5703125, "learning_rate": 5.429651583922995e-06, "loss": 0.5022, "step": 11315 }, { "epoch": 1.9425386862893625, "grad_norm": 1.453125, "learning_rate": 5.428029433073117e-06, "loss": 0.4646, "step": 11316 }, { "epoch": 1.9427118277242723, "grad_norm": 1.4609375, "learning_rate": 5.426407434307654e-06, "loss": 0.4507, "step": 11317 }, { "epoch": 1.9428849691591819, "grad_norm": 1.421875, "learning_rate": 5.424785587680561e-06, "loss": 0.4756, "step": 11318 }, { "epoch": 1.9430581105940914, "grad_norm": 1.4296875, "learning_rate": 5.423163893245786e-06, "loss": 0.473, "step": 11319 }, { "epoch": 1.9432312520290012, "grad_norm": 1.5859375, "learning_rate": 5.4215423510572794e-06, "loss": 0.5196, "step": 11320 }, { "epoch": 1.943404393463911, "grad_norm": 1.359375, "learning_rate": 5.419920961168973e-06, "loss": 0.4152, "step": 11321 }, { "epoch": 1.9435775348988205, "grad_norm": 1.5390625, "learning_rate": 5.418299723634805e-06, "loss": 0.4788, "step": 11322 }, { "epoch": 1.94375067633373, "grad_norm": 1.4296875, "learning_rate": 5.416678638508703e-06, "loss": 0.4471, "step": 11323 }, { "epoch": 1.9439238177686398, "grad_norm": 1.453125, "learning_rate": 5.415057705844597e-06, "loss": 0.4574, "step": 11324 }, { "epoch": 1.9440969592035495, "grad_norm": 1.234375, "learning_rate": 5.413436925696405e-06, "loss": 0.4227, "step": 11325 }, { "epoch": 1.944270100638459, "grad_norm": 1.34375, "learning_rate": 5.411816298118033e-06, "loss": 0.4466, "step": 11326 }, { "epoch": 1.9444432420733686, "grad_norm": 1.484375, "learning_rate": 5.410195823163402e-06, "loss": 0.4812, "step": 11327 }, { "epoch": 1.9446163835082784, "grad_norm": 1.3359375, "learning_rate": 5.4085755008864105e-06, "loss": 0.4685, "step": 11328 }, { "epoch": 1.9447895249431881, "grad_norm": 1.390625, "learning_rate": 5.406955331340959e-06, "loss": 0.4181, "step": 11329 }, { "epoch": 1.9449626663780974, "grad_norm": 1.390625, "learning_rate": 5.405335314580944e-06, "loss": 0.4515, "step": 11330 }, { "epoch": 1.9451358078130072, "grad_norm": 1.421875, "learning_rate": 5.40371545066025e-06, "loss": 0.4672, "step": 11331 }, { "epoch": 1.945308949247917, "grad_norm": 1.515625, "learning_rate": 5.402095739632763e-06, "loss": 0.4704, "step": 11332 }, { "epoch": 1.9454820906828265, "grad_norm": 1.3359375, "learning_rate": 5.400476181552361e-06, "loss": 0.5065, "step": 11333 }, { "epoch": 1.945655232117736, "grad_norm": 1.421875, "learning_rate": 5.39885677647292e-06, "loss": 0.46, "step": 11334 }, { "epoch": 1.9458283735526458, "grad_norm": 1.4375, "learning_rate": 5.39723752444831e-06, "loss": 0.4885, "step": 11335 }, { "epoch": 1.9460015149875556, "grad_norm": 1.4375, "learning_rate": 5.39561842553239e-06, "loss": 0.4454, "step": 11336 }, { "epoch": 1.9461746564224651, "grad_norm": 1.421875, "learning_rate": 5.393999479779015e-06, "loss": 0.4655, "step": 11337 }, { "epoch": 1.9463477978573747, "grad_norm": 1.4140625, "learning_rate": 5.392380687242048e-06, "loss": 0.4884, "step": 11338 }, { "epoch": 1.9465209392922844, "grad_norm": 1.359375, "learning_rate": 5.390762047975338e-06, "loss": 0.4289, "step": 11339 }, { "epoch": 1.9466940807271942, "grad_norm": 1.4140625, "learning_rate": 5.389143562032722e-06, "loss": 0.4256, "step": 11340 }, { "epoch": 1.9468672221621035, "grad_norm": 1.578125, "learning_rate": 5.387525229468037e-06, "loss": 0.5507, "step": 11341 }, { "epoch": 1.9470403635970133, "grad_norm": 1.484375, "learning_rate": 5.38590705033512e-06, "loss": 0.4897, "step": 11342 }, { "epoch": 1.947213505031923, "grad_norm": 1.390625, "learning_rate": 5.384289024687799e-06, "loss": 0.5427, "step": 11343 }, { "epoch": 1.9473866464668326, "grad_norm": 1.4140625, "learning_rate": 5.382671152579898e-06, "loss": 0.4813, "step": 11344 }, { "epoch": 1.9475597879017421, "grad_norm": 1.4765625, "learning_rate": 5.381053434065229e-06, "loss": 0.5153, "step": 11345 }, { "epoch": 1.9477329293366519, "grad_norm": 1.40625, "learning_rate": 5.379435869197609e-06, "loss": 0.4492, "step": 11346 }, { "epoch": 1.9479060707715616, "grad_norm": 1.53125, "learning_rate": 5.377818458030845e-06, "loss": 0.4685, "step": 11347 }, { "epoch": 1.9480792122064712, "grad_norm": 1.421875, "learning_rate": 5.376201200618738e-06, "loss": 0.4477, "step": 11348 }, { "epoch": 1.9482523536413807, "grad_norm": 1.3828125, "learning_rate": 5.374584097015088e-06, "loss": 0.407, "step": 11349 }, { "epoch": 1.9484254950762905, "grad_norm": 1.4140625, "learning_rate": 5.3729671472736835e-06, "loss": 0.4873, "step": 11350 }, { "epoch": 1.9485986365112002, "grad_norm": 1.3203125, "learning_rate": 5.371350351448314e-06, "loss": 0.4383, "step": 11351 }, { "epoch": 1.9487717779461096, "grad_norm": 1.421875, "learning_rate": 5.369733709592763e-06, "loss": 0.4666, "step": 11352 }, { "epoch": 1.9489449193810193, "grad_norm": 1.3984375, "learning_rate": 5.368117221760803e-06, "loss": 0.4892, "step": 11353 }, { "epoch": 1.949118060815929, "grad_norm": 1.4140625, "learning_rate": 5.366500888006214e-06, "loss": 0.5129, "step": 11354 }, { "epoch": 1.9492912022508386, "grad_norm": 1.3515625, "learning_rate": 5.364884708382751e-06, "loss": 0.4441, "step": 11355 }, { "epoch": 1.9494643436857482, "grad_norm": 1.46875, "learning_rate": 5.363268682944183e-06, "loss": 0.4696, "step": 11356 }, { "epoch": 1.949637485120658, "grad_norm": 1.4140625, "learning_rate": 5.361652811744265e-06, "loss": 0.4701, "step": 11357 }, { "epoch": 1.9498106265555677, "grad_norm": 1.5546875, "learning_rate": 5.360037094836745e-06, "loss": 0.4616, "step": 11358 }, { "epoch": 1.9499837679904772, "grad_norm": 1.3125, "learning_rate": 5.358421532275373e-06, "loss": 0.4003, "step": 11359 }, { "epoch": 1.9501569094253868, "grad_norm": 1.3671875, "learning_rate": 5.356806124113888e-06, "loss": 0.4318, "step": 11360 }, { "epoch": 1.9503300508602965, "grad_norm": 1.4765625, "learning_rate": 5.355190870406026e-06, "loss": 0.5285, "step": 11361 }, { "epoch": 1.9505031922952063, "grad_norm": 1.4296875, "learning_rate": 5.353575771205519e-06, "loss": 0.3975, "step": 11362 }, { "epoch": 1.9506763337301158, "grad_norm": 1.3828125, "learning_rate": 5.351960826566096e-06, "loss": 0.4661, "step": 11363 }, { "epoch": 1.9508494751650254, "grad_norm": 1.4609375, "learning_rate": 5.350346036541468e-06, "loss": 0.5068, "step": 11364 }, { "epoch": 1.9510226165999351, "grad_norm": 1.5625, "learning_rate": 5.348731401185355e-06, "loss": 0.4906, "step": 11365 }, { "epoch": 1.9511957580348447, "grad_norm": 1.3828125, "learning_rate": 5.347116920551467e-06, "loss": 0.4655, "step": 11366 }, { "epoch": 1.9513688994697542, "grad_norm": 1.4765625, "learning_rate": 5.3455025946935105e-06, "loss": 0.4696, "step": 11367 }, { "epoch": 1.951542040904664, "grad_norm": 1.5234375, "learning_rate": 5.343888423665187e-06, "loss": 0.4937, "step": 11368 }, { "epoch": 1.9517151823395738, "grad_norm": 1.390625, "learning_rate": 5.34227440752018e-06, "loss": 0.4395, "step": 11369 }, { "epoch": 1.9518883237744833, "grad_norm": 1.4609375, "learning_rate": 5.340660546312191e-06, "loss": 0.4888, "step": 11370 }, { "epoch": 1.9520614652093928, "grad_norm": 1.3828125, "learning_rate": 5.339046840094899e-06, "loss": 0.4345, "step": 11371 }, { "epoch": 1.9522346066443026, "grad_norm": 1.40625, "learning_rate": 5.337433288921991e-06, "loss": 0.5061, "step": 11372 }, { "epoch": 1.9524077480792124, "grad_norm": 1.3828125, "learning_rate": 5.335819892847128e-06, "loss": 0.4658, "step": 11373 }, { "epoch": 1.952580889514122, "grad_norm": 1.3359375, "learning_rate": 5.334206651923987e-06, "loss": 0.4593, "step": 11374 }, { "epoch": 1.9527540309490314, "grad_norm": 1.421875, "learning_rate": 5.332593566206229e-06, "loss": 0.4687, "step": 11375 }, { "epoch": 1.9529271723839412, "grad_norm": 1.328125, "learning_rate": 5.330980635747514e-06, "loss": 0.4357, "step": 11376 }, { "epoch": 1.9531003138188507, "grad_norm": 1.4296875, "learning_rate": 5.329367860601495e-06, "loss": 0.497, "step": 11377 }, { "epoch": 1.9532734552537603, "grad_norm": 1.328125, "learning_rate": 5.327755240821824e-06, "loss": 0.4259, "step": 11378 }, { "epoch": 1.95344659668867, "grad_norm": 1.328125, "learning_rate": 5.326142776462135e-06, "loss": 0.4412, "step": 11379 }, { "epoch": 1.9536197381235798, "grad_norm": 1.390625, "learning_rate": 5.324530467576068e-06, "loss": 0.4797, "step": 11380 }, { "epoch": 1.9537928795584893, "grad_norm": 1.3515625, "learning_rate": 5.322918314217263e-06, "loss": 0.4542, "step": 11381 }, { "epoch": 1.9539660209933989, "grad_norm": 1.3515625, "learning_rate": 5.3213063164393465e-06, "loss": 0.44, "step": 11382 }, { "epoch": 1.9541391624283087, "grad_norm": 1.4765625, "learning_rate": 5.319694474295933e-06, "loss": 0.435, "step": 11383 }, { "epoch": 1.9543123038632184, "grad_norm": 1.2734375, "learning_rate": 5.318082787840646e-06, "loss": 0.4196, "step": 11384 }, { "epoch": 1.954485445298128, "grad_norm": 1.4765625, "learning_rate": 5.316471257127094e-06, "loss": 0.4675, "step": 11385 }, { "epoch": 1.9546585867330375, "grad_norm": 1.453125, "learning_rate": 5.314859882208885e-06, "loss": 0.4476, "step": 11386 }, { "epoch": 1.9548317281679473, "grad_norm": 1.4453125, "learning_rate": 5.313248663139626e-06, "loss": 0.4759, "step": 11387 }, { "epoch": 1.9550048696028568, "grad_norm": 1.4375, "learning_rate": 5.311637599972905e-06, "loss": 0.4668, "step": 11388 }, { "epoch": 1.9551780110377663, "grad_norm": 1.328125, "learning_rate": 5.310026692762316e-06, "loss": 0.4286, "step": 11389 }, { "epoch": 1.955351152472676, "grad_norm": 1.3359375, "learning_rate": 5.308415941561446e-06, "loss": 0.4308, "step": 11390 }, { "epoch": 1.9555242939075859, "grad_norm": 1.3671875, "learning_rate": 5.306805346423876e-06, "loss": 0.4259, "step": 11391 }, { "epoch": 1.9556974353424954, "grad_norm": 1.359375, "learning_rate": 5.305194907403179e-06, "loss": 0.4818, "step": 11392 }, { "epoch": 1.955870576777405, "grad_norm": 1.3984375, "learning_rate": 5.303584624552931e-06, "loss": 0.4258, "step": 11393 }, { "epoch": 1.9560437182123147, "grad_norm": 1.3515625, "learning_rate": 5.301974497926692e-06, "loss": 0.4696, "step": 11394 }, { "epoch": 1.9562168596472245, "grad_norm": 1.5078125, "learning_rate": 5.3003645275780245e-06, "loss": 0.4441, "step": 11395 }, { "epoch": 1.956390001082134, "grad_norm": 1.3359375, "learning_rate": 5.298754713560488e-06, "loss": 0.3973, "step": 11396 }, { "epoch": 1.9565631425170436, "grad_norm": 1.4609375, "learning_rate": 5.297145055927622e-06, "loss": 0.5145, "step": 11397 }, { "epoch": 1.9567362839519533, "grad_norm": 1.453125, "learning_rate": 5.295535554732978e-06, "loss": 0.4998, "step": 11398 }, { "epoch": 1.9569094253868629, "grad_norm": 1.3984375, "learning_rate": 5.293926210030091e-06, "loss": 0.4723, "step": 11399 }, { "epoch": 1.9570825668217724, "grad_norm": 1.4296875, "learning_rate": 5.2923170218725e-06, "loss": 0.4786, "step": 11400 }, { "epoch": 1.9572557082566822, "grad_norm": 1.3671875, "learning_rate": 5.290707990313735e-06, "loss": 0.476, "step": 11401 }, { "epoch": 1.957428849691592, "grad_norm": 1.4375, "learning_rate": 5.289099115407308e-06, "loss": 0.5031, "step": 11402 }, { "epoch": 1.9576019911265015, "grad_norm": 1.3671875, "learning_rate": 5.287490397206749e-06, "loss": 0.4792, "step": 11403 }, { "epoch": 1.957775132561411, "grad_norm": 1.40625, "learning_rate": 5.285881835765569e-06, "loss": 0.4806, "step": 11404 }, { "epoch": 1.9579482739963208, "grad_norm": 1.328125, "learning_rate": 5.284273431137274e-06, "loss": 0.4889, "step": 11405 }, { "epoch": 1.9581214154312305, "grad_norm": 1.390625, "learning_rate": 5.282665183375371e-06, "loss": 0.5465, "step": 11406 }, { "epoch": 1.95829455686614, "grad_norm": 1.3828125, "learning_rate": 5.281057092533351e-06, "loss": 0.455, "step": 11407 }, { "epoch": 1.9584676983010496, "grad_norm": 1.46875, "learning_rate": 5.279449158664711e-06, "loss": 0.4807, "step": 11408 }, { "epoch": 1.9586408397359594, "grad_norm": 1.4765625, "learning_rate": 5.277841381822935e-06, "loss": 0.5321, "step": 11409 }, { "epoch": 1.958813981170869, "grad_norm": 1.375, "learning_rate": 5.276233762061507e-06, "loss": 0.5025, "step": 11410 }, { "epoch": 1.9589871226057785, "grad_norm": 1.390625, "learning_rate": 5.274626299433908e-06, "loss": 0.4019, "step": 11411 }, { "epoch": 1.9591602640406882, "grad_norm": 1.515625, "learning_rate": 5.273018993993602e-06, "loss": 0.4825, "step": 11412 }, { "epoch": 1.959333405475598, "grad_norm": 1.484375, "learning_rate": 5.2714118457940524e-06, "loss": 0.4502, "step": 11413 }, { "epoch": 1.9595065469105075, "grad_norm": 1.2734375, "learning_rate": 5.269804854888731e-06, "loss": 0.4109, "step": 11414 }, { "epoch": 1.959679688345417, "grad_norm": 1.4140625, "learning_rate": 5.268198021331091e-06, "loss": 0.4275, "step": 11415 }, { "epoch": 1.9598528297803268, "grad_norm": 1.5, "learning_rate": 5.2665913451745785e-06, "loss": 0.4334, "step": 11416 }, { "epoch": 1.9600259712152366, "grad_norm": 1.3984375, "learning_rate": 5.264984826472639e-06, "loss": 0.4626, "step": 11417 }, { "epoch": 1.9601991126501461, "grad_norm": 1.3359375, "learning_rate": 5.263378465278716e-06, "loss": 0.4546, "step": 11418 }, { "epoch": 1.9603722540850557, "grad_norm": 1.4453125, "learning_rate": 5.2617722616462415e-06, "loss": 0.5056, "step": 11419 }, { "epoch": 1.9605453955199654, "grad_norm": 1.390625, "learning_rate": 5.260166215628652e-06, "loss": 0.4443, "step": 11420 }, { "epoch": 1.960718536954875, "grad_norm": 1.3515625, "learning_rate": 5.2585603272793605e-06, "loss": 0.4077, "step": 11421 }, { "epoch": 1.9608916783897845, "grad_norm": 1.46875, "learning_rate": 5.256954596651793e-06, "loss": 0.5221, "step": 11422 }, { "epoch": 1.9610648198246943, "grad_norm": 1.53125, "learning_rate": 5.255349023799357e-06, "loss": 0.4435, "step": 11423 }, { "epoch": 1.961237961259604, "grad_norm": 1.421875, "learning_rate": 5.253743608775475e-06, "loss": 0.5804, "step": 11424 }, { "epoch": 1.9614111026945136, "grad_norm": 1.4453125, "learning_rate": 5.252138351633537e-06, "loss": 0.4128, "step": 11425 }, { "epoch": 1.9615842441294231, "grad_norm": 1.4921875, "learning_rate": 5.250533252426947e-06, "loss": 0.5106, "step": 11426 }, { "epoch": 1.9617573855643329, "grad_norm": 1.515625, "learning_rate": 5.248928311209096e-06, "loss": 0.5109, "step": 11427 }, { "epoch": 1.9619305269992426, "grad_norm": 1.34375, "learning_rate": 5.247323528033373e-06, "loss": 0.3993, "step": 11428 }, { "epoch": 1.9621036684341522, "grad_norm": 1.40625, "learning_rate": 5.2457189029531575e-06, "loss": 0.4997, "step": 11429 }, { "epoch": 1.9622768098690617, "grad_norm": 1.3984375, "learning_rate": 5.2441144360218346e-06, "loss": 0.4873, "step": 11430 }, { "epoch": 1.9624499513039715, "grad_norm": 1.390625, "learning_rate": 5.2425101272927644e-06, "loss": 0.4187, "step": 11431 }, { "epoch": 1.962623092738881, "grad_norm": 1.46875, "learning_rate": 5.24090597681932e-06, "loss": 0.4675, "step": 11432 }, { "epoch": 1.9627962341737906, "grad_norm": 1.4296875, "learning_rate": 5.239301984654864e-06, "loss": 0.4128, "step": 11433 }, { "epoch": 1.9629693756087003, "grad_norm": 1.4765625, "learning_rate": 5.237698150852747e-06, "loss": 0.533, "step": 11434 }, { "epoch": 1.96314251704361, "grad_norm": 1.4140625, "learning_rate": 5.236094475466325e-06, "loss": 0.457, "step": 11435 }, { "epoch": 1.9633156584785196, "grad_norm": 1.4453125, "learning_rate": 5.234490958548941e-06, "loss": 0.4909, "step": 11436 }, { "epoch": 1.9634887999134292, "grad_norm": 1.328125, "learning_rate": 5.232887600153937e-06, "loss": 0.4378, "step": 11437 }, { "epoch": 1.963661941348339, "grad_norm": 1.5546875, "learning_rate": 5.231284400334645e-06, "loss": 0.5287, "step": 11438 }, { "epoch": 1.9638350827832487, "grad_norm": 1.40625, "learning_rate": 5.2296813591444e-06, "loss": 0.5175, "step": 11439 }, { "epoch": 1.9640082242181582, "grad_norm": 1.359375, "learning_rate": 5.228078476636523e-06, "loss": 0.4742, "step": 11440 }, { "epoch": 1.9641813656530678, "grad_norm": 1.546875, "learning_rate": 5.2264757528643285e-06, "loss": 0.4567, "step": 11441 }, { "epoch": 1.9643545070879775, "grad_norm": 1.5234375, "learning_rate": 5.224873187881136e-06, "loss": 0.4738, "step": 11442 }, { "epoch": 1.964527648522887, "grad_norm": 1.4609375, "learning_rate": 5.223270781740254e-06, "loss": 0.524, "step": 11443 }, { "epoch": 1.9647007899577966, "grad_norm": 1.3359375, "learning_rate": 5.221668534494988e-06, "loss": 0.476, "step": 11444 }, { "epoch": 1.9648739313927064, "grad_norm": 1.4296875, "learning_rate": 5.220066446198625e-06, "loss": 0.456, "step": 11445 }, { "epoch": 1.9650470728276161, "grad_norm": 1.453125, "learning_rate": 5.218464516904469e-06, "loss": 0.4391, "step": 11446 }, { "epoch": 1.9652202142625257, "grad_norm": 1.5078125, "learning_rate": 5.216862746665803e-06, "loss": 0.5603, "step": 11447 }, { "epoch": 1.9653933556974352, "grad_norm": 1.421875, "learning_rate": 5.215261135535915e-06, "loss": 0.5329, "step": 11448 }, { "epoch": 1.965566497132345, "grad_norm": 1.3828125, "learning_rate": 5.213659683568073e-06, "loss": 0.4672, "step": 11449 }, { "epoch": 1.9657396385672548, "grad_norm": 1.4609375, "learning_rate": 5.212058390815551e-06, "loss": 0.4976, "step": 11450 }, { "epoch": 1.9659127800021643, "grad_norm": 1.625, "learning_rate": 5.210457257331617e-06, "loss": 0.4292, "step": 11451 }, { "epoch": 1.9660859214370738, "grad_norm": 1.390625, "learning_rate": 5.208856283169532e-06, "loss": 0.5035, "step": 11452 }, { "epoch": 1.9662590628719836, "grad_norm": 1.5234375, "learning_rate": 5.2072554683825505e-06, "loss": 0.5274, "step": 11453 }, { "epoch": 1.9664322043068931, "grad_norm": 1.515625, "learning_rate": 5.205654813023927e-06, "loss": 0.5098, "step": 11454 }, { "epoch": 1.9666053457418027, "grad_norm": 1.390625, "learning_rate": 5.204054317146899e-06, "loss": 0.4392, "step": 11455 }, { "epoch": 1.9667784871767124, "grad_norm": 1.3359375, "learning_rate": 5.202453980804707e-06, "loss": 0.4799, "step": 11456 }, { "epoch": 1.9669516286116222, "grad_norm": 1.40625, "learning_rate": 5.200853804050592e-06, "loss": 0.4874, "step": 11457 }, { "epoch": 1.9671247700465317, "grad_norm": 1.3515625, "learning_rate": 5.199253786937783e-06, "loss": 0.4593, "step": 11458 }, { "epoch": 1.9672979114814413, "grad_norm": 1.3359375, "learning_rate": 5.197653929519498e-06, "loss": 0.4656, "step": 11459 }, { "epoch": 1.967471052916351, "grad_norm": 1.390625, "learning_rate": 5.196054231848957e-06, "loss": 0.4692, "step": 11460 }, { "epoch": 1.9676441943512608, "grad_norm": 1.3671875, "learning_rate": 5.194454693979375e-06, "loss": 0.4661, "step": 11461 }, { "epoch": 1.9678173357861704, "grad_norm": 1.4765625, "learning_rate": 5.192855315963959e-06, "loss": 0.4895, "step": 11462 }, { "epoch": 1.96799047722108, "grad_norm": 1.4375, "learning_rate": 5.191256097855914e-06, "loss": 0.3978, "step": 11463 }, { "epoch": 1.9681636186559897, "grad_norm": 1.375, "learning_rate": 5.189657039708433e-06, "loss": 0.5814, "step": 11464 }, { "epoch": 1.9683367600908994, "grad_norm": 1.375, "learning_rate": 5.188058141574708e-06, "loss": 0.5054, "step": 11465 }, { "epoch": 1.9685099015258087, "grad_norm": 1.515625, "learning_rate": 5.186459403507929e-06, "loss": 0.505, "step": 11466 }, { "epoch": 1.9686830429607185, "grad_norm": 1.375, "learning_rate": 5.1848608255612735e-06, "loss": 0.4279, "step": 11467 }, { "epoch": 1.9688561843956283, "grad_norm": 1.375, "learning_rate": 5.183262407787922e-06, "loss": 0.4532, "step": 11468 }, { "epoch": 1.9690293258305378, "grad_norm": 1.3203125, "learning_rate": 5.181664150241039e-06, "loss": 0.4077, "step": 11469 }, { "epoch": 1.9692024672654473, "grad_norm": 1.40625, "learning_rate": 5.1800660529737955e-06, "loss": 0.5438, "step": 11470 }, { "epoch": 1.969375608700357, "grad_norm": 1.375, "learning_rate": 5.17846811603935e-06, "loss": 0.4551, "step": 11471 }, { "epoch": 1.9695487501352669, "grad_norm": 1.4140625, "learning_rate": 5.176870339490859e-06, "loss": 0.4947, "step": 11472 }, { "epoch": 1.9697218915701764, "grad_norm": 1.515625, "learning_rate": 5.175272723381466e-06, "loss": 0.4622, "step": 11473 }, { "epoch": 1.969895033005086, "grad_norm": 1.40625, "learning_rate": 5.173675267764318e-06, "loss": 0.4862, "step": 11474 }, { "epoch": 1.9700681744399957, "grad_norm": 1.421875, "learning_rate": 5.172077972692553e-06, "loss": 0.5103, "step": 11475 }, { "epoch": 1.9702413158749055, "grad_norm": 1.4296875, "learning_rate": 5.170480838219305e-06, "loss": 0.4619, "step": 11476 }, { "epoch": 1.9704144573098148, "grad_norm": 1.3828125, "learning_rate": 5.168883864397703e-06, "loss": 0.4459, "step": 11477 }, { "epoch": 1.9705875987447246, "grad_norm": 1.328125, "learning_rate": 5.167287051280867e-06, "loss": 0.4603, "step": 11478 }, { "epoch": 1.9707607401796343, "grad_norm": 1.3203125, "learning_rate": 5.165690398921916e-06, "loss": 0.448, "step": 11479 }, { "epoch": 1.9709338816145439, "grad_norm": 1.3671875, "learning_rate": 5.164093907373964e-06, "loss": 0.4403, "step": 11480 }, { "epoch": 1.9711070230494534, "grad_norm": 1.484375, "learning_rate": 5.162497576690112e-06, "loss": 0.4999, "step": 11481 }, { "epoch": 1.9712801644843632, "grad_norm": 1.53125, "learning_rate": 5.160901406923469e-06, "loss": 0.485, "step": 11482 }, { "epoch": 1.971453305919273, "grad_norm": 1.4765625, "learning_rate": 5.159305398127124e-06, "loss": 0.5205, "step": 11483 }, { "epoch": 1.9716264473541825, "grad_norm": 1.3515625, "learning_rate": 5.157709550354166e-06, "loss": 0.4393, "step": 11484 }, { "epoch": 1.971799588789092, "grad_norm": 1.46875, "learning_rate": 5.156113863657686e-06, "loss": 0.5177, "step": 11485 }, { "epoch": 1.9719727302240018, "grad_norm": 1.5625, "learning_rate": 5.154518338090762e-06, "loss": 0.4743, "step": 11486 }, { "epoch": 1.9721458716589115, "grad_norm": 1.328125, "learning_rate": 5.15292297370647e-06, "loss": 0.414, "step": 11487 }, { "epoch": 1.9723190130938208, "grad_norm": 1.3984375, "learning_rate": 5.15132777055787e-06, "loss": 0.4643, "step": 11488 }, { "epoch": 1.9724921545287306, "grad_norm": 1.3125, "learning_rate": 5.149732728698036e-06, "loss": 0.4343, "step": 11489 }, { "epoch": 1.9726652959636404, "grad_norm": 1.40625, "learning_rate": 5.148137848180024e-06, "loss": 0.4712, "step": 11490 }, { "epoch": 1.97283843739855, "grad_norm": 1.4609375, "learning_rate": 5.146543129056888e-06, "loss": 0.5051, "step": 11491 }, { "epoch": 1.9730115788334595, "grad_norm": 1.4375, "learning_rate": 5.144948571381669e-06, "loss": 0.4585, "step": 11492 }, { "epoch": 1.9731847202683692, "grad_norm": 1.3828125, "learning_rate": 5.1433541752074156e-06, "loss": 0.5095, "step": 11493 }, { "epoch": 1.973357861703279, "grad_norm": 1.3828125, "learning_rate": 5.141759940587161e-06, "loss": 0.4746, "step": 11494 }, { "epoch": 1.9735310031381885, "grad_norm": 1.4453125, "learning_rate": 5.14016586757394e-06, "loss": 0.4346, "step": 11495 }, { "epoch": 1.973704144573098, "grad_norm": 1.484375, "learning_rate": 5.138571956220779e-06, "loss": 0.5071, "step": 11496 }, { "epoch": 1.9738772860080078, "grad_norm": 1.3125, "learning_rate": 5.136978206580694e-06, "loss": 0.4652, "step": 11497 }, { "epoch": 1.9740504274429176, "grad_norm": 1.40625, "learning_rate": 5.1353846187067015e-06, "loss": 0.4279, "step": 11498 }, { "epoch": 1.9742235688778271, "grad_norm": 1.4140625, "learning_rate": 5.133791192651809e-06, "loss": 0.4269, "step": 11499 }, { "epoch": 1.9743967103127367, "grad_norm": 1.40625, "learning_rate": 5.132197928469033e-06, "loss": 0.4847, "step": 11500 }, { "epoch": 1.9745698517476464, "grad_norm": 1.4765625, "learning_rate": 5.130604826211361e-06, "loss": 0.4846, "step": 11501 }, { "epoch": 1.974742993182556, "grad_norm": 1.3359375, "learning_rate": 5.129011885931791e-06, "loss": 0.4283, "step": 11502 }, { "epoch": 1.9749161346174655, "grad_norm": 1.265625, "learning_rate": 5.127419107683309e-06, "loss": 0.406, "step": 11503 }, { "epoch": 1.9750892760523753, "grad_norm": 1.3671875, "learning_rate": 5.1258264915189015e-06, "loss": 0.4157, "step": 11504 }, { "epoch": 1.975262417487285, "grad_norm": 1.4453125, "learning_rate": 5.124234037491543e-06, "loss": 0.4073, "step": 11505 }, { "epoch": 1.9754355589221946, "grad_norm": 1.484375, "learning_rate": 5.122641745654212e-06, "loss": 0.4215, "step": 11506 }, { "epoch": 1.9756087003571041, "grad_norm": 1.3828125, "learning_rate": 5.121049616059866e-06, "loss": 0.4154, "step": 11507 }, { "epoch": 1.9757818417920139, "grad_norm": 1.4609375, "learning_rate": 5.119457648761471e-06, "loss": 0.4754, "step": 11508 }, { "epoch": 1.9759549832269236, "grad_norm": 1.3671875, "learning_rate": 5.117865843811982e-06, "loss": 0.4617, "step": 11509 }, { "epoch": 1.9761281246618332, "grad_norm": 1.3515625, "learning_rate": 5.116274201264352e-06, "loss": 0.4713, "step": 11510 }, { "epoch": 1.9763012660967427, "grad_norm": 1.2890625, "learning_rate": 5.114682721171525e-06, "loss": 0.4728, "step": 11511 }, { "epoch": 1.9764744075316525, "grad_norm": 1.359375, "learning_rate": 5.113091403586439e-06, "loss": 0.4586, "step": 11512 }, { "epoch": 1.976647548966562, "grad_norm": 1.2890625, "learning_rate": 5.11150024856203e-06, "loss": 0.439, "step": 11513 }, { "epoch": 1.9768206904014716, "grad_norm": 1.328125, "learning_rate": 5.109909256151227e-06, "loss": 0.4468, "step": 11514 }, { "epoch": 1.9769938318363813, "grad_norm": 1.53125, "learning_rate": 5.108318426406958e-06, "loss": 0.5165, "step": 11515 }, { "epoch": 1.977166973271291, "grad_norm": 1.4140625, "learning_rate": 5.106727759382133e-06, "loss": 0.4499, "step": 11516 }, { "epoch": 1.9773401147062006, "grad_norm": 1.3515625, "learning_rate": 5.105137255129669e-06, "loss": 0.4516, "step": 11517 }, { "epoch": 1.9775132561411102, "grad_norm": 1.3984375, "learning_rate": 5.103546913702472e-06, "loss": 0.4646, "step": 11518 }, { "epoch": 1.97768639757602, "grad_norm": 1.5546875, "learning_rate": 5.101956735153445e-06, "loss": 0.4787, "step": 11519 }, { "epoch": 1.9778595390109297, "grad_norm": 1.3984375, "learning_rate": 5.100366719535488e-06, "loss": 0.4032, "step": 11520 }, { "epoch": 1.9780326804458392, "grad_norm": 1.4765625, "learning_rate": 5.098776866901483e-06, "loss": 0.4927, "step": 11521 }, { "epoch": 1.9782058218807488, "grad_norm": 1.3359375, "learning_rate": 5.097187177304322e-06, "loss": 0.4722, "step": 11522 }, { "epoch": 1.9783789633156585, "grad_norm": 1.3828125, "learning_rate": 5.095597650796887e-06, "loss": 0.4708, "step": 11523 }, { "epoch": 1.978552104750568, "grad_norm": 1.40625, "learning_rate": 5.094008287432052e-06, "loss": 0.4663, "step": 11524 }, { "epoch": 1.9787252461854776, "grad_norm": 1.484375, "learning_rate": 5.092419087262681e-06, "loss": 0.4651, "step": 11525 }, { "epoch": 1.9788983876203874, "grad_norm": 1.4140625, "learning_rate": 5.090830050341642e-06, "loss": 0.4343, "step": 11526 }, { "epoch": 1.9790715290552972, "grad_norm": 1.3984375, "learning_rate": 5.089241176721794e-06, "loss": 0.4394, "step": 11527 }, { "epoch": 1.9792446704902067, "grad_norm": 1.4375, "learning_rate": 5.087652466455989e-06, "loss": 0.4659, "step": 11528 }, { "epoch": 1.9794178119251162, "grad_norm": 1.3828125, "learning_rate": 5.086063919597075e-06, "loss": 0.4772, "step": 11529 }, { "epoch": 1.979590953360026, "grad_norm": 1.328125, "learning_rate": 5.084475536197898e-06, "loss": 0.4632, "step": 11530 }, { "epoch": 1.9797640947949358, "grad_norm": 1.4296875, "learning_rate": 5.082887316311288e-06, "loss": 0.4964, "step": 11531 }, { "epoch": 1.9799372362298453, "grad_norm": 1.3984375, "learning_rate": 5.081299259990075e-06, "loss": 0.4482, "step": 11532 }, { "epoch": 1.9801103776647548, "grad_norm": 1.4453125, "learning_rate": 5.079711367287093e-06, "loss": 0.4375, "step": 11533 }, { "epoch": 1.9802835190996646, "grad_norm": 1.484375, "learning_rate": 5.078123638255164e-06, "loss": 0.5875, "step": 11534 }, { "epoch": 1.9804566605345741, "grad_norm": 1.4296875, "learning_rate": 5.076536072947092e-06, "loss": 0.4599, "step": 11535 }, { "epoch": 1.9806298019694837, "grad_norm": 1.3359375, "learning_rate": 5.074948671415694e-06, "loss": 0.4525, "step": 11536 }, { "epoch": 1.9808029434043934, "grad_norm": 1.4296875, "learning_rate": 5.073361433713772e-06, "loss": 0.4669, "step": 11537 }, { "epoch": 1.9809760848393032, "grad_norm": 1.6328125, "learning_rate": 5.0717743598941264e-06, "loss": 0.5949, "step": 11538 }, { "epoch": 1.9811492262742127, "grad_norm": 1.5234375, "learning_rate": 5.070187450009553e-06, "loss": 0.483, "step": 11539 }, { "epoch": 1.9813223677091223, "grad_norm": 1.34375, "learning_rate": 5.068600704112832e-06, "loss": 0.4416, "step": 11540 }, { "epoch": 1.981495509144032, "grad_norm": 1.484375, "learning_rate": 5.06701412225675e-06, "loss": 0.4931, "step": 11541 }, { "epoch": 1.9816686505789418, "grad_norm": 1.3515625, "learning_rate": 5.065427704494079e-06, "loss": 0.4745, "step": 11542 }, { "epoch": 1.9818417920138514, "grad_norm": 1.4296875, "learning_rate": 5.063841450877603e-06, "loss": 0.4518, "step": 11543 }, { "epoch": 1.982014933448761, "grad_norm": 1.34375, "learning_rate": 5.062255361460078e-06, "loss": 0.4141, "step": 11544 }, { "epoch": 1.9821880748836707, "grad_norm": 1.390625, "learning_rate": 5.060669436294265e-06, "loss": 0.3838, "step": 11545 }, { "epoch": 1.9823612163185802, "grad_norm": 1.3671875, "learning_rate": 5.059083675432921e-06, "loss": 0.4587, "step": 11546 }, { "epoch": 1.9825343577534897, "grad_norm": 1.5, "learning_rate": 5.057498078928796e-06, "loss": 0.5237, "step": 11547 }, { "epoch": 1.9827074991883995, "grad_norm": 1.40625, "learning_rate": 5.0559126468346354e-06, "loss": 0.4588, "step": 11548 }, { "epoch": 1.9828806406233093, "grad_norm": 1.4375, "learning_rate": 5.054327379203172e-06, "loss": 0.4736, "step": 11549 }, { "epoch": 1.9830537820582188, "grad_norm": 1.34375, "learning_rate": 5.052742276087144e-06, "loss": 0.4212, "step": 11550 } ], "logging_steps": 1, "max_steps": 17325, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 5775, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.4084702110980506e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }