{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9904153354632586, "eval_steps": 500, "global_step": 468, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.666666666666667e-06, "loss": 3.7352, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.333333333333334e-06, "loss": 3.7667, "step": 2 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-06, "loss": 3.7797, "step": 3 }, { "epoch": 0.03, "learning_rate": 1.0666666666666667e-05, "loss": 3.6592, "step": 4 }, { "epoch": 0.03, "learning_rate": 1.3333333333333333e-05, "loss": 3.6968, "step": 5 }, { "epoch": 0.04, "learning_rate": 1.6000000000000003e-05, "loss": 3.7754, "step": 6 }, { "epoch": 0.04, "learning_rate": 1.866666666666667e-05, "loss": 3.4281, "step": 7 }, { "epoch": 0.05, "learning_rate": 2.1333333333333335e-05, "loss": 3.6136, "step": 8 }, { "epoch": 0.06, "learning_rate": 2.4e-05, "loss": 3.0098, "step": 9 }, { "epoch": 0.06, "learning_rate": 2.6666666666666667e-05, "loss": 2.7693, "step": 10 }, { "epoch": 0.07, "learning_rate": 2.9333333333333333e-05, "loss": 2.2559, "step": 11 }, { "epoch": 0.08, "learning_rate": 3.2000000000000005e-05, "loss": 2.1567, "step": 12 }, { "epoch": 0.08, "learning_rate": 3.466666666666667e-05, "loss": 1.7343, "step": 13 }, { "epoch": 0.09, "learning_rate": 3.733333333333334e-05, "loss": 1.677, "step": 14 }, { "epoch": 0.1, "learning_rate": 4e-05, "loss": 1.2854, "step": 15 }, { "epoch": 0.1, "learning_rate": 3.999951904814875e-05, "loss": 1.1381, "step": 16 }, { "epoch": 0.11, "learning_rate": 3.999807621572648e-05, "loss": 0.7843, "step": 17 }, { "epoch": 0.12, "learning_rate": 3.999567157212646e-05, "loss": 0.7415, "step": 18 }, { "epoch": 0.12, "learning_rate": 3.999230523300049e-05, "loss": 0.6684, "step": 19 }, { "epoch": 0.13, "learning_rate": 3.998797736025326e-05, "loss": 0.4267, "step": 20 }, { "epoch": 0.13, "learning_rate": 3.9982688162034624e-05, "loss": 0.5993, "step": 21 }, { "epoch": 0.14, "learning_rate": 3.997643789272954e-05, "loss": 0.555, "step": 22 }, { "epoch": 0.15, "learning_rate": 3.996922685294587e-05, "loss": 0.5142, "step": 23 }, { "epoch": 0.15, "learning_rate": 3.9961055389499904e-05, "loss": 0.3841, "step": 24 }, { "epoch": 0.16, "learning_rate": 3.9951923895399696e-05, "loss": 0.3057, "step": 25 }, { "epoch": 0.17, "learning_rate": 3.9941832809826136e-05, "loss": 0.4327, "step": 26 }, { "epoch": 0.17, "learning_rate": 3.993078261811186e-05, "loss": 0.3207, "step": 27 }, { "epoch": 0.18, "learning_rate": 3.991877385171789e-05, "loss": 0.454, "step": 28 }, { "epoch": 0.19, "learning_rate": 3.990580708820805e-05, "loss": 0.2997, "step": 29 }, { "epoch": 0.19, "learning_rate": 3.9891882951221246e-05, "loss": 0.3583, "step": 30 }, { "epoch": 0.2, "learning_rate": 3.9877002110441424e-05, "loss": 0.2823, "step": 31 }, { "epoch": 0.2, "learning_rate": 3.986116528156537e-05, "loss": 0.2659, "step": 32 }, { "epoch": 0.21, "learning_rate": 3.9844373226268305e-05, "loss": 0.3158, "step": 33 }, { "epoch": 0.22, "learning_rate": 3.982662675216723e-05, "loss": 0.2704, "step": 34 }, { "epoch": 0.22, "learning_rate": 3.9807926712782115e-05, "loss": 0.3642, "step": 35 }, { "epoch": 0.23, "learning_rate": 3.978827400749481e-05, "loss": 0.3058, "step": 36 }, { "epoch": 0.24, "learning_rate": 3.976766958150581e-05, "loss": 0.3343, "step": 37 }, { "epoch": 0.24, "learning_rate": 3.97461144257888e-05, "loss": 0.2987, "step": 38 }, { "epoch": 0.25, "learning_rate": 3.972360957704298e-05, "loss": 0.2469, "step": 39 }, { "epoch": 0.26, "learning_rate": 3.970015611764323e-05, "loss": 0.359, "step": 40 }, { "epoch": 0.26, "learning_rate": 3.9675755175588006e-05, "loss": 0.2903, "step": 41 }, { "epoch": 0.27, "learning_rate": 3.9650407924445147e-05, "loss": 0.3881, "step": 42 }, { "epoch": 0.27, "learning_rate": 3.9624115583295375e-05, "loss": 0.2877, "step": 43 }, { "epoch": 0.28, "learning_rate": 3.959687941667372e-05, "loss": 0.2628, "step": 44 }, { "epoch": 0.29, "learning_rate": 3.9568700734508645e-05, "loss": 0.2789, "step": 45 }, { "epoch": 0.29, "learning_rate": 3.9539580892059086e-05, "loss": 0.2164, "step": 46 }, { "epoch": 0.3, "learning_rate": 3.950952128984927e-05, "loss": 0.217, "step": 47 }, { "epoch": 0.31, "learning_rate": 3.9478523373601325e-05, "loss": 0.2196, "step": 48 }, { "epoch": 0.31, "learning_rate": 3.944658863416575e-05, "loss": 0.3036, "step": 49 }, { "epoch": 0.32, "learning_rate": 3.941371860744978e-05, "loss": 0.2398, "step": 50 }, { "epoch": 0.33, "learning_rate": 3.937991487434342e-05, "loss": 0.2441, "step": 51 }, { "epoch": 0.33, "learning_rate": 3.934517906064348e-05, "loss": 0.2922, "step": 52 }, { "epoch": 0.34, "learning_rate": 3.930951283697534e-05, "loss": 0.3127, "step": 53 }, { "epoch": 0.35, "learning_rate": 3.927291791871264e-05, "loss": 0.2994, "step": 54 }, { "epoch": 0.35, "learning_rate": 3.923539606589473e-05, "loss": 0.2534, "step": 55 }, { "epoch": 0.36, "learning_rate": 3.919694908314209e-05, "loss": 0.2855, "step": 56 }, { "epoch": 0.36, "learning_rate": 3.9157578819569455e-05, "loss": 0.2889, "step": 57 }, { "epoch": 0.37, "learning_rate": 3.9117287168696956e-05, "loss": 0.3228, "step": 58 }, { "epoch": 0.38, "learning_rate": 3.907607606835899e-05, "loss": 0.2197, "step": 59 }, { "epoch": 0.38, "learning_rate": 3.903394750061106e-05, "loss": 0.3422, "step": 60 }, { "epoch": 0.39, "learning_rate": 3.899090349163444e-05, "loss": 0.2788, "step": 61 }, { "epoch": 0.4, "learning_rate": 3.8946946111638696e-05, "loss": 0.3245, "step": 62 }, { "epoch": 0.4, "learning_rate": 3.8902077474762155e-05, "loss": 0.336, "step": 63 }, { "epoch": 0.41, "learning_rate": 3.8856299738970225e-05, "loss": 0.2951, "step": 64 }, { "epoch": 0.42, "learning_rate": 3.880961510595158e-05, "loss": 0.3156, "step": 65 }, { "epoch": 0.42, "learning_rate": 3.876202582101229e-05, "loss": 0.2534, "step": 66 }, { "epoch": 0.43, "learning_rate": 3.8713534172967815e-05, "loss": 0.2775, "step": 67 }, { "epoch": 0.43, "learning_rate": 3.866414249403295e-05, "loss": 0.3381, "step": 68 }, { "epoch": 0.44, "learning_rate": 3.861385315970964e-05, "loss": 0.2593, "step": 69 }, { "epoch": 0.45, "learning_rate": 3.856266858867273e-05, "loss": 0.166, "step": 70 }, { "epoch": 0.45, "learning_rate": 3.851059124265363e-05, "loss": 0.3453, "step": 71 }, { "epoch": 0.46, "learning_rate": 3.8457623626321944e-05, "loss": 0.2238, "step": 72 }, { "epoch": 0.47, "learning_rate": 3.840376828716499e-05, "loss": 0.2773, "step": 73 }, { "epoch": 0.47, "learning_rate": 3.834902781536527e-05, "loss": 0.2777, "step": 74 }, { "epoch": 0.48, "learning_rate": 3.8293404843675904e-05, "loss": 0.3032, "step": 75 }, { "epoch": 0.49, "learning_rate": 3.8236902047294015e-05, "loss": 0.2138, "step": 76 }, { "epoch": 0.49, "learning_rate": 3.817952214373206e-05, "loss": 0.2605, "step": 77 }, { "epoch": 0.5, "learning_rate": 3.812126789268712e-05, "loss": 0.3205, "step": 78 }, { "epoch": 0.5, "learning_rate": 3.806214209590819e-05, "loss": 0.27, "step": 79 }, { "epoch": 0.51, "learning_rate": 3.80021475970614e-05, "loss": 0.2882, "step": 80 }, { "epoch": 0.52, "learning_rate": 3.7941287281593284e-05, "loss": 0.3081, "step": 81 }, { "epoch": 0.52, "learning_rate": 3.787956407659198e-05, "loss": 0.3268, "step": 82 }, { "epoch": 0.53, "learning_rate": 3.781698095064647e-05, "loss": 0.2018, "step": 83 }, { "epoch": 0.54, "learning_rate": 3.775354091370376e-05, "loss": 0.2624, "step": 84 }, { "epoch": 0.54, "learning_rate": 3.7689247016924186e-05, "loss": 0.2042, "step": 85 }, { "epoch": 0.55, "learning_rate": 3.7624102352534615e-05, "loss": 0.3193, "step": 86 }, { "epoch": 0.56, "learning_rate": 3.755811005367974e-05, "loss": 0.2487, "step": 87 }, { "epoch": 0.56, "learning_rate": 3.7491273294271386e-05, "loss": 0.1408, "step": 88 }, { "epoch": 0.57, "learning_rate": 3.742359528883588e-05, "loss": 0.276, "step": 89 }, { "epoch": 0.58, "learning_rate": 3.735507929235941e-05, "loss": 0.2293, "step": 90 }, { "epoch": 0.58, "learning_rate": 3.7285728600131535e-05, "loss": 0.2223, "step": 91 }, { "epoch": 0.59, "learning_rate": 3.7215546547586596e-05, "loss": 0.3011, "step": 92 }, { "epoch": 0.59, "learning_rate": 3.7144536510143436e-05, "loss": 0.2439, "step": 93 }, { "epoch": 0.6, "learning_rate": 3.707270190304294e-05, "loss": 0.2061, "step": 94 }, { "epoch": 0.61, "learning_rate": 3.7000046181183834e-05, "loss": 0.1888, "step": 95 }, { "epoch": 0.61, "learning_rate": 3.692657283895651e-05, "loss": 0.2698, "step": 96 }, { "epoch": 0.62, "learning_rate": 3.6852285410074974e-05, "loss": 0.1813, "step": 97 }, { "epoch": 0.63, "learning_rate": 3.6777187467406857e-05, "loss": 0.3824, "step": 98 }, { "epoch": 0.63, "learning_rate": 3.6701282622801626e-05, "loss": 0.2891, "step": 99 }, { "epoch": 0.64, "learning_rate": 3.662457452691682e-05, "loss": 0.1906, "step": 100 }, { "epoch": 0.65, "learning_rate": 3.6547066869042524e-05, "loss": 0.2127, "step": 101 }, { "epoch": 0.65, "learning_rate": 3.6468763376923886e-05, "loss": 0.3177, "step": 102 }, { "epoch": 0.66, "learning_rate": 3.638966781658187e-05, "loss": 0.1738, "step": 103 }, { "epoch": 0.66, "learning_rate": 3.630978399213206e-05, "loss": 0.1801, "step": 104 }, { "epoch": 0.67, "learning_rate": 3.622911574560181e-05, "loss": 0.2335, "step": 105 }, { "epoch": 0.68, "learning_rate": 3.6147666956745364e-05, "loss": 0.2521, "step": 106 }, { "epoch": 0.68, "learning_rate": 3.60654415428573e-05, "loss": 0.204, "step": 107 }, { "epoch": 0.69, "learning_rate": 3.598244345858412e-05, "loss": 0.2199, "step": 108 }, { "epoch": 0.7, "learning_rate": 3.589867669573404e-05, "loss": 0.322, "step": 109 }, { "epoch": 0.7, "learning_rate": 3.5814145283085055e-05, "loss": 0.2733, "step": 110 }, { "epoch": 0.71, "learning_rate": 3.5728853286191075e-05, "loss": 0.3102, "step": 111 }, { "epoch": 0.72, "learning_rate": 3.56428048071865e-05, "loss": 0.2365, "step": 112 }, { "epoch": 0.72, "learning_rate": 3.555600398458885e-05, "loss": 0.1718, "step": 113 }, { "epoch": 0.73, "learning_rate": 3.546845499309976e-05, "loss": 0.198, "step": 114 }, { "epoch": 0.73, "learning_rate": 3.538016204340418e-05, "loss": 0.2557, "step": 115 }, { "epoch": 0.74, "learning_rate": 3.529112938196787e-05, "loss": 0.1825, "step": 116 }, { "epoch": 0.75, "learning_rate": 3.5201361290833165e-05, "loss": 0.3099, "step": 117 }, { "epoch": 0.75, "learning_rate": 3.511086208741303e-05, "loss": 0.3273, "step": 118 }, { "epoch": 0.76, "learning_rate": 3.501963612428341e-05, "loss": 0.4337, "step": 119 }, { "epoch": 0.77, "learning_rate": 3.492768778897388e-05, "loss": 0.2754, "step": 120 }, { "epoch": 0.77, "learning_rate": 3.483502150375665e-05, "loss": 0.2226, "step": 121 }, { "epoch": 0.78, "learning_rate": 3.474164172543386e-05, "loss": 0.2698, "step": 122 }, { "epoch": 0.79, "learning_rate": 3.464755294512325e-05, "loss": 0.2, "step": 123 }, { "epoch": 0.79, "learning_rate": 3.455275968804212e-05, "loss": 0.1865, "step": 124 }, { "epoch": 0.8, "learning_rate": 3.445726651328971e-05, "loss": 0.2586, "step": 125 }, { "epoch": 0.81, "learning_rate": 3.4361078013627945e-05, "loss": 0.3064, "step": 126 }, { "epoch": 0.81, "learning_rate": 3.426419881526052e-05, "loss": 0.21, "step": 127 }, { "epoch": 0.82, "learning_rate": 3.4166633577610425e-05, "loss": 0.1701, "step": 128 }, { "epoch": 0.82, "learning_rate": 3.4068386993095806e-05, "loss": 0.2909, "step": 129 }, { "epoch": 0.83, "learning_rate": 3.396946378690435e-05, "loss": 0.2558, "step": 130 }, { "epoch": 0.84, "learning_rate": 3.386986871676597e-05, "loss": 0.1833, "step": 131 }, { "epoch": 0.84, "learning_rate": 3.3769606572724e-05, "loss": 0.184, "step": 132 }, { "epoch": 0.85, "learning_rate": 3.366868217690482e-05, "loss": 0.2481, "step": 133 }, { "epoch": 0.86, "learning_rate": 3.3567100383285925e-05, "loss": 0.1628, "step": 134 }, { "epoch": 0.86, "learning_rate": 3.346486607746249e-05, "loss": 0.1711, "step": 135 }, { "epoch": 0.87, "learning_rate": 3.336198417641238e-05, "loss": 0.2293, "step": 136 }, { "epoch": 0.88, "learning_rate": 3.325845962825966e-05, "loss": 0.2515, "step": 137 }, { "epoch": 0.88, "learning_rate": 3.315429741203666e-05, "loss": 0.1741, "step": 138 }, { "epoch": 0.89, "learning_rate": 3.304950253744443e-05, "loss": 0.1595, "step": 139 }, { "epoch": 0.89, "learning_rate": 3.294408004461188e-05, "loss": 0.1889, "step": 140 }, { "epoch": 0.9, "learning_rate": 3.283803500385332e-05, "loss": 0.2864, "step": 141 }, { "epoch": 0.91, "learning_rate": 3.27313725154246e-05, "loss": 0.1941, "step": 142 }, { "epoch": 0.91, "learning_rate": 3.2624097709277855e-05, "loss": 0.2526, "step": 143 }, { "epoch": 0.92, "learning_rate": 3.251621574481475e-05, "loss": 0.1784, "step": 144 }, { "epoch": 0.93, "learning_rate": 3.240773181063834e-05, "loss": 0.3217, "step": 145 }, { "epoch": 0.93, "learning_rate": 3.229865112430352e-05, "loss": 0.2438, "step": 146 }, { "epoch": 0.94, "learning_rate": 3.218897893206608e-05, "loss": 0.2797, "step": 147 }, { "epoch": 0.95, "learning_rate": 3.2078720508630427e-05, "loss": 0.2358, "step": 148 }, { "epoch": 0.95, "learning_rate": 3.196788115689584e-05, "loss": 0.2403, "step": 149 }, { "epoch": 0.96, "learning_rate": 3.185646620770146e-05, "loss": 0.2295, "step": 150 }, { "epoch": 0.96, "learning_rate": 3.1744481019569885e-05, "loss": 0.2063, "step": 151 }, { "epoch": 0.97, "learning_rate": 3.163193097844949e-05, "loss": 0.2654, "step": 152 }, { "epoch": 0.98, "learning_rate": 3.1518821497455326e-05, "loss": 0.2071, "step": 153 }, { "epoch": 0.98, "learning_rate": 3.1405158016608806e-05, "loss": 0.2035, "step": 154 }, { "epoch": 0.99, "learning_rate": 3.129094600257611e-05, "loss": 0.2398, "step": 155 }, { "epoch": 1.0, "learning_rate": 3.1176190948405194e-05, "loss": 0.1623, "step": 156 }, { "epoch": 1.0, "learning_rate": 3.106089837326161e-05, "loss": 0.2313, "step": 157 }, { "epoch": 1.01, "learning_rate": 3.094507382216312e-05, "loss": 0.2687, "step": 158 }, { "epoch": 1.02, "learning_rate": 3.082872286571295e-05, "loss": 0.246, "step": 159 }, { "epoch": 1.02, "learning_rate": 3.0711851099831885e-05, "loss": 0.191, "step": 160 }, { "epoch": 1.03, "learning_rate": 3.059446414548915e-05, "loss": 0.1543, "step": 161 }, { "epoch": 1.04, "learning_rate": 3.047656764843203e-05, "loss": 0.1978, "step": 162 }, { "epoch": 1.04, "learning_rate": 3.0358167278914387e-05, "loss": 0.2698, "step": 163 }, { "epoch": 1.05, "learning_rate": 3.023926873142391e-05, "loss": 0.2351, "step": 164 }, { "epoch": 1.05, "learning_rate": 3.011987772440825e-05, "loss": 0.1925, "step": 165 }, { "epoch": 1.06, "learning_rate": 3.0000000000000004e-05, "loss": 0.2246, "step": 166 }, { "epoch": 1.07, "learning_rate": 2.9879641323740505e-05, "loss": 0.2093, "step": 167 }, { "epoch": 1.07, "learning_rate": 2.9758807484302566e-05, "loss": 0.2332, "step": 168 }, { "epoch": 1.08, "learning_rate": 2.963750429321208e-05, "loss": 0.1983, "step": 169 }, { "epoch": 1.09, "learning_rate": 2.9515737584568463e-05, "loss": 0.2707, "step": 170 }, { "epoch": 1.09, "learning_rate": 2.939351321476412e-05, "loss": 0.2485, "step": 171 }, { "epoch": 1.1, "learning_rate": 2.927083706220274e-05, "loss": 0.1947, "step": 172 }, { "epoch": 1.11, "learning_rate": 2.9147715027016593e-05, "loss": 0.1949, "step": 173 }, { "epoch": 1.11, "learning_rate": 2.902415303078275e-05, "loss": 0.1907, "step": 174 }, { "epoch": 1.12, "learning_rate": 2.8900157016238296e-05, "loss": 0.1801, "step": 175 }, { "epoch": 1.12, "learning_rate": 2.8775732946994508e-05, "loss": 0.2489, "step": 176 }, { "epoch": 1.13, "learning_rate": 2.8650886807250024e-05, "loss": 0.1969, "step": 177 }, { "epoch": 1.14, "learning_rate": 2.8525624601503055e-05, "loss": 0.1724, "step": 178 }, { "epoch": 1.14, "learning_rate": 2.8399952354262566e-05, "loss": 0.1797, "step": 179 }, { "epoch": 1.15, "learning_rate": 2.8273876109758568e-05, "loss": 0.1341, "step": 180 }, { "epoch": 1.16, "learning_rate": 2.8147401931651363e-05, "loss": 0.2223, "step": 181 }, { "epoch": 1.16, "learning_rate": 2.802053590273997e-05, "loss": 0.2221, "step": 182 }, { "epoch": 1.17, "learning_rate": 2.789328412466953e-05, "loss": 0.2019, "step": 183 }, { "epoch": 1.18, "learning_rate": 2.7765652717637873e-05, "loss": 0.1336, "step": 184 }, { "epoch": 1.18, "learning_rate": 2.763764782010116e-05, "loss": 0.2361, "step": 185 }, { "epoch": 1.19, "learning_rate": 2.7509275588478606e-05, "loss": 0.2023, "step": 186 }, { "epoch": 1.19, "learning_rate": 2.738054219685647e-05, "loss": 0.2428, "step": 187 }, { "epoch": 1.2, "learning_rate": 2.725145383669106e-05, "loss": 0.2394, "step": 188 }, { "epoch": 1.21, "learning_rate": 2.712201671651094e-05, "loss": 0.1464, "step": 189 }, { "epoch": 1.21, "learning_rate": 2.699223706161839e-05, "loss": 0.2485, "step": 190 }, { "epoch": 1.22, "learning_rate": 2.6862121113789917e-05, "loss": 0.3039, "step": 191 }, { "epoch": 1.23, "learning_rate": 2.673167513097613e-05, "loss": 0.1387, "step": 192 }, { "epoch": 1.23, "learning_rate": 2.6600905387000716e-05, "loss": 0.2657, "step": 193 }, { "epoch": 1.24, "learning_rate": 2.6469818171258723e-05, "loss": 0.176, "step": 194 }, { "epoch": 1.25, "learning_rate": 2.633841978841406e-05, "loss": 0.1876, "step": 195 }, { "epoch": 1.25, "learning_rate": 2.620671655809627e-05, "loss": 0.1726, "step": 196 }, { "epoch": 1.26, "learning_rate": 2.60747148145966e-05, "loss": 0.2352, "step": 197 }, { "epoch": 1.27, "learning_rate": 2.594242090656335e-05, "loss": 0.254, "step": 198 }, { "epoch": 1.27, "learning_rate": 2.5809841196696504e-05, "loss": 0.2125, "step": 199 }, { "epoch": 1.28, "learning_rate": 2.5676982061441763e-05, "loss": 0.2049, "step": 200 }, { "epoch": 1.28, "learning_rate": 2.5543849890683813e-05, "loss": 0.1865, "step": 201 }, { "epoch": 1.29, "learning_rate": 2.5410451087439075e-05, "loss": 0.2203, "step": 202 }, { "epoch": 1.3, "learning_rate": 2.5276792067547672e-05, "loss": 0.2002, "step": 203 }, { "epoch": 1.3, "learning_rate": 2.514287925936492e-05, "loss": 0.1524, "step": 204 }, { "epoch": 1.31, "learning_rate": 2.500871910345212e-05, "loss": 0.2059, "step": 205 }, { "epoch": 1.32, "learning_rate": 2.4874318052266794e-05, "loss": 0.1885, "step": 206 }, { "epoch": 1.32, "learning_rate": 2.473968256985238e-05, "loss": 0.1837, "step": 207 }, { "epoch": 1.33, "learning_rate": 2.460481913152734e-05, "loss": 0.1938, "step": 208 }, { "epoch": 1.34, "learning_rate": 2.4469734223573703e-05, "loss": 0.1497, "step": 209 }, { "epoch": 1.34, "learning_rate": 2.4334434342925133e-05, "loss": 0.2644, "step": 210 }, { "epoch": 1.35, "learning_rate": 2.4198925996854422e-05, "loss": 0.1937, "step": 211 }, { "epoch": 1.35, "learning_rate": 2.4063215702660564e-05, "loss": 0.2791, "step": 212 }, { "epoch": 1.36, "learning_rate": 2.392730998735529e-05, "loss": 0.24, "step": 213 }, { "epoch": 1.37, "learning_rate": 2.379121538734912e-05, "loss": 0.1574, "step": 214 }, { "epoch": 1.37, "learning_rate": 2.3654938448137062e-05, "loss": 0.1896, "step": 215 }, { "epoch": 1.38, "learning_rate": 2.351848572398371e-05, "loss": 0.1865, "step": 216 }, { "epoch": 1.39, "learning_rate": 2.338186377760811e-05, "loss": 0.1336, "step": 217 }, { "epoch": 1.39, "learning_rate": 2.3245079179868054e-05, "loss": 0.2047, "step": 218 }, { "epoch": 1.4, "learning_rate": 2.31081385094441e-05, "loss": 0.31, "step": 219 }, { "epoch": 1.41, "learning_rate": 2.297104835252314e-05, "loss": 0.2096, "step": 220 }, { "epoch": 1.41, "learning_rate": 2.283381530248165e-05, "loss": 0.2593, "step": 221 }, { "epoch": 1.42, "learning_rate": 2.2696445959568577e-05, "loss": 0.1585, "step": 222 }, { "epoch": 1.42, "learning_rate": 2.2558946930587907e-05, "loss": 0.1781, "step": 223 }, { "epoch": 1.43, "learning_rate": 2.2421324828580877e-05, "loss": 0.1462, "step": 224 }, { "epoch": 1.44, "learning_rate": 2.2283586272507975e-05, "loss": 0.1687, "step": 225 }, { "epoch": 1.44, "learning_rate": 2.214573788693054e-05, "loss": 0.1716, "step": 226 }, { "epoch": 1.45, "learning_rate": 2.2007786301692205e-05, "loss": 0.2694, "step": 227 }, { "epoch": 1.46, "learning_rate": 2.18697381516e-05, "loss": 0.1615, "step": 228 }, { "epoch": 1.46, "learning_rate": 2.1731600076105264e-05, "loss": 0.1891, "step": 229 }, { "epoch": 1.47, "learning_rate": 2.159337871898431e-05, "loss": 0.1321, "step": 230 }, { "epoch": 1.48, "learning_rate": 2.145508072801888e-05, "loss": 0.1706, "step": 231 }, { "epoch": 1.48, "learning_rate": 2.131671275467647e-05, "loss": 0.2391, "step": 232 }, { "epoch": 1.49, "learning_rate": 2.1178281453790358e-05, "loss": 0.1462, "step": 233 }, { "epoch": 1.5, "learning_rate": 2.1039793483239607e-05, "loss": 0.1928, "step": 234 }, { "epoch": 1.5, "learning_rate": 2.090125550362879e-05, "loss": 0.3621, "step": 235 }, { "epoch": 1.51, "learning_rate": 2.0762674177967676e-05, "loss": 0.1987, "step": 236 }, { "epoch": 1.51, "learning_rate": 2.0624056171350785e-05, "loss": 0.2071, "step": 237 }, { "epoch": 1.52, "learning_rate": 2.0485408150636804e-05, "loss": 0.1817, "step": 238 }, { "epoch": 1.53, "learning_rate": 2.0346736784127955e-05, "loss": 0.1615, "step": 239 }, { "epoch": 1.53, "learning_rate": 2.0208048741249288e-05, "loss": 0.1521, "step": 240 }, { "epoch": 1.54, "learning_rate": 2.006935069222789e-05, "loss": 0.156, "step": 241 }, { "epoch": 1.55, "learning_rate": 1.9930649307772114e-05, "loss": 0.2438, "step": 242 }, { "epoch": 1.55, "learning_rate": 1.979195125875072e-05, "loss": 0.2432, "step": 243 }, { "epoch": 1.56, "learning_rate": 1.9653263215872048e-05, "loss": 0.1292, "step": 244 }, { "epoch": 1.57, "learning_rate": 1.9514591849363203e-05, "loss": 0.2293, "step": 245 }, { "epoch": 1.57, "learning_rate": 1.9375943828649215e-05, "loss": 0.1932, "step": 246 }, { "epoch": 1.58, "learning_rate": 1.923732582203233e-05, "loss": 0.1508, "step": 247 }, { "epoch": 1.58, "learning_rate": 1.909874449637122e-05, "loss": 0.2317, "step": 248 }, { "epoch": 1.59, "learning_rate": 1.8960206516760396e-05, "loss": 0.218, "step": 249 }, { "epoch": 1.6, "learning_rate": 1.8821718546209646e-05, "loss": 0.1777, "step": 250 }, { "epoch": 1.6, "learning_rate": 1.8683287245323536e-05, "loss": 0.1577, "step": 251 }, { "epoch": 1.61, "learning_rate": 1.8544919271981125e-05, "loss": 0.2977, "step": 252 }, { "epoch": 1.62, "learning_rate": 1.84066212810157e-05, "loss": 0.2277, "step": 253 }, { "epoch": 1.62, "learning_rate": 1.8268399923894736e-05, "loss": 0.1574, "step": 254 }, { "epoch": 1.63, "learning_rate": 1.8130261848399996e-05, "loss": 0.3187, "step": 255 }, { "epoch": 1.64, "learning_rate": 1.7992213698307795e-05, "loss": 0.1758, "step": 256 }, { "epoch": 1.64, "learning_rate": 1.7854262113069468e-05, "loss": 0.2118, "step": 257 }, { "epoch": 1.65, "learning_rate": 1.7716413727492035e-05, "loss": 0.1539, "step": 258 }, { "epoch": 1.65, "learning_rate": 1.757867517141913e-05, "loss": 0.1885, "step": 259 }, { "epoch": 1.66, "learning_rate": 1.7441053069412103e-05, "loss": 0.1915, "step": 260 }, { "epoch": 1.67, "learning_rate": 1.7303554040431426e-05, "loss": 0.1716, "step": 261 }, { "epoch": 1.67, "learning_rate": 1.7166184697518352e-05, "loss": 0.2217, "step": 262 }, { "epoch": 1.68, "learning_rate": 1.7028951647476862e-05, "loss": 0.3266, "step": 263 }, { "epoch": 1.69, "learning_rate": 1.6891861490555906e-05, "loss": 0.2631, "step": 264 }, { "epoch": 1.69, "learning_rate": 1.6754920820131946e-05, "loss": 0.2481, "step": 265 }, { "epoch": 1.7, "learning_rate": 1.6618136222391893e-05, "loss": 0.2521, "step": 266 }, { "epoch": 1.71, "learning_rate": 1.6481514276016297e-05, "loss": 0.2022, "step": 267 }, { "epoch": 1.71, "learning_rate": 1.634506155186295e-05, "loss": 0.2055, "step": 268 }, { "epoch": 1.72, "learning_rate": 1.6208784612650883e-05, "loss": 0.2257, "step": 269 }, { "epoch": 1.73, "learning_rate": 1.6072690012644717e-05, "loss": 0.2634, "step": 270 }, { "epoch": 1.73, "learning_rate": 1.593678429733944e-05, "loss": 0.2195, "step": 271 }, { "epoch": 1.74, "learning_rate": 1.5801074003145585e-05, "loss": 0.2063, "step": 272 }, { "epoch": 1.74, "learning_rate": 1.5665565657074874e-05, "loss": 0.2601, "step": 273 }, { "epoch": 1.75, "learning_rate": 1.5530265776426294e-05, "loss": 0.1633, "step": 274 }, { "epoch": 1.76, "learning_rate": 1.5395180868472662e-05, "loss": 0.2333, "step": 275 }, { "epoch": 1.76, "learning_rate": 1.5260317430147627e-05, "loss": 0.149, "step": 276 }, { "epoch": 1.77, "learning_rate": 1.512568194773322e-05, "loss": 0.2587, "step": 277 }, { "epoch": 1.78, "learning_rate": 1.4991280896547893e-05, "loss": 0.1951, "step": 278 }, { "epoch": 1.78, "learning_rate": 1.4857120740635084e-05, "loss": 0.2757, "step": 279 }, { "epoch": 1.79, "learning_rate": 1.472320793245233e-05, "loss": 0.2321, "step": 280 }, { "epoch": 1.8, "learning_rate": 1.4589548912560932e-05, "loss": 0.2177, "step": 281 }, { "epoch": 1.8, "learning_rate": 1.4456150109316192e-05, "loss": 0.1363, "step": 282 }, { "epoch": 1.81, "learning_rate": 1.4323017938558245e-05, "loss": 0.1428, "step": 283 }, { "epoch": 1.81, "learning_rate": 1.4190158803303498e-05, "loss": 0.2112, "step": 284 }, { "epoch": 1.82, "learning_rate": 1.4057579093436653e-05, "loss": 0.2269, "step": 285 }, { "epoch": 1.83, "learning_rate": 1.3925285185403406e-05, "loss": 0.1712, "step": 286 }, { "epoch": 1.83, "learning_rate": 1.3793283441903737e-05, "loss": 0.1182, "step": 287 }, { "epoch": 1.84, "learning_rate": 1.3661580211585947e-05, "loss": 0.1667, "step": 288 }, { "epoch": 1.85, "learning_rate": 1.3530181828741285e-05, "loss": 0.2887, "step": 289 }, { "epoch": 1.85, "learning_rate": 1.3399094612999291e-05, "loss": 0.1624, "step": 290 }, { "epoch": 1.86, "learning_rate": 1.3268324869023878e-05, "loss": 0.2356, "step": 291 }, { "epoch": 1.87, "learning_rate": 1.313787888621009e-05, "loss": 0.2557, "step": 292 }, { "epoch": 1.87, "learning_rate": 1.3007762938381619e-05, "loss": 0.2203, "step": 293 }, { "epoch": 1.88, "learning_rate": 1.2877983283489062e-05, "loss": 0.3142, "step": 294 }, { "epoch": 1.88, "learning_rate": 1.2748546163308947e-05, "loss": 0.16, "step": 295 }, { "epoch": 1.89, "learning_rate": 1.261945780314354e-05, "loss": 0.1647, "step": 296 }, { "epoch": 1.9, "learning_rate": 1.2490724411521406e-05, "loss": 0.2373, "step": 297 }, { "epoch": 1.9, "learning_rate": 1.2362352179898855e-05, "loss": 0.1701, "step": 298 }, { "epoch": 1.91, "learning_rate": 1.2234347282362129e-05, "loss": 0.1522, "step": 299 }, { "epoch": 1.92, "learning_rate": 1.2106715875330475e-05, "loss": 0.1613, "step": 300 }, { "epoch": 1.92, "learning_rate": 1.1979464097260039e-05, "loss": 0.1883, "step": 301 }, { "epoch": 1.93, "learning_rate": 1.1852598068348642e-05, "loss": 0.2338, "step": 302 }, { "epoch": 1.94, "learning_rate": 1.1726123890241439e-05, "loss": 0.2248, "step": 303 }, { "epoch": 1.94, "learning_rate": 1.1600047645737433e-05, "loss": 0.1811, "step": 304 }, { "epoch": 1.95, "learning_rate": 1.1474375398496948e-05, "loss": 0.1893, "step": 305 }, { "epoch": 1.96, "learning_rate": 1.1349113192749986e-05, "loss": 0.203, "step": 306 }, { "epoch": 1.96, "learning_rate": 1.1224267053005504e-05, "loss": 0.1425, "step": 307 }, { "epoch": 1.97, "learning_rate": 1.1099842983761712e-05, "loss": 0.1337, "step": 308 }, { "epoch": 1.97, "learning_rate": 1.0975846969217258e-05, "loss": 0.1684, "step": 309 }, { "epoch": 1.98, "learning_rate": 1.0852284972983415e-05, "loss": 0.165, "step": 310 }, { "epoch": 1.99, "learning_rate": 1.0729162937797257e-05, "loss": 0.2462, "step": 311 }, { "epoch": 1.99, "learning_rate": 1.0606486785235879e-05, "loss": 0.2298, "step": 312 }, { "epoch": 2.0, "learning_rate": 1.0484262415431536e-05, "loss": 0.2047, "step": 313 }, { "epoch": 2.01, "learning_rate": 1.0362495706787923e-05, "loss": 0.2365, "step": 314 }, { "epoch": 2.01, "learning_rate": 1.0241192515697432e-05, "loss": 0.1738, "step": 315 }, { "epoch": 2.02, "learning_rate": 1.0120358676259508e-05, "loss": 0.2467, "step": 316 }, { "epoch": 2.03, "learning_rate": 1.0000000000000006e-05, "loss": 0.1459, "step": 317 }, { "epoch": 2.03, "learning_rate": 9.880122275591752e-06, "loss": 0.1594, "step": 318 }, { "epoch": 2.04, "learning_rate": 9.760731268576095e-06, "loss": 0.1145, "step": 319 }, { "epoch": 2.04, "learning_rate": 9.64183272108562e-06, "loss": 0.1485, "step": 320 }, { "epoch": 2.05, "learning_rate": 9.523432351567979e-06, "loss": 0.0932, "step": 321 }, { "epoch": 2.06, "learning_rate": 9.405535854510863e-06, "loss": 0.1819, "step": 322 }, { "epoch": 2.06, "learning_rate": 9.288148900168122e-06, "loss": 0.1534, "step": 323 }, { "epoch": 2.07, "learning_rate": 9.171277134287057e-06, "loss": 0.1875, "step": 324 }, { "epoch": 2.08, "learning_rate": 9.054926177836878e-06, "loss": 0.1416, "step": 325 }, { "epoch": 2.08, "learning_rate": 8.939101626738395e-06, "loss": 0.1717, "step": 326 }, { "epoch": 2.09, "learning_rate": 8.823809051594816e-06, "loss": 0.1975, "step": 327 }, { "epoch": 2.1, "learning_rate": 8.70905399742389e-06, "loss": 0.1706, "step": 328 }, { "epoch": 2.1, "learning_rate": 8.594841983391196e-06, "loss": 0.1869, "step": 329 }, { "epoch": 2.11, "learning_rate": 8.481178502544684e-06, "loss": 0.2313, "step": 330 }, { "epoch": 2.12, "learning_rate": 8.368069021550516e-06, "loss": 0.1843, "step": 331 }, { "epoch": 2.12, "learning_rate": 8.255518980430115e-06, "loss": 0.2359, "step": 332 }, { "epoch": 2.13, "learning_rate": 8.143533792298545e-06, "loss": 0.1243, "step": 333 }, { "epoch": 2.13, "learning_rate": 8.032118843104164e-06, "loss": 0.1325, "step": 334 }, { "epoch": 2.14, "learning_rate": 7.921279491369575e-06, "loss": 0.099, "step": 335 }, { "epoch": 2.15, "learning_rate": 7.811021067933919e-06, "loss": 0.2295, "step": 336 }, { "epoch": 2.15, "learning_rate": 7.701348875696486e-06, "loss": 0.1494, "step": 337 }, { "epoch": 2.16, "learning_rate": 7.59226818936166e-06, "loss": 0.1063, "step": 338 }, { "epoch": 2.17, "learning_rate": 7.483784255185249e-06, "loss": 0.1389, "step": 339 }, { "epoch": 2.17, "learning_rate": 7.375902290722146e-06, "loss": 0.176, "step": 340 }, { "epoch": 2.18, "learning_rate": 7.268627484575406e-06, "loss": 0.142, "step": 341 }, { "epoch": 2.19, "learning_rate": 7.161964996146689e-06, "loss": 0.2029, "step": 342 }, { "epoch": 2.19, "learning_rate": 7.055919955388122e-06, "loss": 0.1416, "step": 343 }, { "epoch": 2.2, "learning_rate": 6.95049746255557e-06, "loss": 0.1515, "step": 344 }, { "epoch": 2.2, "learning_rate": 6.845702587963352e-06, "loss": 0.1573, "step": 345 }, { "epoch": 2.21, "learning_rate": 6.741540371740347e-06, "loss": 0.2249, "step": 346 }, { "epoch": 2.22, "learning_rate": 6.6380158235876335e-06, "loss": 0.2207, "step": 347 }, { "epoch": 2.22, "learning_rate": 6.535133922537513e-06, "loss": 0.1194, "step": 348 }, { "epoch": 2.23, "learning_rate": 6.4328996167140786e-06, "loss": 0.1655, "step": 349 }, { "epoch": 2.24, "learning_rate": 6.331317823095184e-06, "loss": 0.1652, "step": 350 }, { "epoch": 2.24, "learning_rate": 6.230393427276e-06, "loss": 0.2378, "step": 351 }, { "epoch": 2.25, "learning_rate": 6.130131283234031e-06, "loss": 0.229, "step": 352 }, { "epoch": 2.26, "learning_rate": 6.0305362130956504e-06, "loss": 0.1397, "step": 353 }, { "epoch": 2.26, "learning_rate": 5.931613006904196e-06, "loss": 0.1139, "step": 354 }, { "epoch": 2.27, "learning_rate": 5.8333664223895906e-06, "loss": 0.1611, "step": 355 }, { "epoch": 2.27, "learning_rate": 5.735801184739489e-06, "loss": 0.1784, "step": 356 }, { "epoch": 2.28, "learning_rate": 5.638921986372064e-06, "loss": 0.2004, "step": 357 }, { "epoch": 2.29, "learning_rate": 5.542733486710299e-06, "loss": 0.1553, "step": 358 }, { "epoch": 2.29, "learning_rate": 5.447240311957891e-06, "loss": 0.1787, "step": 359 }, { "epoch": 2.3, "learning_rate": 5.352447054876755e-06, "loss": 0.1856, "step": 360 }, { "epoch": 2.31, "learning_rate": 5.258358274566142e-06, "loss": 0.2069, "step": 361 }, { "epoch": 2.31, "learning_rate": 5.164978496243354e-06, "loss": 0.2023, "step": 362 }, { "epoch": 2.32, "learning_rate": 5.072312211026125e-06, "loss": 0.1453, "step": 363 }, { "epoch": 2.33, "learning_rate": 4.980363875716592e-06, "loss": 0.1377, "step": 364 }, { "epoch": 2.33, "learning_rate": 4.889137912586972e-06, "loss": 0.1887, "step": 365 }, { "epoch": 2.34, "learning_rate": 4.7986387091668365e-06, "loss": 0.0932, "step": 366 }, { "epoch": 2.35, "learning_rate": 4.708870618032133e-06, "loss": 0.1506, "step": 367 }, { "epoch": 2.35, "learning_rate": 4.619837956595825e-06, "loss": 0.1293, "step": 368 }, { "epoch": 2.36, "learning_rate": 4.531545006900244e-06, "loss": 0.1505, "step": 369 }, { "epoch": 2.36, "learning_rate": 4.443996015411151e-06, "loss": 0.1176, "step": 370 }, { "epoch": 2.37, "learning_rate": 4.357195192813504e-06, "loss": 0.2187, "step": 371 }, { "epoch": 2.38, "learning_rate": 4.271146713808927e-06, "loss": 0.1538, "step": 372 }, { "epoch": 2.38, "learning_rate": 4.185854716914952e-06, "loss": 0.2165, "step": 373 }, { "epoch": 2.39, "learning_rate": 4.1013233042659606e-06, "loss": 0.1662, "step": 374 }, { "epoch": 2.4, "learning_rate": 4.017556541415888e-06, "loss": 0.143, "step": 375 }, { "epoch": 2.4, "learning_rate": 3.9345584571427055e-06, "loss": 0.1883, "step": 376 }, { "epoch": 2.41, "learning_rate": 3.852333043254639e-06, "loss": 0.1624, "step": 377 }, { "epoch": 2.42, "learning_rate": 3.7708842543981928e-06, "loss": 0.1824, "step": 378 }, { "epoch": 2.42, "learning_rate": 3.690216007867944e-06, "loss": 0.1933, "step": 379 }, { "epoch": 2.43, "learning_rate": 3.6103321834181437e-06, "loss": 0.1557, "step": 380 }, { "epoch": 2.43, "learning_rate": 3.5312366230761154e-06, "loss": 0.143, "step": 381 }, { "epoch": 2.44, "learning_rate": 3.452933130957481e-06, "loss": 0.1588, "step": 382 }, { "epoch": 2.45, "learning_rate": 3.375425473083185e-06, "loss": 0.1964, "step": 383 }, { "epoch": 2.45, "learning_rate": 3.2987173771983816e-06, "loss": 0.1688, "step": 384 }, { "epoch": 2.46, "learning_rate": 3.2228125325931514e-06, "loss": 0.1837, "step": 385 }, { "epoch": 2.47, "learning_rate": 3.1477145899250326e-06, "loss": 0.1463, "step": 386 }, { "epoch": 2.47, "learning_rate": 3.073427161043492e-06, "loss": 0.2133, "step": 387 }, { "epoch": 2.48, "learning_rate": 2.9999538188161705e-06, "loss": 0.1345, "step": 388 }, { "epoch": 2.49, "learning_rate": 2.927298096957063e-06, "loss": 0.2226, "step": 389 }, { "epoch": 2.49, "learning_rate": 2.8554634898565668e-06, "loss": 0.1664, "step": 390 }, { "epoch": 2.5, "learning_rate": 2.784453452413405e-06, "loss": 0.1388, "step": 391 }, { "epoch": 2.5, "learning_rate": 2.714271399868473e-06, "loss": 0.2065, "step": 392 }, { "epoch": 2.51, "learning_rate": 2.6449207076405857e-06, "loss": 0.1353, "step": 393 }, { "epoch": 2.52, "learning_rate": 2.57640471116412e-06, "loss": 0.1601, "step": 394 }, { "epoch": 2.52, "learning_rate": 2.508726705728617e-06, "loss": 0.2478, "step": 395 }, { "epoch": 2.53, "learning_rate": 2.441889946320266e-06, "loss": 0.1515, "step": 396 }, { "epoch": 2.54, "learning_rate": 2.3758976474653904e-06, "loss": 0.11, "step": 397 }, { "epoch": 2.54, "learning_rate": 2.310752983075819e-06, "loss": 0.179, "step": 398 }, { "epoch": 2.55, "learning_rate": 2.2464590862962443e-06, "loss": 0.1746, "step": 399 }, { "epoch": 2.56, "learning_rate": 2.1830190493535385e-06, "loss": 0.1632, "step": 400 }, { "epoch": 2.56, "learning_rate": 2.1204359234080196e-06, "loss": 0.2318, "step": 401 }, { "epoch": 2.57, "learning_rate": 2.058712718406719e-06, "loss": 0.2058, "step": 402 }, { "epoch": 2.58, "learning_rate": 1.9978524029386026e-06, "loss": 0.1229, "step": 403 }, { "epoch": 2.58, "learning_rate": 1.937857904091818e-06, "loss": 0.1819, "step": 404 }, { "epoch": 2.59, "learning_rate": 1.8787321073128817e-06, "loss": 0.1619, "step": 405 }, { "epoch": 2.59, "learning_rate": 1.8204778562679437e-06, "loss": 0.1799, "step": 406 }, { "epoch": 2.6, "learning_rate": 1.7630979527059877e-06, "loss": 0.1823, "step": 407 }, { "epoch": 2.61, "learning_rate": 1.7065951563241022e-06, "loss": 0.1946, "step": 408 }, { "epoch": 2.61, "learning_rate": 1.6509721846347382e-06, "loss": 0.1903, "step": 409 }, { "epoch": 2.62, "learning_rate": 1.5962317128350147e-06, "loss": 0.1675, "step": 410 }, { "epoch": 2.63, "learning_rate": 1.5423763736780583e-06, "loss": 0.1545, "step": 411 }, { "epoch": 2.63, "learning_rate": 1.4894087573463734e-06, "loss": 0.1292, "step": 412 }, { "epoch": 2.64, "learning_rate": 1.437331411327274e-06, "loss": 0.1341, "step": 413 }, { "epoch": 2.65, "learning_rate": 1.3861468402903634e-06, "loss": 0.1419, "step": 414 }, { "epoch": 2.65, "learning_rate": 1.3358575059670532e-06, "loss": 0.2009, "step": 415 }, { "epoch": 2.66, "learning_rate": 1.2864658270321905e-06, "loss": 0.1272, "step": 416 }, { "epoch": 2.66, "learning_rate": 1.2379741789877175e-06, "loss": 0.1821, "step": 417 }, { "epoch": 2.67, "learning_rate": 1.1903848940484241e-06, "loss": 0.1756, "step": 418 }, { "epoch": 2.68, "learning_rate": 1.1437002610297787e-06, "loss": 0.1342, "step": 419 }, { "epoch": 2.68, "learning_rate": 1.097922525237849e-06, "loss": 0.2193, "step": 420 }, { "epoch": 2.69, "learning_rate": 1.0530538883613129e-06, "loss": 0.1873, "step": 421 }, { "epoch": 2.7, "learning_rate": 1.0090965083655657e-06, "loss": 0.2289, "step": 422 }, { "epoch": 2.7, "learning_rate": 9.660524993889386e-07, "loss": 0.1694, "step": 423 }, { "epoch": 2.71, "learning_rate": 9.239239316410109e-07, "loss": 0.1551, "step": 424 }, { "epoch": 2.72, "learning_rate": 8.827128313030453e-07, "loss": 0.2198, "step": 425 }, { "epoch": 2.72, "learning_rate": 8.42421180430546e-07, "loss": 0.1434, "step": 426 }, { "epoch": 2.73, "learning_rate": 8.03050916857917e-07, "loss": 0.2212, "step": 427 }, { "epoch": 2.73, "learning_rate": 7.646039341052747e-07, "loss": 0.214, "step": 428 }, { "epoch": 2.74, "learning_rate": 7.270820812873714e-07, "loss": 0.1959, "step": 429 }, { "epoch": 2.75, "learning_rate": 6.904871630246646e-07, "loss": 0.1571, "step": 430 }, { "epoch": 2.75, "learning_rate": 6.548209393565241e-07, "loss": 0.1282, "step": 431 }, { "epoch": 2.76, "learning_rate": 6.200851256565799e-07, "loss": 0.0944, "step": 432 }, { "epoch": 2.77, "learning_rate": 5.862813925502209e-07, "loss": 0.1417, "step": 433 }, { "epoch": 2.77, "learning_rate": 5.53411365834251e-07, "loss": 0.2177, "step": 434 }, { "epoch": 2.78, "learning_rate": 5.214766263986848e-07, "loss": 0.1399, "step": 435 }, { "epoch": 2.79, "learning_rate": 4.904787101507324e-07, "loss": 0.1264, "step": 436 }, { "epoch": 2.79, "learning_rate": 4.604191079409126e-07, "loss": 0.1468, "step": 437 }, { "epoch": 2.8, "learning_rate": 4.3129926549136057e-07, "loss": 0.1082, "step": 438 }, { "epoch": 2.81, "learning_rate": 4.031205833262863e-07, "loss": 0.1996, "step": 439 }, { "epoch": 2.81, "learning_rate": 3.7588441670462827e-07, "loss": 0.159, "step": 440 }, { "epoch": 2.82, "learning_rate": 3.4959207555485873e-07, "loss": 0.1389, "step": 441 }, { "epoch": 2.82, "learning_rate": 3.242448244119967e-07, "loss": 0.1808, "step": 442 }, { "epoch": 2.83, "learning_rate": 2.99843882356774e-07, "loss": 0.1019, "step": 443 }, { "epoch": 2.84, "learning_rate": 2.7639042295702245e-07, "loss": 0.1137, "step": 444 }, { "epoch": 2.84, "learning_rate": 2.5388557421120564e-07, "loss": 0.2141, "step": 445 }, { "epoch": 2.85, "learning_rate": 2.3233041849419547e-07, "loss": 0.1763, "step": 446 }, { "epoch": 2.86, "learning_rate": 2.1172599250519398e-07, "loss": 0.1588, "step": 447 }, { "epoch": 2.86, "learning_rate": 1.9207328721788653e-07, "loss": 0.2067, "step": 448 }, { "epoch": 2.87, "learning_rate": 1.7337324783276878e-07, "loss": 0.1241, "step": 449 }, { "epoch": 2.88, "learning_rate": 1.5562677373169855e-07, "loss": 0.1489, "step": 450 }, { "epoch": 2.88, "learning_rate": 1.388347184346328e-07, "loss": 0.136, "step": 451 }, { "epoch": 2.89, "learning_rate": 1.2299788955857817e-07, "loss": 0.1723, "step": 452 }, { "epoch": 2.89, "learning_rate": 1.0811704877875528e-07, "loss": 0.227, "step": 453 }, { "epoch": 2.9, "learning_rate": 9.419291179195267e-08, "loss": 0.1729, "step": 454 }, { "epoch": 2.91, "learning_rate": 8.122614828211861e-08, "loss": 0.1533, "step": 455 }, { "epoch": 2.91, "learning_rate": 6.921738188814254e-08, "loss": 0.1285, "step": 456 }, { "epoch": 2.92, "learning_rate": 5.816719017386785e-08, "loss": 0.2263, "step": 457 }, { "epoch": 2.93, "learning_rate": 4.807610460030976e-08, "loss": 0.1303, "step": 458 }, { "epoch": 2.93, "learning_rate": 3.894461050010012e-08, "loss": 0.1302, "step": 459 }, { "epoch": 2.94, "learning_rate": 3.077314705413503e-08, "loss": 0.1507, "step": 460 }, { "epoch": 2.95, "learning_rate": 2.356210727046504e-08, "loss": 0.2156, "step": 461 }, { "epoch": 2.95, "learning_rate": 1.7311837965379164e-08, "loss": 0.2267, "step": 462 }, { "epoch": 2.96, "learning_rate": 1.202263974674045e-08, "loss": 0.2884, "step": 463 }, { "epoch": 2.96, "learning_rate": 7.694766999513104e-09, "loss": 0.163, "step": 464 }, { "epoch": 2.97, "learning_rate": 4.328427873541152e-09, "loss": 0.1972, "step": 465 }, { "epoch": 2.98, "learning_rate": 1.9237842735275737e-09, "loss": 0.1847, "step": 466 }, { "epoch": 2.98, "learning_rate": 4.809518512494116e-10, "loss": 0.2029, "step": 467 }, { "epoch": 2.99, "learning_rate": 0.0, "loss": 0.1615, "step": 468 }, { "epoch": 2.99, "step": 468, "total_flos": 4.256702100026721e+21, "train_loss": 0.3061195729762061, "train_runtime": 5033.3609, "train_samples_per_second": 5.958, "train_steps_per_second": 0.093 } ], "logging_steps": 1.0, "max_steps": 468, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "total_flos": 4.256702100026721e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }