{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 242, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.5e-06, "loss": 2.2266, "step": 1 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 2.2949, "step": 2 }, { "epoch": 0.01, "learning_rate": 7.500000000000001e-06, "loss": 2.1211, "step": 3 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 1.748, "step": 4 }, { "epoch": 0.02, "learning_rate": 1.25e-05, "loss": 1.4082, "step": 5 }, { "epoch": 0.02, "learning_rate": 1.5000000000000002e-05, "loss": 1.3105, "step": 6 }, { "epoch": 0.03, "learning_rate": 1.7500000000000002e-05, "loss": 1.1182, "step": 7 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 1.1958, "step": 8 }, { "epoch": 0.04, "learning_rate": 1.999909877856721e-05, "loss": 1.0928, "step": 9 }, { "epoch": 0.04, "learning_rate": 1.9996395276708856e-05, "loss": 0.979, "step": 10 }, { "epoch": 0.05, "learning_rate": 1.9991889981715696e-05, "loss": 0.9541, "step": 11 }, { "epoch": 0.05, "learning_rate": 1.9985583705641418e-05, "loss": 0.9253, "step": 12 }, { "epoch": 0.05, "learning_rate": 1.9977477585156252e-05, "loss": 0.936, "step": 13 }, { "epoch": 0.06, "learning_rate": 1.9967573081342103e-05, "loss": 0.8491, "step": 14 }, { "epoch": 0.06, "learning_rate": 1.9955871979429188e-05, "loss": 0.8838, "step": 15 }, { "epoch": 0.07, "learning_rate": 1.9942376388474282e-05, "loss": 0.7446, "step": 16 }, { "epoch": 0.07, "learning_rate": 1.992708874098054e-05, "loss": 0.7017, "step": 17 }, { "epoch": 0.07, "learning_rate": 1.9910011792459086e-05, "loss": 0.8101, "step": 18 }, { "epoch": 0.08, "learning_rate": 1.989114862093232e-05, "loss": 0.7563, "step": 19 }, { "epoch": 0.08, "learning_rate": 1.9870502626379127e-05, "loss": 0.7939, "step": 20 }, { "epoch": 0.09, "learning_rate": 1.9848077530122083e-05, "loss": 0.7065, "step": 21 }, { "epoch": 0.09, "learning_rate": 1.9823877374156647e-05, "loss": 0.7466, "step": 22 }, { "epoch": 0.1, "learning_rate": 1.979790652042268e-05, "loss": 0.7607, "step": 23 }, { "epoch": 0.1, "learning_rate": 1.977016965001817e-05, "loss": 0.689, "step": 24 }, { "epoch": 0.1, "learning_rate": 1.9740671762355548e-05, "loss": 0.8286, "step": 25 }, { "epoch": 0.11, "learning_rate": 1.9709418174260523e-05, "loss": 0.6978, "step": 26 }, { "epoch": 0.11, "learning_rate": 1.9676414519013782e-05, "loss": 0.6284, "step": 27 }, { "epoch": 0.12, "learning_rate": 1.9641666745335626e-05, "loss": 0.7383, "step": 28 }, { "epoch": 0.12, "learning_rate": 1.9605181116313725e-05, "loss": 0.6973, "step": 29 }, { "epoch": 0.12, "learning_rate": 1.9566964208274254e-05, "loss": 0.7092, "step": 30 }, { "epoch": 0.13, "learning_rate": 1.9527022909596537e-05, "loss": 0.7104, "step": 31 }, { "epoch": 0.13, "learning_rate": 1.9485364419471454e-05, "loss": 0.6423, "step": 32 }, { "epoch": 0.14, "learning_rate": 1.9441996246603848e-05, "loss": 0.6541, "step": 33 }, { "epoch": 0.14, "learning_rate": 1.9396926207859085e-05, "loss": 0.6978, "step": 34 }, { "epoch": 0.14, "learning_rate": 1.9350162426854152e-05, "loss": 0.7002, "step": 35 }, { "epoch": 0.15, "learning_rate": 1.9301713332493386e-05, "loss": 0.6743, "step": 36 }, { "epoch": 0.15, "learning_rate": 1.925158765744924e-05, "loss": 0.6343, "step": 37 }, { "epoch": 0.16, "learning_rate": 1.9199794436588244e-05, "loss": 0.6274, "step": 38 }, { "epoch": 0.16, "learning_rate": 1.9146343005342546e-05, "loss": 0.7134, "step": 39 }, { "epoch": 0.17, "learning_rate": 1.909124299802724e-05, "loss": 0.646, "step": 40 }, { "epoch": 0.17, "learning_rate": 1.9034504346103825e-05, "loss": 0.6392, "step": 41 }, { "epoch": 0.17, "learning_rate": 1.8976137276390145e-05, "loss": 0.5862, "step": 42 }, { "epoch": 0.18, "learning_rate": 1.891615230921703e-05, "loss": 0.6609, "step": 43 }, { "epoch": 0.18, "learning_rate": 1.8854560256532098e-05, "loss": 0.6292, "step": 44 }, { "epoch": 0.19, "learning_rate": 1.879137221995095e-05, "loss": 0.5913, "step": 45 }, { "epoch": 0.19, "learning_rate": 1.8726599588756144e-05, "loss": 0.6235, "step": 46 }, { "epoch": 0.19, "learning_rate": 1.866025403784439e-05, "loss": 0.7061, "step": 47 }, { "epoch": 0.2, "learning_rate": 1.859234752562217e-05, "loss": 0.6748, "step": 48 }, { "epoch": 0.2, "learning_rate": 1.8522892291850335e-05, "loss": 0.5935, "step": 49 }, { "epoch": 0.21, "learning_rate": 1.845190085543795e-05, "loss": 0.687, "step": 50 }, { "epoch": 0.21, "learning_rate": 1.8379386012185813e-05, "loss": 0.6694, "step": 51 }, { "epoch": 0.21, "learning_rate": 1.8305360832480118e-05, "loss": 0.5725, "step": 52 }, { "epoch": 0.22, "learning_rate": 1.8229838658936566e-05, "loss": 0.6375, "step": 53 }, { "epoch": 0.22, "learning_rate": 1.8152833103995443e-05, "loss": 0.6128, "step": 54 }, { "epoch": 0.23, "learning_rate": 1.807435804746807e-05, "loss": 0.6118, "step": 55 }, { "epoch": 0.23, "learning_rate": 1.7994427634035016e-05, "loss": 0.6218, "step": 56 }, { "epoch": 0.24, "learning_rate": 1.791305627069662e-05, "loss": 0.6094, "step": 57 }, { "epoch": 0.24, "learning_rate": 1.7830258624176224e-05, "loss": 0.6504, "step": 58 }, { "epoch": 0.24, "learning_rate": 1.7746049618276545e-05, "loss": 0.5811, "step": 59 }, { "epoch": 0.25, "learning_rate": 1.766044443118978e-05, "loss": 0.5911, "step": 60 }, { "epoch": 0.25, "learning_rate": 1.7573458492761802e-05, "loss": 0.5942, "step": 61 }, { "epoch": 0.26, "learning_rate": 1.7485107481711014e-05, "loss": 0.6296, "step": 62 }, { "epoch": 0.26, "learning_rate": 1.7395407322802374e-05, "loss": 0.6377, "step": 63 }, { "epoch": 0.26, "learning_rate": 1.7304374183977032e-05, "loss": 0.5991, "step": 64 }, { "epoch": 0.27, "learning_rate": 1.7212024473438145e-05, "loss": 0.5847, "step": 65 }, { "epoch": 0.27, "learning_rate": 1.7118374836693407e-05, "loss": 0.5864, "step": 66 }, { "epoch": 0.28, "learning_rate": 1.7023442153554776e-05, "loss": 0.5608, "step": 67 }, { "epoch": 0.28, "learning_rate": 1.6927243535095995e-05, "loss": 0.603, "step": 68 }, { "epoch": 0.29, "learning_rate": 1.6829796320568416e-05, "loss": 0.5874, "step": 69 }, { "epoch": 0.29, "learning_rate": 1.67311180742757e-05, "loss": 0.5879, "step": 70 }, { "epoch": 0.29, "learning_rate": 1.6631226582407954e-05, "loss": 0.6262, "step": 71 }, { "epoch": 0.3, "learning_rate": 1.653013984983585e-05, "loss": 0.5554, "step": 72 }, { "epoch": 0.3, "learning_rate": 1.6427876096865394e-05, "loss": 0.5928, "step": 73 }, { "epoch": 0.31, "learning_rate": 1.6324453755953772e-05, "loss": 0.6152, "step": 74 }, { "epoch": 0.31, "learning_rate": 1.621989146838704e-05, "loss": 0.6365, "step": 75 }, { "epoch": 0.31, "learning_rate": 1.6114208080920125e-05, "loss": 0.6069, "step": 76 }, { "epoch": 0.32, "learning_rate": 1.600742264237979e-05, "loss": 0.5688, "step": 77 }, { "epoch": 0.32, "learning_rate": 1.5899554400231233e-05, "loss": 0.7437, "step": 78 }, { "epoch": 0.33, "learning_rate": 1.579062279710879e-05, "loss": 0.6782, "step": 79 }, { "epoch": 0.33, "learning_rate": 1.568064746731156e-05, "loss": 0.689, "step": 80 }, { "epoch": 0.33, "learning_rate": 1.5569648233264395e-05, "loss": 0.6775, "step": 81 }, { "epoch": 0.34, "learning_rate": 1.5457645101945046e-05, "loss": 0.5874, "step": 82 }, { "epoch": 0.34, "learning_rate": 1.5344658261278013e-05, "loss": 0.5696, "step": 83 }, { "epoch": 0.35, "learning_rate": 1.5230708076495777e-05, "loss": 0.5891, "step": 84 }, { "epoch": 0.35, "learning_rate": 1.5115815086468103e-05, "loss": 0.562, "step": 85 }, { "epoch": 0.36, "learning_rate": 1.5000000000000002e-05, "loss": 0.5867, "step": 86 }, { "epoch": 0.36, "learning_rate": 1.4883283692099114e-05, "loss": 0.6345, "step": 87 }, { "epoch": 0.36, "learning_rate": 1.4765687200213079e-05, "loss": 0.6353, "step": 88 }, { "epoch": 0.37, "learning_rate": 1.4647231720437687e-05, "loss": 0.5674, "step": 89 }, { "epoch": 0.37, "learning_rate": 1.4527938603696376e-05, "loss": 0.5137, "step": 90 }, { "epoch": 0.38, "learning_rate": 1.4407829351891858e-05, "loss": 0.6074, "step": 91 }, { "epoch": 0.38, "learning_rate": 1.4286925614030542e-05, "loss": 0.6169, "step": 92 }, { "epoch": 0.38, "learning_rate": 1.4165249182320401e-05, "loss": 0.5901, "step": 93 }, { "epoch": 0.39, "learning_rate": 1.404282198824305e-05, "loss": 0.5825, "step": 94 }, { "epoch": 0.39, "learning_rate": 1.3919666098600753e-05, "loss": 0.6836, "step": 95 }, { "epoch": 0.4, "learning_rate": 1.3795803711538966e-05, "loss": 0.5596, "step": 96 }, { "epoch": 0.4, "learning_rate": 1.3671257152545277e-05, "loss": 0.5923, "step": 97 }, { "epoch": 0.4, "learning_rate": 1.3546048870425356e-05, "loss": 0.6089, "step": 98 }, { "epoch": 0.41, "learning_rate": 1.342020143325669e-05, "loss": 0.5906, "step": 99 }, { "epoch": 0.41, "learning_rate": 1.3293737524320798e-05, "loss": 0.5283, "step": 100 }, { "epoch": 0.42, "learning_rate": 1.3166679938014728e-05, "loss": 0.6548, "step": 101 }, { "epoch": 0.42, "learning_rate": 1.303905157574247e-05, "loss": 0.5793, "step": 102 }, { "epoch": 0.43, "learning_rate": 1.291087544178713e-05, "loss": 0.5471, "step": 103 }, { "epoch": 0.43, "learning_rate": 1.2782174639164528e-05, "loss": 0.6052, "step": 104 }, { "epoch": 0.43, "learning_rate": 1.2652972365459008e-05, "loss": 0.6357, "step": 105 }, { "epoch": 0.44, "learning_rate": 1.2523291908642219e-05, "loss": 0.5308, "step": 106 }, { "epoch": 0.44, "learning_rate": 1.2393156642875579e-05, "loss": 0.6238, "step": 107 }, { "epoch": 0.45, "learning_rate": 1.2262590024297226e-05, "loss": 0.6016, "step": 108 }, { "epoch": 0.45, "learning_rate": 1.2131615586794162e-05, "loss": 0.5947, "step": 109 }, { "epoch": 0.45, "learning_rate": 1.2000256937760446e-05, "loss": 0.626, "step": 110 }, { "epoch": 0.46, "learning_rate": 1.1868537753842052e-05, "loss": 0.5552, "step": 111 }, { "epoch": 0.46, "learning_rate": 1.1736481776669307e-05, "loss": 0.6091, "step": 112 }, { "epoch": 0.47, "learning_rate": 1.1604112808577603e-05, "loss": 0.6089, "step": 113 }, { "epoch": 0.47, "learning_rate": 1.1471454708317163e-05, "loss": 0.5085, "step": 114 }, { "epoch": 0.48, "learning_rate": 1.1338531386752618e-05, "loss": 0.574, "step": 115 }, { "epoch": 0.48, "learning_rate": 1.1205366802553231e-05, "loss": 0.5652, "step": 116 }, { "epoch": 0.48, "learning_rate": 1.107198495787448e-05, "loss": 0.5852, "step": 117 }, { "epoch": 0.49, "learning_rate": 1.0938409894031793e-05, "loss": 0.5049, "step": 118 }, { "epoch": 0.49, "learning_rate": 1.0804665687167262e-05, "loss": 0.6001, "step": 119 }, { "epoch": 0.5, "learning_rate": 1.0670776443910024e-05, "loss": 0.6045, "step": 120 }, { "epoch": 0.5, "learning_rate": 1.0536766297031216e-05, "loss": 0.574, "step": 121 }, { "epoch": 0.5, "learning_rate": 1.0402659401094154e-05, "loss": 0.5769, "step": 122 }, { "epoch": 0.51, "learning_rate": 1.0268479928100615e-05, "loss": 0.5615, "step": 123 }, { "epoch": 0.51, "learning_rate": 1.0134252063133976e-05, "loss": 0.6157, "step": 124 }, { "epoch": 0.52, "learning_rate": 1e-05, "loss": 0.6018, "step": 125 }, { "epoch": 0.52, "learning_rate": 9.865747936866027e-06, "loss": 0.5439, "step": 126 }, { "epoch": 0.52, "learning_rate": 9.73152007189939e-06, "loss": 0.5176, "step": 127 }, { "epoch": 0.53, "learning_rate": 9.597340598905851e-06, "loss": 0.6396, "step": 128 }, { "epoch": 0.53, "learning_rate": 9.463233702968784e-06, "loss": 0.626, "step": 129 }, { "epoch": 0.54, "learning_rate": 9.329223556089976e-06, "loss": 0.6184, "step": 130 }, { "epoch": 0.54, "learning_rate": 9.195334312832742e-06, "loss": 0.5371, "step": 131 }, { "epoch": 0.55, "learning_rate": 9.061590105968208e-06, "loss": 0.5627, "step": 132 }, { "epoch": 0.55, "learning_rate": 8.928015042125523e-06, "loss": 0.5444, "step": 133 }, { "epoch": 0.55, "learning_rate": 8.79463319744677e-06, "loss": 0.5645, "step": 134 }, { "epoch": 0.56, "learning_rate": 8.661468613247387e-06, "loss": 0.5764, "step": 135 }, { "epoch": 0.56, "learning_rate": 8.528545291682839e-06, "loss": 0.5312, "step": 136 }, { "epoch": 0.57, "learning_rate": 8.395887191422397e-06, "loss": 0.5015, "step": 137 }, { "epoch": 0.57, "learning_rate": 8.263518223330698e-06, "loss": 0.5667, "step": 138 }, { "epoch": 0.57, "learning_rate": 8.131462246157953e-06, "loss": 0.5728, "step": 139 }, { "epoch": 0.58, "learning_rate": 7.999743062239557e-06, "loss": 0.573, "step": 140 }, { "epoch": 0.58, "learning_rate": 7.868384413205842e-06, "loss": 0.5425, "step": 141 }, { "epoch": 0.59, "learning_rate": 7.73740997570278e-06, "loss": 0.6074, "step": 142 }, { "epoch": 0.59, "learning_rate": 7.606843357124426e-06, "loss": 0.5059, "step": 143 }, { "epoch": 0.6, "learning_rate": 7.476708091357783e-06, "loss": 0.5574, "step": 144 }, { "epoch": 0.6, "learning_rate": 7.347027634540993e-06, "loss": 0.5779, "step": 145 }, { "epoch": 0.6, "learning_rate": 7.217825360835475e-06, "loss": 0.5581, "step": 146 }, { "epoch": 0.61, "learning_rate": 7.089124558212872e-06, "loss": 0.5833, "step": 147 }, { "epoch": 0.61, "learning_rate": 6.960948424257532e-06, "loss": 0.53, "step": 148 }, { "epoch": 0.62, "learning_rate": 6.833320061985278e-06, "loss": 0.6011, "step": 149 }, { "epoch": 0.62, "learning_rate": 6.706262475679205e-06, "loss": 0.5784, "step": 150 }, { "epoch": 0.62, "learning_rate": 6.579798566743314e-06, "loss": 0.6519, "step": 151 }, { "epoch": 0.63, "learning_rate": 6.453951129574644e-06, "loss": 0.6189, "step": 152 }, { "epoch": 0.63, "learning_rate": 6.3287428474547256e-06, "loss": 0.6582, "step": 153 }, { "epoch": 0.64, "learning_rate": 6.204196288461037e-06, "loss": 0.6318, "step": 154 }, { "epoch": 0.64, "learning_rate": 6.080333901399252e-06, "loss": 0.542, "step": 155 }, { "epoch": 0.64, "learning_rate": 5.957178011756952e-06, "loss": 0.5054, "step": 156 }, { "epoch": 0.65, "learning_rate": 5.834750817679606e-06, "loss": 0.623, "step": 157 }, { "epoch": 0.65, "learning_rate": 5.713074385969457e-06, "loss": 0.5654, "step": 158 }, { "epoch": 0.66, "learning_rate": 5.5921706481081405e-06, "loss": 0.5664, "step": 159 }, { "epoch": 0.66, "learning_rate": 5.47206139630363e-06, "loss": 0.5759, "step": 160 }, { "epoch": 0.67, "learning_rate": 5.352768279562315e-06, "loss": 0.5417, "step": 161 }, { "epoch": 0.67, "learning_rate": 5.234312799786921e-06, "loss": 0.5359, "step": 162 }, { "epoch": 0.67, "learning_rate": 5.116716307900893e-06, "loss": 0.5945, "step": 163 }, { "epoch": 0.68, "learning_rate": 5.000000000000003e-06, "loss": 0.6191, "step": 164 }, { "epoch": 0.68, "learning_rate": 4.8841849135319015e-06, "loss": 0.5498, "step": 165 }, { "epoch": 0.69, "learning_rate": 4.769291923504226e-06, "loss": 0.5535, "step": 166 }, { "epoch": 0.69, "learning_rate": 4.655341738721989e-06, "loss": 0.5085, "step": 167 }, { "epoch": 0.69, "learning_rate": 4.542354898054953e-06, "loss": 0.5669, "step": 168 }, { "epoch": 0.7, "learning_rate": 4.430351766735609e-06, "loss": 0.541, "step": 169 }, { "epoch": 0.7, "learning_rate": 4.319352532688444e-06, "loss": 0.5391, "step": 170 }, { "epoch": 0.71, "learning_rate": 4.209377202891212e-06, "loss": 0.543, "step": 171 }, { "epoch": 0.71, "learning_rate": 4.100445599768774e-06, "loss": 0.5576, "step": 172 }, { "epoch": 0.71, "learning_rate": 3.99257735762021e-06, "loss": 0.5491, "step": 173 }, { "epoch": 0.72, "learning_rate": 3.885791919079878e-06, "loss": 0.6331, "step": 174 }, { "epoch": 0.72, "learning_rate": 3.7801085316129615e-06, "loss": 0.5042, "step": 175 }, { "epoch": 0.73, "learning_rate": 3.6755462440462288e-06, "loss": 0.5757, "step": 176 }, { "epoch": 0.73, "learning_rate": 3.5721239031346067e-06, "loss": 0.5269, "step": 177 }, { "epoch": 0.74, "learning_rate": 3.4698601501641517e-06, "loss": 0.6038, "step": 178 }, { "epoch": 0.74, "learning_rate": 3.3687734175920505e-06, "loss": 0.5627, "step": 179 }, { "epoch": 0.74, "learning_rate": 3.2688819257242963e-06, "loss": 0.5598, "step": 180 }, { "epoch": 0.75, "learning_rate": 3.1702036794315837e-06, "loss": 0.5784, "step": 181 }, { "epoch": 0.75, "learning_rate": 3.0727564649040066e-06, "loss": 0.5947, "step": 182 }, { "epoch": 0.76, "learning_rate": 2.976557846445225e-06, "loss": 0.5703, "step": 183 }, { "epoch": 0.76, "learning_rate": 2.8816251633065963e-06, "loss": 0.5503, "step": 184 }, { "epoch": 0.76, "learning_rate": 2.7879755265618558e-06, "loss": 0.5996, "step": 185 }, { "epoch": 0.77, "learning_rate": 2.69562581602297e-06, "loss": 0.5635, "step": 186 }, { "epoch": 0.77, "learning_rate": 2.6045926771976306e-06, "loss": 0.564, "step": 187 }, { "epoch": 0.78, "learning_rate": 2.514892518288988e-06, "loss": 0.5691, "step": 188 }, { "epoch": 0.78, "learning_rate": 2.4265415072382016e-06, "loss": 0.5339, "step": 189 }, { "epoch": 0.79, "learning_rate": 2.339555568810221e-06, "loss": 0.5229, "step": 190 }, { "epoch": 0.79, "learning_rate": 2.2539503817234553e-06, "loss": 0.5537, "step": 191 }, { "epoch": 0.79, "learning_rate": 2.1697413758237785e-06, "loss": 0.4932, "step": 192 }, { "epoch": 0.8, "learning_rate": 2.0869437293033835e-06, "loss": 0.6104, "step": 193 }, { "epoch": 0.8, "learning_rate": 2.0055723659649907e-06, "loss": 0.5398, "step": 194 }, { "epoch": 0.81, "learning_rate": 1.9256419525319316e-06, "loss": 0.5278, "step": 195 }, { "epoch": 0.81, "learning_rate": 1.8471668960045575e-06, "loss": 0.5093, "step": 196 }, { "epoch": 0.81, "learning_rate": 1.7701613410634367e-06, "loss": 0.6057, "step": 197 }, { "epoch": 0.82, "learning_rate": 1.6946391675198838e-06, "loss": 0.5505, "step": 198 }, { "epoch": 0.82, "learning_rate": 1.620613987814189e-06, "loss": 0.5613, "step": 199 }, { "epoch": 0.83, "learning_rate": 1.5480991445620541e-06, "loss": 0.6006, "step": 200 }, { "epoch": 0.83, "learning_rate": 1.4771077081496654e-06, "loss": 0.5789, "step": 201 }, { "epoch": 0.83, "learning_rate": 1.407652474377832e-06, "loss": 0.5161, "step": 202 }, { "epoch": 0.84, "learning_rate": 1.339745962155613e-06, "loss": 0.5525, "step": 203 }, { "epoch": 0.84, "learning_rate": 1.273400411243857e-06, "loss": 0.5801, "step": 204 }, { "epoch": 0.85, "learning_rate": 1.2086277800490554e-06, "loss": 0.5269, "step": 205 }, { "epoch": 0.85, "learning_rate": 1.1454397434679022e-06, "loss": 0.5669, "step": 206 }, { "epoch": 0.86, "learning_rate": 1.083847690782972e-06, "loss": 0.5742, "step": 207 }, { "epoch": 0.86, "learning_rate": 1.0238627236098619e-06, "loss": 0.5005, "step": 208 }, { "epoch": 0.86, "learning_rate": 9.65495653896179e-07, "loss": 0.6187, "step": 209 }, { "epoch": 0.87, "learning_rate": 9.08757001972762e-07, "loss": 0.5801, "step": 210 }, { "epoch": 0.87, "learning_rate": 8.536569946574546e-07, "loss": 0.5444, "step": 211 }, { "epoch": 0.88, "learning_rate": 8.002055634117578e-07, "loss": 0.5105, "step": 212 }, { "epoch": 0.88, "learning_rate": 7.48412342550765e-07, "loss": 0.5176, "step": 213 }, { "epoch": 0.88, "learning_rate": 6.98286667506618e-07, "loss": 0.5625, "step": 214 }, { "epoch": 0.89, "learning_rate": 6.498375731458529e-07, "loss": 0.5059, "step": 215 }, { "epoch": 0.89, "learning_rate": 6.030737921409169e-07, "loss": 0.5312, "step": 216 }, { "epoch": 0.9, "learning_rate": 5.580037533961546e-07, "loss": 0.5488, "step": 217 }, { "epoch": 0.9, "learning_rate": 5.146355805285452e-07, "loss": 0.481, "step": 218 }, { "epoch": 0.9, "learning_rate": 4.7297709040346474e-07, "loss": 0.4875, "step": 219 }, { "epoch": 0.91, "learning_rate": 4.3303579172574884e-07, "loss": 0.5137, "step": 220 }, { "epoch": 0.91, "learning_rate": 3.9481888368627764e-07, "loss": 0.5737, "step": 221 }, { "epoch": 0.92, "learning_rate": 3.5833325466437697e-07, "loss": 0.5044, "step": 222 }, { "epoch": 0.92, "learning_rate": 3.235854809862193e-07, "loss": 0.4939, "step": 223 }, { "epoch": 0.93, "learning_rate": 2.905818257394799e-07, "loss": 0.5977, "step": 224 }, { "epoch": 0.93, "learning_rate": 2.593282376444539e-07, "loss": 0.5251, "step": 225 }, { "epoch": 0.93, "learning_rate": 2.2983034998182997e-07, "loss": 0.4976, "step": 226 }, { "epoch": 0.94, "learning_rate": 2.0209347957732328e-07, "loss": 0.5818, "step": 227 }, { "epoch": 0.94, "learning_rate": 1.761226258433524e-07, "loss": 0.5381, "step": 228 }, { "epoch": 0.95, "learning_rate": 1.519224698779198e-07, "loss": 0.5776, "step": 229 }, { "epoch": 0.95, "learning_rate": 1.2949737362087156e-07, "loss": 0.5542, "step": 230 }, { "epoch": 0.95, "learning_rate": 1.0885137906768373e-07, "loss": 0.5691, "step": 231 }, { "epoch": 0.96, "learning_rate": 8.99882075409153e-08, "loss": 0.5183, "step": 232 }, { "epoch": 0.96, "learning_rate": 7.291125901946027e-08, "loss": 0.6221, "step": 233 }, { "epoch": 0.97, "learning_rate": 5.7623611525721155e-08, "loss": 0.6509, "step": 234 }, { "epoch": 0.97, "learning_rate": 4.412802057081278e-08, "loss": 0.6748, "step": 235 }, { "epoch": 0.98, "learning_rate": 3.242691865790071e-08, "loss": 0.5635, "step": 236 }, { "epoch": 0.98, "learning_rate": 2.2522414843748618e-08, "loss": 0.6003, "step": 237 }, { "epoch": 0.98, "learning_rate": 1.4416294358582383e-08, "loss": 0.5596, "step": 238 }, { "epoch": 0.99, "learning_rate": 8.110018284304132e-09, "loss": 0.5457, "step": 239 }, { "epoch": 0.99, "learning_rate": 3.6047232911462506e-09, "loss": 0.5715, "step": 240 }, { "epoch": 1.0, "learning_rate": 9.012214327897006e-10, "loss": 0.5107, "step": 241 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.4875, "step": 242 }, { "epoch": 1.0, "step": 242, "total_flos": 3.991902850132214e+17, "train_loss": 0.6390532186208677, "train_runtime": 1897.3254, "train_samples_per_second": 16.267, "train_steps_per_second": 0.128 } ], "max_steps": 242, "num_train_epochs": 1, "total_flos": 3.991902850132214e+17, "trial_name": null, "trial_params": null }