|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3723, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.964028776978418e-06, |
|
"loss": 0.3063, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.892086330935252e-06, |
|
"loss": 0.1585, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.820143884892086e-06, |
|
"loss": 0.1134, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.748201438848922e-06, |
|
"loss": 0.0801, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.676258992805757e-06, |
|
"loss": 0.0699, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.60431654676259e-06, |
|
"loss": 0.0568, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.532374100719425e-06, |
|
"loss": 0.0654, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.46043165467626e-06, |
|
"loss": 0.0608, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.388489208633095e-06, |
|
"loss": 0.0469, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.31654676258993e-06, |
|
"loss": 0.0453, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.244604316546764e-06, |
|
"loss": 0.0412, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.172661870503598e-06, |
|
"loss": 0.0335, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.100719424460432e-06, |
|
"loss": 0.0382, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.028776978417268e-06, |
|
"loss": 0.0356, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 8.956834532374102e-06, |
|
"loss": 0.0363, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.884892086330936e-06, |
|
"loss": 0.034, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 8.81294964028777e-06, |
|
"loss": 0.0371, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 8.741007194244605e-06, |
|
"loss": 0.0316, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.66906474820144e-06, |
|
"loss": 0.0295, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.597122302158273e-06, |
|
"loss": 0.0196, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.525179856115109e-06, |
|
"loss": 0.0191, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.453237410071943e-06, |
|
"loss": 0.0198, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.381294964028778e-06, |
|
"loss": 0.0169, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.309352517985614e-06, |
|
"loss": 0.0151, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.237410071942446e-06, |
|
"loss": 0.0151, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.165467625899282e-06, |
|
"loss": 0.0191, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.093525179856116e-06, |
|
"loss": 0.0189, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.02158273381295e-06, |
|
"loss": 0.0184, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.949640287769785e-06, |
|
"loss": 0.019, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 7.877697841726619e-06, |
|
"loss": 0.0349, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 7.805755395683455e-06, |
|
"loss": 0.0206, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 7.733812949640287e-06, |
|
"loss": 0.0179, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 7.661870503597123e-06, |
|
"loss": 0.0182, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.589928057553958e-06, |
|
"loss": 0.0171, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.517985611510792e-06, |
|
"loss": 0.0194, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.446043165467627e-06, |
|
"loss": 0.0192, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.374100719424461e-06, |
|
"loss": 0.018, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 7.302158273381296e-06, |
|
"loss": 0.019, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.230215827338129e-06, |
|
"loss": 0.0196, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.1582733812949644e-06, |
|
"loss": 0.0081, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 7.0863309352517995e-06, |
|
"loss": 0.0085, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 7.014388489208634e-06, |
|
"loss": 0.0072, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 6.942446043165468e-06, |
|
"loss": 0.0071, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 6.870503597122302e-06, |
|
"loss": 0.0075, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.798561151079137e-06, |
|
"loss": 0.0065, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 6.726618705035972e-06, |
|
"loss": 0.0084, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.654676258992806e-06, |
|
"loss": 0.0063, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 6.582733812949641e-06, |
|
"loss": 0.0079, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 6.510791366906475e-06, |
|
"loss": 0.0076, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.43884892086331e-06, |
|
"loss": 0.0069, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.366906474820145e-06, |
|
"loss": 0.0086, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 6.2949640287769785e-06, |
|
"loss": 0.0063, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.2230215827338136e-06, |
|
"loss": 0.0082, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.151079136690648e-06, |
|
"loss": 0.0063, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.079136690647483e-06, |
|
"loss": 0.0059, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 6.007194244604317e-06, |
|
"loss": 0.0078, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.935251798561151e-06, |
|
"loss": 0.0062, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 5.863309352517986e-06, |
|
"loss": 0.0072, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5.79136690647482e-06, |
|
"loss": 0.0053, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 5.719424460431655e-06, |
|
"loss": 0.0026, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 5.64748201438849e-06, |
|
"loss": 0.0026, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 5.575539568345324e-06, |
|
"loss": 0.003, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5.503597122302159e-06, |
|
"loss": 0.0026, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 5.4316546762589925e-06, |
|
"loss": 0.0023, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 5.359712230215828e-06, |
|
"loss": 0.0021, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 5.287769784172663e-06, |
|
"loss": 0.0025, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 5.215827338129497e-06, |
|
"loss": 0.0027, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 5.143884892086332e-06, |
|
"loss": 0.0028, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 5.071942446043165e-06, |
|
"loss": 0.0028, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0034, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.928057553956835e-06, |
|
"loss": 0.0033, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.856115107913669e-06, |
|
"loss": 0.0027, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.784172661870504e-06, |
|
"loss": 0.0021, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.712230215827339e-06, |
|
"loss": 0.0027, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.640287769784173e-06, |
|
"loss": 0.003, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.5683453237410074e-06, |
|
"loss": 0.0028, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.496402877697842e-06, |
|
"loss": 0.0039, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.424460431654677e-06, |
|
"loss": 0.0034, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.352517985611511e-06, |
|
"loss": 0.0033, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.280575539568346e-06, |
|
"loss": 0.0014, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.20863309352518e-06, |
|
"loss": 0.0012, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 4.1366906474820145e-06, |
|
"loss": 0.0014, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 4.0647482014388495e-06, |
|
"loss": 0.0013, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.992805755395684e-06, |
|
"loss": 0.0015, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 3.920863309352518e-06, |
|
"loss": 0.001, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 3.848920863309353e-06, |
|
"loss": 0.0011, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3.7769784172661873e-06, |
|
"loss": 0.001, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.7050359712230215e-06, |
|
"loss": 0.0018, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.6330935251798566e-06, |
|
"loss": 0.0009, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.561151079136691e-06, |
|
"loss": 0.0009, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 3.4892086330935254e-06, |
|
"loss": 0.0013, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.4172661870503596e-06, |
|
"loss": 0.001, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.3453237410071943e-06, |
|
"loss": 0.0011, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 3.2733812949640294e-06, |
|
"loss": 0.0011, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 3.2014388489208636e-06, |
|
"loss": 0.0012, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 3.1294964028776982e-06, |
|
"loss": 0.0014, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 3.0575539568345324e-06, |
|
"loss": 0.0011, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.985611510791367e-06, |
|
"loss": 0.001, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.9136690647482017e-06, |
|
"loss": 0.0011, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.8417266187050364e-06, |
|
"loss": 0.0004, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 2.7697841726618706e-06, |
|
"loss": 0.0005, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.6978417266187052e-06, |
|
"loss": 0.0003, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.6258992805755395e-06, |
|
"loss": 0.0003, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 2.5539568345323745e-06, |
|
"loss": 0.0005, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 2.4820143884892088e-06, |
|
"loss": 0.0004, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 2.4100719424460434e-06, |
|
"loss": 0.0007, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 2.3381294964028776e-06, |
|
"loss": 0.0003, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 2.2661870503597123e-06, |
|
"loss": 0.0005, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.194244604316547e-06, |
|
"loss": 0.0004, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 2.1223021582733816e-06, |
|
"loss": 0.0005, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 2.050359712230216e-06, |
|
"loss": 0.0003, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.9784172661870504e-06, |
|
"loss": 0.0005, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1.906474820143885e-06, |
|
"loss": 0.0005, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 1.8345323741007195e-06, |
|
"loss": 0.0004, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.762589928057554e-06, |
|
"loss": 0.0003, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 1.6906474820143886e-06, |
|
"loss": 0.0006, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 1.618705035971223e-06, |
|
"loss": 0.0004, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 1.5467625899280579e-06, |
|
"loss": 0.0005, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 1.474820143884892e-06, |
|
"loss": 0.0003, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 1.4028776978417265e-06, |
|
"loss": 0.0002, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.3309352517985614e-06, |
|
"loss": 0.0002, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.2589928057553958e-06, |
|
"loss": 0.0001, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 1.1870503597122303e-06, |
|
"loss": 0.0001, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.115107913669065e-06, |
|
"loss": 0.0001, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 1.0431654676258993e-06, |
|
"loss": 0.0001, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 9.71223021582734e-07, |
|
"loss": 0.0002, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 8.992805755395684e-07, |
|
"loss": 0.0001, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 8.27338129496403e-07, |
|
"loss": 0.0001, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7.553956834532375e-07, |
|
"loss": 0.0002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 6.83453237410072e-07, |
|
"loss": 0.0002, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.115107913669066e-07, |
|
"loss": 0.0001, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 5.39568345323741e-07, |
|
"loss": 0.0002, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 4.676258992805756e-07, |
|
"loss": 0.0001, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 3.956834532374101e-07, |
|
"loss": 0.0002, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3.237410071942446e-07, |
|
"loss": 0.0001, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 2.5179856115107916e-07, |
|
"loss": 0.0001, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 1.7985611510791368e-07, |
|
"loss": 0.0001, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.0791366906474822e-07, |
|
"loss": 0.0001, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 3.597122302158274e-08, |
|
"loss": 0.0001, |
|
"step": 280 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 40, |
|
"total_flos": 58523658485760.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|