|
{
  "best_metric": 0.03415210172533989,
  "best_model_checkpoint": "AlexWang99/byt5_3k_4d/checkpoint-376",
  "epoch": 94.0,
  "eval_steps": 500,
  "global_step": 376,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 0.47024813294410706,
      "eval_runtime": 10.75,
      "eval_samples_per_second": 930.235,
      "eval_steps_per_second": 1.209,
      "step": 4
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.4366174340248108,
      "eval_runtime": 10.8447,
      "eval_samples_per_second": 922.113,
      "eval_steps_per_second": 1.199,
      "step": 8
    },
    {
      "epoch": 2.5,
      "learning_rate": 4.875e-05,
      "loss": 0.8301,
      "step": 10
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.4202331006526947,
      "eval_runtime": 10.7807,
      "eval_samples_per_second": 927.582,
      "eval_steps_per_second": 1.206,
      "step": 12
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.38434356451034546,
      "eval_runtime": 10.8022,
      "eval_samples_per_second": 925.735,
      "eval_steps_per_second": 1.203,
      "step": 16
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.75e-05,
      "loss": 0.7703,
      "step": 20
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.38975098729133606,
      "eval_runtime": 10.7363,
      "eval_samples_per_second": 931.418,
      "eval_steps_per_second": 1.211,
      "step": 20
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.35734379291534424,
      "eval_runtime": 10.725,
      "eval_samples_per_second": 932.399,
      "eval_steps_per_second": 1.212,
      "step": 24
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.34219321608543396,
      "eval_runtime": 10.9668,
      "eval_samples_per_second": 911.84,
      "eval_steps_per_second": 1.185,
      "step": 28
    },
    {
      "epoch": 7.5,
      "learning_rate": 4.6250000000000006e-05,
      "loss": 0.7169,
      "step": 30
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.330382376909256,
      "eval_runtime": 10.8312,
      "eval_samples_per_second": 923.257,
      "eval_steps_per_second": 1.2,
      "step": 32
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.3048989772796631,
      "eval_runtime": 10.726,
      "eval_samples_per_second": 932.316,
      "eval_steps_per_second": 1.212,
      "step": 36
    },
    {
      "epoch": 10.0,
      "learning_rate": 4.5e-05,
      "loss": 0.6727,
      "step": 40
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.30665355920791626,
      "eval_runtime": 10.9181,
      "eval_samples_per_second": 915.912,
      "eval_steps_per_second": 1.191,
      "step": 40
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.29648929834365845,
      "eval_runtime": 10.7354,
      "eval_samples_per_second": 931.494,
      "eval_steps_per_second": 1.211,
      "step": 44
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.26932698488235474,
      "eval_runtime": 10.8379,
      "eval_samples_per_second": 922.691,
      "eval_steps_per_second": 1.199,
      "step": 48
    },
    {
      "epoch": 12.5,
      "learning_rate": 4.375e-05,
      "loss": 0.6394,
      "step": 50
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.271121084690094,
      "eval_runtime": 10.8381,
      "eval_samples_per_second": 922.67,
      "eval_steps_per_second": 1.199,
      "step": 52
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.25609534978866577,
      "eval_runtime": 10.7523,
      "eval_samples_per_second": 930.029,
      "eval_steps_per_second": 1.209,
      "step": 56
    },
    {
      "epoch": 15.0,
      "learning_rate": 4.25e-05,
      "loss": 0.6047,
      "step": 60
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.2453787624835968,
      "eval_runtime": 10.9268,
      "eval_samples_per_second": 915.184,
      "eval_steps_per_second": 1.19,
      "step": 60
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.23679418861865997,
      "eval_runtime": 10.7591,
      "eval_samples_per_second": 929.449,
      "eval_steps_per_second": 1.208,
      "step": 64
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.22503486275672913,
      "eval_runtime": 10.8371,
      "eval_samples_per_second": 922.76,
      "eval_steps_per_second": 1.2,
      "step": 68
    },
    {
      "epoch": 17.5,
      "learning_rate": 4.125e-05,
      "loss": 0.565,
      "step": 70
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.21100571751594543,
      "eval_runtime": 10.8584,
      "eval_samples_per_second": 920.947,
      "eval_steps_per_second": 1.197,
      "step": 72
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.21086934208869934,
      "eval_runtime": 10.746,
      "eval_samples_per_second": 930.581,
      "eval_steps_per_second": 1.21,
      "step": 76
    },
    {
      "epoch": 20.0,
      "learning_rate": 4e-05,
      "loss": 0.5368,
      "step": 80
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.1950305849313736,
      "eval_runtime": 10.7833,
      "eval_samples_per_second": 927.356,
      "eval_steps_per_second": 1.206,
      "step": 80
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.19742541015148163,
      "eval_runtime": 10.7496,
      "eval_samples_per_second": 930.268,
      "eval_steps_per_second": 1.209,
      "step": 84
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.18190620839595795,
      "eval_runtime": 11.0063,
      "eval_samples_per_second": 908.569,
      "eval_steps_per_second": 1.181,
      "step": 88
    },
    {
      "epoch": 22.5,
      "learning_rate": 3.875e-05,
      "loss": 0.518,
      "step": 90
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.1795072704553604,
      "eval_runtime": 10.855,
      "eval_samples_per_second": 921.235,
      "eval_steps_per_second": 1.198,
      "step": 92
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.16476453840732574,
      "eval_runtime": 10.7846,
      "eval_samples_per_second": 927.252,
      "eval_steps_per_second": 1.205,
      "step": 96
    },
    {
      "epoch": 25.0,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.4862,
      "step": 100
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.16748683154582977,
      "eval_runtime": 10.9227,
      "eval_samples_per_second": 915.525,
      "eval_steps_per_second": 1.19,
      "step": 100
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.15504491329193115,
      "eval_runtime": 10.7793,
      "eval_samples_per_second": 927.702,
      "eval_steps_per_second": 1.206,
      "step": 104
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.15300293266773224,
      "eval_runtime": 10.8547,
      "eval_samples_per_second": 921.263,
      "eval_steps_per_second": 1.198,
      "step": 108
    },
    {
      "epoch": 27.5,
      "learning_rate": 3.625e-05,
      "loss": 0.4628,
      "step": 110
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.14466118812561035,
      "eval_runtime": 10.8549,
      "eval_samples_per_second": 921.246,
      "eval_steps_per_second": 1.198,
      "step": 112
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.1442325860261917,
      "eval_runtime": 10.764,
      "eval_samples_per_second": 929.023,
      "eval_steps_per_second": 1.208,
      "step": 116
    },
    {
      "epoch": 30.0,
      "learning_rate": 3.5e-05,
      "loss": 0.4408,
      "step": 120
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.13103845715522766,
      "eval_runtime": 10.9404,
      "eval_samples_per_second": 914.043,
      "eval_steps_per_second": 1.188,
      "step": 120
    },
    {
      "epoch": 31.0,
      "eval_loss": 0.13363589346408844,
      "eval_runtime": 10.7472,
      "eval_samples_per_second": 930.475,
      "eval_steps_per_second": 1.21,
      "step": 124
    },
    {
      "epoch": 32.0,
      "eval_loss": 0.12347704917192459,
      "eval_runtime": 10.8696,
      "eval_samples_per_second": 920.0,
      "eval_steps_per_second": 1.196,
      "step": 128
    },
    {
      "epoch": 32.5,
      "learning_rate": 3.375000000000001e-05,
      "loss": 0.4192,
      "step": 130
    },
    {
      "epoch": 33.0,
      "eval_loss": 0.1190723404288292,
      "eval_runtime": 10.8564,
      "eval_samples_per_second": 921.113,
      "eval_steps_per_second": 1.197,
      "step": 132
    },
    {
      "epoch": 34.0,
      "eval_loss": 0.11926523596048355,
      "eval_runtime": 10.7486,
      "eval_samples_per_second": 930.351,
      "eval_steps_per_second": 1.209,
      "step": 136
    },
    {
      "epoch": 35.0,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.4133,
      "step": 140
    },
    {
      "epoch": 35.0,
      "eval_loss": 0.1123439222574234,
      "eval_runtime": 10.9392,
      "eval_samples_per_second": 914.14,
      "eval_steps_per_second": 1.188,
      "step": 140
    },
    {
      "epoch": 36.0,
      "eval_loss": 0.11556507647037506,
      "eval_runtime": 10.7494,
      "eval_samples_per_second": 930.287,
      "eval_steps_per_second": 1.209,
      "step": 144
    },
    {
      "epoch": 37.0,
      "eval_loss": 0.10512395203113556,
      "eval_runtime": 10.8606,
      "eval_samples_per_second": 920.755,
      "eval_steps_per_second": 1.197,
      "step": 148
    },
    {
      "epoch": 37.5,
      "learning_rate": 3.125e-05,
      "loss": 0.3922,
      "step": 150
    },
    {
      "epoch": 38.0,
      "eval_loss": 0.09992647171020508,
      "eval_runtime": 10.8473,
      "eval_samples_per_second": 921.889,
      "eval_steps_per_second": 1.198,
      "step": 152
    },
    {
      "epoch": 39.0,
      "eval_loss": 0.09912022948265076,
      "eval_runtime": 10.751,
      "eval_samples_per_second": 930.148,
      "eval_steps_per_second": 1.209,
      "step": 156
    },
    {
      "epoch": 40.0,
      "learning_rate": 3e-05,
      "loss": 0.3778,
      "step": 160
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.09952548891305923,
      "eval_runtime": 10.9239,
      "eval_samples_per_second": 915.427,
      "eval_steps_per_second": 1.19,
      "step": 160
    },
    {
      "epoch": 41.0,
      "eval_loss": 0.09118303656578064,
      "eval_runtime": 10.7525,
      "eval_samples_per_second": 930.014,
      "eval_steps_per_second": 1.209,
      "step": 164
    },
    {
      "epoch": 42.0,
      "eval_loss": 0.09032606333494186,
      "eval_runtime": 10.8423,
      "eval_samples_per_second": 922.31,
      "eval_steps_per_second": 1.199,
      "step": 168
    },
    {
      "epoch": 42.5,
      "learning_rate": 2.8749999999999997e-05,
      "loss": 0.3655,
      "step": 170
    },
    {
      "epoch": 43.0,
      "eval_loss": 0.08409227430820465,
      "eval_runtime": 10.8572,
      "eval_samples_per_second": 921.048,
      "eval_steps_per_second": 1.197,
      "step": 172
    },
    {
      "epoch": 44.0,
      "eval_loss": 0.0789598897099495,
      "eval_runtime": 10.7437,
      "eval_samples_per_second": 930.776,
      "eval_steps_per_second": 1.21,
      "step": 176
    },
    {
      "epoch": 45.0,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.3526,
      "step": 180
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.08267370611429214,
      "eval_runtime": 10.9337,
      "eval_samples_per_second": 914.6,
      "eval_steps_per_second": 1.189,
      "step": 180
    },
    {
      "epoch": 46.0,
      "eval_loss": 0.07559242099523544,
      "eval_runtime": 10.7359,
      "eval_samples_per_second": 931.456,
      "eval_steps_per_second": 1.211,
      "step": 184
    },
    {
      "epoch": 47.0,
      "eval_loss": 0.07468590885400772,
      "eval_runtime": 10.8561,
      "eval_samples_per_second": 921.142,
      "eval_steps_per_second": 1.197,
      "step": 188
    },
    {
      "epoch": 47.5,
      "learning_rate": 2.625e-05,
      "loss": 0.3378,
      "step": 190
    },
    {
      "epoch": 48.0,
      "eval_loss": 0.0737282931804657,
      "eval_runtime": 10.8372,
      "eval_samples_per_second": 922.747,
      "eval_steps_per_second": 1.2,
      "step": 192
    },
    {
      "epoch": 49.0,
      "eval_loss": 0.07465548813343048,
      "eval_runtime": 10.7608,
      "eval_samples_per_second": 929.3,
      "eval_steps_per_second": 1.208,
      "step": 196
    },
    {
      "epoch": 50.0,
      "learning_rate": 2.5e-05,
      "loss": 0.3308,
      "step": 200
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.07294411957263947,
      "eval_runtime": 10.9347,
      "eval_samples_per_second": 914.518,
      "eval_steps_per_second": 1.189,
      "step": 200
    },
    {
      "epoch": 51.0,
      "eval_loss": 0.0665128082036972,
      "eval_runtime": 10.7481,
      "eval_samples_per_second": 930.401,
      "eval_steps_per_second": 1.21,
      "step": 204
    },
    {
      "epoch": 52.0,
      "eval_loss": 0.06627500057220459,
      "eval_runtime": 10.8409,
      "eval_samples_per_second": 922.432,
      "eval_steps_per_second": 1.199,
      "step": 208
    },
    {
      "epoch": 52.5,
      "learning_rate": 2.375e-05,
      "loss": 0.321,
      "step": 210
    },
    {
      "epoch": 53.0,
      "eval_loss": 0.06422976404428482,
      "eval_runtime": 10.8561,
      "eval_samples_per_second": 921.142,
      "eval_steps_per_second": 1.197,
      "step": 212
    },
    {
      "epoch": 54.0,
      "eval_loss": 0.06401015818119049,
      "eval_runtime": 10.7568,
      "eval_samples_per_second": 929.646,
      "eval_steps_per_second": 1.209,
      "step": 216
    },
    {
      "epoch": 55.0,
      "learning_rate": 2.25e-05,
      "loss": 0.3084,
      "step": 220
    },
    {
      "epoch": 55.0,
      "eval_loss": 0.06319215148687363,
      "eval_runtime": 10.9357,
      "eval_samples_per_second": 914.439,
      "eval_steps_per_second": 1.189,
      "step": 220
    },
    {
      "epoch": 56.0,
      "eval_loss": 0.059920959174633026,
      "eval_runtime": 10.7511,
      "eval_samples_per_second": 930.141,
      "eval_steps_per_second": 1.209,
      "step": 224
    },
    {
      "epoch": 57.0,
      "eval_loss": 0.057977043092250824,
      "eval_runtime": 10.8555,
      "eval_samples_per_second": 921.188,
      "eval_steps_per_second": 1.198,
      "step": 228
    },
    {
      "epoch": 57.5,
      "learning_rate": 2.125e-05,
      "loss": 0.2967,
      "step": 230
    },
    {
      "epoch": 58.0,
      "eval_loss": 0.05669580027461052,
      "eval_runtime": 10.8434,
      "eval_samples_per_second": 922.219,
      "eval_steps_per_second": 1.199,
      "step": 232
    },
    {
      "epoch": 59.0,
      "eval_loss": 0.0525004044175148,
      "eval_runtime": 10.7506,
      "eval_samples_per_second": 930.18,
      "eval_steps_per_second": 1.209,
      "step": 236
    },
    {
      "epoch": 60.0,
      "learning_rate": 2e-05,
      "loss": 0.2928,
      "step": 240
    },
    {
      "epoch": 60.0,
      "eval_loss": 0.05224265158176422,
      "eval_runtime": 10.9382,
      "eval_samples_per_second": 914.231,
      "eval_steps_per_second": 1.188,
      "step": 240
    },
    {
      "epoch": 61.0,
      "eval_loss": 0.05358195677399635,
      "eval_runtime": 10.7598,
      "eval_samples_per_second": 929.387,
      "eval_steps_per_second": 1.208,
      "step": 244
    },
    {
      "epoch": 62.0,
      "eval_loss": 0.052435729652643204,
      "eval_runtime": 10.8537,
      "eval_samples_per_second": 921.341,
      "eval_steps_per_second": 1.198,
      "step": 248
    },
    {
      "epoch": 62.5,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.2929,
      "step": 250
    },
    {
      "epoch": 63.0,
      "eval_loss": 0.056764379143714905,
      "eval_runtime": 10.8657,
      "eval_samples_per_second": 920.329,
      "eval_steps_per_second": 1.196,
      "step": 252
    },
    {
      "epoch": 64.0,
      "eval_loss": 0.0530422069132328,
      "eval_runtime": 10.7635,
      "eval_samples_per_second": 929.065,
      "eval_steps_per_second": 1.208,
      "step": 256
    },
    {
      "epoch": 65.0,
      "learning_rate": 1.75e-05,
      "loss": 0.283,
      "step": 260
    },
    {
      "epoch": 65.0,
      "eval_loss": 0.04763900488615036,
      "eval_runtime": 10.9405,
      "eval_samples_per_second": 914.038,
      "eval_steps_per_second": 1.188,
      "step": 260
    },
    {
      "epoch": 66.0,
      "eval_loss": 0.04787232354283333,
      "eval_runtime": 10.7497,
      "eval_samples_per_second": 930.255,
      "eval_steps_per_second": 1.209,
      "step": 264
    },
    {
      "epoch": 67.0,
      "eval_loss": 0.05069798231124878,
      "eval_runtime": 10.8615,
      "eval_samples_per_second": 920.685,
      "eval_steps_per_second": 1.197,
      "step": 268
    },
    {
      "epoch": 67.5,
      "learning_rate": 1.6250000000000002e-05,
      "loss": 0.2766,
      "step": 270
    },
    {
      "epoch": 68.0,
      "eval_loss": 0.04605843871831894,
      "eval_runtime": 10.8475,
      "eval_samples_per_second": 921.871,
      "eval_steps_per_second": 1.198,
      "step": 272
    },
    {
      "epoch": 69.0,
      "eval_loss": 0.04439115151762962,
      "eval_runtime": 10.7586,
      "eval_samples_per_second": 929.488,
      "eval_steps_per_second": 1.208,
      "step": 276
    },
    {
      "epoch": 70.0,
      "learning_rate": 1.5e-05,
      "loss": 0.2677,
      "step": 280
    },
    {
      "epoch": 70.0,
      "eval_loss": 0.0455540269613266,
      "eval_runtime": 10.9449,
      "eval_samples_per_second": 913.667,
      "eval_steps_per_second": 1.188,
      "step": 280
    },
    {
      "epoch": 71.0,
      "eval_loss": 0.04371872544288635,
      "eval_runtime": 10.758,
      "eval_samples_per_second": 929.543,
      "eval_steps_per_second": 1.208,
      "step": 284
    },
    {
      "epoch": 72.0,
      "eval_loss": 0.04281056672334671,
      "eval_runtime": 10.8458,
      "eval_samples_per_second": 922.019,
      "eval_steps_per_second": 1.199,
      "step": 288
    },
    {
      "epoch": 72.5,
      "learning_rate": 1.3750000000000002e-05,
      "loss": 0.2614,
      "step": 290
    },
    {
      "epoch": 73.0,
      "eval_loss": 0.04188177362084389,
      "eval_runtime": 10.8487,
      "eval_samples_per_second": 921.77,
      "eval_steps_per_second": 1.198,
      "step": 292
    },
    {
      "epoch": 74.0,
      "eval_loss": 0.04140578955411911,
      "eval_runtime": 10.7507,
      "eval_samples_per_second": 930.172,
      "eval_steps_per_second": 1.209,
      "step": 296
    },
    {
      "epoch": 75.0,
      "learning_rate": 1.25e-05,
      "loss": 0.2595,
      "step": 300
    },
    {
      "epoch": 75.0,
      "eval_loss": 0.04175864905118942,
      "eval_runtime": 10.9254,
      "eval_samples_per_second": 915.297,
      "eval_steps_per_second": 1.19,
      "step": 300
    },
    {
      "epoch": 76.0,
      "eval_loss": 0.04123968258500099,
      "eval_runtime": 10.7366,
      "eval_samples_per_second": 931.392,
      "eval_steps_per_second": 1.211,
      "step": 304
    },
    {
      "epoch": 77.0,
      "eval_loss": 0.03961934149265289,
      "eval_runtime": 10.8404,
      "eval_samples_per_second": 922.477,
      "eval_steps_per_second": 1.199,
      "step": 308
    },
    {
      "epoch": 77.5,
      "learning_rate": 1.125e-05,
      "loss": 0.2582,
      "step": 310
    },
    {
      "epoch": 78.0,
      "eval_loss": 0.03820143640041351,
      "eval_runtime": 10.8282,
      "eval_samples_per_second": 923.515,
      "eval_steps_per_second": 1.201,
      "step": 312
    },
    {
      "epoch": 79.0,
      "eval_loss": 0.038101743906736374,
      "eval_runtime": 10.7523,
      "eval_samples_per_second": 930.033,
      "eval_steps_per_second": 1.209,
      "step": 316
    },
    {
      "epoch": 80.0,
      "learning_rate": 1e-05,
      "loss": 0.2511,
      "step": 320
    },
    {
      "epoch": 80.0,
      "eval_loss": 0.03866468369960785,
      "eval_runtime": 10.9151,
      "eval_samples_per_second": 916.163,
      "eval_steps_per_second": 1.191,
      "step": 320
    },
    {
      "epoch": 81.0,
      "eval_loss": 0.0387905091047287,
      "eval_runtime": 10.7459,
      "eval_samples_per_second": 930.586,
      "eval_steps_per_second": 1.21,
      "step": 324
    },
    {
      "epoch": 82.0,
      "eval_loss": 0.03723302111029625,
      "eval_runtime": 10.8332,
      "eval_samples_per_second": 923.092,
      "eval_steps_per_second": 1.2,
      "step": 328
    },
    {
      "epoch": 82.5,
      "learning_rate": 8.75e-06,
      "loss": 0.2481,
      "step": 330
    },
    {
      "epoch": 83.0,
      "eval_loss": 0.03600754216313362,
      "eval_runtime": 10.8588,
      "eval_samples_per_second": 920.91,
      "eval_steps_per_second": 1.197,
      "step": 332
    },
    {
      "epoch": 84.0,
      "eval_loss": 0.036581408232450485,
      "eval_runtime": 10.7405,
      "eval_samples_per_second": 931.052,
      "eval_steps_per_second": 1.21,
      "step": 336
    },
    {
      "epoch": 85.0,
      "learning_rate": 7.5e-06,
      "loss": 0.2474,
      "step": 340
    },
    {
      "epoch": 85.0,
      "eval_loss": 0.03646053001284599,
      "eval_runtime": 10.9336,
      "eval_samples_per_second": 914.608,
      "eval_steps_per_second": 1.189,
      "step": 340
    },
    {
      "epoch": 86.0,
      "eval_loss": 0.03565111756324768,
      "eval_runtime": 10.7346,
      "eval_samples_per_second": 931.569,
      "eval_steps_per_second": 1.211,
      "step": 344
    },
    {
      "epoch": 87.0,
      "eval_loss": 0.035478148609399796,
      "eval_runtime": 10.8414,
      "eval_samples_per_second": 922.386,
      "eval_steps_per_second": 1.199,
      "step": 348
    },
    {
      "epoch": 87.5,
      "learning_rate": 6.25e-06,
      "loss": 0.2537,
      "step": 350
    },
    {
      "epoch": 88.0,
      "eval_loss": 0.03596709668636322,
      "eval_runtime": 10.8347,
      "eval_samples_per_second": 922.957,
      "eval_steps_per_second": 1.2,
      "step": 352
    },
    {
      "epoch": 89.0,
      "eval_loss": 0.03587077185511589,
      "eval_runtime": 10.7439,
      "eval_samples_per_second": 930.763,
      "eval_steps_per_second": 1.21,
      "step": 356
    },
    {
      "epoch": 90.0,
      "learning_rate": 5e-06,
      "loss": 0.2438,
      "step": 360
    },
    {
      "epoch": 90.0,
      "eval_loss": 0.03546379879117012,
      "eval_runtime": 10.9027,
      "eval_samples_per_second": 917.201,
      "eval_steps_per_second": 1.192,
      "step": 360
    },
    {
      "epoch": 91.0,
      "eval_loss": 0.03530960530042648,
      "eval_runtime": 10.7401,
      "eval_samples_per_second": 931.093,
      "eval_steps_per_second": 1.21,
      "step": 364
    },
    {
      "epoch": 92.0,
      "eval_loss": 0.034898195415735245,
      "eval_runtime": 10.838,
      "eval_samples_per_second": 922.682,
      "eval_steps_per_second": 1.199,
      "step": 368
    },
    {
      "epoch": 92.5,
      "learning_rate": 3.75e-06,
      "loss": 0.2461,
      "step": 370
    },
    {
      "epoch": 93.0,
      "eval_loss": 0.03429694473743439,
      "eval_runtime": 10.8462,
      "eval_samples_per_second": 921.982,
      "eval_steps_per_second": 1.199,
      "step": 372
    },
    {
      "epoch": 94.0,
      "eval_loss": 0.03415210172533989,
      "eval_runtime": 10.73,
      "eval_samples_per_second": 931.966,
      "eval_steps_per_second": 1.212,
      "step": 376
    }
  ],
  "logging_steps": 10,
  "max_steps": 400,
  "num_train_epochs": 100,
  "save_steps": 500,
  "total_flos": 8096480649216000.0,
  "trial_name": null,
  "trial_params": null
}
|
|