{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 1, "global_step": 70, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14285714285714285, "grad_norm": 115.0, "learning_rate": 2.5e-05, "loss": 2.3293, "step": 1 }, { "epoch": 0.14285714285714285, "eval_loss": 2.3536651134490967, "eval_matthews_correlation": -0.06591512432573125, "eval_runtime": 2.3426, "eval_samples_per_second": 94.34, "eval_steps_per_second": 1.708, "step": 1 }, { "epoch": 0.2857142857142857, "grad_norm": 57.75, "learning_rate": 5e-05, "loss": 1.5584, "step": 2 }, { "epoch": 0.2857142857142857, "eval_loss": 1.9677380323410034, "eval_matthews_correlation": -0.006695650197517534, "eval_runtime": 2.4387, "eval_samples_per_second": 90.624, "eval_steps_per_second": 1.64, "step": 2 }, { "epoch": 0.42857142857142855, "grad_norm": 61.0, "learning_rate": 4.9264705882352944e-05, "loss": 1.8664, "step": 3 }, { "epoch": 0.42857142857142855, "eval_loss": 3.9521396160125732, "eval_matthews_correlation": 0.19430266940230304, "eval_runtime": 2.4032, "eval_samples_per_second": 91.959, "eval_steps_per_second": 1.664, "step": 3 }, { "epoch": 0.5714285714285714, "grad_norm": 304.0, "learning_rate": 4.8529411764705885e-05, "loss": 3.7735, "step": 4 }, { "epoch": 0.5714285714285714, "eval_loss": 1.8678630590438843, "eval_matthews_correlation": 0.4251456680325992, "eval_runtime": 2.4969, "eval_samples_per_second": 88.509, "eval_steps_per_second": 1.602, "step": 4 }, { "epoch": 0.7142857142857143, "grad_norm": 87.0, "learning_rate": 4.7794117647058826e-05, "loss": 1.9643, "step": 5 }, { "epoch": 0.7142857142857143, "eval_loss": 2.8472695350646973, "eval_matthews_correlation": 0.33395224488110414, "eval_runtime": 2.433, "eval_samples_per_second": 90.836, "eval_steps_per_second": 1.644, "step": 5 }, { "epoch": 0.8571428571428571, "grad_norm": 230.0, "learning_rate": 4.705882352941177e-05, "loss": 3.0758, "step": 6 }, { "epoch": 0.8571428571428571, "eval_loss": 2.5650100708007812, "eval_matthews_correlation": 0.3136710011001962, "eval_runtime": 2.5174, "eval_samples_per_second": 87.789, "eval_steps_per_second": 1.589, "step": 6 }, { "epoch": 1.0, "grad_norm": 239.0, "learning_rate": 4.632352941176471e-05, "loss": 3.3907, "step": 7 }, { "epoch": 1.0, "eval_loss": 1.2579513788223267, "eval_matthews_correlation": 0.41294426532673595, "eval_runtime": 2.3986, "eval_samples_per_second": 92.137, "eval_steps_per_second": 1.668, "step": 7 }, { "epoch": 1.1428571428571428, "grad_norm": 52.75, "learning_rate": 4.558823529411765e-05, "loss": 1.0873, "step": 8 }, { "epoch": 1.1428571428571428, "eval_loss": 2.0917861461639404, "eval_matthews_correlation": 0.362354082768012, "eval_runtime": 2.3981, "eval_samples_per_second": 92.157, "eval_steps_per_second": 1.668, "step": 8 }, { "epoch": 1.2857142857142856, "grad_norm": 178.0, "learning_rate": 4.485294117647059e-05, "loss": 1.5217, "step": 9 }, { "epoch": 1.2857142857142856, "eval_loss": 2.355184555053711, "eval_matthews_correlation": 0.291910232021563, "eval_runtime": 2.4479, "eval_samples_per_second": 90.283, "eval_steps_per_second": 1.634, "step": 9 }, { "epoch": 1.4285714285714286, "grad_norm": 174.0, "learning_rate": 4.411764705882353e-05, "loss": 1.381, "step": 10 }, { "epoch": 1.4285714285714286, "eval_loss": 1.6294102668762207, "eval_matthews_correlation": 0.42663939531137113, "eval_runtime": 2.4472, "eval_samples_per_second": 90.306, "eval_steps_per_second": 1.635, "step": 10 }, { "epoch": 1.5714285714285714, "grad_norm": 173.0, "learning_rate": 4.3382352941176474e-05, "loss": 1.837, "step": 11 }, { "epoch": 1.5714285714285714, "eval_loss": 1.2466400861740112, "eval_matthews_correlation": 0.4498698839028164, "eval_runtime": 2.4515, "eval_samples_per_second": 90.147, "eval_steps_per_second": 1.632, "step": 11 }, { "epoch": 1.7142857142857144, "grad_norm": 38.75, "learning_rate": 4.2647058823529415e-05, "loss": 1.0087, "step": 12 }, { "epoch": 1.7142857142857144, "eval_loss": 1.7990175485610962, "eval_matthews_correlation": 0.27245548483246373, "eval_runtime": 2.4526, "eval_samples_per_second": 90.108, "eval_steps_per_second": 1.631, "step": 12 }, { "epoch": 1.8571428571428572, "grad_norm": 177.0, "learning_rate": 4.1911764705882356e-05, "loss": 1.3649, "step": 13 }, { "epoch": 1.8571428571428572, "eval_loss": 1.8390474319458008, "eval_matthews_correlation": 0.25282726975747233, "eval_runtime": 2.4436, "eval_samples_per_second": 90.44, "eval_steps_per_second": 1.637, "step": 13 }, { "epoch": 2.0, "grad_norm": 188.0, "learning_rate": 4.11764705882353e-05, "loss": 1.4765, "step": 14 }, { "epoch": 2.0, "eval_loss": 1.3225667476654053, "eval_matthews_correlation": 0.42075821027925114, "eval_runtime": 2.4529, "eval_samples_per_second": 90.099, "eval_steps_per_second": 1.631, "step": 14 }, { "epoch": 2.142857142857143, "grad_norm": 90.5, "learning_rate": 4.044117647058824e-05, "loss": 0.721, "step": 15 }, { "epoch": 2.142857142857143, "eval_loss": 1.0870685577392578, "eval_matthews_correlation": 0.3977728811458742, "eval_runtime": 2.4487, "eval_samples_per_second": 90.251, "eval_steps_per_second": 1.633, "step": 15 }, { "epoch": 2.2857142857142856, "grad_norm": 30.75, "learning_rate": 3.970588235294117e-05, "loss": 0.5558, "step": 16 }, { "epoch": 2.2857142857142856, "eval_loss": 1.6198923587799072, "eval_matthews_correlation": 0.3547921342690866, "eval_runtime": 2.4476, "eval_samples_per_second": 90.293, "eval_steps_per_second": 1.634, "step": 16 }, { "epoch": 2.4285714285714284, "grad_norm": 98.5, "learning_rate": 3.897058823529412e-05, "loss": 0.6862, "step": 17 }, { "epoch": 2.4285714285714284, "eval_loss": 1.977512001991272, "eval_matthews_correlation": 0.325163923401339, "eval_runtime": 2.451, "eval_samples_per_second": 90.168, "eval_steps_per_second": 1.632, "step": 17 }, { "epoch": 2.571428571428571, "grad_norm": 147.0, "learning_rate": 3.8235294117647055e-05, "loss": 1.2147, "step": 18 }, { "epoch": 2.571428571428571, "eval_loss": 1.86572265625, "eval_matthews_correlation": 0.34367974406613594, "eval_runtime": 2.444, "eval_samples_per_second": 90.427, "eval_steps_per_second": 1.637, "step": 18 }, { "epoch": 2.7142857142857144, "grad_norm": 123.5, "learning_rate": 3.7500000000000003e-05, "loss": 0.9887, "step": 19 }, { "epoch": 2.7142857142857144, "eval_loss": 1.4242559671401978, "eval_matthews_correlation": 0.4393080679849749, "eval_runtime": 2.4464, "eval_samples_per_second": 90.337, "eval_steps_per_second": 1.635, "step": 19 }, { "epoch": 2.857142857142857, "grad_norm": 112.0, "learning_rate": 3.6764705882352945e-05, "loss": 0.9255, "step": 20 }, { "epoch": 2.857142857142857, "eval_loss": 1.0208874940872192, "eval_matthews_correlation": 0.4561037659877011, "eval_runtime": 2.4474, "eval_samples_per_second": 90.299, "eval_steps_per_second": 1.634, "step": 20 }, { "epoch": 3.0, "grad_norm": 28.75, "learning_rate": 3.6029411764705886e-05, "loss": 0.521, "step": 21 }, { "epoch": 3.0, "eval_loss": 1.15372896194458, "eval_matthews_correlation": 0.42984107582508957, "eval_runtime": 2.4504, "eval_samples_per_second": 90.19, "eval_steps_per_second": 1.632, "step": 21 }, { "epoch": 3.142857142857143, "grad_norm": 103.0, "learning_rate": 3.529411764705883e-05, "loss": 0.6449, "step": 22 }, { "epoch": 3.142857142857143, "eval_loss": 1.4311715364456177, "eval_matthews_correlation": 0.35056721354125436, "eval_runtime": 2.4502, "eval_samples_per_second": 90.197, "eval_steps_per_second": 1.633, "step": 22 }, { "epoch": 3.2857142857142856, "grad_norm": 120.5, "learning_rate": 3.455882352941177e-05, "loss": 0.8555, "step": 23 }, { "epoch": 3.2857142857142856, "eval_loss": 1.412217140197754, "eval_matthews_correlation": 0.3570256366956951, "eval_runtime": 2.4452, "eval_samples_per_second": 90.382, "eval_steps_per_second": 1.636, "step": 23 }, { "epoch": 3.4285714285714284, "grad_norm": 91.5, "learning_rate": 3.382352941176471e-05, "loss": 0.7243, "step": 24 }, { "epoch": 3.4285714285714284, "eval_loss": 1.155587077140808, "eval_matthews_correlation": 0.4394341621715417, "eval_runtime": 2.45, "eval_samples_per_second": 90.205, "eval_steps_per_second": 1.633, "step": 24 }, { "epoch": 3.571428571428571, "grad_norm": 57.0, "learning_rate": 3.308823529411765e-05, "loss": 0.3214, "step": 25 }, { "epoch": 3.571428571428571, "eval_loss": 0.9137119054794312, "eval_matthews_correlation": 0.5300936664086263, "eval_runtime": 2.4473, "eval_samples_per_second": 90.304, "eval_steps_per_second": 1.634, "step": 25 }, { "epoch": 3.7142857142857144, "grad_norm": 40.0, "learning_rate": 3.235294117647059e-05, "loss": 0.3758, "step": 26 }, { "epoch": 3.7142857142857144, "eval_loss": 0.9561758041381836, "eval_matthews_correlation": 0.4651714656882337, "eval_runtime": 2.4463, "eval_samples_per_second": 90.34, "eval_steps_per_second": 1.635, "step": 26 }, { "epoch": 3.857142857142857, "grad_norm": 21.75, "learning_rate": 3.161764705882353e-05, "loss": 0.2258, "step": 27 }, { "epoch": 3.857142857142857, "eval_loss": 1.128272533416748, "eval_matthews_correlation": 0.4814890197334374, "eval_runtime": 2.4491, "eval_samples_per_second": 90.237, "eval_steps_per_second": 1.633, "step": 27 }, { "epoch": 4.0, "grad_norm": 69.0, "learning_rate": 3.0882352941176475e-05, "loss": 0.3708, "step": 28 }, { "epoch": 4.0, "eval_loss": 1.177422285079956, "eval_matthews_correlation": 0.4998140485129056, "eval_runtime": 2.4044, "eval_samples_per_second": 91.916, "eval_steps_per_second": 1.664, "step": 28 }, { "epoch": 4.142857142857143, "grad_norm": 67.0, "learning_rate": 3.0147058823529413e-05, "loss": 0.3731, "step": 29 }, { "epoch": 4.142857142857143, "eval_loss": 1.086193323135376, "eval_matthews_correlation": 0.4918366209765684, "eval_runtime": 2.4518, "eval_samples_per_second": 90.139, "eval_steps_per_second": 1.631, "step": 29 }, { "epoch": 4.285714285714286, "grad_norm": 101.5, "learning_rate": 2.9411764705882354e-05, "loss": 0.4808, "step": 30 }, { "epoch": 4.285714285714286, "eval_loss": 0.9441786408424377, "eval_matthews_correlation": 0.46521158567025794, "eval_runtime": 2.4478, "eval_samples_per_second": 90.284, "eval_steps_per_second": 1.634, "step": 30 }, { "epoch": 4.428571428571429, "grad_norm": 55.25, "learning_rate": 2.8676470588235295e-05, "loss": 0.2656, "step": 31 }, { "epoch": 4.428571428571429, "eval_loss": 0.8748857975006104, "eval_matthews_correlation": 0.4648371224980464, "eval_runtime": 2.4437, "eval_samples_per_second": 90.435, "eval_steps_per_second": 1.637, "step": 31 }, { "epoch": 4.571428571428571, "grad_norm": 11.6875, "learning_rate": 2.7941176470588236e-05, "loss": 0.2129, "step": 32 }, { "epoch": 4.571428571428571, "eval_loss": 0.9578605890274048, "eval_matthews_correlation": 0.48306400916503295, "eval_runtime": 2.4414, "eval_samples_per_second": 90.523, "eval_steps_per_second": 1.638, "step": 32 }, { "epoch": 4.714285714285714, "grad_norm": 37.75, "learning_rate": 2.7205882352941174e-05, "loss": 0.1662, "step": 33 }, { "epoch": 4.714285714285714, "eval_loss": 1.0285699367523193, "eval_matthews_correlation": 0.45099444835506275, "eval_runtime": 2.4439, "eval_samples_per_second": 90.428, "eval_steps_per_second": 1.637, "step": 33 }, { "epoch": 4.857142857142857, "grad_norm": 72.0, "learning_rate": 2.647058823529412e-05, "loss": 0.3764, "step": 34 }, { "epoch": 4.857142857142857, "eval_loss": 1.0230013132095337, "eval_matthews_correlation": 0.44663306131570457, "eval_runtime": 2.4405, "eval_samples_per_second": 90.554, "eval_steps_per_second": 1.639, "step": 34 }, { "epoch": 5.0, "grad_norm": 52.0, "learning_rate": 2.5735294117647057e-05, "loss": 0.2526, "step": 35 }, { "epoch": 5.0, "eval_loss": 0.9536852836608887, "eval_matthews_correlation": 0.464431647717182, "eval_runtime": 2.4485, "eval_samples_per_second": 90.259, "eval_steps_per_second": 1.634, "step": 35 }, { "epoch": 5.142857142857143, "grad_norm": 29.625, "learning_rate": 2.5e-05, "loss": 0.1377, "step": 36 }, { "epoch": 5.142857142857143, "eval_loss": 0.9085801243782043, "eval_matthews_correlation": 0.4468374029095797, "eval_runtime": 2.4414, "eval_samples_per_second": 90.521, "eval_steps_per_second": 1.638, "step": 36 }, { "epoch": 5.285714285714286, "grad_norm": 7.9375, "learning_rate": 2.4264705882352942e-05, "loss": 0.0878, "step": 37 }, { "epoch": 5.285714285714286, "eval_loss": 0.9484843015670776, "eval_matthews_correlation": 0.4447429282606205, "eval_runtime": 2.4472, "eval_samples_per_second": 90.308, "eval_steps_per_second": 1.635, "step": 37 }, { "epoch": 5.428571428571429, "grad_norm": 35.75, "learning_rate": 2.3529411764705884e-05, "loss": 0.1931, "step": 38 }, { "epoch": 5.428571428571429, "eval_loss": 0.9881438612937927, "eval_matthews_correlation": 0.45462446923262295, "eval_runtime": 2.4467, "eval_samples_per_second": 90.327, "eval_steps_per_second": 1.635, "step": 38 }, { "epoch": 5.571428571428571, "grad_norm": 30.5, "learning_rate": 2.2794117647058825e-05, "loss": 0.2088, "step": 39 }, { "epoch": 5.571428571428571, "eval_loss": 0.9699224829673767, "eval_matthews_correlation": 0.46521158567025794, "eval_runtime": 2.4373, "eval_samples_per_second": 90.673, "eval_steps_per_second": 1.641, "step": 39 }, { "epoch": 5.714285714285714, "grad_norm": 18.125, "learning_rate": 2.2058823529411766e-05, "loss": 0.0894, "step": 40 }, { "epoch": 5.714285714285714, "eval_loss": 0.9157021045684814, "eval_matthews_correlation": 0.4757491334290412, "eval_runtime": 2.4471, "eval_samples_per_second": 90.312, "eval_steps_per_second": 1.635, "step": 40 }, { "epoch": 5.857142857142857, "grad_norm": 13.75, "learning_rate": 2.1323529411764707e-05, "loss": 0.0847, "step": 41 }, { "epoch": 5.857142857142857, "eval_loss": 0.875838041305542, "eval_matthews_correlation": 0.4535865464379655, "eval_runtime": 2.4434, "eval_samples_per_second": 90.449, "eval_steps_per_second": 1.637, "step": 41 }, { "epoch": 6.0, "grad_norm": 7.78125, "learning_rate": 2.058823529411765e-05, "loss": 0.0814, "step": 42 }, { "epoch": 6.0, "eval_loss": 0.8775647282600403, "eval_matthews_correlation": 0.5032327641012869, "eval_runtime": 2.4462, "eval_samples_per_second": 90.343, "eval_steps_per_second": 1.635, "step": 42 }, { "epoch": 6.142857142857143, "grad_norm": 11.1875, "learning_rate": 1.9852941176470586e-05, "loss": 0.0322, "step": 43 }, { "epoch": 6.142857142857143, "eval_loss": 0.8905591368675232, "eval_matthews_correlation": 0.4985778131509898, "eval_runtime": 2.4501, "eval_samples_per_second": 90.201, "eval_steps_per_second": 1.633, "step": 43 }, { "epoch": 6.285714285714286, "grad_norm": 18.875, "learning_rate": 1.9117647058823528e-05, "loss": 0.0929, "step": 44 }, { "epoch": 6.285714285714286, "eval_loss": 0.8745710849761963, "eval_matthews_correlation": 0.5063592272792496, "eval_runtime": 2.4431, "eval_samples_per_second": 90.459, "eval_steps_per_second": 1.637, "step": 44 }, { "epoch": 6.428571428571429, "grad_norm": 23.375, "learning_rate": 1.8382352941176472e-05, "loss": 0.092, "step": 45 }, { "epoch": 6.428571428571429, "eval_loss": 0.8459537625312805, "eval_matthews_correlation": 0.5221955837331487, "eval_runtime": 2.4409, "eval_samples_per_second": 90.541, "eval_steps_per_second": 1.639, "step": 45 }, { "epoch": 6.571428571428571, "grad_norm": 14.1875, "learning_rate": 1.7647058823529414e-05, "loss": 0.0364, "step": 46 }, { "epoch": 6.571428571428571, "eval_loss": 0.8510618805885315, "eval_matthews_correlation": 0.471552805496144, "eval_runtime": 2.4458, "eval_samples_per_second": 90.361, "eval_steps_per_second": 1.635, "step": 46 }, { "epoch": 6.714285714285714, "grad_norm": 7.65625, "learning_rate": 1.6911764705882355e-05, "loss": 0.0793, "step": 47 }, { "epoch": 6.714285714285714, "eval_loss": 0.8684913516044617, "eval_matthews_correlation": 0.4581214898872277, "eval_runtime": 2.4434, "eval_samples_per_second": 90.447, "eval_steps_per_second": 1.637, "step": 47 }, { "epoch": 6.857142857142857, "grad_norm": 16.875, "learning_rate": 1.6176470588235296e-05, "loss": 0.1175, "step": 48 }, { "epoch": 6.857142857142857, "eval_loss": 0.8669148683547974, "eval_matthews_correlation": 0.4594112493758089, "eval_runtime": 2.452, "eval_samples_per_second": 90.13, "eval_steps_per_second": 1.631, "step": 48 }, { "epoch": 7.0, "grad_norm": 12.9375, "learning_rate": 1.5441176470588237e-05, "loss": 0.0645, "step": 49 }, { "epoch": 7.0, "eval_loss": 0.8497275710105896, "eval_matthews_correlation": 0.471552805496144, "eval_runtime": 2.4455, "eval_samples_per_second": 90.372, "eval_steps_per_second": 1.636, "step": 49 }, { "epoch": 7.142857142857143, "grad_norm": 1.5625, "learning_rate": 1.4705882352941177e-05, "loss": 0.0144, "step": 50 }, { "epoch": 7.142857142857143, "eval_loss": 0.8556525707244873, "eval_matthews_correlation": 0.48388603901876864, "eval_runtime": 2.4464, "eval_samples_per_second": 90.335, "eval_steps_per_second": 1.635, "step": 50 }, { "epoch": 7.285714285714286, "grad_norm": 4.1875, "learning_rate": 1.3970588235294118e-05, "loss": 0.0368, "step": 51 }, { "epoch": 7.285714285714286, "eval_loss": 0.8561367988586426, "eval_matthews_correlation": 0.498477763912085, "eval_runtime": 2.4469, "eval_samples_per_second": 90.318, "eval_steps_per_second": 1.635, "step": 51 }, { "epoch": 7.428571428571429, "grad_norm": 5.15625, "learning_rate": 1.323529411764706e-05, "loss": 0.0434, "step": 52 }, { "epoch": 7.428571428571429, "eval_loss": 0.8746027946472168, "eval_matthews_correlation": 0.49478720037093177, "eval_runtime": 2.4474, "eval_samples_per_second": 90.301, "eval_steps_per_second": 1.634, "step": 52 }, { "epoch": 7.571428571428571, "grad_norm": 8.9375, "learning_rate": 1.25e-05, "loss": 0.0601, "step": 53 }, { "epoch": 7.571428571428571, "eval_loss": 0.8838711977005005, "eval_matthews_correlation": 0.4974465105449455, "eval_runtime": 2.4486, "eval_samples_per_second": 90.257, "eval_steps_per_second": 1.634, "step": 53 }, { "epoch": 7.714285714285714, "grad_norm": 9.375, "learning_rate": 1.1764705882352942e-05, "loss": 0.0495, "step": 54 }, { "epoch": 7.714285714285714, "eval_loss": 0.9004490375518799, "eval_matthews_correlation": 0.4974465105449455, "eval_runtime": 2.4468, "eval_samples_per_second": 90.322, "eval_steps_per_second": 1.635, "step": 54 }, { "epoch": 7.857142857142857, "grad_norm": 10.375, "learning_rate": 1.1029411764705883e-05, "loss": 0.038, "step": 55 }, { "epoch": 7.857142857142857, "eval_loss": 0.9040365219116211, "eval_matthews_correlation": 0.4922849337838682, "eval_runtime": 2.4021, "eval_samples_per_second": 92.005, "eval_steps_per_second": 1.665, "step": 55 }, { "epoch": 8.0, "grad_norm": 5.46875, "learning_rate": 1.0294117647058824e-05, "loss": 0.0201, "step": 56 }, { "epoch": 8.0, "eval_loss": 0.9134412407875061, "eval_matthews_correlation": 0.45553170828567224, "eval_runtime": 2.4448, "eval_samples_per_second": 90.396, "eval_steps_per_second": 1.636, "step": 56 }, { "epoch": 8.142857142857142, "grad_norm": 5.59375, "learning_rate": 9.558823529411764e-06, "loss": 0.0205, "step": 57 }, { "epoch": 8.142857142857142, "eval_loss": 0.927843451499939, "eval_matthews_correlation": 0.47656224625627874, "eval_runtime": 2.3994, "eval_samples_per_second": 92.108, "eval_steps_per_second": 1.667, "step": 57 }, { "epoch": 8.285714285714286, "grad_norm": 4.0, "learning_rate": 8.823529411764707e-06, "loss": 0.0161, "step": 58 }, { "epoch": 8.285714285714286, "eval_loss": 0.9634011387825012, "eval_matthews_correlation": 0.496768767764674, "eval_runtime": 2.4014, "eval_samples_per_second": 92.028, "eval_steps_per_second": 1.666, "step": 58 }, { "epoch": 8.428571428571429, "grad_norm": 12.9375, "learning_rate": 8.088235294117648e-06, "loss": 0.0345, "step": 59 }, { "epoch": 8.428571428571429, "eval_loss": 0.9694340825080872, "eval_matthews_correlation": 0.4972077082664266, "eval_runtime": 2.4455, "eval_samples_per_second": 90.37, "eval_steps_per_second": 1.636, "step": 59 }, { "epoch": 8.571428571428571, "grad_norm": 4.84375, "learning_rate": 7.3529411764705884e-06, "loss": 0.014, "step": 60 }, { "epoch": 8.571428571428571, "eval_loss": 0.9751154184341431, "eval_matthews_correlation": 0.4972077082664266, "eval_runtime": 2.3952, "eval_samples_per_second": 92.267, "eval_steps_per_second": 1.67, "step": 60 }, { "epoch": 8.714285714285714, "grad_norm": 15.6875, "learning_rate": 6.61764705882353e-06, "loss": 0.038, "step": 61 }, { "epoch": 8.714285714285714, "eval_loss": 0.9702379107475281, "eval_matthews_correlation": 0.5075469372121825, "eval_runtime": 2.3957, "eval_samples_per_second": 92.25, "eval_steps_per_second": 1.67, "step": 61 }, { "epoch": 8.857142857142858, "grad_norm": 7.78125, "learning_rate": 5.882352941176471e-06, "loss": 0.0316, "step": 62 }, { "epoch": 8.857142857142858, "eval_loss": 0.9534982442855835, "eval_matthews_correlation": 0.517455027315522, "eval_runtime": 2.3965, "eval_samples_per_second": 92.218, "eval_steps_per_second": 1.669, "step": 62 }, { "epoch": 9.0, "grad_norm": 10.4375, "learning_rate": 5.147058823529412e-06, "loss": 0.0253, "step": 63 }, { "epoch": 9.0, "eval_loss": 0.9324150681495667, "eval_matthews_correlation": 0.4687627817830735, "eval_runtime": 2.4459, "eval_samples_per_second": 90.356, "eval_steps_per_second": 1.635, "step": 63 }, { "epoch": 9.142857142857142, "grad_norm": 2.6875, "learning_rate": 4.411764705882353e-06, "loss": 0.0231, "step": 64 }, { "epoch": 9.142857142857142, "eval_loss": 0.9279094934463501, "eval_matthews_correlation": 0.45553170828567224, "eval_runtime": 2.4506, "eval_samples_per_second": 90.182, "eval_steps_per_second": 1.632, "step": 64 }, { "epoch": 9.285714285714286, "grad_norm": 1.40625, "learning_rate": 3.6764705882352942e-06, "loss": 0.0075, "step": 65 }, { "epoch": 9.285714285714286, "eval_loss": 0.9252597689628601, "eval_matthews_correlation": 0.45144276042785747, "eval_runtime": 2.4513, "eval_samples_per_second": 90.155, "eval_steps_per_second": 1.632, "step": 65 }, { "epoch": 9.428571428571429, "grad_norm": 2.515625, "learning_rate": 2.9411764705882355e-06, "loss": 0.0108, "step": 66 }, { "epoch": 9.428571428571429, "eval_loss": 0.9272325038909912, "eval_matthews_correlation": 0.47312414294796906, "eval_runtime": 2.4484, "eval_samples_per_second": 90.261, "eval_steps_per_second": 1.634, "step": 66 }, { "epoch": 9.571428571428571, "grad_norm": 1.21875, "learning_rate": 2.2058823529411767e-06, "loss": 0.0045, "step": 67 }, { "epoch": 9.571428571428571, "eval_loss": 0.9301801919937134, "eval_matthews_correlation": 0.4865760231907581, "eval_runtime": 2.4029, "eval_samples_per_second": 91.971, "eval_steps_per_second": 1.665, "step": 67 }, { "epoch": 9.714285714285714, "grad_norm": 3.9375, "learning_rate": 1.4705882352941177e-06, "loss": 0.0132, "step": 68 }, { "epoch": 9.714285714285714, "eval_loss": 0.9324151873588562, "eval_matthews_correlation": 0.4865760231907581, "eval_runtime": 2.4466, "eval_samples_per_second": 90.33, "eval_steps_per_second": 1.635, "step": 68 }, { "epoch": 9.857142857142858, "grad_norm": 5.09375, "learning_rate": 7.352941176470589e-07, "loss": 0.0207, "step": 69 }, { "epoch": 9.857142857142858, "eval_loss": 0.9318564534187317, "eval_matthews_correlation": 0.4865760231907581, "eval_runtime": 2.4458, "eval_samples_per_second": 90.357, "eval_steps_per_second": 1.635, "step": 69 }, { "epoch": 10.0, "grad_norm": 2.40625, "learning_rate": 0.0, "loss": 0.009, "step": 70 }, { "epoch": 10.0, "eval_loss": 0.9318756461143494, "eval_matthews_correlation": 0.4865760231907581, "eval_runtime": 2.4456, "eval_samples_per_second": 90.368, "eval_steps_per_second": 1.636, "step": 70 }, { "epoch": 10.0, "step": 70, "total_flos": 3.960220259503309e+16, "train_loss": 0.5831886287379477, "train_runtime": 467.92, "train_samples_per_second": 18.871, "train_steps_per_second": 0.15 } ], "logging_steps": 1, "max_steps": 70, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.960220259503309e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }