|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.974958263772955, |
|
"eval_steps": 10, |
|
"global_step": 745, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3e-05, |
|
"loss": 2.3217, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4901315789473684, |
|
"eval_loss": 0.7262606620788574, |
|
"eval_runtime": 25.3718, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.995, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3e-05, |
|
"loss": 0.56, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.5526315789473685, |
|
"eval_loss": 0.6898001432418823, |
|
"eval_runtime": 25.3761, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5281, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.5888157894736842, |
|
"eval_loss": 0.6465093493461609, |
|
"eval_runtime": 25.3947, |
|
"eval_samples_per_second": 11.971, |
|
"eval_steps_per_second": 2.993, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.994, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5986842105263158, |
|
"eval_loss": 0.7351367473602295, |
|
"eval_runtime": 25.4621, |
|
"eval_samples_per_second": 11.939, |
|
"eval_steps_per_second": 2.985, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4785, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.6118421052631579, |
|
"eval_loss": 0.6004362106323242, |
|
"eval_runtime": 25.4138, |
|
"eval_samples_per_second": 11.962, |
|
"eval_steps_per_second": 2.991, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4732, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.6348684210526315, |
|
"eval_loss": 0.5782976746559143, |
|
"eval_runtime": 25.369, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 2.996, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4466, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.6414473684210527, |
|
"eval_loss": 0.5713546872138977, |
|
"eval_runtime": 25.3858, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 3e-05, |
|
"loss": 0.8737, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.618421052631579, |
|
"eval_loss": 0.567269504070282, |
|
"eval_runtime": 25.3639, |
|
"eval_samples_per_second": 11.986, |
|
"eval_steps_per_second": 2.996, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4471, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.6282894736842105, |
|
"eval_loss": 0.5630530118942261, |
|
"eval_runtime": 25.3754, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.46, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.6348684210526315, |
|
"eval_loss": 0.5503756999969482, |
|
"eval_runtime": 25.3686, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 2.996, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3294, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.625, |
|
"eval_loss": 0.6009898781776428, |
|
"eval_runtime": 25.3647, |
|
"eval_samples_per_second": 11.985, |
|
"eval_steps_per_second": 2.996, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6526, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.6282894736842105, |
|
"eval_loss": 0.5730816721916199, |
|
"eval_runtime": 25.3832, |
|
"eval_samples_per_second": 11.976, |
|
"eval_steps_per_second": 2.994, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3712, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.6447368421052632, |
|
"eval_loss": 0.5378755331039429, |
|
"eval_runtime": 25.3825, |
|
"eval_samples_per_second": 11.977, |
|
"eval_steps_per_second": 2.994, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3341, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.6282894736842105, |
|
"eval_loss": 0.5408769249916077, |
|
"eval_runtime": 25.3787, |
|
"eval_samples_per_second": 11.979, |
|
"eval_steps_per_second": 2.995, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.552, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6381578947368421, |
|
"eval_loss": 0.5310616493225098, |
|
"eval_runtime": 25.4512, |
|
"eval_samples_per_second": 11.944, |
|
"eval_steps_per_second": 2.986, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4681, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.6414473684210527, |
|
"eval_loss": 0.5371212959289551, |
|
"eval_runtime": 25.3633, |
|
"eval_samples_per_second": 11.986, |
|
"eval_steps_per_second": 2.996, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3119, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.6282894736842105, |
|
"eval_loss": 0.6171860694885254, |
|
"eval_runtime": 25.3622, |
|
"eval_samples_per_second": 11.986, |
|
"eval_steps_per_second": 2.997, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3082, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.5360857844352722, |
|
"eval_runtime": 25.3868, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5217, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_accuracy": 0.625, |
|
"eval_loss": 0.5467653870582581, |
|
"eval_runtime": 25.3846, |
|
"eval_samples_per_second": 11.976, |
|
"eval_steps_per_second": 2.994, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3888, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.631578947368421, |
|
"eval_loss": 0.5891463756561279, |
|
"eval_runtime": 25.374, |
|
"eval_samples_per_second": 11.981, |
|
"eval_steps_per_second": 2.995, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2841, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.6282894736842105, |
|
"eval_loss": 0.5428625345230103, |
|
"eval_runtime": 25.3734, |
|
"eval_samples_per_second": 11.981, |
|
"eval_steps_per_second": 2.995, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2728, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.6381578947368421, |
|
"eval_loss": 0.5246651768684387, |
|
"eval_runtime": 25.3858, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3e-05, |
|
"loss": 0.5563, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.5003584027290344, |
|
"eval_runtime": 25.3763, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2862, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 0.4741169810295105, |
|
"eval_runtime": 25.3852, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2289, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.5441343188285828, |
|
"eval_runtime": 25.3897, |
|
"eval_samples_per_second": 11.973, |
|
"eval_steps_per_second": 2.993, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2481, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.5170696377754211, |
|
"eval_runtime": 25.3747, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.329, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 0.5371391177177429, |
|
"eval_runtime": 25.3708, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.996, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1741, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.6677631578947368, |
|
"eval_loss": 0.5411613583564758, |
|
"eval_runtime": 25.3792, |
|
"eval_samples_per_second": 11.978, |
|
"eval_steps_per_second": 2.995, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2888, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.6710526315789473, |
|
"eval_loss": 0.5130823850631714, |
|
"eval_runtime": 25.3665, |
|
"eval_samples_per_second": 11.984, |
|
"eval_steps_per_second": 2.996, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4157, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6447368421052632, |
|
"eval_loss": 0.45547178387641907, |
|
"eval_runtime": 25.356, |
|
"eval_samples_per_second": 11.989, |
|
"eval_steps_per_second": 2.997, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1982, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.6611842105263158, |
|
"eval_loss": 0.5670450329780579, |
|
"eval_runtime": 25.3778, |
|
"eval_samples_per_second": 11.979, |
|
"eval_steps_per_second": 2.995, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3e-05, |
|
"loss": 0.106, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.6677631578947368, |
|
"eval_loss": 0.7942893505096436, |
|
"eval_runtime": 25.3984, |
|
"eval_samples_per_second": 11.969, |
|
"eval_steps_per_second": 2.992, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1718, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.6644736842105263, |
|
"eval_loss": 0.7496399879455566, |
|
"eval_runtime": 25.3753, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.214, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.6842105263157895, |
|
"eval_loss": 0.626396656036377, |
|
"eval_runtime": 25.3727, |
|
"eval_samples_per_second": 11.981, |
|
"eval_steps_per_second": 2.995, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1571, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.631578947368421, |
|
"eval_loss": 0.6138848066329956, |
|
"eval_runtime": 25.3708, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.996, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1432, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.6842105263157895, |
|
"eval_loss": 0.6198970675468445, |
|
"eval_runtime": 25.3819, |
|
"eval_samples_per_second": 11.977, |
|
"eval_steps_per_second": 2.994, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1038, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_accuracy": 0.6973684210526315, |
|
"eval_loss": 0.636822521686554, |
|
"eval_runtime": 25.4286, |
|
"eval_samples_per_second": 11.955, |
|
"eval_steps_per_second": 2.989, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1728, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_accuracy": 0.6677631578947368, |
|
"eval_loss": 0.7889474630355835, |
|
"eval_runtime": 25.3888, |
|
"eval_samples_per_second": 11.974, |
|
"eval_steps_per_second": 2.993, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3e-05, |
|
"loss": 0.14, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 0.795179545879364, |
|
"eval_runtime": 25.3862, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1522, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.6578947368421053, |
|
"eval_loss": 0.7745038866996765, |
|
"eval_runtime": 25.3703, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 2.996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1345, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.7230806946754456, |
|
"eval_runtime": 25.3678, |
|
"eval_samples_per_second": 11.984, |
|
"eval_steps_per_second": 2.996, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1587, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.6480263157894737, |
|
"eval_loss": 0.7153956890106201, |
|
"eval_runtime": 25.3506, |
|
"eval_samples_per_second": 11.992, |
|
"eval_steps_per_second": 2.998, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1391, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.6923157572746277, |
|
"eval_runtime": 25.3652, |
|
"eval_samples_per_second": 11.985, |
|
"eval_steps_per_second": 2.996, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.129, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.6710526315789473, |
|
"eval_loss": 0.6483842730522156, |
|
"eval_runtime": 25.3639, |
|
"eval_samples_per_second": 11.986, |
|
"eval_steps_per_second": 2.996, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2092, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_accuracy": 0.6743421052631579, |
|
"eval_loss": 0.5821840167045593, |
|
"eval_runtime": 25.3814, |
|
"eval_samples_per_second": 11.977, |
|
"eval_steps_per_second": 2.994, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.015, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_accuracy": 0.6578947368421053, |
|
"eval_loss": 1.1217145919799805, |
|
"eval_runtime": 25.3698, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 2.996, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3e-05, |
|
"loss": 0.051, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.6480263157894737, |
|
"eval_loss": 1.5789867639541626, |
|
"eval_runtime": 25.3704, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.996, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0999, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.6677631578947368, |
|
"eval_loss": 1.5168237686157227, |
|
"eval_runtime": 25.3794, |
|
"eval_samples_per_second": 11.978, |
|
"eval_steps_per_second": 2.995, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1776, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_accuracy": 0.6875, |
|
"eval_loss": 1.2342281341552734, |
|
"eval_runtime": 25.3944, |
|
"eval_samples_per_second": 11.971, |
|
"eval_steps_per_second": 2.993, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0612, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_accuracy": 0.6973684210526315, |
|
"eval_loss": 1.0370548963546753, |
|
"eval_runtime": 25.385, |
|
"eval_samples_per_second": 11.976, |
|
"eval_steps_per_second": 2.994, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0858, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_accuracy": 0.6776315789473685, |
|
"eval_loss": 1.0277096033096313, |
|
"eval_runtime": 25.386, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0316, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_accuracy": 0.680921052631579, |
|
"eval_loss": 1.0386649370193481, |
|
"eval_runtime": 25.3706, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.996, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1899, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_accuracy": 0.6907894736842105, |
|
"eval_loss": 0.8184946775436401, |
|
"eval_runtime": 25.3771, |
|
"eval_samples_per_second": 11.979, |
|
"eval_steps_per_second": 2.995, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1517, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_accuracy": 0.6842105263157895, |
|
"eval_loss": 0.7053664922714233, |
|
"eval_runtime": 25.3758, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0324, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_accuracy": 0.6842105263157895, |
|
"eval_loss": 0.8504552245140076, |
|
"eval_runtime": 25.3714, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.995, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0646, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.6611842105263158, |
|
"eval_loss": 1.0056827068328857, |
|
"eval_runtime": 25.3614, |
|
"eval_samples_per_second": 11.987, |
|
"eval_steps_per_second": 2.997, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1038, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_accuracy": 0.6644736842105263, |
|
"eval_loss": 1.0026819705963135, |
|
"eval_runtime": 25.376, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0844, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.9926165342330933, |
|
"eval_runtime": 25.3651, |
|
"eval_samples_per_second": 11.985, |
|
"eval_steps_per_second": 2.996, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0986, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.6578947368421053, |
|
"eval_loss": 0.9245979189872742, |
|
"eval_runtime": 25.3574, |
|
"eval_samples_per_second": 11.989, |
|
"eval_steps_per_second": 2.997, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0627, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 0.8538947105407715, |
|
"eval_runtime": 25.3724, |
|
"eval_samples_per_second": 11.982, |
|
"eval_steps_per_second": 2.995, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0513, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_accuracy": 0.6513157894736842, |
|
"eval_loss": 0.924721896648407, |
|
"eval_runtime": 25.4077, |
|
"eval_samples_per_second": 11.965, |
|
"eval_steps_per_second": 2.991, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0484, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 1.112806797027588, |
|
"eval_runtime": 25.3865, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0244, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_accuracy": 0.6480263157894737, |
|
"eval_loss": 1.2701855897903442, |
|
"eval_runtime": 25.3744, |
|
"eval_samples_per_second": 11.981, |
|
"eval_steps_per_second": 2.995, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0672, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_accuracy": 0.6414473684210527, |
|
"eval_loss": 1.716863751411438, |
|
"eval_runtime": 25.3891, |
|
"eval_samples_per_second": 11.974, |
|
"eval_steps_per_second": 2.993, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0824, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"eval_accuracy": 0.6414473684210527, |
|
"eval_loss": 1.662705659866333, |
|
"eval_runtime": 25.3803, |
|
"eval_samples_per_second": 11.978, |
|
"eval_steps_per_second": 2.994, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0068, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_accuracy": 0.6348684210526315, |
|
"eval_loss": 1.342494010925293, |
|
"eval_runtime": 25.376, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3e-05, |
|
"loss": 0.044, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_accuracy": 0.6611842105263158, |
|
"eval_loss": 1.2208458185195923, |
|
"eval_runtime": 25.3753, |
|
"eval_samples_per_second": 11.98, |
|
"eval_steps_per_second": 2.995, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0378, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_accuracy": 0.6447368421052632, |
|
"eval_loss": 1.289115309715271, |
|
"eval_runtime": 25.3905, |
|
"eval_samples_per_second": 11.973, |
|
"eval_steps_per_second": 2.993, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0411, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_accuracy": 0.6611842105263158, |
|
"eval_loss": 1.3528344631195068, |
|
"eval_runtime": 25.3678, |
|
"eval_samples_per_second": 11.984, |
|
"eval_steps_per_second": 2.996, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0215, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_accuracy": 0.6677631578947368, |
|
"eval_loss": 1.2606314420700073, |
|
"eval_runtime": 25.3852, |
|
"eval_samples_per_second": 11.975, |
|
"eval_steps_per_second": 2.994, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0438, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_accuracy": 0.6546052631578947, |
|
"eval_loss": 1.2514981031417847, |
|
"eval_runtime": 25.3679, |
|
"eval_samples_per_second": 11.984, |
|
"eval_steps_per_second": 2.996, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0936, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.6644736842105263, |
|
"eval_loss": 1.0857858657836914, |
|
"eval_runtime": 25.3812, |
|
"eval_samples_per_second": 11.977, |
|
"eval_steps_per_second": 2.994, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0305, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_accuracy": 0.6578947368421053, |
|
"eval_loss": 0.9838737845420837, |
|
"eval_runtime": 25.3673, |
|
"eval_samples_per_second": 11.984, |
|
"eval_steps_per_second": 2.996, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0282, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.680921052631579, |
|
"eval_loss": 1.0233386754989624, |
|
"eval_runtime": 25.3685, |
|
"eval_samples_per_second": 11.983, |
|
"eval_steps_per_second": 2.996, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"step": 745, |
|
"total_flos": 2.959225892752589e+17, |
|
"train_loss": 0.25739487704614666, |
|
"train_runtime": 4901.8806, |
|
"train_samples_per_second": 2.444, |
|
"train_steps_per_second": 0.152 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 745, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 150, |
|
"total_flos": 2.959225892752589e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|