{
  "best_metric": 0.6427481174468994,
  "best_model_checkpoint": "./models/adapters_mlm_cn/mt/checkpoint-40000",
  "epoch": 82.81573498964804,
  "eval_steps": 500,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.04,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 2.6689,
      "step": 500
    },
    {
      "epoch": 1.04,
      "eval_accuracy": 0.6676783004552352,
      "eval_loss": 2.1344573497772217,
      "eval_runtime": 1.6941,
      "eval_samples_per_second": 506.449,
      "eval_steps_per_second": 31.874,
      "step": 500
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.9e-05,
      "loss": 2.1415,
      "step": 1000
    },
    {
      "epoch": 2.07,
      "eval_accuracy": 0.6926470588235294,
      "eval_loss": 1.8435733318328857,
      "eval_runtime": 1.6896,
      "eval_samples_per_second": 507.824,
      "eval_steps_per_second": 31.961,
      "step": 1000
    },
    {
      "epoch": 3.11,
      "learning_rate": 4.85e-05,
      "loss": 1.9421,
      "step": 1500
    },
    {
      "epoch": 3.11,
      "eval_accuracy": 0.690674753601213,
      "eval_loss": 1.787391185760498,
      "eval_runtime": 1.6956,
      "eval_samples_per_second": 506.016,
      "eval_steps_per_second": 31.847,
      "step": 1500
    },
    {
      "epoch": 4.14,
      "learning_rate": 4.8e-05,
      "loss": 1.7588,
      "step": 2000
    },
    {
      "epoch": 4.14,
      "eval_accuracy": 0.7012509197939661,
      "eval_loss": 1.760498285293579,
      "eval_runtime": 1.6894,
      "eval_samples_per_second": 507.882,
      "eval_steps_per_second": 31.965,
      "step": 2000
    },
    {
      "epoch": 5.18,
      "learning_rate": 4.75e-05,
      "loss": 1.6729,
      "step": 2500
    },
    {
      "epoch": 5.18,
      "eval_accuracy": 0.6956845238095238,
      "eval_loss": 1.7567747831344604,
      "eval_runtime": 1.6937,
      "eval_samples_per_second": 506.597,
      "eval_steps_per_second": 31.884,
      "step": 2500
    },
    {
      "epoch": 6.21,
      "learning_rate": 4.7e-05,
      "loss": 1.596,
      "step": 3000
    },
    {
      "epoch": 6.21,
      "eval_accuracy": 0.7272727272727273,
      "eval_loss": 1.5006115436553955,
      "eval_runtime": 1.6969,
      "eval_samples_per_second": 505.627,
      "eval_steps_per_second": 31.823,
      "step": 3000
    },
    {
      "epoch": 7.25,
      "learning_rate": 4.6500000000000005e-05,
      "loss": 1.5778,
      "step": 3500
    },
    {
      "epoch": 7.25,
      "eval_accuracy": 0.7450832072617246,
      "eval_loss": 1.3923866748809814,
      "eval_runtime": 1.6898,
      "eval_samples_per_second": 507.75,
      "eval_steps_per_second": 31.956,
      "step": 3500
    },
    {
      "epoch": 8.28,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.4821,
      "step": 4000
    },
    {
      "epoch": 8.28,
      "eval_accuracy": 0.7099236641221374,
      "eval_loss": 1.609680414199829,
      "eval_runtime": 1.6898,
      "eval_samples_per_second": 507.758,
      "eval_steps_per_second": 31.957,
      "step": 4000
    },
    {
      "epoch": 9.32,
      "learning_rate": 4.55e-05,
      "loss": 1.4183,
      "step": 4500
    },
    {
      "epoch": 9.32,
      "eval_accuracy": 0.7490551776266062,
      "eval_loss": 1.3551626205444336,
      "eval_runtime": 1.6905,
      "eval_samples_per_second": 507.539,
      "eval_steps_per_second": 31.943,
      "step": 4500
    },
    {
      "epoch": 10.35,
      "learning_rate": 4.5e-05,
      "loss": 1.4197,
      "step": 5000
    },
    {
      "epoch": 10.35,
      "eval_accuracy": 0.7513471901462664,
      "eval_loss": 1.284741997718811,
      "eval_runtime": 1.6878,
      "eval_samples_per_second": 508.353,
      "eval_steps_per_second": 31.994,
      "step": 5000
    },
    {
      "epoch": 11.39,
      "learning_rate": 4.4500000000000004e-05,
      "loss": 1.3156,
      "step": 5500
    },
    {
      "epoch": 11.39,
      "eval_accuracy": 0.7496318114874816,
      "eval_loss": 1.3172950744628906,
      "eval_runtime": 1.6891,
      "eval_samples_per_second": 507.956,
      "eval_steps_per_second": 31.969,
      "step": 5500
    },
    {
      "epoch": 12.42,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.2882,
      "step": 6000
    },
    {
      "epoch": 12.42,
      "eval_accuracy": 0.7738095238095238,
      "eval_loss": 1.2816879749298096,
      "eval_runtime": 1.6955,
      "eval_samples_per_second": 506.058,
      "eval_steps_per_second": 31.85,
      "step": 6000
    },
    {
      "epoch": 13.46,
      "learning_rate": 4.35e-05,
      "loss": 1.2692,
      "step": 6500
    },
    {
      "epoch": 13.46,
      "eval_accuracy": 0.775112443778111,
      "eval_loss": 1.189226746559143,
      "eval_runtime": 1.6876,
      "eval_samples_per_second": 508.403,
      "eval_steps_per_second": 31.997,
      "step": 6500
    },
    {
      "epoch": 14.49,
      "learning_rate": 4.3e-05,
      "loss": 1.2368,
      "step": 7000
    },
    {
      "epoch": 14.49,
      "eval_accuracy": 0.7816432272390822,
      "eval_loss": 1.2362936735153198,
      "eval_runtime": 1.6828,
      "eval_samples_per_second": 509.861,
      "eval_steps_per_second": 32.089,
      "step": 7000
    },
    {
      "epoch": 15.53,
      "learning_rate": 4.25e-05,
      "loss": 1.1975,
      "step": 7500
    },
    {
      "epoch": 15.53,
      "eval_accuracy": 0.76996336996337,
      "eval_loss": 1.2442289590835571,
      "eval_runtime": 1.6849,
      "eval_samples_per_second": 509.233,
      "eval_steps_per_second": 32.05,
      "step": 7500
    },
    {
      "epoch": 16.56,
      "learning_rate": 4.2e-05,
      "loss": 1.1907,
      "step": 8000
    },
    {
      "epoch": 16.56,
      "eval_accuracy": 0.7720320466132556,
      "eval_loss": 1.256901502609253,
      "eval_runtime": 1.6884,
      "eval_samples_per_second": 508.188,
      "eval_steps_per_second": 31.984,
      "step": 8000
    },
    {
      "epoch": 17.6,
      "learning_rate": 4.15e-05,
      "loss": 1.1231,
      "step": 8500
    },
    {
      "epoch": 17.6,
      "eval_accuracy": 0.776085300837776,
      "eval_loss": 1.13861083984375,
      "eval_runtime": 1.6842,
      "eval_samples_per_second": 509.436,
      "eval_steps_per_second": 32.062,
      "step": 8500
    },
    {
      "epoch": 18.63,
      "learning_rate": 4.1e-05,
      "loss": 1.0873,
      "step": 9000
    },
    {
      "epoch": 18.63,
      "eval_accuracy": 0.7855547282204021,
      "eval_loss": 1.2104856967926025,
      "eval_runtime": 1.6846,
      "eval_samples_per_second": 509.309,
      "eval_steps_per_second": 32.054,
      "step": 9000
    },
    {
      "epoch": 19.67,
      "learning_rate": 4.05e-05,
      "loss": 1.1242,
      "step": 9500
    },
    {
      "epoch": 19.67,
      "eval_accuracy": 0.7737909516380655,
      "eval_loss": 1.214229702949524,
      "eval_runtime": 1.6848,
      "eval_samples_per_second": 509.258,
      "eval_steps_per_second": 32.051,
      "step": 9500
    },
    {
      "epoch": 20.7,
      "learning_rate": 4e-05,
      "loss": 1.0367,
      "step": 10000
    },
    {
      "epoch": 20.7,
      "eval_accuracy": 0.7712369597615499,
      "eval_loss": 1.2120734453201294,
      "eval_runtime": 1.6938,
      "eval_samples_per_second": 506.56,
      "eval_steps_per_second": 31.881,
      "step": 10000
    },
    {
      "epoch": 21.74,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 1.0869,
      "step": 10500
    },
    {
      "epoch": 21.74,
      "eval_accuracy": 0.7955390334572491,
      "eval_loss": 1.0782362222671509,
      "eval_runtime": 1.6863,
      "eval_samples_per_second": 508.795,
      "eval_steps_per_second": 32.022,
      "step": 10500
    },
    {
      "epoch": 22.77,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 1.0353,
      "step": 11000
    },
    {
      "epoch": 22.77,
      "eval_accuracy": 0.8068535825545171,
      "eval_loss": 0.9917858839035034,
      "eval_runtime": 1.6841,
      "eval_samples_per_second": 509.483,
      "eval_steps_per_second": 32.065,
      "step": 11000
    },
    {
      "epoch": 23.81,
      "learning_rate": 3.85e-05,
      "loss": 1.0324,
      "step": 11500
    },
    {
      "epoch": 23.81,
      "eval_accuracy": 0.7971233913701741,
      "eval_loss": 1.0908266305923462,
      "eval_runtime": 1.6848,
      "eval_samples_per_second": 509.246,
      "eval_steps_per_second": 32.05,
      "step": 11500
    },
    {
      "epoch": 24.84,
      "learning_rate": 3.8e-05,
      "loss": 1.0145,
      "step": 12000
    },
    {
      "epoch": 24.84,
      "eval_accuracy": 0.7975460122699386,
      "eval_loss": 1.0944875478744507,
      "eval_runtime": 1.6827,
      "eval_samples_per_second": 509.9,
      "eval_steps_per_second": 32.092,
      "step": 12000
    },
    {
      "epoch": 25.88,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.9951,
      "step": 12500
    },
    {
      "epoch": 25.88,
      "eval_accuracy": 0.8028064992614475,
      "eval_loss": 1.000519037246704,
      "eval_runtime": 1.6933,
      "eval_samples_per_second": 506.714,
      "eval_steps_per_second": 31.891,
      "step": 12500
    },
    {
      "epoch": 26.92,
      "learning_rate": 3.7e-05,
      "loss": 0.9483,
      "step": 13000
    },
    {
      "epoch": 26.92,
      "eval_accuracy": 0.8186646433990895,
      "eval_loss": 0.963790237903595,
      "eval_runtime": 1.6874,
      "eval_samples_per_second": 508.479,
      "eval_steps_per_second": 32.002,
      "step": 13000
    },
    {
      "epoch": 27.95,
      "learning_rate": 3.65e-05,
      "loss": 0.9304,
      "step": 13500
    },
    {
      "epoch": 27.95,
      "eval_accuracy": 0.8204747774480712,
      "eval_loss": 0.9761123657226562,
      "eval_runtime": 1.6869,
      "eval_samples_per_second": 508.622,
      "eval_steps_per_second": 32.011,
      "step": 13500
    },
    {
      "epoch": 28.99,
      "learning_rate": 3.6e-05,
      "loss": 0.8835,
      "step": 14000
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.8045801526717558,
      "eval_loss": 1.062032699584961,
      "eval_runtime": 1.6883,
      "eval_samples_per_second": 508.21,
      "eval_steps_per_second": 31.985,
      "step": 14000
    },
    {
      "epoch": 30.02,
      "learning_rate": 3.55e-05,
      "loss": 0.9097,
      "step": 14500
    },
    {
      "epoch": 30.02,
      "eval_accuracy": 0.806015037593985,
      "eval_loss": 0.9137569069862366,
      "eval_runtime": 1.6924,
      "eval_samples_per_second": 506.97,
      "eval_steps_per_second": 31.907,
      "step": 14500
    },
    {
      "epoch": 31.06,
      "learning_rate": 3.5e-05,
      "loss": 0.9293,
      "step": 15000
    },
    {
      "epoch": 31.06,
      "eval_accuracy": 0.8176197836166924,
      "eval_loss": 0.918023943901062,
      "eval_runtime": 1.6905,
      "eval_samples_per_second": 507.53,
      "eval_steps_per_second": 31.942,
      "step": 15000
    },
    {
      "epoch": 32.09,
      "learning_rate": 3.45e-05,
      "loss": 0.9043,
      "step": 15500
    },
    {
      "epoch": 32.09,
      "eval_accuracy": 0.8208269525267994,
      "eval_loss": 0.9214709401130676,
      "eval_runtime": 1.691,
      "eval_samples_per_second": 507.403,
      "eval_steps_per_second": 31.934,
      "step": 15500
    },
    {
      "epoch": 33.13,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.8581,
      "step": 16000
    },
    {
      "epoch": 33.13,
      "eval_accuracy": 0.822452229299363,
      "eval_loss": 0.9624596834182739,
      "eval_runtime": 1.6897,
      "eval_samples_per_second": 507.793,
      "eval_steps_per_second": 31.959,
      "step": 16000
    },
    {
      "epoch": 34.16,
      "learning_rate": 3.35e-05,
      "loss": 0.8638,
      "step": 16500
    },
    {
      "epoch": 34.16,
      "eval_accuracy": 0.8367816091954023,
      "eval_loss": 0.8585591316223145,
      "eval_runtime": 1.6912,
      "eval_samples_per_second": 507.329,
      "eval_steps_per_second": 31.93,
      "step": 16500
    },
    {
      "epoch": 35.2,
      "learning_rate": 3.3e-05,
      "loss": 0.874,
      "step": 17000
    },
    {
      "epoch": 35.2,
      "eval_accuracy": 0.8135072908672295,
      "eval_loss": 1.0043973922729492,
      "eval_runtime": 1.6896,
      "eval_samples_per_second": 507.801,
      "eval_steps_per_second": 31.96,
      "step": 17000
    },
    {
      "epoch": 36.23,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.8235,
      "step": 17500
    },
    {
      "epoch": 36.23,
      "eval_accuracy": 0.8183890577507599,
      "eval_loss": 0.9755066633224487,
      "eval_runtime": 1.6947,
      "eval_samples_per_second": 506.289,
      "eval_steps_per_second": 31.864,
      "step": 17500
    },
    {
      "epoch": 37.27,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.8589,
      "step": 18000
    },
    {
      "epoch": 37.27,
      "eval_accuracy": 0.8291761148904006,
      "eval_loss": 0.9042153358459473,
      "eval_runtime": 1.6905,
      "eval_samples_per_second": 507.55,
      "eval_steps_per_second": 31.944,
      "step": 18000
    },
    {
      "epoch": 38.3,
      "learning_rate": 3.15e-05,
      "loss": 0.8107,
      "step": 18500
    },
    {
      "epoch": 38.3,
      "eval_accuracy": 0.8272327964860908,
      "eval_loss": 0.8821109533309937,
      "eval_runtime": 1.6895,
      "eval_samples_per_second": 507.845,
      "eval_steps_per_second": 31.962,
      "step": 18500
    },
    {
      "epoch": 39.34,
      "learning_rate": 3.1e-05,
      "loss": 0.8346,
      "step": 19000
    },
    {
      "epoch": 39.34,
      "eval_accuracy": 0.8248286367098249,
      "eval_loss": 0.9061236381530762,
      "eval_runtime": 1.6919,
      "eval_samples_per_second": 507.136,
      "eval_steps_per_second": 31.918,
      "step": 19000
    },
    {
      "epoch": 40.37,
      "learning_rate": 3.05e-05,
      "loss": 0.8393,
      "step": 19500
    },
    {
      "epoch": 40.37,
      "eval_accuracy": 0.8234854151084517,
      "eval_loss": 0.9795840978622437,
      "eval_runtime": 1.6939,
      "eval_samples_per_second": 506.513,
      "eval_steps_per_second": 31.878,
      "step": 19500
    },
    {
      "epoch": 41.41,
      "learning_rate": 3e-05,
      "loss": 0.789,
      "step": 20000
    },
    {
      "epoch": 41.41,
      "eval_accuracy": 0.833076923076923,
      "eval_loss": 0.9014851450920105,
      "eval_runtime": 1.689,
      "eval_samples_per_second": 508.0,
      "eval_steps_per_second": 31.972,
      "step": 20000
    },
    {
      "epoch": 42.44,
      "learning_rate": 2.95e-05,
      "loss": 0.8121,
      "step": 20500
    },
    {
      "epoch": 42.44,
      "eval_accuracy": 0.8385913426265591,
      "eval_loss": 0.8589309453964233,
      "eval_runtime": 1.6873,
      "eval_samples_per_second": 508.516,
      "eval_steps_per_second": 32.005,
      "step": 20500
    },
    {
      "epoch": 43.48,
      "learning_rate": 2.9e-05,
      "loss": 0.7709,
      "step": 21000
    },
    {
      "epoch": 43.48,
      "eval_accuracy": 0.8350903614457831,
      "eval_loss": 0.8835715055465698,
      "eval_runtime": 1.6829,
      "eval_samples_per_second": 509.835,
      "eval_steps_per_second": 32.088,
      "step": 21000
    },
    {
      "epoch": 44.51,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.7922,
      "step": 21500
    },
    {
      "epoch": 44.51,
      "eval_accuracy": 0.817974105102818,
      "eval_loss": 0.9523779153823853,
      "eval_runtime": 1.6863,
      "eval_samples_per_second": 508.799,
      "eval_steps_per_second": 32.022,
      "step": 21500
    },
    {
      "epoch": 45.55,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.7457,
      "step": 22000
    },
    {
      "epoch": 45.55,
      "eval_accuracy": 0.8364451082897685,
      "eval_loss": 0.8350428938865662,
      "eval_runtime": 1.6901,
      "eval_samples_per_second": 507.673,
      "eval_steps_per_second": 31.951,
      "step": 22000
    },
    {
      "epoch": 46.58,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.7386,
      "step": 22500
    },
    {
      "epoch": 46.58,
      "eval_accuracy": 0.8340807174887892,
      "eval_loss": 0.9024766087532043,
      "eval_runtime": 1.6912,
      "eval_samples_per_second": 507.34,
      "eval_steps_per_second": 31.93,
      "step": 22500
    },
    {
      "epoch": 47.62,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.7515,
      "step": 23000
    },
    {
      "epoch": 47.62,
      "eval_accuracy": 0.8390166534496432,
      "eval_loss": 0.9091906547546387,
      "eval_runtime": 1.686,
      "eval_samples_per_second": 508.899,
      "eval_steps_per_second": 32.029,
      "step": 23000
    },
    {
      "epoch": 48.65,
      "learning_rate": 2.6500000000000004e-05,
      "loss": 0.7324,
      "step": 23500
    },
    {
      "epoch": 48.65,
      "eval_accuracy": 0.8420647149460708,
      "eval_loss": 0.8322407007217407,
      "eval_runtime": 1.6918,
      "eval_samples_per_second": 507.153,
      "eval_steps_per_second": 31.919,
      "step": 23500
    },
    {
      "epoch": 49.69,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.7314,
      "step": 24000
    },
    {
      "epoch": 49.69,
      "eval_accuracy": 0.8477078477078477,
      "eval_loss": 0.7967829704284668,
      "eval_runtime": 1.6933,
      "eval_samples_per_second": 506.713,
      "eval_steps_per_second": 31.891,
      "step": 24000
    },
    {
      "epoch": 50.72,
      "learning_rate": 2.5500000000000003e-05,
      "loss": 0.7442,
      "step": 24500
    },
    {
      "epoch": 50.72,
      "eval_accuracy": 0.8324407039020658,
      "eval_loss": 0.930473268032074,
      "eval_runtime": 1.6828,
      "eval_samples_per_second": 509.873,
      "eval_steps_per_second": 32.09,
      "step": 24500
    },
    {
      "epoch": 51.76,
      "learning_rate": 2.5e-05,
      "loss": 0.7074,
      "step": 25000
    },
    {
      "epoch": 51.76,
      "eval_accuracy": 0.820839580209895,
      "eval_loss": 1.001060962677002,
      "eval_runtime": 1.6867,
      "eval_samples_per_second": 508.672,
      "eval_steps_per_second": 32.014,
      "step": 25000
    },
    {
      "epoch": 52.8,
      "learning_rate": 2.45e-05,
      "loss": 0.739,
      "step": 25500
    },
    {
      "epoch": 52.8,
      "eval_accuracy": 0.8330945558739254,
      "eval_loss": 0.8732258677482605,
      "eval_runtime": 1.6896,
      "eval_samples_per_second": 507.823,
      "eval_steps_per_second": 31.961,
      "step": 25500
    },
    {
      "epoch": 53.83,
      "learning_rate": 2.4e-05,
      "loss": 0.7243,
      "step": 26000
    },
    {
      "epoch": 53.83,
      "eval_accuracy": 0.8479880774962743,
      "eval_loss": 0.7857112288475037,
      "eval_runtime": 1.687,
      "eval_samples_per_second": 508.591,
      "eval_steps_per_second": 32.009,
      "step": 26000
    },
    {
      "epoch": 54.87,
      "learning_rate": 2.35e-05,
      "loss": 0.6842,
      "step": 26500
    },
    {
      "epoch": 54.87,
      "eval_accuracy": 0.8377192982456141,
      "eval_loss": 0.7945135235786438,
      "eval_runtime": 1.6902,
      "eval_samples_per_second": 507.642,
      "eval_steps_per_second": 31.949,
      "step": 26500
    },
    {
      "epoch": 55.9,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.6991,
      "step": 27000
    },
    {
      "epoch": 55.9,
      "eval_accuracy": 0.8275351591413768,
      "eval_loss": 0.9627696871757507,
      "eval_runtime": 1.6871,
      "eval_samples_per_second": 508.578,
      "eval_steps_per_second": 32.008,
      "step": 27000
    },
    {
      "epoch": 56.94,
      "learning_rate": 2.25e-05,
      "loss": 0.6896,
      "step": 27500
    },
    {
      "epoch": 56.94,
      "eval_accuracy": 0.840960240060015,
      "eval_loss": 0.8363039493560791,
      "eval_runtime": 1.684,
      "eval_samples_per_second": 509.495,
      "eval_steps_per_second": 32.066,
      "step": 27500
    },
    {
      "epoch": 57.97,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.6925,
      "step": 28000
    },
    {
      "epoch": 57.97,
      "eval_accuracy": 0.8391812865497076,
      "eval_loss": 0.8432921767234802,
      "eval_runtime": 1.6968,
      "eval_samples_per_second": 505.655,
      "eval_steps_per_second": 31.824,
      "step": 28000
    },
    {
      "epoch": 59.01,
      "learning_rate": 2.15e-05,
      "loss": 0.7081,
      "step": 28500
    },
    {
      "epoch": 59.01,
      "eval_accuracy": 0.8223048327137547,
      "eval_loss": 1.0085676908493042,
      "eval_runtime": 1.69,
      "eval_samples_per_second": 507.688,
      "eval_steps_per_second": 31.952,
      "step": 28500
    },
    {
      "epoch": 60.04,
      "learning_rate": 2.1e-05,
      "loss": 0.6598,
      "step": 29000
    },
    {
      "epoch": 60.04,
      "eval_accuracy": 0.8333333333333334,
      "eval_loss": 0.9250668883323669,
      "eval_runtime": 1.686,
      "eval_samples_per_second": 508.895,
      "eval_steps_per_second": 32.028,
      "step": 29000
    },
    {
      "epoch": 61.08,
      "learning_rate": 2.05e-05,
      "loss": 0.6677,
      "step": 29500
    },
    {
      "epoch": 61.08,
      "eval_accuracy": 0.8437047756874095,
      "eval_loss": 0.8822752237319946,
      "eval_runtime": 1.693,
      "eval_samples_per_second": 506.807,
      "eval_steps_per_second": 31.897,
      "step": 29500
    },
    {
      "epoch": 62.11,
      "learning_rate": 2e-05,
      "loss": 0.695,
      "step": 30000
    },
    {
      "epoch": 62.11,
      "eval_accuracy": 0.8560371517027864,
      "eval_loss": 0.7750544548034668,
      "eval_runtime": 1.6969,
      "eval_samples_per_second": 505.632,
      "eval_steps_per_second": 31.823,
      "step": 30000
    },
    {
      "epoch": 63.15,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 0.7108,
      "step": 30500
    },
    {
      "epoch": 63.15,
      "eval_accuracy": 0.8481104651162791,
      "eval_loss": 0.8452057242393494,
      "eval_runtime": 1.6974,
      "eval_samples_per_second": 505.49,
      "eval_steps_per_second": 31.814,
      "step": 30500
    },
    {
      "epoch": 64.18,
      "learning_rate": 1.9e-05,
      "loss": 0.6721,
      "step": 31000
    },
    {
      "epoch": 64.18,
      "eval_accuracy": 0.8413284132841329,
      "eval_loss": 0.8559600114822388,
      "eval_runtime": 1.6936,
      "eval_samples_per_second": 506.623,
      "eval_steps_per_second": 31.885,
      "step": 31000
    },
    {
      "epoch": 65.22,
      "learning_rate": 1.85e-05,
      "loss": 0.6571,
      "step": 31500
    },
    {
      "epoch": 65.22,
      "eval_accuracy": 0.8163109756097561,
      "eval_loss": 0.98003089427948,
      "eval_runtime": 1.6913,
      "eval_samples_per_second": 507.303,
      "eval_steps_per_second": 31.928,
      "step": 31500
    },
    {
      "epoch": 66.25,
      "learning_rate": 1.8e-05,
      "loss": 0.6891,
      "step": 32000
    },
    {
      "epoch": 66.25,
      "eval_accuracy": 0.8457446808510638,
      "eval_loss": 0.8105884194374084,
      "eval_runtime": 1.6942,
      "eval_samples_per_second": 506.435,
      "eval_steps_per_second": 31.874,
      "step": 32000
    },
    {
      "epoch": 67.29,
      "learning_rate": 1.75e-05,
      "loss": 0.6541,
      "step": 32500
    },
    {
      "epoch": 67.29,
      "eval_accuracy": 0.8429752066115702,
      "eval_loss": 0.8197007179260254,
      "eval_runtime": 1.6912,
      "eval_samples_per_second": 507.332,
      "eval_steps_per_second": 31.93,
      "step": 32500
    },
    {
      "epoch": 68.32,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.6559,
      "step": 33000
    },
    {
      "epoch": 68.32,
      "eval_accuracy": 0.8388305847076462,
      "eval_loss": 0.8678442239761353,
      "eval_runtime": 1.6945,
      "eval_samples_per_second": 506.35,
      "eval_steps_per_second": 31.868,
      "step": 33000
    },
    {
      "epoch": 69.36,
      "learning_rate": 1.65e-05,
      "loss": 0.6554,
      "step": 33500
    },
    {
      "epoch": 69.36,
      "eval_accuracy": 0.8661764705882353,
      "eval_loss": 0.7396097183227539,
      "eval_runtime": 1.6934,
      "eval_samples_per_second": 506.658,
      "eval_steps_per_second": 31.888,
      "step": 33500
    },
    {
      "epoch": 70.39,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.618,
      "step": 34000
    },
    {
      "epoch": 70.39,
      "eval_accuracy": 0.8375634517766497,
      "eval_loss": 0.8517589569091797,
      "eval_runtime": 1.6983,
      "eval_samples_per_second": 505.224,
      "eval_steps_per_second": 31.797,
      "step": 34000
    },
    {
      "epoch": 71.43,
      "learning_rate": 1.55e-05,
      "loss": 0.6558,
      "step": 34500
    },
    {
      "epoch": 71.43,
      "eval_accuracy": 0.8409090909090909,
      "eval_loss": 0.7705618739128113,
      "eval_runtime": 1.6954,
      "eval_samples_per_second": 506.065,
      "eval_steps_per_second": 31.85,
      "step": 34500
    },
    {
      "epoch": 72.46,
      "learning_rate": 1.5e-05,
      "loss": 0.6034,
      "step": 35000
    },
    {
      "epoch": 72.46,
      "eval_accuracy": 0.8517699115044248,
      "eval_loss": 0.7829406261444092,
      "eval_runtime": 1.6974,
      "eval_samples_per_second": 505.471,
      "eval_steps_per_second": 31.813,
      "step": 35000
    },
    {
      "epoch": 73.5,
      "learning_rate": 1.45e-05,
      "loss": 0.6336,
      "step": 35500
    },
    {
      "epoch": 73.5,
      "eval_accuracy": 0.8591445427728613,
      "eval_loss": 0.7834987640380859,
      "eval_runtime": 1.6914,
      "eval_samples_per_second": 507.26,
      "eval_steps_per_second": 31.925,
      "step": 35500
    },
    {
      "epoch": 74.53,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.6287,
      "step": 36000
    },
    {
      "epoch": 74.53,
      "eval_accuracy": 0.8574748257164988,
      "eval_loss": 0.7547706961631775,
      "eval_runtime": 1.6906,
      "eval_samples_per_second": 507.513,
      "eval_steps_per_second": 31.941,
      "step": 36000
    },
    {
      "epoch": 75.57,
      "learning_rate": 1.3500000000000001e-05,
      "loss": 0.6065,
      "step": 36500
    },
    {
      "epoch": 75.57,
      "eval_accuracy": 0.8508005822416302,
      "eval_loss": 0.8541703224182129,
      "eval_runtime": 1.6919,
      "eval_samples_per_second": 507.134,
      "eval_steps_per_second": 31.918,
      "step": 36500
    },
    {
      "epoch": 76.6,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 0.6029,
      "step": 37000
    },
    {
      "epoch": 76.6,
      "eval_accuracy": 0.8405267008046818,
      "eval_loss": 0.8202521800994873,
      "eval_runtime": 1.6903,
      "eval_samples_per_second": 507.595,
      "eval_steps_per_second": 31.947,
      "step": 37000
    },
    {
      "epoch": 77.64,
      "learning_rate": 1.25e-05,
      "loss": 0.6208,
      "step": 37500
    },
    {
      "epoch": 77.64,
      "eval_accuracy": 0.8661417322834646,
      "eval_loss": 0.7082335948944092,
      "eval_runtime": 1.6867,
      "eval_samples_per_second": 508.681,
      "eval_steps_per_second": 32.015,
      "step": 37500
    },
    {
      "epoch": 78.67,
      "learning_rate": 1.2e-05,
      "loss": 0.64,
      "step": 38000
    },
    {
      "epoch": 78.67,
      "eval_accuracy": 0.8410295230885693,
      "eval_loss": 0.8504825234413147,
      "eval_runtime": 1.6943,
      "eval_samples_per_second": 506.417,
      "eval_steps_per_second": 31.872,
      "step": 38000
    },
    {
      "epoch": 79.71,
      "learning_rate": 1.1500000000000002e-05,
      "loss": 0.6144,
      "step": 38500
    },
    {
      "epoch": 79.71,
      "eval_accuracy": 0.8603636363636363,
      "eval_loss": 0.7246142625808716,
      "eval_runtime": 1.6864,
      "eval_samples_per_second": 508.77,
      "eval_steps_per_second": 32.02,
      "step": 38500
    },
    {
      "epoch": 80.75,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 0.6507,
      "step": 39000
    },
    {
      "epoch": 80.75,
      "eval_accuracy": 0.861132660977502,
      "eval_loss": 0.7150202393531799,
      "eval_runtime": 1.701,
      "eval_samples_per_second": 504.398,
      "eval_steps_per_second": 31.745,
      "step": 39000
    },
    {
      "epoch": 81.78,
      "learning_rate": 1.05e-05,
      "loss": 0.6177,
      "step": 39500
    },
    {
      "epoch": 81.78,
      "eval_accuracy": 0.84,
      "eval_loss": 0.9331970810890198,
      "eval_runtime": 1.6939,
      "eval_samples_per_second": 506.536,
      "eval_steps_per_second": 31.88,
      "step": 39500
    },
    {
      "epoch": 82.82,
      "learning_rate": 1e-05,
      "loss": 0.6159,
      "step": 40000
    },
    {
      "epoch": 82.82,
      "eval_accuracy": 0.8733488733488733,
      "eval_loss": 0.6427481174468994,
      "eval_runtime": 1.6965,
      "eval_samples_per_second": 505.755,
      "eval_steps_per_second": 31.831,
      "step": 40000
    }
  ],
  "logging_steps": 500,
  "max_steps": 50000,
  "num_train_epochs": 104,
  "save_steps": 500,
  "total_flos": 6042662847119360.0,
  "trial_name": null,
  "trial_params": null
}