{ "best_metric": 0.6427481174468994, "best_model_checkpoint": "./models/adapters_mlm_cn/mt/checkpoint-40000", "epoch": 82.81573498964804, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.04, "learning_rate": 4.9500000000000004e-05, "loss": 2.6689, "step": 500 }, { "epoch": 1.04, "eval_accuracy": 0.6676783004552352, "eval_loss": 2.1344573497772217, "eval_runtime": 1.6941, "eval_samples_per_second": 506.449, "eval_steps_per_second": 31.874, "step": 500 }, { "epoch": 2.07, "learning_rate": 4.9e-05, "loss": 2.1415, "step": 1000 }, { "epoch": 2.07, "eval_accuracy": 0.6926470588235294, "eval_loss": 1.8435733318328857, "eval_runtime": 1.6896, "eval_samples_per_second": 507.824, "eval_steps_per_second": 31.961, "step": 1000 }, { "epoch": 3.11, "learning_rate": 4.85e-05, "loss": 1.9421, "step": 1500 }, { "epoch": 3.11, "eval_accuracy": 0.690674753601213, "eval_loss": 1.787391185760498, "eval_runtime": 1.6956, "eval_samples_per_second": 506.016, "eval_steps_per_second": 31.847, "step": 1500 }, { "epoch": 4.14, "learning_rate": 4.8e-05, "loss": 1.7588, "step": 2000 }, { "epoch": 4.14, "eval_accuracy": 0.7012509197939661, "eval_loss": 1.760498285293579, "eval_runtime": 1.6894, "eval_samples_per_second": 507.882, "eval_steps_per_second": 31.965, "step": 2000 }, { "epoch": 5.18, "learning_rate": 4.75e-05, "loss": 1.6729, "step": 2500 }, { "epoch": 5.18, "eval_accuracy": 0.6956845238095238, "eval_loss": 1.7567747831344604, "eval_runtime": 1.6937, "eval_samples_per_second": 506.597, "eval_steps_per_second": 31.884, "step": 2500 }, { "epoch": 6.21, "learning_rate": 4.7e-05, "loss": 1.596, "step": 3000 }, { "epoch": 6.21, "eval_accuracy": 0.7272727272727273, "eval_loss": 1.5006115436553955, "eval_runtime": 1.6969, "eval_samples_per_second": 505.627, "eval_steps_per_second": 31.823, "step": 3000 }, { "epoch": 7.25, "learning_rate": 4.6500000000000005e-05, "loss": 1.5778, "step": 3500 }, { "epoch": 7.25, "eval_accuracy": 0.7450832072617246, "eval_loss": 1.3923866748809814, "eval_runtime": 1.6898, "eval_samples_per_second": 507.75, "eval_steps_per_second": 31.956, "step": 3500 }, { "epoch": 8.28, "learning_rate": 4.600000000000001e-05, "loss": 1.4821, "step": 4000 }, { "epoch": 8.28, "eval_accuracy": 0.7099236641221374, "eval_loss": 1.609680414199829, "eval_runtime": 1.6898, "eval_samples_per_second": 507.758, "eval_steps_per_second": 31.957, "step": 4000 }, { "epoch": 9.32, "learning_rate": 4.55e-05, "loss": 1.4183, "step": 4500 }, { "epoch": 9.32, "eval_accuracy": 0.7490551776266062, "eval_loss": 1.3551626205444336, "eval_runtime": 1.6905, "eval_samples_per_second": 507.539, "eval_steps_per_second": 31.943, "step": 4500 }, { "epoch": 10.35, "learning_rate": 4.5e-05, "loss": 1.4197, "step": 5000 }, { "epoch": 10.35, "eval_accuracy": 0.7513471901462664, "eval_loss": 1.284741997718811, "eval_runtime": 1.6878, "eval_samples_per_second": 508.353, "eval_steps_per_second": 31.994, "step": 5000 }, { "epoch": 11.39, "learning_rate": 4.4500000000000004e-05, "loss": 1.3156, "step": 5500 }, { "epoch": 11.39, "eval_accuracy": 0.7496318114874816, "eval_loss": 1.3172950744628906, "eval_runtime": 1.6891, "eval_samples_per_second": 507.956, "eval_steps_per_second": 31.969, "step": 5500 }, { "epoch": 12.42, "learning_rate": 4.4000000000000006e-05, "loss": 1.2882, "step": 6000 }, { "epoch": 12.42, "eval_accuracy": 0.7738095238095238, "eval_loss": 1.2816879749298096, "eval_runtime": 1.6955, "eval_samples_per_second": 506.058, "eval_steps_per_second": 31.85, "step": 6000 }, { "epoch": 13.46, "learning_rate": 4.35e-05, "loss": 1.2692, "step": 6500 }, { "epoch": 13.46, "eval_accuracy": 0.775112443778111, "eval_loss": 1.189226746559143, "eval_runtime": 1.6876, "eval_samples_per_second": 508.403, "eval_steps_per_second": 31.997, "step": 6500 }, { "epoch": 14.49, "learning_rate": 4.3e-05, "loss": 1.2368, "step": 7000 }, { "epoch": 14.49, "eval_accuracy": 0.7816432272390822, "eval_loss": 1.2362936735153198, "eval_runtime": 1.6828, "eval_samples_per_second": 509.861, "eval_steps_per_second": 32.089, "step": 7000 }, { "epoch": 15.53, "learning_rate": 4.25e-05, "loss": 1.1975, "step": 7500 }, { "epoch": 15.53, "eval_accuracy": 0.76996336996337, "eval_loss": 1.2442289590835571, "eval_runtime": 1.6849, "eval_samples_per_second": 509.233, "eval_steps_per_second": 32.05, "step": 7500 }, { "epoch": 16.56, "learning_rate": 4.2e-05, "loss": 1.1907, "step": 8000 }, { "epoch": 16.56, "eval_accuracy": 0.7720320466132556, "eval_loss": 1.256901502609253, "eval_runtime": 1.6884, "eval_samples_per_second": 508.188, "eval_steps_per_second": 31.984, "step": 8000 }, { "epoch": 17.6, "learning_rate": 4.15e-05, "loss": 1.1231, "step": 8500 }, { "epoch": 17.6, "eval_accuracy": 0.776085300837776, "eval_loss": 1.13861083984375, "eval_runtime": 1.6842, "eval_samples_per_second": 509.436, "eval_steps_per_second": 32.062, "step": 8500 }, { "epoch": 18.63, "learning_rate": 4.1e-05, "loss": 1.0873, "step": 9000 }, { "epoch": 18.63, "eval_accuracy": 0.7855547282204021, "eval_loss": 1.2104856967926025, "eval_runtime": 1.6846, "eval_samples_per_second": 509.309, "eval_steps_per_second": 32.054, "step": 9000 }, { "epoch": 19.67, "learning_rate": 4.05e-05, "loss": 1.1242, "step": 9500 }, { "epoch": 19.67, "eval_accuracy": 0.7737909516380655, "eval_loss": 1.214229702949524, "eval_runtime": 1.6848, "eval_samples_per_second": 509.258, "eval_steps_per_second": 32.051, "step": 9500 }, { "epoch": 20.7, "learning_rate": 4e-05, "loss": 1.0367, "step": 10000 }, { "epoch": 20.7, "eval_accuracy": 0.7712369597615499, "eval_loss": 1.2120734453201294, "eval_runtime": 1.6938, "eval_samples_per_second": 506.56, "eval_steps_per_second": 31.881, "step": 10000 }, { "epoch": 21.74, "learning_rate": 3.9500000000000005e-05, "loss": 1.0869, "step": 10500 }, { "epoch": 21.74, "eval_accuracy": 0.7955390334572491, "eval_loss": 1.0782362222671509, "eval_runtime": 1.6863, "eval_samples_per_second": 508.795, "eval_steps_per_second": 32.022, "step": 10500 }, { "epoch": 22.77, "learning_rate": 3.9000000000000006e-05, "loss": 1.0353, "step": 11000 }, { "epoch": 22.77, "eval_accuracy": 0.8068535825545171, "eval_loss": 0.9917858839035034, "eval_runtime": 1.6841, "eval_samples_per_second": 509.483, "eval_steps_per_second": 32.065, "step": 11000 }, { "epoch": 23.81, "learning_rate": 3.85e-05, "loss": 1.0324, "step": 11500 }, { "epoch": 23.81, "eval_accuracy": 0.7971233913701741, "eval_loss": 1.0908266305923462, "eval_runtime": 1.6848, "eval_samples_per_second": 509.246, "eval_steps_per_second": 32.05, "step": 11500 }, { "epoch": 24.84, "learning_rate": 3.8e-05, "loss": 1.0145, "step": 12000 }, { "epoch": 24.84, "eval_accuracy": 0.7975460122699386, "eval_loss": 1.0944875478744507, "eval_runtime": 1.6827, "eval_samples_per_second": 509.9, "eval_steps_per_second": 32.092, "step": 12000 }, { "epoch": 25.88, "learning_rate": 3.7500000000000003e-05, "loss": 0.9951, "step": 12500 }, { "epoch": 25.88, "eval_accuracy": 0.8028064992614475, "eval_loss": 1.000519037246704, "eval_runtime": 1.6933, "eval_samples_per_second": 506.714, "eval_steps_per_second": 31.891, "step": 12500 }, { "epoch": 26.92, "learning_rate": 3.7e-05, "loss": 0.9483, "step": 13000 }, { "epoch": 26.92, "eval_accuracy": 0.8186646433990895, "eval_loss": 0.963790237903595, "eval_runtime": 1.6874, "eval_samples_per_second": 508.479, "eval_steps_per_second": 32.002, "step": 13000 }, { "epoch": 27.95, "learning_rate": 3.65e-05, "loss": 0.9304, "step": 13500 }, { "epoch": 27.95, "eval_accuracy": 0.8204747774480712, "eval_loss": 0.9761123657226562, "eval_runtime": 1.6869, "eval_samples_per_second": 508.622, "eval_steps_per_second": 32.011, "step": 13500 }, { "epoch": 28.99, "learning_rate": 3.6e-05, "loss": 0.8835, "step": 14000 }, { "epoch": 28.99, "eval_accuracy": 0.8045801526717558, "eval_loss": 1.062032699584961, "eval_runtime": 1.6883, "eval_samples_per_second": 508.21, "eval_steps_per_second": 31.985, "step": 14000 }, { "epoch": 30.02, "learning_rate": 3.55e-05, "loss": 0.9097, "step": 14500 }, { "epoch": 30.02, "eval_accuracy": 0.806015037593985, "eval_loss": 0.9137569069862366, "eval_runtime": 1.6924, "eval_samples_per_second": 506.97, "eval_steps_per_second": 31.907, "step": 14500 }, { "epoch": 31.06, "learning_rate": 3.5e-05, "loss": 0.9293, "step": 15000 }, { "epoch": 31.06, "eval_accuracy": 0.8176197836166924, "eval_loss": 0.918023943901062, "eval_runtime": 1.6905, "eval_samples_per_second": 507.53, "eval_steps_per_second": 31.942, "step": 15000 }, { "epoch": 32.09, "learning_rate": 3.45e-05, "loss": 0.9043, "step": 15500 }, { "epoch": 32.09, "eval_accuracy": 0.8208269525267994, "eval_loss": 0.9214709401130676, "eval_runtime": 1.691, "eval_samples_per_second": 507.403, "eval_steps_per_second": 31.934, "step": 15500 }, { "epoch": 33.13, "learning_rate": 3.4000000000000007e-05, "loss": 0.8581, "step": 16000 }, { "epoch": 33.13, "eval_accuracy": 0.822452229299363, "eval_loss": 0.9624596834182739, "eval_runtime": 1.6897, "eval_samples_per_second": 507.793, "eval_steps_per_second": 31.959, "step": 16000 }, { "epoch": 34.16, "learning_rate": 3.35e-05, "loss": 0.8638, "step": 16500 }, { "epoch": 34.16, "eval_accuracy": 0.8367816091954023, "eval_loss": 0.8585591316223145, "eval_runtime": 1.6912, "eval_samples_per_second": 507.329, "eval_steps_per_second": 31.93, "step": 16500 }, { "epoch": 35.2, "learning_rate": 3.3e-05, "loss": 0.874, "step": 17000 }, { "epoch": 35.2, "eval_accuracy": 0.8135072908672295, "eval_loss": 1.0043973922729492, "eval_runtime": 1.6896, "eval_samples_per_second": 507.801, "eval_steps_per_second": 31.96, "step": 17000 }, { "epoch": 36.23, "learning_rate": 3.2500000000000004e-05, "loss": 0.8235, "step": 17500 }, { "epoch": 36.23, "eval_accuracy": 0.8183890577507599, "eval_loss": 0.9755066633224487, "eval_runtime": 1.6947, "eval_samples_per_second": 506.289, "eval_steps_per_second": 31.864, "step": 17500 }, { "epoch": 37.27, "learning_rate": 3.2000000000000005e-05, "loss": 0.8589, "step": 18000 }, { "epoch": 37.27, "eval_accuracy": 0.8291761148904006, "eval_loss": 0.9042153358459473, "eval_runtime": 1.6905, "eval_samples_per_second": 507.55, "eval_steps_per_second": 31.944, "step": 18000 }, { "epoch": 38.3, "learning_rate": 3.15e-05, "loss": 0.8107, "step": 18500 }, { "epoch": 38.3, "eval_accuracy": 0.8272327964860908, "eval_loss": 0.8821109533309937, "eval_runtime": 1.6895, "eval_samples_per_second": 507.845, "eval_steps_per_second": 31.962, "step": 18500 }, { "epoch": 39.34, "learning_rate": 3.1e-05, "loss": 0.8346, "step": 19000 }, { "epoch": 39.34, "eval_accuracy": 0.8248286367098249, "eval_loss": 0.9061236381530762, "eval_runtime": 1.6919, "eval_samples_per_second": 507.136, "eval_steps_per_second": 31.918, "step": 19000 }, { "epoch": 40.37, "learning_rate": 3.05e-05, "loss": 0.8393, "step": 19500 }, { "epoch": 40.37, "eval_accuracy": 0.8234854151084517, "eval_loss": 0.9795840978622437, "eval_runtime": 1.6939, "eval_samples_per_second": 506.513, "eval_steps_per_second": 31.878, "step": 19500 }, { "epoch": 41.41, "learning_rate": 3e-05, "loss": 0.789, "step": 20000 }, { "epoch": 41.41, "eval_accuracy": 0.833076923076923, "eval_loss": 0.9014851450920105, "eval_runtime": 1.689, "eval_samples_per_second": 508.0, "eval_steps_per_second": 31.972, "step": 20000 }, { "epoch": 42.44, "learning_rate": 2.95e-05, "loss": 0.8121, "step": 20500 }, { "epoch": 42.44, "eval_accuracy": 0.8385913426265591, "eval_loss": 0.8589309453964233, "eval_runtime": 1.6873, "eval_samples_per_second": 508.516, "eval_steps_per_second": 32.005, "step": 20500 }, { "epoch": 43.48, "learning_rate": 2.9e-05, "loss": 0.7709, "step": 21000 }, { "epoch": 43.48, "eval_accuracy": 0.8350903614457831, "eval_loss": 0.8835715055465698, "eval_runtime": 1.6829, "eval_samples_per_second": 509.835, "eval_steps_per_second": 32.088, "step": 21000 }, { "epoch": 44.51, "learning_rate": 2.8499999999999998e-05, "loss": 0.7922, "step": 21500 }, { "epoch": 44.51, "eval_accuracy": 0.817974105102818, "eval_loss": 0.9523779153823853, "eval_runtime": 1.6863, "eval_samples_per_second": 508.799, "eval_steps_per_second": 32.022, "step": 21500 }, { "epoch": 45.55, "learning_rate": 2.8000000000000003e-05, "loss": 0.7457, "step": 22000 }, { "epoch": 45.55, "eval_accuracy": 0.8364451082897685, "eval_loss": 0.8350428938865662, "eval_runtime": 1.6901, "eval_samples_per_second": 507.673, "eval_steps_per_second": 31.951, "step": 22000 }, { "epoch": 46.58, "learning_rate": 2.7500000000000004e-05, "loss": 0.7386, "step": 22500 }, { "epoch": 46.58, "eval_accuracy": 0.8340807174887892, "eval_loss": 0.9024766087532043, "eval_runtime": 1.6912, "eval_samples_per_second": 507.34, "eval_steps_per_second": 31.93, "step": 22500 }, { "epoch": 47.62, "learning_rate": 2.7000000000000002e-05, "loss": 0.7515, "step": 23000 }, { "epoch": 47.62, "eval_accuracy": 0.8390166534496432, "eval_loss": 0.9091906547546387, "eval_runtime": 1.686, "eval_samples_per_second": 508.899, "eval_steps_per_second": 32.029, "step": 23000 }, { "epoch": 48.65, "learning_rate": 2.6500000000000004e-05, "loss": 0.7324, "step": 23500 }, { "epoch": 48.65, "eval_accuracy": 0.8420647149460708, "eval_loss": 0.8322407007217407, "eval_runtime": 1.6918, "eval_samples_per_second": 507.153, "eval_steps_per_second": 31.919, "step": 23500 }, { "epoch": 49.69, "learning_rate": 2.6000000000000002e-05, "loss": 0.7314, "step": 24000 }, { "epoch": 49.69, "eval_accuracy": 0.8477078477078477, "eval_loss": 0.7967829704284668, "eval_runtime": 1.6933, "eval_samples_per_second": 506.713, "eval_steps_per_second": 31.891, "step": 24000 }, { "epoch": 50.72, "learning_rate": 2.5500000000000003e-05, "loss": 0.7442, "step": 24500 }, { "epoch": 50.72, "eval_accuracy": 0.8324407039020658, "eval_loss": 0.930473268032074, "eval_runtime": 1.6828, "eval_samples_per_second": 509.873, "eval_steps_per_second": 32.09, "step": 24500 }, { "epoch": 51.76, "learning_rate": 2.5e-05, "loss": 0.7074, "step": 25000 }, { "epoch": 51.76, "eval_accuracy": 0.820839580209895, "eval_loss": 1.001060962677002, "eval_runtime": 1.6867, "eval_samples_per_second": 508.672, "eval_steps_per_second": 32.014, "step": 25000 }, { "epoch": 52.8, "learning_rate": 2.45e-05, "loss": 0.739, "step": 25500 }, { "epoch": 52.8, "eval_accuracy": 0.8330945558739254, "eval_loss": 0.8732258677482605, "eval_runtime": 1.6896, "eval_samples_per_second": 507.823, "eval_steps_per_second": 31.961, "step": 25500 }, { "epoch": 53.83, "learning_rate": 2.4e-05, "loss": 0.7243, "step": 26000 }, { "epoch": 53.83, "eval_accuracy": 0.8479880774962743, "eval_loss": 0.7857112288475037, "eval_runtime": 1.687, "eval_samples_per_second": 508.591, "eval_steps_per_second": 32.009, "step": 26000 }, { "epoch": 54.87, "learning_rate": 2.35e-05, "loss": 0.6842, "step": 26500 }, { "epoch": 54.87, "eval_accuracy": 0.8377192982456141, "eval_loss": 0.7945135235786438, "eval_runtime": 1.6902, "eval_samples_per_second": 507.642, "eval_steps_per_second": 31.949, "step": 26500 }, { "epoch": 55.9, "learning_rate": 2.3000000000000003e-05, "loss": 0.6991, "step": 27000 }, { "epoch": 55.9, "eval_accuracy": 0.8275351591413768, "eval_loss": 0.9627696871757507, "eval_runtime": 1.6871, "eval_samples_per_second": 508.578, "eval_steps_per_second": 32.008, "step": 27000 }, { "epoch": 56.94, "learning_rate": 2.25e-05, "loss": 0.6896, "step": 27500 }, { "epoch": 56.94, "eval_accuracy": 0.840960240060015, "eval_loss": 0.8363039493560791, "eval_runtime": 1.684, "eval_samples_per_second": 509.495, "eval_steps_per_second": 32.066, "step": 27500 }, { "epoch": 57.97, "learning_rate": 2.2000000000000003e-05, "loss": 0.6925, "step": 28000 }, { "epoch": 57.97, "eval_accuracy": 0.8391812865497076, "eval_loss": 0.8432921767234802, "eval_runtime": 1.6968, "eval_samples_per_second": 505.655, "eval_steps_per_second": 31.824, "step": 28000 }, { "epoch": 59.01, "learning_rate": 2.15e-05, "loss": 0.7081, "step": 28500 }, { "epoch": 59.01, "eval_accuracy": 0.8223048327137547, "eval_loss": 1.0085676908493042, "eval_runtime": 1.69, "eval_samples_per_second": 507.688, "eval_steps_per_second": 31.952, "step": 28500 }, { "epoch": 60.04, "learning_rate": 2.1e-05, "loss": 0.6598, "step": 29000 }, { "epoch": 60.04, "eval_accuracy": 0.8333333333333334, "eval_loss": 0.9250668883323669, "eval_runtime": 1.686, "eval_samples_per_second": 508.895, "eval_steps_per_second": 32.028, "step": 29000 }, { "epoch": 61.08, "learning_rate": 2.05e-05, "loss": 0.6677, "step": 29500 }, { "epoch": 61.08, "eval_accuracy": 0.8437047756874095, "eval_loss": 0.8822752237319946, "eval_runtime": 1.693, "eval_samples_per_second": 506.807, "eval_steps_per_second": 31.897, "step": 29500 }, { "epoch": 62.11, "learning_rate": 2e-05, "loss": 0.695, "step": 30000 }, { "epoch": 62.11, "eval_accuracy": 0.8560371517027864, "eval_loss": 0.7750544548034668, "eval_runtime": 1.6969, "eval_samples_per_second": 505.632, "eval_steps_per_second": 31.823, "step": 30000 }, { "epoch": 63.15, "learning_rate": 1.9500000000000003e-05, "loss": 0.7108, "step": 30500 }, { "epoch": 63.15, "eval_accuracy": 0.8481104651162791, "eval_loss": 0.8452057242393494, "eval_runtime": 1.6974, "eval_samples_per_second": 505.49, "eval_steps_per_second": 31.814, "step": 30500 }, { "epoch": 64.18, "learning_rate": 1.9e-05, "loss": 0.6721, "step": 31000 }, { "epoch": 64.18, "eval_accuracy": 0.8413284132841329, "eval_loss": 0.8559600114822388, "eval_runtime": 1.6936, "eval_samples_per_second": 506.623, "eval_steps_per_second": 31.885, "step": 31000 }, { "epoch": 65.22, "learning_rate": 1.85e-05, "loss": 0.6571, "step": 31500 }, { "epoch": 65.22, "eval_accuracy": 0.8163109756097561, "eval_loss": 0.98003089427948, "eval_runtime": 1.6913, "eval_samples_per_second": 507.303, "eval_steps_per_second": 31.928, "step": 31500 }, { "epoch": 66.25, "learning_rate": 1.8e-05, "loss": 0.6891, "step": 32000 }, { "epoch": 66.25, "eval_accuracy": 0.8457446808510638, "eval_loss": 0.8105884194374084, "eval_runtime": 1.6942, "eval_samples_per_second": 506.435, "eval_steps_per_second": 31.874, "step": 32000 }, { "epoch": 67.29, "learning_rate": 1.75e-05, "loss": 0.6541, "step": 32500 }, { "epoch": 67.29, "eval_accuracy": 0.8429752066115702, "eval_loss": 0.8197007179260254, "eval_runtime": 1.6912, "eval_samples_per_second": 507.332, "eval_steps_per_second": 31.93, "step": 32500 }, { "epoch": 68.32, "learning_rate": 1.7000000000000003e-05, "loss": 0.6559, "step": 33000 }, { "epoch": 68.32, "eval_accuracy": 0.8388305847076462, "eval_loss": 0.8678442239761353, "eval_runtime": 1.6945, "eval_samples_per_second": 506.35, "eval_steps_per_second": 31.868, "step": 33000 }, { "epoch": 69.36, "learning_rate": 1.65e-05, "loss": 0.6554, "step": 33500 }, { "epoch": 69.36, "eval_accuracy": 0.8661764705882353, "eval_loss": 0.7396097183227539, "eval_runtime": 1.6934, "eval_samples_per_second": 506.658, "eval_steps_per_second": 31.888, "step": 33500 }, { "epoch": 70.39, "learning_rate": 1.6000000000000003e-05, "loss": 0.618, "step": 34000 }, { "epoch": 70.39, "eval_accuracy": 0.8375634517766497, "eval_loss": 0.8517589569091797, "eval_runtime": 1.6983, "eval_samples_per_second": 505.224, "eval_steps_per_second": 31.797, "step": 34000 }, { "epoch": 71.43, "learning_rate": 1.55e-05, "loss": 0.6558, "step": 34500 }, { "epoch": 71.43, "eval_accuracy": 0.8409090909090909, "eval_loss": 0.7705618739128113, "eval_runtime": 1.6954, "eval_samples_per_second": 506.065, "eval_steps_per_second": 31.85, "step": 34500 }, { "epoch": 72.46, "learning_rate": 1.5e-05, "loss": 0.6034, "step": 35000 }, { "epoch": 72.46, "eval_accuracy": 0.8517699115044248, "eval_loss": 0.7829406261444092, "eval_runtime": 1.6974, "eval_samples_per_second": 505.471, "eval_steps_per_second": 31.813, "step": 35000 }, { "epoch": 73.5, "learning_rate": 1.45e-05, "loss": 0.6336, "step": 35500 }, { "epoch": 73.5, "eval_accuracy": 0.8591445427728613, "eval_loss": 0.7834987640380859, "eval_runtime": 1.6914, "eval_samples_per_second": 507.26, "eval_steps_per_second": 31.925, "step": 35500 }, { "epoch": 74.53, "learning_rate": 1.4000000000000001e-05, "loss": 0.6287, "step": 36000 }, { "epoch": 74.53, "eval_accuracy": 0.8574748257164988, "eval_loss": 0.7547706961631775, "eval_runtime": 1.6906, "eval_samples_per_second": 507.513, "eval_steps_per_second": 31.941, "step": 36000 }, { "epoch": 75.57, "learning_rate": 1.3500000000000001e-05, "loss": 0.6065, "step": 36500 }, { "epoch": 75.57, "eval_accuracy": 0.8508005822416302, "eval_loss": 0.8541703224182129, "eval_runtime": 1.6919, "eval_samples_per_second": 507.134, "eval_steps_per_second": 31.918, "step": 36500 }, { "epoch": 76.6, "learning_rate": 1.3000000000000001e-05, "loss": 0.6029, "step": 37000 }, { "epoch": 76.6, "eval_accuracy": 0.8405267008046818, "eval_loss": 0.8202521800994873, "eval_runtime": 1.6903, "eval_samples_per_second": 507.595, "eval_steps_per_second": 31.947, "step": 37000 }, { "epoch": 77.64, "learning_rate": 1.25e-05, "loss": 0.6208, "step": 37500 }, { "epoch": 77.64, "eval_accuracy": 0.8661417322834646, "eval_loss": 0.7082335948944092, "eval_runtime": 1.6867, "eval_samples_per_second": 508.681, "eval_steps_per_second": 32.015, "step": 37500 }, { "epoch": 78.67, "learning_rate": 1.2e-05, "loss": 0.64, "step": 38000 }, { "epoch": 78.67, "eval_accuracy": 0.8410295230885693, "eval_loss": 0.8504825234413147, "eval_runtime": 1.6943, "eval_samples_per_second": 506.417, "eval_steps_per_second": 31.872, "step": 38000 }, { "epoch": 79.71, "learning_rate": 1.1500000000000002e-05, "loss": 0.6144, "step": 38500 }, { "epoch": 79.71, "eval_accuracy": 0.8603636363636363, "eval_loss": 0.7246142625808716, "eval_runtime": 1.6864, "eval_samples_per_second": 508.77, "eval_steps_per_second": 32.02, "step": 38500 }, { "epoch": 80.75, "learning_rate": 1.1000000000000001e-05, "loss": 0.6507, "step": 39000 }, { "epoch": 80.75, "eval_accuracy": 0.861132660977502, "eval_loss": 0.7150202393531799, "eval_runtime": 1.701, "eval_samples_per_second": 504.398, "eval_steps_per_second": 31.745, "step": 39000 }, { "epoch": 81.78, "learning_rate": 1.05e-05, "loss": 0.6177, "step": 39500 }, { "epoch": 81.78, "eval_accuracy": 0.84, "eval_loss": 0.9331970810890198, "eval_runtime": 1.6939, "eval_samples_per_second": 506.536, "eval_steps_per_second": 31.88, "step": 39500 }, { "epoch": 82.82, "learning_rate": 1e-05, "loss": 0.6159, "step": 40000 }, { "epoch": 82.82, "eval_accuracy": 0.8733488733488733, "eval_loss": 0.6427481174468994, "eval_runtime": 1.6965, "eval_samples_per_second": 505.755, "eval_steps_per_second": 31.831, "step": 40000 } ], "logging_steps": 500, "max_steps": 50000, "num_train_epochs": 104, "save_steps": 500, "total_flos": 6042662847119360.0, "trial_name": null, "trial_params": null }