maltese-cn-lang-adapter / trainer_state.json
DGurgurov's picture
Upload 18 files
9371975 verified
{
"best_metric": 0.6427481174468994,
"best_model_checkpoint": "./models/adapters_mlm_cn/mt/checkpoint-40000",
"epoch": 82.81573498964804,
"eval_steps": 500,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.04,
"learning_rate": 4.9500000000000004e-05,
"loss": 2.6689,
"step": 500
},
{
"epoch": 1.04,
"eval_accuracy": 0.6676783004552352,
"eval_loss": 2.1344573497772217,
"eval_runtime": 1.6941,
"eval_samples_per_second": 506.449,
"eval_steps_per_second": 31.874,
"step": 500
},
{
"epoch": 2.07,
"learning_rate": 4.9e-05,
"loss": 2.1415,
"step": 1000
},
{
"epoch": 2.07,
"eval_accuracy": 0.6926470588235294,
"eval_loss": 1.8435733318328857,
"eval_runtime": 1.6896,
"eval_samples_per_second": 507.824,
"eval_steps_per_second": 31.961,
"step": 1000
},
{
"epoch": 3.11,
"learning_rate": 4.85e-05,
"loss": 1.9421,
"step": 1500
},
{
"epoch": 3.11,
"eval_accuracy": 0.690674753601213,
"eval_loss": 1.787391185760498,
"eval_runtime": 1.6956,
"eval_samples_per_second": 506.016,
"eval_steps_per_second": 31.847,
"step": 1500
},
{
"epoch": 4.14,
"learning_rate": 4.8e-05,
"loss": 1.7588,
"step": 2000
},
{
"epoch": 4.14,
"eval_accuracy": 0.7012509197939661,
"eval_loss": 1.760498285293579,
"eval_runtime": 1.6894,
"eval_samples_per_second": 507.882,
"eval_steps_per_second": 31.965,
"step": 2000
},
{
"epoch": 5.18,
"learning_rate": 4.75e-05,
"loss": 1.6729,
"step": 2500
},
{
"epoch": 5.18,
"eval_accuracy": 0.6956845238095238,
"eval_loss": 1.7567747831344604,
"eval_runtime": 1.6937,
"eval_samples_per_second": 506.597,
"eval_steps_per_second": 31.884,
"step": 2500
},
{
"epoch": 6.21,
"learning_rate": 4.7e-05,
"loss": 1.596,
"step": 3000
},
{
"epoch": 6.21,
"eval_accuracy": 0.7272727272727273,
"eval_loss": 1.5006115436553955,
"eval_runtime": 1.6969,
"eval_samples_per_second": 505.627,
"eval_steps_per_second": 31.823,
"step": 3000
},
{
"epoch": 7.25,
"learning_rate": 4.6500000000000005e-05,
"loss": 1.5778,
"step": 3500
},
{
"epoch": 7.25,
"eval_accuracy": 0.7450832072617246,
"eval_loss": 1.3923866748809814,
"eval_runtime": 1.6898,
"eval_samples_per_second": 507.75,
"eval_steps_per_second": 31.956,
"step": 3500
},
{
"epoch": 8.28,
"learning_rate": 4.600000000000001e-05,
"loss": 1.4821,
"step": 4000
},
{
"epoch": 8.28,
"eval_accuracy": 0.7099236641221374,
"eval_loss": 1.609680414199829,
"eval_runtime": 1.6898,
"eval_samples_per_second": 507.758,
"eval_steps_per_second": 31.957,
"step": 4000
},
{
"epoch": 9.32,
"learning_rate": 4.55e-05,
"loss": 1.4183,
"step": 4500
},
{
"epoch": 9.32,
"eval_accuracy": 0.7490551776266062,
"eval_loss": 1.3551626205444336,
"eval_runtime": 1.6905,
"eval_samples_per_second": 507.539,
"eval_steps_per_second": 31.943,
"step": 4500
},
{
"epoch": 10.35,
"learning_rate": 4.5e-05,
"loss": 1.4197,
"step": 5000
},
{
"epoch": 10.35,
"eval_accuracy": 0.7513471901462664,
"eval_loss": 1.284741997718811,
"eval_runtime": 1.6878,
"eval_samples_per_second": 508.353,
"eval_steps_per_second": 31.994,
"step": 5000
},
{
"epoch": 11.39,
"learning_rate": 4.4500000000000004e-05,
"loss": 1.3156,
"step": 5500
},
{
"epoch": 11.39,
"eval_accuracy": 0.7496318114874816,
"eval_loss": 1.3172950744628906,
"eval_runtime": 1.6891,
"eval_samples_per_second": 507.956,
"eval_steps_per_second": 31.969,
"step": 5500
},
{
"epoch": 12.42,
"learning_rate": 4.4000000000000006e-05,
"loss": 1.2882,
"step": 6000
},
{
"epoch": 12.42,
"eval_accuracy": 0.7738095238095238,
"eval_loss": 1.2816879749298096,
"eval_runtime": 1.6955,
"eval_samples_per_second": 506.058,
"eval_steps_per_second": 31.85,
"step": 6000
},
{
"epoch": 13.46,
"learning_rate": 4.35e-05,
"loss": 1.2692,
"step": 6500
},
{
"epoch": 13.46,
"eval_accuracy": 0.775112443778111,
"eval_loss": 1.189226746559143,
"eval_runtime": 1.6876,
"eval_samples_per_second": 508.403,
"eval_steps_per_second": 31.997,
"step": 6500
},
{
"epoch": 14.49,
"learning_rate": 4.3e-05,
"loss": 1.2368,
"step": 7000
},
{
"epoch": 14.49,
"eval_accuracy": 0.7816432272390822,
"eval_loss": 1.2362936735153198,
"eval_runtime": 1.6828,
"eval_samples_per_second": 509.861,
"eval_steps_per_second": 32.089,
"step": 7000
},
{
"epoch": 15.53,
"learning_rate": 4.25e-05,
"loss": 1.1975,
"step": 7500
},
{
"epoch": 15.53,
"eval_accuracy": 0.76996336996337,
"eval_loss": 1.2442289590835571,
"eval_runtime": 1.6849,
"eval_samples_per_second": 509.233,
"eval_steps_per_second": 32.05,
"step": 7500
},
{
"epoch": 16.56,
"learning_rate": 4.2e-05,
"loss": 1.1907,
"step": 8000
},
{
"epoch": 16.56,
"eval_accuracy": 0.7720320466132556,
"eval_loss": 1.256901502609253,
"eval_runtime": 1.6884,
"eval_samples_per_second": 508.188,
"eval_steps_per_second": 31.984,
"step": 8000
},
{
"epoch": 17.6,
"learning_rate": 4.15e-05,
"loss": 1.1231,
"step": 8500
},
{
"epoch": 17.6,
"eval_accuracy": 0.776085300837776,
"eval_loss": 1.13861083984375,
"eval_runtime": 1.6842,
"eval_samples_per_second": 509.436,
"eval_steps_per_second": 32.062,
"step": 8500
},
{
"epoch": 18.63,
"learning_rate": 4.1e-05,
"loss": 1.0873,
"step": 9000
},
{
"epoch": 18.63,
"eval_accuracy": 0.7855547282204021,
"eval_loss": 1.2104856967926025,
"eval_runtime": 1.6846,
"eval_samples_per_second": 509.309,
"eval_steps_per_second": 32.054,
"step": 9000
},
{
"epoch": 19.67,
"learning_rate": 4.05e-05,
"loss": 1.1242,
"step": 9500
},
{
"epoch": 19.67,
"eval_accuracy": 0.7737909516380655,
"eval_loss": 1.214229702949524,
"eval_runtime": 1.6848,
"eval_samples_per_second": 509.258,
"eval_steps_per_second": 32.051,
"step": 9500
},
{
"epoch": 20.7,
"learning_rate": 4e-05,
"loss": 1.0367,
"step": 10000
},
{
"epoch": 20.7,
"eval_accuracy": 0.7712369597615499,
"eval_loss": 1.2120734453201294,
"eval_runtime": 1.6938,
"eval_samples_per_second": 506.56,
"eval_steps_per_second": 31.881,
"step": 10000
},
{
"epoch": 21.74,
"learning_rate": 3.9500000000000005e-05,
"loss": 1.0869,
"step": 10500
},
{
"epoch": 21.74,
"eval_accuracy": 0.7955390334572491,
"eval_loss": 1.0782362222671509,
"eval_runtime": 1.6863,
"eval_samples_per_second": 508.795,
"eval_steps_per_second": 32.022,
"step": 10500
},
{
"epoch": 22.77,
"learning_rate": 3.9000000000000006e-05,
"loss": 1.0353,
"step": 11000
},
{
"epoch": 22.77,
"eval_accuracy": 0.8068535825545171,
"eval_loss": 0.9917858839035034,
"eval_runtime": 1.6841,
"eval_samples_per_second": 509.483,
"eval_steps_per_second": 32.065,
"step": 11000
},
{
"epoch": 23.81,
"learning_rate": 3.85e-05,
"loss": 1.0324,
"step": 11500
},
{
"epoch": 23.81,
"eval_accuracy": 0.7971233913701741,
"eval_loss": 1.0908266305923462,
"eval_runtime": 1.6848,
"eval_samples_per_second": 509.246,
"eval_steps_per_second": 32.05,
"step": 11500
},
{
"epoch": 24.84,
"learning_rate": 3.8e-05,
"loss": 1.0145,
"step": 12000
},
{
"epoch": 24.84,
"eval_accuracy": 0.7975460122699386,
"eval_loss": 1.0944875478744507,
"eval_runtime": 1.6827,
"eval_samples_per_second": 509.9,
"eval_steps_per_second": 32.092,
"step": 12000
},
{
"epoch": 25.88,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.9951,
"step": 12500
},
{
"epoch": 25.88,
"eval_accuracy": 0.8028064992614475,
"eval_loss": 1.000519037246704,
"eval_runtime": 1.6933,
"eval_samples_per_second": 506.714,
"eval_steps_per_second": 31.891,
"step": 12500
},
{
"epoch": 26.92,
"learning_rate": 3.7e-05,
"loss": 0.9483,
"step": 13000
},
{
"epoch": 26.92,
"eval_accuracy": 0.8186646433990895,
"eval_loss": 0.963790237903595,
"eval_runtime": 1.6874,
"eval_samples_per_second": 508.479,
"eval_steps_per_second": 32.002,
"step": 13000
},
{
"epoch": 27.95,
"learning_rate": 3.65e-05,
"loss": 0.9304,
"step": 13500
},
{
"epoch": 27.95,
"eval_accuracy": 0.8204747774480712,
"eval_loss": 0.9761123657226562,
"eval_runtime": 1.6869,
"eval_samples_per_second": 508.622,
"eval_steps_per_second": 32.011,
"step": 13500
},
{
"epoch": 28.99,
"learning_rate": 3.6e-05,
"loss": 0.8835,
"step": 14000
},
{
"epoch": 28.99,
"eval_accuracy": 0.8045801526717558,
"eval_loss": 1.062032699584961,
"eval_runtime": 1.6883,
"eval_samples_per_second": 508.21,
"eval_steps_per_second": 31.985,
"step": 14000
},
{
"epoch": 30.02,
"learning_rate": 3.55e-05,
"loss": 0.9097,
"step": 14500
},
{
"epoch": 30.02,
"eval_accuracy": 0.806015037593985,
"eval_loss": 0.9137569069862366,
"eval_runtime": 1.6924,
"eval_samples_per_second": 506.97,
"eval_steps_per_second": 31.907,
"step": 14500
},
{
"epoch": 31.06,
"learning_rate": 3.5e-05,
"loss": 0.9293,
"step": 15000
},
{
"epoch": 31.06,
"eval_accuracy": 0.8176197836166924,
"eval_loss": 0.918023943901062,
"eval_runtime": 1.6905,
"eval_samples_per_second": 507.53,
"eval_steps_per_second": 31.942,
"step": 15000
},
{
"epoch": 32.09,
"learning_rate": 3.45e-05,
"loss": 0.9043,
"step": 15500
},
{
"epoch": 32.09,
"eval_accuracy": 0.8208269525267994,
"eval_loss": 0.9214709401130676,
"eval_runtime": 1.691,
"eval_samples_per_second": 507.403,
"eval_steps_per_second": 31.934,
"step": 15500
},
{
"epoch": 33.13,
"learning_rate": 3.4000000000000007e-05,
"loss": 0.8581,
"step": 16000
},
{
"epoch": 33.13,
"eval_accuracy": 0.822452229299363,
"eval_loss": 0.9624596834182739,
"eval_runtime": 1.6897,
"eval_samples_per_second": 507.793,
"eval_steps_per_second": 31.959,
"step": 16000
},
{
"epoch": 34.16,
"learning_rate": 3.35e-05,
"loss": 0.8638,
"step": 16500
},
{
"epoch": 34.16,
"eval_accuracy": 0.8367816091954023,
"eval_loss": 0.8585591316223145,
"eval_runtime": 1.6912,
"eval_samples_per_second": 507.329,
"eval_steps_per_second": 31.93,
"step": 16500
},
{
"epoch": 35.2,
"learning_rate": 3.3e-05,
"loss": 0.874,
"step": 17000
},
{
"epoch": 35.2,
"eval_accuracy": 0.8135072908672295,
"eval_loss": 1.0043973922729492,
"eval_runtime": 1.6896,
"eval_samples_per_second": 507.801,
"eval_steps_per_second": 31.96,
"step": 17000
},
{
"epoch": 36.23,
"learning_rate": 3.2500000000000004e-05,
"loss": 0.8235,
"step": 17500
},
{
"epoch": 36.23,
"eval_accuracy": 0.8183890577507599,
"eval_loss": 0.9755066633224487,
"eval_runtime": 1.6947,
"eval_samples_per_second": 506.289,
"eval_steps_per_second": 31.864,
"step": 17500
},
{
"epoch": 37.27,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.8589,
"step": 18000
},
{
"epoch": 37.27,
"eval_accuracy": 0.8291761148904006,
"eval_loss": 0.9042153358459473,
"eval_runtime": 1.6905,
"eval_samples_per_second": 507.55,
"eval_steps_per_second": 31.944,
"step": 18000
},
{
"epoch": 38.3,
"learning_rate": 3.15e-05,
"loss": 0.8107,
"step": 18500
},
{
"epoch": 38.3,
"eval_accuracy": 0.8272327964860908,
"eval_loss": 0.8821109533309937,
"eval_runtime": 1.6895,
"eval_samples_per_second": 507.845,
"eval_steps_per_second": 31.962,
"step": 18500
},
{
"epoch": 39.34,
"learning_rate": 3.1e-05,
"loss": 0.8346,
"step": 19000
},
{
"epoch": 39.34,
"eval_accuracy": 0.8248286367098249,
"eval_loss": 0.9061236381530762,
"eval_runtime": 1.6919,
"eval_samples_per_second": 507.136,
"eval_steps_per_second": 31.918,
"step": 19000
},
{
"epoch": 40.37,
"learning_rate": 3.05e-05,
"loss": 0.8393,
"step": 19500
},
{
"epoch": 40.37,
"eval_accuracy": 0.8234854151084517,
"eval_loss": 0.9795840978622437,
"eval_runtime": 1.6939,
"eval_samples_per_second": 506.513,
"eval_steps_per_second": 31.878,
"step": 19500
},
{
"epoch": 41.41,
"learning_rate": 3e-05,
"loss": 0.789,
"step": 20000
},
{
"epoch": 41.41,
"eval_accuracy": 0.833076923076923,
"eval_loss": 0.9014851450920105,
"eval_runtime": 1.689,
"eval_samples_per_second": 508.0,
"eval_steps_per_second": 31.972,
"step": 20000
},
{
"epoch": 42.44,
"learning_rate": 2.95e-05,
"loss": 0.8121,
"step": 20500
},
{
"epoch": 42.44,
"eval_accuracy": 0.8385913426265591,
"eval_loss": 0.8589309453964233,
"eval_runtime": 1.6873,
"eval_samples_per_second": 508.516,
"eval_steps_per_second": 32.005,
"step": 20500
},
{
"epoch": 43.48,
"learning_rate": 2.9e-05,
"loss": 0.7709,
"step": 21000
},
{
"epoch": 43.48,
"eval_accuracy": 0.8350903614457831,
"eval_loss": 0.8835715055465698,
"eval_runtime": 1.6829,
"eval_samples_per_second": 509.835,
"eval_steps_per_second": 32.088,
"step": 21000
},
{
"epoch": 44.51,
"learning_rate": 2.8499999999999998e-05,
"loss": 0.7922,
"step": 21500
},
{
"epoch": 44.51,
"eval_accuracy": 0.817974105102818,
"eval_loss": 0.9523779153823853,
"eval_runtime": 1.6863,
"eval_samples_per_second": 508.799,
"eval_steps_per_second": 32.022,
"step": 21500
},
{
"epoch": 45.55,
"learning_rate": 2.8000000000000003e-05,
"loss": 0.7457,
"step": 22000
},
{
"epoch": 45.55,
"eval_accuracy": 0.8364451082897685,
"eval_loss": 0.8350428938865662,
"eval_runtime": 1.6901,
"eval_samples_per_second": 507.673,
"eval_steps_per_second": 31.951,
"step": 22000
},
{
"epoch": 46.58,
"learning_rate": 2.7500000000000004e-05,
"loss": 0.7386,
"step": 22500
},
{
"epoch": 46.58,
"eval_accuracy": 0.8340807174887892,
"eval_loss": 0.9024766087532043,
"eval_runtime": 1.6912,
"eval_samples_per_second": 507.34,
"eval_steps_per_second": 31.93,
"step": 22500
},
{
"epoch": 47.62,
"learning_rate": 2.7000000000000002e-05,
"loss": 0.7515,
"step": 23000
},
{
"epoch": 47.62,
"eval_accuracy": 0.8390166534496432,
"eval_loss": 0.9091906547546387,
"eval_runtime": 1.686,
"eval_samples_per_second": 508.899,
"eval_steps_per_second": 32.029,
"step": 23000
},
{
"epoch": 48.65,
"learning_rate": 2.6500000000000004e-05,
"loss": 0.7324,
"step": 23500
},
{
"epoch": 48.65,
"eval_accuracy": 0.8420647149460708,
"eval_loss": 0.8322407007217407,
"eval_runtime": 1.6918,
"eval_samples_per_second": 507.153,
"eval_steps_per_second": 31.919,
"step": 23500
},
{
"epoch": 49.69,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.7314,
"step": 24000
},
{
"epoch": 49.69,
"eval_accuracy": 0.8477078477078477,
"eval_loss": 0.7967829704284668,
"eval_runtime": 1.6933,
"eval_samples_per_second": 506.713,
"eval_steps_per_second": 31.891,
"step": 24000
},
{
"epoch": 50.72,
"learning_rate": 2.5500000000000003e-05,
"loss": 0.7442,
"step": 24500
},
{
"epoch": 50.72,
"eval_accuracy": 0.8324407039020658,
"eval_loss": 0.930473268032074,
"eval_runtime": 1.6828,
"eval_samples_per_second": 509.873,
"eval_steps_per_second": 32.09,
"step": 24500
},
{
"epoch": 51.76,
"learning_rate": 2.5e-05,
"loss": 0.7074,
"step": 25000
},
{
"epoch": 51.76,
"eval_accuracy": 0.820839580209895,
"eval_loss": 1.001060962677002,
"eval_runtime": 1.6867,
"eval_samples_per_second": 508.672,
"eval_steps_per_second": 32.014,
"step": 25000
},
{
"epoch": 52.8,
"learning_rate": 2.45e-05,
"loss": 0.739,
"step": 25500
},
{
"epoch": 52.8,
"eval_accuracy": 0.8330945558739254,
"eval_loss": 0.8732258677482605,
"eval_runtime": 1.6896,
"eval_samples_per_second": 507.823,
"eval_steps_per_second": 31.961,
"step": 25500
},
{
"epoch": 53.83,
"learning_rate": 2.4e-05,
"loss": 0.7243,
"step": 26000
},
{
"epoch": 53.83,
"eval_accuracy": 0.8479880774962743,
"eval_loss": 0.7857112288475037,
"eval_runtime": 1.687,
"eval_samples_per_second": 508.591,
"eval_steps_per_second": 32.009,
"step": 26000
},
{
"epoch": 54.87,
"learning_rate": 2.35e-05,
"loss": 0.6842,
"step": 26500
},
{
"epoch": 54.87,
"eval_accuracy": 0.8377192982456141,
"eval_loss": 0.7945135235786438,
"eval_runtime": 1.6902,
"eval_samples_per_second": 507.642,
"eval_steps_per_second": 31.949,
"step": 26500
},
{
"epoch": 55.9,
"learning_rate": 2.3000000000000003e-05,
"loss": 0.6991,
"step": 27000
},
{
"epoch": 55.9,
"eval_accuracy": 0.8275351591413768,
"eval_loss": 0.9627696871757507,
"eval_runtime": 1.6871,
"eval_samples_per_second": 508.578,
"eval_steps_per_second": 32.008,
"step": 27000
},
{
"epoch": 56.94,
"learning_rate": 2.25e-05,
"loss": 0.6896,
"step": 27500
},
{
"epoch": 56.94,
"eval_accuracy": 0.840960240060015,
"eval_loss": 0.8363039493560791,
"eval_runtime": 1.684,
"eval_samples_per_second": 509.495,
"eval_steps_per_second": 32.066,
"step": 27500
},
{
"epoch": 57.97,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.6925,
"step": 28000
},
{
"epoch": 57.97,
"eval_accuracy": 0.8391812865497076,
"eval_loss": 0.8432921767234802,
"eval_runtime": 1.6968,
"eval_samples_per_second": 505.655,
"eval_steps_per_second": 31.824,
"step": 28000
},
{
"epoch": 59.01,
"learning_rate": 2.15e-05,
"loss": 0.7081,
"step": 28500
},
{
"epoch": 59.01,
"eval_accuracy": 0.8223048327137547,
"eval_loss": 1.0085676908493042,
"eval_runtime": 1.69,
"eval_samples_per_second": 507.688,
"eval_steps_per_second": 31.952,
"step": 28500
},
{
"epoch": 60.04,
"learning_rate": 2.1e-05,
"loss": 0.6598,
"step": 29000
},
{
"epoch": 60.04,
"eval_accuracy": 0.8333333333333334,
"eval_loss": 0.9250668883323669,
"eval_runtime": 1.686,
"eval_samples_per_second": 508.895,
"eval_steps_per_second": 32.028,
"step": 29000
},
{
"epoch": 61.08,
"learning_rate": 2.05e-05,
"loss": 0.6677,
"step": 29500
},
{
"epoch": 61.08,
"eval_accuracy": 0.8437047756874095,
"eval_loss": 0.8822752237319946,
"eval_runtime": 1.693,
"eval_samples_per_second": 506.807,
"eval_steps_per_second": 31.897,
"step": 29500
},
{
"epoch": 62.11,
"learning_rate": 2e-05,
"loss": 0.695,
"step": 30000
},
{
"epoch": 62.11,
"eval_accuracy": 0.8560371517027864,
"eval_loss": 0.7750544548034668,
"eval_runtime": 1.6969,
"eval_samples_per_second": 505.632,
"eval_steps_per_second": 31.823,
"step": 30000
},
{
"epoch": 63.15,
"learning_rate": 1.9500000000000003e-05,
"loss": 0.7108,
"step": 30500
},
{
"epoch": 63.15,
"eval_accuracy": 0.8481104651162791,
"eval_loss": 0.8452057242393494,
"eval_runtime": 1.6974,
"eval_samples_per_second": 505.49,
"eval_steps_per_second": 31.814,
"step": 30500
},
{
"epoch": 64.18,
"learning_rate": 1.9e-05,
"loss": 0.6721,
"step": 31000
},
{
"epoch": 64.18,
"eval_accuracy": 0.8413284132841329,
"eval_loss": 0.8559600114822388,
"eval_runtime": 1.6936,
"eval_samples_per_second": 506.623,
"eval_steps_per_second": 31.885,
"step": 31000
},
{
"epoch": 65.22,
"learning_rate": 1.85e-05,
"loss": 0.6571,
"step": 31500
},
{
"epoch": 65.22,
"eval_accuracy": 0.8163109756097561,
"eval_loss": 0.98003089427948,
"eval_runtime": 1.6913,
"eval_samples_per_second": 507.303,
"eval_steps_per_second": 31.928,
"step": 31500
},
{
"epoch": 66.25,
"learning_rate": 1.8e-05,
"loss": 0.6891,
"step": 32000
},
{
"epoch": 66.25,
"eval_accuracy": 0.8457446808510638,
"eval_loss": 0.8105884194374084,
"eval_runtime": 1.6942,
"eval_samples_per_second": 506.435,
"eval_steps_per_second": 31.874,
"step": 32000
},
{
"epoch": 67.29,
"learning_rate": 1.75e-05,
"loss": 0.6541,
"step": 32500
},
{
"epoch": 67.29,
"eval_accuracy": 0.8429752066115702,
"eval_loss": 0.8197007179260254,
"eval_runtime": 1.6912,
"eval_samples_per_second": 507.332,
"eval_steps_per_second": 31.93,
"step": 32500
},
{
"epoch": 68.32,
"learning_rate": 1.7000000000000003e-05,
"loss": 0.6559,
"step": 33000
},
{
"epoch": 68.32,
"eval_accuracy": 0.8388305847076462,
"eval_loss": 0.8678442239761353,
"eval_runtime": 1.6945,
"eval_samples_per_second": 506.35,
"eval_steps_per_second": 31.868,
"step": 33000
},
{
"epoch": 69.36,
"learning_rate": 1.65e-05,
"loss": 0.6554,
"step": 33500
},
{
"epoch": 69.36,
"eval_accuracy": 0.8661764705882353,
"eval_loss": 0.7396097183227539,
"eval_runtime": 1.6934,
"eval_samples_per_second": 506.658,
"eval_steps_per_second": 31.888,
"step": 33500
},
{
"epoch": 70.39,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.618,
"step": 34000
},
{
"epoch": 70.39,
"eval_accuracy": 0.8375634517766497,
"eval_loss": 0.8517589569091797,
"eval_runtime": 1.6983,
"eval_samples_per_second": 505.224,
"eval_steps_per_second": 31.797,
"step": 34000
},
{
"epoch": 71.43,
"learning_rate": 1.55e-05,
"loss": 0.6558,
"step": 34500
},
{
"epoch": 71.43,
"eval_accuracy": 0.8409090909090909,
"eval_loss": 0.7705618739128113,
"eval_runtime": 1.6954,
"eval_samples_per_second": 506.065,
"eval_steps_per_second": 31.85,
"step": 34500
},
{
"epoch": 72.46,
"learning_rate": 1.5e-05,
"loss": 0.6034,
"step": 35000
},
{
"epoch": 72.46,
"eval_accuracy": 0.8517699115044248,
"eval_loss": 0.7829406261444092,
"eval_runtime": 1.6974,
"eval_samples_per_second": 505.471,
"eval_steps_per_second": 31.813,
"step": 35000
},
{
"epoch": 73.5,
"learning_rate": 1.45e-05,
"loss": 0.6336,
"step": 35500
},
{
"epoch": 73.5,
"eval_accuracy": 0.8591445427728613,
"eval_loss": 0.7834987640380859,
"eval_runtime": 1.6914,
"eval_samples_per_second": 507.26,
"eval_steps_per_second": 31.925,
"step": 35500
},
{
"epoch": 74.53,
"learning_rate": 1.4000000000000001e-05,
"loss": 0.6287,
"step": 36000
},
{
"epoch": 74.53,
"eval_accuracy": 0.8574748257164988,
"eval_loss": 0.7547706961631775,
"eval_runtime": 1.6906,
"eval_samples_per_second": 507.513,
"eval_steps_per_second": 31.941,
"step": 36000
},
{
"epoch": 75.57,
"learning_rate": 1.3500000000000001e-05,
"loss": 0.6065,
"step": 36500
},
{
"epoch": 75.57,
"eval_accuracy": 0.8508005822416302,
"eval_loss": 0.8541703224182129,
"eval_runtime": 1.6919,
"eval_samples_per_second": 507.134,
"eval_steps_per_second": 31.918,
"step": 36500
},
{
"epoch": 76.6,
"learning_rate": 1.3000000000000001e-05,
"loss": 0.6029,
"step": 37000
},
{
"epoch": 76.6,
"eval_accuracy": 0.8405267008046818,
"eval_loss": 0.8202521800994873,
"eval_runtime": 1.6903,
"eval_samples_per_second": 507.595,
"eval_steps_per_second": 31.947,
"step": 37000
},
{
"epoch": 77.64,
"learning_rate": 1.25e-05,
"loss": 0.6208,
"step": 37500
},
{
"epoch": 77.64,
"eval_accuracy": 0.8661417322834646,
"eval_loss": 0.7082335948944092,
"eval_runtime": 1.6867,
"eval_samples_per_second": 508.681,
"eval_steps_per_second": 32.015,
"step": 37500
},
{
"epoch": 78.67,
"learning_rate": 1.2e-05,
"loss": 0.64,
"step": 38000
},
{
"epoch": 78.67,
"eval_accuracy": 0.8410295230885693,
"eval_loss": 0.8504825234413147,
"eval_runtime": 1.6943,
"eval_samples_per_second": 506.417,
"eval_steps_per_second": 31.872,
"step": 38000
},
{
"epoch": 79.71,
"learning_rate": 1.1500000000000002e-05,
"loss": 0.6144,
"step": 38500
},
{
"epoch": 79.71,
"eval_accuracy": 0.8603636363636363,
"eval_loss": 0.7246142625808716,
"eval_runtime": 1.6864,
"eval_samples_per_second": 508.77,
"eval_steps_per_second": 32.02,
"step": 38500
},
{
"epoch": 80.75,
"learning_rate": 1.1000000000000001e-05,
"loss": 0.6507,
"step": 39000
},
{
"epoch": 80.75,
"eval_accuracy": 0.861132660977502,
"eval_loss": 0.7150202393531799,
"eval_runtime": 1.701,
"eval_samples_per_second": 504.398,
"eval_steps_per_second": 31.745,
"step": 39000
},
{
"epoch": 81.78,
"learning_rate": 1.05e-05,
"loss": 0.6177,
"step": 39500
},
{
"epoch": 81.78,
"eval_accuracy": 0.84,
"eval_loss": 0.9331970810890198,
"eval_runtime": 1.6939,
"eval_samples_per_second": 506.536,
"eval_steps_per_second": 31.88,
"step": 39500
},
{
"epoch": 82.82,
"learning_rate": 1e-05,
"loss": 0.6159,
"step": 40000
},
{
"epoch": 82.82,
"eval_accuracy": 0.8733488733488733,
"eval_loss": 0.6427481174468994,
"eval_runtime": 1.6965,
"eval_samples_per_second": 505.755,
"eval_steps_per_second": 31.831,
"step": 40000
}
],
"logging_steps": 500,
"max_steps": 50000,
"num_train_epochs": 104,
"save_steps": 500,
"total_flos": 6042662847119360.0,
"trial_name": null,
"trial_params": null
}