bhaskara / trainer_state.json
Matthew Finlayson
adding model
b39dcfe
raw
history blame
46.6 kB
{
"best_metric": 0.58837890625,
"best_model_checkpoint": "/output/checkpoint-3300",
"epoch": 10.0,
"global_step": 17620,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"eval_accuracy": 0.8214358287770616,
"eval_loss": 0.79296875,
"eval_runtime": 301.7299,
"eval_samples_per_second": 14.301,
"eval_steps_per_second": 1.79,
"step": 100
},
{
"epoch": 0.11,
"eval_accuracy": 0.828983212304709,
"eval_loss": 0.75439453125,
"eval_runtime": 295.5652,
"eval_samples_per_second": 14.599,
"eval_steps_per_second": 1.827,
"step": 200
},
{
"epoch": 0.17,
"eval_accuracy": 0.83275463867547,
"eval_loss": 0.73583984375,
"eval_runtime": 297.8323,
"eval_samples_per_second": 14.488,
"eval_steps_per_second": 1.813,
"step": 300
},
{
"epoch": 0.23,
"eval_accuracy": 0.8357279670702464,
"eval_loss": 0.71923828125,
"eval_runtime": 297.4382,
"eval_samples_per_second": 14.507,
"eval_steps_per_second": 1.816,
"step": 400
},
{
"epoch": 0.28,
"learning_rate": 5e-05,
"loss": 0.8156,
"step": 500
},
{
"epoch": 0.28,
"eval_accuracy": 0.8396754144819782,
"eval_loss": 0.701171875,
"eval_runtime": 295.955,
"eval_samples_per_second": 14.58,
"eval_steps_per_second": 1.825,
"step": 500
},
{
"epoch": 0.34,
"eval_accuracy": 0.8418578941585707,
"eval_loss": 0.6904296875,
"eval_runtime": 305.3997,
"eval_samples_per_second": 14.129,
"eval_steps_per_second": 1.768,
"step": 600
},
{
"epoch": 0.4,
"eval_accuracy": 0.8439807939976145,
"eval_loss": 0.68017578125,
"eval_runtime": 303.944,
"eval_samples_per_second": 14.197,
"eval_steps_per_second": 1.777,
"step": 700
},
{
"epoch": 0.45,
"eval_accuracy": 0.8464999110833223,
"eval_loss": 0.6669921875,
"eval_runtime": 297.0827,
"eval_samples_per_second": 14.525,
"eval_steps_per_second": 1.818,
"step": 800
},
{
"epoch": 0.51,
"eval_accuracy": 0.8485815807686252,
"eval_loss": 0.6572265625,
"eval_runtime": 292.5803,
"eval_samples_per_second": 14.748,
"eval_steps_per_second": 1.846,
"step": 900
},
{
"epoch": 0.57,
"learning_rate": 5e-05,
"loss": 0.7219,
"step": 1000
},
{
"epoch": 0.57,
"eval_accuracy": 0.8500273546212319,
"eval_loss": 0.64990234375,
"eval_runtime": 292.7704,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.844,
"step": 1000
},
{
"epoch": 0.62,
"eval_accuracy": 0.8521808372666221,
"eval_loss": 0.64111328125,
"eval_runtime": 292.5084,
"eval_samples_per_second": 14.752,
"eval_steps_per_second": 1.846,
"step": 1100
},
{
"epoch": 0.68,
"eval_accuracy": 0.853712016437692,
"eval_loss": 0.63427734375,
"eval_runtime": 292.6573,
"eval_samples_per_second": 14.744,
"eval_steps_per_second": 1.845,
"step": 1200
},
{
"epoch": 0.74,
"eval_accuracy": 0.8545884970136456,
"eval_loss": 0.6298828125,
"eval_runtime": 291.6972,
"eval_samples_per_second": 14.793,
"eval_steps_per_second": 1.851,
"step": 1300
},
{
"epoch": 0.79,
"eval_accuracy": 0.856088866839063,
"eval_loss": 0.6220703125,
"eval_runtime": 291.6264,
"eval_samples_per_second": 14.796,
"eval_steps_per_second": 1.852,
"step": 1400
},
{
"epoch": 0.85,
"learning_rate": 5e-05,
"loss": 0.662,
"step": 1500
},
{
"epoch": 0.85,
"eval_accuracy": 0.857388975917739,
"eval_loss": 0.61572265625,
"eval_runtime": 292.8293,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 1500
},
{
"epoch": 0.91,
"eval_accuracy": 0.8579150001868949,
"eval_loss": 0.61376953125,
"eval_runtime": 292.6424,
"eval_samples_per_second": 14.745,
"eval_steps_per_second": 1.845,
"step": 1600
},
{
"epoch": 0.96,
"eval_accuracy": 0.8595497078209298,
"eval_loss": 0.60546875,
"eval_runtime": 292.7935,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 1700
},
{
"epoch": 1.02,
"eval_accuracy": 0.8597678651728665,
"eval_loss": 0.6142578125,
"eval_runtime": 292.6887,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 1800
},
{
"epoch": 1.08,
"eval_accuracy": 0.8598800021294695,
"eval_loss": 0.619140625,
"eval_runtime": 291.7039,
"eval_samples_per_second": 14.792,
"eval_steps_per_second": 1.851,
"step": 1900
},
{
"epoch": 1.14,
"learning_rate": 5e-05,
"loss": 0.5707,
"step": 2000
},
{
"epoch": 1.14,
"eval_accuracy": 0.8606622423540152,
"eval_loss": 0.61181640625,
"eval_runtime": 292.661,
"eval_samples_per_second": 14.744,
"eval_steps_per_second": 1.845,
"step": 2000
},
{
"epoch": 1.19,
"eval_accuracy": 0.8611497549411055,
"eval_loss": 0.6123046875,
"eval_runtime": 292.734,
"eval_samples_per_second": 14.74,
"eval_steps_per_second": 1.845,
"step": 2100
},
{
"epoch": 1.25,
"eval_accuracy": 0.8616524456617156,
"eval_loss": 0.60888671875,
"eval_runtime": 291.6011,
"eval_samples_per_second": 14.798,
"eval_steps_per_second": 1.852,
"step": 2200
},
{
"epoch": 1.31,
"eval_accuracy": 0.8618950692587294,
"eval_loss": 0.6064453125,
"eval_runtime": 291.5809,
"eval_samples_per_second": 14.799,
"eval_steps_per_second": 1.852,
"step": 2300
},
{
"epoch": 1.36,
"eval_accuracy": 0.8625089907787176,
"eval_loss": 0.60791015625,
"eval_runtime": 292.7393,
"eval_samples_per_second": 14.74,
"eval_steps_per_second": 1.845,
"step": 2400
},
{
"epoch": 1.42,
"learning_rate": 5e-05,
"loss": 0.4923,
"step": 2500
},
{
"epoch": 1.42,
"eval_accuracy": 0.8624713852538769,
"eval_loss": 0.60400390625,
"eval_runtime": 292.7894,
"eval_samples_per_second": 14.738,
"eval_steps_per_second": 1.844,
"step": 2500
},
{
"epoch": 1.48,
"eval_accuracy": 0.8630089630276525,
"eval_loss": 0.60302734375,
"eval_runtime": 291.5842,
"eval_samples_per_second": 14.798,
"eval_steps_per_second": 1.852,
"step": 2600
},
{
"epoch": 1.53,
"eval_accuracy": 0.8636167679863714,
"eval_loss": 0.60205078125,
"eval_runtime": 291.6586,
"eval_samples_per_second": 14.795,
"eval_steps_per_second": 1.851,
"step": 2700
},
{
"epoch": 1.59,
"eval_accuracy": 0.8643006901519965,
"eval_loss": 0.60009765625,
"eval_runtime": 291.5536,
"eval_samples_per_second": 14.8,
"eval_steps_per_second": 1.852,
"step": 2800
},
{
"epoch": 1.65,
"eval_accuracy": 0.864360949607464,
"eval_loss": 0.59814453125,
"eval_runtime": 292.9127,
"eval_samples_per_second": 14.731,
"eval_steps_per_second": 1.844,
"step": 2900
},
{
"epoch": 1.7,
"learning_rate": 5e-05,
"loss": 0.4909,
"step": 3000
},
{
"epoch": 1.7,
"eval_accuracy": 0.8647961316148062,
"eval_loss": 0.59423828125,
"eval_runtime": 292.7189,
"eval_samples_per_second": 14.741,
"eval_steps_per_second": 1.845,
"step": 3000
},
{
"epoch": 1.76,
"eval_accuracy": 0.8649898227216659,
"eval_loss": 0.591796875,
"eval_runtime": 291.4828,
"eval_samples_per_second": 14.804,
"eval_steps_per_second": 1.853,
"step": 3100
},
{
"epoch": 1.82,
"eval_accuracy": 0.8659145561698547,
"eval_loss": 0.59228515625,
"eval_runtime": 291.6085,
"eval_samples_per_second": 14.797,
"eval_steps_per_second": 1.852,
"step": 3200
},
{
"epoch": 1.87,
"eval_accuracy": 0.8663780555904803,
"eval_loss": 0.58837890625,
"eval_runtime": 291.6163,
"eval_samples_per_second": 14.797,
"eval_steps_per_second": 1.852,
"step": 3300
},
{
"epoch": 1.93,
"eval_accuracy": 0.8662514201182762,
"eval_loss": 0.58837890625,
"eval_runtime": 291.7936,
"eval_samples_per_second": 14.788,
"eval_steps_per_second": 1.851,
"step": 3400
},
{
"epoch": 1.99,
"learning_rate": 5e-05,
"loss": 0.4964,
"step": 3500
},
{
"epoch": 1.99,
"eval_accuracy": 0.8669489346422774,
"eval_loss": 0.59033203125,
"eval_runtime": 291.5679,
"eval_samples_per_second": 14.799,
"eval_steps_per_second": 1.852,
"step": 3500
},
{
"epoch": 2.04,
"eval_accuracy": 0.8654691798937304,
"eval_loss": 0.64208984375,
"eval_runtime": 291.7326,
"eval_samples_per_second": 14.791,
"eval_steps_per_second": 1.851,
"step": 3600
},
{
"epoch": 2.1,
"eval_accuracy": 0.8650994677459,
"eval_loss": 0.64013671875,
"eval_runtime": 293.0337,
"eval_samples_per_second": 14.725,
"eval_steps_per_second": 1.843,
"step": 3700
},
{
"epoch": 2.16,
"eval_accuracy": 0.8649474598713937,
"eval_loss": 0.64111328125,
"eval_runtime": 291.5404,
"eval_samples_per_second": 14.801,
"eval_steps_per_second": 1.852,
"step": 3800
},
{
"epoch": 2.21,
"eval_accuracy": 0.8645410483559476,
"eval_loss": 0.638671875,
"eval_runtime": 291.6679,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 3900
},
{
"epoch": 2.27,
"learning_rate": 5e-05,
"loss": 0.345,
"step": 4000
},
{
"epoch": 2.27,
"eval_accuracy": 0.8653572694764337,
"eval_loss": 0.63623046875,
"eval_runtime": 291.7901,
"eval_samples_per_second": 14.788,
"eval_steps_per_second": 1.851,
"step": 4000
},
{
"epoch": 2.33,
"eval_accuracy": 0.8654186616284325,
"eval_loss": 0.63623046875,
"eval_runtime": 291.6935,
"eval_samples_per_second": 14.793,
"eval_steps_per_second": 1.851,
"step": 4100
},
{
"epoch": 2.38,
"eval_accuracy": 0.8653559102405961,
"eval_loss": 0.63623046875,
"eval_runtime": 291.8601,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 4200
},
{
"epoch": 2.44,
"eval_accuracy": 0.8654696329723429,
"eval_loss": 0.6357421875,
"eval_runtime": 292.876,
"eval_samples_per_second": 14.733,
"eval_steps_per_second": 1.844,
"step": 4300
},
{
"epoch": 2.5,
"eval_accuracy": 0.8655647794809758,
"eval_loss": 0.63623046875,
"eval_runtime": 291.8742,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 4400
},
{
"epoch": 2.55,
"learning_rate": 5e-05,
"loss": 0.3463,
"step": 4500
},
{
"epoch": 2.55,
"eval_accuracy": 0.865790865708632,
"eval_loss": 0.6376953125,
"eval_runtime": 292.8657,
"eval_samples_per_second": 14.734,
"eval_steps_per_second": 1.844,
"step": 4500
},
{
"epoch": 2.61,
"eval_accuracy": 0.8660047188137496,
"eval_loss": 0.6357421875,
"eval_runtime": 292.8962,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 4600
},
{
"epoch": 2.67,
"eval_accuracy": 0.8664827167499765,
"eval_loss": 0.62939453125,
"eval_runtime": 291.8634,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 4700
},
{
"epoch": 2.72,
"eval_accuracy": 0.8664838494465078,
"eval_loss": 0.63330078125,
"eval_runtime": 292.9139,
"eval_samples_per_second": 14.731,
"eval_steps_per_second": 1.844,
"step": 4800
},
{
"epoch": 2.78,
"eval_accuracy": 0.8661707721252445,
"eval_loss": 0.63623046875,
"eval_runtime": 291.8657,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 4900
},
{
"epoch": 2.84,
"learning_rate": 5e-05,
"loss": 0.3508,
"step": 5000
},
{
"epoch": 2.84,
"eval_accuracy": 0.8666297407597449,
"eval_loss": 0.6357421875,
"eval_runtime": 291.8826,
"eval_samples_per_second": 14.783,
"eval_steps_per_second": 1.85,
"step": 5000
},
{
"epoch": 2.89,
"eval_accuracy": 0.8673172875542703,
"eval_loss": 0.6298828125,
"eval_runtime": 291.7729,
"eval_samples_per_second": 14.789,
"eval_steps_per_second": 1.851,
"step": 5100
},
{
"epoch": 2.95,
"eval_accuracy": 0.866801004475284,
"eval_loss": 0.63134765625,
"eval_runtime": 291.9322,
"eval_samples_per_second": 14.781,
"eval_steps_per_second": 1.85,
"step": 5200
},
{
"epoch": 3.01,
"eval_accuracy": 0.8646230555848169,
"eval_loss": 0.71875,
"eval_runtime": 291.8776,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 5300
},
{
"epoch": 3.06,
"eval_accuracy": 0.8655867537936839,
"eval_loss": 0.70166015625,
"eval_runtime": 293.0438,
"eval_samples_per_second": 14.725,
"eval_steps_per_second": 1.843,
"step": 5400
},
{
"epoch": 3.12,
"learning_rate": 5e-05,
"loss": 0.295,
"step": 5500
},
{
"epoch": 3.12,
"eval_accuracy": 0.8652775276406272,
"eval_loss": 0.6982421875,
"eval_runtime": 291.9251,
"eval_samples_per_second": 14.781,
"eval_steps_per_second": 1.85,
"step": 5500
},
{
"epoch": 3.18,
"eval_accuracy": 0.8654533221422916,
"eval_loss": 0.703125,
"eval_runtime": 292.9873,
"eval_samples_per_second": 14.728,
"eval_steps_per_second": 1.843,
"step": 5600
},
{
"epoch": 3.23,
"eval_accuracy": 0.8650992412065936,
"eval_loss": 0.69921875,
"eval_runtime": 293.1169,
"eval_samples_per_second": 14.721,
"eval_steps_per_second": 1.842,
"step": 5700
},
{
"epoch": 3.29,
"eval_accuracy": 0.8652641618215573,
"eval_loss": 0.69970703125,
"eval_runtime": 293.0467,
"eval_samples_per_second": 14.725,
"eval_steps_per_second": 1.843,
"step": 5800
},
{
"epoch": 3.35,
"eval_accuracy": 0.865102865835494,
"eval_loss": 0.7041015625,
"eval_runtime": 292.9654,
"eval_samples_per_second": 14.729,
"eval_steps_per_second": 1.843,
"step": 5900
},
{
"epoch": 3.41,
"learning_rate": 5e-05,
"loss": 0.2348,
"step": 6000
},
{
"epoch": 3.41,
"eval_accuracy": 0.8649191424581101,
"eval_loss": 0.70751953125,
"eval_runtime": 291.6546,
"eval_samples_per_second": 14.795,
"eval_steps_per_second": 1.852,
"step": 6000
},
{
"epoch": 3.46,
"eval_accuracy": 0.8649929942719536,
"eval_loss": 0.69921875,
"eval_runtime": 293.0033,
"eval_samples_per_second": 14.727,
"eval_steps_per_second": 1.843,
"step": 6100
},
{
"epoch": 3.52,
"eval_accuracy": 0.8647333802269698,
"eval_loss": 0.70654296875,
"eval_runtime": 292.0433,
"eval_samples_per_second": 14.775,
"eval_steps_per_second": 1.849,
"step": 6200
},
{
"epoch": 3.58,
"eval_accuracy": 0.8651932550186952,
"eval_loss": 0.69970703125,
"eval_runtime": 292.8338,
"eval_samples_per_second": 14.735,
"eval_steps_per_second": 1.844,
"step": 6300
},
{
"epoch": 3.63,
"eval_accuracy": 0.8651128335649698,
"eval_loss": 0.70263671875,
"eval_runtime": 291.8152,
"eval_samples_per_second": 14.787,
"eval_steps_per_second": 1.85,
"step": 6400
},
{
"epoch": 3.69,
"learning_rate": 5e-05,
"loss": 0.2411,
"step": 6500
},
{
"epoch": 3.69,
"eval_accuracy": 0.8655713491208575,
"eval_loss": 0.70458984375,
"eval_runtime": 292.9241,
"eval_samples_per_second": 14.731,
"eval_steps_per_second": 1.843,
"step": 6500
},
{
"epoch": 3.75,
"eval_accuracy": 0.8655097304295525,
"eval_loss": 0.70068359375,
"eval_runtime": 293.0902,
"eval_samples_per_second": 14.722,
"eval_steps_per_second": 1.842,
"step": 6600
},
{
"epoch": 3.8,
"eval_accuracy": 0.8651318628666963,
"eval_loss": 0.70263671875,
"eval_runtime": 292.9879,
"eval_samples_per_second": 14.728,
"eval_steps_per_second": 1.843,
"step": 6700
},
{
"epoch": 3.86,
"eval_accuracy": 0.8654825457128003,
"eval_loss": 0.703125,
"eval_runtime": 292.8939,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 6800
},
{
"epoch": 3.92,
"eval_accuracy": 0.8657906391693256,
"eval_loss": 0.701171875,
"eval_runtime": 292.7957,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 6900
},
{
"epoch": 3.97,
"learning_rate": 5e-05,
"loss": 0.251,
"step": 7000
},
{
"epoch": 3.97,
"eval_accuracy": 0.8656236797006056,
"eval_loss": 0.705078125,
"eval_runtime": 292.768,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.844,
"step": 7000
},
{
"epoch": 4.03,
"eval_accuracy": 0.8650197259100934,
"eval_loss": 0.7607421875,
"eval_runtime": 293.0243,
"eval_samples_per_second": 14.726,
"eval_steps_per_second": 1.843,
"step": 7100
},
{
"epoch": 4.09,
"eval_accuracy": 0.8655654590988946,
"eval_loss": 0.76318359375,
"eval_runtime": 293.1258,
"eval_samples_per_second": 14.721,
"eval_steps_per_second": 1.842,
"step": 7200
},
{
"epoch": 4.14,
"eval_accuracy": 0.8654986300035453,
"eval_loss": 0.7587890625,
"eval_runtime": 291.7489,
"eval_samples_per_second": 14.79,
"eval_steps_per_second": 1.851,
"step": 7300
},
{
"epoch": 4.2,
"eval_accuracy": 0.8650992412065936,
"eval_loss": 0.7578125,
"eval_runtime": 291.7365,
"eval_samples_per_second": 14.791,
"eval_steps_per_second": 1.851,
"step": 7400
},
{
"epoch": 4.26,
"learning_rate": 5e-05,
"loss": 0.1797,
"step": 7500
},
{
"epoch": 4.26,
"eval_accuracy": 0.8644635719132038,
"eval_loss": 0.77099609375,
"eval_runtime": 292.8962,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 7500
},
{
"epoch": 4.31,
"eval_accuracy": 0.8648369086899346,
"eval_loss": 0.7626953125,
"eval_runtime": 291.9443,
"eval_samples_per_second": 14.78,
"eval_steps_per_second": 1.85,
"step": 7600
},
{
"epoch": 4.37,
"eval_accuracy": 0.8650006966083668,
"eval_loss": 0.75830078125,
"eval_runtime": 292.6223,
"eval_samples_per_second": 14.746,
"eval_steps_per_second": 1.845,
"step": 7700
},
{
"epoch": 4.43,
"eval_accuracy": 0.8648572972274987,
"eval_loss": 0.7646484375,
"eval_runtime": 292.6746,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 7800
},
{
"epoch": 4.48,
"eval_accuracy": 0.8645709515443751,
"eval_loss": 0.759765625,
"eval_runtime": 292.8238,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 7900
},
{
"epoch": 4.54,
"learning_rate": 5e-05,
"loss": 0.1784,
"step": 8000
},
{
"epoch": 4.54,
"eval_accuracy": 0.8649886900251346,
"eval_loss": 0.765625,
"eval_runtime": 292.7739,
"eval_samples_per_second": 14.738,
"eval_steps_per_second": 1.844,
"step": 8000
},
{
"epoch": 4.6,
"eval_accuracy": 0.8647777819309984,
"eval_loss": 0.76171875,
"eval_runtime": 291.5794,
"eval_samples_per_second": 14.799,
"eval_steps_per_second": 1.852,
"step": 8100
},
{
"epoch": 4.65,
"eval_accuracy": 0.8650895000164242,
"eval_loss": 0.75732421875,
"eval_runtime": 292.5766,
"eval_samples_per_second": 14.748,
"eval_steps_per_second": 1.846,
"step": 8200
},
{
"epoch": 4.71,
"eval_accuracy": 0.8647773288523859,
"eval_loss": 0.76708984375,
"eval_runtime": 292.8751,
"eval_samples_per_second": 14.733,
"eval_steps_per_second": 1.844,
"step": 8300
},
{
"epoch": 4.77,
"eval_accuracy": 0.8651295974736336,
"eval_loss": 0.75634765625,
"eval_runtime": 292.7435,
"eval_samples_per_second": 14.74,
"eval_steps_per_second": 1.845,
"step": 8400
},
{
"epoch": 4.82,
"learning_rate": 5e-05,
"loss": 0.1827,
"step": 8500
},
{
"epoch": 4.82,
"eval_accuracy": 0.8648883331124575,
"eval_loss": 0.76513671875,
"eval_runtime": 291.561,
"eval_samples_per_second": 14.8,
"eval_steps_per_second": 1.852,
"step": 8500
},
{
"epoch": 4.88,
"eval_accuracy": 0.8649513110396002,
"eval_loss": 0.763671875,
"eval_runtime": 292.871,
"eval_samples_per_second": 14.733,
"eval_steps_per_second": 1.844,
"step": 8600
},
{
"epoch": 4.94,
"eval_accuracy": 0.8653917034509865,
"eval_loss": 0.7607421875,
"eval_runtime": 292.9668,
"eval_samples_per_second": 14.729,
"eval_steps_per_second": 1.843,
"step": 8700
},
{
"epoch": 4.99,
"eval_accuracy": 0.86499322081126,
"eval_loss": 0.7607421875,
"eval_runtime": 292.8128,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 8800
},
{
"epoch": 5.05,
"eval_accuracy": 0.8645997220362712,
"eval_loss": 0.81494140625,
"eval_runtime": 292.864,
"eval_samples_per_second": 14.734,
"eval_steps_per_second": 1.844,
"step": 8900
},
{
"epoch": 5.11,
"learning_rate": 5e-05,
"loss": 0.167,
"step": 9000
},
{
"epoch": 5.11,
"eval_accuracy": 0.8647852577281052,
"eval_loss": 0.80810546875,
"eval_runtime": 292.7519,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.845,
"step": 9000
},
{
"epoch": 5.16,
"eval_accuracy": 0.8643582311357888,
"eval_loss": 0.818359375,
"eval_runtime": 291.6434,
"eval_samples_per_second": 14.795,
"eval_steps_per_second": 1.852,
"step": 9100
},
{
"epoch": 5.22,
"eval_accuracy": 0.8647263575084754,
"eval_loss": 0.81396484375,
"eval_runtime": 292.5189,
"eval_samples_per_second": 14.751,
"eval_steps_per_second": 1.846,
"step": 9200
},
{
"epoch": 5.28,
"eval_accuracy": 0.8643802054484968,
"eval_loss": 0.81689453125,
"eval_runtime": 291.4921,
"eval_samples_per_second": 14.803,
"eval_steps_per_second": 1.853,
"step": 9300
},
{
"epoch": 5.33,
"eval_accuracy": 0.8644685557779417,
"eval_loss": 0.81201171875,
"eval_runtime": 292.6907,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 9400
},
{
"epoch": 5.39,
"learning_rate": 5e-05,
"loss": 0.1371,
"step": 9500
},
{
"epoch": 5.39,
"eval_accuracy": 0.8642560619086617,
"eval_loss": 0.8154296875,
"eval_runtime": 292.8129,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 9500
},
{
"epoch": 5.45,
"eval_accuracy": 0.8642103009687954,
"eval_loss": 0.81787109375,
"eval_runtime": 292.7443,
"eval_samples_per_second": 14.74,
"eval_steps_per_second": 1.845,
"step": 9600
},
{
"epoch": 5.51,
"eval_accuracy": 0.8642599130768682,
"eval_loss": 0.8154296875,
"eval_runtime": 291.6813,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 9700
},
{
"epoch": 5.56,
"eval_accuracy": 0.8645023101345757,
"eval_loss": 0.81201171875,
"eval_runtime": 292.6716,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 9800
},
{
"epoch": 5.62,
"eval_accuracy": 0.8649979781366915,
"eval_loss": 0.81103515625,
"eval_runtime": 292.7654,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.844,
"step": 9900
},
{
"epoch": 5.68,
"learning_rate": 5e-05,
"loss": 0.1425,
"step": 10000
},
{
"epoch": 5.68,
"eval_accuracy": 0.8645428606703978,
"eval_loss": 0.81591796875,
"eval_runtime": 292.6875,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 10000
},
{
"epoch": 5.73,
"eval_accuracy": 0.8646024405079464,
"eval_loss": 0.8173828125,
"eval_runtime": 292.6736,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 10100
},
{
"epoch": 5.79,
"eval_accuracy": 0.8649073624141841,
"eval_loss": 0.81591796875,
"eval_runtime": 292.7868,
"eval_samples_per_second": 14.738,
"eval_steps_per_second": 1.844,
"step": 10200
},
{
"epoch": 5.85,
"eval_accuracy": 0.8639400395764169,
"eval_loss": 0.81103515625,
"eval_runtime": 292.8417,
"eval_samples_per_second": 14.735,
"eval_steps_per_second": 1.844,
"step": 10300
},
{
"epoch": 5.9,
"eval_accuracy": 0.8645482976137482,
"eval_loss": 0.8134765625,
"eval_runtime": 292.765,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.844,
"step": 10400
},
{
"epoch": 5.96,
"learning_rate": 5e-05,
"loss": 0.1505,
"step": 10500
},
{
"epoch": 5.96,
"eval_accuracy": 0.8642195890803523,
"eval_loss": 0.81396484375,
"eval_runtime": 292.5807,
"eval_samples_per_second": 14.748,
"eval_steps_per_second": 1.846,
"step": 10500
},
{
"epoch": 6.02,
"eval_accuracy": 0.8639674508324753,
"eval_loss": 0.86279296875,
"eval_runtime": 292.6261,
"eval_samples_per_second": 14.746,
"eval_steps_per_second": 1.845,
"step": 10600
},
{
"epoch": 6.07,
"eval_accuracy": 0.8644205294450127,
"eval_loss": 0.85400390625,
"eval_runtime": 292.6297,
"eval_samples_per_second": 14.746,
"eval_steps_per_second": 1.845,
"step": 10700
},
{
"epoch": 6.13,
"eval_accuracy": 0.8642433757075105,
"eval_loss": 0.85302734375,
"eval_runtime": 291.5398,
"eval_samples_per_second": 14.801,
"eval_steps_per_second": 1.852,
"step": 10800
},
{
"epoch": 6.19,
"eval_accuracy": 0.8646898846801662,
"eval_loss": 0.85595703125,
"eval_runtime": 291.664,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 10900
},
{
"epoch": 6.24,
"learning_rate": 5e-05,
"loss": 0.1086,
"step": 11000
},
{
"epoch": 6.24,
"eval_accuracy": 0.864855937991661,
"eval_loss": 0.85546875,
"eval_runtime": 292.8292,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 11000
},
{
"epoch": 6.3,
"eval_accuracy": 0.8643829239201721,
"eval_loss": 0.8603515625,
"eval_runtime": 292.6815,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 11100
},
{
"epoch": 6.36,
"eval_accuracy": 0.8641971616890317,
"eval_loss": 0.85693359375,
"eval_runtime": 292.4181,
"eval_samples_per_second": 14.756,
"eval_steps_per_second": 1.847,
"step": 11200
},
{
"epoch": 6.41,
"eval_accuracy": 0.8638854436036061,
"eval_loss": 0.85302734375,
"eval_runtime": 293.018,
"eval_samples_per_second": 14.726,
"eval_steps_per_second": 1.843,
"step": 11300
},
{
"epoch": 6.47,
"eval_accuracy": 0.8642818873895762,
"eval_loss": 0.85888671875,
"eval_runtime": 291.7384,
"eval_samples_per_second": 14.791,
"eval_steps_per_second": 1.851,
"step": 11400
},
{
"epoch": 6.53,
"learning_rate": 5e-05,
"loss": 0.1076,
"step": 11500
},
{
"epoch": 6.53,
"eval_accuracy": 0.8638872559180562,
"eval_loss": 0.8525390625,
"eval_runtime": 293.4652,
"eval_samples_per_second": 14.704,
"eval_steps_per_second": 1.84,
"step": 11500
},
{
"epoch": 6.58,
"eval_accuracy": 0.8640002990318842,
"eval_loss": 0.85791015625,
"eval_runtime": 292.9912,
"eval_samples_per_second": 14.727,
"eval_steps_per_second": 1.843,
"step": 11600
},
{
"epoch": 6.64,
"eval_accuracy": 0.8639792308764013,
"eval_loss": 0.859375,
"eval_runtime": 292.1362,
"eval_samples_per_second": 14.771,
"eval_steps_per_second": 1.848,
"step": 11700
},
{
"epoch": 6.7,
"eval_accuracy": 0.864300237073384,
"eval_loss": 0.85986328125,
"eval_runtime": 292.9015,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 11800
},
{
"epoch": 6.75,
"eval_accuracy": 0.8639912374596336,
"eval_loss": 0.8564453125,
"eval_runtime": 292.927,
"eval_samples_per_second": 14.731,
"eval_steps_per_second": 1.843,
"step": 11900
},
{
"epoch": 6.81,
"learning_rate": 5e-05,
"loss": 0.1109,
"step": 12000
},
{
"epoch": 6.81,
"eval_accuracy": 0.8640392637925625,
"eval_loss": 0.86328125,
"eval_runtime": 292.9284,
"eval_samples_per_second": 14.731,
"eval_steps_per_second": 1.843,
"step": 12000
},
{
"epoch": 6.87,
"eval_accuracy": 0.8638109121718437,
"eval_loss": 0.8583984375,
"eval_runtime": 292.6011,
"eval_samples_per_second": 14.747,
"eval_steps_per_second": 1.846,
"step": 12100
},
{
"epoch": 6.92,
"eval_accuracy": 0.863599550999095,
"eval_loss": 0.86474609375,
"eval_runtime": 292.8714,
"eval_samples_per_second": 14.733,
"eval_steps_per_second": 1.844,
"step": 12200
},
{
"epoch": 6.98,
"eval_accuracy": 0.8634767666950973,
"eval_loss": 0.85986328125,
"eval_runtime": 291.6716,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 12300
},
{
"epoch": 7.04,
"eval_accuracy": 0.8632459231420095,
"eval_loss": 0.89794921875,
"eval_runtime": 292.8834,
"eval_samples_per_second": 14.733,
"eval_steps_per_second": 1.844,
"step": 12400
},
{
"epoch": 7.09,
"learning_rate": 5e-05,
"loss": 0.1028,
"step": 12500
},
{
"epoch": 7.09,
"eval_accuracy": 0.8634595497078209,
"eval_loss": 0.8935546875,
"eval_runtime": 291.8248,
"eval_samples_per_second": 14.786,
"eval_steps_per_second": 1.85,
"step": 12500
},
{
"epoch": 7.15,
"eval_accuracy": 0.8637445361551069,
"eval_loss": 0.904296875,
"eval_runtime": 293.127,
"eval_samples_per_second": 14.721,
"eval_steps_per_second": 1.842,
"step": 12600
},
{
"epoch": 7.21,
"eval_accuracy": 0.8641618215572539,
"eval_loss": 0.89892578125,
"eval_runtime": 291.7379,
"eval_samples_per_second": 14.791,
"eval_steps_per_second": 1.851,
"step": 12700
},
{
"epoch": 7.26,
"eval_accuracy": 0.8641742812190987,
"eval_loss": 0.8935546875,
"eval_runtime": 291.8762,
"eval_samples_per_second": 14.784,
"eval_steps_per_second": 1.85,
"step": 12800
},
{
"epoch": 7.32,
"eval_accuracy": 0.8641423391769147,
"eval_loss": 0.89208984375,
"eval_runtime": 293.0894,
"eval_samples_per_second": 14.722,
"eval_steps_per_second": 1.842,
"step": 12900
},
{
"epoch": 7.38,
"learning_rate": 5e-05,
"loss": 0.0774,
"step": 13000
},
{
"epoch": 7.38,
"eval_accuracy": 0.8633791282540956,
"eval_loss": 0.8955078125,
"eval_runtime": 292.6585,
"eval_samples_per_second": 14.744,
"eval_steps_per_second": 1.845,
"step": 13000
},
{
"epoch": 7.43,
"eval_accuracy": 0.8636269622551535,
"eval_loss": 0.89501953125,
"eval_runtime": 294.4392,
"eval_samples_per_second": 14.655,
"eval_steps_per_second": 1.834,
"step": 13100
},
{
"epoch": 7.49,
"eval_accuracy": 0.8635223010956573,
"eval_loss": 0.8994140625,
"eval_runtime": 292.0929,
"eval_samples_per_second": 14.773,
"eval_steps_per_second": 1.849,
"step": 13200
},
{
"epoch": 7.55,
"eval_accuracy": 0.8635028187153182,
"eval_loss": 0.89990234375,
"eval_runtime": 291.6539,
"eval_samples_per_second": 14.795,
"eval_steps_per_second": 1.852,
"step": 13300
},
{
"epoch": 7.6,
"eval_accuracy": 0.8631285757813624,
"eval_loss": 0.8935546875,
"eval_runtime": 293.1835,
"eval_samples_per_second": 14.718,
"eval_steps_per_second": 1.842,
"step": 13400
},
{
"epoch": 7.66,
"learning_rate": 5e-05,
"loss": 0.0852,
"step": 13500
},
{
"epoch": 7.66,
"eval_accuracy": 0.863441879641932,
"eval_loss": 0.90478515625,
"eval_runtime": 292.8044,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 13500
},
{
"epoch": 7.72,
"eval_accuracy": 0.8632284796154269,
"eval_loss": 0.89599609375,
"eval_runtime": 292.7129,
"eval_samples_per_second": 14.741,
"eval_steps_per_second": 1.845,
"step": 13600
},
{
"epoch": 7.78,
"eval_accuracy": 0.8634731420661971,
"eval_loss": 0.90234375,
"eval_runtime": 292.9408,
"eval_samples_per_second": 14.73,
"eval_steps_per_second": 1.843,
"step": 13700
},
{
"epoch": 7.83,
"eval_accuracy": 0.8638301680128765,
"eval_loss": 0.8984375,
"eval_runtime": 292.9793,
"eval_samples_per_second": 14.728,
"eval_steps_per_second": 1.843,
"step": 13800
},
{
"epoch": 7.89,
"eval_accuracy": 0.8635458611835093,
"eval_loss": 0.90185546875,
"eval_runtime": 292.8153,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 13900
},
{
"epoch": 7.95,
"learning_rate": 5e-05,
"loss": 0.0879,
"step": 14000
},
{
"epoch": 7.95,
"eval_accuracy": 0.863396345241372,
"eval_loss": 0.9013671875,
"eval_runtime": 292.8988,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 14000
},
{
"epoch": 8.0,
"eval_accuracy": 0.8630044322415271,
"eval_loss": 0.91357421875,
"eval_runtime": 292.7911,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 14100
},
{
"epoch": 8.06,
"eval_accuracy": 0.8638926928614067,
"eval_loss": 0.93115234375,
"eval_runtime": 292.7997,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 14200
},
{
"epoch": 8.12,
"eval_accuracy": 0.8635213949384323,
"eval_loss": 0.9345703125,
"eval_runtime": 292.7958,
"eval_samples_per_second": 14.737,
"eval_steps_per_second": 1.844,
"step": 14300
},
{
"epoch": 8.17,
"eval_accuracy": 0.8635272849603952,
"eval_loss": 0.9306640625,
"eval_runtime": 291.7965,
"eval_samples_per_second": 14.788,
"eval_steps_per_second": 1.851,
"step": 14400
},
{
"epoch": 8.23,
"learning_rate": 5e-05,
"loss": 0.0611,
"step": 14500
},
{
"epoch": 8.23,
"eval_accuracy": 0.8640974843942736,
"eval_loss": 0.94189453125,
"eval_runtime": 292.9438,
"eval_samples_per_second": 14.73,
"eval_steps_per_second": 1.843,
"step": 14500
},
{
"epoch": 8.29,
"eval_accuracy": 0.863091196795828,
"eval_loss": 0.93310546875,
"eval_runtime": 292.6114,
"eval_samples_per_second": 14.747,
"eval_steps_per_second": 1.845,
"step": 14600
},
{
"epoch": 8.34,
"eval_accuracy": 0.8635660231817672,
"eval_loss": 0.9375,
"eval_runtime": 292.5076,
"eval_samples_per_second": 14.752,
"eval_steps_per_second": 1.846,
"step": 14700
},
{
"epoch": 8.4,
"eval_accuracy": 0.8626125192416824,
"eval_loss": 0.92919921875,
"eval_runtime": 292.6676,
"eval_samples_per_second": 14.744,
"eval_steps_per_second": 1.845,
"step": 14800
},
{
"epoch": 8.46,
"eval_accuracy": 0.8637236945389302,
"eval_loss": 0.94580078125,
"eval_runtime": 291.6673,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 14900
},
{
"epoch": 8.51,
"learning_rate": 5e-05,
"loss": 0.061,
"step": 15000
},
{
"epoch": 8.51,
"eval_accuracy": 0.8634267015084119,
"eval_loss": 0.93359375,
"eval_runtime": 291.6632,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 15000
},
{
"epoch": 8.57,
"eval_accuracy": 0.8629693186490555,
"eval_loss": 0.94091796875,
"eval_runtime": 292.9394,
"eval_samples_per_second": 14.73,
"eval_steps_per_second": 1.843,
"step": 15100
},
{
"epoch": 8.63,
"eval_accuracy": 0.8632108095495379,
"eval_loss": 0.93896484375,
"eval_runtime": 292.836,
"eval_samples_per_second": 14.735,
"eval_steps_per_second": 1.844,
"step": 15200
},
{
"epoch": 8.68,
"eval_accuracy": 0.862787634125428,
"eval_loss": 0.9375,
"eval_runtime": 291.5974,
"eval_samples_per_second": 14.798,
"eval_steps_per_second": 1.852,
"step": 15300
},
{
"epoch": 8.74,
"eval_accuracy": 0.8629693186490555,
"eval_loss": 0.9365234375,
"eval_runtime": 292.847,
"eval_samples_per_second": 14.735,
"eval_steps_per_second": 1.844,
"step": 15400
},
{
"epoch": 8.8,
"learning_rate": 5e-05,
"loss": 0.0646,
"step": 15500
},
{
"epoch": 8.8,
"eval_accuracy": 0.8628259192681874,
"eval_loss": 0.93701171875,
"eval_runtime": 292.6735,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 15500
},
{
"epoch": 8.85,
"eval_accuracy": 0.8629194800016764,
"eval_loss": 0.935546875,
"eval_runtime": 292.789,
"eval_samples_per_second": 14.738,
"eval_steps_per_second": 1.844,
"step": 15600
},
{
"epoch": 8.91,
"eval_accuracy": 0.8632305184691833,
"eval_loss": 0.9375,
"eval_runtime": 291.6374,
"eval_samples_per_second": 14.796,
"eval_steps_per_second": 1.852,
"step": 15700
},
{
"epoch": 8.97,
"eval_accuracy": 0.8629763413675499,
"eval_loss": 0.93896484375,
"eval_runtime": 293.0327,
"eval_samples_per_second": 14.725,
"eval_steps_per_second": 1.843,
"step": 15800
},
{
"epoch": 9.02,
"eval_accuracy": 0.8630309373403606,
"eval_loss": 0.9716796875,
"eval_runtime": 292.645,
"eval_samples_per_second": 14.745,
"eval_steps_per_second": 1.845,
"step": 15900
},
{
"epoch": 9.08,
"learning_rate": 5e-05,
"loss": 0.0593,
"step": 16000
},
{
"epoch": 9.08,
"eval_accuracy": 0.8626426489694161,
"eval_loss": 0.96728515625,
"eval_runtime": 292.8134,
"eval_samples_per_second": 14.736,
"eval_steps_per_second": 1.844,
"step": 16000
},
{
"epoch": 9.14,
"eval_accuracy": 0.862975661749631,
"eval_loss": 0.96435546875,
"eval_runtime": 292.9061,
"eval_samples_per_second": 14.732,
"eval_steps_per_second": 1.844,
"step": 16100
},
{
"epoch": 9.19,
"eval_accuracy": 0.8630644651576883,
"eval_loss": 0.96240234375,
"eval_runtime": 293.8107,
"eval_samples_per_second": 14.686,
"eval_steps_per_second": 1.838,
"step": 16200
},
{
"epoch": 9.25,
"eval_accuracy": 0.8633050499009457,
"eval_loss": 0.96484375,
"eval_runtime": 291.7944,
"eval_samples_per_second": 14.788,
"eval_steps_per_second": 1.851,
"step": 16300
},
{
"epoch": 9.31,
"eval_accuracy": 0.8632493212316036,
"eval_loss": 0.96728515625,
"eval_runtime": 291.6912,
"eval_samples_per_second": 14.793,
"eval_steps_per_second": 1.851,
"step": 16400
},
{
"epoch": 9.36,
"learning_rate": 5e-05,
"loss": 0.0415,
"step": 16500
},
{
"epoch": 9.36,
"eval_accuracy": 0.8633073152940084,
"eval_loss": 0.9658203125,
"eval_runtime": 291.6781,
"eval_samples_per_second": 14.794,
"eval_steps_per_second": 1.851,
"step": 16500
},
{
"epoch": 9.42,
"eval_accuracy": 0.8627819706427713,
"eval_loss": 0.96875,
"eval_runtime": 292.5363,
"eval_samples_per_second": 14.75,
"eval_steps_per_second": 1.846,
"step": 16600
},
{
"epoch": 9.48,
"eval_accuracy": 0.8632289326940394,
"eval_loss": 0.96533203125,
"eval_runtime": 292.6096,
"eval_samples_per_second": 14.747,
"eval_steps_per_second": 1.845,
"step": 16700
},
{
"epoch": 9.53,
"eval_accuracy": 0.862821388482062,
"eval_loss": 0.9658203125,
"eval_runtime": 291.5436,
"eval_samples_per_second": 14.801,
"eval_steps_per_second": 1.852,
"step": 16800
},
{
"epoch": 9.59,
"eval_accuracy": 0.8629199330802889,
"eval_loss": 0.966796875,
"eval_runtime": 292.7541,
"eval_samples_per_second": 14.739,
"eval_steps_per_second": 1.845,
"step": 16900
},
{
"epoch": 9.65,
"learning_rate": 5e-05,
"loss": 0.0471,
"step": 17000
},
{
"epoch": 9.65,
"eval_accuracy": 0.8625384408885325,
"eval_loss": 0.96044921875,
"eval_runtime": 292.6838,
"eval_samples_per_second": 14.743,
"eval_steps_per_second": 1.845,
"step": 17000
},
{
"epoch": 9.7,
"eval_accuracy": 0.8620747149286004,
"eval_loss": 0.9658203125,
"eval_runtime": 291.636,
"eval_samples_per_second": 14.796,
"eval_steps_per_second": 1.852,
"step": 17100
},
{
"epoch": 9.76,
"eval_accuracy": 0.8629695451883618,
"eval_loss": 0.97314453125,
"eval_runtime": 291.7564,
"eval_samples_per_second": 14.79,
"eval_steps_per_second": 1.851,
"step": 17200
},
{
"epoch": 9.82,
"eval_accuracy": 0.8626276973752023,
"eval_loss": 0.96923828125,
"eval_runtime": 291.4249,
"eval_samples_per_second": 14.807,
"eval_steps_per_second": 1.853,
"step": 17300
},
{
"epoch": 9.88,
"eval_accuracy": 0.8622577586880655,
"eval_loss": 0.96728515625,
"eval_runtime": 291.5949,
"eval_samples_per_second": 14.798,
"eval_steps_per_second": 1.852,
"step": 17400
},
{
"epoch": 9.93,
"learning_rate": 5e-05,
"loss": 0.0528,
"step": 17500
},
{
"epoch": 9.93,
"eval_accuracy": 0.8619999569575318,
"eval_loss": 0.96142578125,
"eval_runtime": 292.6583,
"eval_samples_per_second": 14.744,
"eval_steps_per_second": 1.845,
"step": 17500
},
{
"epoch": 9.99,
"eval_accuracy": 0.8620715433783127,
"eval_loss": 0.9697265625,
"eval_runtime": 291.7031,
"eval_samples_per_second": 14.792,
"eval_steps_per_second": 1.851,
"step": 17600
},
{
"epoch": 10.0,
"step": 17620,
"total_flos": 2.179292736824279e+18,
"train_loss": 0.2421213565700847,
"train_runtime": 122603.1424,
"train_samples_per_second": 1.149,
"train_steps_per_second": 0.144
}
],
"max_steps": 17620,
"num_train_epochs": 10,
"total_flos": 2.179292736824279e+18,
"trial_name": null,
"trial_params": null
}