adapters-gemma-bf16-QLORA-super_glue-boolq / trainer_state-gemma-bf16-QLORA-super_glue-boolq-sequence_classification.json
RMHalak's picture
Task: SequenceClassification
985a53d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.984,
"eval_steps": 1,
"global_step": 124,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016,
"grad_norm": 58.75,
"learning_rate": 2.5e-05,
"loss": 1.6327,
"step": 1
},
{
"epoch": 0.016,
"eval_accuracy": 0.344,
"eval_loss": 1.8050549030303955,
"eval_runtime": 8.5979,
"eval_samples_per_second": 29.077,
"eval_steps_per_second": 3.722,
"step": 1
},
{
"epoch": 0.032,
"grad_norm": 173.0,
"learning_rate": 5e-05,
"loss": 1.2182,
"step": 2
},
{
"epoch": 0.032,
"eval_accuracy": 0.368,
"eval_loss": 1.5831865072250366,
"eval_runtime": 8.6593,
"eval_samples_per_second": 28.871,
"eval_steps_per_second": 3.695,
"step": 2
},
{
"epoch": 0.048,
"grad_norm": 78.5,
"learning_rate": 4.959016393442623e-05,
"loss": 1.0166,
"step": 3
},
{
"epoch": 0.048,
"eval_accuracy": 0.48,
"eval_loss": 1.2497016191482544,
"eval_runtime": 8.6547,
"eval_samples_per_second": 28.886,
"eval_steps_per_second": 3.697,
"step": 3
},
{
"epoch": 0.064,
"grad_norm": 205.0,
"learning_rate": 4.918032786885246e-05,
"loss": 1.1151,
"step": 4
},
{
"epoch": 0.064,
"eval_accuracy": 0.592,
"eval_loss": 0.9809591174125671,
"eval_runtime": 8.6606,
"eval_samples_per_second": 28.866,
"eval_steps_per_second": 3.695,
"step": 4
},
{
"epoch": 0.08,
"grad_norm": 92.0,
"learning_rate": 4.8770491803278687e-05,
"loss": 1.1203,
"step": 5
},
{
"epoch": 0.08,
"eval_accuracy": 0.616,
"eval_loss": 0.9002195000648499,
"eval_runtime": 8.6562,
"eval_samples_per_second": 28.881,
"eval_steps_per_second": 3.697,
"step": 5
},
{
"epoch": 0.096,
"grad_norm": 39.5,
"learning_rate": 4.836065573770492e-05,
"loss": 0.3129,
"step": 6
},
{
"epoch": 0.096,
"eval_accuracy": 0.692,
"eval_loss": 0.8504685759544373,
"eval_runtime": 8.6632,
"eval_samples_per_second": 28.858,
"eval_steps_per_second": 3.694,
"step": 6
},
{
"epoch": 0.112,
"grad_norm": 93.0,
"learning_rate": 4.795081967213115e-05,
"loss": 0.989,
"step": 7
},
{
"epoch": 0.112,
"eval_accuracy": 0.72,
"eval_loss": 0.8811690807342529,
"eval_runtime": 8.6664,
"eval_samples_per_second": 28.847,
"eval_steps_per_second": 3.692,
"step": 7
},
{
"epoch": 0.128,
"grad_norm": 69.5,
"learning_rate": 4.754098360655738e-05,
"loss": 0.6991,
"step": 8
},
{
"epoch": 0.128,
"eval_accuracy": 0.68,
"eval_loss": 1.079397439956665,
"eval_runtime": 8.6622,
"eval_samples_per_second": 28.861,
"eval_steps_per_second": 3.694,
"step": 8
},
{
"epoch": 0.144,
"grad_norm": 161.0,
"learning_rate": 4.713114754098361e-05,
"loss": 1.2626,
"step": 9
},
{
"epoch": 0.144,
"eval_accuracy": 0.688,
"eval_loss": 1.0678237676620483,
"eval_runtime": 8.6644,
"eval_samples_per_second": 28.854,
"eval_steps_per_second": 3.693,
"step": 9
},
{
"epoch": 0.16,
"grad_norm": 155.0,
"learning_rate": 4.672131147540984e-05,
"loss": 0.7883,
"step": 10
},
{
"epoch": 0.16,
"eval_accuracy": 0.696,
"eval_loss": 0.88979172706604,
"eval_runtime": 8.6685,
"eval_samples_per_second": 28.84,
"eval_steps_per_second": 3.692,
"step": 10
},
{
"epoch": 0.176,
"grad_norm": 71.5,
"learning_rate": 4.631147540983607e-05,
"loss": 0.2973,
"step": 11
},
{
"epoch": 0.176,
"eval_accuracy": 0.768,
"eval_loss": 0.7034730315208435,
"eval_runtime": 8.6628,
"eval_samples_per_second": 28.859,
"eval_steps_per_second": 3.694,
"step": 11
},
{
"epoch": 0.192,
"grad_norm": 32.5,
"learning_rate": 4.59016393442623e-05,
"loss": 0.3976,
"step": 12
},
{
"epoch": 0.192,
"eval_accuracy": 0.772,
"eval_loss": 0.64277583360672,
"eval_runtime": 8.6669,
"eval_samples_per_second": 28.845,
"eval_steps_per_second": 3.692,
"step": 12
},
{
"epoch": 0.208,
"grad_norm": 85.0,
"learning_rate": 4.549180327868853e-05,
"loss": 0.8966,
"step": 13
},
{
"epoch": 0.208,
"eval_accuracy": 0.776,
"eval_loss": 0.5894673466682434,
"eval_runtime": 8.6718,
"eval_samples_per_second": 28.829,
"eval_steps_per_second": 3.69,
"step": 13
},
{
"epoch": 0.224,
"grad_norm": 93.0,
"learning_rate": 4.508196721311476e-05,
"loss": 0.3748,
"step": 14
},
{
"epoch": 0.224,
"eval_accuracy": 0.748,
"eval_loss": 0.6436864137649536,
"eval_runtime": 8.6619,
"eval_samples_per_second": 28.862,
"eval_steps_per_second": 3.694,
"step": 14
},
{
"epoch": 0.24,
"grad_norm": 102.0,
"learning_rate": 4.467213114754098e-05,
"loss": 0.6883,
"step": 15
},
{
"epoch": 0.24,
"eval_accuracy": 0.74,
"eval_loss": 0.6454311609268188,
"eval_runtime": 8.6684,
"eval_samples_per_second": 28.841,
"eval_steps_per_second": 3.692,
"step": 15
},
{
"epoch": 0.256,
"grad_norm": 40.5,
"learning_rate": 4.426229508196721e-05,
"loss": 0.3292,
"step": 16
},
{
"epoch": 0.256,
"eval_accuracy": 0.708,
"eval_loss": 0.8357064127922058,
"eval_runtime": 8.6666,
"eval_samples_per_second": 28.846,
"eval_steps_per_second": 3.692,
"step": 16
},
{
"epoch": 0.272,
"grad_norm": 138.0,
"learning_rate": 4.3852459016393444e-05,
"loss": 1.0341,
"step": 17
},
{
"epoch": 0.272,
"eval_accuracy": 0.692,
"eval_loss": 0.920940101146698,
"eval_runtime": 8.6644,
"eval_samples_per_second": 28.854,
"eval_steps_per_second": 3.693,
"step": 17
},
{
"epoch": 0.288,
"grad_norm": 97.5,
"learning_rate": 4.3442622950819674e-05,
"loss": 0.8867,
"step": 18
},
{
"epoch": 0.288,
"eval_accuracy": 0.708,
"eval_loss": 0.8621469736099243,
"eval_runtime": 8.6638,
"eval_samples_per_second": 28.856,
"eval_steps_per_second": 3.694,
"step": 18
},
{
"epoch": 0.304,
"grad_norm": 176.0,
"learning_rate": 4.3032786885245904e-05,
"loss": 1.2041,
"step": 19
},
{
"epoch": 0.304,
"eval_accuracy": 0.744,
"eval_loss": 0.67635178565979,
"eval_runtime": 8.6671,
"eval_samples_per_second": 28.845,
"eval_steps_per_second": 3.692,
"step": 19
},
{
"epoch": 0.32,
"grad_norm": 150.0,
"learning_rate": 4.262295081967213e-05,
"loss": 0.9002,
"step": 20
},
{
"epoch": 0.32,
"eval_accuracy": 0.732,
"eval_loss": 0.5985668301582336,
"eval_runtime": 8.6631,
"eval_samples_per_second": 28.858,
"eval_steps_per_second": 3.694,
"step": 20
},
{
"epoch": 0.336,
"grad_norm": 53.0,
"learning_rate": 4.2213114754098365e-05,
"loss": 0.8948,
"step": 21
},
{
"epoch": 0.336,
"eval_accuracy": 0.716,
"eval_loss": 0.652230978012085,
"eval_runtime": 8.6655,
"eval_samples_per_second": 28.85,
"eval_steps_per_second": 3.693,
"step": 21
},
{
"epoch": 0.352,
"grad_norm": 174.0,
"learning_rate": 4.1803278688524595e-05,
"loss": 0.86,
"step": 22
},
{
"epoch": 0.352,
"eval_accuracy": 0.728,
"eval_loss": 0.6597179174423218,
"eval_runtime": 8.6672,
"eval_samples_per_second": 28.844,
"eval_steps_per_second": 3.692,
"step": 22
},
{
"epoch": 0.368,
"grad_norm": 156.0,
"learning_rate": 4.1393442622950826e-05,
"loss": 0.6364,
"step": 23
},
{
"epoch": 0.368,
"eval_accuracy": 0.744,
"eval_loss": 0.5796850919723511,
"eval_runtime": 8.664,
"eval_samples_per_second": 28.855,
"eval_steps_per_second": 3.693,
"step": 23
},
{
"epoch": 0.384,
"grad_norm": 53.25,
"learning_rate": 4.098360655737705e-05,
"loss": 0.2094,
"step": 24
},
{
"epoch": 0.384,
"eval_accuracy": 0.748,
"eval_loss": 0.5883631706237793,
"eval_runtime": 8.6686,
"eval_samples_per_second": 28.84,
"eval_steps_per_second": 3.692,
"step": 24
},
{
"epoch": 0.4,
"grad_norm": 87.5,
"learning_rate": 4.057377049180328e-05,
"loss": 0.4607,
"step": 25
},
{
"epoch": 0.4,
"eval_accuracy": 0.768,
"eval_loss": 0.5390456318855286,
"eval_runtime": 8.6866,
"eval_samples_per_second": 28.78,
"eval_steps_per_second": 3.684,
"step": 25
},
{
"epoch": 0.416,
"grad_norm": 155.0,
"learning_rate": 4.016393442622951e-05,
"loss": 0.814,
"step": 26
},
{
"epoch": 0.416,
"eval_accuracy": 0.78,
"eval_loss": 0.4743637144565582,
"eval_runtime": 8.6531,
"eval_samples_per_second": 28.892,
"eval_steps_per_second": 3.698,
"step": 26
},
{
"epoch": 0.432,
"grad_norm": 41.0,
"learning_rate": 3.975409836065574e-05,
"loss": 0.5358,
"step": 27
},
{
"epoch": 0.432,
"eval_accuracy": 0.776,
"eval_loss": 0.4668542146682739,
"eval_runtime": 8.6595,
"eval_samples_per_second": 28.87,
"eval_steps_per_second": 3.695,
"step": 27
},
{
"epoch": 0.448,
"grad_norm": 131.0,
"learning_rate": 3.934426229508197e-05,
"loss": 0.5556,
"step": 28
},
{
"epoch": 0.448,
"eval_accuracy": 0.736,
"eval_loss": 0.6067003011703491,
"eval_runtime": 8.6518,
"eval_samples_per_second": 28.896,
"eval_steps_per_second": 3.699,
"step": 28
},
{
"epoch": 0.464,
"grad_norm": 126.5,
"learning_rate": 3.89344262295082e-05,
"loss": 0.505,
"step": 29
},
{
"epoch": 0.464,
"eval_accuracy": 0.712,
"eval_loss": 0.7375366687774658,
"eval_runtime": 8.6519,
"eval_samples_per_second": 28.895,
"eval_steps_per_second": 3.699,
"step": 29
},
{
"epoch": 0.48,
"grad_norm": 171.0,
"learning_rate": 3.8524590163934424e-05,
"loss": 0.9589,
"step": 30
},
{
"epoch": 0.48,
"eval_accuracy": 0.704,
"eval_loss": 0.7679601311683655,
"eval_runtime": 8.6582,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 30
},
{
"epoch": 0.496,
"grad_norm": 150.0,
"learning_rate": 3.8114754098360655e-05,
"loss": 0.74,
"step": 31
},
{
"epoch": 0.496,
"eval_accuracy": 0.732,
"eval_loss": 0.6937733888626099,
"eval_runtime": 8.6569,
"eval_samples_per_second": 28.879,
"eval_steps_per_second": 3.696,
"step": 31
},
{
"epoch": 0.512,
"grad_norm": 79.5,
"learning_rate": 3.7704918032786885e-05,
"loss": 0.5474,
"step": 32
},
{
"epoch": 0.512,
"eval_accuracy": 0.748,
"eval_loss": 0.5756805539131165,
"eval_runtime": 8.6562,
"eval_samples_per_second": 28.881,
"eval_steps_per_second": 3.697,
"step": 32
},
{
"epoch": 0.528,
"grad_norm": 112.5,
"learning_rate": 3.729508196721312e-05,
"loss": 0.4916,
"step": 33
},
{
"epoch": 0.528,
"eval_accuracy": 0.792,
"eval_loss": 0.47289371490478516,
"eval_runtime": 8.6581,
"eval_samples_per_second": 28.875,
"eval_steps_per_second": 3.696,
"step": 33
},
{
"epoch": 0.544,
"grad_norm": 33.0,
"learning_rate": 3.6885245901639346e-05,
"loss": 0.8822,
"step": 34
},
{
"epoch": 0.544,
"eval_accuracy": 0.82,
"eval_loss": 0.4487142264842987,
"eval_runtime": 8.6584,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 34
},
{
"epoch": 0.56,
"grad_norm": 84.5,
"learning_rate": 3.6475409836065576e-05,
"loss": 0.7691,
"step": 35
},
{
"epoch": 0.56,
"eval_accuracy": 0.812,
"eval_loss": 0.45519721508026123,
"eval_runtime": 8.6547,
"eval_samples_per_second": 28.886,
"eval_steps_per_second": 3.697,
"step": 35
},
{
"epoch": 0.576,
"grad_norm": 28.625,
"learning_rate": 3.6065573770491806e-05,
"loss": 0.4743,
"step": 36
},
{
"epoch": 0.576,
"eval_accuracy": 0.764,
"eval_loss": 0.5331873893737793,
"eval_runtime": 8.6566,
"eval_samples_per_second": 28.88,
"eval_steps_per_second": 3.697,
"step": 36
},
{
"epoch": 0.592,
"grad_norm": 23.875,
"learning_rate": 3.5655737704918037e-05,
"loss": 0.3101,
"step": 37
},
{
"epoch": 0.592,
"eval_accuracy": 0.744,
"eval_loss": 0.6849313974380493,
"eval_runtime": 8.6571,
"eval_samples_per_second": 28.878,
"eval_steps_per_second": 3.696,
"step": 37
},
{
"epoch": 0.608,
"grad_norm": 103.5,
"learning_rate": 3.524590163934427e-05,
"loss": 0.962,
"step": 38
},
{
"epoch": 0.608,
"eval_accuracy": 0.724,
"eval_loss": 0.7783421874046326,
"eval_runtime": 8.6583,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 38
},
{
"epoch": 0.624,
"grad_norm": 133.0,
"learning_rate": 3.483606557377049e-05,
"loss": 0.5671,
"step": 39
},
{
"epoch": 0.624,
"eval_accuracy": 0.712,
"eval_loss": 0.7919518947601318,
"eval_runtime": 8.661,
"eval_samples_per_second": 28.865,
"eval_steps_per_second": 3.695,
"step": 39
},
{
"epoch": 0.64,
"grad_norm": 193.0,
"learning_rate": 3.442622950819672e-05,
"loss": 0.7741,
"step": 40
},
{
"epoch": 0.64,
"eval_accuracy": 0.724,
"eval_loss": 0.7195008397102356,
"eval_runtime": 8.6644,
"eval_samples_per_second": 28.854,
"eval_steps_per_second": 3.693,
"step": 40
},
{
"epoch": 0.656,
"grad_norm": 236.0,
"learning_rate": 3.401639344262295e-05,
"loss": 0.9336,
"step": 41
},
{
"epoch": 0.656,
"eval_accuracy": 0.784,
"eval_loss": 0.5999830365180969,
"eval_runtime": 8.6611,
"eval_samples_per_second": 28.865,
"eval_steps_per_second": 3.695,
"step": 41
},
{
"epoch": 0.672,
"grad_norm": 194.0,
"learning_rate": 3.360655737704918e-05,
"loss": 0.9252,
"step": 42
},
{
"epoch": 0.672,
"eval_accuracy": 0.812,
"eval_loss": 0.4787631928920746,
"eval_runtime": 8.6643,
"eval_samples_per_second": 28.854,
"eval_steps_per_second": 3.693,
"step": 42
},
{
"epoch": 0.688,
"grad_norm": 102.0,
"learning_rate": 3.319672131147541e-05,
"loss": 0.2934,
"step": 43
},
{
"epoch": 0.688,
"eval_accuracy": 0.812,
"eval_loss": 0.41090723872184753,
"eval_runtime": 8.6614,
"eval_samples_per_second": 28.864,
"eval_steps_per_second": 3.695,
"step": 43
},
{
"epoch": 0.704,
"grad_norm": 87.5,
"learning_rate": 3.2786885245901635e-05,
"loss": 0.4936,
"step": 44
},
{
"epoch": 0.704,
"eval_accuracy": 0.78,
"eval_loss": 0.46753987669944763,
"eval_runtime": 8.6615,
"eval_samples_per_second": 28.863,
"eval_steps_per_second": 3.694,
"step": 44
},
{
"epoch": 0.72,
"grad_norm": 18.375,
"learning_rate": 3.237704918032787e-05,
"loss": 0.3223,
"step": 45
},
{
"epoch": 0.72,
"eval_accuracy": 0.748,
"eval_loss": 0.5864301919937134,
"eval_runtime": 8.6597,
"eval_samples_per_second": 28.869,
"eval_steps_per_second": 3.695,
"step": 45
},
{
"epoch": 0.736,
"grad_norm": 121.0,
"learning_rate": 3.19672131147541e-05,
"loss": 0.408,
"step": 46
},
{
"epoch": 0.736,
"eval_accuracy": 0.728,
"eval_loss": 0.6596755981445312,
"eval_runtime": 8.663,
"eval_samples_per_second": 28.858,
"eval_steps_per_second": 3.694,
"step": 46
},
{
"epoch": 0.752,
"grad_norm": 54.0,
"learning_rate": 3.155737704918033e-05,
"loss": 0.759,
"step": 47
},
{
"epoch": 0.752,
"eval_accuracy": 0.728,
"eval_loss": 0.6460751891136169,
"eval_runtime": 8.6565,
"eval_samples_per_second": 28.88,
"eval_steps_per_second": 3.697,
"step": 47
},
{
"epoch": 0.768,
"grad_norm": 114.0,
"learning_rate": 3.114754098360656e-05,
"loss": 0.6628,
"step": 48
},
{
"epoch": 0.768,
"eval_accuracy": 0.744,
"eval_loss": 0.5938560962677002,
"eval_runtime": 8.6567,
"eval_samples_per_second": 28.879,
"eval_steps_per_second": 3.697,
"step": 48
},
{
"epoch": 0.784,
"grad_norm": 111.0,
"learning_rate": 3.073770491803279e-05,
"loss": 0.761,
"step": 49
},
{
"epoch": 0.784,
"eval_accuracy": 0.804,
"eval_loss": 0.5164662003517151,
"eval_runtime": 8.6557,
"eval_samples_per_second": 28.883,
"eval_steps_per_second": 3.697,
"step": 49
},
{
"epoch": 0.8,
"grad_norm": 32.0,
"learning_rate": 3.0327868852459017e-05,
"loss": 0.308,
"step": 50
},
{
"epoch": 0.8,
"eval_accuracy": 0.836,
"eval_loss": 0.43705108761787415,
"eval_runtime": 8.6584,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 50
},
{
"epoch": 0.816,
"grad_norm": 78.0,
"learning_rate": 2.9918032786885248e-05,
"loss": 0.4859,
"step": 51
},
{
"epoch": 0.816,
"eval_accuracy": 0.856,
"eval_loss": 0.3826155364513397,
"eval_runtime": 8.6539,
"eval_samples_per_second": 28.889,
"eval_steps_per_second": 3.698,
"step": 51
},
{
"epoch": 0.832,
"grad_norm": 24.5,
"learning_rate": 2.9508196721311478e-05,
"loss": 0.6841,
"step": 52
},
{
"epoch": 0.832,
"eval_accuracy": 0.828,
"eval_loss": 0.3742530345916748,
"eval_runtime": 8.6541,
"eval_samples_per_second": 28.888,
"eval_steps_per_second": 3.698,
"step": 52
},
{
"epoch": 0.848,
"grad_norm": 37.5,
"learning_rate": 2.9098360655737705e-05,
"loss": 0.7852,
"step": 53
},
{
"epoch": 0.848,
"eval_accuracy": 0.8,
"eval_loss": 0.43144190311431885,
"eval_runtime": 8.653,
"eval_samples_per_second": 28.892,
"eval_steps_per_second": 3.698,
"step": 53
},
{
"epoch": 0.864,
"grad_norm": 91.0,
"learning_rate": 2.8688524590163935e-05,
"loss": 0.3388,
"step": 54
},
{
"epoch": 0.864,
"eval_accuracy": 0.792,
"eval_loss": 0.501422107219696,
"eval_runtime": 8.6518,
"eval_samples_per_second": 28.896,
"eval_steps_per_second": 3.699,
"step": 54
},
{
"epoch": 0.88,
"grad_norm": 17.625,
"learning_rate": 2.8278688524590162e-05,
"loss": 0.3829,
"step": 55
},
{
"epoch": 0.88,
"eval_accuracy": 0.768,
"eval_loss": 0.5729050040245056,
"eval_runtime": 8.6468,
"eval_samples_per_second": 28.912,
"eval_steps_per_second": 3.701,
"step": 55
},
{
"epoch": 0.896,
"grad_norm": 93.5,
"learning_rate": 2.7868852459016392e-05,
"loss": 0.6144,
"step": 56
},
{
"epoch": 0.896,
"eval_accuracy": 0.764,
"eval_loss": 0.6807990074157715,
"eval_runtime": 8.6452,
"eval_samples_per_second": 28.918,
"eval_steps_per_second": 3.701,
"step": 56
},
{
"epoch": 0.912,
"grad_norm": 28.5,
"learning_rate": 2.7459016393442626e-05,
"loss": 0.3515,
"step": 57
},
{
"epoch": 0.912,
"eval_accuracy": 0.756,
"eval_loss": 0.7396586537361145,
"eval_runtime": 8.6535,
"eval_samples_per_second": 28.89,
"eval_steps_per_second": 3.698,
"step": 57
},
{
"epoch": 0.928,
"grad_norm": 112.5,
"learning_rate": 2.7049180327868856e-05,
"loss": 0.3028,
"step": 58
},
{
"epoch": 0.928,
"eval_accuracy": 0.756,
"eval_loss": 0.745948314666748,
"eval_runtime": 8.6584,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 58
},
{
"epoch": 0.944,
"grad_norm": 164.0,
"learning_rate": 2.6639344262295087e-05,
"loss": 0.6729,
"step": 59
},
{
"epoch": 0.944,
"eval_accuracy": 0.752,
"eval_loss": 0.7118371725082397,
"eval_runtime": 8.6567,
"eval_samples_per_second": 28.879,
"eval_steps_per_second": 3.697,
"step": 59
},
{
"epoch": 0.96,
"grad_norm": 131.0,
"learning_rate": 2.6229508196721314e-05,
"loss": 0.4634,
"step": 60
},
{
"epoch": 0.96,
"eval_accuracy": 0.76,
"eval_loss": 0.6441870331764221,
"eval_runtime": 8.6557,
"eval_samples_per_second": 28.883,
"eval_steps_per_second": 3.697,
"step": 60
},
{
"epoch": 0.976,
"grad_norm": 127.5,
"learning_rate": 2.5819672131147544e-05,
"loss": 0.5924,
"step": 61
},
{
"epoch": 0.976,
"eval_accuracy": 0.776,
"eval_loss": 0.5635260939598083,
"eval_runtime": 8.6577,
"eval_samples_per_second": 28.876,
"eval_steps_per_second": 3.696,
"step": 61
},
{
"epoch": 0.992,
"grad_norm": 130.0,
"learning_rate": 2.540983606557377e-05,
"loss": 0.5527,
"step": 62
},
{
"epoch": 0.992,
"eval_accuracy": 0.796,
"eval_loss": 0.4781284034252167,
"eval_runtime": 8.6636,
"eval_samples_per_second": 28.856,
"eval_steps_per_second": 3.694,
"step": 62
},
{
"epoch": 1.008,
"grad_norm": 43.0,
"learning_rate": 2.5e-05,
"loss": 0.1542,
"step": 63
},
{
"epoch": 1.008,
"eval_accuracy": 0.82,
"eval_loss": 0.4085061252117157,
"eval_runtime": 8.6612,
"eval_samples_per_second": 28.864,
"eval_steps_per_second": 3.695,
"step": 63
},
{
"epoch": 1.024,
"grad_norm": 147.0,
"learning_rate": 2.459016393442623e-05,
"loss": 0.3714,
"step": 64
},
{
"epoch": 1.024,
"eval_accuracy": 0.848,
"eval_loss": 0.37276288866996765,
"eval_runtime": 8.6673,
"eval_samples_per_second": 28.844,
"eval_steps_per_second": 3.692,
"step": 64
},
{
"epoch": 1.04,
"grad_norm": 34.5,
"learning_rate": 2.418032786885246e-05,
"loss": 0.1124,
"step": 65
},
{
"epoch": 1.04,
"eval_accuracy": 0.848,
"eval_loss": 0.36895105242729187,
"eval_runtime": 8.6746,
"eval_samples_per_second": 28.82,
"eval_steps_per_second": 3.689,
"step": 65
},
{
"epoch": 1.056,
"grad_norm": 50.25,
"learning_rate": 2.377049180327869e-05,
"loss": 0.1433,
"step": 66
},
{
"epoch": 1.056,
"eval_accuracy": 0.844,
"eval_loss": 0.3762807548046112,
"eval_runtime": 8.6794,
"eval_samples_per_second": 28.804,
"eval_steps_per_second": 3.687,
"step": 66
},
{
"epoch": 1.072,
"grad_norm": 85.5,
"learning_rate": 2.336065573770492e-05,
"loss": 0.2446,
"step": 67
},
{
"epoch": 1.072,
"eval_accuracy": 0.84,
"eval_loss": 0.38033661246299744,
"eval_runtime": 8.6709,
"eval_samples_per_second": 28.832,
"eval_steps_per_second": 3.691,
"step": 67
},
{
"epoch": 1.088,
"grad_norm": 120.5,
"learning_rate": 2.295081967213115e-05,
"loss": 0.6573,
"step": 68
},
{
"epoch": 1.088,
"eval_accuracy": 0.848,
"eval_loss": 0.37577661871910095,
"eval_runtime": 8.6746,
"eval_samples_per_second": 28.82,
"eval_steps_per_second": 3.689,
"step": 68
},
{
"epoch": 1.104,
"grad_norm": 32.25,
"learning_rate": 2.254098360655738e-05,
"loss": 0.1509,
"step": 69
},
{
"epoch": 1.104,
"eval_accuracy": 0.848,
"eval_loss": 0.36732277274131775,
"eval_runtime": 8.6668,
"eval_samples_per_second": 28.846,
"eval_steps_per_second": 3.692,
"step": 69
},
{
"epoch": 1.12,
"grad_norm": 36.0,
"learning_rate": 2.2131147540983607e-05,
"loss": 0.2131,
"step": 70
},
{
"epoch": 1.12,
"eval_accuracy": 0.856,
"eval_loss": 0.36693572998046875,
"eval_runtime": 8.667,
"eval_samples_per_second": 28.845,
"eval_steps_per_second": 3.692,
"step": 70
},
{
"epoch": 1.1360000000000001,
"grad_norm": 35.0,
"learning_rate": 2.1721311475409837e-05,
"loss": 0.077,
"step": 71
},
{
"epoch": 1.1360000000000001,
"eval_accuracy": 0.836,
"eval_loss": 0.3619978427886963,
"eval_runtime": 8.671,
"eval_samples_per_second": 28.832,
"eval_steps_per_second": 3.69,
"step": 71
},
{
"epoch": 1.152,
"grad_norm": 21.625,
"learning_rate": 2.1311475409836064e-05,
"loss": 0.2332,
"step": 72
},
{
"epoch": 1.152,
"eval_accuracy": 0.832,
"eval_loss": 0.36414313316345215,
"eval_runtime": 8.6706,
"eval_samples_per_second": 28.833,
"eval_steps_per_second": 3.691,
"step": 72
},
{
"epoch": 1.168,
"grad_norm": 69.5,
"learning_rate": 2.0901639344262298e-05,
"loss": 0.2056,
"step": 73
},
{
"epoch": 1.168,
"eval_accuracy": 0.836,
"eval_loss": 0.36293938755989075,
"eval_runtime": 8.6724,
"eval_samples_per_second": 28.827,
"eval_steps_per_second": 3.69,
"step": 73
},
{
"epoch": 1.184,
"grad_norm": 9.5,
"learning_rate": 2.0491803278688525e-05,
"loss": 0.1412,
"step": 74
},
{
"epoch": 1.184,
"eval_accuracy": 0.844,
"eval_loss": 0.3655231297016144,
"eval_runtime": 8.6711,
"eval_samples_per_second": 28.831,
"eval_steps_per_second": 3.69,
"step": 74
},
{
"epoch": 1.2,
"grad_norm": 35.25,
"learning_rate": 2.0081967213114755e-05,
"loss": 0.1982,
"step": 75
},
{
"epoch": 1.2,
"eval_accuracy": 0.84,
"eval_loss": 0.3644102215766907,
"eval_runtime": 8.6641,
"eval_samples_per_second": 28.855,
"eval_steps_per_second": 3.693,
"step": 75
},
{
"epoch": 1.216,
"grad_norm": 12.875,
"learning_rate": 1.9672131147540985e-05,
"loss": 0.2003,
"step": 76
},
{
"epoch": 1.216,
"eval_accuracy": 0.84,
"eval_loss": 0.3651863932609558,
"eval_runtime": 8.6665,
"eval_samples_per_second": 28.847,
"eval_steps_per_second": 3.692,
"step": 76
},
{
"epoch": 1.232,
"grad_norm": 7.28125,
"learning_rate": 1.9262295081967212e-05,
"loss": 0.0934,
"step": 77
},
{
"epoch": 1.232,
"eval_accuracy": 0.84,
"eval_loss": 0.3709143102169037,
"eval_runtime": 8.6583,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 77
},
{
"epoch": 1.248,
"grad_norm": 42.25,
"learning_rate": 1.8852459016393442e-05,
"loss": 0.1577,
"step": 78
},
{
"epoch": 1.248,
"eval_accuracy": 0.836,
"eval_loss": 0.37103718519210815,
"eval_runtime": 8.6594,
"eval_samples_per_second": 28.87,
"eval_steps_per_second": 3.695,
"step": 78
},
{
"epoch": 1.264,
"grad_norm": 25.25,
"learning_rate": 1.8442622950819673e-05,
"loss": 0.3063,
"step": 79
},
{
"epoch": 1.264,
"eval_accuracy": 0.832,
"eval_loss": 0.3689051866531372,
"eval_runtime": 8.6658,
"eval_samples_per_second": 28.849,
"eval_steps_per_second": 3.693,
"step": 79
},
{
"epoch": 1.28,
"grad_norm": 31.625,
"learning_rate": 1.8032786885245903e-05,
"loss": 0.2724,
"step": 80
},
{
"epoch": 1.28,
"eval_accuracy": 0.832,
"eval_loss": 0.3685128688812256,
"eval_runtime": 8.6623,
"eval_samples_per_second": 28.861,
"eval_steps_per_second": 3.694,
"step": 80
},
{
"epoch": 1.296,
"grad_norm": 34.75,
"learning_rate": 1.7622950819672133e-05,
"loss": 0.4324,
"step": 81
},
{
"epoch": 1.296,
"eval_accuracy": 0.836,
"eval_loss": 0.3717711567878723,
"eval_runtime": 8.6564,
"eval_samples_per_second": 28.88,
"eval_steps_per_second": 3.697,
"step": 81
},
{
"epoch": 1.312,
"grad_norm": 33.0,
"learning_rate": 1.721311475409836e-05,
"loss": 0.1911,
"step": 82
},
{
"epoch": 1.312,
"eval_accuracy": 0.84,
"eval_loss": 0.3723936080932617,
"eval_runtime": 8.6687,
"eval_samples_per_second": 28.839,
"eval_steps_per_second": 3.691,
"step": 82
},
{
"epoch": 1.328,
"grad_norm": 16.125,
"learning_rate": 1.680327868852459e-05,
"loss": 0.1936,
"step": 83
},
{
"epoch": 1.328,
"eval_accuracy": 0.84,
"eval_loss": 0.3704240024089813,
"eval_runtime": 8.6668,
"eval_samples_per_second": 28.846,
"eval_steps_per_second": 3.692,
"step": 83
},
{
"epoch": 1.3439999999999999,
"grad_norm": 34.75,
"learning_rate": 1.6393442622950818e-05,
"loss": 0.0839,
"step": 84
},
{
"epoch": 1.3439999999999999,
"eval_accuracy": 0.832,
"eval_loss": 0.36510899662971497,
"eval_runtime": 8.661,
"eval_samples_per_second": 28.865,
"eval_steps_per_second": 3.695,
"step": 84
},
{
"epoch": 1.3599999999999999,
"grad_norm": 40.0,
"learning_rate": 1.598360655737705e-05,
"loss": 0.2661,
"step": 85
},
{
"epoch": 1.3599999999999999,
"eval_accuracy": 0.84,
"eval_loss": 0.3661534786224365,
"eval_runtime": 8.6702,
"eval_samples_per_second": 28.834,
"eval_steps_per_second": 3.691,
"step": 85
},
{
"epoch": 1.376,
"grad_norm": 52.5,
"learning_rate": 1.557377049180328e-05,
"loss": 0.1679,
"step": 86
},
{
"epoch": 1.376,
"eval_accuracy": 0.848,
"eval_loss": 0.36859577894210815,
"eval_runtime": 8.6649,
"eval_samples_per_second": 28.852,
"eval_steps_per_second": 3.693,
"step": 86
},
{
"epoch": 1.392,
"grad_norm": 12.75,
"learning_rate": 1.5163934426229509e-05,
"loss": 0.0698,
"step": 87
},
{
"epoch": 1.392,
"eval_accuracy": 0.852,
"eval_loss": 0.3691750466823578,
"eval_runtime": 8.6861,
"eval_samples_per_second": 28.782,
"eval_steps_per_second": 3.684,
"step": 87
},
{
"epoch": 1.408,
"grad_norm": 39.25,
"learning_rate": 1.4754098360655739e-05,
"loss": 0.1173,
"step": 88
},
{
"epoch": 1.408,
"eval_accuracy": 0.856,
"eval_loss": 0.3779418170452118,
"eval_runtime": 8.6673,
"eval_samples_per_second": 28.844,
"eval_steps_per_second": 3.692,
"step": 88
},
{
"epoch": 1.424,
"grad_norm": 21.5,
"learning_rate": 1.4344262295081968e-05,
"loss": 0.3727,
"step": 89
},
{
"epoch": 1.424,
"eval_accuracy": 0.86,
"eval_loss": 0.38709089159965515,
"eval_runtime": 8.6636,
"eval_samples_per_second": 28.856,
"eval_steps_per_second": 3.694,
"step": 89
},
{
"epoch": 1.44,
"grad_norm": 18.25,
"learning_rate": 1.3934426229508196e-05,
"loss": 0.3828,
"step": 90
},
{
"epoch": 1.44,
"eval_accuracy": 0.86,
"eval_loss": 0.3986479640007019,
"eval_runtime": 8.6565,
"eval_samples_per_second": 28.88,
"eval_steps_per_second": 3.697,
"step": 90
},
{
"epoch": 1.456,
"grad_norm": 29.875,
"learning_rate": 1.3524590163934428e-05,
"loss": 0.0911,
"step": 91
},
{
"epoch": 1.456,
"eval_accuracy": 0.84,
"eval_loss": 0.4078799784183502,
"eval_runtime": 8.654,
"eval_samples_per_second": 28.888,
"eval_steps_per_second": 3.698,
"step": 91
},
{
"epoch": 1.472,
"grad_norm": 40.75,
"learning_rate": 1.3114754098360657e-05,
"loss": 0.1798,
"step": 92
},
{
"epoch": 1.472,
"eval_accuracy": 0.832,
"eval_loss": 0.4203779399394989,
"eval_runtime": 8.6654,
"eval_samples_per_second": 28.85,
"eval_steps_per_second": 3.693,
"step": 92
},
{
"epoch": 1.488,
"grad_norm": 15.6875,
"learning_rate": 1.2704918032786885e-05,
"loss": 0.0851,
"step": 93
},
{
"epoch": 1.488,
"eval_accuracy": 0.832,
"eval_loss": 0.4253535568714142,
"eval_runtime": 8.6605,
"eval_samples_per_second": 28.867,
"eval_steps_per_second": 3.695,
"step": 93
},
{
"epoch": 1.504,
"grad_norm": 21.0,
"learning_rate": 1.2295081967213116e-05,
"loss": 0.0962,
"step": 94
},
{
"epoch": 1.504,
"eval_accuracy": 0.832,
"eval_loss": 0.42336249351501465,
"eval_runtime": 8.6599,
"eval_samples_per_second": 28.869,
"eval_steps_per_second": 3.695,
"step": 94
},
{
"epoch": 1.52,
"grad_norm": 111.0,
"learning_rate": 1.1885245901639344e-05,
"loss": 0.3427,
"step": 95
},
{
"epoch": 1.52,
"eval_accuracy": 0.828,
"eval_loss": 0.4188750684261322,
"eval_runtime": 8.6648,
"eval_samples_per_second": 28.852,
"eval_steps_per_second": 3.693,
"step": 95
},
{
"epoch": 1.536,
"grad_norm": 27.5,
"learning_rate": 1.1475409836065575e-05,
"loss": 0.0881,
"step": 96
},
{
"epoch": 1.536,
"eval_accuracy": 0.84,
"eval_loss": 0.4100199043750763,
"eval_runtime": 8.6603,
"eval_samples_per_second": 28.867,
"eval_steps_per_second": 3.695,
"step": 96
},
{
"epoch": 1.552,
"grad_norm": 69.0,
"learning_rate": 1.1065573770491803e-05,
"loss": 0.1664,
"step": 97
},
{
"epoch": 1.552,
"eval_accuracy": 0.844,
"eval_loss": 0.39891311526298523,
"eval_runtime": 8.6581,
"eval_samples_per_second": 28.875,
"eval_steps_per_second": 3.696,
"step": 97
},
{
"epoch": 1.568,
"grad_norm": 46.25,
"learning_rate": 1.0655737704918032e-05,
"loss": 0.6757,
"step": 98
},
{
"epoch": 1.568,
"eval_accuracy": 0.856,
"eval_loss": 0.3860258162021637,
"eval_runtime": 8.66,
"eval_samples_per_second": 28.868,
"eval_steps_per_second": 3.695,
"step": 98
},
{
"epoch": 1.584,
"grad_norm": 5.875,
"learning_rate": 1.0245901639344262e-05,
"loss": 0.0751,
"step": 99
},
{
"epoch": 1.584,
"eval_accuracy": 0.86,
"eval_loss": 0.3817059397697449,
"eval_runtime": 8.6608,
"eval_samples_per_second": 28.866,
"eval_steps_per_second": 3.695,
"step": 99
},
{
"epoch": 1.6,
"grad_norm": 64.0,
"learning_rate": 9.836065573770493e-06,
"loss": 0.1923,
"step": 100
},
{
"epoch": 1.6,
"eval_accuracy": 0.856,
"eval_loss": 0.37669360637664795,
"eval_runtime": 8.6602,
"eval_samples_per_second": 28.868,
"eval_steps_per_second": 3.695,
"step": 100
},
{
"epoch": 1.616,
"grad_norm": 11.75,
"learning_rate": 9.426229508196721e-06,
"loss": 0.0365,
"step": 101
},
{
"epoch": 1.616,
"eval_accuracy": 0.848,
"eval_loss": 0.3779665231704712,
"eval_runtime": 8.6661,
"eval_samples_per_second": 28.848,
"eval_steps_per_second": 3.693,
"step": 101
},
{
"epoch": 1.6320000000000001,
"grad_norm": 13.75,
"learning_rate": 9.016393442622952e-06,
"loss": 0.0895,
"step": 102
},
{
"epoch": 1.6320000000000001,
"eval_accuracy": 0.848,
"eval_loss": 0.3783411383628845,
"eval_runtime": 8.6625,
"eval_samples_per_second": 28.86,
"eval_steps_per_second": 3.694,
"step": 102
},
{
"epoch": 1.6480000000000001,
"grad_norm": 87.0,
"learning_rate": 8.60655737704918e-06,
"loss": 0.3337,
"step": 103
},
{
"epoch": 1.6480000000000001,
"eval_accuracy": 0.852,
"eval_loss": 0.3828529417514801,
"eval_runtime": 8.6587,
"eval_samples_per_second": 28.873,
"eval_steps_per_second": 3.696,
"step": 103
},
{
"epoch": 1.6640000000000001,
"grad_norm": 29.375,
"learning_rate": 8.196721311475409e-06,
"loss": 0.1525,
"step": 104
},
{
"epoch": 1.6640000000000001,
"eval_accuracy": 0.852,
"eval_loss": 0.38398581743240356,
"eval_runtime": 8.6634,
"eval_samples_per_second": 28.857,
"eval_steps_per_second": 3.694,
"step": 104
},
{
"epoch": 1.6800000000000002,
"grad_norm": 10.0625,
"learning_rate": 7.78688524590164e-06,
"loss": 0.0973,
"step": 105
},
{
"epoch": 1.6800000000000002,
"eval_accuracy": 0.844,
"eval_loss": 0.3848567605018616,
"eval_runtime": 8.6582,
"eval_samples_per_second": 28.874,
"eval_steps_per_second": 3.696,
"step": 105
},
{
"epoch": 1.696,
"grad_norm": 77.5,
"learning_rate": 7.3770491803278695e-06,
"loss": 0.2626,
"step": 106
},
{
"epoch": 1.696,
"eval_accuracy": 0.852,
"eval_loss": 0.385408878326416,
"eval_runtime": 8.6577,
"eval_samples_per_second": 28.876,
"eval_steps_per_second": 3.696,
"step": 106
},
{
"epoch": 1.712,
"grad_norm": 9.5,
"learning_rate": 6.967213114754098e-06,
"loss": 0.0585,
"step": 107
},
{
"epoch": 1.712,
"eval_accuracy": 0.848,
"eval_loss": 0.38454535603523254,
"eval_runtime": 8.6544,
"eval_samples_per_second": 28.887,
"eval_steps_per_second": 3.698,
"step": 107
},
{
"epoch": 1.728,
"grad_norm": 55.0,
"learning_rate": 6.557377049180328e-06,
"loss": 0.2257,
"step": 108
},
{
"epoch": 1.728,
"eval_accuracy": 0.852,
"eval_loss": 0.38379326462745667,
"eval_runtime": 8.6523,
"eval_samples_per_second": 28.894,
"eval_steps_per_second": 3.698,
"step": 108
},
{
"epoch": 1.744,
"grad_norm": 32.5,
"learning_rate": 6.147540983606558e-06,
"loss": 0.1137,
"step": 109
},
{
"epoch": 1.744,
"eval_accuracy": 0.852,
"eval_loss": 0.3833220303058624,
"eval_runtime": 8.652,
"eval_samples_per_second": 28.895,
"eval_steps_per_second": 3.699,
"step": 109
},
{
"epoch": 1.76,
"grad_norm": 15.4375,
"learning_rate": 5.737704918032787e-06,
"loss": 0.283,
"step": 110
},
{
"epoch": 1.76,
"eval_accuracy": 0.852,
"eval_loss": 0.37939703464508057,
"eval_runtime": 8.6539,
"eval_samples_per_second": 28.889,
"eval_steps_per_second": 3.698,
"step": 110
},
{
"epoch": 1.776,
"grad_norm": 27.375,
"learning_rate": 5.327868852459016e-06,
"loss": 0.1111,
"step": 111
},
{
"epoch": 1.776,
"eval_accuracy": 0.852,
"eval_loss": 0.3771066665649414,
"eval_runtime": 8.6565,
"eval_samples_per_second": 28.88,
"eval_steps_per_second": 3.697,
"step": 111
},
{
"epoch": 1.792,
"grad_norm": 26.125,
"learning_rate": 4.918032786885246e-06,
"loss": 0.1367,
"step": 112
},
{
"epoch": 1.792,
"eval_accuracy": 0.852,
"eval_loss": 0.3757225275039673,
"eval_runtime": 8.6575,
"eval_samples_per_second": 28.877,
"eval_steps_per_second": 3.696,
"step": 112
},
{
"epoch": 1.808,
"grad_norm": 24.875,
"learning_rate": 4.508196721311476e-06,
"loss": 0.0762,
"step": 113
},
{
"epoch": 1.808,
"eval_accuracy": 0.852,
"eval_loss": 0.3756250739097595,
"eval_runtime": 8.6535,
"eval_samples_per_second": 28.89,
"eval_steps_per_second": 3.698,
"step": 113
},
{
"epoch": 1.8239999999999998,
"grad_norm": 47.5,
"learning_rate": 4.098360655737704e-06,
"loss": 0.133,
"step": 114
},
{
"epoch": 1.8239999999999998,
"eval_accuracy": 0.852,
"eval_loss": 0.37420740723609924,
"eval_runtime": 8.6587,
"eval_samples_per_second": 28.873,
"eval_steps_per_second": 3.696,
"step": 114
},
{
"epoch": 1.8399999999999999,
"grad_norm": 22.625,
"learning_rate": 3.6885245901639347e-06,
"loss": 0.2904,
"step": 115
},
{
"epoch": 1.8399999999999999,
"eval_accuracy": 0.852,
"eval_loss": 0.372751921415329,
"eval_runtime": 8.6548,
"eval_samples_per_second": 28.886,
"eval_steps_per_second": 3.697,
"step": 115
},
{
"epoch": 1.8559999999999999,
"grad_norm": 16.75,
"learning_rate": 3.278688524590164e-06,
"loss": 0.1686,
"step": 116
},
{
"epoch": 1.8559999999999999,
"eval_accuracy": 0.848,
"eval_loss": 0.3734797239303589,
"eval_runtime": 8.6629,
"eval_samples_per_second": 28.859,
"eval_steps_per_second": 3.694,
"step": 116
},
{
"epoch": 1.8719999999999999,
"grad_norm": 35.25,
"learning_rate": 2.8688524590163937e-06,
"loss": 0.0737,
"step": 117
},
{
"epoch": 1.8719999999999999,
"eval_accuracy": 0.848,
"eval_loss": 0.3713564872741699,
"eval_runtime": 8.6633,
"eval_samples_per_second": 28.857,
"eval_steps_per_second": 3.694,
"step": 117
},
{
"epoch": 1.888,
"grad_norm": 100.0,
"learning_rate": 2.459016393442623e-06,
"loss": 0.2758,
"step": 118
},
{
"epoch": 1.888,
"eval_accuracy": 0.848,
"eval_loss": 0.3682093322277069,
"eval_runtime": 8.6631,
"eval_samples_per_second": 28.858,
"eval_steps_per_second": 3.694,
"step": 118
},
{
"epoch": 1.904,
"grad_norm": 22.5,
"learning_rate": 2.049180327868852e-06,
"loss": 0.0542,
"step": 119
},
{
"epoch": 1.904,
"eval_accuracy": 0.848,
"eval_loss": 0.3716946542263031,
"eval_runtime": 8.6618,
"eval_samples_per_second": 28.862,
"eval_steps_per_second": 3.694,
"step": 119
},
{
"epoch": 1.92,
"grad_norm": 69.0,
"learning_rate": 1.639344262295082e-06,
"loss": 0.1646,
"step": 120
},
{
"epoch": 1.92,
"eval_accuracy": 0.848,
"eval_loss": 0.3682910203933716,
"eval_runtime": 8.6617,
"eval_samples_per_second": 28.863,
"eval_steps_per_second": 3.694,
"step": 120
},
{
"epoch": 1.936,
"grad_norm": 31.125,
"learning_rate": 1.2295081967213116e-06,
"loss": 0.4908,
"step": 121
},
{
"epoch": 1.936,
"eval_accuracy": 0.848,
"eval_loss": 0.3708224594593048,
"eval_runtime": 8.6585,
"eval_samples_per_second": 28.873,
"eval_steps_per_second": 3.696,
"step": 121
},
{
"epoch": 1.952,
"grad_norm": 55.75,
"learning_rate": 8.19672131147541e-07,
"loss": 0.3249,
"step": 122
},
{
"epoch": 1.952,
"eval_accuracy": 0.848,
"eval_loss": 0.36828938126564026,
"eval_runtime": 8.6603,
"eval_samples_per_second": 28.867,
"eval_steps_per_second": 3.695,
"step": 122
},
{
"epoch": 1.968,
"grad_norm": 22.375,
"learning_rate": 4.098360655737705e-07,
"loss": 0.1096,
"step": 123
},
{
"epoch": 1.968,
"eval_accuracy": 0.852,
"eval_loss": 0.3706204891204834,
"eval_runtime": 8.6631,
"eval_samples_per_second": 28.858,
"eval_steps_per_second": 3.694,
"step": 123
},
{
"epoch": 1.984,
"grad_norm": 52.5,
"learning_rate": 0.0,
"loss": 0.1758,
"step": 124
},
{
"epoch": 1.984,
"eval_accuracy": 0.852,
"eval_loss": 0.36876150965690613,
"eval_runtime": 8.6609,
"eval_samples_per_second": 28.865,
"eval_steps_per_second": 3.695,
"step": 124
},
{
"epoch": 1.984,
"step": 124,
"total_flos": 1.693315531538432e+16,
"train_loss": 0.4407010670871504,
"train_runtime": 1267.2396,
"train_samples_per_second": 1.578,
"train_steps_per_second": 0.098
}
],
"logging_steps": 1,
"max_steps": 124,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.693315531538432e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}