|
{ |
|
"best_metric": 0.7937062937062938, |
|
"best_model_checkpoint": "wav2vec2-5Class-train-test-finetune/checkpoint-4122", |
|
"epoch": 224.0, |
|
"eval_steps": 500, |
|
"global_step": 5432, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5984586477279663, |
|
"eval_runtime": 5.3437, |
|
"eval_samples_per_second": 53.521, |
|
"eval_steps_per_second": 3.368, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_accuracy": 0.33916083916083917, |
|
"eval_loss": 1.5969289541244507, |
|
"eval_runtime": 3.8653, |
|
"eval_samples_per_second": 73.992, |
|
"eval_steps_per_second": 4.657, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 1.0544973611831665, |
|
"learning_rate": 2.4999999999999998e-06, |
|
"loss": 1.5969, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.32867132867132864, |
|
"eval_loss": 1.5943816900253296, |
|
"eval_runtime": 6.1748, |
|
"eval_samples_per_second": 46.317, |
|
"eval_steps_per_second": 2.915, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.3146853146853147, |
|
"eval_loss": 1.5906767845153809, |
|
"eval_runtime": 5.1678, |
|
"eval_samples_per_second": 55.343, |
|
"eval_steps_per_second": 3.483, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"grad_norm": 0.8443157076835632, |
|
"learning_rate": 4.9999999999999996e-06, |
|
"loss": 1.5896, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.2972027972027972, |
|
"eval_loss": 1.5860023498535156, |
|
"eval_runtime": 4.9416, |
|
"eval_samples_per_second": 57.876, |
|
"eval_steps_per_second": 3.643, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_accuracy": 0.2692307692307692, |
|
"eval_loss": 1.5806005001068115, |
|
"eval_runtime": 4.1837, |
|
"eval_samples_per_second": 68.36, |
|
"eval_steps_per_second": 4.302, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"grad_norm": 1.0938074588775635, |
|
"learning_rate": 7.5e-06, |
|
"loss": 1.5743, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.25874125874125875, |
|
"eval_loss": 1.5742768049240112, |
|
"eval_runtime": 7.1914, |
|
"eval_samples_per_second": 39.77, |
|
"eval_steps_per_second": 2.503, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.23426573426573427, |
|
"eval_loss": 1.5664165019989014, |
|
"eval_runtime": 5.6489, |
|
"eval_samples_per_second": 50.629, |
|
"eval_steps_per_second": 3.186, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"grad_norm": 0.9692079424858093, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 1.5508, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.557572841644287, |
|
"eval_runtime": 5.5182, |
|
"eval_samples_per_second": 51.828, |
|
"eval_steps_per_second": 3.262, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5482373237609863, |
|
"eval_runtime": 5.3205, |
|
"eval_samples_per_second": 53.754, |
|
"eval_steps_per_second": 3.383, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"grad_norm": 1.02046799659729, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.5157, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.539355993270874, |
|
"eval_runtime": 6.3116, |
|
"eval_samples_per_second": 45.313, |
|
"eval_steps_per_second": 2.852, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5350520610809326, |
|
"eval_runtime": 4.3422, |
|
"eval_samples_per_second": 65.865, |
|
"eval_steps_per_second": 4.145, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"grad_norm": 1.6058833599090576, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.4534, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5525730848312378, |
|
"eval_runtime": 5.245, |
|
"eval_samples_per_second": 54.528, |
|
"eval_steps_per_second": 3.432, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.599926471710205, |
|
"eval_runtime": 6.0088, |
|
"eval_samples_per_second": 47.597, |
|
"eval_steps_per_second": 2.996, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"grad_norm": 0.8243080377578735, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 1.3638, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.22727272727272727, |
|
"eval_loss": 1.5896875858306885, |
|
"eval_runtime": 4.8752, |
|
"eval_samples_per_second": 58.664, |
|
"eval_steps_per_second": 3.692, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.26573426573426573, |
|
"eval_loss": 1.560091495513916, |
|
"eval_runtime": 5.5082, |
|
"eval_samples_per_second": 51.922, |
|
"eval_steps_per_second": 3.268, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"grad_norm": 0.7977257370948792, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 1.2951, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.2937062937062937, |
|
"eval_loss": 1.5349317789077759, |
|
"eval_runtime": 4.7526, |
|
"eval_samples_per_second": 60.178, |
|
"eval_steps_per_second": 3.787, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 17.98, |
|
"eval_accuracy": 0.34265734265734266, |
|
"eval_loss": 1.5053907632827759, |
|
"eval_runtime": 4.8638, |
|
"eval_samples_per_second": 58.801, |
|
"eval_steps_per_second": 3.701, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"grad_norm": 0.7064552903175354, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.2369, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.3741258741258741, |
|
"eval_loss": 1.4689087867736816, |
|
"eval_runtime": 4.3712, |
|
"eval_samples_per_second": 65.428, |
|
"eval_steps_per_second": 4.118, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.4370629370629371, |
|
"eval_loss": 1.404613971710205, |
|
"eval_runtime": 4.7203, |
|
"eval_samples_per_second": 60.59, |
|
"eval_steps_per_second": 3.813, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"grad_norm": 0.598238468170166, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.1566, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.4405594405594406, |
|
"eval_loss": 1.3691043853759766, |
|
"eval_runtime": 6.6443, |
|
"eval_samples_per_second": 43.044, |
|
"eval_steps_per_second": 2.709, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 21.98, |
|
"eval_accuracy": 0.4825174825174825, |
|
"eval_loss": 1.3120107650756836, |
|
"eval_runtime": 4.9585, |
|
"eval_samples_per_second": 57.679, |
|
"eval_steps_per_second": 3.63, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"grad_norm": 0.682925820350647, |
|
"learning_rate": 2.75e-05, |
|
"loss": 1.0676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.486013986013986, |
|
"eval_loss": 1.2839338779449463, |
|
"eval_runtime": 4.0382, |
|
"eval_samples_per_second": 70.824, |
|
"eval_steps_per_second": 4.457, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.5104895104895105, |
|
"eval_loss": 1.2549891471862793, |
|
"eval_runtime": 5.1896, |
|
"eval_samples_per_second": 55.11, |
|
"eval_steps_per_second": 3.468, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 24.74, |
|
"grad_norm": 1.1368101835250854, |
|
"learning_rate": 3e-05, |
|
"loss": 0.992, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.5209790209790209, |
|
"eval_loss": 1.2106566429138184, |
|
"eval_runtime": 6.8941, |
|
"eval_samples_per_second": 41.485, |
|
"eval_steps_per_second": 2.611, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 25.98, |
|
"eval_accuracy": 0.5384615384615384, |
|
"eval_loss": 1.1711338758468628, |
|
"eval_runtime": 4.9707, |
|
"eval_samples_per_second": 57.537, |
|
"eval_steps_per_second": 3.621, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"grad_norm": 0.9649831056594849, |
|
"learning_rate": 2.9722222222222223e-05, |
|
"loss": 0.9272, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.5594405594405595, |
|
"eval_loss": 1.1318116188049316, |
|
"eval_runtime": 5.5564, |
|
"eval_samples_per_second": 51.472, |
|
"eval_steps_per_second": 3.24, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6153846153846154, |
|
"eval_loss": 1.0594333410263062, |
|
"eval_runtime": 4.6773, |
|
"eval_samples_per_second": 61.147, |
|
"eval_steps_per_second": 3.848, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 28.87, |
|
"grad_norm": 0.883937418460846, |
|
"learning_rate": 2.9444444444444445e-05, |
|
"loss": 0.8478, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.6013986013986014, |
|
"eval_loss": 1.054669737815857, |
|
"eval_runtime": 4.9219, |
|
"eval_samples_per_second": 58.108, |
|
"eval_steps_per_second": 3.657, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.9822685122489929, |
|
"eval_runtime": 6.3133, |
|
"eval_samples_per_second": 45.302, |
|
"eval_steps_per_second": 2.851, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"grad_norm": 1.3742878437042236, |
|
"learning_rate": 2.9166666666666666e-05, |
|
"loss": 0.7627, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.6398601398601399, |
|
"eval_loss": 1.00295090675354, |
|
"eval_runtime": 6.154, |
|
"eval_samples_per_second": 46.473, |
|
"eval_steps_per_second": 2.925, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6608391608391608, |
|
"eval_loss": 0.930969774723053, |
|
"eval_runtime": 5.6747, |
|
"eval_samples_per_second": 50.399, |
|
"eval_steps_per_second": 3.172, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"grad_norm": 1.329268217086792, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.7266, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 32.99, |
|
"eval_accuracy": 0.6678321678321678, |
|
"eval_loss": 0.9228739738464355, |
|
"eval_runtime": 5.382, |
|
"eval_samples_per_second": 53.14, |
|
"eval_steps_per_second": 3.344, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.98, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8684509992599487, |
|
"eval_runtime": 4.8497, |
|
"eval_samples_per_second": 58.973, |
|
"eval_steps_per_second": 3.712, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.6643356643356644, |
|
"eval_loss": 0.8954732418060303, |
|
"eval_runtime": 5.2083, |
|
"eval_samples_per_second": 54.912, |
|
"eval_steps_per_second": 3.456, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 35.05, |
|
"grad_norm": 1.3892701864242554, |
|
"learning_rate": 2.8611111111111113e-05, |
|
"loss": 0.6906, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6713286713286714, |
|
"eval_loss": 0.9125654101371765, |
|
"eval_runtime": 5.3068, |
|
"eval_samples_per_second": 53.894, |
|
"eval_steps_per_second": 3.392, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 36.99, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8543534874916077, |
|
"eval_runtime": 4.3351, |
|
"eval_samples_per_second": 65.974, |
|
"eval_steps_per_second": 4.152, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 37.11, |
|
"grad_norm": 0.836291491985321, |
|
"learning_rate": 2.8333333333333332e-05, |
|
"loss": 0.6721, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.98, |
|
"eval_accuracy": 0.6923076923076923, |
|
"eval_loss": 0.8480322957038879, |
|
"eval_runtime": 5.1861, |
|
"eval_samples_per_second": 55.147, |
|
"eval_steps_per_second": 3.471, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 38.97, |
|
"eval_accuracy": 0.7097902097902098, |
|
"eval_loss": 0.8354606628417969, |
|
"eval_runtime": 6.3247, |
|
"eval_samples_per_second": 45.22, |
|
"eval_steps_per_second": 2.846, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 39.18, |
|
"grad_norm": 1.6499431133270264, |
|
"learning_rate": 2.8055555555555557e-05, |
|
"loss": 0.6442, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8412452340126038, |
|
"eval_runtime": 5.2281, |
|
"eval_samples_per_second": 54.704, |
|
"eval_steps_per_second": 3.443, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 40.99, |
|
"eval_accuracy": 0.6888111888111889, |
|
"eval_loss": 0.8356389999389648, |
|
"eval_runtime": 4.8326, |
|
"eval_samples_per_second": 59.181, |
|
"eval_steps_per_second": 3.725, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 41.24, |
|
"grad_norm": 1.1766818761825562, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.6465, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.98, |
|
"eval_accuracy": 0.7062937062937062, |
|
"eval_loss": 0.8180016875267029, |
|
"eval_runtime": 5.7926, |
|
"eval_samples_per_second": 49.374, |
|
"eval_steps_per_second": 3.107, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 42.97, |
|
"eval_accuracy": 0.7027972027972028, |
|
"eval_loss": 0.8103991150856018, |
|
"eval_runtime": 5.5185, |
|
"eval_samples_per_second": 51.825, |
|
"eval_steps_per_second": 3.262, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 43.3, |
|
"grad_norm": 0.9722403287887573, |
|
"learning_rate": 2.75e-05, |
|
"loss": 0.6086, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8162235617637634, |
|
"eval_runtime": 4.9174, |
|
"eval_samples_per_second": 58.161, |
|
"eval_steps_per_second": 3.66, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 44.99, |
|
"eval_accuracy": 0.7027972027972028, |
|
"eval_loss": 0.7957289218902588, |
|
"eval_runtime": 4.6891, |
|
"eval_samples_per_second": 60.992, |
|
"eval_steps_per_second": 3.839, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 45.36, |
|
"grad_norm": 1.269113302230835, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 0.5863, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.98, |
|
"eval_accuracy": 0.6958041958041958, |
|
"eval_loss": 0.8143528699874878, |
|
"eval_runtime": 6.6805, |
|
"eval_samples_per_second": 42.811, |
|
"eval_steps_per_second": 2.694, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 46.97, |
|
"eval_accuracy": 0.7027972027972028, |
|
"eval_loss": 0.78568434715271, |
|
"eval_runtime": 4.7422, |
|
"eval_samples_per_second": 60.31, |
|
"eval_steps_per_second": 3.796, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 47.42, |
|
"grad_norm": 0.9775255918502808, |
|
"learning_rate": 2.6944444444444445e-05, |
|
"loss": 0.5877, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7132867132867133, |
|
"eval_loss": 0.7764595150947571, |
|
"eval_runtime": 5.76, |
|
"eval_samples_per_second": 49.653, |
|
"eval_steps_per_second": 3.125, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 48.99, |
|
"eval_accuracy": 0.6993006993006993, |
|
"eval_loss": 0.7881478071212769, |
|
"eval_runtime": 5.4965, |
|
"eval_samples_per_second": 52.033, |
|
"eval_steps_per_second": 3.275, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 49.48, |
|
"grad_norm": 1.540124773979187, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.5629, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.98, |
|
"eval_accuracy": 0.7097902097902098, |
|
"eval_loss": 0.7658265829086304, |
|
"eval_runtime": 4.731, |
|
"eval_samples_per_second": 60.452, |
|
"eval_steps_per_second": 3.805, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 50.97, |
|
"eval_accuracy": 0.7132867132867133, |
|
"eval_loss": 0.7723098397254944, |
|
"eval_runtime": 5.8352, |
|
"eval_samples_per_second": 49.013, |
|
"eval_steps_per_second": 3.085, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 51.55, |
|
"grad_norm": 1.2498500347137451, |
|
"learning_rate": 2.6388888888888892e-05, |
|
"loss": 0.5476, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7097902097902098, |
|
"eval_loss": 0.7603952884674072, |
|
"eval_runtime": 4.448, |
|
"eval_samples_per_second": 64.299, |
|
"eval_steps_per_second": 4.047, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 52.99, |
|
"eval_accuracy": 0.7202797202797203, |
|
"eval_loss": 0.7554137706756592, |
|
"eval_runtime": 6.4218, |
|
"eval_samples_per_second": 44.536, |
|
"eval_steps_per_second": 2.803, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 53.61, |
|
"grad_norm": 0.9919388890266418, |
|
"learning_rate": 2.6116666666666667e-05, |
|
"loss": 0.5357, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.98, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.7458928227424622, |
|
"eval_runtime": 5.3791, |
|
"eval_samples_per_second": 53.168, |
|
"eval_steps_per_second": 3.346, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 54.97, |
|
"eval_accuracy": 0.7132867132867133, |
|
"eval_loss": 0.7632877230644226, |
|
"eval_runtime": 5.278, |
|
"eval_samples_per_second": 54.187, |
|
"eval_steps_per_second": 3.41, |
|
"step": 1333 |
|
}, |
|
{ |
|
"epoch": 55.67, |
|
"grad_norm": 1.688183307647705, |
|
"learning_rate": 2.5838888888888892e-05, |
|
"loss": 0.5335, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7167832167832168, |
|
"eval_loss": 0.768308162689209, |
|
"eval_runtime": 5.7022, |
|
"eval_samples_per_second": 50.156, |
|
"eval_steps_per_second": 3.157, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 56.99, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.7380541563034058, |
|
"eval_runtime": 4.522, |
|
"eval_samples_per_second": 63.247, |
|
"eval_steps_per_second": 3.981, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 57.73, |
|
"grad_norm": 1.4895784854888916, |
|
"learning_rate": 2.556111111111111e-05, |
|
"loss": 0.5107, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.98, |
|
"eval_accuracy": 0.7377622377622378, |
|
"eval_loss": 0.7308338284492493, |
|
"eval_runtime": 4.4787, |
|
"eval_samples_per_second": 63.857, |
|
"eval_steps_per_second": 4.019, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 58.97, |
|
"eval_accuracy": 0.7237762237762237, |
|
"eval_loss": 0.7441032528877258, |
|
"eval_runtime": 5.8744, |
|
"eval_samples_per_second": 48.685, |
|
"eval_steps_per_second": 3.064, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 59.79, |
|
"grad_norm": 1.4925004243850708, |
|
"learning_rate": 2.5283333333333332e-05, |
|
"loss": 0.5105, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.7481815218925476, |
|
"eval_runtime": 7.272, |
|
"eval_samples_per_second": 39.329, |
|
"eval_steps_per_second": 2.475, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 60.99, |
|
"eval_accuracy": 0.7342657342657343, |
|
"eval_loss": 0.733482301235199, |
|
"eval_runtime": 4.6235, |
|
"eval_samples_per_second": 61.858, |
|
"eval_steps_per_second": 3.893, |
|
"step": 1479 |
|
}, |
|
{ |
|
"epoch": 61.86, |
|
"grad_norm": 1.3200663328170776, |
|
"learning_rate": 2.5005555555555558e-05, |
|
"loss": 0.4914, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.98, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.7241908311843872, |
|
"eval_runtime": 4.8198, |
|
"eval_samples_per_second": 59.338, |
|
"eval_steps_per_second": 3.735, |
|
"step": 1503 |
|
}, |
|
{ |
|
"epoch": 62.97, |
|
"eval_accuracy": 0.7377622377622378, |
|
"eval_loss": 0.7321043014526367, |
|
"eval_runtime": 5.8929, |
|
"eval_samples_per_second": 48.533, |
|
"eval_steps_per_second": 3.055, |
|
"step": 1527 |
|
}, |
|
{ |
|
"epoch": 63.92, |
|
"grad_norm": 1.1309747695922852, |
|
"learning_rate": 2.472777777777778e-05, |
|
"loss": 0.4839, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7342657342657343, |
|
"eval_loss": 0.7220665216445923, |
|
"eval_runtime": 5.8635, |
|
"eval_samples_per_second": 48.776, |
|
"eval_steps_per_second": 3.07, |
|
"step": 1552 |
|
}, |
|
{ |
|
"epoch": 64.99, |
|
"eval_accuracy": 0.7412587412587412, |
|
"eval_loss": 0.7136482000350952, |
|
"eval_runtime": 4.3102, |
|
"eval_samples_per_second": 66.354, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1576 |
|
}, |
|
{ |
|
"epoch": 65.98, |
|
"grad_norm": 1.1314157247543335, |
|
"learning_rate": 2.4449999999999998e-05, |
|
"loss": 0.4751, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.98, |
|
"eval_accuracy": 0.7412587412587412, |
|
"eval_loss": 0.7198111414909363, |
|
"eval_runtime": 4.7841, |
|
"eval_samples_per_second": 59.781, |
|
"eval_steps_per_second": 3.762, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 66.97, |
|
"eval_accuracy": 0.7377622377622378, |
|
"eval_loss": 0.7145721912384033, |
|
"eval_runtime": 6.347, |
|
"eval_samples_per_second": 45.061, |
|
"eval_steps_per_second": 2.836, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.6970916390419006, |
|
"eval_runtime": 5.6871, |
|
"eval_samples_per_second": 50.289, |
|
"eval_steps_per_second": 3.165, |
|
"step": 1649 |
|
}, |
|
{ |
|
"epoch": 68.04, |
|
"grad_norm": 2.397585153579712, |
|
"learning_rate": 2.4172222222222223e-05, |
|
"loss": 0.4639, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 68.99, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.7201464176177979, |
|
"eval_runtime": 4.4157, |
|
"eval_samples_per_second": 64.769, |
|
"eval_steps_per_second": 4.076, |
|
"step": 1673 |
|
}, |
|
{ |
|
"epoch": 69.98, |
|
"eval_accuracy": 0.7307692307692307, |
|
"eval_loss": 0.7244682312011719, |
|
"eval_runtime": 5.4392, |
|
"eval_samples_per_second": 52.581, |
|
"eval_steps_per_second": 3.309, |
|
"step": 1697 |
|
}, |
|
{ |
|
"epoch": 70.1, |
|
"grad_norm": 2.062610387802124, |
|
"learning_rate": 2.3894444444444445e-05, |
|
"loss": 0.4581, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 70.97, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.7077587842941284, |
|
"eval_runtime": 5.1002, |
|
"eval_samples_per_second": 56.076, |
|
"eval_steps_per_second": 3.529, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7517482517482518, |
|
"eval_loss": 0.6957913637161255, |
|
"eval_runtime": 4.4485, |
|
"eval_samples_per_second": 64.291, |
|
"eval_steps_per_second": 4.046, |
|
"step": 1746 |
|
}, |
|
{ |
|
"epoch": 72.16, |
|
"grad_norm": 2.7808456420898438, |
|
"learning_rate": 2.3616666666666667e-05, |
|
"loss": 0.4643, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 72.99, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.7036928534507751, |
|
"eval_runtime": 5.9101, |
|
"eval_samples_per_second": 48.392, |
|
"eval_steps_per_second": 3.046, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 73.98, |
|
"eval_accuracy": 0.7482517482517482, |
|
"eval_loss": 0.71629399061203, |
|
"eval_runtime": 6.0211, |
|
"eval_samples_per_second": 47.5, |
|
"eval_steps_per_second": 2.989, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 74.23, |
|
"grad_norm": 1.78495192527771, |
|
"learning_rate": 2.333888888888889e-05, |
|
"loss": 0.442, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 74.97, |
|
"eval_accuracy": 0.7377622377622378, |
|
"eval_loss": 0.6997957229614258, |
|
"eval_runtime": 4.4212, |
|
"eval_samples_per_second": 64.688, |
|
"eval_steps_per_second": 4.071, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.6946483850479126, |
|
"eval_runtime": 4.0507, |
|
"eval_samples_per_second": 70.605, |
|
"eval_steps_per_second": 4.444, |
|
"step": 1843 |
|
}, |
|
{ |
|
"epoch": 76.29, |
|
"grad_norm": 1.7383118867874146, |
|
"learning_rate": 2.306111111111111e-05, |
|
"loss": 0.4305, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 76.99, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6857091784477234, |
|
"eval_runtime": 4.1718, |
|
"eval_samples_per_second": 68.556, |
|
"eval_steps_per_second": 4.315, |
|
"step": 1867 |
|
}, |
|
{ |
|
"epoch": 77.98, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.6936307549476624, |
|
"eval_runtime": 3.8781, |
|
"eval_samples_per_second": 73.747, |
|
"eval_steps_per_second": 4.641, |
|
"step": 1891 |
|
}, |
|
{ |
|
"epoch": 78.35, |
|
"grad_norm": 1.047067403793335, |
|
"learning_rate": 2.2783333333333336e-05, |
|
"loss": 0.4416, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 78.97, |
|
"eval_accuracy": 0.7517482517482518, |
|
"eval_loss": 0.6965110301971436, |
|
"eval_runtime": 5.1318, |
|
"eval_samples_per_second": 55.731, |
|
"eval_steps_per_second": 3.508, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7482517482517482, |
|
"eval_loss": 0.7017127871513367, |
|
"eval_runtime": 4.3418, |
|
"eval_samples_per_second": 65.871, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 80.41, |
|
"grad_norm": 1.5354928970336914, |
|
"learning_rate": 2.2505555555555554e-05, |
|
"loss": 0.428, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 80.99, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6970596313476562, |
|
"eval_runtime": 5.973, |
|
"eval_samples_per_second": 47.882, |
|
"eval_steps_per_second": 3.014, |
|
"step": 1964 |
|
}, |
|
{ |
|
"epoch": 81.98, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6897542476654053, |
|
"eval_runtime": 5.0481, |
|
"eval_samples_per_second": 56.655, |
|
"eval_steps_per_second": 3.566, |
|
"step": 1988 |
|
}, |
|
{ |
|
"epoch": 82.47, |
|
"grad_norm": 1.7141317129135132, |
|
"learning_rate": 2.2227777777777776e-05, |
|
"loss": 0.4093, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 82.97, |
|
"eval_accuracy": 0.7482517482517482, |
|
"eval_loss": 0.7004020810127258, |
|
"eval_runtime": 4.1986, |
|
"eval_samples_per_second": 68.118, |
|
"eval_steps_per_second": 4.287, |
|
"step": 2012 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6867479681968689, |
|
"eval_runtime": 4.6871, |
|
"eval_samples_per_second": 61.018, |
|
"eval_steps_per_second": 3.84, |
|
"step": 2037 |
|
}, |
|
{ |
|
"epoch": 84.54, |
|
"grad_norm": 2.0219666957855225, |
|
"learning_rate": 2.195e-05, |
|
"loss": 0.4148, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 84.99, |
|
"eval_accuracy": 0.7377622377622378, |
|
"eval_loss": 0.7070020437240601, |
|
"eval_runtime": 5.9326, |
|
"eval_samples_per_second": 48.208, |
|
"eval_steps_per_second": 3.034, |
|
"step": 2061 |
|
}, |
|
{ |
|
"epoch": 85.98, |
|
"eval_accuracy": 0.7447552447552448, |
|
"eval_loss": 0.7030305862426758, |
|
"eval_runtime": 5.3564, |
|
"eval_samples_per_second": 53.394, |
|
"eval_steps_per_second": 3.36, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 86.6, |
|
"grad_norm": 1.4678714275360107, |
|
"learning_rate": 2.1672222222222223e-05, |
|
"loss": 0.3923, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 86.97, |
|
"eval_accuracy": 0.7587412587412588, |
|
"eval_loss": 0.678174614906311, |
|
"eval_runtime": 3.9745, |
|
"eval_samples_per_second": 71.96, |
|
"eval_steps_per_second": 4.529, |
|
"step": 2109 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7412587412587412, |
|
"eval_loss": 0.7166118621826172, |
|
"eval_runtime": 4.0358, |
|
"eval_samples_per_second": 70.866, |
|
"eval_steps_per_second": 4.46, |
|
"step": 2134 |
|
}, |
|
{ |
|
"epoch": 88.66, |
|
"grad_norm": 1.589543342590332, |
|
"learning_rate": 2.1394444444444445e-05, |
|
"loss": 0.3964, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"eval_accuracy": 0.7482517482517482, |
|
"eval_loss": 0.7075912952423096, |
|
"eval_runtime": 5.0331, |
|
"eval_samples_per_second": 56.823, |
|
"eval_steps_per_second": 3.576, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 89.98, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6867172122001648, |
|
"eval_runtime": 5.386, |
|
"eval_samples_per_second": 53.101, |
|
"eval_steps_per_second": 3.342, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 90.72, |
|
"grad_norm": 1.3886605501174927, |
|
"learning_rate": 2.1116666666666667e-05, |
|
"loss": 0.3846, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 90.97, |
|
"eval_accuracy": 0.7517482517482518, |
|
"eval_loss": 0.6913285851478577, |
|
"eval_runtime": 5.5324, |
|
"eval_samples_per_second": 51.696, |
|
"eval_steps_per_second": 3.254, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7482517482517482, |
|
"eval_loss": 0.7160294651985168, |
|
"eval_runtime": 5.2753, |
|
"eval_samples_per_second": 54.215, |
|
"eval_steps_per_second": 3.412, |
|
"step": 2231 |
|
}, |
|
{ |
|
"epoch": 92.78, |
|
"grad_norm": 2.4106783866882324, |
|
"learning_rate": 2.083888888888889e-05, |
|
"loss": 0.3654, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 92.99, |
|
"eval_accuracy": 0.7517482517482518, |
|
"eval_loss": 0.6765207052230835, |
|
"eval_runtime": 5.5671, |
|
"eval_samples_per_second": 51.373, |
|
"eval_steps_per_second": 3.233, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 93.98, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6881967186927795, |
|
"eval_runtime": 3.8228, |
|
"eval_samples_per_second": 74.814, |
|
"eval_steps_per_second": 4.709, |
|
"step": 2279 |
|
}, |
|
{ |
|
"epoch": 94.85, |
|
"grad_norm": 0.8871183395385742, |
|
"learning_rate": 2.0561111111111114e-05, |
|
"loss": 0.3577, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 94.97, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6852585673332214, |
|
"eval_runtime": 4.7228, |
|
"eval_samples_per_second": 60.557, |
|
"eval_steps_per_second": 3.811, |
|
"step": 2303 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.7158808708190918, |
|
"eval_runtime": 5.6504, |
|
"eval_samples_per_second": 50.616, |
|
"eval_steps_per_second": 3.186, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 96.91, |
|
"grad_norm": 1.0019863843917847, |
|
"learning_rate": 2.0283333333333333e-05, |
|
"loss": 0.37, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 96.99, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6943120360374451, |
|
"eval_runtime": 4.8337, |
|
"eval_samples_per_second": 59.168, |
|
"eval_steps_per_second": 3.724, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 97.98, |
|
"eval_accuracy": 0.7587412587412588, |
|
"eval_loss": 0.7010317444801331, |
|
"eval_runtime": 4.6874, |
|
"eval_samples_per_second": 61.015, |
|
"eval_steps_per_second": 3.84, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 98.97, |
|
"grad_norm": 1.2908928394317627, |
|
"learning_rate": 2.0005555555555555e-05, |
|
"loss": 0.3473, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 98.97, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.693758487701416, |
|
"eval_runtime": 4.7585, |
|
"eval_samples_per_second": 60.103, |
|
"eval_steps_per_second": 3.783, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7587412587412588, |
|
"eval_loss": 0.6918778419494629, |
|
"eval_runtime": 6.6891, |
|
"eval_samples_per_second": 42.756, |
|
"eval_steps_per_second": 2.691, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 100.99, |
|
"eval_accuracy": 0.7552447552447552, |
|
"eval_loss": 0.6849302053451538, |
|
"eval_runtime": 4.4685, |
|
"eval_samples_per_second": 64.003, |
|
"eval_steps_per_second": 4.028, |
|
"step": 2449 |
|
}, |
|
{ |
|
"epoch": 101.03, |
|
"grad_norm": 1.1730871200561523, |
|
"learning_rate": 1.972777777777778e-05, |
|
"loss": 0.3587, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 101.98, |
|
"eval_accuracy": 0.7587412587412588, |
|
"eval_loss": 0.6855939030647278, |
|
"eval_runtime": 4.3434, |
|
"eval_samples_per_second": 65.847, |
|
"eval_steps_per_second": 4.144, |
|
"step": 2473 |
|
}, |
|
{ |
|
"epoch": 102.97, |
|
"eval_accuracy": 0.7517482517482518, |
|
"eval_loss": 0.7046144604682922, |
|
"eval_runtime": 4.7166, |
|
"eval_samples_per_second": 60.637, |
|
"eval_steps_per_second": 3.816, |
|
"step": 2497 |
|
}, |
|
{ |
|
"epoch": 103.09, |
|
"grad_norm": 1.3693217039108276, |
|
"learning_rate": 1.945e-05, |
|
"loss": 0.3429, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6892997622489929, |
|
"eval_runtime": 5.3868, |
|
"eval_samples_per_second": 53.092, |
|
"eval_steps_per_second": 3.341, |
|
"step": 2522 |
|
}, |
|
{ |
|
"epoch": 104.99, |
|
"eval_accuracy": 0.7622377622377622, |
|
"eval_loss": 0.6913393139839172, |
|
"eval_runtime": 5.09, |
|
"eval_samples_per_second": 56.188, |
|
"eval_steps_per_second": 3.536, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 105.15, |
|
"grad_norm": 1.923829436302185, |
|
"learning_rate": 1.9172222222222224e-05, |
|
"loss": 0.3549, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 105.98, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6880810856819153, |
|
"eval_runtime": 4.6668, |
|
"eval_samples_per_second": 61.283, |
|
"eval_steps_per_second": 3.857, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 106.97, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.7097887396812439, |
|
"eval_runtime": 6.4652, |
|
"eval_samples_per_second": 44.237, |
|
"eval_steps_per_second": 2.784, |
|
"step": 2594 |
|
}, |
|
{ |
|
"epoch": 107.22, |
|
"grad_norm": 2.702012062072754, |
|
"learning_rate": 1.8894444444444446e-05, |
|
"loss": 0.3403, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6878336668014526, |
|
"eval_runtime": 4.6923, |
|
"eval_samples_per_second": 60.951, |
|
"eval_steps_per_second": 3.836, |
|
"step": 2619 |
|
}, |
|
{ |
|
"epoch": 108.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.695954442024231, |
|
"eval_runtime": 4.4809, |
|
"eval_samples_per_second": 63.827, |
|
"eval_steps_per_second": 4.017, |
|
"step": 2643 |
|
}, |
|
{ |
|
"epoch": 109.28, |
|
"grad_norm": 2.3427536487579346, |
|
"learning_rate": 1.8616666666666667e-05, |
|
"loss": 0.3253, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 109.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7005948424339294, |
|
"eval_runtime": 4.8882, |
|
"eval_samples_per_second": 58.508, |
|
"eval_steps_per_second": 3.682, |
|
"step": 2667 |
|
}, |
|
{ |
|
"epoch": 110.97, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.6916196346282959, |
|
"eval_runtime": 5.2891, |
|
"eval_samples_per_second": 54.073, |
|
"eval_steps_per_second": 3.403, |
|
"step": 2691 |
|
}, |
|
{ |
|
"epoch": 111.34, |
|
"grad_norm": 2.178089141845703, |
|
"learning_rate": 1.833888888888889e-05, |
|
"loss": 0.3332, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.7059447765350342, |
|
"eval_runtime": 4.7437, |
|
"eval_samples_per_second": 60.291, |
|
"eval_steps_per_second": 3.795, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 112.99, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.6904045939445496, |
|
"eval_runtime": 4.9942, |
|
"eval_samples_per_second": 57.267, |
|
"eval_steps_per_second": 3.604, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 113.4, |
|
"grad_norm": 1.1625444889068604, |
|
"learning_rate": 1.806111111111111e-05, |
|
"loss": 0.3188, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 113.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6970774531364441, |
|
"eval_runtime": 6.4809, |
|
"eval_samples_per_second": 44.13, |
|
"eval_steps_per_second": 2.777, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 114.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.700820803642273, |
|
"eval_runtime": 5.2617, |
|
"eval_samples_per_second": 54.355, |
|
"eval_steps_per_second": 3.421, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 115.46, |
|
"grad_norm": 1.2394715547561646, |
|
"learning_rate": 1.7783333333333333e-05, |
|
"loss": 0.3112, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7002130150794983, |
|
"eval_runtime": 5.0937, |
|
"eval_samples_per_second": 56.147, |
|
"eval_steps_per_second": 3.534, |
|
"step": 2813 |
|
}, |
|
{ |
|
"epoch": 116.99, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.6909505724906921, |
|
"eval_runtime": 4.7575, |
|
"eval_samples_per_second": 60.116, |
|
"eval_steps_per_second": 3.784, |
|
"step": 2837 |
|
}, |
|
{ |
|
"epoch": 117.53, |
|
"grad_norm": 2.4334964752197266, |
|
"learning_rate": 1.7505555555555558e-05, |
|
"loss": 0.3153, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 117.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.6957750916481018, |
|
"eval_runtime": 4.8105, |
|
"eval_samples_per_second": 59.453, |
|
"eval_steps_per_second": 3.742, |
|
"step": 2861 |
|
}, |
|
{ |
|
"epoch": 118.97, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6867520213127136, |
|
"eval_runtime": 4.5411, |
|
"eval_samples_per_second": 62.98, |
|
"eval_steps_per_second": 3.964, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 119.59, |
|
"grad_norm": 0.769097089767456, |
|
"learning_rate": 1.7227777777777777e-05, |
|
"loss": 0.3006, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6890790462493896, |
|
"eval_runtime": 4.5864, |
|
"eval_samples_per_second": 62.358, |
|
"eval_steps_per_second": 3.925, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 120.99, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6889089941978455, |
|
"eval_runtime": 6.5804, |
|
"eval_samples_per_second": 43.462, |
|
"eval_steps_per_second": 2.735, |
|
"step": 2934 |
|
}, |
|
{ |
|
"epoch": 121.65, |
|
"grad_norm": 1.8714542388916016, |
|
"learning_rate": 1.695e-05, |
|
"loss": 0.2967, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 121.98, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6935350894927979, |
|
"eval_runtime": 4.7491, |
|
"eval_samples_per_second": 60.223, |
|
"eval_steps_per_second": 3.79, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 122.97, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.7058219909667969, |
|
"eval_runtime": 4.8941, |
|
"eval_samples_per_second": 58.438, |
|
"eval_steps_per_second": 3.678, |
|
"step": 2982 |
|
}, |
|
{ |
|
"epoch": 123.71, |
|
"grad_norm": 2.062924385070801, |
|
"learning_rate": 1.6672222222222224e-05, |
|
"loss": 0.2939, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.7220865488052368, |
|
"eval_runtime": 5.0487, |
|
"eval_samples_per_second": 56.648, |
|
"eval_steps_per_second": 3.565, |
|
"step": 3007 |
|
}, |
|
{ |
|
"epoch": 124.99, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6857044696807861, |
|
"eval_runtime": 5.6134, |
|
"eval_samples_per_second": 50.95, |
|
"eval_steps_per_second": 3.207, |
|
"step": 3031 |
|
}, |
|
{ |
|
"epoch": 125.77, |
|
"grad_norm": 1.7039302587509155, |
|
"learning_rate": 1.6394444444444446e-05, |
|
"loss": 0.3101, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 125.98, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6742061972618103, |
|
"eval_runtime": 5.3609, |
|
"eval_samples_per_second": 53.349, |
|
"eval_steps_per_second": 3.358, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 126.97, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7029407620429993, |
|
"eval_runtime": 5.8891, |
|
"eval_samples_per_second": 48.564, |
|
"eval_steps_per_second": 3.056, |
|
"step": 3079 |
|
}, |
|
{ |
|
"epoch": 127.84, |
|
"grad_norm": 1.434970736503601, |
|
"learning_rate": 1.6116666666666668e-05, |
|
"loss": 0.284, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.682050347328186, |
|
"eval_runtime": 5.1437, |
|
"eval_samples_per_second": 55.602, |
|
"eval_steps_per_second": 3.499, |
|
"step": 3104 |
|
}, |
|
{ |
|
"epoch": 128.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.68370121717453, |
|
"eval_runtime": 4.2733, |
|
"eval_samples_per_second": 66.927, |
|
"eval_steps_per_second": 4.212, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 129.9, |
|
"grad_norm": 1.320789098739624, |
|
"learning_rate": 1.583888888888889e-05, |
|
"loss": 0.2902, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 129.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6823462843894958, |
|
"eval_runtime": 5.7566, |
|
"eval_samples_per_second": 49.682, |
|
"eval_steps_per_second": 3.127, |
|
"step": 3152 |
|
}, |
|
{ |
|
"epoch": 130.97, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6950440406799316, |
|
"eval_runtime": 4.9248, |
|
"eval_samples_per_second": 58.074, |
|
"eval_steps_per_second": 3.655, |
|
"step": 3176 |
|
}, |
|
{ |
|
"epoch": 131.96, |
|
"grad_norm": 2.1280930042266846, |
|
"learning_rate": 1.556111111111111e-05, |
|
"loss": 0.301, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.6800761818885803, |
|
"eval_runtime": 8.1328, |
|
"eval_samples_per_second": 35.166, |
|
"eval_steps_per_second": 2.213, |
|
"step": 3201 |
|
}, |
|
{ |
|
"epoch": 132.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6867505311965942, |
|
"eval_runtime": 4.2532, |
|
"eval_samples_per_second": 67.244, |
|
"eval_steps_per_second": 4.232, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 133.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7061284184455872, |
|
"eval_runtime": 5.3031, |
|
"eval_samples_per_second": 53.93, |
|
"eval_steps_per_second": 3.394, |
|
"step": 3249 |
|
}, |
|
{ |
|
"epoch": 134.02, |
|
"grad_norm": 1.532638669013977, |
|
"learning_rate": 1.5283333333333333e-05, |
|
"loss": 0.2736, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 134.97, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7114368677139282, |
|
"eval_runtime": 4.6536, |
|
"eval_samples_per_second": 61.458, |
|
"eval_steps_per_second": 3.868, |
|
"step": 3273 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6914551854133606, |
|
"eval_runtime": 4.5505, |
|
"eval_samples_per_second": 62.851, |
|
"eval_steps_per_second": 3.956, |
|
"step": 3298 |
|
}, |
|
{ |
|
"epoch": 136.08, |
|
"grad_norm": 2.0108492374420166, |
|
"learning_rate": 1.5005555555555555e-05, |
|
"loss": 0.2931, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 136.99, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7055917978286743, |
|
"eval_runtime": 5.3067, |
|
"eval_samples_per_second": 53.894, |
|
"eval_steps_per_second": 3.392, |
|
"step": 3322 |
|
}, |
|
{ |
|
"epoch": 137.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7026935815811157, |
|
"eval_runtime": 5.186, |
|
"eval_samples_per_second": 55.149, |
|
"eval_steps_per_second": 3.471, |
|
"step": 3346 |
|
}, |
|
{ |
|
"epoch": 138.14, |
|
"grad_norm": 1.0804469585418701, |
|
"learning_rate": 1.4727777777777779e-05, |
|
"loss": 0.2864, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 138.97, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6983500719070435, |
|
"eval_runtime": 6.955, |
|
"eval_samples_per_second": 41.122, |
|
"eval_steps_per_second": 2.588, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.7168787121772766, |
|
"eval_runtime": 4.234, |
|
"eval_samples_per_second": 67.548, |
|
"eval_steps_per_second": 4.251, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 140.21, |
|
"grad_norm": 2.370694637298584, |
|
"learning_rate": 1.445e-05, |
|
"loss": 0.2765, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 140.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6960318088531494, |
|
"eval_runtime": 5.0294, |
|
"eval_samples_per_second": 56.865, |
|
"eval_steps_per_second": 3.579, |
|
"step": 3419 |
|
}, |
|
{ |
|
"epoch": 141.98, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.6990492343902588, |
|
"eval_runtime": 5.2727, |
|
"eval_samples_per_second": 54.242, |
|
"eval_steps_per_second": 3.414, |
|
"step": 3443 |
|
}, |
|
{ |
|
"epoch": 142.27, |
|
"grad_norm": 1.6676194667816162, |
|
"learning_rate": 1.4172222222222222e-05, |
|
"loss": 0.2808, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 142.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.706200897693634, |
|
"eval_runtime": 4.5273, |
|
"eval_samples_per_second": 63.173, |
|
"eval_steps_per_second": 3.976, |
|
"step": 3467 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.6821764707565308, |
|
"eval_runtime": 5.3614, |
|
"eval_samples_per_second": 53.344, |
|
"eval_steps_per_second": 3.357, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 144.33, |
|
"grad_norm": 1.9151145219802856, |
|
"learning_rate": 1.3894444444444444e-05, |
|
"loss": 0.2712, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 144.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7063603401184082, |
|
"eval_runtime": 4.9088, |
|
"eval_samples_per_second": 58.263, |
|
"eval_steps_per_second": 3.667, |
|
"step": 3516 |
|
}, |
|
{ |
|
"epoch": 145.98, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.7150112390518188, |
|
"eval_runtime": 7.2044, |
|
"eval_samples_per_second": 39.698, |
|
"eval_steps_per_second": 2.498, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 146.39, |
|
"grad_norm": 1.5093848705291748, |
|
"learning_rate": 1.3622222222222223e-05, |
|
"loss": 0.2726, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 146.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.696849524974823, |
|
"eval_runtime": 4.9386, |
|
"eval_samples_per_second": 57.911, |
|
"eval_steps_per_second": 3.645, |
|
"step": 3564 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7086759209632874, |
|
"eval_runtime": 4.4363, |
|
"eval_samples_per_second": 64.468, |
|
"eval_steps_per_second": 4.057, |
|
"step": 3589 |
|
}, |
|
{ |
|
"epoch": 148.45, |
|
"grad_norm": 1.4403679370880127, |
|
"learning_rate": 1.3344444444444444e-05, |
|
"loss": 0.2607, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 148.99, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.7129560112953186, |
|
"eval_runtime": 5.3809, |
|
"eval_samples_per_second": 53.15, |
|
"eval_steps_per_second": 3.345, |
|
"step": 3613 |
|
}, |
|
{ |
|
"epoch": 149.98, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7080287933349609, |
|
"eval_runtime": 5.8187, |
|
"eval_samples_per_second": 49.152, |
|
"eval_steps_per_second": 3.093, |
|
"step": 3637 |
|
}, |
|
{ |
|
"epoch": 150.52, |
|
"grad_norm": 2.036515235900879, |
|
"learning_rate": 1.3066666666666666e-05, |
|
"loss": 0.2546, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 150.97, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7088435888290405, |
|
"eval_runtime": 4.8742, |
|
"eval_samples_per_second": 58.677, |
|
"eval_steps_per_second": 3.693, |
|
"step": 3661 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7030193209648132, |
|
"eval_runtime": 4.9492, |
|
"eval_samples_per_second": 57.787, |
|
"eval_steps_per_second": 3.637, |
|
"step": 3686 |
|
}, |
|
{ |
|
"epoch": 152.58, |
|
"grad_norm": 1.200052261352539, |
|
"learning_rate": 1.2788888888888888e-05, |
|
"loss": 0.2563, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 152.99, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.7077969908714294, |
|
"eval_runtime": 4.614, |
|
"eval_samples_per_second": 61.985, |
|
"eval_steps_per_second": 3.901, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 153.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.700455904006958, |
|
"eval_runtime": 5.7657, |
|
"eval_samples_per_second": 49.604, |
|
"eval_steps_per_second": 3.122, |
|
"step": 3734 |
|
}, |
|
{ |
|
"epoch": 154.64, |
|
"grad_norm": 2.2751214504241943, |
|
"learning_rate": 1.2511111111111112e-05, |
|
"loss": 0.2531, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 154.97, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7160292267799377, |
|
"eval_runtime": 5.1079, |
|
"eval_samples_per_second": 55.992, |
|
"eval_steps_per_second": 3.524, |
|
"step": 3758 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7175909876823425, |
|
"eval_runtime": 5.4035, |
|
"eval_samples_per_second": 52.929, |
|
"eval_steps_per_second": 3.331, |
|
"step": 3783 |
|
}, |
|
{ |
|
"epoch": 156.7, |
|
"grad_norm": 1.9024412631988525, |
|
"learning_rate": 1.2233333333333334e-05, |
|
"loss": 0.2446, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 156.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7190600037574768, |
|
"eval_runtime": 4.3633, |
|
"eval_samples_per_second": 65.546, |
|
"eval_steps_per_second": 4.125, |
|
"step": 3807 |
|
}, |
|
{ |
|
"epoch": 157.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.719641387462616, |
|
"eval_runtime": 5.0426, |
|
"eval_samples_per_second": 56.717, |
|
"eval_steps_per_second": 3.57, |
|
"step": 3831 |
|
}, |
|
{ |
|
"epoch": 158.76, |
|
"grad_norm": 3.471806287765503, |
|
"learning_rate": 1.1955555555555556e-05, |
|
"loss": 0.2479, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 158.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7073430418968201, |
|
"eval_runtime": 3.6336, |
|
"eval_samples_per_second": 78.711, |
|
"eval_steps_per_second": 4.954, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7328661680221558, |
|
"eval_runtime": 5.2625, |
|
"eval_samples_per_second": 54.347, |
|
"eval_steps_per_second": 3.42, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 160.82, |
|
"grad_norm": 2.1171793937683105, |
|
"learning_rate": 1.1677777777777777e-05, |
|
"loss": 0.2523, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 160.99, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7158821821212769, |
|
"eval_runtime": 6.5877, |
|
"eval_samples_per_second": 43.414, |
|
"eval_steps_per_second": 2.732, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 161.98, |
|
"eval_accuracy": 0.7692307692307693, |
|
"eval_loss": 0.719171404838562, |
|
"eval_runtime": 4.5674, |
|
"eval_samples_per_second": 62.618, |
|
"eval_steps_per_second": 3.941, |
|
"step": 3928 |
|
}, |
|
{ |
|
"epoch": 162.89, |
|
"grad_norm": 1.7515395879745483, |
|
"learning_rate": 1.1400000000000001e-05, |
|
"loss": 0.2523, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 162.97, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7281435132026672, |
|
"eval_runtime": 4.4866, |
|
"eval_samples_per_second": 63.746, |
|
"eval_steps_per_second": 4.012, |
|
"step": 3952 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7078841328620911, |
|
"eval_runtime": 4.4241, |
|
"eval_samples_per_second": 64.645, |
|
"eval_steps_per_second": 4.069, |
|
"step": 3977 |
|
}, |
|
{ |
|
"epoch": 164.95, |
|
"grad_norm": 1.456335186958313, |
|
"learning_rate": 1.1122222222222223e-05, |
|
"loss": 0.2422, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 164.99, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7161521911621094, |
|
"eval_runtime": 5.1239, |
|
"eval_samples_per_second": 55.817, |
|
"eval_steps_per_second": 3.513, |
|
"step": 4001 |
|
}, |
|
{ |
|
"epoch": 165.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7190020084381104, |
|
"eval_runtime": 3.4488, |
|
"eval_samples_per_second": 82.926, |
|
"eval_steps_per_second": 5.219, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 166.97, |
|
"eval_accuracy": 0.7762237762237763, |
|
"eval_loss": 0.7311248779296875, |
|
"eval_runtime": 5.0389, |
|
"eval_samples_per_second": 56.759, |
|
"eval_steps_per_second": 3.572, |
|
"step": 4049 |
|
}, |
|
{ |
|
"epoch": 167.01, |
|
"grad_norm": 1.2554075717926025, |
|
"learning_rate": 1.0844444444444445e-05, |
|
"loss": 0.242, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7110462188720703, |
|
"eval_runtime": 4.4612, |
|
"eval_samples_per_second": 64.108, |
|
"eval_steps_per_second": 4.035, |
|
"step": 4074 |
|
}, |
|
{ |
|
"epoch": 168.99, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7028501629829407, |
|
"eval_runtime": 6.955, |
|
"eval_samples_per_second": 41.122, |
|
"eval_steps_per_second": 2.588, |
|
"step": 4098 |
|
}, |
|
{ |
|
"epoch": 169.07, |
|
"grad_norm": 2.8003265857696533, |
|
"learning_rate": 1.0566666666666667e-05, |
|
"loss": 0.2392, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 169.98, |
|
"eval_accuracy": 0.7937062937062938, |
|
"eval_loss": 0.7108554840087891, |
|
"eval_runtime": 5.0033, |
|
"eval_samples_per_second": 57.162, |
|
"eval_steps_per_second": 3.598, |
|
"step": 4122 |
|
}, |
|
{ |
|
"epoch": 170.97, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7106384634971619, |
|
"eval_runtime": 5.1984, |
|
"eval_samples_per_second": 55.017, |
|
"eval_steps_per_second": 3.463, |
|
"step": 4146 |
|
}, |
|
{ |
|
"epoch": 171.13, |
|
"grad_norm": 2.1897969245910645, |
|
"learning_rate": 1.028888888888889e-05, |
|
"loss": 0.247, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7151694297790527, |
|
"eval_runtime": 5.1963, |
|
"eval_samples_per_second": 55.039, |
|
"eval_steps_per_second": 3.464, |
|
"step": 4171 |
|
}, |
|
{ |
|
"epoch": 172.99, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_loss": 0.7254167795181274, |
|
"eval_runtime": 4.4466, |
|
"eval_samples_per_second": 64.319, |
|
"eval_steps_per_second": 4.048, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 173.2, |
|
"grad_norm": 2.769357681274414, |
|
"learning_rate": 1.0011111111111112e-05, |
|
"loss": 0.2341, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 173.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7290962338447571, |
|
"eval_runtime": 6.2221, |
|
"eval_samples_per_second": 45.965, |
|
"eval_steps_per_second": 2.893, |
|
"step": 4219 |
|
}, |
|
{ |
|
"epoch": 174.97, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7088623046875, |
|
"eval_runtime": 4.3709, |
|
"eval_samples_per_second": 65.433, |
|
"eval_steps_per_second": 4.118, |
|
"step": 4243 |
|
}, |
|
{ |
|
"epoch": 175.26, |
|
"grad_norm": 2.044703483581543, |
|
"learning_rate": 9.733333333333332e-06, |
|
"loss": 0.2317, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7185826897621155, |
|
"eval_runtime": 5.4095, |
|
"eval_samples_per_second": 52.87, |
|
"eval_steps_per_second": 3.327, |
|
"step": 4268 |
|
}, |
|
{ |
|
"epoch": 176.99, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7167823314666748, |
|
"eval_runtime": 4.9506, |
|
"eval_samples_per_second": 57.77, |
|
"eval_steps_per_second": 3.636, |
|
"step": 4292 |
|
}, |
|
{ |
|
"epoch": 177.32, |
|
"grad_norm": 1.078834056854248, |
|
"learning_rate": 9.455555555555556e-06, |
|
"loss": 0.2269, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 177.98, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7237738966941833, |
|
"eval_runtime": 4.781, |
|
"eval_samples_per_second": 59.82, |
|
"eval_steps_per_second": 3.765, |
|
"step": 4316 |
|
}, |
|
{ |
|
"epoch": 178.97, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7131801247596741, |
|
"eval_runtime": 4.6869, |
|
"eval_samples_per_second": 61.022, |
|
"eval_steps_per_second": 3.841, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 179.38, |
|
"grad_norm": 2.008120536804199, |
|
"learning_rate": 9.177777777777778e-06, |
|
"loss": 0.2283, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7384253144264221, |
|
"eval_runtime": 4.5879, |
|
"eval_samples_per_second": 62.338, |
|
"eval_steps_per_second": 3.923, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 180.99, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7002861499786377, |
|
"eval_runtime": 5.3238, |
|
"eval_samples_per_second": 53.721, |
|
"eval_steps_per_second": 3.381, |
|
"step": 4389 |
|
}, |
|
{ |
|
"epoch": 181.44, |
|
"grad_norm": 1.9518792629241943, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 0.2303, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 181.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7278482913970947, |
|
"eval_runtime": 5.8358, |
|
"eval_samples_per_second": 49.008, |
|
"eval_steps_per_second": 3.084, |
|
"step": 4413 |
|
}, |
|
{ |
|
"epoch": 182.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7143127918243408, |
|
"eval_runtime": 6.1229, |
|
"eval_samples_per_second": 46.71, |
|
"eval_steps_per_second": 2.94, |
|
"step": 4437 |
|
}, |
|
{ |
|
"epoch": 183.51, |
|
"grad_norm": 1.0936890840530396, |
|
"learning_rate": 8.622222222222221e-06, |
|
"loss": 0.2109, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7406834363937378, |
|
"eval_runtime": 5.0467, |
|
"eval_samples_per_second": 56.671, |
|
"eval_steps_per_second": 3.567, |
|
"step": 4462 |
|
}, |
|
{ |
|
"epoch": 184.99, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7053534388542175, |
|
"eval_runtime": 5.279, |
|
"eval_samples_per_second": 54.177, |
|
"eval_steps_per_second": 3.41, |
|
"step": 4486 |
|
}, |
|
{ |
|
"epoch": 185.57, |
|
"grad_norm": 2.9350059032440186, |
|
"learning_rate": 8.344444444444445e-06, |
|
"loss": 0.2261, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 185.98, |
|
"eval_accuracy": 0.7727272727272727, |
|
"eval_loss": 0.7260809540748596, |
|
"eval_runtime": 5.4165, |
|
"eval_samples_per_second": 52.802, |
|
"eval_steps_per_second": 3.323, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 186.97, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7240064144134521, |
|
"eval_runtime": 5.4866, |
|
"eval_samples_per_second": 52.127, |
|
"eval_steps_per_second": 3.281, |
|
"step": 4534 |
|
}, |
|
{ |
|
"epoch": 187.63, |
|
"grad_norm": 1.8322782516479492, |
|
"learning_rate": 8.066666666666667e-06, |
|
"loss": 0.2282, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7199599146842957, |
|
"eval_runtime": 4.6736, |
|
"eval_samples_per_second": 61.195, |
|
"eval_steps_per_second": 3.851, |
|
"step": 4559 |
|
}, |
|
{ |
|
"epoch": 188.99, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7102844715118408, |
|
"eval_runtime": 5.4219, |
|
"eval_samples_per_second": 52.749, |
|
"eval_steps_per_second": 3.32, |
|
"step": 4583 |
|
}, |
|
{ |
|
"epoch": 189.69, |
|
"grad_norm": 1.8777916431427002, |
|
"learning_rate": 7.78888888888889e-06, |
|
"loss": 0.2321, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 189.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7083376049995422, |
|
"eval_runtime": 5.9634, |
|
"eval_samples_per_second": 47.959, |
|
"eval_steps_per_second": 3.018, |
|
"step": 4607 |
|
}, |
|
{ |
|
"epoch": 190.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7244677543640137, |
|
"eval_runtime": 5.2078, |
|
"eval_samples_per_second": 54.918, |
|
"eval_steps_per_second": 3.456, |
|
"step": 4631 |
|
}, |
|
{ |
|
"epoch": 191.75, |
|
"grad_norm": 1.5277408361434937, |
|
"learning_rate": 7.5111111111111105e-06, |
|
"loss": 0.2261, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7124583721160889, |
|
"eval_runtime": 5.7079, |
|
"eval_samples_per_second": 50.106, |
|
"eval_steps_per_second": 3.154, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 192.99, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7308976054191589, |
|
"eval_runtime": 5.3404, |
|
"eval_samples_per_second": 53.554, |
|
"eval_steps_per_second": 3.371, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 193.81, |
|
"grad_norm": 2.095749616622925, |
|
"learning_rate": 7.233333333333333e-06, |
|
"loss": 0.2231, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 193.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7237818837165833, |
|
"eval_runtime": 4.6666, |
|
"eval_samples_per_second": 61.286, |
|
"eval_steps_per_second": 3.857, |
|
"step": 4704 |
|
}, |
|
{ |
|
"epoch": 194.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7253320217132568, |
|
"eval_runtime": 5.8059, |
|
"eval_samples_per_second": 49.261, |
|
"eval_steps_per_second": 3.1, |
|
"step": 4728 |
|
}, |
|
{ |
|
"epoch": 195.88, |
|
"grad_norm": 1.6955636739730835, |
|
"learning_rate": 6.955555555555556e-06, |
|
"loss": 0.2083, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7240011692047119, |
|
"eval_runtime": 6.0767, |
|
"eval_samples_per_second": 47.065, |
|
"eval_steps_per_second": 2.962, |
|
"step": 4753 |
|
}, |
|
{ |
|
"epoch": 196.99, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7131750583648682, |
|
"eval_runtime": 5.3063, |
|
"eval_samples_per_second": 53.898, |
|
"eval_steps_per_second": 3.392, |
|
"step": 4777 |
|
}, |
|
{ |
|
"epoch": 197.94, |
|
"grad_norm": 0.8933289051055908, |
|
"learning_rate": 6.677777777777778e-06, |
|
"loss": 0.2116, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 197.98, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7169559597969055, |
|
"eval_runtime": 5.5713, |
|
"eval_samples_per_second": 51.335, |
|
"eval_steps_per_second": 3.231, |
|
"step": 4801 |
|
}, |
|
{ |
|
"epoch": 198.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7265609502792358, |
|
"eval_runtime": 4.1397, |
|
"eval_samples_per_second": 69.087, |
|
"eval_steps_per_second": 4.348, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"grad_norm": 2.175414562225342, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.2219, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7162622213363647, |
|
"eval_runtime": 5.2016, |
|
"eval_samples_per_second": 54.984, |
|
"eval_steps_per_second": 3.461, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 200.99, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7302048802375793, |
|
"eval_runtime": 4.9222, |
|
"eval_samples_per_second": 58.104, |
|
"eval_steps_per_second": 3.657, |
|
"step": 4874 |
|
}, |
|
{ |
|
"epoch": 201.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7223746180534363, |
|
"eval_runtime": 4.6884, |
|
"eval_samples_per_second": 61.002, |
|
"eval_steps_per_second": 3.839, |
|
"step": 4898 |
|
}, |
|
{ |
|
"epoch": 202.06, |
|
"grad_norm": 2.053739309310913, |
|
"learning_rate": 6.1222222222222224e-06, |
|
"loss": 0.2183, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 202.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7179226279258728, |
|
"eval_runtime": 4.5556, |
|
"eval_samples_per_second": 62.78, |
|
"eval_steps_per_second": 3.951, |
|
"step": 4922 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7245286107063293, |
|
"eval_runtime": 5.7474, |
|
"eval_samples_per_second": 49.762, |
|
"eval_steps_per_second": 3.132, |
|
"step": 4947 |
|
}, |
|
{ |
|
"epoch": 204.12, |
|
"grad_norm": 1.1081063747406006, |
|
"learning_rate": 5.844444444444444e-06, |
|
"loss": 0.2053, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7344977259635925, |
|
"eval_runtime": 5.4178, |
|
"eval_samples_per_second": 52.789, |
|
"eval_steps_per_second": 3.322, |
|
"step": 4971 |
|
}, |
|
{ |
|
"epoch": 205.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7249557971954346, |
|
"eval_runtime": 5.6352, |
|
"eval_samples_per_second": 50.753, |
|
"eval_steps_per_second": 3.194, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 206.19, |
|
"grad_norm": 1.09213125705719, |
|
"learning_rate": 5.566666666666667e-06, |
|
"loss": 0.2113, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 206.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7246001958847046, |
|
"eval_runtime": 4.9071, |
|
"eval_samples_per_second": 58.283, |
|
"eval_steps_per_second": 3.668, |
|
"step": 5019 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7270117998123169, |
|
"eval_runtime": 5.8385, |
|
"eval_samples_per_second": 48.985, |
|
"eval_steps_per_second": 3.083, |
|
"step": 5044 |
|
}, |
|
{ |
|
"epoch": 208.25, |
|
"grad_norm": 1.6693130731582642, |
|
"learning_rate": 5.288888888888889e-06, |
|
"loss": 0.2152, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 208.99, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7285901308059692, |
|
"eval_runtime": 5.489, |
|
"eval_samples_per_second": 52.104, |
|
"eval_steps_per_second": 3.279, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 209.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7332947254180908, |
|
"eval_runtime": 5.3017, |
|
"eval_samples_per_second": 53.945, |
|
"eval_steps_per_second": 3.395, |
|
"step": 5092 |
|
}, |
|
{ |
|
"epoch": 210.31, |
|
"grad_norm": 2.0511515140533447, |
|
"learning_rate": 5.011111111111112e-06, |
|
"loss": 0.2129, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 210.97, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7307863831520081, |
|
"eval_runtime": 5.2991, |
|
"eval_samples_per_second": 53.971, |
|
"eval_steps_per_second": 3.397, |
|
"step": 5116 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7176437973976135, |
|
"eval_runtime": 4.9452, |
|
"eval_samples_per_second": 57.834, |
|
"eval_steps_per_second": 3.64, |
|
"step": 5141 |
|
}, |
|
{ |
|
"epoch": 212.37, |
|
"grad_norm": 1.8491023778915405, |
|
"learning_rate": 4.7333333333333335e-06, |
|
"loss": 0.2173, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 212.99, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7334882020950317, |
|
"eval_runtime": 4.9602, |
|
"eval_samples_per_second": 57.659, |
|
"eval_steps_per_second": 3.629, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 213.98, |
|
"eval_accuracy": 0.7797202797202797, |
|
"eval_loss": 0.7268483638763428, |
|
"eval_runtime": 5.885, |
|
"eval_samples_per_second": 48.598, |
|
"eval_steps_per_second": 3.059, |
|
"step": 5189 |
|
}, |
|
{ |
|
"epoch": 214.43, |
|
"grad_norm": 1.2067769765853882, |
|
"learning_rate": 4.455555555555556e-06, |
|
"loss": 0.2042, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 214.97, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7299237847328186, |
|
"eval_runtime": 5.7645, |
|
"eval_samples_per_second": 49.614, |
|
"eval_steps_per_second": 3.123, |
|
"step": 5213 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7360625863075256, |
|
"eval_runtime": 4.7143, |
|
"eval_samples_per_second": 60.667, |
|
"eval_steps_per_second": 3.818, |
|
"step": 5238 |
|
}, |
|
{ |
|
"epoch": 216.49, |
|
"grad_norm": 1.3863427639007568, |
|
"learning_rate": 4.177777777777777e-06, |
|
"loss": 0.2112, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 216.99, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.723866879940033, |
|
"eval_runtime": 5.3445, |
|
"eval_samples_per_second": 53.513, |
|
"eval_steps_per_second": 3.368, |
|
"step": 5262 |
|
}, |
|
{ |
|
"epoch": 217.98, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.7252445220947266, |
|
"eval_runtime": 4.6314, |
|
"eval_samples_per_second": 61.753, |
|
"eval_steps_per_second": 3.887, |
|
"step": 5286 |
|
}, |
|
{ |
|
"epoch": 218.56, |
|
"grad_norm": 1.1177924871444702, |
|
"learning_rate": 3.9e-06, |
|
"loss": 0.2007, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 218.97, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.719983696937561, |
|
"eval_runtime": 4.865, |
|
"eval_samples_per_second": 58.787, |
|
"eval_steps_per_second": 3.7, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7195786237716675, |
|
"eval_runtime": 5.5422, |
|
"eval_samples_per_second": 51.604, |
|
"eval_steps_per_second": 3.248, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 220.62, |
|
"grad_norm": 1.413304090499878, |
|
"learning_rate": 3.6222222222222226e-06, |
|
"loss": 0.2163, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 220.99, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.7309580445289612, |
|
"eval_runtime": 5.2512, |
|
"eval_samples_per_second": 54.463, |
|
"eval_steps_per_second": 3.428, |
|
"step": 5359 |
|
}, |
|
{ |
|
"epoch": 221.98, |
|
"eval_accuracy": 0.7867132867132867, |
|
"eval_loss": 0.7313971519470215, |
|
"eval_runtime": 5.1151, |
|
"eval_samples_per_second": 55.913, |
|
"eval_steps_per_second": 3.519, |
|
"step": 5383 |
|
}, |
|
{ |
|
"epoch": 222.68, |
|
"grad_norm": 3.0471901893615723, |
|
"learning_rate": 3.3444444444444445e-06, |
|
"loss": 0.2141, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 222.97, |
|
"eval_accuracy": 0.7832167832167832, |
|
"eval_loss": 0.727938175201416, |
|
"eval_runtime": 4.6405, |
|
"eval_samples_per_second": 61.631, |
|
"eval_steps_per_second": 3.879, |
|
"step": 5407 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.7902097902097902, |
|
"eval_loss": 0.725923478603363, |
|
"eval_runtime": 5.0906, |
|
"eval_samples_per_second": 56.182, |
|
"eval_steps_per_second": 3.536, |
|
"step": 5432 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 6000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 250, |
|
"save_steps": 500, |
|
"total_flos": 3.037085846065152e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|