|
{ |
|
"best_metric": 0.016573524102568626, |
|
"best_model_checkpoint": "/gpfs/gibbs/pi/dijk/BrainLM_runs/2023-11-17-17_37_00/checkpoint-4400", |
|
"epoch": 19.292899505226252, |
|
"global_step": 4600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 2.100840336134454e-05, |
|
"loss": 0.0205, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.020380878821015358, |
|
"eval_mae": 0.10914861410856247, |
|
"eval_mse": 0.020377445966005325, |
|
"eval_r2": 0.08699230219766863, |
|
"eval_runtime": 65.4495, |
|
"eval_samples_per_second": 6.112, |
|
"eval_steps_per_second": 3.056, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 4.201680672268908e-05, |
|
"loss": 0.0195, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.01993260346353054, |
|
"eval_mae": 0.10798273980617523, |
|
"eval_mse": 0.019940046593546867, |
|
"eval_r2": 0.10634720488402916, |
|
"eval_runtime": 82.6009, |
|
"eval_samples_per_second": 4.843, |
|
"eval_steps_per_second": 2.421, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 6.302521008403361e-05, |
|
"loss": 0.0191, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.01966927945613861, |
|
"eval_mae": 0.10720709711313248, |
|
"eval_mse": 0.019672850146889687, |
|
"eval_r2": 0.1188939908583877, |
|
"eval_runtime": 273.6405, |
|
"eval_samples_per_second": 1.462, |
|
"eval_steps_per_second": 0.731, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"learning_rate": 8.403361344537815e-05, |
|
"loss": 0.0189, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0, |
|
"eval_loss": 0.019402366131544113, |
|
"eval_mae": 0.10650211572647095, |
|
"eval_mse": 0.01940837688744068, |
|
"eval_r2": 0.1273264644968536, |
|
"eval_runtime": 65.4954, |
|
"eval_samples_per_second": 6.107, |
|
"eval_steps_per_second": 3.054, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 9.999826244478293e-05, |
|
"loss": 0.0187, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.019103730097413063, |
|
"eval_mae": 0.10563357919454575, |
|
"eval_mse": 0.01910446584224701, |
|
"eval_r2": 0.14288129156056273, |
|
"eval_runtime": 67.6226, |
|
"eval_samples_per_second": 5.915, |
|
"eval_steps_per_second": 2.958, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 9.995362383107962e-05, |
|
"loss": 0.0183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.0188433900475502, |
|
"eval_mae": 0.1049303263425827, |
|
"eval_mse": 0.018846124410629272, |
|
"eval_r2": 0.15557466279282905, |
|
"eval_runtime": 63.0108, |
|
"eval_samples_per_second": 6.348, |
|
"eval_steps_per_second": 3.174, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 9.98487151097676e-05, |
|
"loss": 0.0181, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.018534274771809578, |
|
"eval_mae": 0.10398274660110474, |
|
"eval_mse": 0.018525807186961174, |
|
"eval_r2": 0.16440368152546336, |
|
"eval_runtime": 63.6126, |
|
"eval_samples_per_second": 6.288, |
|
"eval_steps_per_second": 3.144, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 9.968366285544619e-05, |
|
"loss": 0.0179, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.01846296340227127, |
|
"eval_mae": 0.10383900254964828, |
|
"eval_mse": 0.018469005823135376, |
|
"eval_r2": 0.17126507792798895, |
|
"eval_runtime": 64.4338, |
|
"eval_samples_per_second": 6.208, |
|
"eval_steps_per_second": 3.104, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"learning_rate": 9.945866620716411e-05, |
|
"loss": 0.0177, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1, |
|
"eval_loss": 0.01835346780717373, |
|
"eval_mae": 0.10348159074783325, |
|
"eval_mse": 0.018356231972575188, |
|
"eval_r2": 0.1812631242910432, |
|
"eval_runtime": 63.7952, |
|
"eval_samples_per_second": 6.27, |
|
"eval_steps_per_second": 3.135, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 9.917399662815391e-05, |
|
"loss": 0.0176, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.018152762204408646, |
|
"eval_mae": 0.10292253643274307, |
|
"eval_mse": 0.018158329650759697, |
|
"eval_r2": 0.18455260254083927, |
|
"eval_runtime": 62.2123, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 3.215, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 9.882999757830588e-05, |
|
"loss": 0.0175, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.018096117302775383, |
|
"eval_mae": 0.10270749032497406, |
|
"eval_mse": 0.01808706857264042, |
|
"eval_r2": 0.1878013710016545, |
|
"eval_runtime": 62.45, |
|
"eval_samples_per_second": 6.405, |
|
"eval_steps_per_second": 3.203, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 9.842708409977635e-05, |
|
"loss": 0.0174, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.018007792532444, |
|
"eval_mae": 0.10250235348939896, |
|
"eval_mse": 0.01801513321697712, |
|
"eval_r2": 0.19156642781886424, |
|
"eval_runtime": 62.617, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 3.194, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 9.796574231623055e-05, |
|
"loss": 0.0173, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.017887134104967117, |
|
"eval_mae": 0.10214251279830933, |
|
"eval_mse": 0.017883572727441788, |
|
"eval_r2": 0.19610529281414957, |
|
"eval_runtime": 63.0647, |
|
"eval_samples_per_second": 6.343, |
|
"eval_steps_per_second": 3.171, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"learning_rate": 9.744652884632406e-05, |
|
"loss": 0.0173, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2, |
|
"eval_loss": 0.017843402922153473, |
|
"eval_mae": 0.10201350599527359, |
|
"eval_mse": 0.017849572002887726, |
|
"eval_r2": 0.20036411183239422, |
|
"eval_runtime": 63.1964, |
|
"eval_samples_per_second": 6.329, |
|
"eval_steps_per_second": 3.165, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 9.687007013213063e-05, |
|
"loss": 0.0172, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.017804041504859924, |
|
"eval_mae": 0.10189322382211685, |
|
"eval_mse": 0.01781976968050003, |
|
"eval_r2": 0.20562580407293896, |
|
"eval_runtime": 63.7696, |
|
"eval_samples_per_second": 6.273, |
|
"eval_steps_per_second": 3.136, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 9.623706168332645e-05, |
|
"loss": 0.0171, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.017708342522382736, |
|
"eval_mae": 0.10164093226194382, |
|
"eval_mse": 0.017712706699967384, |
|
"eval_r2": 0.20602592052107038, |
|
"eval_runtime": 63.456, |
|
"eval_samples_per_second": 6.304, |
|
"eval_steps_per_second": 3.152, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 9.554826723804303e-05, |
|
"loss": 0.017, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.017668385058641434, |
|
"eval_mae": 0.10145936906337738, |
|
"eval_mse": 0.017669973894953728, |
|
"eval_r2": 0.20682701060142172, |
|
"eval_runtime": 63.5861, |
|
"eval_samples_per_second": 6.291, |
|
"eval_steps_per_second": 3.145, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 9.480451784140091e-05, |
|
"loss": 0.017, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.017553946003317833, |
|
"eval_mae": 0.10114699602127075, |
|
"eval_mse": 0.01755087450146675, |
|
"eval_r2": 0.2111549450823803, |
|
"eval_runtime": 62.7965, |
|
"eval_samples_per_second": 6.37, |
|
"eval_steps_per_second": 3.185, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"learning_rate": 9.400671084283607e-05, |
|
"loss": 0.017, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3, |
|
"eval_loss": 0.017535727471113205, |
|
"eval_mae": 0.10109320282936096, |
|
"eval_mse": 0.017534563317894936, |
|
"eval_r2": 0.21152867082641214, |
|
"eval_runtime": 62.081, |
|
"eval_samples_per_second": 6.443, |
|
"eval_steps_per_second": 3.222, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 9.315580881342876e-05, |
|
"loss": 0.0168, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.01751740463078022, |
|
"eval_mae": 0.10102058947086334, |
|
"eval_mse": 0.017521008849143982, |
|
"eval_r2": 0.2173146917939317, |
|
"eval_runtime": 63.7702, |
|
"eval_samples_per_second": 6.273, |
|
"eval_steps_per_second": 3.136, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 9.225283838454111e-05, |
|
"loss": 0.0169, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.017473317682743073, |
|
"eval_mae": 0.1008896455168724, |
|
"eval_mse": 0.0174697358161211, |
|
"eval_r2": 0.21592913051237428, |
|
"eval_runtime": 63.0763, |
|
"eval_samples_per_second": 6.342, |
|
"eval_steps_per_second": 3.171, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 9.129888900916456e-05, |
|
"loss": 0.0168, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.01740197278559208, |
|
"eval_mae": 0.10066132992506027, |
|
"eval_mse": 0.017395442351698875, |
|
"eval_r2": 0.21965390849250066, |
|
"eval_runtime": 62.679, |
|
"eval_samples_per_second": 6.382, |
|
"eval_steps_per_second": 3.191, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"learning_rate": 9.029511164747175e-05, |
|
"loss": 0.0168, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4, |
|
"eval_loss": 0.01737840846180916, |
|
"eval_mae": 0.100642628967762, |
|
"eval_mse": 0.01738792657852173, |
|
"eval_r2": 0.2177673870807011, |
|
"eval_runtime": 63.0635, |
|
"eval_samples_per_second": 6.343, |
|
"eval_steps_per_second": 3.171, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 8.924271737815854e-05, |
|
"loss": 0.0167, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.017356639727950096, |
|
"eval_mae": 0.10054408758878708, |
|
"eval_mse": 0.01736092008650303, |
|
"eval_r2": 0.22268234003478382, |
|
"eval_runtime": 62.6903, |
|
"eval_samples_per_second": 6.381, |
|
"eval_steps_per_second": 3.19, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 8.814297593725199e-05, |
|
"loss": 0.0167, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.017370322719216347, |
|
"eval_mae": 0.10060014575719833, |
|
"eval_mse": 0.017372848466038704, |
|
"eval_r2": 0.22218442610232647, |
|
"eval_runtime": 62.9578, |
|
"eval_samples_per_second": 6.353, |
|
"eval_steps_per_second": 3.177, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 8.699721418614673e-05, |
|
"loss": 0.0167, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.017255190759897232, |
|
"eval_mae": 0.10025202482938766, |
|
"eval_mse": 0.0172572061419487, |
|
"eval_r2": 0.2245981157395297, |
|
"eval_runtime": 62.725, |
|
"eval_samples_per_second": 6.377, |
|
"eval_steps_per_second": 3.189, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 8.580681451071866e-05, |
|
"loss": 0.0166, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.017206793650984764, |
|
"eval_mae": 0.10011155903339386, |
|
"eval_mse": 0.017209110781550407, |
|
"eval_r2": 0.22857719354252215, |
|
"eval_runtime": 63.8964, |
|
"eval_samples_per_second": 6.26, |
|
"eval_steps_per_second": 3.13, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"learning_rate": 8.457321315344694e-05, |
|
"loss": 0.0167, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5, |
|
"eval_loss": 0.017166156321763992, |
|
"eval_mae": 0.10000505298376083, |
|
"eval_mse": 0.017173225060105324, |
|
"eval_r2": 0.22773855218146166, |
|
"eval_runtime": 63.4268, |
|
"eval_samples_per_second": 6.306, |
|
"eval_steps_per_second": 3.153, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 8.329789848055704e-05, |
|
"loss": 0.0164, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.01718437485396862, |
|
"eval_mae": 0.10002291202545166, |
|
"eval_mse": 0.01718369498848915, |
|
"eval_r2": 0.2300388059901679, |
|
"eval_runtime": 63.8338, |
|
"eval_samples_per_second": 6.266, |
|
"eval_steps_per_second": 3.133, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 8.198240918627524e-05, |
|
"loss": 0.0166, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.017157739028334618, |
|
"eval_mae": 0.09996546804904938, |
|
"eval_mse": 0.017160937190055847, |
|
"eval_r2": 0.23035895673172035, |
|
"eval_runtime": 62.0856, |
|
"eval_samples_per_second": 6.443, |
|
"eval_steps_per_second": 3.221, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 8.062833243636134e-05, |
|
"loss": 0.0165, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.01712297834455967, |
|
"eval_mae": 0.09981416165828705, |
|
"eval_mse": 0.017125777900218964, |
|
"eval_r2": 0.23338275672251196, |
|
"eval_runtime": 62.5642, |
|
"eval_samples_per_second": 6.393, |
|
"eval_steps_per_second": 3.197, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 7.923730195315962e-05, |
|
"loss": 0.0165, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.01711028628051281, |
|
"eval_mae": 0.09980826824903488, |
|
"eval_mse": 0.01711735874414444, |
|
"eval_r2": 0.23415986485617102, |
|
"eval_runtime": 63.2895, |
|
"eval_samples_per_second": 6.32, |
|
"eval_steps_per_second": 3.16, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"learning_rate": 7.781099604447794e-05, |
|
"loss": 0.0164, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 6, |
|
"eval_loss": 0.017070267349481583, |
|
"eval_mae": 0.09970243275165558, |
|
"eval_mse": 0.01707725040614605, |
|
"eval_r2": 0.23408428218933097, |
|
"eval_runtime": 62.7311, |
|
"eval_samples_per_second": 6.376, |
|
"eval_steps_per_second": 3.188, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 7.635113557867395e-05, |
|
"loss": 0.0165, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.017109189182519913, |
|
"eval_mae": 0.09978970885276794, |
|
"eval_mse": 0.017108654603362083, |
|
"eval_r2": 0.23454243286934806, |
|
"eval_runtime": 63.1248, |
|
"eval_samples_per_second": 6.337, |
|
"eval_steps_per_second": 3.168, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 7.485948190839077e-05, |
|
"loss": 0.0164, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.01705513708293438, |
|
"eval_mae": 0.09960421919822693, |
|
"eval_mse": 0.017058243975043297, |
|
"eval_r2": 0.2349071550024685, |
|
"eval_runtime": 62.8704, |
|
"eval_samples_per_second": 6.362, |
|
"eval_steps_per_second": 3.181, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 7.333783474544758e-05, |
|
"loss": 0.0165, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.01703445240855217, |
|
"eval_mae": 0.09956526756286621, |
|
"eval_mse": 0.017035936936736107, |
|
"eval_r2": 0.23700014890629983, |
|
"eval_runtime": 62.8756, |
|
"eval_samples_per_second": 6.362, |
|
"eval_steps_per_second": 3.181, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 7.178802998944933e-05, |
|
"loss": 0.0163, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.017014818266034126, |
|
"eval_mae": 0.09949322044849396, |
|
"eval_mse": 0.01701194979250431, |
|
"eval_r2": 0.23927528453479352, |
|
"eval_runtime": 62.7426, |
|
"eval_samples_per_second": 6.375, |
|
"eval_steps_per_second": 3.188, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"learning_rate": 7.021193751273462e-05, |
|
"loss": 0.0163, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 7, |
|
"eval_loss": 0.016955234110355377, |
|
"eval_mae": 0.09930498152971268, |
|
"eval_mse": 0.01695319451391697, |
|
"eval_r2": 0.2370158653005695, |
|
"eval_runtime": 63.4621, |
|
"eval_samples_per_second": 6.303, |
|
"eval_steps_per_second": 3.151, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 6.86114589043352e-05, |
|
"loss": 0.0163, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.016957027837634087, |
|
"eval_mae": 0.09929080307483673, |
|
"eval_mse": 0.016954666003584862, |
|
"eval_r2": 0.23837789423341782, |
|
"eval_runtime": 63.6096, |
|
"eval_samples_per_second": 6.288, |
|
"eval_steps_per_second": 3.144, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 6.698852517566836e-05, |
|
"loss": 0.0163, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.016924967989325523, |
|
"eval_mae": 0.09920791536569595, |
|
"eval_mse": 0.016921618953347206, |
|
"eval_r2": 0.23921405049323863, |
|
"eval_runtime": 62.3232, |
|
"eval_samples_per_second": 6.418, |
|
"eval_steps_per_second": 3.209, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 6.534509443073072e-05, |
|
"loss": 0.0163, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.016955628991127014, |
|
"eval_mae": 0.09933258593082428, |
|
"eval_mse": 0.01695682480931282, |
|
"eval_r2": 0.24050878076217264, |
|
"eval_runtime": 62.7525, |
|
"eval_samples_per_second": 6.374, |
|
"eval_steps_per_second": 3.187, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"learning_rate": 6.368314950360415e-05, |
|
"loss": 0.0163, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 8, |
|
"eval_loss": 0.016915885731577873, |
|
"eval_mae": 0.09923145920038223, |
|
"eval_mse": 0.01692046783864498, |
|
"eval_r2": 0.24116580712888347, |
|
"eval_runtime": 62.4103, |
|
"eval_samples_per_second": 6.409, |
|
"eval_steps_per_second": 3.205, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 6.200469556612435e-05, |
|
"loss": 0.0163, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.016883673146367073, |
|
"eval_mae": 0.09907432645559311, |
|
"eval_mse": 0.016882291063666344, |
|
"eval_r2": 0.23994247077801656, |
|
"eval_runtime": 63.1914, |
|
"eval_samples_per_second": 6.33, |
|
"eval_steps_per_second": 3.165, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 6.031175770859848e-05, |
|
"loss": 0.0163, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.01691693253815174, |
|
"eval_mae": 0.09921909868717194, |
|
"eval_mse": 0.01691514253616333, |
|
"eval_r2": 0.24448073571582285, |
|
"eval_runtime": 63.0869, |
|
"eval_samples_per_second": 6.34, |
|
"eval_steps_per_second": 3.17, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 5.8606378496490735e-05, |
|
"loss": 0.0163, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.01691095158457756, |
|
"eval_mae": 0.09917566925287247, |
|
"eval_mse": 0.016911856830120087, |
|
"eval_r2": 0.24367263560029062, |
|
"eval_runtime": 62.6214, |
|
"eval_samples_per_second": 6.388, |
|
"eval_steps_per_second": 3.194, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 5.6890615506023705e-05, |
|
"loss": 0.0162, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.016852255910634995, |
|
"eval_mae": 0.09895263612270355, |
|
"eval_mse": 0.016846586018800735, |
|
"eval_r2": 0.24440525606317043, |
|
"eval_runtime": 63.9638, |
|
"eval_samples_per_second": 6.254, |
|
"eval_steps_per_second": 3.127, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"learning_rate": 5.5166538841669025e-05, |
|
"loss": 0.0161, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 9, |
|
"eval_loss": 0.016831671819090843, |
|
"eval_mae": 0.09894772619009018, |
|
"eval_mse": 0.01683351770043373, |
|
"eval_r2": 0.24440384235611556, |
|
"eval_runtime": 63.0545, |
|
"eval_samples_per_second": 6.344, |
|
"eval_steps_per_second": 3.172, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 5.343622863852232e-05, |
|
"loss": 0.0163, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.016827262938022614, |
|
"eval_mae": 0.09892594069242477, |
|
"eval_mse": 0.01683083176612854, |
|
"eval_r2": 0.24909316086739675, |
|
"eval_runtime": 62.71, |
|
"eval_samples_per_second": 6.379, |
|
"eval_steps_per_second": 3.189, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 5.170177255257618e-05, |
|
"loss": 0.0162, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.016822684556245804, |
|
"eval_mae": 0.09890511631965637, |
|
"eval_mse": 0.016828058287501335, |
|
"eval_r2": 0.24537013068417224, |
|
"eval_runtime": 62.313, |
|
"eval_samples_per_second": 6.419, |
|
"eval_steps_per_second": 3.21, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 4.996526324191872e-05, |
|
"loss": 0.0161, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.016806134954094887, |
|
"eval_mae": 0.09885768592357635, |
|
"eval_mse": 0.01680225133895874, |
|
"eval_r2": 0.24621643781805225, |
|
"eval_runtime": 62.9889, |
|
"eval_samples_per_second": 6.35, |
|
"eval_steps_per_second": 3.175, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 4.822879584189731e-05, |
|
"loss": 0.0162, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.01680067367851734, |
|
"eval_mae": 0.09885375201702118, |
|
"eval_mse": 0.01680714637041092, |
|
"eval_r2": 0.2480926793721172, |
|
"eval_runtime": 62.8794, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 3.181, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"learning_rate": 4.6494465437293225e-05, |
|
"loss": 0.0162, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 10, |
|
"eval_loss": 0.01678573526442051, |
|
"eval_mae": 0.09876307845115662, |
|
"eval_mse": 0.01678406447172165, |
|
"eval_r2": 0.24622674800267175, |
|
"eval_runtime": 63.1145, |
|
"eval_samples_per_second": 6.338, |
|
"eval_steps_per_second": 3.169, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 4.476436453455742e-05, |
|
"loss": 0.0161, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.016767781227827072, |
|
"eval_mae": 0.09874103963375092, |
|
"eval_mse": 0.01676834560930729, |
|
"eval_r2": 0.24893675048568276, |
|
"eval_runtime": 63.6881, |
|
"eval_samples_per_second": 6.281, |
|
"eval_steps_per_second": 3.14, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 4.3040580537157024e-05, |
|
"loss": 0.0161, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.016766654327511787, |
|
"eval_mae": 0.0987214520573616, |
|
"eval_mse": 0.016764981672167778, |
|
"eval_r2": 0.24921689372938038, |
|
"eval_runtime": 62.2087, |
|
"eval_samples_per_second": 6.43, |
|
"eval_steps_per_second": 3.215, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 4.1325193227078816e-05, |
|
"loss": 0.0161, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.01673816703259945, |
|
"eval_mae": 0.09863594174385071, |
|
"eval_mse": 0.01673576422035694, |
|
"eval_r2": 0.24761288350255228, |
|
"eval_runtime": 62.9694, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 3.176, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 3.962027225552807e-05, |
|
"loss": 0.0162, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.016739826649427414, |
|
"eval_mae": 0.09863518178462982, |
|
"eval_mse": 0.016731785610318184, |
|
"eval_r2": 0.24922910335616988, |
|
"eval_runtime": 62.5788, |
|
"eval_samples_per_second": 6.392, |
|
"eval_steps_per_second": 3.196, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"learning_rate": 3.79278746458504e-05, |
|
"loss": 0.0161, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 11, |
|
"eval_loss": 0.016736237332224846, |
|
"eval_mae": 0.09862860292196274, |
|
"eval_mse": 0.016738129779696465, |
|
"eval_r2": 0.24936630700011952, |
|
"eval_runtime": 63.2169, |
|
"eval_samples_per_second": 6.327, |
|
"eval_steps_per_second": 3.164, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 3.6250042311689505e-05, |
|
"loss": 0.0161, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.016720617190003395, |
|
"eval_mae": 0.09856829792261124, |
|
"eval_mse": 0.016721663996577263, |
|
"eval_r2": 0.24943757367634767, |
|
"eval_runtime": 62.9559, |
|
"eval_samples_per_second": 6.354, |
|
"eval_steps_per_second": 3.177, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 3.458879959337494e-05, |
|
"loss": 0.016, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.01672213524580002, |
|
"eval_mae": 0.0985676720738411, |
|
"eval_mse": 0.016720319166779518, |
|
"eval_r2": 0.24865355220765406, |
|
"eval_runtime": 63.4117, |
|
"eval_samples_per_second": 6.308, |
|
"eval_steps_per_second": 3.154, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 3.294615081551259e-05, |
|
"loss": 0.0161, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.016707362607121468, |
|
"eval_mae": 0.09850940108299255, |
|
"eval_mse": 0.016702940687537193, |
|
"eval_r2": 0.24840998553438287, |
|
"eval_runtime": 62.8197, |
|
"eval_samples_per_second": 6.367, |
|
"eval_steps_per_second": 3.184, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"learning_rate": 3.132407786872442e-05, |
|
"loss": 0.0161, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 12, |
|
"eval_loss": 0.01671912521123886, |
|
"eval_mae": 0.09860337525606155, |
|
"eval_mse": 0.016718650236725807, |
|
"eval_r2": 0.2525614324396165, |
|
"eval_runtime": 62.3061, |
|
"eval_samples_per_second": 6.42, |
|
"eval_steps_per_second": 3.21, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 2.9724537818455466e-05, |
|
"loss": 0.016, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.016661079600453377, |
|
"eval_mae": 0.09843301773071289, |
|
"eval_mse": 0.016662681475281715, |
|
"eval_r2": 0.25208811381954643, |
|
"eval_runtime": 62.656, |
|
"eval_samples_per_second": 6.384, |
|
"eval_steps_per_second": 3.192, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 2.8149460543732664e-05, |
|
"loss": 0.0161, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.016676336526870728, |
|
"eval_mae": 0.09842842072248459, |
|
"eval_mse": 0.01667998544871807, |
|
"eval_r2": 0.25096189530992574, |
|
"eval_runtime": 61.8687, |
|
"eval_samples_per_second": 6.465, |
|
"eval_steps_per_second": 3.233, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 2.6600746408725063e-05, |
|
"loss": 0.0161, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.016732489690184593, |
|
"eval_mae": 0.09858258813619614, |
|
"eval_mse": 0.016734851524233818, |
|
"eval_r2": 0.25186837331344525, |
|
"eval_runtime": 62.9675, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 3.176, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 2.5080263969913897e-05, |
|
"loss": 0.016, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.016638994216918945, |
|
"eval_mae": 0.09835705161094666, |
|
"eval_mse": 0.016642747446894646, |
|
"eval_r2": 0.25120772331092656, |
|
"eval_runtime": 62.2252, |
|
"eval_samples_per_second": 6.428, |
|
"eval_steps_per_second": 3.214, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"learning_rate": 2.3589847721639617e-05, |
|
"loss": 0.0161, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 13, |
|
"eval_loss": 0.01668979413807392, |
|
"eval_mae": 0.09847620874643326, |
|
"eval_mse": 0.016691412776708603, |
|
"eval_r2": 0.2516642585572433, |
|
"eval_runtime": 63.1219, |
|
"eval_samples_per_second": 6.337, |
|
"eval_steps_per_second": 3.168, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"learning_rate": 2.2131295882745597e-05, |
|
"loss": 0.016, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"eval_loss": 0.016662921756505966, |
|
"eval_mae": 0.09842050820589066, |
|
"eval_mse": 0.016670849174261093, |
|
"eval_r2": 0.2505160489332994, |
|
"eval_runtime": 62.776, |
|
"eval_samples_per_second": 6.372, |
|
"eval_steps_per_second": 3.186, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"learning_rate": 2.070636822698877e-05, |
|
"loss": 0.0159, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"eval_loss": 0.01665448024868965, |
|
"eval_mae": 0.09836740046739578, |
|
"eval_mse": 0.016657505184412003, |
|
"eval_r2": 0.25081546773511, |
|
"eval_runtime": 62.5067, |
|
"eval_samples_per_second": 6.399, |
|
"eval_steps_per_second": 3.2, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"learning_rate": 1.9316783959835345e-05, |
|
"loss": 0.016, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"eval_loss": 0.01669839210808277, |
|
"eval_mae": 0.09849409759044647, |
|
"eval_mse": 0.01669597439467907, |
|
"eval_r2": 0.25497888003639246, |
|
"eval_runtime": 63.4933, |
|
"eval_samples_per_second": 6.3, |
|
"eval_steps_per_second": 3.15, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"learning_rate": 1.796421964420285e-05, |
|
"loss": 0.0161, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"eval_loss": 0.01662326790392399, |
|
"eval_mae": 0.0982794538140297, |
|
"eval_mse": 0.016625171527266502, |
|
"eval_r2": 0.2520529598686879, |
|
"eval_runtime": 62.7103, |
|
"eval_samples_per_second": 6.379, |
|
"eval_steps_per_second": 3.189, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"learning_rate": 1.665030717765149e-05, |
|
"loss": 0.016, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 14, |
|
"eval_loss": 0.016656357795000076, |
|
"eval_mae": 0.09835183620452881, |
|
"eval_mse": 0.016651729121804237, |
|
"eval_r2": 0.2521458867843698, |
|
"eval_runtime": 64.4578, |
|
"eval_samples_per_second": 6.206, |
|
"eval_steps_per_second": 3.103, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"learning_rate": 1.5376631823464953e-05, |
|
"loss": 0.0161, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"eval_loss": 0.01665300317108631, |
|
"eval_mae": 0.09836214780807495, |
|
"eval_mse": 0.01665370911359787, |
|
"eval_r2": 0.25267425736469795, |
|
"eval_runtime": 63.0441, |
|
"eval_samples_per_second": 6.345, |
|
"eval_steps_per_second": 3.172, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"learning_rate": 1.4144730297996666e-05, |
|
"loss": 0.016, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"eval_loss": 0.01662749983370304, |
|
"eval_mae": 0.09827445447444916, |
|
"eval_mse": 0.016627401113510132, |
|
"eval_r2": 0.2526889218862315, |
|
"eval_runtime": 62.6614, |
|
"eval_samples_per_second": 6.384, |
|
"eval_steps_per_second": 3.192, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"learning_rate": 1.295608891658896e-05, |
|
"loss": 0.016, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"eval_loss": 0.016592269763350487, |
|
"eval_mae": 0.09820396453142166, |
|
"eval_mse": 0.0165996253490448, |
|
"eval_r2": 0.25472996642841683, |
|
"eval_runtime": 63.4328, |
|
"eval_samples_per_second": 6.306, |
|
"eval_steps_per_second": 3.153, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"learning_rate": 1.1812141800301945e-05, |
|
"loss": 0.016, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"eval_loss": 0.016636423766613007, |
|
"eval_mae": 0.0983065739274025, |
|
"eval_mse": 0.016633499413728714, |
|
"eval_r2": 0.25374046203309764, |
|
"eval_runtime": 62.8839, |
|
"eval_samples_per_second": 6.361, |
|
"eval_steps_per_second": 3.18, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"learning_rate": 1.0714269145616063e-05, |
|
"loss": 0.0159, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 15, |
|
"eval_loss": 0.016631022095680237, |
|
"eval_mae": 0.09828473627567291, |
|
"eval_mse": 0.01662875898182392, |
|
"eval_r2": 0.25143078561422905, |
|
"eval_runtime": 62.4886, |
|
"eval_samples_per_second": 6.401, |
|
"eval_steps_per_second": 3.201, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"learning_rate": 9.663795559195733e-06, |
|
"loss": 0.0159, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"eval_loss": 0.016640575602650642, |
|
"eval_mae": 0.09832051396369934, |
|
"eval_mse": 0.016639817506074905, |
|
"eval_r2": 0.25420519067693936, |
|
"eval_runtime": 63.7918, |
|
"eval_samples_per_second": 6.27, |
|
"eval_steps_per_second": 3.135, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"learning_rate": 8.661988459723309e-06, |
|
"loss": 0.016, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"eval_loss": 0.016633687540888786, |
|
"eval_mae": 0.09830807894468307, |
|
"eval_mse": 0.0166340135037899, |
|
"eval_r2": 0.2559419583101087, |
|
"eval_runtime": 64.9995, |
|
"eval_samples_per_second": 6.154, |
|
"eval_steps_per_second": 3.077, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"learning_rate": 7.710056548731447e-06, |
|
"loss": 0.016, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"eval_loss": 0.01665404625236988, |
|
"eval_mae": 0.09835316985845566, |
|
"eval_mse": 0.01665414310991764, |
|
"eval_r2": 0.25389154611800946, |
|
"eval_runtime": 63.3284, |
|
"eval_samples_per_second": 6.316, |
|
"eval_steps_per_second": 3.158, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"learning_rate": 6.809148352279182e-06, |
|
"loss": 0.016, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"eval_loss": 0.016643621027469635, |
|
"eval_mae": 0.09831266850233078, |
|
"eval_mse": 0.0166462492197752, |
|
"eval_r2": 0.253610910132235, |
|
"eval_runtime": 62.8474, |
|
"eval_samples_per_second": 6.365, |
|
"eval_steps_per_second": 3.182, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"learning_rate": 5.960350835230766e-06, |
|
"loss": 0.0159, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 16, |
|
"eval_loss": 0.016641885042190552, |
|
"eval_mae": 0.09831728786230087, |
|
"eval_mse": 0.01664014533162117, |
|
"eval_r2": 0.2530172455633002, |
|
"eval_runtime": 62.9766, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 3.176, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"learning_rate": 5.164688089809444e-06, |
|
"loss": 0.0159, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"eval_loss": 0.016620321199297905, |
|
"eval_mae": 0.09825479984283447, |
|
"eval_mse": 0.016620123758912086, |
|
"eval_r2": 0.2543216647578763, |
|
"eval_runtime": 63.7821, |
|
"eval_samples_per_second": 6.271, |
|
"eval_steps_per_second": 3.136, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"learning_rate": 4.423120100008582e-06, |
|
"loss": 0.016, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"eval_loss": 0.016626114025712013, |
|
"eval_mae": 0.0982520654797554, |
|
"eval_mse": 0.01661662384867668, |
|
"eval_r2": 0.25688829356860643, |
|
"eval_runtime": 63.3768, |
|
"eval_samples_per_second": 6.311, |
|
"eval_steps_per_second": 3.156, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"learning_rate": 3.7365415833504725e-06, |
|
"loss": 0.0159, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"eval_loss": 0.016621023416519165, |
|
"eval_mae": 0.0982469841837883, |
|
"eval_mse": 0.01662403903901577, |
|
"eval_r2": 0.2562592876194061, |
|
"eval_runtime": 63.3616, |
|
"eval_samples_per_second": 6.313, |
|
"eval_steps_per_second": 3.156, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"learning_rate": 3.105780911390738e-06, |
|
"loss": 0.016, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 17, |
|
"eval_loss": 0.01661110669374466, |
|
"eval_mae": 0.09821216762065887, |
|
"eval_mse": 0.016603710129857063, |
|
"eval_r2": 0.2534045631002596, |
|
"eval_runtime": 63.6674, |
|
"eval_samples_per_second": 6.283, |
|
"eval_steps_per_second": 3.141, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"learning_rate": 2.5315991102703716e-06, |
|
"loss": 0.016, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"eval_loss": 0.016594773158431053, |
|
"eval_mae": 0.09816820174455643, |
|
"eval_mse": 0.016600053757429123, |
|
"eval_r2": 0.25425809369382535, |
|
"eval_runtime": 63.3127, |
|
"eval_samples_per_second": 6.318, |
|
"eval_steps_per_second": 3.159, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"learning_rate": 2.0146889425216476e-06, |
|
"loss": 0.016, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"eval_loss": 0.016598813235759735, |
|
"eval_mae": 0.09816797077655792, |
|
"eval_mse": 0.016591567546129227, |
|
"eval_r2": 0.2551125072294508, |
|
"eval_runtime": 61.9837, |
|
"eval_samples_per_second": 6.453, |
|
"eval_steps_per_second": 3.227, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"learning_rate": 1.555674071235358e-06, |
|
"loss": 0.0159, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"eval_loss": 0.016573524102568626, |
|
"eval_mae": 0.09814300388097763, |
|
"eval_mse": 0.016571756452322006, |
|
"eval_r2": 0.25463921169715287, |
|
"eval_runtime": 63.1303, |
|
"eval_samples_per_second": 6.336, |
|
"eval_steps_per_second": 3.168, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"learning_rate": 1.155108307598024e-06, |
|
"loss": 0.016, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"eval_loss": 0.016580162569880486, |
|
"eval_mae": 0.09815159440040588, |
|
"eval_mse": 0.01658167876303196, |
|
"eval_r2": 0.25566179703610914, |
|
"eval_runtime": 63.3862, |
|
"eval_samples_per_second": 6.311, |
|
"eval_steps_per_second": 3.155, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"learning_rate": 8.134749427070376e-07, |
|
"loss": 0.016, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18, |
|
"eval_loss": 0.016592783853411674, |
|
"eval_mae": 0.0981706902384758, |
|
"eval_mse": 0.016597216948866844, |
|
"eval_r2": 0.25482598937850753, |
|
"eval_runtime": 63.3827, |
|
"eval_samples_per_second": 6.311, |
|
"eval_steps_per_second": 3.155, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19, |
|
"learning_rate": 5.311861644696048e-07, |
|
"loss": 0.0159, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 19, |
|
"eval_loss": 0.01663564145565033, |
|
"eval_mae": 0.09826894104480743, |
|
"eval_mse": 0.0166276004165411, |
|
"eval_r2": 0.2547617221900892, |
|
"eval_runtime": 62.9726, |
|
"eval_samples_per_second": 6.352, |
|
"eval_steps_per_second": 3.176, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 19, |
|
"learning_rate": 3.0858256028932776e-07, |
|
"loss": 0.016, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 19, |
|
"eval_loss": 0.016616160050034523, |
|
"eval_mae": 0.09824874997138977, |
|
"eval_mse": 0.016616739332675934, |
|
"eval_r2": 0.2550670819722727, |
|
"eval_runtime": 62.541, |
|
"eval_samples_per_second": 6.396, |
|
"eval_steps_per_second": 3.198, |
|
"step": 4600 |
|
} |
|
], |
|
"max_steps": 4760, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.1833548390119886e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|