{
  "best_metric": 0.3305796980857849,
  "best_model_checkpoint": "autotrain-m1b56-8ger6/checkpoint-3462",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 3462,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014442518775274409,
      "grad_norm": 52.66768264770508,
      "learning_rate": 2.0192307692307692e-06,
      "loss": 8.6614,
      "step": 25
    },
    {
      "epoch": 0.028885037550548817,
      "grad_norm": 70.3343276977539,
      "learning_rate": 4.423076923076924e-06,
      "loss": 7.3266,
      "step": 50
    },
    {
      "epoch": 0.043327556325823226,
      "grad_norm": 64.09215545654297,
      "learning_rate": 6.730769230769231e-06,
      "loss": 1.6514,
      "step": 75
    },
    {
      "epoch": 0.057770075101097634,
      "grad_norm": 29.524045944213867,
      "learning_rate": 9.134615384615384e-06,
      "loss": 1.0548,
      "step": 100
    },
    {
      "epoch": 0.07221259387637204,
      "grad_norm": 14.635738372802734,
      "learning_rate": 1.153846153846154e-05,
      "loss": 0.9082,
      "step": 125
    },
    {
      "epoch": 0.08665511265164645,
      "grad_norm": 19.21646499633789,
      "learning_rate": 1.3942307692307693e-05,
      "loss": 0.5165,
      "step": 150
    },
    {
      "epoch": 0.10109763142692085,
      "grad_norm": 34.39830017089844,
      "learning_rate": 1.6346153846153847e-05,
      "loss": 0.5166,
      "step": 175
    },
    {
      "epoch": 0.11554015020219527,
      "grad_norm": 12.46200180053711,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.6898,
      "step": 200
    },
    {
      "epoch": 0.12998266897746968,
      "grad_norm": 44.481101989746094,
      "learning_rate": 2.1153846153846154e-05,
      "loss": 0.5929,
      "step": 225
    },
    {
      "epoch": 0.14442518775274407,
      "grad_norm": 14.731172561645508,
      "learning_rate": 2.355769230769231e-05,
      "loss": 0.5474,
      "step": 250
    },
    {
      "epoch": 0.1588677065280185,
      "grad_norm": 27.139829635620117,
      "learning_rate": 2.586538461538462e-05,
      "loss": 0.7523,
      "step": 275
    },
    {
      "epoch": 0.1733102253032929,
      "grad_norm": 13.601739883422852,
      "learning_rate": 2.826923076923077e-05,
      "loss": 0.6244,
      "step": 300
    },
    {
      "epoch": 0.1877527440785673,
      "grad_norm": 18.18332862854004,
      "learning_rate": 3.0673076923076926e-05,
      "loss": 0.618,
      "step": 325
    },
    {
      "epoch": 0.2021952628538417,
      "grad_norm": 6.123499870300293,
      "learning_rate": 3.307692307692308e-05,
      "loss": 0.7482,
      "step": 350
    },
    {
      "epoch": 0.21663778162911612,
      "grad_norm": 10.999884605407715,
      "learning_rate": 3.548076923076924e-05,
      "loss": 0.5383,
      "step": 375
    },
    {
      "epoch": 0.23108030040439054,
      "grad_norm": 22.577754974365234,
      "learning_rate": 3.788461538461538e-05,
      "loss": 0.5854,
      "step": 400
    },
    {
      "epoch": 0.24552281917966493,
      "grad_norm": 31.469331741333008,
      "learning_rate": 4.028846153846154e-05,
      "loss": 0.5403,
      "step": 425
    },
    {
      "epoch": 0.25996533795493937,
      "grad_norm": 8.64493465423584,
      "learning_rate": 4.269230769230769e-05,
      "loss": 0.6795,
      "step": 450
    },
    {
      "epoch": 0.27440785673021373,
      "grad_norm": 7.3773393630981445,
      "learning_rate": 4.509615384615385e-05,
      "loss": 0.5801,
      "step": 475
    },
    {
      "epoch": 0.28885037550548814,
      "grad_norm": 11.987143516540527,
      "learning_rate": 4.75e-05,
      "loss": 0.508,
      "step": 500
    },
    {
      "epoch": 0.30329289428076256,
      "grad_norm": 7.039488792419434,
      "learning_rate": 4.9903846153846154e-05,
      "loss": 0.5415,
      "step": 525
    },
    {
      "epoch": 0.317735413056037,
      "grad_norm": 13.371904373168945,
      "learning_rate": 4.9743205649475715e-05,
      "loss": 0.5646,
      "step": 550
    },
    {
      "epoch": 0.3321779318313114,
      "grad_norm": 32.50100326538086,
      "learning_rate": 4.947571153434625e-05,
      "loss": 0.7043,
      "step": 575
    },
    {
      "epoch": 0.3466204506065858,
      "grad_norm": 25.765533447265625,
      "learning_rate": 4.920821741921678e-05,
      "loss": 0.5854,
      "step": 600
    },
    {
      "epoch": 0.3610629693818602,
      "grad_norm": 33.29277420043945,
      "learning_rate": 4.894072330408731e-05,
      "loss": 0.7418,
      "step": 625
    },
    {
      "epoch": 0.3755054881571346,
      "grad_norm": 23.995582580566406,
      "learning_rate": 4.8673229188957844e-05,
      "loss": 0.7084,
      "step": 650
    },
    {
      "epoch": 0.389948006932409,
      "grad_norm": 24.641578674316406,
      "learning_rate": 4.840573507382838e-05,
      "loss": 0.6657,
      "step": 675
    },
    {
      "epoch": 0.4043905257076834,
      "grad_norm": 16.46844482421875,
      "learning_rate": 4.813824095869891e-05,
      "loss": 0.4334,
      "step": 700
    },
    {
      "epoch": 0.41883304448295783,
      "grad_norm": 15.955053329467773,
      "learning_rate": 4.7870746843569445e-05,
      "loss": 0.4989,
      "step": 725
    },
    {
      "epoch": 0.43327556325823224,
      "grad_norm": 20.922630310058594,
      "learning_rate": 4.760325272843998e-05,
      "loss": 0.5395,
      "step": 750
    },
    {
      "epoch": 0.44771808203350666,
      "grad_norm": 33.461490631103516,
      "learning_rate": 4.733575861331051e-05,
      "loss": 0.7912,
      "step": 775
    },
    {
      "epoch": 0.4621606008087811,
      "grad_norm": 28.123592376708984,
      "learning_rate": 4.7068264498181045e-05,
      "loss": 0.7634,
      "step": 800
    },
    {
      "epoch": 0.47660311958405543,
      "grad_norm": 6.648211479187012,
      "learning_rate": 4.6800770383051574e-05,
      "loss": 0.5912,
      "step": 825
    },
    {
      "epoch": 0.49104563835932985,
      "grad_norm": 6.500886917114258,
      "learning_rate": 4.653327626792211e-05,
      "loss": 0.595,
      "step": 850
    },
    {
      "epoch": 0.5054881571346043,
      "grad_norm": 17.182079315185547,
      "learning_rate": 4.626578215279264e-05,
      "loss": 0.6416,
      "step": 875
    },
    {
      "epoch": 0.5199306759098787,
      "grad_norm": 20.7849063873291,
      "learning_rate": 4.5998288037663175e-05,
      "loss": 0.4613,
      "step": 900
    },
    {
      "epoch": 0.5343731946851531,
      "grad_norm": 6.231338977813721,
      "learning_rate": 4.5730793922533704e-05,
      "loss": 0.4601,
      "step": 925
    },
    {
      "epoch": 0.5488157134604275,
      "grad_norm": 37.750099182128906,
      "learning_rate": 4.546329980740424e-05,
      "loss": 0.6588,
      "step": 950
    },
    {
      "epoch": 0.5632582322357019,
      "grad_norm": 13.106170654296875,
      "learning_rate": 4.5195805692274775e-05,
      "loss": 0.6352,
      "step": 975
    },
    {
      "epoch": 0.5777007510109763,
      "grad_norm": 12.079797744750977,
      "learning_rate": 4.4928311577145304e-05,
      "loss": 0.6447,
      "step": 1000
    },
    {
      "epoch": 0.5921432697862508,
      "grad_norm": 14.380491256713867,
      "learning_rate": 4.466081746201583e-05,
      "loss": 0.4655,
      "step": 1025
    },
    {
      "epoch": 0.6065857885615251,
      "grad_norm": 18.562135696411133,
      "learning_rate": 4.4393323346886376e-05,
      "loss": 0.4547,
      "step": 1050
    },
    {
      "epoch": 0.6210283073367996,
      "grad_norm": 22.869455337524414,
      "learning_rate": 4.4125829231756905e-05,
      "loss": 0.5574,
      "step": 1075
    },
    {
      "epoch": 0.635470826112074,
      "grad_norm": 15.650333404541016,
      "learning_rate": 4.3858335116627433e-05,
      "loss": 0.5149,
      "step": 1100
    },
    {
      "epoch": 0.6499133448873483,
      "grad_norm": 19.41951560974121,
      "learning_rate": 4.359084100149797e-05,
      "loss": 0.5395,
      "step": 1125
    },
    {
      "epoch": 0.6643558636626228,
      "grad_norm": 14.825141906738281,
      "learning_rate": 4.3323346886368505e-05,
      "loss": 0.3763,
      "step": 1150
    },
    {
      "epoch": 0.6787983824378971,
      "grad_norm": 10.986475944519043,
      "learning_rate": 4.3055852771239034e-05,
      "loss": 0.383,
      "step": 1175
    },
    {
      "epoch": 0.6932409012131716,
      "grad_norm": 6.805363655090332,
      "learning_rate": 4.278835865610957e-05,
      "loss": 0.4967,
      "step": 1200
    },
    {
      "epoch": 0.707683419988446,
      "grad_norm": 9.07087230682373,
      "learning_rate": 4.25208645409801e-05,
      "loss": 0.4451,
      "step": 1225
    },
    {
      "epoch": 0.7221259387637204,
      "grad_norm": 15.876053810119629,
      "learning_rate": 4.2253370425850634e-05,
      "loss": 0.4178,
      "step": 1250
    },
    {
      "epoch": 0.7365684575389948,
      "grad_norm": 10.015765190124512,
      "learning_rate": 4.198587631072117e-05,
      "loss": 0.5196,
      "step": 1275
    },
    {
      "epoch": 0.7510109763142692,
      "grad_norm": 14.343164443969727,
      "learning_rate": 4.17183821955917e-05,
      "loss": 0.3987,
      "step": 1300
    },
    {
      "epoch": 0.7654534950895436,
      "grad_norm": 7.555254936218262,
      "learning_rate": 4.145088808046223e-05,
      "loss": 0.4268,
      "step": 1325
    },
    {
      "epoch": 0.779896013864818,
      "grad_norm": 19.02402114868164,
      "learning_rate": 4.1183393965332764e-05,
      "loss": 0.5197,
      "step": 1350
    },
    {
      "epoch": 0.7943385326400925,
      "grad_norm": 17.702478408813477,
      "learning_rate": 4.09158998502033e-05,
      "loss": 0.3667,
      "step": 1375
    },
    {
      "epoch": 0.8087810514153668,
      "grad_norm": 14.175180435180664,
      "learning_rate": 4.064840573507383e-05,
      "loss": 0.4353,
      "step": 1400
    },
    {
      "epoch": 0.8232235701906413,
      "grad_norm": 11.903855323791504,
      "learning_rate": 4.0380911619944364e-05,
      "loss": 0.5338,
      "step": 1425
    },
    {
      "epoch": 0.8376660889659157,
      "grad_norm": 13.651313781738281,
      "learning_rate": 4.01134175048149e-05,
      "loss": 0.4009,
      "step": 1450
    },
    {
      "epoch": 0.85210860774119,
      "grad_norm": 5.367980480194092,
      "learning_rate": 3.984592338968543e-05,
      "loss": 0.5129,
      "step": 1475
    },
    {
      "epoch": 0.8665511265164645,
      "grad_norm": 18.119848251342773,
      "learning_rate": 3.957842927455596e-05,
      "loss": 0.4251,
      "step": 1500
    },
    {
      "epoch": 0.8809936452917388,
      "grad_norm": 8.821969032287598,
      "learning_rate": 3.9310935159426494e-05,
      "loss": 0.3965,
      "step": 1525
    },
    {
      "epoch": 0.8954361640670133,
      "grad_norm": 5.4203782081604,
      "learning_rate": 3.904344104429703e-05,
      "loss": 0.427,
      "step": 1550
    },
    {
      "epoch": 0.9098786828422877,
      "grad_norm": 10.101963996887207,
      "learning_rate": 3.877594692916756e-05,
      "loss": 0.3891,
      "step": 1575
    },
    {
      "epoch": 0.9243212016175621,
      "grad_norm": 5.893563747406006,
      "learning_rate": 3.8508452814038094e-05,
      "loss": 0.4165,
      "step": 1600
    },
    {
      "epoch": 0.9387637203928365,
      "grad_norm": 6.127294540405273,
      "learning_rate": 3.8240958698908623e-05,
      "loss": 0.5194,
      "step": 1625
    },
    {
      "epoch": 0.9532062391681109,
      "grad_norm": 13.313063621520996,
      "learning_rate": 3.797346458377916e-05,
      "loss": 0.4108,
      "step": 1650
    },
    {
      "epoch": 0.9676487579433853,
      "grad_norm": 14.13697624206543,
      "learning_rate": 3.7705970468649695e-05,
      "loss": 0.4612,
      "step": 1675
    },
    {
      "epoch": 0.9820912767186597,
      "grad_norm": 8.540029525756836,
      "learning_rate": 3.7438476353520224e-05,
      "loss": 0.4081,
      "step": 1700
    },
    {
      "epoch": 0.9965337954939342,
      "grad_norm": 15.01349925994873,
      "learning_rate": 3.717098223839075e-05,
      "loss": 0.4559,
      "step": 1725
    },
    {
      "epoch": 1.0,
      "eval_explained_variance": 0.6565504670143127,
      "eval_loss": 0.5362390279769897,
      "eval_mae": 0.5734534859657288,
      "eval_mse": 0.5362390279769897,
      "eval_r2": 0.5139721769346014,
      "eval_rmse": 0.7322834134101868,
      "eval_runtime": 21.5346,
      "eval_samples_per_second": 160.765,
      "eval_steps_per_second": 10.077,
      "step": 1731
    },
    {
      "epoch": 1.0109763142692085,
      "grad_norm": 21.412073135375977,
      "learning_rate": 3.6903488123261295e-05,
      "loss": 0.4508,
      "step": 1750
    },
    {
      "epoch": 1.025418833044483,
      "grad_norm": 8.56926155090332,
      "learning_rate": 3.6635994008131824e-05,
      "loss": 0.4829,
      "step": 1775
    },
    {
      "epoch": 1.0398613518197575,
      "grad_norm": 9.545137405395508,
      "learning_rate": 3.6368499893002353e-05,
      "loss": 0.4499,
      "step": 1800
    },
    {
      "epoch": 1.0543038705950318,
      "grad_norm": 27.51245880126953,
      "learning_rate": 3.610100577787289e-05,
      "loss": 0.4109,
      "step": 1825
    },
    {
      "epoch": 1.0687463893703062,
      "grad_norm": 12.80823802947998,
      "learning_rate": 3.583351166274342e-05,
      "loss": 0.3662,
      "step": 1850
    },
    {
      "epoch": 1.0831889081455806,
      "grad_norm": 7.883637428283691,
      "learning_rate": 3.5566017547613954e-05,
      "loss": 0.4184,
      "step": 1875
    },
    {
      "epoch": 1.097631426920855,
      "grad_norm": 7.406681537628174,
      "learning_rate": 3.529852343248449e-05,
      "loss": 0.3439,
      "step": 1900
    },
    {
      "epoch": 1.1120739456961295,
      "grad_norm": 19.036779403686523,
      "learning_rate": 3.503102931735502e-05,
      "loss": 0.4009,
      "step": 1925
    },
    {
      "epoch": 1.1265164644714039,
      "grad_norm": 8.52979850769043,
      "learning_rate": 3.476353520222555e-05,
      "loss": 0.4397,
      "step": 1950
    },
    {
      "epoch": 1.1409589832466782,
      "grad_norm": 16.250051498413086,
      "learning_rate": 3.449604108709609e-05,
      "loss": 0.4651,
      "step": 1975
    },
    {
      "epoch": 1.1554015020219526,
      "grad_norm": 5.9189300537109375,
      "learning_rate": 3.422854697196662e-05,
      "loss": 0.4419,
      "step": 2000
    },
    {
      "epoch": 1.169844020797227,
      "grad_norm": 11.332290649414062,
      "learning_rate": 3.396105285683715e-05,
      "loss": 0.3755,
      "step": 2025
    },
    {
      "epoch": 1.1842865395725015,
      "grad_norm": 9.792673110961914,
      "learning_rate": 3.3693558741707684e-05,
      "loss": 0.358,
      "step": 2050
    },
    {
      "epoch": 1.1987290583477759,
      "grad_norm": 14.335423469543457,
      "learning_rate": 3.342606462657822e-05,
      "loss": 0.3512,
      "step": 2075
    },
    {
      "epoch": 1.2131715771230502,
      "grad_norm": 9.749696731567383,
      "learning_rate": 3.315857051144875e-05,
      "loss": 0.4072,
      "step": 2100
    },
    {
      "epoch": 1.2276140958983246,
      "grad_norm": 9.317971229553223,
      "learning_rate": 3.2891076396319284e-05,
      "loss": 0.4359,
      "step": 2125
    },
    {
      "epoch": 1.242056614673599,
      "grad_norm": 14.866842269897461,
      "learning_rate": 3.262358228118981e-05,
      "loss": 0.4314,
      "step": 2150
    },
    {
      "epoch": 1.2564991334488735,
      "grad_norm": 6.312429428100586,
      "learning_rate": 3.235608816606035e-05,
      "loss": 0.3819,
      "step": 2175
    },
    {
      "epoch": 1.270941652224148,
      "grad_norm": 5.175512313842773,
      "learning_rate": 3.208859405093088e-05,
      "loss": 0.4469,
      "step": 2200
    },
    {
      "epoch": 1.2853841709994223,
      "grad_norm": 16.6768856048584,
      "learning_rate": 3.1821099935801414e-05,
      "loss": 0.4156,
      "step": 2225
    },
    {
      "epoch": 1.2998266897746968,
      "grad_norm": 5.419372081756592,
      "learning_rate": 3.155360582067194e-05,
      "loss": 0.3966,
      "step": 2250
    },
    {
      "epoch": 1.314269208549971,
      "grad_norm": 9.6641263961792,
      "learning_rate": 3.128611170554248e-05,
      "loss": 0.3622,
      "step": 2275
    },
    {
      "epoch": 1.3287117273252456,
      "grad_norm": 11.433446884155273,
      "learning_rate": 3.1018617590413014e-05,
      "loss": 0.5056,
      "step": 2300
    },
    {
      "epoch": 1.34315424610052,
      "grad_norm": 8.54787540435791,
      "learning_rate": 3.075112347528354e-05,
      "loss": 0.3228,
      "step": 2325
    },
    {
      "epoch": 1.3575967648757943,
      "grad_norm": 24.707653045654297,
      "learning_rate": 3.0483629360154076e-05,
      "loss": 0.3755,
      "step": 2350
    },
    {
      "epoch": 1.3720392836510689,
      "grad_norm": 13.02287483215332,
      "learning_rate": 3.021613524502461e-05,
      "loss": 0.313,
      "step": 2375
    },
    {
      "epoch": 1.3864818024263432,
      "grad_norm": 7.0347771644592285,
      "learning_rate": 2.9948641129895144e-05,
      "loss": 0.3618,
      "step": 2400
    },
    {
      "epoch": 1.4009243212016176,
      "grad_norm": 3.799116611480713,
      "learning_rate": 2.9681147014765676e-05,
      "loss": 0.3338,
      "step": 2425
    },
    {
      "epoch": 1.415366839976892,
      "grad_norm": 10.154156684875488,
      "learning_rate": 2.941365289963621e-05,
      "loss": 0.3184,
      "step": 2450
    },
    {
      "epoch": 1.4298093587521663,
      "grad_norm": 22.4088191986084,
      "learning_rate": 2.9146158784506744e-05,
      "loss": 0.3218,
      "step": 2475
    },
    {
      "epoch": 1.4442518775274409,
      "grad_norm": 5.571261405944824,
      "learning_rate": 2.8878664669377277e-05,
      "loss": 0.4167,
      "step": 2500
    },
    {
      "epoch": 1.4586943963027152,
      "grad_norm": 10.851147651672363,
      "learning_rate": 2.8611170554247806e-05,
      "loss": 0.3881,
      "step": 2525
    },
    {
      "epoch": 1.4731369150779896,
      "grad_norm": 30.00836181640625,
      "learning_rate": 2.8343676439118338e-05,
      "loss": 0.3706,
      "step": 2550
    },
    {
      "epoch": 1.487579433853264,
      "grad_norm": 12.909472465515137,
      "learning_rate": 2.8076182323988874e-05,
      "loss": 0.4325,
      "step": 2575
    },
    {
      "epoch": 1.5020219526285383,
      "grad_norm": 10.231127738952637,
      "learning_rate": 2.7808688208859406e-05,
      "loss": 0.3357,
      "step": 2600
    },
    {
      "epoch": 1.516464471403813,
      "grad_norm": 7.157652378082275,
      "learning_rate": 2.754119409372994e-05,
      "loss": 0.3927,
      "step": 2625
    },
    {
      "epoch": 1.5309069901790873,
      "grad_norm": 13.10181999206543,
      "learning_rate": 2.727369997860047e-05,
      "loss": 0.3189,
      "step": 2650
    },
    {
      "epoch": 1.5453495089543616,
      "grad_norm": 12.194095611572266,
      "learning_rate": 2.7006205863471007e-05,
      "loss": 0.4226,
      "step": 2675
    },
    {
      "epoch": 1.5597920277296362,
      "grad_norm": 18.289899826049805,
      "learning_rate": 2.673871174834154e-05,
      "loss": 0.2865,
      "step": 2700
    },
    {
      "epoch": 1.5742345465049103,
      "grad_norm": 4.3070068359375,
      "learning_rate": 2.647121763321207e-05,
      "loss": 0.3107,
      "step": 2725
    },
    {
      "epoch": 1.588677065280185,
      "grad_norm": 26.21879768371582,
      "learning_rate": 2.62037235180826e-05,
      "loss": 0.3243,
      "step": 2750
    },
    {
      "epoch": 1.6031195840554593,
      "grad_norm": 8.495038986206055,
      "learning_rate": 2.593622940295314e-05,
      "loss": 0.3285,
      "step": 2775
    },
    {
      "epoch": 1.6175621028307337,
      "grad_norm": 17.74176788330078,
      "learning_rate": 2.566873528782367e-05,
      "loss": 0.3819,
      "step": 2800
    },
    {
      "epoch": 1.6320046216060082,
      "grad_norm": 8.67226505279541,
      "learning_rate": 2.54012411726942e-05,
      "loss": 0.4388,
      "step": 2825
    },
    {
      "epoch": 1.6464471403812824,
      "grad_norm": 9.305310249328613,
      "learning_rate": 2.5133747057564733e-05,
      "loss": 0.3801,
      "step": 2850
    },
    {
      "epoch": 1.660889659156557,
      "grad_norm": 16.156944274902344,
      "learning_rate": 2.4866252942435266e-05,
      "loss": 0.337,
      "step": 2875
    },
    {
      "epoch": 1.6753321779318313,
      "grad_norm": 18.950183868408203,
      "learning_rate": 2.45987588273058e-05,
      "loss": 0.3913,
      "step": 2900
    },
    {
      "epoch": 1.6897746967071057,
      "grad_norm": 4.8534321784973145,
      "learning_rate": 2.4331264712176334e-05,
      "loss": 0.3677,
      "step": 2925
    },
    {
      "epoch": 1.7042172154823803,
      "grad_norm": 7.860241413116455,
      "learning_rate": 2.4063770597046866e-05,
      "loss": 0.3495,
      "step": 2950
    },
    {
      "epoch": 1.7186597342576544,
      "grad_norm": 10.027009010314941,
      "learning_rate": 2.37962764819174e-05,
      "loss": 0.3787,
      "step": 2975
    },
    {
      "epoch": 1.733102253032929,
      "grad_norm": 13.39957046508789,
      "learning_rate": 2.352878236678793e-05,
      "loss": 0.3064,
      "step": 3000
    },
    {
      "epoch": 1.7475447718082033,
      "grad_norm": 8.104743957519531,
      "learning_rate": 2.3261288251658463e-05,
      "loss": 0.3703,
      "step": 3025
    },
    {
      "epoch": 1.7619872905834777,
      "grad_norm": 7.085102558135986,
      "learning_rate": 2.2993794136529e-05,
      "loss": 0.3488,
      "step": 3050
    },
    {
      "epoch": 1.7764298093587523,
      "grad_norm": 8.273953437805176,
      "learning_rate": 2.272630002139953e-05,
      "loss": 0.3574,
      "step": 3075
    },
    {
      "epoch": 1.7908723281340264,
      "grad_norm": 5.399058818817139,
      "learning_rate": 2.2458805906270064e-05,
      "loss": 0.3699,
      "step": 3100
    },
    {
      "epoch": 1.805314846909301,
      "grad_norm": 25.818262100219727,
      "learning_rate": 2.2191311791140596e-05,
      "loss": 0.2714,
      "step": 3125
    },
    {
      "epoch": 1.8197573656845754,
      "grad_norm": 8.441669464111328,
      "learning_rate": 2.192381767601113e-05,
      "loss": 0.3225,
      "step": 3150
    },
    {
      "epoch": 1.8341998844598497,
      "grad_norm": 3.318145751953125,
      "learning_rate": 2.165632356088166e-05,
      "loss": 0.3359,
      "step": 3175
    },
    {
      "epoch": 1.8486424032351243,
      "grad_norm": 3.700218439102173,
      "learning_rate": 2.1388829445752197e-05,
      "loss": 0.3138,
      "step": 3200
    },
    {
      "epoch": 1.8630849220103987,
      "grad_norm": 4.9609246253967285,
      "learning_rate": 2.1121335330622726e-05,
      "loss": 0.3429,
      "step": 3225
    },
    {
      "epoch": 1.877527440785673,
      "grad_norm": 11.287262916564941,
      "learning_rate": 2.085384121549326e-05,
      "loss": 0.3926,
      "step": 3250
    },
    {
      "epoch": 1.8919699595609474,
      "grad_norm": 13.642833709716797,
      "learning_rate": 2.0586347100363794e-05,
      "loss": 0.3484,
      "step": 3275
    },
    {
      "epoch": 1.9064124783362217,
      "grad_norm": 5.669510364532471,
      "learning_rate": 2.0318852985234326e-05,
      "loss": 0.3307,
      "step": 3300
    },
    {
      "epoch": 1.9208549971114963,
      "grad_norm": 11.987211227416992,
      "learning_rate": 2.005135887010486e-05,
      "loss": 0.2979,
      "step": 3325
    },
    {
      "epoch": 1.9352975158867707,
      "grad_norm": 8.090258598327637,
      "learning_rate": 1.978386475497539e-05,
      "loss": 0.2786,
      "step": 3350
    },
    {
      "epoch": 1.949740034662045,
      "grad_norm": 5.356060028076172,
      "learning_rate": 1.9516370639845923e-05,
      "loss": 0.281,
      "step": 3375
    },
    {
      "epoch": 1.9641825534373196,
      "grad_norm": 9.354238510131836,
      "learning_rate": 1.924887652471646e-05,
      "loss": 0.2761,
      "step": 3400
    },
    {
      "epoch": 1.9786250722125938,
      "grad_norm": 12.076613426208496,
      "learning_rate": 1.8981382409586988e-05,
      "loss": 0.3394,
      "step": 3425
    },
    {
      "epoch": 1.9930675909878683,
      "grad_norm": 6.9947428703308105,
      "learning_rate": 1.8713888294457524e-05,
      "loss": 0.3652,
      "step": 3450
    },
    {
      "epoch": 2.0,
      "eval_explained_variance": 0.700911283493042,
      "eval_loss": 0.3305796980857849,
      "eval_mae": 0.44259902834892273,
      "eval_mse": 0.3305796980857849,
      "eval_r2": 0.7003744220771353,
      "eval_rmse": 0.5749605894088745,
      "eval_runtime": 21.4982,
      "eval_samples_per_second": 161.037,
      "eval_steps_per_second": 10.094,
      "step": 3462
    }
  ],
  "logging_steps": 25,
  "max_steps": 5193,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 7285479708948480.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}