{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9995138551288284,
  "eval_steps": 500,
  "global_step": 1028,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0009722897423432182, "grad_norm": 22.865641182810197, "learning_rate": 9.70873786407767e-08, "loss": 1.3413, "step": 1},
    {"epoch": 0.004861448711716091, "grad_norm": 21.908798159393672, "learning_rate": 4.854368932038835e-07, "loss": 1.3405, "step": 5},
    {"epoch": 0.009722897423432183, "grad_norm": 8.67999928561487, "learning_rate": 9.70873786407767e-07, "loss": 1.227, "step": 10},
    {"epoch": 0.014584346135148274, "grad_norm": 9.222051962167232, "learning_rate": 1.4563106796116506e-06, "loss": 1.0774, "step": 15},
    {"epoch": 0.019445794846864366, "grad_norm": 3.108065986341407, "learning_rate": 1.941747572815534e-06, "loss": 0.9546, "step": 20},
    {"epoch": 0.024307243558580455, "grad_norm": 2.3305737963317545, "learning_rate": 2.427184466019418e-06, "loss": 0.8883, "step": 25},
    {"epoch": 0.02916869227029655, "grad_norm": 2.166786564817553, "learning_rate": 2.912621359223301e-06, "loss": 0.8596, "step": 30},
    {"epoch": 0.03403014098201264, "grad_norm": 2.2200337024713366, "learning_rate": 3.398058252427185e-06, "loss": 0.8302, "step": 35},
    {"epoch": 0.03889158969372873, "grad_norm": 2.266283635100358, "learning_rate": 3.883495145631068e-06, "loss": 0.82, "step": 40},
    {"epoch": 0.043753038405444825, "grad_norm": 2.4391536905162696, "learning_rate": 4.368932038834952e-06, "loss": 0.8, "step": 45},
    {"epoch": 0.04861448711716091, "grad_norm": 2.3258135076743423, "learning_rate": 4.854368932038836e-06, "loss": 0.792, "step": 50},
    {"epoch": 0.053475935828877004, "grad_norm": 2.4765334900301914, "learning_rate": 5.3398058252427185e-06, "loss": 0.7804, "step": 55},
    {"epoch": 0.0583373845405931, "grad_norm": 2.7215966994164704, "learning_rate": 5.825242718446602e-06, "loss": 0.764, "step": 60},
    {"epoch": 0.06319883325230918, "grad_norm": 2.4553563315970854, "learning_rate": 6.310679611650487e-06, "loss": 0.7521, "step": 65},
    {"epoch": 0.06806028196402528, "grad_norm": 2.4367790074569675, "learning_rate": 6.79611650485437e-06, "loss": 0.7394, "step": 70},
    {"epoch": 0.07292173067574137, "grad_norm": 2.335256384006913, "learning_rate": 7.2815533980582534e-06, "loss": 0.7134, "step": 75},
    {"epoch": 0.07778317938745746, "grad_norm": 2.1323333889527953, "learning_rate": 7.766990291262136e-06, "loss": 0.7293, "step": 80},
    {"epoch": 0.08264462809917356, "grad_norm": 2.1774984480511095, "learning_rate": 8.25242718446602e-06, "loss": 0.7164, "step": 85},
    {"epoch": 0.08750607681088965, "grad_norm": 2.2022981209541608, "learning_rate": 8.737864077669904e-06, "loss": 0.7011, "step": 90},
    {"epoch": 0.09236752552260574, "grad_norm": 2.312357582283152, "learning_rate": 9.223300970873788e-06, "loss": 0.7079, "step": 95},
    {"epoch": 0.09722897423432182, "grad_norm": 2.1629606540297406, "learning_rate": 9.708737864077671e-06, "loss": 0.7021, "step": 100},
    {"epoch": 0.10209042294603791, "grad_norm": 2.5455373567567054, "learning_rate": 9.999884650793835e-06, "loss": 0.6992, "step": 105},
    {"epoch": 0.10695187165775401, "grad_norm": 2.3692741540462237, "learning_rate": 9.998587033345546e-06, "loss": 0.694, "step": 110},
    {"epoch": 0.1118133203694701, "grad_norm": 2.1411842110512964, "learning_rate": 9.995847987378953e-06, "loss": 0.6975, "step": 115},
    {"epoch": 0.1166747690811862, "grad_norm": 2.089784612791783, "learning_rate": 9.991668302745053e-06, "loss": 0.6838, "step": 120},
    {"epoch": 0.12153621779290229, "grad_norm": 2.323386252470974, "learning_rate": 9.98604918472778e-06, "loss": 0.6777, "step": 125},
    {"epoch": 0.12639766650461837, "grad_norm": 2.109074650826608, "learning_rate": 9.97899225369643e-06, "loss": 0.6804, "step": 130},
    {"epoch": 0.13125911521633446, "grad_norm": 1.977824454327765, "learning_rate": 9.970499544638405e-06, "loss": 0.6828, "step": 135},
    {"epoch": 0.13612056392805055, "grad_norm": 2.3675044930177314, "learning_rate": 9.960573506572391e-06, "loss": 0.6685, "step": 140},
    {"epoch": 0.14098201263976665, "grad_norm": 2.135635558376172, "learning_rate": 9.949217001842128e-06, "loss": 0.6799, "step": 145},
    {"epoch": 0.14584346135148274, "grad_norm": 2.1238487943590982, "learning_rate": 9.93643330529103e-06, "loss": 0.662, "step": 150},
    {"epoch": 0.15070491006319883, "grad_norm": 2.0658607430202953, "learning_rate": 9.922226103317802e-06, "loss": 0.6682, "step": 155},
    {"epoch": 0.15556635877491493, "grad_norm": 2.3142186772351936, "learning_rate": 9.906599492813413e-06, "loss": 0.6465, "step": 160},
    {"epoch": 0.16042780748663102, "grad_norm": 2.078145593908007, "learning_rate": 9.889557979979695e-06, "loss": 0.6493, "step": 165},
    {"epoch": 0.1652892561983471, "grad_norm": 2.174446805793003, "learning_rate": 9.871106479029889e-06, "loss": 0.6642, "step": 170},
    {"epoch": 0.1701507049100632, "grad_norm": 2.1609913986701184, "learning_rate": 9.851250310771552e-06, "loss": 0.6563, "step": 175},
    {"epoch": 0.1750121536217793, "grad_norm": 2.0544241967243533, "learning_rate": 9.829995201072217e-06, "loss": 0.6421, "step": 180},
    {"epoch": 0.1798736023334954, "grad_norm": 2.1208283810533115, "learning_rate": 9.807347279208233e-06, "loss": 0.6427, "step": 185},
    {"epoch": 0.18473505104521148, "grad_norm": 2.3054596970818593, "learning_rate": 9.783313076097285e-06, "loss": 0.6441, "step": 190},
    {"epoch": 0.18959649975692758, "grad_norm": 2.002111307715066, "learning_rate": 9.75789952241509e-06, "loss": 0.6211, "step": 195},
    {"epoch": 0.19445794846864364, "grad_norm": 2.0437914538851896, "learning_rate": 9.73111394659682e-06, "loss": 0.6499, "step": 200},
    {"epoch": 0.19931939718035974, "grad_norm": 2.0833682723245106, "learning_rate": 9.702964072723825e-06, "loss": 0.6286, "step": 205},
    {"epoch": 0.20418084589207583, "grad_norm": 1.8608505397920612, "learning_rate": 9.673458018296249e-06, "loss": 0.6214, "step": 210},
    {"epoch": 0.20904229460379192, "grad_norm": 1.9565897157549554, "learning_rate": 9.642604291892227e-06, "loss": 0.6313, "step": 215},
    {"epoch": 0.21390374331550802, "grad_norm": 2.0254219704696546, "learning_rate": 9.610411790714274e-06, "loss": 0.6294, "step": 220},
    {"epoch": 0.2187651920272241, "grad_norm": 2.042861634862239, "learning_rate": 9.576889798023632e-06, "loss": 0.6062, "step": 225},
    {"epoch": 0.2236266407389402, "grad_norm": 1.9416084530570248, "learning_rate": 9.54204798046328e-06, "loss": 0.6163, "step": 230},
    {"epoch": 0.2284880894506563, "grad_norm": 2.1101068845725446, "learning_rate": 9.505896385270397e-06, "loss": 0.6086, "step": 235},
    {"epoch": 0.2333495381623724, "grad_norm": 1.9316514600214862, "learning_rate": 9.468445437379054e-06, "loss": 0.6163, "step": 240},
    {"epoch": 0.23821098687408848, "grad_norm": 2.31121577708352, "learning_rate": 9.42970593641402e-06, "loss": 0.6083, "step": 245},
    {"epoch": 0.24307243558580457, "grad_norm": 2.084199435888248, "learning_rate": 9.389689053576497e-06, "loss": 0.5875, "step": 250},
    {"epoch": 0.24793388429752067, "grad_norm": 2.0679175188925742, "learning_rate": 9.348406328422714e-06, "loss": 0.5889, "step": 255},
    {"epoch": 0.25279533300923673, "grad_norm": 1.935193185903998, "learning_rate": 9.305869665536296e-06, "loss": 0.5978, "step": 260},
    {"epoch": 0.25765678172095285, "grad_norm": 1.9881581060186517, "learning_rate": 9.262091331095375e-06, "loss": 0.5963, "step": 265},
    {"epoch": 0.2625182304326689, "grad_norm": 1.9171404527587066, "learning_rate": 9.217083949335429e-06, "loss": 0.5826, "step": 270},
    {"epoch": 0.26737967914438504, "grad_norm": 2.066905479586149, "learning_rate": 9.170860498908851e-06, "loss": 0.5868, "step": 275},
    {"epoch": 0.2722411278561011, "grad_norm": 1.9405147945598347, "learning_rate": 9.12343430914236e-06, "loss": 0.5938, "step": 280},
    {"epoch": 0.2771025765678172, "grad_norm": 2.042477110270263, "learning_rate": 9.07481905619323e-06, "loss": 0.5879, "step": 285},
    {"epoch": 0.2819640252795333, "grad_norm": 2.316892218301555, "learning_rate": 9.025028759105558e-06, "loss": 0.5862, "step": 290},
    {"epoch": 0.2868254739912494, "grad_norm": 1.9826669776669494, "learning_rate": 8.974077775767618e-06, "loss": 0.5795, "step": 295},
    {"epoch": 0.2916869227029655, "grad_norm": 1.9928663954865473, "learning_rate": 8.921980798771521e-06, "loss": 0.5711, "step": 300},
    {"epoch": 0.2965483714146816, "grad_norm": 1.9834504354777311, "learning_rate": 8.868752851176357e-06, "loss": 0.5797, "step": 305},
    {"epoch": 0.30140982012639767, "grad_norm": 1.8922576118910497, "learning_rate": 8.814409282176029e-06, "loss": 0.5816, "step": 310},
    {"epoch": 0.30627126883811373, "grad_norm": 2.054072819946173, "learning_rate": 8.758965762673065e-06, "loss": 0.5671, "step": 315},
    {"epoch": 0.31113271754982985, "grad_norm": 1.9718238348510062, "learning_rate": 8.70243828075962e-06, "loss": 0.5582, "step": 320},
    {"epoch": 0.3159941662615459, "grad_norm": 1.8711561533553072, "learning_rate": 8.644843137107058e-06, "loss": 0.5616, "step": 325},
    {"epoch": 0.32085561497326204, "grad_norm": 2.0374989076214676, "learning_rate": 8.58619694026536e-06, "loss": 0.5593, "step": 330},
    {"epoch": 0.3257170636849781, "grad_norm": 2.505113100765879, "learning_rate": 8.526516601873764e-06, "loss": 0.5492, "step": 335},
    {"epoch": 0.3305785123966942, "grad_norm": 2.210634577584573, "learning_rate": 8.46581933178401e-06, "loss": 0.5564, "step": 340},
    {"epoch": 0.3354399611084103, "grad_norm": 1.9498504768203495, "learning_rate": 8.404122633097573e-06, "loss": 0.5446, "step": 345},
    {"epoch": 0.3403014098201264, "grad_norm": 2.1908777181373567, "learning_rate": 8.341444297118353e-06, "loss": 0.5435, "step": 350},
    {"epoch": 0.3451628585318425, "grad_norm": 1.9450484392786167, "learning_rate": 8.27780239822224e-06, "loss": 0.5566, "step": 355},
    {"epoch": 0.3500243072435586, "grad_norm": 1.9837258801331765, "learning_rate": 8.213215288645058e-06, "loss": 0.5267, "step": 360},
    {"epoch": 0.35488575595527466, "grad_norm": 1.9536552831602378, "learning_rate": 8.147701593190384e-06, "loss": 0.5387, "step": 365},
    {"epoch": 0.3597472046669908, "grad_norm": 1.9999100970286463, "learning_rate": 8.081280203858767e-06, "loss": 0.5331, "step": 370},
    {"epoch": 0.36460865337870685, "grad_norm": 1.9212833886247676, "learning_rate": 8.01397027439989e-06, "loss": 0.5349, "step": 375},
    {"epoch": 0.36947010209042297, "grad_norm": 1.9162389845327823, "learning_rate": 7.945791214789261e-06, "loss": 0.5221, "step": 380},
    {"epoch": 0.37433155080213903, "grad_norm": 1.909711155316506, "learning_rate": 7.876762685631005e-06, "loss": 0.5235, "step": 385},
    {"epoch": 0.37919299951385516, "grad_norm": 2.357077558485306, "learning_rate": 7.806904592488409e-06, "loss": 0.5222, "step": 390},
    {"epoch": 0.3840544482255712, "grad_norm": 1.9488437106922831, "learning_rate": 7.736237080143788e-06, "loss": 0.5198, "step": 395},
    {"epoch": 0.3889158969372873, "grad_norm": 1.9458627487121898, "learning_rate": 7.664780526789409e-06, "loss": 0.5145, "step": 400},
    {"epoch": 0.3937773456490034, "grad_norm": 1.944072611413161, "learning_rate": 7.592555538151073e-06, "loss": 0.5136, "step": 405},
    {"epoch": 0.3986387943607195, "grad_norm": 1.965724726537108, "learning_rate": 7.519582941546117e-06, "loss": 0.5235, "step": 410},
    {"epoch": 0.4035002430724356, "grad_norm": 2.100470738223745, "learning_rate": 7.445883779877483e-06, "loss": 0.5094, "step": 415},
    {"epoch": 0.40836169178415166, "grad_norm": 1.9100636260210968, "learning_rate": 7.371479305565644e-06, "loss": 0.5103, "step": 420},
    {"epoch": 0.4132231404958678, "grad_norm": 1.9333255998991457, "learning_rate": 7.296390974420102e-06, "loss": 0.5054, "step": 425},
    {"epoch": 0.41808458920758385, "grad_norm": 2.080473533066085, "learning_rate": 7.220640439452236e-06, "loss": 0.4959, "step": 430},
    {"epoch": 0.42294603791929997, "grad_norm": 1.981039750107447, "learning_rate": 7.144249544631279e-06, "loss": 0.4991, "step": 435},
    {"epoch": 0.42780748663101603, "grad_norm": 1.9695849449401175, "learning_rate": 7.067240318585242e-06, "loss": 0.499, "step": 440},
    {"epoch": 0.43266893534273215, "grad_norm": 1.9718354318180367, "learning_rate": 6.989634968248578e-06, "loss": 0.4999, "step": 445},
    {"epoch": 0.4375303840544482, "grad_norm": 1.849425307215673, "learning_rate": 6.911455872458423e-06, "loss": 0.5024, "step": 450},
    {"epoch": 0.44239183276616434, "grad_norm": 1.968118907489162, "learning_rate": 6.832725575501287e-06, "loss": 0.4928, "step": 455},
    {"epoch": 0.4472532814778804, "grad_norm": 2.052190888318457, "learning_rate": 6.753466780612008e-06, "loss": 0.4883, "step": 460},
    {"epoch": 0.4521147301895965, "grad_norm": 2.023182275453982, "learning_rate": 6.673702343426894e-06, "loss": 0.4917, "step": 465},
    {"epoch": 0.4569761789013126, "grad_norm": 1.9163489656538493, "learning_rate": 6.593455265392901e-06, "loss": 0.4805, "step": 470},
    {"epoch": 0.46183762761302866, "grad_norm": 2.01148328208047, "learning_rate": 6.512748687134771e-06, "loss": 0.4848, "step": 475},
    {"epoch": 0.4666990763247448, "grad_norm": 1.9062562183206269, "learning_rate": 6.431605881782043e-06, "loss": 0.4846, "step": 480},
    {"epoch": 0.47156052503646084, "grad_norm": 2.1740484017488173, "learning_rate": 6.3500502482578296e-06, "loss": 0.4697, "step": 485},
    {"epoch": 0.47642197374817696, "grad_norm": 1.9616349762922658, "learning_rate": 6.268105304531353e-06, "loss": 0.4781, "step": 490},
    {"epoch": 0.48128342245989303, "grad_norm": 2.0065445987317467, "learning_rate": 6.185794680836124e-06, "loss": 0.4682, "step": 495},
    {"epoch": 0.48614487117160915, "grad_norm": 2.1759803858020716, "learning_rate": 6.103142112855758e-06, "loss": 0.4669, "step": 500},
    {"epoch": 0.4910063198833252, "grad_norm": 1.928661549886414, "learning_rate": 6.020171434879385e-06, "loss": 0.4635, "step": 505},
    {"epoch": 0.49586776859504134, "grad_norm": 2.0042980585449324, "learning_rate": 5.936906572928625e-06, "loss": 0.4657, "step": 510},
    {"epoch": 0.5007292173067575, "grad_norm": 1.9953252819560416, "learning_rate": 5.8533715378581e-06, "loss": 0.4642, "step": 515},
    {"epoch": 0.5055906660184735, "grad_norm": 2.055778442556058, "learning_rate": 5.769590418431502e-06, "loss": 0.4586, "step": 520},
    {"epoch": 0.5104521147301896, "grad_norm": 1.9430537005076507, "learning_rate": 5.685587374375176e-06, "loss": 0.4513, "step": 525},
    {"epoch": 0.5153135634419057, "grad_norm": 1.850811801991838, "learning_rate": 5.601386629411247e-06, "loss": 0.4545, "step": 530},
    {"epoch": 0.5201750121536218, "grad_norm": 1.8848477276717563, "learning_rate": 5.5170124642723035e-06, "loss": 0.4459, "step": 535},
    {"epoch": 0.5250364608653378, "grad_norm": 2.0655655892354985, "learning_rate": 5.432489209699614e-06, "loss": 0.4485, "step": 540},
    {"epoch": 0.529897909577054, "grad_norm": 1.8863958006941897, "learning_rate": 5.347841239426956e-06, "loss": 0.4353, "step": 545},
    {"epoch": 0.5347593582887701, "grad_norm": 1.9547833187632682, "learning_rate": 5.263092963152021e-06, "loss": 0.4469, "step": 550},
    {"epoch": 0.5396208070004861, "grad_norm": 1.9174269856279669, "learning_rate": 5.178268819497459e-06, "loss": 0.45, "step": 555},
    {"epoch": 0.5444822557122022, "grad_norm": 2.0224149703614436, "learning_rate": 5.0933932689635855e-06, "loss": 0.4389, "step": 560},
    {"epoch": 0.5493437044239183, "grad_norm": 1.9813759308607048, "learning_rate": 5.008490786874775e-06, "loss": 0.4344, "step": 565},
    {"epoch": 0.5542051531356345, "grad_norm": 1.8864523302122398, "learning_rate": 4.923585856321577e-06, "loss": 0.4385, "step": 570},
    {"epoch": 0.5590666018473505, "grad_norm": 1.915948818115916, "learning_rate": 4.8387029611005945e-06, "loss": 0.4402, "step": 575},
    {"epoch": 0.5639280505590666, "grad_norm": 1.8637742571775495, "learning_rate": 4.753866578654171e-06, "loss": 0.4315, "step": 580},
    {"epoch": 0.5687894992707827, "grad_norm": 1.9492458299998499, "learning_rate": 4.669101173011885e-06, "loss": 0.4262, "step": 585},
    {"epoch": 0.5736509479824988, "grad_norm": 1.919366061689666, "learning_rate": 4.584431187735939e-06, "loss": 0.4329, "step": 590},
    {"epoch": 0.5785123966942148, "grad_norm": 1.9206859765403341, "learning_rate": 4.499881038872424e-06, "loss": 0.4333, "step": 595},
    {"epoch": 0.583373845405931, "grad_norm": 2.0207639364785663, "learning_rate": 4.415475107910553e-06, "loss": 0.4247, "step": 600},
    {"epoch": 0.5882352941176471, "grad_norm": 1.9003063950335572, "learning_rate": 4.331237734751813e-06, "loss": 0.419, "step": 605},
    {"epoch": 0.5930967428293632, "grad_norm": 1.9589720672960007, "learning_rate": 4.247193210691164e-06, "loss": 0.4135, "step": 610},
    {"epoch": 0.5979581915410792, "grad_norm": 1.8725922589668766, "learning_rate": 4.1633657714122e-06, "loss": 0.4203, "step": 615},
    {"epoch": 0.6028196402527953, "grad_norm": 1.8638498804130523, "learning_rate": 4.0797795899983984e-06, "loss": 0.4199, "step": 620},
    {"epoch": 0.6076810889645115, "grad_norm": 1.896661777937066, "learning_rate": 3.9964587699623705e-06, "loss": 0.4083, "step": 625},
    {"epoch": 0.6125425376762275, "grad_norm": 1.850040585103086, "learning_rate": 3.913427338295222e-06, "loss": 0.4132, "step": 630},
    {"epoch": 0.6174039863879436, "grad_norm": 1.8523931506545597, "learning_rate": 3.830709238537938e-06, "loss": 0.4136, "step": 635},
    {"epoch": 0.6222654350996597, "grad_norm": 1.8618206136881559, "learning_rate": 3.7483283238768685e-06, "loss": 0.4089, "step": 640},
    {"epoch": 0.6271268838113758, "grad_norm": 1.975112166712253, "learning_rate": 3.6663083502652335e-06, "loss": 0.4063, "step": 645},
    {"epoch": 0.6319883325230918, "grad_norm": 1.8034124348015945, "learning_rate": 3.5846729695727055e-06, "loss": 0.4022, "step": 650},
    {"epoch": 0.636849781234808, "grad_norm": 1.8638316469191787, "learning_rate": 3.503445722764967e-06, "loss": 0.4087, "step": 655},
    {"epoch": 0.6417112299465241, "grad_norm": 1.843833767339686, "learning_rate": 3.4226500331152843e-06, "loss": 0.4009, "step": 660},
    {"epoch": 0.6465726786582402, "grad_norm": 1.9595878869963705, "learning_rate": 3.342309199449991e-06, "loss": 0.4068, "step": 665},
    {"epoch": 0.6514341273699562, "grad_norm": 1.8378464923421844, "learning_rate": 3.262446389429883e-06, "loss": 0.3986, "step": 670},
    {"epoch": 0.6562955760816723, "grad_norm": 1.8906212458541776, "learning_rate": 3.183084632869411e-06, "loss": 0.3993, "step": 675},
    {"epoch": 0.6611570247933884, "grad_norm": 1.8741986531721049, "learning_rate": 3.104246815095653e-06, "loss": 0.3957, "step": 680},
    {"epoch": 0.6660184735051046, "grad_norm": 1.9025410789293065, "learning_rate": 3.0259556703489245e-06, "loss": 0.3961, "step": 685},
    {"epoch": 0.6708799222168206, "grad_norm": 1.8317452262969969, "learning_rate": 2.948233775226975e-06, "loss": 0.3936, "step": 690},
    {"epoch": 0.6757413709285367, "grad_norm": 1.791486581405094, "learning_rate": 2.871103542174637e-06, "loss": 0.3954, "step": 695},
    {"epoch": 0.6806028196402528, "grad_norm": 1.8421048675047038, "learning_rate": 2.794587213020813e-06, "loss": 0.392, "step": 700},
    {"epoch": 0.6854642683519689, "grad_norm": 1.8053899992220983, "learning_rate": 2.7187068525646578e-06, "loss": 0.3884, "step": 705},
    {"epoch": 0.690325717063685, "grad_norm": 1.8164816090358933, "learning_rate": 2.6434843422128225e-06, "loss": 0.3833, "step": 710},
    {"epoch": 0.6951871657754011, "grad_norm": 1.8692336513214356, "learning_rate": 2.5689413736695623e-06, "loss": 0.3925, "step": 715},
    {"epoch": 0.7000486144871172, "grad_norm": 1.8131226789865298, "learning_rate": 2.495099442681574e-06, "loss": 0.3772, "step": 720},
    {"epoch": 0.7049100631988332, "grad_norm": 1.7456402646995377, "learning_rate": 2.4219798428393167e-06, "loss": 0.3836, "step": 725},
    {"epoch": 0.7097715119105493, "grad_norm": 1.8417483163838206, "learning_rate": 2.3496036594366478e-06, "loss": 0.3767, "step": 730},
    {"epoch": 0.7146329606222654, "grad_norm": 1.8419144501834137, "learning_rate": 2.2779917633905075e-06, "loss": 0.3798, "step": 735},
    {"epoch": 0.7194944093339816, "grad_norm": 1.697011917829243, "learning_rate": 2.207164805222441e-06, "loss": 0.3731, "step": 740},
    {"epoch": 0.7243558580456976, "grad_norm": 1.7913218626855516, "learning_rate": 2.1371432091036525e-06, "loss": 0.3695, "step": 745},
    {"epoch": 0.7292173067574137, "grad_norm": 1.805255097420597, "learning_rate": 2.0679471669653596e-06, "loss": 0.3758, "step": 750},
    {"epoch": 0.7340787554691298, "grad_norm": 1.67858386866423, "learning_rate": 1.999596632676087e-06, "loss": 0.3723, "step": 755},
    {"epoch": 0.7389402041808459, "grad_norm": 1.6993602534801597, "learning_rate": 1.93211131628764e-06, "loss": 0.3684, "step": 760},
    {"epoch": 0.743801652892562, "grad_norm": 1.6768650553126156, "learning_rate": 1.865510678351361e-06, "loss": 0.3664, "step": 765},
    {"epoch": 0.7486631016042781, "grad_norm": 1.8063986711573161, "learning_rate": 1.7998139243063523e-06, "loss": 0.3664, "step": 770},
    {"epoch": 0.7535245503159942, "grad_norm": 1.8433399163627073, "learning_rate": 1.7350399989412503e-06, "loss": 0.3792, "step": 775},
    {"epoch": 0.7583859990277103, "grad_norm": 1.789706761318329, "learning_rate": 1.6712075809311801e-06, "loss": 0.3665, "step": 780},
    {"epoch": 0.7632474477394263, "grad_norm": 1.874352972700248, "learning_rate": 1.6083350774514256e-06, "loss": 0.3588, "step": 785},
    {"epoch": 0.7681088964511424, "grad_norm": 1.8708699807411355, "learning_rate": 1.5464406188694176e-06, "loss": 0.3596, "step": 790},
    {"epoch": 0.7729703451628586, "grad_norm": 1.7622927599849374, "learning_rate": 1.4855420535165177e-06, "loss": 0.3706, "step": 795},
    {"epoch": 0.7778317938745746, "grad_norm": 1.8033628264638553, "learning_rate": 1.4256569425411565e-06, "loss": 0.3627, "step": 800},
    {"epoch": 0.7826932425862907, "grad_norm": 1.6982941151170463, "learning_rate": 1.3668025548447645e-06, "loss": 0.3654, "step": 805},
    {"epoch": 0.7875546912980068, "grad_norm": 1.822085860012342, "learning_rate": 1.3089958621019966e-06, "loss": 0.3566, "step": 810},
    {"epoch": 0.7924161400097229, "grad_norm": 1.6803332236549615, "learning_rate": 1.2522535338666487e-06, "loss": 0.3539, "step": 815},
    {"epoch": 0.797277588721439, "grad_norm": 1.8018723375209953, "learning_rate": 1.1965919327647152e-06, "loss": 0.3624, "step": 820},
    {"epoch": 0.8021390374331551, "grad_norm": 1.681272890467866, "learning_rate": 1.1420271097759339e-06, "loss": 0.3527, "step": 825},
    {"epoch": 0.8070004861448712, "grad_norm": 1.7619365658437645, "learning_rate": 1.0885747996052203e-06, "loss": 0.353, "step": 830},
    {"epoch": 0.8118619348565873, "grad_norm": 1.7697650095429134, "learning_rate": 1.0362504161452857e-06, "loss": 0.3571, "step": 835},
    {"epoch": 0.8167233835683033, "grad_norm": 1.7834693090793727, "learning_rate": 9.850690480317837e-07, "loss": 0.3588, "step": 840},
    {"epoch": 0.8215848322800194, "grad_norm": 1.6879014168829525, "learning_rate": 9.350454542922366e-07, "loss": 0.3485, "step": 845},
    {"epoch": 0.8264462809917356, "grad_norm": 1.6631076557983835, "learning_rate": 8.861940600900215e-07, "loss": 0.3569, "step": 850},
    {"epoch": 0.8313077297034517, "grad_norm": 1.7470295248017005, "learning_rate": 8.385289525646211e-07, "loss": 0.3545, "step": 855},
    {"epoch": 0.8361691784151677, "grad_norm": 1.6694583459061383, "learning_rate": 7.920638767693606e-07, "loss": 0.3459, "step": 860},
    {"epoch": 0.8410306271268838, "grad_norm": 1.812544085868088, "learning_rate": 7.468122317077786e-07, "loss": 0.3497, "step": 865},
    {"epoch": 0.8458920758385999, "grad_norm": 1.804488776919829, "learning_rate": 7.027870664698011e-07, "loss": 0.3447, "step": 870},
    {"epoch": 0.8507535245503159, "grad_norm": 1.6565427674580868, "learning_rate": 6.600010764688042e-07, "loss": 0.3488, "step": 875},
    {"epoch": 0.8556149732620321, "grad_norm": 1.5660595611029826, "learning_rate": 6.184665997806832e-07, "loss": 0.3449, "step": 880},
    {"epoch": 0.8604764219737482, "grad_norm": 1.628170781974643, "learning_rate": 5.781956135859446e-07, "loss": 0.3554, "step": 885},
    {"epoch": 0.8653378706854643, "grad_norm": 1.6261129189211507, "learning_rate": 5.39199730715892e-07, "loss": 0.345, "step": 890},
    {"epoch": 0.8701993193971803, "grad_norm": 1.6307410900682924, "learning_rate": 5.01490196303856e-07, "loss": 0.3456, "step": 895},
    {"epoch": 0.8750607681088964, "grad_norm": 1.6901295083588348, "learning_rate": 4.650778845424758e-07, "loss": 0.3476, "step": 900},
    {"epoch": 0.8799222168206126, "grad_norm": 1.730644298411666, "learning_rate": 4.2997329554792965e-07, "loss": 0.3532, "step": 905},
    {"epoch": 0.8847836655323287, "grad_norm": 1.5909360839199094, "learning_rate": 3.961865523320557e-07, "loss": 0.3406, "step": 910},
    {"epoch": 0.8896451142440447, "grad_norm": 1.7660527301033595, "learning_rate": 3.637273978831984e-07, "loss": 0.3434, "step": 915},
    {"epoch": 0.8945065629557608, "grad_norm": 1.5431742144466063, "learning_rate": 3.326051923566559e-07, "loss": 0.3389, "step": 920},
    {"epoch": 0.8993680116674769, "grad_norm": 1.5777885568012344, "learning_rate": 3.028289103755172e-07, "loss": 0.3438, "step": 925},
    {"epoch": 0.904229460379193, "grad_norm": 1.6736588557089753, "learning_rate": 2.744071384426733e-07, "loss": 0.3456, "step": 930},
    {"epoch": 0.9090909090909091, "grad_norm": 1.584446915524184, "learning_rate": 2.473480724647548e-07, "loss": 0.3519, "step": 935},
    {"epoch": 0.9139523578026252, "grad_norm": 1.544706705307836, "learning_rate": 2.216595153886969e-07, "loss": 0.3504, "step": 940},
    {"epoch": 0.9188138065143413, "grad_norm": 1.6069910572578268, "learning_rate": 1.9734887495163114e-07, "loss": 0.3396, "step": 945},
    {"epoch": 0.9236752552260573, "grad_norm": 1.5262336921306658, "learning_rate": 1.7442316154473004e-07, "loss": 0.3391, "step": 950},
    {"epoch": 0.9285367039377734, "grad_norm": 1.6151419038210972, "learning_rate": 1.528889861916477e-07, "loss": 0.3455, "step": 955},
    {"epoch": 0.9333981526494896, "grad_norm": 1.7463253293156233, "learning_rate": 1.3275255864211245e-07, "loss": 0.342, "step": 960},
    {"epoch": 0.9382596013612057, "grad_norm": 2.15764657474219, "learning_rate": 1.1401968558123977e-07, "loss": 0.3432, "step": 965},
    {"epoch": 0.9431210500729217, "grad_norm": 1.5836218621495903, "learning_rate": 9.669576895507515e-08, "loss": 0.3434, "step": 970},
    {"epoch": 0.9479824987846378, "grad_norm": 1.6606392678607504, "learning_rate": 8.078580441285067e-08, "loss": 0.3488, "step": 975},
    {"epoch": 0.9528439474963539, "grad_norm": 1.634410010175144, "learning_rate": 6.629437986640397e-08, "loss": 0.348, "step": 980},
    {"epoch": 0.95770539620807, "grad_norm": 1.6947942875406026, "learning_rate": 5.322567416717106e-08, "loss": 0.3356, "step": 985},
    {"epoch": 0.9625668449197861, "grad_norm": 1.5244193912137922, "learning_rate": 4.158345590114965e-08, "loss": 0.3461, "step": 990},
    {"epoch": 0.9674282936315022, "grad_norm": 1.6425083834973517, "learning_rate": 3.137108230215513e-08, "loss": 0.3489, "step": 995},
    {"epoch": 0.9722897423432183, "grad_norm": 1.6739305879871746, "learning_rate": 2.259149828370999e-08, "loss": 0.3466, "step": 1000},
    {"epoch": 0.9771511910549344, "grad_norm": 1.6190798474108024, "learning_rate": 1.5247235589824772e-08, "loss": 0.3391, "step": 1005},
    {"epoch": 0.9820126397666504, "grad_norm": 1.72920925073275, "learning_rate": 9.340412064927084e-09, "loss": 0.3438, "step": 1010},
    {"epoch": 0.9868740884783666, "grad_norm": 1.6092917702604965, "learning_rate": 4.872731043143453e-09, "loss": 0.3454, "step": 1015},
    {"epoch": 0.9917355371900827, "grad_norm": 1.5519329352931979, "learning_rate": 1.845480857116111e-09, "loss": 0.3391, "step": 1020},
    {"epoch": 0.9965969859017987, "grad_norm": 1.6136231766922748, "learning_rate": 2.595344664868549e-10, "loss": 0.3413, "step": 1025},
    {"epoch": 0.9995138551288284, "eval_loss": 0.33695414662361145, "eval_runtime": 96.5912, "eval_samples_per_second": 3.127, "eval_steps_per_second": 0.787, "step": 1028},
    {"epoch": 0.9995138551288284, "step": 1028, "total_flos": 215189941125120.0, "train_loss": 0.5025305894098393, "train_runtime": 23224.052, "train_samples_per_second": 1.417, "train_steps_per_second": 0.044}
  ],
  "logging_steps": 5,
  "max_steps": 1028,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 215189941125120.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}