{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.6000474721101353,
  "eval_steps": 500,
  "global_step": 632,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
{ |
|
"epoch": 0.0009494422027059103, |
|
"grad_norm": 5.364119052886963, |
|
"learning_rate": 6.329113924050633e-07, |
|
"loss": 2.9616, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0018988844054118206, |
|
"grad_norm": 5.426063060760498, |
|
"learning_rate": 1.2658227848101265e-06, |
|
"loss": 2.9664, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0028483266081177306, |
|
"grad_norm": 5.507386684417725, |
|
"learning_rate": 1.8987341772151901e-06, |
|
"loss": 2.96, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0037977688108236413, |
|
"grad_norm": 5.467552185058594, |
|
"learning_rate": 2.531645569620253e-06, |
|
"loss": 2.975, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004747211013529551, |
|
"grad_norm": 5.386384963989258, |
|
"learning_rate": 3.1645569620253167e-06, |
|
"loss": 2.9473, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005696653216235461, |
|
"grad_norm": 4.9166951179504395, |
|
"learning_rate": 3.7974683544303802e-06, |
|
"loss": 2.8597, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.006646095418941372, |
|
"grad_norm": 5.469020843505859, |
|
"learning_rate": 4.430379746835443e-06, |
|
"loss": 2.9633, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0075955376216472826, |
|
"grad_norm": 5.380453586578369, |
|
"learning_rate": 5.063291139240506e-06, |
|
"loss": 2.8931, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008544979824353193, |
|
"grad_norm": 4.922253131866455, |
|
"learning_rate": 5.69620253164557e-06, |
|
"loss": 2.7452, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.009494422027059102, |
|
"grad_norm": 5.517508029937744, |
|
"learning_rate": 6.329113924050633e-06, |
|
"loss": 2.8326, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010443864229765013, |
|
"grad_norm": 5.810976982116699, |
|
"learning_rate": 6.9620253164556965e-06, |
|
"loss": 2.7854, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.011393306432470923, |
|
"grad_norm": 5.690661430358887, |
|
"learning_rate": 7.5949367088607605e-06, |
|
"loss": 2.7069, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012342748635176834, |
|
"grad_norm": 5.994122505187988, |
|
"learning_rate": 8.227848101265822e-06, |
|
"loss": 2.5705, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.013292190837882745, |
|
"grad_norm": 5.86803674697876, |
|
"learning_rate": 8.860759493670886e-06, |
|
"loss": 2.4461, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.014241633040588654, |
|
"grad_norm": 5.448781490325928, |
|
"learning_rate": 9.49367088607595e-06, |
|
"loss": 2.2408, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.015191075243294565, |
|
"grad_norm": 6.4004902839660645, |
|
"learning_rate": 1.0126582278481012e-05, |
|
"loss": 2.1205, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.016140517446000476, |
|
"grad_norm": 6.970590591430664, |
|
"learning_rate": 1.0759493670886076e-05, |
|
"loss": 1.9474, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.017089959648706386, |
|
"grad_norm": 7.423785209655762, |
|
"learning_rate": 1.139240506329114e-05, |
|
"loss": 1.7348, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.018039401851412295, |
|
"grad_norm": 7.429481029510498, |
|
"learning_rate": 1.2025316455696203e-05, |
|
"loss": 1.4835, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.018988844054118204, |
|
"grad_norm": 6.7193284034729, |
|
"learning_rate": 1.2658227848101267e-05, |
|
"loss": 1.2373, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.019938286256824117, |
|
"grad_norm": 4.46099853515625, |
|
"learning_rate": 1.3291139240506329e-05, |
|
"loss": 1.1095, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.020887728459530026, |
|
"grad_norm": 3.001573085784912, |
|
"learning_rate": 1.3924050632911393e-05, |
|
"loss": 0.8642, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.021837170662235936, |
|
"grad_norm": 2.197000026702881, |
|
"learning_rate": 1.4556962025316457e-05, |
|
"loss": 0.7734, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.022786612864941845, |
|
"grad_norm": 1.8113943338394165, |
|
"learning_rate": 1.5189873417721521e-05, |
|
"loss": 0.7341, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.023736055067647758, |
|
"grad_norm": 1.7461305856704712, |
|
"learning_rate": 1.5822784810126583e-05, |
|
"loss": 0.6743, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.024685497270353667, |
|
"grad_norm": 1.3315849304199219, |
|
"learning_rate": 1.6455696202531644e-05, |
|
"loss": 0.5975, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.025634939473059577, |
|
"grad_norm": 0.726314127445221, |
|
"learning_rate": 1.7088607594936708e-05, |
|
"loss": 0.5659, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02658438167576549, |
|
"grad_norm": 0.6269010901451111, |
|
"learning_rate": 1.7721518987341772e-05, |
|
"loss": 0.5226, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0275338238784714, |
|
"grad_norm": 0.5819966197013855, |
|
"learning_rate": 1.8354430379746836e-05, |
|
"loss": 0.6132, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.028483266081177308, |
|
"grad_norm": 0.6247850060462952, |
|
"learning_rate": 1.89873417721519e-05, |
|
"loss": 0.5, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.029432708283883217, |
|
"grad_norm": 0.702621579170227, |
|
"learning_rate": 1.962025316455696e-05, |
|
"loss": 0.4958, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.03038215048658913, |
|
"grad_norm": 0.6045309901237488, |
|
"learning_rate": 2.0253164556962025e-05, |
|
"loss": 0.4405, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.031331592689295036, |
|
"grad_norm": 0.5436626076698303, |
|
"learning_rate": 2.088607594936709e-05, |
|
"loss": 0.5607, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03228103489200095, |
|
"grad_norm": 0.43146297335624695, |
|
"learning_rate": 2.1518987341772153e-05, |
|
"loss": 0.3987, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03323047709470686, |
|
"grad_norm": 0.5124548673629761, |
|
"learning_rate": 2.2151898734177217e-05, |
|
"loss": 0.5084, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03417991929741277, |
|
"grad_norm": 0.4466649293899536, |
|
"learning_rate": 2.278481012658228e-05, |
|
"loss": 0.3761, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03512936150011868, |
|
"grad_norm": 0.41221529245376587, |
|
"learning_rate": 2.341772151898734e-05, |
|
"loss": 0.3859, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03607880370282459, |
|
"grad_norm": 0.3802257180213928, |
|
"learning_rate": 2.4050632911392405e-05, |
|
"loss": 0.3447, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0370282459055305, |
|
"grad_norm": 0.47727710008621216, |
|
"learning_rate": 2.468354430379747e-05, |
|
"loss": 0.3914, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03797768810823641, |
|
"grad_norm": 0.41048529744148254, |
|
"learning_rate": 2.5316455696202533e-05, |
|
"loss": 0.2988, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.038927130310942325, |
|
"grad_norm": 0.5019667744636536, |
|
"learning_rate": 2.5949367088607597e-05, |
|
"loss": 0.2938, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.039876572513648234, |
|
"grad_norm": 0.42121732234954834, |
|
"learning_rate": 2.6582278481012658e-05, |
|
"loss": 0.2579, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04082601471635414, |
|
"grad_norm": 0.4193897247314453, |
|
"learning_rate": 2.7215189873417722e-05, |
|
"loss": 0.3262, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04177545691906005, |
|
"grad_norm": 0.2978931665420532, |
|
"learning_rate": 2.7848101265822786e-05, |
|
"loss": 0.2365, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04272489912176596, |
|
"grad_norm": 0.34771448373794556, |
|
"learning_rate": 2.848101265822785e-05, |
|
"loss": 0.2364, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04367434132447187, |
|
"grad_norm": 0.3881576955318451, |
|
"learning_rate": 2.9113924050632914e-05, |
|
"loss": 0.286, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.04462378352717778, |
|
"grad_norm": 0.33863797783851624, |
|
"learning_rate": 2.9746835443037974e-05, |
|
"loss": 0.2739, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04557322572988369, |
|
"grad_norm": 0.2894616723060608, |
|
"learning_rate": 3.0379746835443042e-05, |
|
"loss": 0.2587, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.046522667932589606, |
|
"grad_norm": 0.22292694449424744, |
|
"learning_rate": 3.10126582278481e-05, |
|
"loss": 0.1861, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.047472110135295516, |
|
"grad_norm": 0.21907460689544678, |
|
"learning_rate": 3.1645569620253167e-05, |
|
"loss": 0.1755, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.048421552338001425, |
|
"grad_norm": 0.29593944549560547, |
|
"learning_rate": 3.227848101265823e-05, |
|
"loss": 0.1856, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.049370994540707334, |
|
"grad_norm": 0.23055657744407654, |
|
"learning_rate": 3.291139240506329e-05, |
|
"loss": 0.2102, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.050320436743413244, |
|
"grad_norm": 0.18929323554039001, |
|
"learning_rate": 3.354430379746836e-05, |
|
"loss": 0.1909, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05126987894611915, |
|
"grad_norm": 0.15004883706569672, |
|
"learning_rate": 3.4177215189873416e-05, |
|
"loss": 0.1619, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.05221932114882506, |
|
"grad_norm": 0.15621644258499146, |
|
"learning_rate": 3.4810126582278487e-05, |
|
"loss": 0.1759, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05316876335153098, |
|
"grad_norm": 0.16266578435897827, |
|
"learning_rate": 3.5443037974683544e-05, |
|
"loss": 0.1657, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.05411820555423689, |
|
"grad_norm": 0.14417718350887299, |
|
"learning_rate": 3.607594936708861e-05, |
|
"loss": 0.1698, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0550676477569428, |
|
"grad_norm": 0.21402889490127563, |
|
"learning_rate": 3.670886075949367e-05, |
|
"loss": 0.2185, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05601708995964871, |
|
"grad_norm": 0.1997889280319214, |
|
"learning_rate": 3.7341772151898736e-05, |
|
"loss": 0.2143, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.056966532162354616, |
|
"grad_norm": 0.13755086064338684, |
|
"learning_rate": 3.79746835443038e-05, |
|
"loss": 0.1677, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.057915974365060525, |
|
"grad_norm": 0.19304363429546356, |
|
"learning_rate": 3.8607594936708864e-05, |
|
"loss": 0.2113, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.058865416567766435, |
|
"grad_norm": 0.14066031575202942, |
|
"learning_rate": 3.924050632911392e-05, |
|
"loss": 0.1612, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.059814858770472344, |
|
"grad_norm": 0.13375213742256165, |
|
"learning_rate": 3.987341772151899e-05, |
|
"loss": 0.164, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06076430097317826, |
|
"grad_norm": 0.15216922760009766, |
|
"learning_rate": 4.050632911392405e-05, |
|
"loss": 0.16, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06171374317588417, |
|
"grad_norm": 0.16130389273166656, |
|
"learning_rate": 4.113924050632912e-05, |
|
"loss": 0.1957, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06266318537859007, |
|
"grad_norm": 0.1791229248046875, |
|
"learning_rate": 4.177215189873418e-05, |
|
"loss": 0.1993, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06361262758129599, |
|
"grad_norm": 0.11038907617330551, |
|
"learning_rate": 4.240506329113924e-05, |
|
"loss": 0.1517, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0645620697840019, |
|
"grad_norm": 0.13327902555465698, |
|
"learning_rate": 4.3037974683544305e-05, |
|
"loss": 0.1501, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06551151198670781, |
|
"grad_norm": 0.13731731474399567, |
|
"learning_rate": 4.367088607594937e-05, |
|
"loss": 0.1596, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06646095418941372, |
|
"grad_norm": 0.13924308121204376, |
|
"learning_rate": 4.430379746835443e-05, |
|
"loss": 0.152, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06741039639211963, |
|
"grad_norm": 0.1482289433479309, |
|
"learning_rate": 4.49367088607595e-05, |
|
"loss": 0.1536, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06835983859482554, |
|
"grad_norm": 0.10759364813566208, |
|
"learning_rate": 4.556962025316456e-05, |
|
"loss": 0.1543, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06930928079753144, |
|
"grad_norm": 0.12899678945541382, |
|
"learning_rate": 4.6202531645569625e-05, |
|
"loss": 0.165, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07025872300023736, |
|
"grad_norm": 0.11689919233322144, |
|
"learning_rate": 4.683544303797468e-05, |
|
"loss": 0.1564, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07120816520294328, |
|
"grad_norm": 0.12697139382362366, |
|
"learning_rate": 4.7468354430379746e-05, |
|
"loss": 0.162, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07215760740564918, |
|
"grad_norm": 0.12069376558065414, |
|
"learning_rate": 4.810126582278481e-05, |
|
"loss": 0.1467, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0731070496083551, |
|
"grad_norm": 0.10199815034866333, |
|
"learning_rate": 4.8734177215189874e-05, |
|
"loss": 0.1528, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.074056491811061, |
|
"grad_norm": 0.1142750009894371, |
|
"learning_rate": 4.936708860759494e-05, |
|
"loss": 0.1574, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07500593401376691, |
|
"grad_norm": 0.11019093543291092, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1512, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07595537621647282, |
|
"grad_norm": 0.09426973015069962, |
|
"learning_rate": 5.0632911392405066e-05, |
|
"loss": 0.1481, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07690481841917873, |
|
"grad_norm": 0.09757663309574127, |
|
"learning_rate": 5.1265822784810124e-05, |
|
"loss": 0.1484, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07785426062188465, |
|
"grad_norm": 0.10646392405033112, |
|
"learning_rate": 5.1898734177215194e-05, |
|
"loss": 0.1549, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07880370282459055, |
|
"grad_norm": 0.12109784036874771, |
|
"learning_rate": 5.253164556962026e-05, |
|
"loss": 0.1448, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07975314502729647, |
|
"grad_norm": 0.12039211392402649, |
|
"learning_rate": 5.3164556962025316e-05, |
|
"loss": 0.1538, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08070258723000237, |
|
"grad_norm": 0.16873961687088013, |
|
"learning_rate": 5.379746835443038e-05, |
|
"loss": 0.1971, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08165202943270829, |
|
"grad_norm": 0.12140022218227386, |
|
"learning_rate": 5.4430379746835444e-05, |
|
"loss": 0.1497, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08260147163541419, |
|
"grad_norm": 0.14637599885463715, |
|
"learning_rate": 5.5063291139240514e-05, |
|
"loss": 0.1958, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0835509138381201, |
|
"grad_norm": 0.1141396313905716, |
|
"learning_rate": 5.569620253164557e-05, |
|
"loss": 0.1457, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08450035604082601, |
|
"grad_norm": 0.2128390371799469, |
|
"learning_rate": 5.6329113924050636e-05, |
|
"loss": 0.2339, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08544979824353192, |
|
"grad_norm": 0.18838858604431152, |
|
"learning_rate": 5.69620253164557e-05, |
|
"loss": 0.2029, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08639924044623784, |
|
"grad_norm": 0.19592566788196564, |
|
"learning_rate": 5.759493670886076e-05, |
|
"loss": 0.2276, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08734868264894374, |
|
"grad_norm": 0.14753012359142303, |
|
"learning_rate": 5.822784810126583e-05, |
|
"loss": 0.1916, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08829812485164966, |
|
"grad_norm": 0.1494351178407669, |
|
"learning_rate": 5.886075949367089e-05, |
|
"loss": 0.1913, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08924756705435556, |
|
"grad_norm": 0.1173478439450264, |
|
"learning_rate": 5.949367088607595e-05, |
|
"loss": 0.1438, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.09019700925706148, |
|
"grad_norm": 0.12023188918828964, |
|
"learning_rate": 6.012658227848101e-05, |
|
"loss": 0.1516, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09114645145976738, |
|
"grad_norm": 0.1275833547115326, |
|
"learning_rate": 6.0759493670886084e-05, |
|
"loss": 0.1492, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0920958936624733, |
|
"grad_norm": 0.1360282599925995, |
|
"learning_rate": 6.139240506329115e-05, |
|
"loss": 0.1507, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09304533586517921, |
|
"grad_norm": 0.1586841195821762, |
|
"learning_rate": 6.20253164556962e-05, |
|
"loss": 0.1956, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09399477806788512, |
|
"grad_norm": 0.14281995594501495, |
|
"learning_rate": 6.265822784810128e-05, |
|
"loss": 0.1774, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09494422027059103, |
|
"grad_norm": 0.12553077936172485, |
|
"learning_rate": 6.329113924050633e-05, |
|
"loss": 0.148, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09589366247329693, |
|
"grad_norm": 0.1117570698261261, |
|
"learning_rate": 6.392405063291139e-05, |
|
"loss": 0.16, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09684310467600285, |
|
"grad_norm": 0.13955281674861908, |
|
"learning_rate": 6.455696202531646e-05, |
|
"loss": 0.1464, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09779254687870875, |
|
"grad_norm": 0.10990285873413086, |
|
"learning_rate": 6.518987341772153e-05, |
|
"loss": 0.147, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09874198908141467, |
|
"grad_norm": 0.10545991361141205, |
|
"learning_rate": 6.582278481012658e-05, |
|
"loss": 0.1436, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09969143128412059, |
|
"grad_norm": 0.1717437207698822, |
|
"learning_rate": 6.645569620253165e-05, |
|
"loss": 0.2278, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10064087348682649, |
|
"grad_norm": 0.10950994491577148, |
|
"learning_rate": 6.708860759493672e-05, |
|
"loss": 0.1493, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.1015903156895324, |
|
"grad_norm": 0.11200258880853653, |
|
"learning_rate": 6.772151898734177e-05, |
|
"loss": 0.1536, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.1025397578922383, |
|
"grad_norm": 0.10955105721950531, |
|
"learning_rate": 6.835443037974683e-05, |
|
"loss": 0.1483, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.10348920009494422, |
|
"grad_norm": 0.11920775473117828, |
|
"learning_rate": 6.89873417721519e-05, |
|
"loss": 0.1492, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.10443864229765012, |
|
"grad_norm": 0.1390092819929123, |
|
"learning_rate": 6.962025316455697e-05, |
|
"loss": 0.1849, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.10538808450035604, |
|
"grad_norm": 0.1363140493631363, |
|
"learning_rate": 7.025316455696203e-05, |
|
"loss": 0.1849, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.10633752670306196, |
|
"grad_norm": 0.09190025180578232, |
|
"learning_rate": 7.088607594936709e-05, |
|
"loss": 0.1587, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.10728696890576786, |
|
"grad_norm": 0.09020426124334335, |
|
"learning_rate": 7.151898734177216e-05, |
|
"loss": 0.1377, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.10823641110847378, |
|
"grad_norm": 0.10544883459806442, |
|
"learning_rate": 7.215189873417722e-05, |
|
"loss": 0.1516, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.10918585331117968, |
|
"grad_norm": 0.12401281297206879, |
|
"learning_rate": 7.278481012658229e-05, |
|
"loss": 0.154, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1101352955138856, |
|
"grad_norm": 0.1008707657456398, |
|
"learning_rate": 7.341772151898734e-05, |
|
"loss": 0.1448, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.1110847377165915, |
|
"grad_norm": 0.10302747040987015, |
|
"learning_rate": 7.40506329113924e-05, |
|
"loss": 0.1451, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.11203417991929741, |
|
"grad_norm": 0.12748293578624725, |
|
"learning_rate": 7.468354430379747e-05, |
|
"loss": 0.1829, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11298362212200333, |
|
"grad_norm": 0.10413361340761185, |
|
"learning_rate": 7.531645569620254e-05, |
|
"loss": 0.1371, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11393306432470923, |
|
"grad_norm": 0.1243433803319931, |
|
"learning_rate": 7.59493670886076e-05, |
|
"loss": 0.1409, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11488250652741515, |
|
"grad_norm": 0.11630933731794357, |
|
"learning_rate": 7.658227848101266e-05, |
|
"loss": 0.1372, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.11583194873012105, |
|
"grad_norm": 0.17981529235839844, |
|
"learning_rate": 7.721518987341773e-05, |
|
"loss": 0.2257, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.11678139093282697, |
|
"grad_norm": 0.14063452184200287, |
|
"learning_rate": 7.78481012658228e-05, |
|
"loss": 0.1841, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.11773083313553287, |
|
"grad_norm": 0.1264188438653946, |
|
"learning_rate": 7.848101265822784e-05, |
|
"loss": 0.1471, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.11868027533823879, |
|
"grad_norm": 0.12827955186367035, |
|
"learning_rate": 7.911392405063291e-05, |
|
"loss": 0.1493, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11962971754094469, |
|
"grad_norm": 0.09800329059362411, |
|
"learning_rate": 7.974683544303798e-05, |
|
"loss": 0.1414, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1205791597436506, |
|
"grad_norm": 0.09902197122573853, |
|
"learning_rate": 8.037974683544304e-05, |
|
"loss": 0.1462, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.12152860194635652, |
|
"grad_norm": 0.09450504928827286, |
|
"learning_rate": 8.10126582278481e-05, |
|
"loss": 0.1484, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.12247804414906242, |
|
"grad_norm": 0.11012883484363556, |
|
"learning_rate": 8.164556962025317e-05, |
|
"loss": 0.1437, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12342748635176834, |
|
"grad_norm": 0.11717642843723297, |
|
"learning_rate": 8.227848101265824e-05, |
|
"loss": 0.1478, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12437692855447424, |
|
"grad_norm": 0.08754123747348785, |
|
"learning_rate": 8.29113924050633e-05, |
|
"loss": 0.1408, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.12532637075718014, |
|
"grad_norm": 0.10017862170934677, |
|
"learning_rate": 8.354430379746835e-05, |
|
"loss": 0.1476, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.12627581295988607, |
|
"grad_norm": 0.08994068205356598, |
|
"learning_rate": 8.417721518987342e-05, |
|
"loss": 0.1478, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.12722525516259198, |
|
"grad_norm": 0.09894968569278717, |
|
"learning_rate": 8.481012658227848e-05, |
|
"loss": 0.1309, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.12817469736529788, |
|
"grad_norm": 0.10028701275587082, |
|
"learning_rate": 8.544303797468355e-05, |
|
"loss": 0.1433, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1291241395680038, |
|
"grad_norm": 0.0897536426782608, |
|
"learning_rate": 8.607594936708861e-05, |
|
"loss": 0.1459, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1300735817707097, |
|
"grad_norm": 0.10435349494218826, |
|
"learning_rate": 8.670886075949367e-05, |
|
"loss": 0.1434, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.13102302397341561, |
|
"grad_norm": 0.11718117445707321, |
|
"learning_rate": 8.734177215189874e-05, |
|
"loss": 0.1509, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.13197246617612152, |
|
"grad_norm": 0.14426474273204803, |
|
"learning_rate": 8.797468354430381e-05, |
|
"loss": 0.1373, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.13292190837882745, |
|
"grad_norm": 0.13101965188980103, |
|
"learning_rate": 8.860759493670887e-05, |
|
"loss": 0.1358, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.13387135058153335, |
|
"grad_norm": 0.11235956102609634, |
|
"learning_rate": 8.924050632911392e-05, |
|
"loss": 0.1394, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.13482079278423925, |
|
"grad_norm": 0.11327100545167923, |
|
"learning_rate": 8.9873417721519e-05, |
|
"loss": 0.1443, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.13577023498694518, |
|
"grad_norm": 0.10912016034126282, |
|
"learning_rate": 9.050632911392407e-05, |
|
"loss": 0.1698, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.13671967718965108, |
|
"grad_norm": 0.16535617411136627, |
|
"learning_rate": 9.113924050632912e-05, |
|
"loss": 0.2255, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.137669119392357, |
|
"grad_norm": 0.10184327512979507, |
|
"learning_rate": 9.177215189873418e-05, |
|
"loss": 0.1371, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1386185615950629, |
|
"grad_norm": 0.10998040437698364, |
|
"learning_rate": 9.240506329113925e-05, |
|
"loss": 0.1794, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.13956800379776882, |
|
"grad_norm": 0.08974044770002365, |
|
"learning_rate": 9.303797468354431e-05, |
|
"loss": 0.144, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.14051744600047472, |
|
"grad_norm": 0.12724193930625916, |
|
"learning_rate": 9.367088607594936e-05, |
|
"loss": 0.1794, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.14146688820318062, |
|
"grad_norm": 0.1079091802239418, |
|
"learning_rate": 9.430379746835444e-05, |
|
"loss": 0.1399, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.14241633040588655, |
|
"grad_norm": 0.09480807185173035, |
|
"learning_rate": 9.493670886075949e-05, |
|
"loss": 0.1395, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.14336577260859246, |
|
"grad_norm": 0.08620745688676834, |
|
"learning_rate": 9.556962025316456e-05, |
|
"loss": 0.1415, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.14431521481129836, |
|
"grad_norm": 0.10517002642154694, |
|
"learning_rate": 9.620253164556962e-05, |
|
"loss": 0.1723, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.14526465701400426, |
|
"grad_norm": 0.0956311896443367, |
|
"learning_rate": 9.683544303797469e-05, |
|
"loss": 0.1515, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.1462140992167102, |
|
"grad_norm": 0.08050324022769928, |
|
"learning_rate": 9.746835443037975e-05, |
|
"loss": 0.1322, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1471635414194161, |
|
"grad_norm": 0.0853201299905777, |
|
"learning_rate": 9.810126582278482e-05, |
|
"loss": 0.142, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.148112983622122, |
|
"grad_norm": 0.09991180151700974, |
|
"learning_rate": 9.873417721518988e-05, |
|
"loss": 0.1348, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.14906242582482793, |
|
"grad_norm": 0.08640603721141815, |
|
"learning_rate": 9.936708860759493e-05, |
|
"loss": 0.1397, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.15001186802753383, |
|
"grad_norm": 0.09057717025279999, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1381, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.15096131023023973, |
|
"grad_norm": 0.09916041046380997, |
|
"learning_rate": 0.00010063291139240508, |
|
"loss": 0.1509, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.15191075243294563, |
|
"grad_norm": 0.09434045851230621, |
|
"learning_rate": 0.00010126582278481013, |
|
"loss": 0.1388, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.15286019463565156, |
|
"grad_norm": 0.1273377537727356, |
|
"learning_rate": 0.0001018987341772152, |
|
"loss": 0.1401, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.15380963683835747, |
|
"grad_norm": 0.1297912299633026, |
|
"learning_rate": 0.00010253164556962025, |
|
"loss": 0.1852, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.15475907904106337, |
|
"grad_norm": 0.1151595488190651, |
|
"learning_rate": 0.00010316455696202532, |
|
"loss": 0.1848, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1557085212437693, |
|
"grad_norm": 0.13381290435791016, |
|
"learning_rate": 0.00010379746835443039, |
|
"loss": 0.1438, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.1566579634464752, |
|
"grad_norm": 0.07880119979381561, |
|
"learning_rate": 0.00010443037974683545, |
|
"loss": 0.1327, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1576074056491811, |
|
"grad_norm": 0.0843740776181221, |
|
"learning_rate": 0.00010506329113924052, |
|
"loss": 0.1398, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.158556847851887, |
|
"grad_norm": 0.0981813594698906, |
|
"learning_rate": 0.00010569620253164559, |
|
"loss": 0.1409, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.15950629005459294, |
|
"grad_norm": 0.10005304962396622, |
|
"learning_rate": 0.00010632911392405063, |
|
"loss": 0.1783, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.16045573225729884, |
|
"grad_norm": 0.08365727961063385, |
|
"learning_rate": 0.00010696202531645569, |
|
"loss": 0.1275, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.16140517446000474, |
|
"grad_norm": 0.1017635315656662, |
|
"learning_rate": 0.00010759493670886076, |
|
"loss": 0.1792, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.16235461666271067, |
|
"grad_norm": 0.07007888704538345, |
|
"learning_rate": 0.00010822784810126583, |
|
"loss": 0.1473, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.16330405886541657, |
|
"grad_norm": 0.07718679308891296, |
|
"learning_rate": 0.00010886075949367089, |
|
"loss": 0.1396, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.16425350106812248, |
|
"grad_norm": 0.07228100299835205, |
|
"learning_rate": 0.00010949367088607596, |
|
"loss": 0.1398, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.16520294327082838, |
|
"grad_norm": 0.07955378293991089, |
|
"learning_rate": 0.00011012658227848103, |
|
"loss": 0.1402, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.1661523854735343, |
|
"grad_norm": 0.0816427692770958, |
|
"learning_rate": 0.00011075949367088607, |
|
"loss": 0.1345, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1671018276762402, |
|
"grad_norm": 0.07641757279634476, |
|
"learning_rate": 0.00011139240506329114, |
|
"loss": 0.1373, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.1680512698789461, |
|
"grad_norm": 0.07354450225830078, |
|
"learning_rate": 0.0001120253164556962, |
|
"loss": 0.1394, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.16900071208165202, |
|
"grad_norm": 0.08322398364543915, |
|
"learning_rate": 0.00011265822784810127, |
|
"loss": 0.138, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.16995015428435795, |
|
"grad_norm": 0.13528607785701752, |
|
"learning_rate": 0.00011329113924050634, |
|
"loss": 0.2188, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.17089959648706385, |
|
"grad_norm": 0.10803692042827606, |
|
"learning_rate": 0.0001139240506329114, |
|
"loss": 0.1782, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.17184903868976975, |
|
"grad_norm": 0.08404573053121567, |
|
"learning_rate": 0.00011455696202531647, |
|
"loss": 0.1394, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.17279848089247568, |
|
"grad_norm": 0.12790893018245697, |
|
"learning_rate": 0.00011518987341772151, |
|
"loss": 0.2157, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.17374792309518158, |
|
"grad_norm": 0.09879907220602036, |
|
"learning_rate": 0.00011582278481012658, |
|
"loss": 0.1693, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.17469736529788749, |
|
"grad_norm": 0.08092228323221207, |
|
"learning_rate": 0.00011645569620253166, |
|
"loss": 0.136, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.1756468075005934, |
|
"grad_norm": 0.07660632580518723, |
|
"learning_rate": 0.00011708860759493671, |
|
"loss": 0.1332, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.17659624970329932, |
|
"grad_norm": 0.07474201172590256, |
|
"learning_rate": 0.00011772151898734178, |
|
"loss": 0.1301, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.17754569190600522, |
|
"grad_norm": 0.09162931889295578, |
|
"learning_rate": 0.00011835443037974685, |
|
"loss": 0.1407, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.17849513410871112, |
|
"grad_norm": 0.08646775782108307, |
|
"learning_rate": 0.0001189873417721519, |
|
"loss": 0.139, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.17944457631141705, |
|
"grad_norm": 0.0759253203868866, |
|
"learning_rate": 0.00011962025316455696, |
|
"loss": 0.1342, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.18039401851412296, |
|
"grad_norm": 0.08292865008115768, |
|
"learning_rate": 0.00012025316455696203, |
|
"loss": 0.1389, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.18134346071682886, |
|
"grad_norm": 0.12379574030637741, |
|
"learning_rate": 0.0001208860759493671, |
|
"loss": 0.1795, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.18229290291953476, |
|
"grad_norm": 0.10240278393030167, |
|
"learning_rate": 0.00012151898734177217, |
|
"loss": 0.1721, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1832423451222407, |
|
"grad_norm": 0.09666036069393158, |
|
"learning_rate": 0.00012215189873417722, |
|
"loss": 0.1783, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1841917873249466, |
|
"grad_norm": 0.08314768224954605, |
|
"learning_rate": 0.0001227848101265823, |
|
"loss": 0.1429, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1851412295276525, |
|
"grad_norm": 0.07590368390083313, |
|
"learning_rate": 0.00012341772151898734, |
|
"loss": 0.1393, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.18609067173035843, |
|
"grad_norm": 0.10585250705480576, |
|
"learning_rate": 0.0001240506329113924, |
|
"loss": 0.2155, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.18704011393306433, |
|
"grad_norm": 0.06995555013418198, |
|
"learning_rate": 0.00012468354430379748, |
|
"loss": 0.1374, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.18798955613577023, |
|
"grad_norm": 0.07370735704898834, |
|
"learning_rate": 0.00012531645569620255, |
|
"loss": 0.1367, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.18893899833847613, |
|
"grad_norm": 0.07194443792104721, |
|
"learning_rate": 0.0001259493670886076, |
|
"loss": 0.1437, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.18988844054118206, |
|
"grad_norm": 0.06982647627592087, |
|
"learning_rate": 0.00012658227848101267, |
|
"loss": 0.1358, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19083788274388797, |
|
"grad_norm": 0.06538347154855728, |
|
"learning_rate": 0.0001272151898734177, |
|
"loss": 0.1354, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.19178732494659387, |
|
"grad_norm": 0.07789324969053268, |
|
"learning_rate": 0.00012784810126582278, |
|
"loss": 0.178, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.1927367671492998, |
|
"grad_norm": 0.07376820594072342, |
|
"learning_rate": 0.00012848101265822785, |
|
"loss": 0.1621, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.1936862093520057, |
|
"grad_norm": 0.0720745250582695, |
|
"learning_rate": 0.00012911392405063292, |
|
"loss": 0.132, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.1946356515547116, |
|
"grad_norm": 0.06211116537451744, |
|
"learning_rate": 0.000129746835443038, |
|
"loss": 0.1387, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.1955850937574175, |
|
"grad_norm": 0.06701771914958954, |
|
"learning_rate": 0.00013037974683544306, |
|
"loss": 0.14, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.19653453596012344, |
|
"grad_norm": 0.07692532986402512, |
|
"learning_rate": 0.0001310126582278481, |
|
"loss": 0.1322, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.19748397816282934, |
|
"grad_norm": 0.07763269543647766, |
|
"learning_rate": 0.00013164556962025315, |
|
"loss": 0.1393, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.19843342036553524, |
|
"grad_norm": 0.08769022673368454, |
|
"learning_rate": 0.00013227848101265822, |
|
"loss": 0.1489, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.19938286256824117, |
|
"grad_norm": 0.08881859481334686, |
|
"learning_rate": 0.0001329113924050633, |
|
"loss": 0.1765, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20033230477094707, |
|
"grad_norm": 0.06811822950839996, |
|
"learning_rate": 0.00013354430379746836, |
|
"loss": 0.1332, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.20128174697365298, |
|
"grad_norm": 0.06390922516584396, |
|
"learning_rate": 0.00013417721518987343, |
|
"loss": 0.1343, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.20223118917635888, |
|
"grad_norm": 0.06630406528711319, |
|
"learning_rate": 0.0001348101265822785, |
|
"loss": 0.1329, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.2031806313790648, |
|
"grad_norm": 0.0730772465467453, |
|
"learning_rate": 0.00013544303797468355, |
|
"loss": 0.1354, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.2041300735817707, |
|
"grad_norm": 0.06487323343753815, |
|
"learning_rate": 0.00013607594936708862, |
|
"loss": 0.1297, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2050795157844766, |
|
"grad_norm": 0.06967955082654953, |
|
"learning_rate": 0.00013670886075949366, |
|
"loss": 0.1398, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.20602895798718254, |
|
"grad_norm": 0.08531820774078369, |
|
"learning_rate": 0.00013734177215189873, |
|
"loss": 0.1336, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.20697840018988845, |
|
"grad_norm": 0.0757659375667572, |
|
"learning_rate": 0.0001379746835443038, |
|
"loss": 0.1606, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.20792784239259435, |
|
"grad_norm": 0.060206469148397446, |
|
"learning_rate": 0.00013860759493670888, |
|
"loss": 0.1337, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.20887728459530025, |
|
"grad_norm": 0.07996556162834167, |
|
"learning_rate": 0.00013924050632911395, |
|
"loss": 0.1308, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.20982672679800618, |
|
"grad_norm": 0.06206861138343811, |
|
"learning_rate": 0.000139873417721519, |
|
"loss": 0.1347, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.21077616900071208, |
|
"grad_norm": 0.08736416697502136, |
|
"learning_rate": 0.00014050632911392406, |
|
"loss": 0.1768, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.21172561120341798, |
|
"grad_norm": 0.06427916139364243, |
|
"learning_rate": 0.00014113924050632913, |
|
"loss": 0.1374, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.21267505340612392, |
|
"grad_norm": 0.10996536910533905, |
|
"learning_rate": 0.00014177215189873418, |
|
"loss": 0.222, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.21362449560882982, |
|
"grad_norm": 0.08439125120639801, |
|
"learning_rate": 0.00014240506329113925, |
|
"loss": 0.1854, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.21457393781153572, |
|
"grad_norm": 0.06892693787813187, |
|
"learning_rate": 0.00014303797468354432, |
|
"loss": 0.139, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.21552338001424162, |
|
"grad_norm": 0.08241122961044312, |
|
"learning_rate": 0.0001436708860759494, |
|
"loss": 0.173, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.21647282221694755, |
|
"grad_norm": 0.07911046594381332, |
|
"learning_rate": 0.00014430379746835443, |
|
"loss": 0.1418, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.21742226441965345, |
|
"grad_norm": 0.06346064805984497, |
|
"learning_rate": 0.0001449367088607595, |
|
"loss": 0.1406, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.21837170662235936, |
|
"grad_norm": 0.060393668711185455, |
|
"learning_rate": 0.00014556962025316457, |
|
"loss": 0.1417, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2193211488250653, |
|
"grad_norm": 0.05912507325410843, |
|
"learning_rate": 0.00014620253164556962, |
|
"loss": 0.1298, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.2202705910277712, |
|
"grad_norm": 0.07730337232351303, |
|
"learning_rate": 0.0001468354430379747, |
|
"loss": 0.1769, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.2212200332304771, |
|
"grad_norm": 0.07612381875514984, |
|
"learning_rate": 0.00014746835443037976, |
|
"loss": 0.1338, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.222169475433183, |
|
"grad_norm": 0.055311791598796844, |
|
"learning_rate": 0.0001481012658227848, |
|
"loss": 0.1313, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.22311891763588892, |
|
"grad_norm": 0.08492033183574677, |
|
"learning_rate": 0.00014873417721518987, |
|
"loss": 0.1367, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.22406835983859483, |
|
"grad_norm": 0.07133237272500992, |
|
"learning_rate": 0.00014936708860759494, |
|
"loss": 0.1308, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.22501780204130073, |
|
"grad_norm": 0.07148605585098267, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 0.133, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.22596724424400666, |
|
"grad_norm": 0.06900472939014435, |
|
"learning_rate": 0.00015063291139240508, |
|
"loss": 0.138, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.22691668644671256, |
|
"grad_norm": 0.062325432896614075, |
|
"learning_rate": 0.00015126582278481013, |
|
"loss": 0.1338, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.22786612864941846, |
|
"grad_norm": 0.06719667464494705, |
|
"learning_rate": 0.0001518987341772152, |
|
"loss": 0.1316, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.22881557085212437, |
|
"grad_norm": 0.07456009089946747, |
|
"learning_rate": 0.00015253164556962024, |
|
"loss": 0.1412, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.2297650130548303, |
|
"grad_norm": 0.05619575083255768, |
|
"learning_rate": 0.00015316455696202531, |
|
"loss": 0.1342, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.2307144552575362, |
|
"grad_norm": 0.06157098710536957, |
|
"learning_rate": 0.00015379746835443038, |
|
"loss": 0.1329, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.2316638974602421, |
|
"grad_norm": 0.06759827584028244, |
|
"learning_rate": 0.00015443037974683546, |
|
"loss": 0.1411, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.232613339662948, |
|
"grad_norm": 0.06892479956150055, |
|
"learning_rate": 0.00015506329113924053, |
|
"loss": 0.1484, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.23356278186565393, |
|
"grad_norm": 0.08536699414253235, |
|
"learning_rate": 0.0001556962025316456, |
|
"loss": 0.1855, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.23451222406835984, |
|
"grad_norm": 0.06800314784049988, |
|
"learning_rate": 0.00015632911392405064, |
|
"loss": 0.1379, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.23546166627106574, |
|
"grad_norm": 0.0625622496008873, |
|
"learning_rate": 0.00015696202531645568, |
|
"loss": 0.1344, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.23641110847377167, |
|
"grad_norm": 0.06030593812465668, |
|
"learning_rate": 0.00015759493670886075, |
|
"loss": 0.1254, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.23736055067647757, |
|
"grad_norm": 0.06694353371858597, |
|
"learning_rate": 0.00015822784810126583, |
|
"loss": 0.1413, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23830999287918347, |
|
"grad_norm": 0.06594134122133255, |
|
"learning_rate": 0.0001588607594936709, |
|
"loss": 0.1394, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.23925943508188938, |
|
"grad_norm": 0.09062930941581726, |
|
"learning_rate": 0.00015949367088607597, |
|
"loss": 0.1883, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.2402088772845953, |
|
"grad_norm": 0.06029089167714119, |
|
"learning_rate": 0.00016012658227848104, |
|
"loss": 0.1271, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.2411583194873012, |
|
"grad_norm": 0.08471622318029404, |
|
"learning_rate": 0.00016075949367088608, |
|
"loss": 0.172, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.2421077616900071, |
|
"grad_norm": 0.061710160225629807, |
|
"learning_rate": 0.00016139240506329115, |
|
"loss": 0.1348, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.24305720389271304, |
|
"grad_norm": 0.0812671035528183, |
|
"learning_rate": 0.0001620253164556962, |
|
"loss": 0.1312, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.24400664609541894, |
|
"grad_norm": 0.06917005032300949, |
|
"learning_rate": 0.00016265822784810127, |
|
"loss": 0.1464, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.24495608829812485, |
|
"grad_norm": 0.0905887708067894, |
|
"learning_rate": 0.00016329113924050634, |
|
"loss": 0.1759, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.24590553050083075, |
|
"grad_norm": 0.05976787209510803, |
|
"learning_rate": 0.0001639240506329114, |
|
"loss": 0.1404, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.24685497270353668, |
|
"grad_norm": 0.07545675337314606, |
|
"learning_rate": 0.00016455696202531648, |
|
"loss": 0.1322, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.24780441490624258, |
|
"grad_norm": 0.07035024464130402, |
|
"learning_rate": 0.00016518987341772152, |
|
"loss": 0.1378, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.24875385710894848, |
|
"grad_norm": 0.07665737718343735, |
|
"learning_rate": 0.0001658227848101266, |
|
"loss": 0.1827, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.24970329931165441, |
|
"grad_norm": 0.06619013845920563, |
|
"learning_rate": 0.00016645569620253166, |
|
"loss": 0.1284, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.2506527415143603, |
|
"grad_norm": 0.0647001713514328, |
|
"learning_rate": 0.0001670886075949367, |
|
"loss": 0.133, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.2516021837170662, |
|
"grad_norm": 0.060702718794345856, |
|
"learning_rate": 0.00016772151898734178, |
|
"loss": 0.1335, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.25255162591977215, |
|
"grad_norm": 0.0508468896150589, |
|
"learning_rate": 0.00016835443037974685, |
|
"loss": 0.1333, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.253501068122478, |
|
"grad_norm": 0.09877864271402359, |
|
"learning_rate": 0.0001689873417721519, |
|
"loss": 0.2031, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.25445051032518395, |
|
"grad_norm": 0.06673337519168854, |
|
"learning_rate": 0.00016962025316455696, |
|
"loss": 0.1356, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.2553999525278899, |
|
"grad_norm": 0.10604165494441986, |
|
"learning_rate": 0.00017025316455696204, |
|
"loss": 0.2517, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.25634939473059576, |
|
"grad_norm": 0.07689858227968216, |
|
"learning_rate": 0.0001708860759493671, |
|
"loss": 0.1761, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2572988369333017, |
|
"grad_norm": 0.05482449755072594, |
|
"learning_rate": 0.00017151898734177218, |
|
"loss": 0.131, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.2582482791360076, |
|
"grad_norm": 0.08622145652770996, |
|
"learning_rate": 0.00017215189873417722, |
|
"loss": 0.1335, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.2591977213387135, |
|
"grad_norm": 0.0748213455080986, |
|
"learning_rate": 0.0001727848101265823, |
|
"loss": 0.176, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.2601471635414194, |
|
"grad_norm": 0.06163305416703224, |
|
"learning_rate": 0.00017341772151898733, |
|
"loss": 0.1381, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.26109660574412535, |
|
"grad_norm": 0.06141841039061546, |
|
"learning_rate": 0.0001740506329113924, |
|
"loss": 0.1353, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.26204604794683123, |
|
"grad_norm": 0.07326913625001907, |
|
"learning_rate": 0.00017468354430379748, |
|
"loss": 0.1441, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.26299549014953716, |
|
"grad_norm": 0.05951124057173729, |
|
"learning_rate": 0.00017531645569620255, |
|
"loss": 0.1321, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.26394493235224303, |
|
"grad_norm": 0.08364073932170868, |
|
"learning_rate": 0.00017594936708860762, |
|
"loss": 0.187, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.26489437455494896, |
|
"grad_norm": 0.05849132314324379, |
|
"learning_rate": 0.00017658227848101266, |
|
"loss": 0.1393, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.2658438167576549, |
|
"grad_norm": 0.05452360957860947, |
|
"learning_rate": 0.00017721518987341773, |
|
"loss": 0.1342, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.26679325896036077, |
|
"grad_norm": 0.04878188297152519, |
|
"learning_rate": 0.00017784810126582278, |
|
"loss": 0.1445, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2677427011630667, |
|
"grad_norm": 0.06066753342747688, |
|
"learning_rate": 0.00017848101265822785, |
|
"loss": 0.1423, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.26869214336577263, |
|
"grad_norm": 0.04918207973241806, |
|
"learning_rate": 0.00017911392405063292, |
|
"loss": 0.1316, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2696415855684785, |
|
"grad_norm": 0.05103525519371033, |
|
"learning_rate": 0.000179746835443038, |
|
"loss": 0.1313, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.27059102777118443, |
|
"grad_norm": 0.05667628347873688, |
|
"learning_rate": 0.00018037974683544306, |
|
"loss": 0.1434, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.27154046997389036, |
|
"grad_norm": 0.06226016581058502, |
|
"learning_rate": 0.00018101265822784813, |
|
"loss": 0.1357, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.27248991217659624, |
|
"grad_norm": 0.04695293679833412, |
|
"learning_rate": 0.00018164556962025317, |
|
"loss": 0.1314, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.27343935437930217, |
|
"grad_norm": 0.05762844532728195, |
|
"learning_rate": 0.00018227848101265824, |
|
"loss": 0.1349, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.27438879658200804, |
|
"grad_norm": 0.05454534292221069, |
|
"learning_rate": 0.0001829113924050633, |
|
"loss": 0.1432, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.275338238784714, |
|
"grad_norm": 0.050270579755306244, |
|
"learning_rate": 0.00018354430379746836, |
|
"loss": 0.1272, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2762876809874199, |
|
"grad_norm": 0.0688452497124672, |
|
"learning_rate": 0.00018417721518987343, |
|
"loss": 0.1708, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2772371231901258, |
|
"grad_norm": 0.06213200092315674, |
|
"learning_rate": 0.0001848101265822785, |
|
"loss": 0.1674, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2781865653928317, |
|
"grad_norm": 0.059717319905757904, |
|
"learning_rate": 0.00018544303797468354, |
|
"loss": 0.169, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.27913600759553764, |
|
"grad_norm": 0.06223325803875923, |
|
"learning_rate": 0.00018607594936708861, |
|
"loss": 0.1369, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2800854497982435, |
|
"grad_norm": 0.053163208067417145, |
|
"learning_rate": 0.00018670886075949369, |
|
"loss": 0.133, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.28103489200094944, |
|
"grad_norm": 0.06647945195436478, |
|
"learning_rate": 0.00018734177215189873, |
|
"loss": 0.1438, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2819843342036554, |
|
"grad_norm": 0.0588272288441658, |
|
"learning_rate": 0.0001879746835443038, |
|
"loss": 0.1338, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.28293377640636125, |
|
"grad_norm": 0.05841274932026863, |
|
"learning_rate": 0.00018860759493670887, |
|
"loss": 0.1329, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2838832186090672, |
|
"grad_norm": 0.09033369272947311, |
|
"learning_rate": 0.00018924050632911394, |
|
"loss": 0.1747, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.2848326608117731, |
|
"grad_norm": 0.052215326577425, |
|
"learning_rate": 0.00018987341772151899, |
|
"loss": 0.1296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.285782103014479, |
|
"grad_norm": 0.05880101025104523, |
|
"learning_rate": 0.00019050632911392406, |
|
"loss": 0.1287, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.2867315452171849, |
|
"grad_norm": 0.0691700354218483, |
|
"learning_rate": 0.00019113924050632913, |
|
"loss": 0.1676, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.2876809874198908, |
|
"grad_norm": 0.057025909423828125, |
|
"learning_rate": 0.0001917721518987342, |
|
"loss": 0.1346, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.2886304296225967, |
|
"grad_norm": 0.04936329275369644, |
|
"learning_rate": 0.00019240506329113924, |
|
"loss": 0.1354, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.28957987182530265, |
|
"grad_norm": 0.0680055245757103, |
|
"learning_rate": 0.0001930379746835443, |
|
"loss": 0.1344, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2905293140280085, |
|
"grad_norm": 0.07374466210603714, |
|
"learning_rate": 0.00019367088607594938, |
|
"loss": 0.1428, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.29147875623071445, |
|
"grad_norm": 0.061204761266708374, |
|
"learning_rate": 0.00019430379746835443, |
|
"loss": 0.1246, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.2924281984334204, |
|
"grad_norm": 0.053467705845832825, |
|
"learning_rate": 0.0001949367088607595, |
|
"loss": 0.1342, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.29337764063612626, |
|
"grad_norm": 0.057525087147951126, |
|
"learning_rate": 0.00019556962025316457, |
|
"loss": 0.1377, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2943270828388322, |
|
"grad_norm": 0.07857844978570938, |
|
"learning_rate": 0.00019620253164556964, |
|
"loss": 0.2076, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2952765250415381, |
|
"grad_norm": 0.05250545218586922, |
|
"learning_rate": 0.0001968354430379747, |
|
"loss": 0.1432, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.296225967244244, |
|
"grad_norm": 0.07495012134313583, |
|
"learning_rate": 0.00019746835443037975, |
|
"loss": 0.1766, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.2971754094469499, |
|
"grad_norm": 0.04692578688263893, |
|
"learning_rate": 0.0001981012658227848, |
|
"loss": 0.1408, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.29812485164965585, |
|
"grad_norm": 0.055666085332632065, |
|
"learning_rate": 0.00019873417721518987, |
|
"loss": 0.1391, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.29907429385236173, |
|
"grad_norm": 0.050465911626815796, |
|
"learning_rate": 0.00019936708860759494, |
|
"loss": 0.1415, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.30002373605506766, |
|
"grad_norm": 0.051260240375995636, |
|
"learning_rate": 0.0002, |
|
"loss": 0.1423, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.30097317825777353, |
|
"grad_norm": 0.0503215529024601, |
|
"learning_rate": 0.000199999938945738, |
|
"loss": 0.1348, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.30192262046047946, |
|
"grad_norm": 0.04917483776807785, |
|
"learning_rate": 0.0001999997557830265, |
|
"loss": 0.1342, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.3028720626631854, |
|
"grad_norm": 0.06354209035634995, |
|
"learning_rate": 0.00019999945051208916, |
|
"loss": 0.1365, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.30382150486589127, |
|
"grad_norm": 0.04878314957022667, |
|
"learning_rate": 0.0001999990231332988, |
|
"loss": 0.13, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.3047709470685972, |
|
"grad_norm": 0.07046223431825638, |
|
"learning_rate": 0.0001999984736471772, |
|
"loss": 0.1394, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.30572038927130313, |
|
"grad_norm": 0.04456232488155365, |
|
"learning_rate": 0.00019999780205439538, |
|
"loss": 0.1278, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.306669831474009, |
|
"grad_norm": 0.06280628591775894, |
|
"learning_rate": 0.00019999700835577342, |
|
"loss": 0.1715, |
|
"step": 323 |
|
    },
    {
      "epoch": 0.30761927367671493,
      "grad_norm": 0.07462131977081299,
      "learning_rate": 0.00019999609255228046,
      "loss": 0.1772,
      "step": 324
    },
    {
      "epoch": 0.30856871587942086,
      "grad_norm": 0.059642352163791656,
      "learning_rate": 0.00019999505464503482,
      "loss": 0.1294,
      "step": 325
    },
    {
      "epoch": 0.30951815808212674,
      "grad_norm": 0.06458820402622223,
      "learning_rate": 0.00019999389463530383,
      "loss": 0.173,
      "step": 326
    },
    {
      "epoch": 0.31046760028483267,
      "grad_norm": 0.05901939421892166,
      "learning_rate": 0.00019999261252450396,
      "loss": 0.1419,
      "step": 327
    },
    {
      "epoch": 0.3114170424875386,
      "grad_norm": 0.055540215224027634,
      "learning_rate": 0.00019999120831420083,
      "loss": 0.1314,
      "step": 328
    },
    {
      "epoch": 0.3123664846902445,
      "grad_norm": 0.0546739287674427,
      "learning_rate": 0.00019998968200610903,
      "loss": 0.1354,
      "step": 329
    },
    {
      "epoch": 0.3133159268929504,
      "grad_norm": 0.0689477026462555,
      "learning_rate": 0.00019998803360209234,
      "loss": 0.132,
      "step": 330
    },
    {
      "epoch": 0.3142653690956563,
      "grad_norm": 0.05279696360230446,
      "learning_rate": 0.00019998626310416365,
      "loss": 0.1424,
      "step": 331
    },
    {
      "epoch": 0.3152148112983622,
      "grad_norm": 0.055384278297424316,
      "learning_rate": 0.00019998437051448482,
      "loss": 0.141,
      "step": 332
    },
    {
      "epoch": 0.31616425350106814,
      "grad_norm": 0.04636182263493538,
      "learning_rate": 0.0001999823558353669,
      "loss": 0.1414,
      "step": 333
    },
    {
      "epoch": 0.317113695703774,
      "grad_norm": 0.04795726016163826,
      "learning_rate": 0.00019998021906926993,
      "loss": 0.1255,
      "step": 334
    },
    {
      "epoch": 0.31806313790647994,
      "grad_norm": 0.05326540395617485,
      "learning_rate": 0.00019997796021880318,
      "loss": 0.1309,
      "step": 335
    },
    {
      "epoch": 0.3190125801091859,
      "grad_norm": 0.0684736892580986,
      "learning_rate": 0.00019997557928672484,
      "loss": 0.1825,
      "step": 336
    },
    {
      "epoch": 0.31996202231189175,
      "grad_norm": 0.042282164096832275,
      "learning_rate": 0.0001999730762759422,
      "loss": 0.12,
      "step": 337
    },
    {
      "epoch": 0.3209114645145977,
      "grad_norm": 0.05297423154115677,
      "learning_rate": 0.00019997045118951175,
      "loss": 0.1309,
      "step": 338
    },
    {
      "epoch": 0.3218609067173036,
      "grad_norm": 0.080621138215065,
      "learning_rate": 0.00019996770403063883,
      "loss": 0.2134,
      "step": 339
    },
    {
      "epoch": 0.3228103489200095,
      "grad_norm": 0.05552308261394501,
      "learning_rate": 0.00019996483480267803,
      "loss": 0.1361,
      "step": 340
    },
    {
      "epoch": 0.3237597911227154,
      "grad_norm": 0.05070111155509949,
      "learning_rate": 0.00019996184350913287,
      "loss": 0.1314,
      "step": 341
    },
    {
      "epoch": 0.32470923332542134,
      "grad_norm": 0.04412266984581947,
      "learning_rate": 0.00019995873015365601,
      "loss": 0.1299,
      "step": 342
    },
    {
      "epoch": 0.3256586755281272,
      "grad_norm": 0.0445338599383831,
      "learning_rate": 0.00019995549474004917,
      "loss": 0.1313,
      "step": 343
    },
    {
      "epoch": 0.32660811773083315,
      "grad_norm": 0.08224980533123016,
      "learning_rate": 0.000199952137272263,
      "loss": 0.1844,
      "step": 344
    },
    {
      "epoch": 0.327557559933539,
      "grad_norm": 0.04331446811556816,
      "learning_rate": 0.0001999486577543972,
      "loss": 0.133,
      "step": 345
    },
    {
      "epoch": 0.32850700213624495,
      "grad_norm": 0.049314577132463455,
      "learning_rate": 0.00019994505619070068,
      "loss": 0.1351,
      "step": 346
    },
    {
      "epoch": 0.3294564443389509,
      "grad_norm": 0.0697011798620224,
      "learning_rate": 0.00019994133258557117,
      "loss": 0.1709,
      "step": 347
    },
    {
      "epoch": 0.33040588654165676,
      "grad_norm": 0.0510990135371685,
      "learning_rate": 0.00019993748694355557,
      "loss": 0.1365,
      "step": 348
    },
    {
      "epoch": 0.3313553287443627,
      "grad_norm": 0.05100785568356514,
      "learning_rate": 0.00019993351926934967,
      "loss": 0.1302,
      "step": 349
    },
    {
      "epoch": 0.3323047709470686,
      "grad_norm": 0.08001980185508728,
      "learning_rate": 0.00019992942956779838,
      "loss": 0.1736,
      "step": 350
    },
    {
      "epoch": 0.3332542131497745,
      "grad_norm": 0.05298507958650589,
      "learning_rate": 0.00019992521784389559,
      "loss": 0.159,
      "step": 351
    },
    {
      "epoch": 0.3342036553524804,
      "grad_norm": 0.04655485600233078,
      "learning_rate": 0.00019992088410278414,
      "loss": 0.1401,
      "step": 352
    },
    {
      "epoch": 0.33515309755518635,
      "grad_norm": 0.047509439289569855,
      "learning_rate": 0.00019991642834975594,
      "loss": 0.1369,
      "step": 353
    },
    {
      "epoch": 0.3361025397578922,
      "grad_norm": 0.046006906777620316,
      "learning_rate": 0.0001999118505902518,
      "loss": 0.1384,
      "step": 354
    },
    {
      "epoch": 0.33705198196059816,
      "grad_norm": 0.07522892951965332,
      "learning_rate": 0.00019990715082986155,
      "loss": 0.2254,
      "step": 355
    },
    {
      "epoch": 0.33800142416330403,
      "grad_norm": 0.048646144568920135,
      "learning_rate": 0.00019990232907432404,
      "loss": 0.1355,
      "step": 356
    },
    {
      "epoch": 0.33895086636600996,
      "grad_norm": 0.03941798582673073,
      "learning_rate": 0.000199897385329527,
      "loss": 0.1242,
      "step": 357
    },
    {
      "epoch": 0.3399003085687159,
      "grad_norm": 0.04582727700471878,
      "learning_rate": 0.0001998923196015072,
      "loss": 0.1347,
      "step": 358
    },
    {
      "epoch": 0.34084975077142177,
      "grad_norm": 0.05890033766627312,
      "learning_rate": 0.00019988713189645027,
      "loss": 0.1356,
      "step": 359
    },
    {
      "epoch": 0.3417991929741277,
      "grad_norm": 0.050398606806993484,
      "learning_rate": 0.00019988182222069093,
      "loss": 0.1379,
      "step": 360
    },
    {
      "epoch": 0.3427486351768336,
      "grad_norm": 0.053657352924346924,
      "learning_rate": 0.00019987639058071267,
      "loss": 0.1417,
      "step": 361
    },
    {
      "epoch": 0.3436980773795395,
      "grad_norm": 0.04928993433713913,
      "learning_rate": 0.00019987083698314804,
      "loss": 0.1269,
      "step": 362
    },
    {
      "epoch": 0.34464751958224543,
      "grad_norm": 0.04932550713419914,
      "learning_rate": 0.0001998651614347784,
      "loss": 0.1429,
      "step": 363
    },
    {
      "epoch": 0.34559696178495136,
      "grad_norm": 0.0531768873333931,
      "learning_rate": 0.00019985936394253413,
      "loss": 0.1367,
      "step": 364
    },
    {
      "epoch": 0.34654640398765724,
      "grad_norm": 0.05342009291052818,
      "learning_rate": 0.00019985344451349443,
      "loss": 0.1365,
      "step": 365
    },
    {
      "epoch": 0.34749584619036317,
      "grad_norm": 0.04960772022604942,
      "learning_rate": 0.00019984740315488742,
      "loss": 0.133,
      "step": 366
    },
    {
      "epoch": 0.3484452883930691,
      "grad_norm": 0.04490765556693077,
      "learning_rate": 0.00019984123987409013,
      "loss": 0.1347,
      "step": 367
    },
    {
      "epoch": 0.34939473059577497,
      "grad_norm": 0.05546121671795845,
      "learning_rate": 0.0001998349546786285,
      "loss": 0.169,
      "step": 368
    },
    {
      "epoch": 0.3503441727984809,
      "grad_norm": 0.04962169751524925,
      "learning_rate": 0.0001998285475761772,
      "loss": 0.1325,
      "step": 369
    },
    {
      "epoch": 0.3512936150011868,
      "grad_norm": 0.0451858825981617,
      "learning_rate": 0.00019982201857455988,
      "loss": 0.1291,
      "step": 370
    },
    {
      "epoch": 0.3522430572038927,
      "grad_norm": 0.07738906145095825,
      "learning_rate": 0.00019981536768174903,
      "loss": 0.1841,
      "step": 371
    },
    {
      "epoch": 0.35319249940659864,
      "grad_norm": 0.05104148015379906,
      "learning_rate": 0.000199808594905866,
      "loss": 0.1375,
      "step": 372
    },
    {
      "epoch": 0.3541419416093045,
      "grad_norm": 0.04850155860185623,
      "learning_rate": 0.00019980170025518082,
      "loss": 0.1335,
      "step": 373
    },
    {
      "epoch": 0.35509138381201044,
      "grad_norm": 0.050271324813365936,
      "learning_rate": 0.00019979468373811248,
      "loss": 0.1394,
      "step": 374
    },
    {
      "epoch": 0.35604082601471637,
      "grad_norm": 0.050799645483493805,
      "learning_rate": 0.0001997875453632288,
      "loss": 0.135,
      "step": 375
    },
    {
      "epoch": 0.35699026821742225,
      "grad_norm": 0.05703526735305786,
      "learning_rate": 0.00019978028513924627,
      "loss": 0.1371,
      "step": 376
    },
    {
      "epoch": 0.3579397104201282,
      "grad_norm": 0.06665853410959244,
      "learning_rate": 0.00019977290307503028,
      "loss": 0.1837,
      "step": 377
    },
    {
      "epoch": 0.3588891526228341,
      "grad_norm": 0.04639972746372223,
      "learning_rate": 0.000199765399179595,
      "loss": 0.1315,
      "step": 378
    },
    {
      "epoch": 0.35983859482554,
      "grad_norm": 0.07625308632850647,
      "learning_rate": 0.00019975777346210326,
      "loss": 0.2064,
      "step": 379
    },
    {
      "epoch": 0.3607880370282459,
      "grad_norm": 0.048770248889923096,
      "learning_rate": 0.00019975002593186674,
      "loss": 0.1363,
      "step": 380
    },
    {
      "epoch": 0.36173747923095184,
      "grad_norm": 0.04932136833667755,
      "learning_rate": 0.00019974215659834582,
      "loss": 0.1374,
      "step": 381
    },
    {
      "epoch": 0.3626869214336577,
      "grad_norm": 0.03848756104707718,
      "learning_rate": 0.00019973416547114964,
      "loss": 0.1333,
      "step": 382
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.04468891769647598,
      "learning_rate": 0.00019972605256003605,
      "loss": 0.129,
      "step": 383
    },
    {
      "epoch": 0.3645858058390695,
      "grad_norm": 0.048413511365652084,
      "learning_rate": 0.0001997178178749116,
      "loss": 0.1314,
      "step": 384
    },
    {
      "epoch": 0.36553524804177545,
      "grad_norm": 0.045054856687784195,
      "learning_rate": 0.00019970946142583155,
      "loss": 0.1323,
      "step": 385
    },
    {
      "epoch": 0.3664846902444814,
      "grad_norm": 0.05541200935840607,
      "learning_rate": 0.00019970098322299982,
      "loss": 0.1342,
      "step": 386
    },
    {
      "epoch": 0.36743413244718726,
      "grad_norm": 0.06861472874879837,
      "learning_rate": 0.00019969238327676906,
      "loss": 0.1347,
      "step": 387
    },
    {
      "epoch": 0.3683835746498932,
      "grad_norm": 0.043996453285217285,
      "learning_rate": 0.00019968366159764047,
      "loss": 0.132,
      "step": 388
    },
    {
      "epoch": 0.3693330168525991,
      "grad_norm": 0.06562239676713943,
      "learning_rate": 0.000199674818196264,
      "loss": 0.1759,
      "step": 389
    },
    {
      "epoch": 0.370282459055305,
      "grad_norm": 0.04714899882674217,
      "learning_rate": 0.00019966585308343822,
      "loss": 0.1274,
      "step": 390
    },
    {
      "epoch": 0.3712319012580109,
      "grad_norm": 0.04736959934234619,
      "learning_rate": 0.00019965676627011026,
      "loss": 0.1265,
      "step": 391
    },
    {
      "epoch": 0.37218134346071685,
      "grad_norm": 0.056829433888196945,
      "learning_rate": 0.0001996475577673759,
      "loss": 0.1402,
      "step": 392
    },
    {
      "epoch": 0.3731307856634227,
      "grad_norm": 0.0426231250166893,
      "learning_rate": 0.00019963822758647953,
      "loss": 0.1364,
      "step": 393
    },
    {
      "epoch": 0.37408022786612866,
      "grad_norm": 0.07376877963542938,
      "learning_rate": 0.00019962877573881404,
      "loss": 0.2042,
      "step": 394
    },
    {
      "epoch": 0.3750296700688346,
      "grad_norm": 0.043273668736219406,
      "learning_rate": 0.00019961920223592104,
      "loss": 0.132,
      "step": 395
    },
    {
      "epoch": 0.37597911227154046,
      "grad_norm": 0.044406965374946594,
      "learning_rate": 0.00019960950708949052,
      "loss": 0.1344,
      "step": 396
    },
    {
      "epoch": 0.3769285544742464,
      "grad_norm": 0.040342606604099274,
      "learning_rate": 0.00019959969031136106,
      "loss": 0.1214,
      "step": 397
    },
    {
      "epoch": 0.37787799667695227,
      "grad_norm": 0.05118388682603836,
      "learning_rate": 0.00019958975191351983,
      "loss": 0.14,
      "step": 398
    },
    {
      "epoch": 0.3788274388796582,
      "grad_norm": 0.045876793563365936,
      "learning_rate": 0.00019957969190810245,
      "loss": 0.1335,
      "step": 399
    },
    {
      "epoch": 0.3797768810823641,
      "grad_norm": 0.0645332932472229,
      "learning_rate": 0.00019956951030739308,
      "loss": 0.1702,
      "step": 400
    },
    {
      "epoch": 0.38072632328507,
      "grad_norm": 0.05039132386445999,
      "learning_rate": 0.00019955920712382423,
      "loss": 0.136,
      "step": 401
    },
    {
      "epoch": 0.38167576548777593,
      "grad_norm": 0.052004653960466385,
      "learning_rate": 0.00019954878236997704,
      "loss": 0.1386,
      "step": 402
    },
    {
      "epoch": 0.38262520769048186,
      "grad_norm": 0.05021458491683006,
      "learning_rate": 0.00019953823605858105,
      "loss": 0.1378,
      "step": 403
    },
    {
      "epoch": 0.38357464989318774,
      "grad_norm": 0.058653559535741806,
      "learning_rate": 0.0001995275682025141,
      "loss": 0.1437,
      "step": 404
    },
    {
      "epoch": 0.38452409209589367,
      "grad_norm": 0.04466673359274864,
      "learning_rate": 0.00019951677881480264,
      "loss": 0.1334,
      "step": 405
    },
    {
      "epoch": 0.3854735342985996,
      "grad_norm": 0.06119415909051895,
      "learning_rate": 0.00019950586790862138,
      "loss": 0.1296,
      "step": 406
    },
    {
      "epoch": 0.38642297650130547,
      "grad_norm": 0.04749077931046486,
      "learning_rate": 0.0001994948354972935,
      "loss": 0.1341,
      "step": 407
    },
    {
      "epoch": 0.3873724187040114,
      "grad_norm": 0.037752799689769745,
      "learning_rate": 0.00019948368159429053,
      "loss": 0.134,
      "step": 408
    },
    {
      "epoch": 0.38832186090671733,
      "grad_norm": 0.08903038501739502,
      "learning_rate": 0.00019947240621323226,
      "loss": 0.2155,
      "step": 409
    },
    {
      "epoch": 0.3892713031094232,
      "grad_norm": 0.03878140076994896,
      "learning_rate": 0.00019946100936788698,
      "loss": 0.1176,
      "step": 410
    },
    {
      "epoch": 0.39022074531212914,
      "grad_norm": 0.04927309602499008,
      "learning_rate": 0.00019944949107217113,
      "loss": 0.1344,
      "step": 411
    },
    {
      "epoch": 0.391170187514835,
      "grad_norm": 0.04933890327811241,
      "learning_rate": 0.00019943785134014962,
      "loss": 0.1315,
      "step": 412
    },
    {
      "epoch": 0.39211962971754094,
      "grad_norm": 0.06702516227960587,
      "learning_rate": 0.0001994260901860355,
      "loss": 0.1826,
      "step": 413
    },
    {
      "epoch": 0.39306907192024687,
      "grad_norm": 0.048132237046957016,
      "learning_rate": 0.00019941420762419014,
      "loss": 0.1436,
      "step": 414
    },
    {
      "epoch": 0.39401851412295275,
      "grad_norm": 0.07756894826889038,
      "learning_rate": 0.00019940220366912318,
      "loss": 0.2162,
      "step": 415
    },
    {
      "epoch": 0.3949679563256587,
      "grad_norm": 0.04789011925458908,
      "learning_rate": 0.00019939007833549242,
      "loss": 0.1295,
      "step": 416
    },
    {
      "epoch": 0.3959173985283646,
      "grad_norm": 0.04369444027543068,
      "learning_rate": 0.000199377831638104,
      "loss": 0.1322,
      "step": 417
    },
    {
      "epoch": 0.3968668407310705,
      "grad_norm": 0.05376122146844864,
      "learning_rate": 0.00019936546359191216,
      "loss": 0.1743,
      "step": 418
    },
    {
      "epoch": 0.3978162829337764,
      "grad_norm": 0.045930229127407074,
      "learning_rate": 0.0001993529742120193,
      "loss": 0.1336,
      "step": 419
    },
    {
      "epoch": 0.39876572513648234,
      "grad_norm": 0.039980966597795486,
      "learning_rate": 0.00019934036351367606,
      "loss": 0.1349,
      "step": 420
    },
    {
      "epoch": 0.3997151673391882,
      "grad_norm": 0.03797341510653496,
      "learning_rate": 0.00019932763151228115,
      "loss": 0.1256,
      "step": 421
    },
    {
      "epoch": 0.40066460954189415,
      "grad_norm": 0.04779914394021034,
      "learning_rate": 0.00019931477822338146,
      "loss": 0.1411,
      "step": 422
    },
    {
      "epoch": 0.4016140517446,
      "grad_norm": 0.040458668023347855,
      "learning_rate": 0.00019930180366267193,
      "loss": 0.126,
      "step": 423
    },
    {
      "epoch": 0.40256349394730595,
      "grad_norm": 0.04114462807774544,
      "learning_rate": 0.0001992887078459956,
      "loss": 0.127,
      "step": 424
    },
    {
      "epoch": 0.4035129361500119,
      "grad_norm": 0.048119012266397476,
      "learning_rate": 0.00019927549078934358,
      "loss": 0.1346,
      "step": 425
    },
    {
      "epoch": 0.40446237835271776,
      "grad_norm": 0.0545562319457531,
      "learning_rate": 0.00019926215250885504,
      "loss": 0.1387,
      "step": 426
    },
    {
      "epoch": 0.4054118205554237,
      "grad_norm": 0.052092909812927246,
      "learning_rate": 0.00019924869302081715,
      "loss": 0.1389,
      "step": 427
    },
    {
      "epoch": 0.4063612627581296,
      "grad_norm": 0.03847799077630043,
      "learning_rate": 0.0001992351123416651,
      "loss": 0.1234,
      "step": 428
    },
    {
      "epoch": 0.4073107049608355,
      "grad_norm": 0.0436912477016449,
      "learning_rate": 0.000199221410487982,
      "loss": 0.1362,
      "step": 429
    },
    {
      "epoch": 0.4082601471635414,
      "grad_norm": 0.04420888423919678,
      "learning_rate": 0.00019920758747649908,
      "loss": 0.1243,
      "step": 430
    },
    {
      "epoch": 0.40920958936624735,
      "grad_norm": 0.037297070026397705,
      "learning_rate": 0.00019919364332409535,
      "loss": 0.1331,
      "step": 431
    },
    {
      "epoch": 0.4101590315689532,
      "grad_norm": 0.03854360058903694,
      "learning_rate": 0.00019917957804779782,
      "loss": 0.1266,
      "step": 432
    },
    {
      "epoch": 0.41110847377165916,
      "grad_norm": 0.04071418195962906,
      "learning_rate": 0.00019916539166478137,
      "loss": 0.1292,
      "step": 433
    },
    {
      "epoch": 0.4120579159743651,
      "grad_norm": 0.04560808837413788,
      "learning_rate": 0.00019915108419236882,
      "loss": 0.1381,
      "step": 434
    },
    {
      "epoch": 0.41300735817707096,
      "grad_norm": 0.06313233822584152,
      "learning_rate": 0.00019913665564803078,
      "loss": 0.2031,
      "step": 435
    },
    {
      "epoch": 0.4139568003797769,
      "grad_norm": 0.04507524147629738,
      "learning_rate": 0.00019912210604938578,
      "loss": 0.1277,
      "step": 436
    },
    {
      "epoch": 0.41490624258248276,
      "grad_norm": 0.05048058554530144,
      "learning_rate": 0.00019910743541420007,
      "loss": 0.1315,
      "step": 437
    },
    {
      "epoch": 0.4158556847851887,
      "grad_norm": 0.04872648045420647,
      "learning_rate": 0.0001990926437603878,
      "loss": 0.1292,
      "step": 438
    },
    {
      "epoch": 0.4168051269878946,
      "grad_norm": 0.04400710016489029,
      "learning_rate": 0.00019907773110601075,
      "loss": 0.1236,
      "step": 439
    },
    {
      "epoch": 0.4177545691906005,
      "grad_norm": 0.051591627299785614,
      "learning_rate": 0.00019906269746927863,
      "loss": 0.1358,
      "step": 440
    },
    {
      "epoch": 0.41870401139330643,
      "grad_norm": 0.04288725182414055,
      "learning_rate": 0.00019904754286854877,
      "loss": 0.126,
      "step": 441
    },
    {
      "epoch": 0.41965345359601236,
      "grad_norm": 0.04984726384282112,
      "learning_rate": 0.00019903226732232622,
      "loss": 0.1326,
      "step": 442
    },
    {
      "epoch": 0.42060289579871823,
      "grad_norm": 0.041585132479667664,
      "learning_rate": 0.00019901687084926373,
      "loss": 0.136,
      "step": 443
    },
    {
      "epoch": 0.42155233800142417,
      "grad_norm": 0.05849035084247589,
      "learning_rate": 0.0001990013534681617,
      "loss": 0.1727,
      "step": 444
    },
    {
      "epoch": 0.4225017802041301,
      "grad_norm": 0.043387994170188904,
      "learning_rate": 0.00019898571519796817,
      "loss": 0.1393,
      "step": 445
    },
    {
      "epoch": 0.42345122240683597,
      "grad_norm": 0.05867496132850647,
      "learning_rate": 0.0001989699560577788,
      "loss": 0.1664,
      "step": 446
    },
    {
      "epoch": 0.4244006646095419,
      "grad_norm": 0.07019232958555222,
      "learning_rate": 0.00019895407606683685,
      "loss": 0.1653,
      "step": 447
    },
    {
      "epoch": 0.42535010681224783,
      "grad_norm": 0.04676515609025955,
      "learning_rate": 0.00019893807524453314,
      "loss": 0.1368,
      "step": 448
    },
    {
      "epoch": 0.4262995490149537,
      "grad_norm": 0.06640240550041199,
      "learning_rate": 0.00019892195361040607,
      "loss": 0.2089,
      "step": 449
    },
    {
      "epoch": 0.42724899121765963,
      "grad_norm": 0.044658735394477844,
      "learning_rate": 0.00019890571118414148,
      "loss": 0.1298,
      "step": 450
    },
    {
      "epoch": 0.4281984334203655,
      "grad_norm": 0.04810122773051262,
      "learning_rate": 0.00019888934798557278,
      "loss": 0.1288,
      "step": 451
    },
    {
      "epoch": 0.42914787562307144,
      "grad_norm": 0.0425436794757843,
      "learning_rate": 0.0001988728640346808,
      "loss": 0.1354,
      "step": 452
    },
    {
      "epoch": 0.43009731782577737,
      "grad_norm": 0.04513363912701607,
      "learning_rate": 0.0001988562593515939,
      "loss": 0.1346,
      "step": 453
    },
    {
      "epoch": 0.43104676002848324,
      "grad_norm": 0.052022870630025864,
      "learning_rate": 0.0001988395339565878,
      "loss": 0.1302,
      "step": 454
    },
    {
      "epoch": 0.4319962022311892,
      "grad_norm": 0.04852641373872757,
      "learning_rate": 0.0001988226878700856,
      "loss": 0.1388,
      "step": 455
    },
    {
      "epoch": 0.4329456444338951,
      "grad_norm": 0.04990584775805473,
      "learning_rate": 0.00019880572111265785,
      "loss": 0.1552,
      "step": 456
    },
    {
      "epoch": 0.433895086636601,
      "grad_norm": 0.052271679043769836,
      "learning_rate": 0.00019878863370502238,
      "loss": 0.1404,
      "step": 457
    },
    {
      "epoch": 0.4348445288393069,
      "grad_norm": 0.04795520752668381,
      "learning_rate": 0.00019877142566804436,
      "loss": 0.1341,
      "step": 458
    },
    {
      "epoch": 0.43579397104201284,
      "grad_norm": 0.048165664076805115,
      "learning_rate": 0.00019875409702273632,
      "loss": 0.1343,
      "step": 459
    },
    {
      "epoch": 0.4367434132447187,
      "grad_norm": 0.04213611036539078,
      "learning_rate": 0.000198736647790258,
      "loss": 0.1369,
      "step": 460
    },
    {
      "epoch": 0.43769285544742464,
      "grad_norm": 0.05819966271519661,
      "learning_rate": 0.00019871907799191632,
      "loss": 0.1615,
      "step": 461
    },
    {
      "epoch": 0.4386422976501306,
      "grad_norm": 0.057378821074962616,
      "learning_rate": 0.00019870138764916558,
      "loss": 0.175,
      "step": 462
    },
    {
      "epoch": 0.43959173985283645,
      "grad_norm": 0.0432853177189827,
      "learning_rate": 0.00019868357678360724,
      "loss": 0.1371,
      "step": 463
    },
    {
      "epoch": 0.4405411820555424,
      "grad_norm": 0.03890872746706009,
      "learning_rate": 0.0001986656454169898,
      "loss": 0.1332,
      "step": 464
    },
    {
      "epoch": 0.44149062425824825,
      "grad_norm": 0.04006613418459892,
      "learning_rate": 0.00019864759357120896,
      "loss": 0.1342,
      "step": 465
    },
    {
      "epoch": 0.4424400664609542,
      "grad_norm": 0.049053166061639786,
      "learning_rate": 0.00019862942126830767,
      "loss": 0.1756,
      "step": 466
    },
    {
      "epoch": 0.4433895086636601,
      "grad_norm": 0.03966079652309418,
      "learning_rate": 0.00019861112853047577,
      "loss": 0.1303,
      "step": 467
    },
    {
      "epoch": 0.444338950866366,
      "grad_norm": 0.04506433755159378,
      "learning_rate": 0.0001985927153800503,
      "loss": 0.136,
      "step": 468
    },
    {
      "epoch": 0.4452883930690719,
      "grad_norm": 0.04392915591597557,
      "learning_rate": 0.00019857418183951526,
      "loss": 0.1397,
      "step": 469
    },
    {
      "epoch": 0.44623783527177785,
      "grad_norm": 0.038007620722055435,
      "learning_rate": 0.0001985555279315017,
      "loss": 0.1246,
      "step": 470
    },
    {
      "epoch": 0.4471872774744837,
      "grad_norm": 0.048948097974061966,
      "learning_rate": 0.00019853675367878764,
      "loss": 0.1329,
      "step": 471
    },
    {
      "epoch": 0.44813671967718965,
      "grad_norm": 0.04174380376935005,
      "learning_rate": 0.00019851785910429806,
      "loss": 0.13,
      "step": 472
    },
    {
      "epoch": 0.4490861618798956,
      "grad_norm": 0.048575468361377716,
      "learning_rate": 0.00019849884231104780,
      "loss": 0.1385,
      "step": 473
    },
    {
      "epoch": 0.45003560408260146,
      "grad_norm": 0.05167670175433159,
      "learning_rate": 0.00019847970908242664,
      "loss": 0.1684,
      "step": 474
    },
    {
      "epoch": 0.4509850462853074,
      "grad_norm": 0.06849198788404465,
      "learning_rate": 0.00019846045368162923,
      "loss": 0.1795,
      "step": 475
    },
    {
      "epoch": 0.4519344884880133,
      "grad_norm": 0.044273603707551956,
      "learning_rate": 0.0001984410780522251,
      "loss": 0.1246,
      "step": 476
    },
    {
      "epoch": 0.4528839306907192,
      "grad_norm": 0.048194363713264465,
      "learning_rate": 0.00019842158221787353,
      "loss": 0.1366,
      "step": 477
    },
    {
      "epoch": 0.4538333728934251,
      "grad_norm": 0.033906418830156326,
      "learning_rate": 0.00019840196620238057,
      "loss": 0.1235,
      "step": 478
    },
    {
      "epoch": 0.454782815096131,
      "grad_norm": 0.043933141976594925,
      "learning_rate": 0.00019838223002969905,
      "loss": 0.1195,
      "step": 479
    },
    {
      "epoch": 0.45573225729883693,
      "grad_norm": 0.056823644787073135,
      "learning_rate": 0.00019836237372392854,
      "loss": 0.1757,
      "step": 480
    },
    {
      "epoch": 0.45668169950154286,
      "grad_norm": 0.07587820291519165,
      "learning_rate": 0.00019834239730931526,
      "loss": 0.1784,
      "step": 481
    },
    {
      "epoch": 0.45763114170424873,
      "grad_norm": 0.04008018970489502,
      "learning_rate": 0.0001983223008102521,
      "loss": 0.1306,
      "step": 482
    },
    {
      "epoch": 0.45858058390695466,
      "grad_norm": 0.05180038511753082,
      "learning_rate": 0.00019830208425127867,
      "loss": 0.1485,
      "step": 483
    },
    {
      "epoch": 0.4595300261096606,
      "grad_norm": 0.0691617876291275,
      "learning_rate": 0.00019828174765708104,
      "loss": 0.1249,
      "step": 484
    },
    {
      "epoch": 0.46047946831236647,
      "grad_norm": 0.0565367266535759,
      "learning_rate": 0.00019826129105249195,
      "loss": 0.1744,
      "step": 485
    },
    {
      "epoch": 0.4614289105150724,
      "grad_norm": 0.044927019625902176,
      "learning_rate": 0.00019824071446249072,
      "loss": 0.1341,
      "step": 486
    },
    {
      "epoch": 0.46237835271777833,
      "grad_norm": 0.04481721669435501,
      "learning_rate": 0.00019822001791220298,
      "loss": 0.1354,
      "step": 487
    },
    {
      "epoch": 0.4633277949204842,
      "grad_norm": 0.05233500525355339,
      "learning_rate": 0.0001981992014269011,
      "loss": 0.1501,
      "step": 488
    },
    {
      "epoch": 0.46427723712319013,
      "grad_norm": 0.044350553303956985,
      "learning_rate": 0.00019817826503200372,
      "loss": 0.1335,
      "step": 489
    },
    {
      "epoch": 0.465226679325896,
      "grad_norm": 0.03551819548010826,
      "learning_rate": 0.000198157208753076,
      "loss": 0.1322,
      "step": 490
    },
    {
      "epoch": 0.46617612152860194,
      "grad_norm": 0.04409592226147652,
      "learning_rate": 0.00019813603261582943,
      "loss": 0.1561,
      "step": 491
    },
    {
      "epoch": 0.46712556373130787,
      "grad_norm": 0.04842127487063408,
      "learning_rate": 0.0001981147366461219,
      "loss": 0.1296,
      "step": 492
    },
    {
      "epoch": 0.46807500593401374,
      "grad_norm": 0.04349881038069725,
      "learning_rate": 0.00019809332086995757,
      "loss": 0.1319,
      "step": 493
    },
    {
      "epoch": 0.4690244481367197,
      "grad_norm": 0.04413028433918953,
      "learning_rate": 0.00019807178531348698,
      "loss": 0.1321,
      "step": 494
    },
    {
      "epoch": 0.4699738903394256,
      "grad_norm": 0.03972313553094864,
      "learning_rate": 0.00019805013000300683,
      "loss": 0.1358,
      "step": 495
    },
    {
      "epoch": 0.4709233325421315,
      "grad_norm": 0.052269116044044495,
      "learning_rate": 0.00019802835496496012,
      "loss": 0.1389,
      "step": 496
    },
    {
      "epoch": 0.4718727747448374,
      "grad_norm": 0.0379653237760067,
      "learning_rate": 0.00019800646022593603,
      "loss": 0.1283,
      "step": 497
    },
    {
      "epoch": 0.47282221694754334,
      "grad_norm": 0.04370688647031784,
      "learning_rate": 0.0001979844458126699,
      "loss": 0.1278,
      "step": 498
    },
    {
      "epoch": 0.4737716591502492,
      "grad_norm": 0.03912369906902313,
      "learning_rate": 0.0001979623117520432,
      "loss": 0.1257,
      "step": 499
    },
    {
      "epoch": 0.47472110135295514,
      "grad_norm": 0.039594005793333054,
      "learning_rate": 0.00019794005807108352,
      "loss": 0.1375,
      "step": 500
    },
    {
      "epoch": 0.4756705435556611,
      "grad_norm": 0.03889892250299454,
      "learning_rate": 0.00019791768479696448,
      "loss": 0.13,
      "step": 501
    },
    {
      "epoch": 0.47661998575836695,
      "grad_norm": 0.03966660797595978,
      "learning_rate": 0.00019789519195700578,
      "loss": 0.1268,
      "step": 502
    },
    {
      "epoch": 0.4775694279610729,
      "grad_norm": 0.04501716047525406,
      "learning_rate": 0.00019787257957867306,
      "loss": 0.1423,
      "step": 503
    },
    {
      "epoch": 0.47851887016377875,
      "grad_norm": 0.06255436688661575,
      "learning_rate": 0.000197849847689578,
      "loss": 0.1799,
      "step": 504
    },
    {
      "epoch": 0.4794683123664847,
      "grad_norm": 0.050308458507061005,
      "learning_rate": 0.00019782699631747813,
      "loss": 0.1733,
      "step": 505
    },
    {
      "epoch": 0.4804177545691906,
      "grad_norm": 0.0357963964343071,
      "learning_rate": 0.00019780402549027698,
      "loss": 0.1268,
      "step": 506
    },
    {
      "epoch": 0.4813671967718965,
      "grad_norm": 0.03651968017220497,
      "learning_rate": 0.00019778093523602384,
      "loss": 0.1267,
      "step": 507
    },
    {
      "epoch": 0.4823166389746024,
      "grad_norm": 0.043042074888944626,
      "learning_rate": 0.0001977577255829139,
      "loss": 0.1256,
      "step": 508
    },
    {
      "epoch": 0.48326608117730835,
      "grad_norm": 0.07031014561653137,
      "learning_rate": 0.00019773439655928815,
      "loss": 0.1796,
      "step": 509
    },
    {
      "epoch": 0.4842155233800142,
      "grad_norm": 0.04429268836975098,
      "learning_rate": 0.00019771094819363326,
      "loss": 0.1298,
      "step": 510
    },
    {
      "epoch": 0.48516496558272015,
      "grad_norm": 0.0373898483812809,
      "learning_rate": 0.00019768738051458172,
      "loss": 0.1232,
      "step": 511
    },
    {
      "epoch": 0.4861144077854261,
      "grad_norm": 0.05853155627846718,
      "learning_rate": 0.00019766369355091166,
      "loss": 0.1694,
      "step": 512
    },
    {
      "epoch": 0.48706384998813196,
      "grad_norm": 0.05050895735621452,
      "learning_rate": 0.00019763988733154686,
      "loss": 0.1665,
      "step": 513
    },
    {
      "epoch": 0.4880132921908379,
      "grad_norm": 0.04074448347091675,
      "learning_rate": 0.0001976159618855568,
      "loss": 0.1336,
      "step": 514
    },
    {
      "epoch": 0.4889627343935438,
      "grad_norm": 0.03826110064983368,
      "learning_rate": 0.00019759191724215644,
      "loss": 0.132,
      "step": 515
    },
    {
      "epoch": 0.4899121765962497,
      "grad_norm": 0.04392875358462334,
      "learning_rate": 0.0001975677534307064,
      "loss": 0.1204,
      "step": 516
    },
    {
      "epoch": 0.4908616187989556,
      "grad_norm": 0.04615531116724014,
      "learning_rate": 0.0001975434704807127,
      "loss": 0.1358,
      "step": 517
    },
    {
      "epoch": 0.4918110610016615,
      "grad_norm": 0.053060565143823624,
      "learning_rate": 0.00019751906842182688,
      "loss": 0.1299,
      "step": 518
    },
    {
      "epoch": 0.49276050320436743,
      "grad_norm": 0.04905511438846588,
      "learning_rate": 0.00019749454728384594,
      "loss": 0.1284,
      "step": 519
    },
    {
      "epoch": 0.49370994540707336,
      "grad_norm": 0.04257996007800102,
      "learning_rate": 0.00019746990709671234,
      "loss": 0.1353,
      "step": 520
    },
    {
      "epoch": 0.49465938760977923,
      "grad_norm": 0.05581909418106079,
      "learning_rate": 0.0001974451478905138,
      "loss": 0.1594,
      "step": 521
    },
    {
      "epoch": 0.49560882981248516,
      "grad_norm": 0.04603990167379379,
      "learning_rate": 0.00019742026969548338,
      "loss": 0.1383,
      "step": 522
    },
    {
      "epoch": 0.4965582720151911,
      "grad_norm": 0.058511972427368164,
      "learning_rate": 0.00019739527254199958,
      "loss": 0.1725,
      "step": 523
    },
    {
      "epoch": 0.49750771421789697,
      "grad_norm": 0.03875808045268059,
      "learning_rate": 0.000197370156460586,
      "loss": 0.1405,
      "step": 524
    },
    {
      "epoch": 0.4984571564206029,
      "grad_norm": 0.040860000997781754,
      "learning_rate": 0.00019734492148191151,
      "loss": 0.139,
      "step": 525
    },
    {
      "epoch": 0.49940659862330883,
      "grad_norm": 0.06110459193587303,
      "learning_rate": 0.00019731956763679014,
      "loss": 0.223,
      "step": 526
    },
    {
      "epoch": 0.5003560408260147,
      "grad_norm": 0.05238598585128784,
      "learning_rate": 0.00019729409495618117,
      "loss": 0.1681,
      "step": 527
    },
    {
      "epoch": 0.5013054830287206,
      "grad_norm": 0.05180145800113678,
      "learning_rate": 0.00019726850347118885,
      "loss": 0.1743,
      "step": 528
    },
    {
      "epoch": 0.5022549252314266,
      "grad_norm": 0.05066410079598427,
      "learning_rate": 0.00019724279321306262,
      "loss": 0.1634,
      "step": 529
    },
    {
      "epoch": 0.5032043674341324,
      "grad_norm": 0.06856084614992142,
      "learning_rate": 0.00019721696421319684,
      "loss": 0.1685,
      "step": 530
    },
    {
      "epoch": 0.5041538096368383,
      "grad_norm": 0.045972324907779694,
      "learning_rate": 0.00019719101650313096,
      "loss": 0.1245,
      "step": 531
    },
    {
      "epoch": 0.5051032518395443,
      "grad_norm": 0.04522623121738434,
      "learning_rate": 0.00019716495011454934,
      "loss": 0.1367,
      "step": 532
    },
    {
      "epoch": 0.5060526940422502,
      "grad_norm": 0.0780516117811203,
      "learning_rate": 0.00019713876507928126,
      "loss": 0.1351,
      "step": 533
    },
    {
      "epoch": 0.507002136244956,
      "grad_norm": 0.04264210909605026,
      "learning_rate": 0.00019711246142930088,
      "loss": 0.1312,
      "step": 534
    },
    {
      "epoch": 0.507951578447662,
      "grad_norm": 0.059501100331544876,
      "learning_rate": 0.00019708603919672718,
      "loss": 0.1698,
      "step": 535
    },
    {
      "epoch": 0.5089010206503679,
      "grad_norm": 0.060105033218860626,
      "learning_rate": 0.00019705949841382396,
      "loss": 0.1303,
      "step": 536
    },
    {
      "epoch": 0.5098504628530738,
      "grad_norm": 0.04733967408537865,
      "learning_rate": 0.00019703283911299982,
      "loss": 0.1245,
      "step": 537
    },
    {
      "epoch": 0.5107999050557798,
      "grad_norm": 0.04254663735628128,
      "learning_rate": 0.00019700606132680798,
      "loss": 0.1343,
      "step": 538
    },
    {
      "epoch": 0.5117493472584856,
      "grad_norm": 0.06302463263273239,
      "learning_rate": 0.00019697916508794645,
      "loss": 0.1831,
      "step": 539
    },
    {
      "epoch": 0.5126987894611915,
      "grad_norm": 0.05301344394683838,
      "learning_rate": 0.0001969521504292578,
      "loss": 0.1316,
      "step": 540
    },
    {
      "epoch": 0.5136482316638975,
      "grad_norm": 0.04151083528995514,
      "learning_rate": 0.00019692501738372922,
      "loss": 0.1335,
      "step": 541
    },
    {
      "epoch": 0.5145976738666034,
      "grad_norm": 0.05647062510251999,
      "learning_rate": 0.00019689776598449257,
      "loss": 0.1688,
      "step": 542
    },
    {
      "epoch": 0.5155471160693093,
      "grad_norm": 0.037060294300317764,
      "learning_rate": 0.000196870396264824,
      "loss": 0.1339,
      "step": 543
    },
    {
      "epoch": 0.5164965582720152,
      "grad_norm": 0.04036247730255127,
      "learning_rate": 0.0001968429082581443,
      "loss": 0.1361,
      "step": 544
    },
    {
      "epoch": 0.5174460004747211,
      "grad_norm": 0.040889665484428406,
      "learning_rate": 0.00019681530199801875,
      "loss": 0.1356,
      "step": 545
    },
    {
      "epoch": 0.518395442677427,
      "grad_norm": 0.0538480207324028,
      "learning_rate": 0.00019678757751815686,
      "loss": 0.1689,
      "step": 546
    },
    {
      "epoch": 0.519344884880133,
      "grad_norm": 0.04074794426560402,
      "learning_rate": 0.0001967597348524126,
      "loss": 0.1329,
      "step": 547
    },
    {
      "epoch": 0.5202943270828388,
      "grad_norm": 0.03896891698241234,
      "learning_rate": 0.00019673177403478428,
      "loss": 0.1356,
      "step": 548
    },
    {
      "epoch": 0.5212437692855447,
      "grad_norm": 0.04619259387254715,
      "learning_rate": 0.00019670369509941442,
      "loss": 0.163,
      "step": 549
    },
    {
      "epoch": 0.5221932114882507,
      "grad_norm": 0.035968657582998276,
      "learning_rate": 0.00019667549808058976,
      "loss": 0.1242,
      "step": 550
    },
    {
      "epoch": 0.5231426536909566,
      "grad_norm": 0.04564007744193077,
      "learning_rate": 0.0001966471830127413,
      "loss": 0.1364,
      "step": 551
    },
    {
      "epoch": 0.5240920958936625,
      "grad_norm": 0.03991610184311867,
      "learning_rate": 0.00019661874993044415,
      "loss": 0.1312,
      "step": 552
    },
    {
      "epoch": 0.5250415380963683,
      "grad_norm": 0.037240512669086456,
      "learning_rate": 0.00019659019886841752,
      "loss": 0.1279,
      "step": 553
    },
    {
      "epoch": 0.5259909802990743,
      "grad_norm": 0.06598762422800064,
      "learning_rate": 0.00019656152986152468,
      "loss": 0.2165,
      "step": 554
    },
    {
      "epoch": 0.5269404225017802,
      "grad_norm": 0.03867746889591217,
      "learning_rate": 0.00019653274294477292,
      "loss": 0.1233,
      "step": 555
    },
    {
      "epoch": 0.5278898647044861,
      "grad_norm": 0.051915477961301804,
      "learning_rate": 0.00019650383815331357,
      "loss": 0.168,
      "step": 556
    },
    {
      "epoch": 0.528839306907192,
      "grad_norm": 0.054896485060453415,
      "learning_rate": 0.00019647481552244182,
      "loss": 0.1678,
      "step": 557
    },
    {
      "epoch": 0.5297887491098979,
      "grad_norm": 0.05439051240682602,
      "learning_rate": 0.00019644567508759675,
      "loss": 0.1607,
      "step": 558
    },
    {
      "epoch": 0.5307381913126038,
      "grad_norm": 0.03601578250527382,
      "learning_rate": 0.00019641641688436135,
      "loss": 0.1271,
      "step": 559
    },
    {
      "epoch": 0.5316876335153098,
      "grad_norm": 0.06025104597210884,
      "learning_rate": 0.00019638704094846236,
      "loss": 0.176,
      "step": 560
    },
    {
      "epoch": 0.5326370757180157,
      "grad_norm": 0.04126368835568428,
      "learning_rate": 0.00019635754731577032,
      "loss": 0.1319,
      "step": 561
    },
    {
      "epoch": 0.5335865179207215,
      "grad_norm": 0.05305393040180206,
      "learning_rate": 0.00019632793602229943,
      "loss": 0.1699,
      "step": 562
    },
    {
      "epoch": 0.5345359601234275,
      "grad_norm": 0.03538331016898155,
      "learning_rate": 0.00019629820710420764,
      "loss": 0.124,
      "step": 563
    },
    {
      "epoch": 0.5354854023261334,
      "grad_norm": 0.05861300975084305,
      "learning_rate": 0.0001962683605977965,
      "loss": 0.1688,
      "step": 564
    },
    {
      "epoch": 0.5364348445288393,
      "grad_norm": 0.040226079523563385,
      "learning_rate": 0.0001962383965395111,
      "loss": 0.1334,
      "step": 565
    },
    {
      "epoch": 0.5373842867315453,
      "grad_norm": 0.035788875073194504,
      "learning_rate": 0.00019620831496594017,
      "loss": 0.1281,
      "step": 566
    },
    {
      "epoch": 0.5383337289342511,
      "grad_norm": 0.0334162712097168,
      "learning_rate": 0.0001961781159138158,
      "loss": 0.1317,
      "step": 567
    },
    {
      "epoch": 0.539283171136957,
      "grad_norm": 0.03352081775665283,
      "learning_rate": 0.00019614779942001364,
      "loss": 0.1334,
      "step": 568
    },
    {
      "epoch": 0.540232613339663,
      "grad_norm": 0.03684060648083687,
      "learning_rate": 0.00019611736552155274,
      "loss": 0.1349,
      "step": 569
    },
    {
      "epoch": 0.5411820555423689,
      "grad_norm": 0.03640671446919441,
      "learning_rate": 0.00019608681425559542,
      "loss": 0.1278,
      "step": 570
    },
    {
      "epoch": 0.5421314977450747,
      "grad_norm": 0.04167250171303749,
      "learning_rate": 0.00019605614565944748,
      "loss": 0.1384,
      "step": 571
    },
    {
      "epoch": 0.5430809399477807,
      "grad_norm": 0.0416824147105217,
      "learning_rate": 0.00019602535977055778,
      "loss": 0.1319,
      "step": 572
    },
    {
      "epoch": 0.5440303821504866,
      "grad_norm": 0.03897137567400932,
      "learning_rate": 0.00019599445662651861,
      "loss": 0.1389,
      "step": 573
    },
    {
      "epoch": 0.5449798243531925,
      "grad_norm": 0.03894896060228348,
      "learning_rate": 0.00019596343626506526,
      "loss": 0.1341,
      "step": 574
    },
    {
      "epoch": 0.5459292665558985,
      "grad_norm": 0.04211690276861191,
      "learning_rate": 0.00019593229872407627,
      "loss": 0.1377,
      "step": 575
    },
    {
      "epoch": 0.5468787087586043,
      "grad_norm": 0.04308454692363739,
      "learning_rate": 0.00019590104404157327,
      "loss": 0.1268,
      "step": 576
    },
    {
      "epoch": 0.5478281509613102,
      "grad_norm": 0.0525001622736454,
      "learning_rate": 0.00019586967225572086,
      "loss": 0.1775,
      "step": 577
    },
    {
      "epoch": 0.5487775931640161,
      "grad_norm": 0.056315965950489044,
      "learning_rate": 0.00019583818340482664,
      "loss": 0.1688,
      "step": 578
    },
    {
      "epoch": 0.5497270353667221,
      "grad_norm": 0.03801283985376358,
      "learning_rate": 0.0001958065775273412,
      "loss": 0.1309,
      "step": 579
    },
    {
      "epoch": 0.550676477569428,
      "grad_norm": 0.03738854080438614,
      "learning_rate": 0.00019577485466185804,
      "loss": 0.137,
      "step": 580
    },
    {
      "epoch": 0.5516259197721338,
      "grad_norm": 0.03772661089897156,
      "learning_rate": 0.0001957430148471134,
      "loss": 0.1276,
      "step": 581
    },
    {
      "epoch": 0.5525753619748398,
      "grad_norm": 0.039842378348112106,
      "learning_rate": 0.00019571105812198652,
      "loss": 0.1329,
      "step": 582
    },
    {
      "epoch": 0.5535248041775457,
      "grad_norm": 0.033689334988594055,
      "learning_rate": 0.0001956789845254992,
      "loss": 0.1265,
      "step": 583
    },
    {
      "epoch": 0.5544742463802516,
      "grad_norm": 0.046588387340307236,
      "learning_rate": 0.00019564679409681608,
      "loss": 0.1645,
      "step": 584
    },
    {
      "epoch": 0.5554236885829575,
      "grad_norm": 0.03861064463853836,
      "learning_rate": 0.0001956144868752444,
      "loss": 0.1267,
      "step": 585
    },
    {
      "epoch": 0.5563731307856634,
      "grad_norm": 0.03467525169253349,
      "learning_rate": 0.000195582062900234,
      "loss": 0.1299,
      "step": 586
    },
    {
      "epoch": 0.5573225729883693,
      "grad_norm": 0.03659389913082123,
      "learning_rate": 0.0001955495222113774,
      "loss": 0.1286,
      "step": 587
    },
    {
      "epoch": 0.5582720151910753,
      "grad_norm": 0.03826770931482315,
      "learning_rate": 0.0001955168648484095,
      "loss": 0.1313,
      "step": 588
    },
    {
      "epoch": 0.5592214573937812,
      "grad_norm": 0.038110729306936264,
      "learning_rate": 0.00019548409085120772,
      "loss": 0.137,
      "step": 589
    },
    {
      "epoch": 0.560170899596487,
      "grad_norm": 0.03989555314183235,
      "learning_rate": 0.0001954512002597919,
      "loss": 0.132,
      "step": 590
    },
    {
      "epoch": 0.561120341799193,
      "grad_norm": 0.05395180359482765,
      "learning_rate": 0.00019541819311432427,
      "loss": 0.1401,
      "step": 591
    },
    {
      "epoch": 0.5620697840018989,
      "grad_norm": 0.05007918179035187,
      "learning_rate": 0.00019538506945510938,
      "loss": 0.1584,
      "step": 592
    },
    {
      "epoch": 0.5630192262046048,
      "grad_norm": 0.047849785536527634,
      "learning_rate": 0.00019535182932259404,
      "loss": 0.1265,
      "step": 593
    },
    {
      "epoch": 0.5639686684073107,
      "grad_norm": 0.04303041473031044,
      "learning_rate": 0.00019531847275736726,
      "loss": 0.1245,
      "step": 594
    },
    {
      "epoch": 0.5649181106100166,
      "grad_norm": 0.04128289222717285,
      "learning_rate": 0.00019528499980016025,
      "loss": 0.1317,
      "step": 595
    },
    {
      "epoch": 0.5658675528127225,
      "grad_norm": 0.04311414808034897,
      "learning_rate": 0.00019525141049184637,
      "loss": 0.1364,
      "step": 596
    },
    {
      "epoch": 0.5668169950154285,
      "grad_norm": 0.03765838220715523,
      "learning_rate": 0.00019521770487344103,
      "loss": 0.1268,
      "step": 597
    },
    {
      "epoch": 0.5677664372181344,
      "grad_norm": 0.03674585744738579,
      "learning_rate": 0.00019518388298610164,
      "loss": 0.1297,
      "step": 598
    },
    {
      "epoch": 0.5687158794208402,
      "grad_norm": 0.036937762051820755,
      "learning_rate": 0.0001951499448711276,
      "loss": 0.1303,
      "step": 599
    },
    {
      "epoch": 0.5696653216235462,
      "grad_norm": 0.03748161345720291,
      "learning_rate": 0.0001951158905699603,
      "loss": 0.1328,
      "step": 600
    },
    {
      "epoch": 0.5706147638262521,
      "grad_norm": 0.04011257737874985,
      "learning_rate": 0.00019508172012418283,
      "loss": 0.1346,
      "step": 601
    },
    {
      "epoch": 0.571564206028958,
      "grad_norm": 0.03853931650519371,
      "learning_rate": 0.00019504743357552035,
      "loss": 0.1279,
      "step": 602
    },
    {
      "epoch": 0.572513648231664,
      "grad_norm": 0.03750459849834442,
      "learning_rate": 0.0001950130309658396,
      "loss": 0.1227,
      "step": 603
    },
    {
      "epoch": 0.5734630904343698,
      "grad_norm": 0.05542079731822014,
      "learning_rate": 0.00019497851233714908,
      "loss": 0.1647,
      "step": 604
    },
    {
      "epoch": 0.5744125326370757,
      "grad_norm": 0.04472218081355095,
      "learning_rate": 0.00019494387773159898,
      "loss": 0.1416,
      "step": 605
    },
    {
      "epoch": 0.5753619748397816,
      "grad_norm": 0.052323974668979645,
      "learning_rate": 0.00019490912719148114,
      "loss": 0.1367,
      "step": 606
    },
    {
      "epoch": 0.5763114170424876,
      "grad_norm": 0.037580832839012146,
      "learning_rate": 0.00019487426075922893,
      "loss": 0.131,
      "step": 607
    },
    {
      "epoch": 0.5772608592451934,
      "grad_norm": 0.03929577395319939,
      "learning_rate": 0.0001948392784774172,
      "loss": 0.128,
      "step": 608
    },
    {
      "epoch": 0.5782103014478993,
      "grad_norm": 0.03706606104969978,
      "learning_rate": 0.0001948041803887623,
      "loss": 0.1316,
      "step": 609
    },
    {
      "epoch": 0.5791597436506053,
      "grad_norm": 0.038938358426094055,
      "learning_rate": 0.00019476896653612203,
      "loss": 0.1275,
      "step": 610
    },
    {
      "epoch": 0.5801091858533112,
      "grad_norm": 0.04818068817257881,
      "learning_rate": 0.00019473363696249546,
      "loss": 0.1662,
      "step": 611
    },
    {
      "epoch": 0.581058628056017,
      "grad_norm": 0.03735940158367157,
      "learning_rate": 0.00019469819171102304,
      "loss": 0.1361,
      "step": 612
    },
    {
      "epoch": 0.582008070258723,
      "grad_norm": 0.03568827733397484,
      "learning_rate": 0.00019466263082498645,
      "loss": 0.1216,
      "step": 613
    },
    {
      "epoch": 0.5829575124614289,
      "grad_norm": 0.03913251310586929,
      "learning_rate": 0.0001946269543478085,
      "loss": 0.1321,
      "step": 614
    },
    {
      "epoch": 0.5839069546641348,
      "grad_norm": 0.062009479850530624,
      "learning_rate": 0.0001945911623230533,
      "loss": 0.1778,
      "step": 615
    },
    {
      "epoch": 0.5848563968668408,
      "grad_norm": 0.039088111370801926,
      "learning_rate": 0.0001945552547944259,
      "loss": 0.1352,
      "step": 616
    },
    {
      "epoch": 0.5858058390695466,
      "grad_norm": 0.041976600885391235,
      "learning_rate": 0.0001945192318057725,
      "loss": 0.1394,
      "step": 617
    },
    {
      "epoch": 0.5867552812722525,
      "grad_norm": 0.03723563253879547,
      "learning_rate": 0.00019448309340108018,
      "loss": 0.1246,
      "step": 618
    },
    {
      "epoch": 0.5877047234749585,
      "grad_norm": 0.0382399819791317,
      "learning_rate": 0.00019444683962447707,
      "loss": 0.1232,
      "step": 619
    },
    {
      "epoch": 0.5886541656776644,
      "grad_norm": 0.03758077695965767,
      "learning_rate": 0.0001944104705202321,
      "loss": 0.1417,
      "step": 620
    },
    {
      "epoch": 0.5896036078803703,
      "grad_norm": 0.034823786467313766,
      "learning_rate": 0.000194373986132755,
      "loss": 0.1304,
      "step": 621
    },
    {
      "epoch": 0.5905530500830762,
      "grad_norm": 0.03755120187997818,
      "learning_rate": 0.00019433738650659641,
      "loss": 0.133,
      "step": 622
    },
    {
      "epoch": 0.5915024922857821,
      "grad_norm": 0.03759913146495819,
      "learning_rate": 0.00019430067168644754,
      "loss": 0.1222,
      "step": 623
    },
    {
      "epoch": 0.592451934488488,
      "grad_norm": 0.06232694163918495,
      "learning_rate": 0.0001942638417171403,
      "loss": 0.1778,
      "step": 624
    },
    {
      "epoch": 0.593401376691194,
      "grad_norm": 0.05642306059598923,
      "learning_rate": 0.00019422689664364725,
      "loss": 0.1706,
      "step": 625
    },
    {
      "epoch": 0.5943508188938998,
      "grad_norm": 0.0827709287405014,
      "learning_rate": 0.00019418983651108148,
      "loss": 0.2371,
      "step": 626
    },
    {
      "epoch": 0.5953002610966057,
      "grad_norm": 0.03614366054534912,
      "learning_rate": 0.00019415266136469652,
      "loss": 0.1225,
      "step": 627
    },
    {
      "epoch": 0.5962497032993117,
      "grad_norm": 0.042416494339704514,
      "learning_rate": 0.00019411537124988643,
      "loss": 0.1239,
      "step": 628
    },
    {
      "epoch": 0.5971991455020176,
      "grad_norm": 0.037246908992528915,
      "learning_rate": 0.00019407796621218566,
      "loss": 0.1292,
      "step": 629
    },
    {
      "epoch": 0.5981485877047235,
      "grad_norm": 0.05374092981219292,
      "learning_rate": 0.00019404044629726887,
      "loss": 0.1782,
      "step": 630
    },
    {
      "epoch": 0.5990980299074293,
      "grad_norm": 0.052854426205158234,
      "learning_rate": 0.00019400281155095112,
      "loss": 0.1711,
      "step": 631
    },
    {
      "epoch": 0.6000474721101353,
      "grad_norm": 0.038800131529569626,
      "learning_rate": 0.00019396506201918765,
      "loss": 0.1285,
      "step": 632
    }
  ],
  "logging_steps": 1,
  "max_steps": 3159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 158,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.667125138649252e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}