|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 708,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002824858757062147,
      "grad_norm": 0.6147084683934281,
      "learning_rate": 9.999950776495983e-06,
      "loss": 0.1718,
      "step": 1
    },
    {
      "epoch": 0.005649717514124294,
      "grad_norm": 0.6665089926445491,
      "learning_rate": 9.99980310695311e-06,
      "loss": 0.1906,
      "step": 2
    },
    {
      "epoch": 0.00847457627118644,
      "grad_norm": 0.5879608524504671,
      "learning_rate": 9.99955699427891e-06,
      "loss": 0.1905,
      "step": 3
    },
    {
      "epoch": 0.011299435028248588,
      "grad_norm": 0.5728624077474995,
      "learning_rate": 9.999212443319191e-06,
      "loss": 0.1806,
      "step": 4
    },
    {
      "epoch": 0.014124293785310734,
      "grad_norm": 0.45407189292974287,
      "learning_rate": 9.998769460857955e-06,
      "loss": 0.1587,
      "step": 5
    },
    {
      "epoch": 0.01694915254237288,
      "grad_norm": 0.695598470337452,
      "learning_rate": 9.998228055617264e-06,
      "loss": 0.3073,
      "step": 6
    },
    {
      "epoch": 0.01977401129943503,
      "grad_norm": 0.4388502111835372,
      "learning_rate": 9.99758823825706e-06,
      "loss": 0.1658,
      "step": 7
    },
    {
      "epoch": 0.022598870056497175,
      "grad_norm": 0.506295761059801,
      "learning_rate": 9.996850021374969e-06,
      "loss": 0.1822,
      "step": 8
    },
    {
      "epoch": 0.025423728813559324,
      "grad_norm": 0.5054469922635372,
      "learning_rate": 9.996013419506035e-06,
      "loss": 0.1878,
      "step": 9
    },
    {
      "epoch": 0.02824858757062147,
      "grad_norm": 0.5236855115593358,
      "learning_rate": 9.99507844912245e-06,
      "loss": 0.164,
      "step": 10
    },
    {
      "epoch": 0.031073446327683617,
      "grad_norm": 0.6005034865395634,
      "learning_rate": 9.994045128633221e-06,
      "loss": 0.2148,
      "step": 11
    },
    {
      "epoch": 0.03389830508474576,
      "grad_norm": 0.39196864541665416,
      "learning_rate": 9.99291347838381e-06,
      "loss": 0.1439,
      "step": 12
    },
    {
      "epoch": 0.03672316384180791,
      "grad_norm": 0.3520974148515624,
      "learning_rate": 9.991683520655735e-06,
      "loss": 0.138,
      "step": 13
    },
    {
      "epoch": 0.03954802259887006,
      "grad_norm": 0.39891278345339304,
      "learning_rate": 9.990355279666124e-06,
      "loss": 0.1778,
      "step": 14
    },
    {
      "epoch": 0.0423728813559322,
      "grad_norm": 0.38823463524310226,
      "learning_rate": 9.988928781567251e-06,
      "loss": 0.1802,
      "step": 15
    },
    {
      "epoch": 0.04519774011299435,
      "grad_norm": 0.3456396477919156,
      "learning_rate": 9.987404054446009e-06,
      "loss": 0.1537,
      "step": 16
    },
    {
      "epoch": 0.0480225988700565,
      "grad_norm": 0.4433642134268957,
      "learning_rate": 9.98578112832336e-06,
      "loss": 0.2148,
      "step": 17
    },
    {
      "epoch": 0.05084745762711865,
      "grad_norm": 0.5978046488506871,
      "learning_rate": 9.984060035153752e-06,
      "loss": 0.1769,
      "step": 18
    },
    {
      "epoch": 0.05367231638418079,
      "grad_norm": 0.3537901451183181,
      "learning_rate": 9.982240808824477e-06,
      "loss": 0.1323,
      "step": 19
    },
    {
      "epoch": 0.05649717514124294,
      "grad_norm": 0.5417108653864326,
      "learning_rate": 9.980323485155013e-06,
      "loss": 0.1666,
      "step": 20
    },
    {
      "epoch": 0.059322033898305086,
      "grad_norm": 0.35289882003600165,
      "learning_rate": 9.978308101896318e-06,
      "loss": 0.1205,
      "step": 21
    },
    {
      "epoch": 0.062146892655367235,
      "grad_norm": 0.36004933523243315,
      "learning_rate": 9.97619469873008e-06,
      "loss": 0.1371,
      "step": 22
    },
    {
      "epoch": 0.06497175141242938,
      "grad_norm": 0.3854571301167785,
      "learning_rate": 9.973983317267944e-06,
      "loss": 0.1568,
      "step": 23
    },
    {
      "epoch": 0.06779661016949153,
      "grad_norm": 0.6487420228246645,
      "learning_rate": 9.971674001050687e-06,
      "loss": 0.1647,
      "step": 24
    },
    {
      "epoch": 0.07062146892655367,
      "grad_norm": 0.3854893336014017,
      "learning_rate": 9.969266795547364e-06,
      "loss": 0.1854,
      "step": 25
    },
    {
      "epoch": 0.07344632768361582,
      "grad_norm": 0.3832699272168604,
      "learning_rate": 9.96676174815441e-06,
      "loss": 0.1955,
      "step": 26
    },
    {
      "epoch": 0.07627118644067797,
      "grad_norm": 0.31760668668906533,
      "learning_rate": 9.964158908194708e-06,
      "loss": 0.1195,
      "step": 27
    },
    {
      "epoch": 0.07909604519774012,
      "grad_norm": 0.41114801745145746,
      "learning_rate": 9.961458326916624e-06,
      "loss": 0.1414,
      "step": 28
    },
    {
      "epoch": 0.08192090395480225,
      "grad_norm": 0.3240630544735708,
      "learning_rate": 9.958660057492982e-06,
      "loss": 0.1562,
      "step": 29
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 0.3317790911132787,
      "learning_rate": 9.955764155020037e-06,
      "loss": 0.1263,
      "step": 30
    },
    {
      "epoch": 0.08757062146892655,
      "grad_norm": 0.3730810477019189,
      "learning_rate": 9.952770676516372e-06,
      "loss": 0.1549,
      "step": 31
    },
    {
      "epoch": 0.0903954802259887,
      "grad_norm": 0.3594977608871381,
      "learning_rate": 9.94967968092179e-06,
      "loss": 0.1362,
      "step": 32
    },
    {
      "epoch": 0.09322033898305085,
      "grad_norm": 0.5395386567672124,
      "learning_rate": 9.946491229096143e-06,
      "loss": 0.1447,
      "step": 33
    },
    {
      "epoch": 0.096045197740113,
      "grad_norm": 0.34069006187102624,
      "learning_rate": 9.943205383818142e-06,
      "loss": 0.1602,
      "step": 34
    },
    {
      "epoch": 0.09887005649717515,
      "grad_norm": 0.40507986945355756,
      "learning_rate": 9.93982220978411e-06,
      "loss": 0.1727,
      "step": 35
    },
    {
      "epoch": 0.1016949152542373,
      "grad_norm": 0.3101237427572441,
      "learning_rate": 9.936341773606723e-06,
      "loss": 0.1328,
      "step": 36
    },
    {
      "epoch": 0.10451977401129943,
      "grad_norm": 0.32711902171143986,
      "learning_rate": 9.932764143813686e-06,
      "loss": 0.1377,
      "step": 37
    },
    {
      "epoch": 0.10734463276836158,
      "grad_norm": 0.3698266114267764,
      "learning_rate": 9.929089390846389e-06,
      "loss": 0.1593,
      "step": 38
    },
    {
      "epoch": 0.11016949152542373,
      "grad_norm": 0.36372051261110894,
      "learning_rate": 9.925317587058516e-06,
      "loss": 0.1238,
      "step": 39
    },
    {
      "epoch": 0.11299435028248588,
      "grad_norm": 0.36933238520645995,
      "learning_rate": 9.92144880671463e-06,
      "loss": 0.1678,
      "step": 40
    },
    {
      "epoch": 0.11581920903954802,
      "grad_norm": 0.3460461190525915,
      "learning_rate": 9.9174831259887e-06,
      "loss": 0.1578,
      "step": 41
    },
    {
      "epoch": 0.11864406779661017,
      "grad_norm": 0.5499266672914876,
      "learning_rate": 9.913420622962606e-06,
      "loss": 0.1437,
      "step": 42
    },
    {
      "epoch": 0.12146892655367232,
      "grad_norm": 0.4049849988270768,
      "learning_rate": 9.909261377624601e-06,
      "loss": 0.187,
      "step": 43
    },
    {
      "epoch": 0.12429378531073447,
      "grad_norm": 0.47235530071518683,
      "learning_rate": 9.90500547186774e-06,
      "loss": 0.1449,
      "step": 44
    },
    {
      "epoch": 0.1271186440677966,
      "grad_norm": 0.36137299735697387,
      "learning_rate": 9.900652989488255e-06,
      "loss": 0.1505,
      "step": 45
    },
    {
      "epoch": 0.12994350282485875,
      "grad_norm": 0.372928772406179,
      "learning_rate": 9.896204016183924e-06,
      "loss": 0.1984,
      "step": 46
    },
    {
      "epoch": 0.1327683615819209,
      "grad_norm": 0.31608553050802646,
      "learning_rate": 9.891658639552368e-06,
      "loss": 0.1233,
      "step": 47
    },
    {
      "epoch": 0.13559322033898305,
      "grad_norm": 0.34009496825621116,
      "learning_rate": 9.887016949089334e-06,
      "loss": 0.1533,
      "step": 48
    },
    {
      "epoch": 0.1384180790960452,
      "grad_norm": 0.3475244874058269,
      "learning_rate": 9.882279036186927e-06,
      "loss": 0.1348,
      "step": 49
    },
    {
      "epoch": 0.14124293785310735,
      "grad_norm": 0.3465923771954804,
      "learning_rate": 9.87744499413182e-06,
      "loss": 0.1211,
      "step": 50
    },
    {
      "epoch": 0.1440677966101695,
      "grad_norm": 0.4625874588181501,
      "learning_rate": 9.872514918103407e-06,
      "loss": 0.1316,
      "step": 51
    },
    {
      "epoch": 0.14689265536723164,
      "grad_norm": 0.4219123981937324,
      "learning_rate": 9.867488905171934e-06,
      "loss": 0.1499,
      "step": 52
    },
    {
      "epoch": 0.1497175141242938,
      "grad_norm": 0.28327838865809335,
      "learning_rate": 9.86236705429659e-06,
      "loss": 0.118,
      "step": 53
    },
    {
      "epoch": 0.15254237288135594,
      "grad_norm": 0.42996698854118004,
      "learning_rate": 9.85714946632355e-06,
      "loss": 0.1445,
      "step": 54
    },
    {
      "epoch": 0.1553672316384181,
      "grad_norm": 0.3158297884352036,
      "learning_rate": 9.851836243984005e-06,
      "loss": 0.1026,
      "step": 55
    },
    {
      "epoch": 0.15819209039548024,
      "grad_norm": 0.34425358123925115,
      "learning_rate": 9.846427491892117e-06,
      "loss": 0.1296,
      "step": 56
    },
    {
      "epoch": 0.16101694915254236,
      "grad_norm": 0.26454441870053136,
      "learning_rate": 9.840923316542984e-06,
      "loss": 0.0945,
      "step": 57
    },
    {
      "epoch": 0.1638418079096045,
      "grad_norm": 0.47330273293079517,
      "learning_rate": 9.835323826310522e-06,
      "loss": 0.1645,
      "step": 58
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.30609709369908644,
      "learning_rate": 9.829629131445342e-06,
      "loss": 0.122,
      "step": 59
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 0.4330091411356326,
      "learning_rate": 9.823839344072582e-06,
      "loss": 0.1494,
      "step": 60
    },
    {
      "epoch": 0.17231638418079095,
      "grad_norm": 0.42693180952210663,
      "learning_rate": 9.817954578189686e-06,
      "loss": 0.1235,
      "step": 61
    },
    {
      "epoch": 0.1751412429378531,
      "grad_norm": 0.3324864445850298,
      "learning_rate": 9.811974949664176e-06,
      "loss": 0.1327,
      "step": 62
    },
    {
      "epoch": 0.17796610169491525,
      "grad_norm": 0.41054779677427783,
      "learning_rate": 9.805900576231358e-06,
      "loss": 0.1741,
      "step": 63
    },
    {
      "epoch": 0.1807909604519774,
      "grad_norm": 0.33830483714924636,
      "learning_rate": 9.79973157749201e-06,
      "loss": 0.1265,
      "step": 64
    },
    {
      "epoch": 0.18361581920903955,
      "grad_norm": 0.3152187519500513,
      "learning_rate": 9.793468074910028e-06,
      "loss": 0.1202,
      "step": 65
    },
    {
      "epoch": 0.1864406779661017,
      "grad_norm": 0.368592840409881,
      "learning_rate": 9.787110191810027e-06,
      "loss": 0.1292,
      "step": 66
    },
    {
      "epoch": 0.18926553672316385,
      "grad_norm": 0.3471829002234472,
      "learning_rate": 9.780658053374923e-06,
      "loss": 0.1532,
      "step": 67
    },
    {
      "epoch": 0.192090395480226,
      "grad_norm": 0.3461593726912688,
      "learning_rate": 9.77411178664346e-06,
      "loss": 0.1352,
      "step": 68
    },
    {
      "epoch": 0.19491525423728814,
      "grad_norm": 0.647720230993773,
      "learning_rate": 9.767471520507713e-06,
      "loss": 0.1291,
      "step": 69
    },
    {
      "epoch": 0.1977401129943503,
      "grad_norm": 0.31371308918917606,
      "learning_rate": 9.760737385710546e-06,
      "loss": 0.1363,
      "step": 70
    },
    {
      "epoch": 0.20056497175141244,
      "grad_norm": 0.36616052339629535,
      "learning_rate": 9.753909514843047e-06,
      "loss": 0.163,
      "step": 71
    },
    {
      "epoch": 0.2033898305084746,
      "grad_norm": 0.3366775524198522,
      "learning_rate": 9.746988042341907e-06,
      "loss": 0.1211,
      "step": 72
    },
    {
      "epoch": 0.2062146892655367,
      "grad_norm": 0.32238872392227863,
      "learning_rate": 9.739973104486777e-06,
      "loss": 0.1244,
      "step": 73
    },
    {
      "epoch": 0.20903954802259886,
      "grad_norm": 0.309699834626684,
      "learning_rate": 9.732864839397585e-06,
      "loss": 0.1123,
      "step": 74
    },
    {
      "epoch": 0.211864406779661,
      "grad_norm": 0.40415497402853257,
      "learning_rate": 9.725663387031818e-06,
      "loss": 0.1185,
      "step": 75
    },
    {
      "epoch": 0.21468926553672316,
      "grad_norm": 0.43571692379160615,
      "learning_rate": 9.718368889181763e-06,
      "loss": 0.1205,
      "step": 76
    },
    {
      "epoch": 0.2175141242937853,
      "grad_norm": 0.3220023585216405,
      "learning_rate": 9.710981489471721e-06,
      "loss": 0.1513,
      "step": 77
    },
    {
      "epoch": 0.22033898305084745,
      "grad_norm": 0.41630953777961766,
      "learning_rate": 9.703501333355167e-06,
      "loss": 0.1249,
      "step": 78
    },
    {
      "epoch": 0.2231638418079096,
      "grad_norm": 0.5019640716602216,
      "learning_rate": 9.6959285681119e-06,
      "loss": 0.1914,
      "step": 79
    },
    {
      "epoch": 0.22598870056497175,
      "grad_norm": 0.4023458378359013,
      "learning_rate": 9.68826334284514e-06,
      "loss": 0.1346,
      "step": 80
    },
    {
      "epoch": 0.2288135593220339,
      "grad_norm": 0.34337841648791245,
      "learning_rate": 9.680505808478583e-06,
      "loss": 0.1272,
      "step": 81
    },
    {
      "epoch": 0.23163841807909605,
      "grad_norm": 0.32382210914063797,
      "learning_rate": 9.672656117753435e-06,
      "loss": 0.1155,
      "step": 82
    },
    {
      "epoch": 0.2344632768361582,
      "grad_norm": 0.4098493186152155,
      "learning_rate": 9.664714425225414e-06,
      "loss": 0.155,
      "step": 83
    },
    {
      "epoch": 0.23728813559322035,
      "grad_norm": 0.30680580576204597,
      "learning_rate": 9.656680887261693e-06,
      "loss": 0.1268,
      "step": 84
    },
    {
      "epoch": 0.2401129943502825,
      "grad_norm": 0.34530888636266843,
      "learning_rate": 9.648555662037826e-06,
      "loss": 0.1255,
      "step": 85
    },
    {
      "epoch": 0.24293785310734464,
      "grad_norm": 0.31982112281364816,
      "learning_rate": 9.640338909534636e-06,
      "loss": 0.1187,
      "step": 86
    },
    {
      "epoch": 0.2457627118644068,
      "grad_norm": 0.3249369835609272,
      "learning_rate": 9.632030791535063e-06,
      "loss": 0.1094,
      "step": 87
    },
    {
      "epoch": 0.24858757062146894,
      "grad_norm": 0.4202492399280893,
      "learning_rate": 9.62363147162098e-06,
      "loss": 0.1473,
      "step": 88
    },
    {
      "epoch": 0.2514124293785311,
      "grad_norm": 0.4206786168286477,
      "learning_rate": 9.615141115169968e-06,
      "loss": 0.1321,
      "step": 89
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 0.35455170569024247,
      "learning_rate": 9.606559889352065e-06,
      "loss": 0.1333,
      "step": 90
    },
    {
      "epoch": 0.2570621468926554,
      "grad_norm": 0.30654274314933255,
      "learning_rate": 9.597887963126476e-06,
      "loss": 0.1324,
      "step": 91
    },
    {
      "epoch": 0.2598870056497175,
      "grad_norm": 0.31426903973238157,
      "learning_rate": 9.589125507238234e-06,
      "loss": 0.0965,
      "step": 92
    },
    {
      "epoch": 0.2627118644067797,
      "grad_norm": 0.39888445390797206,
      "learning_rate": 9.580272694214855e-06,
      "loss": 0.1795,
      "step": 93
    },
    {
      "epoch": 0.2655367231638418,
      "grad_norm": 0.32688334776107986,
      "learning_rate": 9.571329698362931e-06,
      "loss": 0.0993,
      "step": 94
    },
    {
      "epoch": 0.268361581920904,
      "grad_norm": 0.37334590333180434,
      "learning_rate": 9.562296695764695e-06,
      "loss": 0.1201,
      "step": 95
    },
    {
      "epoch": 0.2711864406779661,
      "grad_norm": 0.3189896682514011,
      "learning_rate": 9.553173864274567e-06,
      "loss": 0.1261,
      "step": 96
    },
    {
      "epoch": 0.2740112994350282,
      "grad_norm": 0.34974753780587814,
      "learning_rate": 9.543961383515638e-06,
      "loss": 0.1476,
      "step": 97
    },
    {
      "epoch": 0.2768361581920904,
      "grad_norm": 0.3593944813062625,
      "learning_rate": 9.53465943487614e-06,
      "loss": 0.123,
      "step": 98
    },
    {
      "epoch": 0.2796610169491525,
      "grad_norm": 0.35812916144207474,
      "learning_rate": 9.52526820150588e-06,
      "loss": 0.1257,
      "step": 99
    },
    {
      "epoch": 0.2824858757062147,
      "grad_norm": 0.32235620707593265,
      "learning_rate": 9.51578786831262e-06,
      "loss": 0.1493,
      "step": 100
    },
    {
      "epoch": 0.2853107344632768,
      "grad_norm": 0.37142164119731536,
      "learning_rate": 9.506218621958448e-06,
      "loss": 0.1278,
      "step": 101
    },
    {
      "epoch": 0.288135593220339,
      "grad_norm": 0.44435482682594635,
      "learning_rate": 9.496560650856097e-06,
      "loss": 0.1443,
      "step": 102
    },
    {
      "epoch": 0.2909604519774011,
      "grad_norm": 0.3566707987782221,
      "learning_rate": 9.486814145165242e-06,
      "loss": 0.1122,
      "step": 103
    },
    {
      "epoch": 0.2937853107344633,
      "grad_norm": 0.33349931081864886,
      "learning_rate": 9.476979296788746e-06,
      "loss": 0.1185,
      "step": 104
    },
    {
      "epoch": 0.2966101694915254,
      "grad_norm": 0.3196942221194822,
      "learning_rate": 9.467056299368888e-06,
      "loss": 0.1405,
      "step": 105
    },
    {
      "epoch": 0.2994350282485876,
      "grad_norm": 0.300108694500991,
      "learning_rate": 9.457045348283552e-06,
      "loss": 0.1032,
      "step": 106
    },
    {
      "epoch": 0.3022598870056497,
      "grad_norm": 0.2628183218575623,
      "learning_rate": 9.446946640642372e-06,
      "loss": 0.0891,
      "step": 107
    },
    {
      "epoch": 0.3050847457627119,
      "grad_norm": 0.3173943921412448,
      "learning_rate": 9.436760375282858e-06,
      "loss": 0.1389,
      "step": 108
    },
    {
      "epoch": 0.307909604519774,
      "grad_norm": 0.3374023308082592,
      "learning_rate": 9.426486752766481e-06,
      "loss": 0.1433,
      "step": 109
    },
    {
      "epoch": 0.3107344632768362,
      "grad_norm": 0.32296890768155634,
      "learning_rate": 9.416125975374722e-06,
      "loss": 0.1489,
      "step": 110
    },
    {
      "epoch": 0.3135593220338983,
      "grad_norm": 0.809125754824113,
      "learning_rate": 9.405678247105083e-06,
      "loss": 0.1197,
      "step": 111
    },
    {
      "epoch": 0.3163841807909605,
      "grad_norm": 0.2899410860114942,
      "learning_rate": 9.395143773667089e-06,
      "loss": 0.1101,
      "step": 112
    },
    {
      "epoch": 0.3192090395480226,
      "grad_norm": 0.35261776609510237,
      "learning_rate": 9.38452276247821e-06,
      "loss": 0.15,
      "step": 113
    },
    {
      "epoch": 0.3220338983050847,
      "grad_norm": 0.40136031754220597,
      "learning_rate": 9.373815422659806e-06,
      "loss": 0.1822,
      "step": 114
    },
    {
      "epoch": 0.3248587570621469,
      "grad_norm": 0.313017574757418,
      "learning_rate": 9.363021965032993e-06,
      "loss": 0.1188,
      "step": 115
    },
    {
      "epoch": 0.327683615819209,
      "grad_norm": 0.373452387713479,
      "learning_rate": 9.352142602114487e-06,
      "loss": 0.137,
      "step": 116
    },
    {
      "epoch": 0.3305084745762712,
      "grad_norm": 0.3420346614883704,
      "learning_rate": 9.341177548112437e-06,
      "loss": 0.1344,
      "step": 117
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.4036780338239374,
      "learning_rate": 9.330127018922195e-06,
      "loss": 0.133,
      "step": 118
    },
    {
      "epoch": 0.3361581920903955,
      "grad_norm": 0.32756640437501205,
      "learning_rate": 9.318991232122065e-06,
      "loss": 0.1321,
      "step": 119
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 0.345171287549607,
      "learning_rate": 9.307770406969032e-06,
      "loss": 0.1202,
      "step": 120
    },
    {
      "epoch": 0.3418079096045198,
      "grad_norm": 0.437595297190657,
      "learning_rate": 9.296464764394422e-06,
      "loss": 0.1824,
      "step": 121
    },
    {
      "epoch": 0.3446327683615819,
      "grad_norm": 0.2838090630299304,
      "learning_rate": 9.285074526999577e-06,
      "loss": 0.114,
      "step": 122
    },
    {
      "epoch": 0.3474576271186441,
      "grad_norm": 0.33725871702683274,
      "learning_rate": 9.273599919051452e-06,
      "loss": 0.1254,
      "step": 123
    },
    {
      "epoch": 0.3502824858757062,
      "grad_norm": 0.2980485886022378,
      "learning_rate": 9.262041166478215e-06,
      "loss": 0.1139,
      "step": 124
    },
    {
      "epoch": 0.3531073446327684,
      "grad_norm": 0.35961892985059185,
      "learning_rate": 9.250398496864782e-06,
      "loss": 0.114,
      "step": 125
    },
    {
      "epoch": 0.3559322033898305,
      "grad_norm": 0.44215669470603974,
      "learning_rate": 9.238672139448354e-06,
      "loss": 0.1133,
      "step": 126
    },
    {
      "epoch": 0.3587570621468927,
      "grad_norm": 0.4377017180308674,
      "learning_rate": 9.226862325113894e-06,
      "loss": 0.1438,
      "step": 127
    },
    {
      "epoch": 0.3615819209039548,
      "grad_norm": 0.3248130129102471,
      "learning_rate": 9.214969286389577e-06,
      "loss": 0.1079,
      "step": 128
    },
    {
      "epoch": 0.3644067796610169,
      "grad_norm": 0.3687271690510821,
      "learning_rate": 9.202993257442216e-06,
      "loss": 0.1576,
      "step": 129
    },
    {
      "epoch": 0.3672316384180791,
      "grad_norm": 0.31854432765060964,
      "learning_rate": 9.190934474072658e-06,
      "loss": 0.1128,
      "step": 130
    },
    {
      "epoch": 0.3700564971751412,
      "grad_norm": 0.43632199490503926,
      "learning_rate": 9.178793173711133e-06,
      "loss": 0.16,
      "step": 131
    },
    {
      "epoch": 0.3728813559322034,
      "grad_norm": 0.3184995213048756,
      "learning_rate": 9.166569595412576e-06,
      "loss": 0.1087,
      "step": 132
    },
    {
      "epoch": 0.3757062146892655,
      "grad_norm": 0.2852410374038083,
      "learning_rate": 9.154263979851932e-06,
      "loss": 0.1017,
      "step": 133
    },
    {
      "epoch": 0.3785310734463277,
      "grad_norm": 0.4894599992720483,
      "learning_rate": 9.141876569319405e-06,
      "loss": 0.1338,
      "step": 134
    },
    {
      "epoch": 0.3813559322033898,
      "grad_norm": 0.3025442389978783,
      "learning_rate": 9.129407607715697e-06,
      "loss": 0.1029,
      "step": 135
    },
    {
      "epoch": 0.384180790960452,
      "grad_norm": 0.3362331028333517,
      "learning_rate": 9.116857340547203e-06,
      "loss": 0.1171,
      "step": 136
    },
    {
      "epoch": 0.3870056497175141,
      "grad_norm": 0.3058727798799934,
      "learning_rate": 9.104226014921171e-06,
      "loss": 0.1242,
      "step": 137
    },
    {
      "epoch": 0.3898305084745763,
      "grad_norm": 0.32225074355114075,
      "learning_rate": 9.091513879540845e-06,
      "loss": 0.1199,
      "step": 138
    },
    {
      "epoch": 0.3926553672316384,
      "grad_norm": 0.4160727630117183,
      "learning_rate": 9.078721184700565e-06,
      "loss": 0.1703,
      "step": 139
    },
    {
      "epoch": 0.3954802259887006,
      "grad_norm": 0.3825137129544069,
      "learning_rate": 9.065848182280835e-06,
      "loss": 0.1417,
      "step": 140
    },
    {
      "epoch": 0.3983050847457627,
      "grad_norm": 0.3109793025220252,
      "learning_rate": 9.05289512574337e-06,
      "loss": 0.1079,
      "step": 141
    },
    {
      "epoch": 0.4011299435028249,
      "grad_norm": 0.32403338158384704,
      "learning_rate": 9.039862270126102e-06,
      "loss": 0.1304,
      "step": 142
    },
    {
      "epoch": 0.403954802259887,
      "grad_norm": 0.333516403508159,
      "learning_rate": 9.026749872038161e-06,
      "loss": 0.1215,
      "step": 143
    },
    {
      "epoch": 0.4067796610169492,
      "grad_norm": 0.5512785845644815,
      "learning_rate": 9.013558189654819e-06,
      "loss": 0.119,
      "step": 144
    },
    {
      "epoch": 0.4096045197740113,
      "grad_norm": 0.3342692670079559,
      "learning_rate": 9.000287482712407e-06,
      "loss": 0.1327,
      "step": 145
    },
    {
      "epoch": 0.4124293785310734,
      "grad_norm": 0.3844315934040279,
      "learning_rate": 8.986938012503203e-06,
      "loss": 0.1354,
      "step": 146
    },
    {
      "epoch": 0.4152542372881356,
      "grad_norm": 0.3704715200009876,
      "learning_rate": 8.973510041870287e-06,
      "loss": 0.116,
      "step": 147
    },
    {
      "epoch": 0.4180790960451977,
      "grad_norm": 0.3731805906895666,
      "learning_rate": 8.960003835202369e-06,
      "loss": 0.1447,
      "step": 148
    },
    {
      "epoch": 0.4209039548022599,
      "grad_norm": 0.3074985687330806,
      "learning_rate": 8.946419658428573e-06,
      "loss": 0.0944,
      "step": 149
    },
    {
      "epoch": 0.423728813559322,
      "grad_norm": 0.43341288159333946,
      "learning_rate": 8.932757779013214e-06,
      "loss": 0.1266,
      "step": 150
    },
    {
      "epoch": 0.4265536723163842,
      "grad_norm": 0.336284725389875,
      "learning_rate": 8.919018465950517e-06,
      "loss": 0.1398,
      "step": 151
    },
    {
      "epoch": 0.4293785310734463,
      "grad_norm": 0.35240140744565995,
      "learning_rate": 8.90520198975934e-06,
      "loss": 0.161,
      "step": 152
    },
    {
      "epoch": 0.4322033898305085,
      "grad_norm": 0.33406617309219283,
      "learning_rate": 8.89130862247783e-06,
      "loss": 0.1452,
      "step": 153
    },
    {
      "epoch": 0.4350282485875706,
      "grad_norm": 0.4037611933501478,
      "learning_rate": 8.877338637658074e-06,
      "loss": 0.1355,
      "step": 154
    },
    {
      "epoch": 0.4378531073446328,
      "grad_norm": 0.3475789016676025,
      "learning_rate": 8.863292310360716e-06,
      "loss": 0.1298,
      "step": 155
    },
    {
      "epoch": 0.4406779661016949,
      "grad_norm": 0.4841263150458153,
      "learning_rate": 8.849169917149532e-06,
      "loss": 0.1207,
      "step": 156
    },
    {
      "epoch": 0.4435028248587571,
      "grad_norm": 0.30177409440633907,
      "learning_rate": 8.834971736085995e-06,
      "loss": 0.1092,
      "step": 157
    },
    {
      "epoch": 0.4463276836158192,
      "grad_norm": 0.3405364099196524,
      "learning_rate": 8.820698046723796e-06,
      "loss": 0.1196,
      "step": 158
    },
    {
      "epoch": 0.4491525423728814,
      "grad_norm": 0.3399165082210464,
      "learning_rate": 8.806349130103334e-06,
      "loss": 0.1215,
      "step": 159
    },
    {
      "epoch": 0.4519774011299435,
      "grad_norm": 0.30309252729845493,
      "learning_rate": 8.791925268746193e-06,
      "loss": 0.133,
      "step": 160
    },
    {
      "epoch": 0.4548022598870056,
      "grad_norm": 0.4127573259391668,
      "learning_rate": 8.777426746649571e-06,
      "loss": 0.1131,
      "step": 161
    },
    {
      "epoch": 0.4576271186440678,
      "grad_norm": 0.3210525339974507,
      "learning_rate": 8.762853849280692e-06,
      "loss": 0.1097,
      "step": 162
    },
    {
      "epoch": 0.4604519774011299,
      "grad_norm": 0.38978996569997443,
      "learning_rate": 8.748206863571188e-06,
      "loss": 0.1259,
      "step": 163
    },
    {
      "epoch": 0.4632768361581921,
      "grad_norm": 0.3200510621145131,
      "learning_rate": 8.73348607791144e-06,
      "loss": 0.1028,
      "step": 164
    },
    {
      "epoch": 0.4661016949152542,
      "grad_norm": 0.5781086641593365,
      "learning_rate": 8.718691782144908e-06,
      "loss": 0.1504,
      "step": 165
    },
    {
      "epoch": 0.4689265536723164,
      "grad_norm": 0.3161434045596249,
      "learning_rate": 8.703824267562424e-06,
      "loss": 0.1341,
      "step": 166
    },
    {
      "epoch": 0.4717514124293785,
      "grad_norm": 0.2920424513956196,
      "learning_rate": 8.688883826896458e-06,
      "loss": 0.0985,
      "step": 167
    },
    {
      "epoch": 0.4745762711864407,
      "grad_norm": 0.3862783048815386,
      "learning_rate": 8.673870754315336e-06,
      "loss": 0.1352,
      "step": 168
    },
    {
      "epoch": 0.4774011299435028,
      "grad_norm": 0.4426996657474187,
      "learning_rate": 8.658785345417484e-06,
      "loss": 0.1414,
      "step": 169
    },
    {
      "epoch": 0.480225988700565,
      "grad_norm": 0.461223196307408,
      "learning_rate": 8.64362789722557e-06,
      "loss": 0.1397,
      "step": 170
    },
    {
      "epoch": 0.4830508474576271,
      "grad_norm": 0.3236362093117533,
      "learning_rate": 8.62839870818068e-06,
      "loss": 0.1185,
      "step": 171
    },
    {
      "epoch": 0.4858757062146893,
      "grad_norm": 0.5017393956700863,
      "learning_rate": 8.613098078136436e-06,
      "loss": 0.1301,
      "step": 172
    },
    {
      "epoch": 0.4887005649717514,
      "grad_norm": 0.3476632540894821,
      "learning_rate": 8.597726308353085e-06,
      "loss": 0.1265,
      "step": 173
    },
    {
      "epoch": 0.4915254237288136,
      "grad_norm": 0.38990017498501656,
      "learning_rate": 8.582283701491576e-06,
      "loss": 0.152,
      "step": 174
    },
    {
      "epoch": 0.4943502824858757,
      "grad_norm": 0.3190826675735708,
      "learning_rate": 8.566770561607598e-06,
      "loss": 0.1281,
      "step": 175
    },
    {
      "epoch": 0.4971751412429379,
      "grad_norm": 0.3967141406957403,
      "learning_rate": 8.551187194145591e-06,
      "loss": 0.1546,
      "step": 176
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.3328836982483565,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.1314,
      "step": 177
    },
    {
      "epoch": 0.5028248587570622,
      "grad_norm": 0.374457685382645,
      "learning_rate": 8.519811005172916e-06,
      "loss": 0.1165,
      "step": 178
    },
    {
      "epoch": 0.5056497175141242,
      "grad_norm": 0.363385614117873,
      "learning_rate": 8.50401880144063e-06,
      "loss": 0.1226,
      "step": 179
    },
    {
      "epoch": 0.5084745762711864,
      "grad_norm": 0.32620237927339973,
      "learning_rate": 8.488157605674924e-06,
      "loss": 0.1203,
      "step": 180
    },
    {
      "epoch": 0.5112994350282486,
      "grad_norm": 0.2961670016082417,
      "learning_rate": 8.472227730173252e-06,
      "loss": 0.104,
      "step": 181
    },
    {
      "epoch": 0.5141242937853108,
      "grad_norm": 0.3350355060825277,
      "learning_rate": 8.456229488585328e-06,
      "loss": 0.1136,
      "step": 182
    },
    {
      "epoch": 0.5169491525423728,
      "grad_norm": 0.3540976396926151,
      "learning_rate": 8.440163195906959e-06,
      "loss": 0.0946,
      "step": 183
    },
    {
      "epoch": 0.519774011299435,
      "grad_norm": 0.33467249667837795,
      "learning_rate": 8.424029168473829e-06,
      "loss": 0.1341,
      "step": 184
    },
    {
      "epoch": 0.5225988700564972,
      "grad_norm": 0.3582483372686221,
      "learning_rate": 8.407827723955287e-06,
      "loss": 0.101,
      "step": 185
    },
    {
      "epoch": 0.5254237288135594,
      "grad_norm": 0.3871399637156461,
      "learning_rate": 8.391559181348081e-06,
      "loss": 0.1368,
      "step": 186
    },
    {
      "epoch": 0.5282485875706214,
      "grad_norm": 0.44494561948427913,
      "learning_rate": 8.375223860970078e-06,
      "loss": 0.1542,
      "step": 187
    },
    {
      "epoch": 0.5310734463276836,
      "grad_norm": 0.342778980721638,
      "learning_rate": 8.358822084453964e-06,
      "loss": 0.1343,
      "step": 188
    },
    {
      "epoch": 0.5338983050847458,
      "grad_norm": 0.3190954383119015,
      "learning_rate": 8.342354174740904e-06,
      "loss": 0.1217,
      "step": 189
    },
    {
      "epoch": 0.536723163841808,
      "grad_norm": 0.32918038764737034,
      "learning_rate": 8.325820456074181e-06,
      "loss": 0.1158,
      "step": 190
    },
    {
      "epoch": 0.53954802259887,
      "grad_norm": 0.3879838615458632,
      "learning_rate": 8.309221253992825e-06,
      "loss": 0.1256,
      "step": 191
    },
    {
      "epoch": 0.5423728813559322,
      "grad_norm": 0.35618247459755487,
      "learning_rate": 8.292556895325195e-06,
      "loss": 0.1099,
      "step": 192
    },
    {
      "epoch": 0.5451977401129944,
      "grad_norm": 0.3234827304096444,
      "learning_rate": 8.275827708182536e-06,
      "loss": 0.1284,
      "step": 193
    },
    {
      "epoch": 0.5480225988700564,
      "grad_norm": 0.34766152172703946,
      "learning_rate": 8.259034021952537e-06,
      "loss": 0.1356,
      "step": 194
    },
    {
      "epoch": 0.5508474576271186,
      "grad_norm": 0.36732776065701966,
      "learning_rate": 8.242176167292827e-06,
      "loss": 0.1151,
      "step": 195
    },
    {
      "epoch": 0.5536723163841808,
      "grad_norm": 0.33889281731020826,
      "learning_rate": 8.225254476124479e-06,
      "loss": 0.1063,
      "step": 196
    },
    {
      "epoch": 0.556497175141243,
      "grad_norm": 0.3193880715488108,
      "learning_rate": 8.208269281625466e-06,
      "loss": 0.1276,
      "step": 197
    },
    {
      "epoch": 0.559322033898305,
      "grad_norm": 0.3204272104884364,
      "learning_rate": 8.191220918224102e-06,
      "loss": 0.1223,
      "step": 198
    },
    {
      "epoch": 0.5621468926553672,
      "grad_norm": 0.2837424255636162,
      "learning_rate": 8.174109721592463e-06,
      "loss": 0.1004,
      "step": 199
    },
    {
      "epoch": 0.5649717514124294,
      "grad_norm": 0.37977501376826955,
      "learning_rate": 8.156936028639768e-06,
      "loss": 0.1343,
      "step": 200
    },
    {
      "epoch": 0.5677966101694916,
      "grad_norm": 0.3802933186191023,
      "learning_rate": 8.13970017750576e-06,
      "loss": 0.1229,
      "step": 201
    },
    {
      "epoch": 0.5706214689265536,
      "grad_norm": 0.3336231226315229,
      "learning_rate": 8.12240250755403e-06,
      "loss": 0.1071,
      "step": 202
    },
    {
      "epoch": 0.5734463276836158,
      "grad_norm": 0.31611617409721354,
      "learning_rate": 8.10504335936535e-06,
      "loss": 0.1315,
      "step": 203
    },
    {
      "epoch": 0.576271186440678,
      "grad_norm": 0.37986304879280974,
      "learning_rate": 8.08762307473096e-06,
      "loss": 0.1488,
      "step": 204
    },
    {
      "epoch": 0.5790960451977402,
      "grad_norm": 0.47382310798593613,
      "learning_rate": 8.07014199664584e-06,
      "loss": 0.1199,
      "step": 205
    },
    {
      "epoch": 0.5819209039548022,
      "grad_norm": 0.328423980785905,
      "learning_rate": 8.052600469301958e-06,
      "loss": 0.1094,
      "step": 206
    },
    {
      "epoch": 0.5847457627118644,
      "grad_norm": 0.36866153232988097,
      "learning_rate": 8.03499883808149e-06,
      "loss": 0.1215,
      "step": 207
    },
    {
      "epoch": 0.5875706214689266,
      "grad_norm": 0.4258404561855114,
      "learning_rate": 8.01733744955002e-06,
      "loss": 0.1728,
      "step": 208
    },
    {
      "epoch": 0.5903954802259888,
      "grad_norm": 0.4379066754162319,
      "learning_rate": 7.999616651449722e-06,
      "loss": 0.1334,
      "step": 209
    },
    {
      "epoch": 0.5932203389830508,
      "grad_norm": 0.3298246566080818,
      "learning_rate": 7.981836792692508e-06,
      "loss": 0.1321,
      "step": 210
    },
    {
      "epoch": 0.596045197740113,
      "grad_norm": 0.3213552929669426,
      "learning_rate": 7.963998223353154e-06,
      "loss": 0.1475,
      "step": 211
    },
    {
      "epoch": 0.5988700564971752,
      "grad_norm": 0.6800071694421395,
      "learning_rate": 7.946101294662418e-06,
      "loss": 0.1521,
      "step": 212
    },
    {
      "epoch": 0.6016949152542372,
      "grad_norm": 0.31394438576238753,
      "learning_rate": 7.928146359000117e-06,
      "loss": 0.1269,
      "step": 213
    },
    {
      "epoch": 0.6045197740112994,
      "grad_norm": 0.34115929397266076,
      "learning_rate": 7.91013376988819e-06,
      "loss": 0.1079,
      "step": 214
    },
    {
      "epoch": 0.6073446327683616,
      "grad_norm": 0.3070888783690557,
      "learning_rate": 7.892063881983736e-06,
      "loss": 0.1037,
      "step": 215
    },
    {
      "epoch": 0.6101694915254238,
      "grad_norm": 0.35233334420808005,
      "learning_rate": 7.873937051072037e-06,
      "loss": 0.1291,
      "step": 216
    },
    {
      "epoch": 0.6129943502824858,
      "grad_norm": 0.37926219175637094,
      "learning_rate": 7.855753634059543e-06,
      "loss": 0.1997,
      "step": 217
    },
    {
      "epoch": 0.615819209039548,
      "grad_norm": 0.281725523224709,
      "learning_rate": 7.83751398896686e-06,
      "loss": 0.0946,
      "step": 218
    },
    {
      "epoch": 0.6186440677966102,
      "grad_norm": 0.33366851056170144,
      "learning_rate": 7.81921847492168e-06,
      "loss": 0.1355,
      "step": 219
    },
    {
      "epoch": 0.6214689265536724,
      "grad_norm": 0.4693528685955929,
      "learning_rate": 7.80086745215173e-06,
      "loss": 0.1235,
      "step": 220
    },
    {
      "epoch": 0.6242937853107344,
      "grad_norm": 0.348566291026797,
      "learning_rate": 7.782461281977668e-06,
      "loss": 0.1463,
      "step": 221
    },
    {
      "epoch": 0.6271186440677966,
      "grad_norm": 0.3432846241066091,
      "learning_rate": 7.764000326805967e-06,
      "loss": 0.1253,
      "step": 222
    },
    {
      "epoch": 0.6299435028248588,
      "grad_norm": 0.3907373096790576,
      "learning_rate": 7.74548495012179e-06,
      "loss": 0.1391,
      "step": 223
    },
    {
      "epoch": 0.632768361581921,
      "grad_norm": 0.3728412944790509,
      "learning_rate": 7.726915516481824e-06,
      "loss": 0.122,
      "step": 224
    },
    {
      "epoch": 0.635593220338983,
      "grad_norm": 0.5220133391248876,
      "learning_rate": 7.708292391507105e-06,
      "loss": 0.154,
      "step": 225
    },
    {
      "epoch": 0.6384180790960452,
      "grad_norm": 0.4146492597116334,
      "learning_rate": 7.68961594187582e-06,
      "loss": 0.1359,
      "step": 226
    },
    {
      "epoch": 0.6412429378531074,
      "grad_norm": 0.31397589485063837,
      "learning_rate": 7.670886535316086e-06,
      "loss": 0.1219,
      "step": 227
    },
    {
      "epoch": 0.6440677966101694,
      "grad_norm": 0.3176441428020906,
      "learning_rate": 7.652104540598712e-06,
      "loss": 0.1178,
      "step": 228
    },
    {
      "epoch": 0.6468926553672316,
      "grad_norm": 0.4044788848890745,
      "learning_rate": 7.633270327529936e-06,
      "loss": 0.0976,
      "step": 229
    },
    {
      "epoch": 0.6497175141242938,
      "grad_norm": 0.4182319443410884,
      "learning_rate": 7.614384266944139e-06,
      "loss": 0.1645,
      "step": 230
    },
    {
      "epoch": 0.652542372881356,
      "grad_norm": 0.456563352937022,
      "learning_rate": 7.595446730696554e-06,
      "loss": 0.1382,
      "step": 231
    },
    {
      "epoch": 0.655367231638418,
      "grad_norm": 0.32903524423468056,
      "learning_rate": 7.5764580916559405e-06,
      "loss": 0.1326,
      "step": 232
    },
    {
      "epoch": 0.6581920903954802,
      "grad_norm": 0.28365921307195663,
      "learning_rate": 7.5574187236972344e-06,
      "loss": 0.0935,
      "step": 233
    },
    {
      "epoch": 0.6610169491525424,
      "grad_norm": 0.33982089630336487,
      "learning_rate": 7.5383290016942e-06,
      "loss": 0.1454,
      "step": 234
    },
    {
      "epoch": 0.6638418079096046,
      "grad_norm": 0.3772064426329603,
      "learning_rate": 7.519189301512042e-06,
      "loss": 0.1138,
      "step": 235
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.32618022313934375,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.1334,
      "step": 236
    },
    {
      "epoch": 0.6694915254237288,
      "grad_norm": 0.45333599138089803,
      "learning_rate": 7.480761474983943e-06,
      "loss": 0.1124,
      "step": 237
    },
    {
      "epoch": 0.672316384180791,
      "grad_norm": 0.2925984327006227,
      "learning_rate": 7.461474105258911e-06,
      "loss": 0.1186,
      "step": 238
    },
    {
      "epoch": 0.6751412429378532,
      "grad_norm": 0.29274180541284806,
      "learning_rate": 7.442138270581676e-06,
      "loss": 0.1152,
      "step": 239
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.31809376762962216,
      "learning_rate": 7.422754351663252e-06,
      "loss": 0.1305,
      "step": 240
    },
    {
      "epoch": 0.6807909604519774,
      "grad_norm": 0.38676571872596516,
      "learning_rate": 7.403322730161402e-06,
      "loss": 0.1282,
      "step": 241
    },
    {
      "epoch": 0.6836158192090396,
      "grad_norm": 0.4018879090622138,
      "learning_rate": 7.3838437886731264e-06,
      "loss": 0.1183,
      "step": 242
    },
    {
      "epoch": 0.6864406779661016,
      "grad_norm": 0.3776316331740439,
      "learning_rate": 7.364317910727128e-06,
      "loss": 0.1222,
      "step": 243
    },
    {
      "epoch": 0.6892655367231638,
      "grad_norm": 0.3824776982755315,
      "learning_rate": 7.3447454807762565e-06,
      "loss": 0.1428,
      "step": 244
    },
    {
      "epoch": 0.692090395480226,
      "grad_norm": 0.3047645047687849,
      "learning_rate": 7.325126884189948e-06,
      "loss": 0.1385,
      "step": 245
    },
    {
      "epoch": 0.6949152542372882,
      "grad_norm": 0.3314073663562561,
      "learning_rate": 7.30546250724663e-06,
      "loss": 0.1206,
      "step": 246
    },
    {
      "epoch": 0.6977401129943502,
      "grad_norm": 0.4400078921981207,
      "learning_rate": 7.285752737126117e-06,
      "loss": 0.1327,
      "step": 247
    },
    {
      "epoch": 0.7005649717514124,
      "grad_norm": 0.4934623443358996,
      "learning_rate": 7.265997961901987e-06,
      "loss": 0.1564,
      "step": 248
    },
    {
      "epoch": 0.7033898305084746,
      "grad_norm": 0.30484309715193525,
      "learning_rate": 7.246198570533944e-06,
      "loss": 0.1242,
      "step": 249
    },
    {
      "epoch": 0.7062146892655368,
      "grad_norm": 0.29354477145041785,
      "learning_rate": 7.226354952860157e-06,
      "loss": 0.1149,
      "step": 250
    },
    {
      "epoch": 0.7090395480225988,
      "grad_norm": 0.3505364073788135,
      "learning_rate": 7.206467499589584e-06,
      "loss": 0.1087,
      "step": 251
    },
    {
      "epoch": 0.711864406779661,
      "grad_norm": 0.29464691426886963,
      "learning_rate": 7.186536602294278e-06,
      "loss": 0.1142,
      "step": 252
    },
    {
      "epoch": 0.7146892655367232,
      "grad_norm": 0.39810478648654263,
      "learning_rate": 7.166562653401681e-06,
      "loss": 0.1723,
      "step": 253
    },
    {
      "epoch": 0.7175141242937854,
      "grad_norm": 0.6996654555301565,
      "learning_rate": 7.146546046186893e-06,
      "loss": 0.1509,
      "step": 254
    },
    {
      "epoch": 0.7203389830508474,
      "grad_norm": 0.43941978897146655,
      "learning_rate": 7.126487174764936e-06,
      "loss": 0.1214,
      "step": 255
    },
    {
      "epoch": 0.7231638418079096,
      "grad_norm": 0.8711552264444874,
      "learning_rate": 7.106386434082979e-06,
      "loss": 0.1814,
      "step": 256
    },
    {
      "epoch": 0.7259887005649718,
      "grad_norm": 0.44400959950712626,
      "learning_rate": 7.0862442199125836e-06,
      "loss": 0.1358,
      "step": 257
    },
    {
      "epoch": 0.7288135593220338,
      "grad_norm": 0.35970291529551507,
      "learning_rate": 7.066060928841891e-06,
      "loss": 0.168,
      "step": 258
    },
    {
      "epoch": 0.731638418079096,
      "grad_norm": 0.3562466168617285,
      "learning_rate": 7.0458369582678276e-06,
      "loss": 0.1436,
      "step": 259
    },
    {
      "epoch": 0.7344632768361582,
      "grad_norm": 0.3120380022750767,
      "learning_rate": 7.025572706388268e-06,
      "loss": 0.1146,
      "step": 260
    },
    {
      "epoch": 0.7372881355932204,
      "grad_norm": 0.3774265959308726,
      "learning_rate": 7.005268572194208e-06,
      "loss": 0.1034,
      "step": 261
    },
    {
      "epoch": 0.7401129943502824,
      "grad_norm": 0.2979654585888925,
      "learning_rate": 6.984924955461901e-06,
      "loss": 0.1314,
      "step": 262
    },
    {
      "epoch": 0.7429378531073446,
      "grad_norm": 0.581337456503481,
      "learning_rate": 6.964542256744986e-06,
      "loss": 0.1417,
      "step": 263
    },
    {
      "epoch": 0.7457627118644068,
      "grad_norm": 0.38453399260208176,
      "learning_rate": 6.944120877366605e-06,
      "loss": 0.1564,
      "step": 264
    },
    {
      "epoch": 0.748587570621469,
      "grad_norm": 0.4514847039771076,
      "learning_rate": 6.923661219411494e-06,
      "loss": 0.1104,
      "step": 265
    },
    {
      "epoch": 0.751412429378531,
      "grad_norm": 0.32941464728785497,
      "learning_rate": 6.9031636857180795e-06,
      "loss": 0.1232,
      "step": 266
    },
    {
      "epoch": 0.7542372881355932,
      "grad_norm": 0.32342200359362855,
      "learning_rate": 6.8826286798705325e-06,
      "loss": 0.1298,
      "step": 267
    },
    {
      "epoch": 0.7570621468926554,
      "grad_norm": 0.31600309169763335,
      "learning_rate": 6.86205660619083e-06,
      "loss": 0.1052,
      "step": 268
    },
    {
      "epoch": 0.7598870056497176,
      "grad_norm": 0.3385392307218001,
      "learning_rate": 6.841447869730794e-06,
      "loss": 0.1078,
      "step": 269
    },
    {
      "epoch": 0.7627118644067796,
      "grad_norm": 0.3999058208022931,
      "learning_rate": 6.820802876264112e-06,
      "loss": 0.1002,
      "step": 270
    },
    {
      "epoch": 0.7655367231638418,
      "grad_norm": 0.3086907716307657,
      "learning_rate": 6.800122032278351e-06,
      "loss": 0.1057,
      "step": 271
    },
    {
      "epoch": 0.768361581920904,
      "grad_norm": 0.3019429125850021,
      "learning_rate": 6.7794057449669545e-06,
      "loss": 0.1224,
      "step": 272
    },
    {
      "epoch": 0.7711864406779662,
      "grad_norm": 0.3649142358181941,
      "learning_rate": 6.758654422221225e-06,
      "loss": 0.1229,
      "step": 273
    },
    {
      "epoch": 0.7740112994350282,
      "grad_norm": 0.3886884309856128,
      "learning_rate": 6.7378684726222875e-06,
      "loss": 0.1347,
      "step": 274
    },
    {
      "epoch": 0.7768361581920904,
      "grad_norm": 0.39178721337354694,
      "learning_rate": 6.717048305433053e-06,
      "loss": 0.1395,
      "step": 275
    },
    {
      "epoch": 0.7796610169491526,
      "grad_norm": 0.3560643954364327,
      "learning_rate": 6.6961943305901515e-06,
      "loss": 0.0996,
      "step": 276
    },
    {
      "epoch": 0.7824858757062146,
      "grad_norm": 0.2987347215445713,
      "learning_rate": 6.675306958695874e-06,
      "loss": 0.0932,
      "step": 277
    },
    {
      "epoch": 0.7853107344632768,
      "grad_norm": 0.391815080682088,
      "learning_rate": 6.65438660101007e-06,
      "loss": 0.1008,
      "step": 278
    },
    {
      "epoch": 0.788135593220339,
      "grad_norm": 0.36385558646484645,
      "learning_rate": 6.633433669442066e-06,
      "loss": 0.1477,
      "step": 279
    },
    {
      "epoch": 0.7909604519774012,
      "grad_norm": 0.3950859179251757,
      "learning_rate": 6.612448576542545e-06,
      "loss": 0.1546,
      "step": 280
    },
    {
      "epoch": 0.7937853107344632,
      "grad_norm": 0.3508433770997797,
      "learning_rate": 6.59143173549543e-06,
      "loss": 0.1226,
      "step": 281
    },
    {
      "epoch": 0.7966101694915254,
      "grad_norm": 0.31172563185935787,
      "learning_rate": 6.570383560109745e-06,
      "loss": 0.1159,
      "step": 282
    },
    {
      "epoch": 0.7994350282485876,
      "grad_norm": 0.3722434789366503,
      "learning_rate": 6.549304464811467e-06,
      "loss": 0.1718,
      "step": 283
    },
    {
      "epoch": 0.8022598870056498,
      "grad_norm": 0.5468457727438586,
      "learning_rate": 6.52819486463537e-06,
      "loss": 0.129,
      "step": 284
    },
    {
      "epoch": 0.8050847457627118,
      "grad_norm": 0.37527355148100994,
      "learning_rate": 6.50705517521685e-06,
      "loss": 0.1245,
      "step": 285
    },
    {
      "epoch": 0.807909604519774,
      "grad_norm": 0.3327457659403298,
      "learning_rate": 6.48588581278374e-06,
      "loss": 0.1058,
      "step": 286
    },
    {
      "epoch": 0.8107344632768362,
      "grad_norm": 0.32768437622701796,
      "learning_rate": 6.464687194148121e-06,
      "loss": 0.1215,
      "step": 287
    },
    {
      "epoch": 0.8135593220338984,
      "grad_norm": 0.2981605604482326,
      "learning_rate": 6.443459736698106e-06,
      "loss": 0.107,
      "step": 288
    },
    {
      "epoch": 0.8163841807909604,
      "grad_norm": 0.39440686793408264,
      "learning_rate": 6.422203858389633e-06,
      "loss": 0.099,
      "step": 289
    },
    {
      "epoch": 0.8192090395480226,
      "grad_norm": 0.3079389916506814,
      "learning_rate": 6.400919977738222e-06,
      "loss": 0.1261,
      "step": 290
    },
    {
      "epoch": 0.8220338983050848,
      "grad_norm": 0.39230281512992937,
      "learning_rate": 6.379608513810753e-06,
      "loss": 0.1388,
      "step": 291
    },
    {
      "epoch": 0.8248587570621468,
      "grad_norm": 0.34403030238315363,
      "learning_rate": 6.3582698862171945e-06,
      "loss": 0.1144,
      "step": 292
    },
    {
      "epoch": 0.827683615819209,
      "grad_norm": 0.3554434392149389,
      "learning_rate": 6.336904515102355e-06,
      "loss": 0.1401,
      "step": 293
    },
    {
      "epoch": 0.8305084745762712,
      "grad_norm": 0.3903419801304912,
      "learning_rate": 6.315512821137606e-06,
      "loss": 0.1166,
      "step": 294
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.29926598642859675,
      "learning_rate": 6.294095225512604e-06,
      "loss": 0.1172,
      "step": 295
    },
    {
      "epoch": 0.8361581920903954,
      "grad_norm": 0.3247406039107872,
      "learning_rate": 6.272652149926989e-06,
      "loss": 0.1206,
      "step": 296
    },
    {
      "epoch": 0.8389830508474576,
      "grad_norm": 0.4155920338425029,
      "learning_rate": 6.251184016582088e-06,
      "loss": 0.1569,
      "step": 297
    },
    {
      "epoch": 0.8418079096045198,
      "grad_norm": 0.49517100510425244,
      "learning_rate": 6.229691248172599e-06,
      "loss": 0.1269,
      "step": 298
    },
    {
      "epoch": 0.844632768361582,
      "grad_norm": 0.3069243773291746,
      "learning_rate": 6.208174267878272e-06,
      "loss": 0.1039,
      "step": 299
    },
    {
      "epoch": 0.847457627118644,
      "grad_norm": 0.30008373909078834,
      "learning_rate": 6.186633499355576e-06,
      "loss": 0.1011,
      "step": 300
    },
    {
      "epoch": 0.8502824858757062,
      "grad_norm": 0.39498835620426576,
      "learning_rate": 6.165069366729347e-06,
      "loss": 0.1262,
      "step": 301
    },
    {
      "epoch": 0.8531073446327684,
      "grad_norm": 0.45259238504149724,
      "learning_rate": 6.143482294584459e-06,
      "loss": 0.1555,
      "step": 302
    },
    {
      "epoch": 0.8559322033898306,
      "grad_norm": 0.3111562216471757,
      "learning_rate": 6.121872707957441e-06,
      "loss": 0.1037,
      "step": 303
    },
    {
      "epoch": 0.8587570621468926,
      "grad_norm": 0.38394089498308964,
      "learning_rate": 6.100241032328125e-06,
      "loss": 0.1381,
      "step": 304
    },
    {
      "epoch": 0.8615819209039548,
      "grad_norm": 0.4084090521249512,
      "learning_rate": 6.078587693611258e-06,
      "loss": 0.132,
      "step": 305
    },
    {
      "epoch": 0.864406779661017,
      "grad_norm": 0.32206751376923765,
      "learning_rate": 6.056913118148122e-06,
      "loss": 0.115,
      "step": 306
    },
    {
      "epoch": 0.867231638418079,
      "grad_norm": 0.39859711839150824,
      "learning_rate": 6.035217732698141e-06,
      "loss": 0.0989,
      "step": 307
    },
    {
      "epoch": 0.8700564971751412,
      "grad_norm": 0.3223901842186738,
      "learning_rate": 6.013501964430468e-06,
      "loss": 0.1129,
      "step": 308
    },
    {
      "epoch": 0.8728813559322034,
      "grad_norm": 0.37501991260972894,
      "learning_rate": 5.9917662409155896e-06,
      "loss": 0.1158,
      "step": 309
    },
    {
      "epoch": 0.8757062146892656,
      "grad_norm": 0.3680155603064568,
      "learning_rate": 5.970010990116892e-06,
      "loss": 0.135,
      "step": 310
    },
    {
      "epoch": 0.8785310734463276,
      "grad_norm": 0.4225399124226897,
      "learning_rate": 5.948236640382249e-06,
      "loss": 0.1597,
      "step": 311
    },
    {
      "epoch": 0.8813559322033898,
      "grad_norm": 0.3062593607691136,
      "learning_rate": 5.926443620435572e-06,
      "loss": 0.1136,
      "step": 312
    },
    {
      "epoch": 0.884180790960452,
      "grad_norm": 0.39959497875600314,
      "learning_rate": 5.904632359368388e-06,
      "loss": 0.1177,
      "step": 313
    },
    {
      "epoch": 0.8870056497175142,
      "grad_norm": 1.0041774505409626,
      "learning_rate": 5.8828032866313725e-06,
      "loss": 0.1129,
      "step": 314
    },
    {
      "epoch": 0.8898305084745762,
      "grad_norm": 0.33893621066441904,
      "learning_rate": 5.860956832025907e-06,
      "loss": 0.1375,
      "step": 315
    },
    {
      "epoch": 0.8926553672316384,
      "grad_norm": 0.39383272399940145,
      "learning_rate": 5.839093425695609e-06,
      "loss": 0.1422,
      "step": 316
    },
    {
      "epoch": 0.8954802259887006,
      "grad_norm": 0.35738567030204327,
      "learning_rate": 5.817213498117866e-06,
      "loss": 0.1529,
      "step": 317
    },
    {
      "epoch": 0.8983050847457628,
      "grad_norm": 0.39608069155077835,
      "learning_rate": 5.795317480095361e-06,
      "loss": 0.1716,
      "step": 318
    },
    {
      "epoch": 0.9011299435028248,
      "grad_norm": 0.3631332118845927,
      "learning_rate": 5.773405802747585e-06,
      "loss": 0.1555,
      "step": 319
    },
    {
      "epoch": 0.903954802259887,
      "grad_norm": 0.3841861480992692,
      "learning_rate": 5.751478897502353e-06,
      "loss": 0.1894,
      "step": 320
    },
    {
      "epoch": 0.9067796610169492,
      "grad_norm": 0.29759516462695884,
      "learning_rate": 5.729537196087309e-06,
      "loss": 0.112,
      "step": 321
    },
    {
      "epoch": 0.9096045197740112,
      "grad_norm": 0.28809510654440856,
      "learning_rate": 5.707581130521424e-06,
      "loss": 0.1134,
      "step": 322
    },
    {
      "epoch": 0.9124293785310734,
      "grad_norm": 0.397848892460871,
      "learning_rate": 5.685611133106491e-06,
      "loss": 0.1297,
      "step": 323
    },
    {
      "epoch": 0.9152542372881356,
      "grad_norm": 0.3160261105495359,
      "learning_rate": 5.663627636418611e-06,
      "loss": 0.1023,
      "step": 324
    },
    {
      "epoch": 0.9180790960451978,
      "grad_norm": 0.31177416648537104,
      "learning_rate": 5.64163107329968e-06,
      "loss": 0.1143,
      "step": 325
    },
    {
      "epoch": 0.9209039548022598,
      "grad_norm": 0.32018560545646063,
      "learning_rate": 5.619621876848864e-06,
      "loss": 0.0991,
      "step": 326
    },
    {
      "epoch": 0.923728813559322,
      "grad_norm": 0.2963637468584666,
      "learning_rate": 5.597600480414069e-06,
      "loss": 0.1292,
      "step": 327
    },
    {
      "epoch": 0.9265536723163842,
      "grad_norm": 0.44573857637587677,
      "learning_rate": 5.575567317583415e-06,
      "loss": 0.1217,
      "step": 328
    },
    {
      "epoch": 0.9293785310734464,
      "grad_norm": 0.39445386690118284,
      "learning_rate": 5.553522822176694e-06,
      "loss": 0.1684,
      "step": 329
    },
    {
      "epoch": 0.9322033898305084,
      "grad_norm": 0.5570629744547292,
      "learning_rate": 5.531467428236827e-06,
      "loss": 0.121,
      "step": 330
    },
    {
      "epoch": 0.9350282485875706,
      "grad_norm": 0.2927441446558258,
      "learning_rate": 5.5094015700213254e-06,
      "loss": 0.1199,
      "step": 331
    },
    {
      "epoch": 0.9378531073446328,
      "grad_norm": 0.31083045505241014,
      "learning_rate": 5.4873256819937325e-06,
      "loss": 0.1299,
      "step": 332
    },
    {
      "epoch": 0.940677966101695,
      "grad_norm": 0.3685953460491956,
      "learning_rate": 5.465240198815073e-06,
      "loss": 0.1432,
      "step": 333
    },
    {
      "epoch": 0.943502824858757,
      "grad_norm": 0.358140929608912,
      "learning_rate": 5.443145555335296e-06,
      "loss": 0.1148,
      "step": 334
    },
    {
      "epoch": 0.9463276836158192,
      "grad_norm": 0.4047657464869157,
      "learning_rate": 5.421042186584708e-06,
      "loss": 0.1339,
      "step": 335
    },
    {
      "epoch": 0.9491525423728814,
      "grad_norm": 0.30030457933241933,
      "learning_rate": 5.398930527765416e-06,
      "loss": 0.1301,
      "step": 336
    },
    {
      "epoch": 0.9519774011299436,
      "grad_norm": 0.31945028324973423,
      "learning_rate": 5.376811014242749e-06,
      "loss": 0.1147,
      "step": 337
    },
    {
      "epoch": 0.9548022598870056,
      "grad_norm": 0.48059842793389146,
      "learning_rate": 5.354684081536693e-06,
      "loss": 0.1251,
      "step": 338
    },
    {
      "epoch": 0.9576271186440678,
      "grad_norm": 0.3547595891598906,
      "learning_rate": 5.332550165313312e-06,
      "loss": 0.1256,
      "step": 339
    },
    {
      "epoch": 0.96045197740113,
      "grad_norm": 0.4010586374877903,
      "learning_rate": 5.31040970137617e-06,
      "loss": 0.1521,
      "step": 340
    },
    {
      "epoch": 0.963276836158192,
      "grad_norm": 0.30749195608479307,
      "learning_rate": 5.288263125657757e-06,
      "loss": 0.0898,
      "step": 341
    },
    {
      "epoch": 0.9661016949152542,
      "grad_norm": 0.38150734834003086,
      "learning_rate": 5.266110874210893e-06,
      "loss": 0.1153,
      "step": 342
    },
    {
      "epoch": 0.9689265536723164,
      "grad_norm": 0.4214873565398611,
      "learning_rate": 5.2439533832001565e-06,
      "loss": 0.1148,
      "step": 343
    },
    {
      "epoch": 0.9717514124293786,
      "grad_norm": 0.6122493487962786,
      "learning_rate": 5.221791088893282e-06,
      "loss": 0.1104,
      "step": 344
    },
    {
      "epoch": 0.9745762711864406,
      "grad_norm": 0.3363807115202252,
      "learning_rate": 5.199624427652589e-06,
      "loss": 0.1223,
      "step": 345
    },
    {
      "epoch": 0.9774011299435028,
      "grad_norm": 0.4560391650148292,
      "learning_rate": 5.177453835926366e-06,
      "loss": 0.1279,
      "step": 346
    },
    {
      "epoch": 0.980225988700565,
      "grad_norm": 0.34518676510036406,
      "learning_rate": 5.155279750240302e-06,
      "loss": 0.111,
      "step": 347
    },
    {
      "epoch": 0.9830508474576272,
      "grad_norm": 0.37562743319165254,
      "learning_rate": 5.133102607188875e-06,
      "loss": 0.1223,
      "step": 348
    },
    {
      "epoch": 0.9858757062146892,
      "grad_norm": 0.37060789770525393,
      "learning_rate": 5.1109228434267585e-06,
      "loss": 0.1205,
      "step": 349
    },
    {
      "epoch": 0.9887005649717514,
      "grad_norm": 0.30512645778837,
      "learning_rate": 5.0887408956602316e-06,
      "loss": 0.1123,
      "step": 350
    },
    {
      "epoch": 0.9915254237288136,
      "grad_norm": 0.31734580505159626,
      "learning_rate": 5.06655720063857e-06,
      "loss": 0.1393,
      "step": 351
    },
    {
      "epoch": 0.9943502824858758,
      "grad_norm": 0.4252275291418614,
      "learning_rate": 5.044372195145455e-06,
      "loss": 0.1804,
      "step": 352
    },
    {
      "epoch": 0.9971751412429378,
      "grad_norm": 0.445405112876313,
      "learning_rate": 5.022186315990371e-06,
      "loss": 0.1466,
      "step": 353
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.33905894838175626,
      "learning_rate": 5e-06,
      "loss": 0.123,
      "step": 354
    },
    {
      "epoch": 1.002824858757062,
      "grad_norm": 0.2724848502115793,
      "learning_rate": 4.97781368400963e-06,
      "loss": 0.0967,
      "step": 355
    },
    {
      "epoch": 1.0056497175141244,
      "grad_norm": 0.25595208945562964,
      "learning_rate": 4.9556278048545445e-06,
      "loss": 0.0704,
      "step": 356
    },
    {
      "epoch": 1.0084745762711864,
      "grad_norm": 0.2425492096389186,
      "learning_rate": 4.933442799361432e-06,
      "loss": 0.0799,
      "step": 357
    },
    {
      "epoch": 1.0112994350282485,
      "grad_norm": 0.2761823631954267,
      "learning_rate": 4.911259104339771e-06,
      "loss": 0.0936,
      "step": 358
    },
    {
      "epoch": 1.0141242937853108,
      "grad_norm": 0.27454279772458723,
      "learning_rate": 4.889077156573242e-06,
      "loss": 0.1175,
      "step": 359
    },
    {
      "epoch": 1.0169491525423728,
      "grad_norm": 0.31992512429885583,
      "learning_rate": 4.866897392811127e-06,
      "loss": 0.0968,
      "step": 360
    },
    {
      "epoch": 1.0197740112994351,
      "grad_norm": 0.2894306047919742,
      "learning_rate": 4.8447202497596975e-06,
      "loss": 0.1236,
      "step": 361
    },
    {
      "epoch": 1.0225988700564972,
      "grad_norm": 0.24920669865954365,
      "learning_rate": 4.822546164073635e-06,
      "loss": 0.0852,
      "step": 362
    },
    {
      "epoch": 1.0254237288135593,
      "grad_norm": 0.2952175126202192,
      "learning_rate": 4.800375572347414e-06,
      "loss": 0.0991,
      "step": 363
    },
    {
      "epoch": 1.0282485875706215,
      "grad_norm": 0.23148883860994288,
      "learning_rate": 4.778208911106718e-06,
      "loss": 0.066,
      "step": 364
    },
    {
      "epoch": 1.0310734463276836,
      "grad_norm": 0.2692480902893908,
      "learning_rate": 4.756046616799845e-06,
      "loss": 0.0973,
      "step": 365
    },
    {
      "epoch": 1.0338983050847457,
      "grad_norm": 0.2760736585863661,
      "learning_rate": 4.7338891257891085e-06,
      "loss": 0.0912,
      "step": 366
    },
    {
      "epoch": 1.036723163841808,
      "grad_norm": 0.24876270637682962,
      "learning_rate": 4.7117368743422435e-06,
      "loss": 0.0837,
      "step": 367
    },
    {
      "epoch": 1.03954802259887,
      "grad_norm": 0.2494244392514283,
      "learning_rate": 4.689590298623831e-06,
      "loss": 0.0811,
      "step": 368
    },
    {
      "epoch": 1.042372881355932,
      "grad_norm": 0.24104486268426012,
      "learning_rate": 4.667449834686689e-06,
      "loss": 0.076,
      "step": 369
    },
    {
      "epoch": 1.0451977401129944,
      "grad_norm": 0.2789423674939665,
      "learning_rate": 4.645315918463308e-06,
      "loss": 0.086,
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0480225988700564, |
|
"grad_norm": 0.2770971973884279, |
|
"learning_rate": 4.623188985757252e-06, |
|
"loss": 0.089, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.0508474576271187, |
|
"grad_norm": 0.28375117683833695, |
|
"learning_rate": 4.601069472234584e-06, |
|
"loss": 0.1046, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.0536723163841808, |
|
"grad_norm": 0.23874038173398437, |
|
"learning_rate": 4.578957813415293e-06, |
|
"loss": 0.0657, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.0564971751412429, |
|
"grad_norm": 0.27927992062438906, |
|
"learning_rate": 4.556854444664706e-06, |
|
"loss": 0.0823, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.0593220338983051, |
|
"grad_norm": 0.25006740348604295, |
|
"learning_rate": 4.534759801184928e-06, |
|
"loss": 0.084, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0621468926553672, |
|
"grad_norm": 0.26172192084359713, |
|
"learning_rate": 4.512674318006268e-06, |
|
"loss": 0.0688, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.0649717514124293, |
|
"grad_norm": 0.27984219569057084, |
|
"learning_rate": 4.490598429978676e-06, |
|
"loss": 0.1003, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.0677966101694916, |
|
"grad_norm": 0.31355314424830794, |
|
"learning_rate": 4.468532571763174e-06, |
|
"loss": 0.093, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.0706214689265536, |
|
"grad_norm": 0.2789313738912747, |
|
"learning_rate": 4.446477177823308e-06, |
|
"loss": 0.0891, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.073446327683616, |
|
"grad_norm": 0.2647947856217315, |
|
"learning_rate": 4.424432682416585e-06, |
|
"loss": 0.0657, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.076271186440678, |
|
"grad_norm": 0.309205913837631, |
|
"learning_rate": 4.402399519585932e-06, |
|
"loss": 0.091, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.07909604519774, |
|
"grad_norm": 0.27983746723387665, |
|
"learning_rate": 4.380378123151139e-06, |
|
"loss": 0.0758, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.0819209039548023, |
|
"grad_norm": 0.265317674784406, |
|
"learning_rate": 4.358368926700321e-06, |
|
"loss": 0.0744, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0847457627118644, |
|
"grad_norm": 0.3844212564195145, |
|
"learning_rate": 4.336372363581391e-06, |
|
"loss": 0.1193, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.0875706214689265, |
|
"grad_norm": 0.301325518174874, |
|
"learning_rate": 4.314388866893512e-06, |
|
"loss": 0.0954, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0903954802259888, |
|
"grad_norm": 0.2778786659311699, |
|
"learning_rate": 4.292418869478577e-06, |
|
"loss": 0.0791, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0932203389830508, |
|
"grad_norm": 0.2662950349405817, |
|
"learning_rate": 4.270462803912692e-06, |
|
"loss": 0.076, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0960451977401129, |
|
"grad_norm": 0.2796794355932831, |
|
"learning_rate": 4.248521102497649e-06, |
|
"loss": 0.0804, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0988700564971752, |
|
"grad_norm": 0.2768998598394203, |
|
"learning_rate": 4.226594197252417e-06, |
|
"loss": 0.0834, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.1016949152542372, |
|
"grad_norm": 0.27374836470708835, |
|
"learning_rate": 4.204682519904641e-06, |
|
"loss": 0.0718, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.1045197740112995, |
|
"grad_norm": 0.3445589105129125, |
|
"learning_rate": 4.182786501882135e-06, |
|
"loss": 0.1162, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.1073446327683616, |
|
"grad_norm": 0.34154494685980724, |
|
"learning_rate": 4.160906574304392e-06, |
|
"loss": 0.0821, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.1101694915254237, |
|
"grad_norm": 0.28827252450009566, |
|
"learning_rate": 4.139043167974096e-06, |
|
"loss": 0.0789, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.112994350282486, |
|
"grad_norm": 0.29707210161706604, |
|
"learning_rate": 4.117196713368629e-06, |
|
"loss": 0.0826, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.115819209039548, |
|
"grad_norm": 0.27591994189237984, |
|
"learning_rate": 4.095367640631614e-06, |
|
"loss": 0.0703, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.11864406779661, |
|
"grad_norm": 0.31721715937650236, |
|
"learning_rate": 4.073556379564429e-06, |
|
"loss": 0.0741, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.1214689265536724, |
|
"grad_norm": 0.2809918490407584, |
|
"learning_rate": 4.051763359617753e-06, |
|
"loss": 0.0768, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.1242937853107344, |
|
"grad_norm": 0.3012659075431546, |
|
"learning_rate": 4.0299890098831096e-06, |
|
"loss": 0.0899, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.1271186440677967, |
|
"grad_norm": 0.41884111681760783, |
|
"learning_rate": 4.00823375908441e-06, |
|
"loss": 0.1056, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.1299435028248588, |
|
"grad_norm": 0.3072172569806948, |
|
"learning_rate": 3.986498035569533e-06, |
|
"loss": 0.0946, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1327683615819208, |
|
"grad_norm": 0.30733421162176133, |
|
"learning_rate": 3.964782267301861e-06, |
|
"loss": 0.1148, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.1355932203389831, |
|
"grad_norm": 0.3062196925146934, |
|
"learning_rate": 3.9430868818518786e-06, |
|
"loss": 0.0939, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.1384180790960452, |
|
"grad_norm": 0.31120677637400174, |
|
"learning_rate": 3.921412306388744e-06, |
|
"loss": 0.0907, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.1412429378531073, |
|
"grad_norm": 0.29992317179141076, |
|
"learning_rate": 3.899758967671879e-06, |
|
"loss": 0.0936, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.1440677966101696, |
|
"grad_norm": 0.3192209928402148, |
|
"learning_rate": 3.8781272920425605e-06, |
|
"loss": 0.0926, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.1468926553672316, |
|
"grad_norm": 0.2758355027322586, |
|
"learning_rate": 3.856517705415543e-06, |
|
"loss": 0.0716, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.1497175141242937, |
|
"grad_norm": 0.30041138229139885, |
|
"learning_rate": 3.834930633270654e-06, |
|
"loss": 0.0915, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.152542372881356, |
|
"grad_norm": 0.2793345364877422, |
|
"learning_rate": 3.813366500644426e-06, |
|
"loss": 0.084, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.155367231638418, |
|
"grad_norm": 0.31892388449193476, |
|
"learning_rate": 3.791825732121729e-06, |
|
"loss": 0.0874, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.1581920903954803, |
|
"grad_norm": 0.31584149456312116, |
|
"learning_rate": 3.770308751827402e-06, |
|
"loss": 0.0973, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.1610169491525424, |
|
"grad_norm": 0.34820980091040554, |
|
"learning_rate": 3.748815983417914e-06, |
|
"loss": 0.1253, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.1638418079096045, |
|
"grad_norm": 0.28853430489950965, |
|
"learning_rate": 3.727347850073012e-06, |
|
"loss": 0.0759, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 0.3062765715619059, |
|
"learning_rate": 3.705904774487396e-06, |
|
"loss": 0.0933, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.1694915254237288, |
|
"grad_norm": 0.32486365661487987, |
|
"learning_rate": 3.6844871788623946e-06, |
|
"loss": 0.0911, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.1723163841807909, |
|
"grad_norm": 0.3026538149404601, |
|
"learning_rate": 3.6630954848976472e-06, |
|
"loss": 0.0942, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.1751412429378532, |
|
"grad_norm": 0.3953970274281564, |
|
"learning_rate": 3.641730113782807e-06, |
|
"loss": 0.0779, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.1779661016949152, |
|
"grad_norm": 0.2978638146695922, |
|
"learning_rate": 3.6203914861892483e-06, |
|
"loss": 0.0907, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.1807909604519775, |
|
"grad_norm": 0.29056550406150716, |
|
"learning_rate": 3.5990800222617774e-06, |
|
"loss": 0.0754, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.1836158192090396, |
|
"grad_norm": 0.2519599426657091, |
|
"learning_rate": 3.577796141610369e-06, |
|
"loss": 0.0632, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.1864406779661016, |
|
"grad_norm": 0.2867955638349113, |
|
"learning_rate": 3.5565402633018963e-06, |
|
"loss": 0.0854, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.189265536723164, |
|
"grad_norm": 0.27040139234217375, |
|
"learning_rate": 3.535312805851881e-06, |
|
"loss": 0.0676, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.192090395480226, |
|
"grad_norm": 0.3332989661333647, |
|
"learning_rate": 3.5141141872162613e-06, |
|
"loss": 0.1127, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.194915254237288, |
|
"grad_norm": 0.309745904183909, |
|
"learning_rate": 3.4929448247831523e-06, |
|
"loss": 0.0917, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.1977401129943503, |
|
"grad_norm": 0.45096384475777856, |
|
"learning_rate": 3.4718051353646304e-06, |
|
"loss": 0.1173, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.2005649717514124, |
|
"grad_norm": 0.30878271329326906, |
|
"learning_rate": 3.4506955351885346e-06, |
|
"loss": 0.0919, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.2033898305084745, |
|
"grad_norm": 0.320682805049876, |
|
"learning_rate": 3.4296164398902576e-06, |
|
"loss": 0.0922, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.2062146892655368, |
|
"grad_norm": 0.30581016394054344, |
|
"learning_rate": 3.408568264504571e-06, |
|
"loss": 0.0809, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.2090395480225988, |
|
"grad_norm": 0.2747339191019564, |
|
"learning_rate": 3.387551423457456e-06, |
|
"loss": 0.0802, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.211864406779661, |
|
"grad_norm": 0.552467904262321, |
|
"learning_rate": 3.366566330557935e-06, |
|
"loss": 0.1036, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.2146892655367232, |
|
"grad_norm": 0.32041597951648537, |
|
"learning_rate": 3.345613398989932e-06, |
|
"loss": 0.0849, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.2175141242937852, |
|
"grad_norm": 0.32164554989021144, |
|
"learning_rate": 3.324693041304128e-06, |
|
"loss": 0.0901, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.2203389830508475, |
|
"grad_norm": 0.297941242679515, |
|
"learning_rate": 3.3038056694098485e-06, |
|
"loss": 0.0857, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.2231638418079096, |
|
"grad_norm": 0.29988786294219155, |
|
"learning_rate": 3.2829516945669493e-06, |
|
"loss": 0.0658, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.2259887005649717, |
|
"grad_norm": 0.28671491672159266, |
|
"learning_rate": 3.262131527377715e-06, |
|
"loss": 0.0825, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.228813559322034, |
|
"grad_norm": 0.29202073423769753, |
|
"learning_rate": 3.241345577778775e-06, |
|
"loss": 0.0793, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.231638418079096, |
|
"grad_norm": 0.26686163546586056, |
|
"learning_rate": 3.220594255033046e-06, |
|
"loss": 0.0621, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.2344632768361583, |
|
"grad_norm": 0.2713137149878859, |
|
"learning_rate": 3.1998779677216508e-06, |
|
"loss": 0.0731, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.2372881355932204, |
|
"grad_norm": 0.2775500944738776, |
|
"learning_rate": 3.1791971237358893e-06, |
|
"loss": 0.0734, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.2401129943502824, |
|
"grad_norm": 0.28779045523216457, |
|
"learning_rate": 3.1585521302692073e-06, |
|
"loss": 0.0924, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.2429378531073447, |
|
"grad_norm": 0.3012961352670417, |
|
"learning_rate": 3.1379433938091695e-06, |
|
"loss": 0.0977, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2457627118644068, |
|
"grad_norm": 0.2790213382568145, |
|
"learning_rate": 3.117371320129469e-06, |
|
"loss": 0.0638, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.2485875706214689, |
|
"grad_norm": 0.289333568849024, |
|
"learning_rate": 3.0968363142819226e-06, |
|
"loss": 0.0835, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.2514124293785311, |
|
"grad_norm": 0.27842001426208673, |
|
"learning_rate": 3.076338780588507e-06, |
|
"loss": 0.0744, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.2542372881355932, |
|
"grad_norm": 0.29218223586498643, |
|
"learning_rate": 3.0558791226333974e-06, |
|
"loss": 0.084, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.2570621468926553, |
|
"grad_norm": 0.27223825874055874, |
|
"learning_rate": 3.035457743255016e-06, |
|
"loss": 0.0836, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.2598870056497176, |
|
"grad_norm": 0.27957551489646953, |
|
"learning_rate": 3.0150750445380995e-06, |
|
"loss": 0.0782, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.2627118644067796, |
|
"grad_norm": 0.3122940814886727, |
|
"learning_rate": 2.9947314278057927e-06, |
|
"loss": 0.1053, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.2655367231638417, |
|
"grad_norm": 0.3191436186366097, |
|
"learning_rate": 2.9744272936117323e-06, |
|
"loss": 0.1014, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.268361581920904, |
|
"grad_norm": 0.2842992416668331, |
|
"learning_rate": 2.954163041732174e-06, |
|
"loss": 0.0749, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.271186440677966, |
|
"grad_norm": 0.30400106395182974, |
|
"learning_rate": 2.9339390711581105e-06, |
|
"loss": 0.0887, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.274011299435028, |
|
"grad_norm": 0.29506183884480336, |
|
"learning_rate": 2.9137557800874177e-06, |
|
"loss": 0.091, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.2768361581920904, |
|
"grad_norm": 0.3144746461494332, |
|
"learning_rate": 2.8936135659170217e-06, |
|
"loss": 0.1059, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.2796610169491525, |
|
"grad_norm": 0.2996152337603787, |
|
"learning_rate": 2.8735128252350677e-06, |
|
"loss": 0.0786, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.2824858757062148, |
|
"grad_norm": 0.34649915198509984, |
|
"learning_rate": 2.853453953813108e-06, |
|
"loss": 0.0802, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.2853107344632768, |
|
"grad_norm": 0.3012239481267599, |
|
"learning_rate": 2.8334373465983216e-06, |
|
"loss": 0.0895, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.288135593220339, |
|
"grad_norm": 0.28862860268152546, |
|
"learning_rate": 2.8134633977057236e-06, |
|
"loss": 0.0839, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.2909604519774012, |
|
"grad_norm": 0.3084982778477155, |
|
"learning_rate": 2.7935325004104164e-06, |
|
"loss": 0.1009, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.2937853107344632, |
|
"grad_norm": 0.3282395905268862, |
|
"learning_rate": 2.7736450471398435e-06, |
|
"loss": 0.0652, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.2966101694915255, |
|
"grad_norm": 0.2793458687882274, |
|
"learning_rate": 2.7538014294660564e-06, |
|
"loss": 0.06, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.2994350282485876, |
|
"grad_norm": 0.29032428847483527, |
|
"learning_rate": 2.734002038098015e-06, |
|
"loss": 0.0674, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.3022598870056497, |
|
"grad_norm": 0.26031067828121474, |
|
"learning_rate": 2.7142472628738846e-06, |
|
"loss": 0.0628, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.305084745762712, |
|
"grad_norm": 0.3479546918961412, |
|
"learning_rate": 2.69453749275337e-06, |
|
"loss": 0.105, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.307909604519774, |
|
"grad_norm": 0.2777871642249738, |
|
"learning_rate": 2.6748731158100528e-06, |
|
"loss": 0.0733, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.310734463276836, |
|
"grad_norm": 0.29860877000280583, |
|
"learning_rate": 2.655254519223746e-06, |
|
"loss": 0.0956, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.3135593220338984, |
|
"grad_norm": 0.39509978738969753, |
|
"learning_rate": 2.6356820892728752e-06, |
|
"loss": 0.098, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.3163841807909604, |
|
"grad_norm": 0.26842617785571943, |
|
"learning_rate": 2.616156211326875e-06, |
|
"loss": 0.0683, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.3192090395480225, |
|
"grad_norm": 0.2638729229718093, |
|
"learning_rate": 2.5966772698386e-06, |
|
"loss": 0.0697, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.3220338983050848, |
|
"grad_norm": 0.2885132528975007, |
|
"learning_rate": 2.57724564833675e-06, |
|
"loss": 0.0853, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.3248587570621468, |
|
"grad_norm": 0.2909952803458812, |
|
"learning_rate": 2.557861729418326e-06, |
|
"loss": 0.0702, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.327683615819209, |
|
"grad_norm": 0.3508969831429909, |
|
"learning_rate": 2.5385258947410908e-06, |
|
"loss": 0.1163, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.3305084745762712, |
|
"grad_norm": 0.3137041512371342, |
|
"learning_rate": 2.5192385250160587e-06, |
|
"loss": 0.0791, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 0.34011082522993374, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 0.0863, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.3361581920903955, |
|
"grad_norm": 0.31378850435891975, |
|
"learning_rate": 2.4808106984879597e-06, |
|
"loss": 0.1031, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.3389830508474576, |
|
"grad_norm": 0.32916522472842985, |
|
"learning_rate": 2.461670998305802e-06, |
|
"loss": 0.1104, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.34180790960452, |
|
"grad_norm": 0.2858816116663427, |
|
"learning_rate": 2.4425812763027672e-06, |
|
"loss": 0.082, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.344632768361582, |
|
"grad_norm": 0.3311097135552006, |
|
"learning_rate": 2.4235419083440615e-06, |
|
"loss": 0.1001, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.347457627118644, |
|
"grad_norm": 0.3212815875251671, |
|
"learning_rate": 2.404553269303448e-06, |
|
"loss": 0.0706, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.3502824858757063, |
|
"grad_norm": 0.3059752435224393, |
|
"learning_rate": 2.3856157330558625e-06, |
|
"loss": 0.0858, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.3531073446327684, |
|
"grad_norm": 0.3389049114062409, |
|
"learning_rate": 2.366729672470065e-06, |
|
"loss": 0.0853, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.3559322033898304, |
|
"grad_norm": 0.32200853396437623, |
|
"learning_rate": 2.3478954594012884e-06, |
|
"loss": 0.11, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3587570621468927, |
|
"grad_norm": 0.30985593467605343, |
|
"learning_rate": 2.329113464683913e-06, |
|
"loss": 0.0925, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.3615819209039548, |
|
"grad_norm": 0.29010419081013045, |
|
"learning_rate": 2.310384058124181e-06, |
|
"loss": 0.079, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.3644067796610169, |
|
"grad_norm": 0.2868431818032766, |
|
"learning_rate": 2.2917076084928953e-06, |
|
"loss": 0.0691, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.3672316384180792, |
|
"grad_norm": 0.32921780136851597, |
|
"learning_rate": 2.273084483518176e-06, |
|
"loss": 0.1029, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.3700564971751412, |
|
"grad_norm": 0.3443609157960376, |
|
"learning_rate": 2.25451504987821e-06, |
|
"loss": 0.1094, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.3728813559322033, |
|
"grad_norm": 0.3442323075700346, |
|
"learning_rate": 2.2359996731940348e-06, |
|
"loss": 0.1148, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.3757062146892656, |
|
"grad_norm": 0.31019876061048274, |
|
"learning_rate": 2.2175387180223333e-06, |
|
"loss": 0.0846, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.3785310734463276, |
|
"grad_norm": 0.27898269961579986, |
|
"learning_rate": 2.1991325478482695e-06, |
|
"loss": 0.0858, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.3813559322033897, |
|
"grad_norm": 0.30200643340593814, |
|
"learning_rate": 2.1807815250783194e-06, |
|
"loss": 0.0901, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.384180790960452, |
|
"grad_norm": 0.28412971697416345, |
|
"learning_rate": 2.162486011033142e-06, |
|
"loss": 0.0649, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.387005649717514, |
|
"grad_norm": 0.28849690690010993, |
|
"learning_rate": 2.1442463659404587e-06, |
|
"loss": 0.0734, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.3898305084745763, |
|
"grad_norm": 0.2872214027286925, |
|
"learning_rate": 2.1260629489279662e-06, |
|
"loss": 0.0744, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.3926553672316384, |
|
"grad_norm": 0.2856113105572892, |
|
"learning_rate": 2.1079361180162657e-06, |
|
"loss": 0.0772, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.3954802259887007, |
|
"grad_norm": 0.2917591710852941, |
|
"learning_rate": 2.089866230111813e-06, |
|
"loss": 0.0872, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.3983050847457628, |
|
"grad_norm": 0.3156299163799424, |
|
"learning_rate": 2.0718536409998834e-06, |
|
"loss": 0.0755, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.4011299435028248, |
|
"grad_norm": 0.3420052483929326, |
|
"learning_rate": 2.053898705337583e-06, |
|
"loss": 0.0833, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.4039548022598871, |
|
"grad_norm": 0.3007655048264405, |
|
"learning_rate": 2.0360017766468466e-06, |
|
"loss": 0.0755, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.4067796610169492, |
|
"grad_norm": 0.3285257384928867, |
|
"learning_rate": 2.0181632073074925e-06, |
|
"loss": 0.0882, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.4096045197740112, |
|
"grad_norm": 0.27048721301742223, |
|
"learning_rate": 2.000383348550279e-06, |
|
"loss": 0.0739, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.4124293785310735, |
|
"grad_norm": 0.2984410410038522, |
|
"learning_rate": 1.9826625504499807e-06, |
|
"loss": 0.0954, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4124293785310735, |
|
"eval_loss": 0.130197674036026, |
|
"eval_runtime": 1.5872, |
|
"eval_samples_per_second": 18.271, |
|
"eval_steps_per_second": 5.04, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4152542372881356, |
|
"grad_norm": 0.2898926398978837, |
|
"learning_rate": 1.965001161918513e-06, |
|
"loss": 0.0789, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.4180790960451977, |
|
"grad_norm": 0.3465927210704245, |
|
"learning_rate": 1.947399530698043e-06, |
|
"loss": 0.0979, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.42090395480226, |
|
"grad_norm": 0.3475682598971018, |
|
"learning_rate": 1.92985800335416e-06, |
|
"loss": 0.0843, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.423728813559322, |
|
"grad_norm": 0.3294028190875718, |
|
"learning_rate": 1.912376925269041e-06, |
|
"loss": 0.1121, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.426553672316384, |
|
"grad_norm": 0.3498640725406821, |
|
"learning_rate": 1.894956640634652e-06, |
|
"loss": 0.0828, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.4293785310734464, |
|
"grad_norm": 0.30225327794900386, |
|
"learning_rate": 1.8775974924459716e-06, |
|
"loss": 0.085, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.4322033898305084, |
|
"grad_norm": 0.3931329736136659, |
|
"learning_rate": 1.860299822494241e-06, |
|
"loss": 0.0724, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.4350282485875705, |
|
"grad_norm": 0.28157973731776237, |
|
"learning_rate": 1.8430639713602317e-06, |
|
"loss": 0.0658, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.4378531073446328, |
|
"grad_norm": 0.28515233111654203, |
|
"learning_rate": 1.8258902784075394e-06, |
|
"loss": 0.0847, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.4406779661016949, |
|
"grad_norm": 0.2760261233151599, |
|
"learning_rate": 1.808779081775901e-06, |
|
"loss": 0.066, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.4435028248587571, |
|
"grad_norm": 0.2941924396596464, |
|
"learning_rate": 1.7917307183745353e-06, |
|
"loss": 0.0884, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.4463276836158192, |
|
"grad_norm": 0.2686743706277037, |
|
"learning_rate": 1.7747455238755223e-06, |
|
"loss": 0.0743, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.4491525423728815, |
|
"grad_norm": 0.3151180489919417, |
|
"learning_rate": 1.757823832707175e-06, |
|
"loss": 0.1007, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.4519774011299436, |
|
"grad_norm": 0.34222558948663734, |
|
"learning_rate": 1.7409659780474652e-06, |
|
"loss": 0.103, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.4548022598870056, |
|
"grad_norm": 0.3402549223734959, |
|
"learning_rate": 1.7241722918174642e-06, |
|
"loss": 0.1213, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.457627118644068, |
|
"grad_norm": 0.3145312403253552, |
|
"learning_rate": 1.7074431046748075e-06, |
|
"loss": 0.0969, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.46045197740113, |
|
"grad_norm": 0.2896418247469403, |
|
"learning_rate": 1.6907787460071756e-06, |
|
"loss": 0.0862, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.463276836158192, |
|
"grad_norm": 0.31049200696233425, |
|
"learning_rate": 1.6741795439258218e-06, |
|
"loss": 0.098, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.4661016949152543, |
|
"grad_norm": 0.36636752026678193, |
|
"learning_rate": 1.6576458252590988e-06, |
|
"loss": 0.1391, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.4689265536723164, |
|
"grad_norm": 0.27909992931423844, |
|
"learning_rate": 1.641177915546036e-06, |
|
"loss": 0.0744, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.4717514124293785, |
|
"grad_norm": 0.2864859483455, |
|
"learning_rate": 1.6247761390299221e-06, |
|
"loss": 0.0898, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.4745762711864407, |
|
"grad_norm": 0.4362808575365348, |
|
"learning_rate": 1.6084408186519195e-06, |
|
"loss": 0.0734, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.4774011299435028, |
|
"grad_norm": 0.28051391012639115, |
|
"learning_rate": 1.5921722760447144e-06, |
|
"loss": 0.0678, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.4802259887005649, |
|
"grad_norm": 0.30037283427959355, |
|
"learning_rate": 1.5759708315261724e-06, |
|
"loss": 0.0932, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.4830508474576272, |
|
"grad_norm": 0.2982894826882516, |
|
"learning_rate": 1.5598368040930427e-06, |
|
"loss": 0.0735, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4858757062146892, |
|
"grad_norm": 0.3166739516240939, |
|
"learning_rate": 1.5437705114146735e-06, |
|
"loss": 0.1003, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.4887005649717513, |
|
"grad_norm": 0.3288809776102775, |
|
"learning_rate": 1.527772269826749e-06, |
|
"loss": 0.0984, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.4915254237288136, |
|
"grad_norm": 0.29168718949906514, |
|
"learning_rate": 1.511842394325077e-06, |
|
"loss": 0.0907, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.4943502824858756, |
|
"grad_norm": 0.2993454545746122, |
|
"learning_rate": 1.4959811985593707e-06, |
|
"loss": 0.0648, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.497175141242938, |
|
"grad_norm": 0.2901988801448637, |
|
"learning_rate": 1.4801889948270852e-06, |
|
"loss": 0.0843, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.276078183792322, |
|
"learning_rate": 1.4644660940672628e-06, |
|
"loss": 0.0646, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.5028248587570623, |
|
"grad_norm": 0.2957096527763229, |
|
"learning_rate": 1.44881280585441e-06, |
|
"loss": 0.0838, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.5056497175141241, |
|
"grad_norm": 0.3074950453422565, |
|
"learning_rate": 1.4332294383924034e-06, |
|
"loss": 0.0976, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.5084745762711864, |
|
"grad_norm": 0.28111249969205165, |
|
"learning_rate": 1.4177162985084242e-06, |
|
"loss": 0.07, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.5112994350282487, |
|
"grad_norm": 0.3040746365690992, |
|
"learning_rate": 1.4022736916469166e-06, |
|
"loss": 0.0675, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.5141242937853108, |
|
"grad_norm": 0.2970452295573905, |
|
"learning_rate": 1.3869019218635644e-06, |
|
"loss": 0.0937, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.5169491525423728, |
|
"grad_norm": 0.3091154520174239, |
|
"learning_rate": 1.3716012918193206e-06, |
|
"loss": 0.0761, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.5197740112994351, |
|
"grad_norm": 0.33856268143824403, |
|
"learning_rate": 1.3563721027744309e-06, |
|
"loss": 0.0941, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.5225988700564972, |
|
"grad_norm": 0.2960053858988549, |
|
"learning_rate": 1.3412146545825166e-06, |
|
"loss": 0.0731, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.5254237288135593, |
|
"grad_norm": 0.3000001098205527, |
|
"learning_rate": 1.3261292456846648e-06, |
|
"loss": 0.0777, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.5282485875706215, |
|
"grad_norm": 0.32275915585442194, |
|
"learning_rate": 1.3111161731035448e-06, |
|
"loss": 0.1028, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.5310734463276836, |
|
"grad_norm": 0.28047948839083625, |
|
"learning_rate": 1.2961757324375768e-06, |
|
"loss": 0.0773, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.5338983050847457, |
|
"grad_norm": 0.2897511789785588, |
|
"learning_rate": 1.2813082178550929e-06, |
|
"loss": 0.0761, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.536723163841808, |
|
"grad_norm": 0.3604669071306025, |
|
"learning_rate": 1.2665139220885615e-06, |
|
"loss": 0.0966, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.53954802259887, |
|
"grad_norm": 0.35066125768752815, |
|
"learning_rate": 1.2517931364288133e-06, |
|
"loss": 0.1189, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.542372881355932, |
|
"grad_norm": 0.36481440937249643, |
|
"learning_rate": 1.2371461507193077e-06, |
|
"loss": 0.0854, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.5451977401129944, |
|
"grad_norm": 0.2705446394892136, |
|
"learning_rate": 1.2225732533504309e-06, |
|
"loss": 0.0681, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.5480225988700564, |
|
"grad_norm": 0.2709042435292725, |
|
"learning_rate": 1.2080747312538082e-06, |
|
"loss": 0.0605, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.5508474576271185, |
|
"grad_norm": 0.2941674674541573, |
|
"learning_rate": 1.1936508698966664e-06, |
|
"loss": 0.0759, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.5536723163841808, |
|
"grad_norm": 0.3301045484204278, |
|
"learning_rate": 1.1793019532762057e-06, |
|
"loss": 0.09, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.556497175141243, |
|
"grad_norm": 0.5929461456725253, |
|
"learning_rate": 1.1650282639140066e-06, |
|
"loss": 0.115, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.559322033898305, |
|
"grad_norm": 0.29282230586457825, |
|
"learning_rate": 1.1508300828504682e-06, |
|
"loss": 0.068, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.5621468926553672, |
|
"grad_norm": 0.290916040961216, |
|
"learning_rate": 1.1367076896392853e-06, |
|
"loss": 0.0759, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.5649717514124295, |
|
"grad_norm": 0.3308510796313253, |
|
"learning_rate": 1.122661362341927e-06, |
|
"loss": 0.107, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.5677966101694916, |
|
"grad_norm": 0.2902882953470368, |
|
"learning_rate": 1.1086913775221709e-06, |
|
"loss": 0.0817, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.5706214689265536, |
|
"grad_norm": 0.25808920224703275, |
|
"learning_rate": 1.0947980102406597e-06, |
|
"loss": 0.063, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.573446327683616, |
|
"grad_norm": 0.3042205877096776, |
|
"learning_rate": 1.0809815340494822e-06, |
|
"loss": 0.0755, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.576271186440678, |
|
"grad_norm": 0.3051764415373886, |
|
"learning_rate": 1.0672422209867879e-06, |
|
"loss": 0.0652, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.57909604519774, |
|
"grad_norm": 0.3146319364999993, |
|
"learning_rate": 1.053580341571428e-06, |
|
"loss": 0.1059, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.5819209039548023, |
|
"grad_norm": 0.3027084741124625, |
|
"learning_rate": 1.0399961647976315e-06, |
|
"loss": 0.0812, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.5847457627118644, |
|
"grad_norm": 0.3278175149471125, |
|
"learning_rate": 1.0264899581297121e-06, |
|
"loss": 0.1192, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.5875706214689265, |
|
"grad_norm": 0.28592160027084834, |
|
"learning_rate": 1.0130619874967983e-06, |
|
"loss": 0.0752, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.5903954802259888, |
|
"grad_norm": 0.2930417798367745, |
|
"learning_rate": 9.997125172875943e-07, |
|
"loss": 0.0879, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.5932203389830508, |
|
"grad_norm": 0.2704337798545791, |
|
"learning_rate": 9.86441810345183e-07, |
|
"loss": 0.0624, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.5960451977401129, |
|
"grad_norm": 0.3166751927355104, |
|
"learning_rate": 9.732501279618388e-07, |
|
"loss": 0.0848, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.5988700564971752, |
|
"grad_norm": 0.29461324641929826, |
|
"learning_rate": 9.60137729873898e-07, |
|
"loss": 0.0789, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.6016949152542372, |
|
"grad_norm": 0.31189484709881815, |
|
"learning_rate": 9.471048742566313e-07, |
|
"loss": 0.0822, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.6045197740112993, |
|
"grad_norm": 0.3466231703998608, |
|
"learning_rate": 9.34151817719166e-07, |
|
"loss": 0.0767, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.6073446327683616, |
|
"grad_norm": 0.30675238542761885, |
|
"learning_rate": 9.212788152994367e-07, |
|
"loss": 0.1034, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.6101694915254239, |
|
"grad_norm": 0.522761335835565, |
|
"learning_rate": 9.08486120459155e-07, |
|
"loss": 0.1273, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.6129943502824857, |
|
"grad_norm": 0.2810136760249764, |
|
"learning_rate": 8.957739850788288e-07, |
|
"loss": 0.073, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.615819209039548, |
|
"grad_norm": 0.31293166014889473, |
|
"learning_rate": 8.831426594527976e-07, |
|
"loss": 0.0956, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.6186440677966103, |
|
"grad_norm": 0.3408526367512878, |
|
"learning_rate": 8.705923922843041e-07, |
|
"loss": 0.0891, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.6214689265536724, |
|
"grad_norm": 0.30421762095488025, |
|
"learning_rate": 8.581234306805969e-07, |
|
"loss": 0.0946, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.6242937853107344, |
|
"grad_norm": 0.2940599119195987, |
|
"learning_rate": 8.457360201480702e-07, |
|
"loss": 0.0692, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.6271186440677967, |
|
"grad_norm": 0.3002501406760772, |
|
"learning_rate": 8.334304045874248e-07, |
|
"loss": 0.0815, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.6299435028248588, |
|
"grad_norm": 0.268425275016888, |
|
"learning_rate": 8.212068262888684e-07, |
|
"loss": 0.0751, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.6327683615819208, |
|
"grad_norm": 0.2928986782866679, |
|
"learning_rate": 8.090655259273428e-07, |
|
"loss": 0.0918, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.6355932203389831, |
|
"grad_norm": 0.32485035024590025, |
|
"learning_rate": 7.970067425577849e-07, |
|
"loss": 0.0933, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.6384180790960452, |
|
"grad_norm": 0.3234267210299417, |
|
"learning_rate": 7.850307136104246e-07, |
|
"loss": 0.0904, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.6412429378531073, |
|
"grad_norm": 0.30188930742886005, |
|
"learning_rate": 7.731376748861069e-07, |
|
"loss": 0.0889, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.6440677966101696, |
|
"grad_norm": 0.335467078244967, |
|
"learning_rate": 7.613278605516455e-07, |
|
"loss": 0.1325, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.6468926553672316, |
|
"grad_norm": 0.3072516075801986, |
|
"learning_rate": 7.4960150313522e-07, |
|
"loss": 0.0783, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.6497175141242937, |
|
"grad_norm": 0.3131137120089587, |
|
"learning_rate": 7.379588335217875e-07, |
|
"loss": 0.0995, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.652542372881356, |
|
"grad_norm": 0.2914572623071712, |
|
"learning_rate": 7.264000809485483e-07, |
|
"loss": 0.0863, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.655367231638418, |
|
"grad_norm": 0.32502736910136926, |
|
"learning_rate": 7.149254730004246e-07, |
|
"loss": 0.1124, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.65819209039548, |
|
"grad_norm": 0.326075318059859, |
|
"learning_rate": 7.035352356055786e-07, |
|
"loss": 0.1201, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.6610169491525424, |
|
"grad_norm": 0.37300992352749307, |
|
"learning_rate": 6.922295930309691e-07, |
|
"loss": 0.1073, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.6638418079096047, |
|
"grad_norm": 0.28876296213713953, |
|
"learning_rate": 6.810087678779353e-07, |
|
"loss": 0.0743, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 0.30368795978426233, |
|
"learning_rate": 6.698729810778065e-07, |
|
"loss": 0.0798, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.6694915254237288, |
|
"grad_norm": 0.3072791432365542, |
|
"learning_rate": 6.588224518875647e-07, |
|
"loss": 0.0812, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.672316384180791, |
|
"grad_norm": 0.3056293727238639, |
|
"learning_rate": 6.478573978855146e-07, |
|
"loss": 0.0684, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.6751412429378532, |
|
"grad_norm": 0.30153905844016693, |
|
"learning_rate": 6.369780349670085e-07, |
|
"loss": 0.0779, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.6779661016949152, |
|
"grad_norm": 0.2858390899342033, |
|
"learning_rate": 6.261845773401936e-07, |
|
"loss": 0.0713, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.6807909604519775, |
|
"grad_norm": 0.30891966645412655, |
|
"learning_rate": 6.154772375217905e-07, |
|
"loss": 0.0837, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.6836158192090396, |
|
"grad_norm": 0.29088564996940475, |
|
"learning_rate": 6.048562263329139e-07, |
|
"loss": 0.0825, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.6864406779661016, |
|
"grad_norm": 0.29812823046797693, |
|
"learning_rate": 5.943217528949169e-07, |
|
"loss": 0.0927, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.689265536723164, |
|
"grad_norm": 0.30652461054045976, |
|
"learning_rate": 5.838740246252794e-07, |
|
"loss": 0.0766, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.692090395480226, |
|
"grad_norm": 0.3012630776892009, |
|
"learning_rate": 5.735132472335192e-07, |
|
"loss": 0.0893, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.694915254237288, |
|
"grad_norm": 0.30023304223451514, |
|
"learning_rate": 5.632396247171429e-07, |
|
"loss": 0.1049, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6977401129943503, |
|
"grad_norm": 0.3280835461780043, |
|
"learning_rate": 5.530533593576292e-07, |
|
"loss": 0.116, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.7005649717514124, |
|
"grad_norm": 0.2762418019865388, |
|
"learning_rate": 5.429546517164486e-07, |
|
"loss": 0.067, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.7033898305084745, |
|
"grad_norm": 0.31675842207409677, |
|
"learning_rate": 5.329437006311122e-07, |
|
"loss": 0.0872, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.7062146892655368, |
|
"grad_norm": 0.3041486434567392, |
|
"learning_rate": 5.230207032112549e-07, |
|
"loss": 0.0752, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.7090395480225988, |
|
"grad_norm": 0.29993780041723445, |
|
"learning_rate": 5.131858548347596e-07, |
|
"loss": 0.0717, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.711864406779661, |
|
"grad_norm": 0.30787432406626875, |
|
"learning_rate": 5.034393491439044e-07, |
|
"loss": 0.0802, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.7146892655367232, |
|
"grad_norm": 0.28990243338730465, |
|
"learning_rate": 4.93781378041554e-07, |
|
"loss": 0.0871, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.7175141242937855, |
|
"grad_norm": 0.3024236161639379, |
|
"learning_rate": 4.842121316873821e-07, |
|
"loss": 0.0855, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.7203389830508473, |
|
"grad_norm": 0.3186961475088875, |
|
"learning_rate": 4.747317984941213e-07, |
|
"loss": 0.0875, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.7231638418079096, |
|
"grad_norm": 0.2907838927015749, |
|
"learning_rate": 4.653405651238607e-07, |
|
"loss": 0.0908, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.725988700564972, |
|
"grad_norm": 0.30763579782876077, |
|
"learning_rate": 4.560386164843639e-07, |
|
"loss": 0.0964, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.7288135593220337, |
|
"grad_norm": 0.30987458360594455, |
|
"learning_rate": 4.468261357254339e-07, |
|
"loss": 0.0947, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.731638418079096, |
|
"grad_norm": 0.29429416026094735, |
|
"learning_rate": 4.3770330423530626e-07, |
|
"loss": 0.0834, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.7344632768361583, |
|
"grad_norm": 0.3063918655948546, |
|
"learning_rate": 4.286703016370719e-07, |
|
"loss": 0.0925, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.7372881355932204, |
|
"grad_norm": 0.33329444172148553, |
|
"learning_rate": 4.197273057851464e-07, |
|
"loss": 0.0983, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.7401129943502824, |
|
"grad_norm": 0.29709804495802744, |
|
"learning_rate": 4.108744927617669e-07, |
|
"loss": 0.079, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.7429378531073447, |
|
"grad_norm": 0.2937681249333296, |
|
"learning_rate": 4.021120368735254e-07, |
|
"loss": 0.088, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.7457627118644068, |
|
"grad_norm": 0.33235581919645196, |
|
"learning_rate": 3.934401106479352e-07, |
|
"loss": 0.093, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.7485875706214689, |
|
"grad_norm": 0.3052937062176937, |
|
"learning_rate": 3.8485888483003384e-07, |
|
"loss": 0.0987, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.7514124293785311, |
|
"grad_norm": 0.2953169881958288, |
|
"learning_rate": 3.763685283790208e-07, |
|
"loss": 0.0861, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.7542372881355932, |
|
"grad_norm": 0.39089257775250585, |
|
"learning_rate": 3.679692084649372e-07, |
|
"loss": 0.1092, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.7570621468926553, |
|
"grad_norm": 0.298962166079189, |
|
"learning_rate": 3.596610904653652e-07, |
|
"loss": 0.0877, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.7598870056497176, |
|
"grad_norm": 0.33068479547784435, |
|
"learning_rate": 3.5144433796217515e-07, |
|
"loss": 0.0868, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.7627118644067796, |
|
"grad_norm": 0.29230226311663704, |
|
"learning_rate": 3.433191127383079e-07, |
|
"loss": 0.0786, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.7655367231638417, |
|
"grad_norm": 0.342045177865139, |
|
"learning_rate": 3.352855747745859e-07, |
|
"loss": 0.1034, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.768361581920904, |
|
"grad_norm": 0.3538547033051671, |
|
"learning_rate": 3.2734388224656575e-07, |
|
"loss": 0.0913, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.7711864406779663, |
|
"grad_norm": 0.3022791955228267, |
|
"learning_rate": 3.1949419152142e-07, |
|
"loss": 0.0912, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.774011299435028, |
|
"grad_norm": 0.3286478841800299, |
|
"learning_rate": 3.1173665715486076e-07, |
|
"loss": 0.1005, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.7768361581920904, |
|
"grad_norm": 0.2914491072414654, |
|
"learning_rate": 3.0407143188809885e-07, |
|
"loss": 0.087, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.7796610169491527, |
|
"grad_norm": 0.26788446393967724, |
|
"learning_rate": 2.9649866664483387e-07, |
|
"loss": 0.06, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.7824858757062145, |
|
"grad_norm": 0.29391848362521833, |
|
"learning_rate": 2.8901851052828e-07, |
|
"loss": 0.0789, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.7853107344632768, |
|
"grad_norm": 0.2778847008048511, |
|
"learning_rate": 2.816311108182368e-07, |
|
"loss": 0.0626, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.788135593220339, |
|
"grad_norm": 0.3154659365103027, |
|
"learning_rate": 2.743366129681824e-07, |
|
"loss": 0.101, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.7909604519774012, |
|
"grad_norm": 0.31293085126205966, |
|
"learning_rate": 2.671351606024153e-07, |
|
"loss": 0.0762, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.7937853107344632, |
|
"grad_norm": 0.47243232664184365, |
|
"learning_rate": 2.6002689551322403e-07, |
|
"loss": 0.1006, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.7966101694915255, |
|
"grad_norm": 0.2781939275244853, |
|
"learning_rate": 2.530119576580936e-07, |
|
"loss": 0.0638, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.7994350282485876, |
|
"grad_norm": 0.2796358570748197, |
|
"learning_rate": 2.460904851569534e-07, |
|
"loss": 0.0636, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.8022598870056497, |
|
"grad_norm": 0.31697163166683606, |
|
"learning_rate": 2.3926261428945386e-07, |
|
"loss": 0.0707, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.805084745762712, |
|
"grad_norm": 0.2764809135900223, |
|
"learning_rate": 2.325284794922883e-07, |
|
"loss": 0.0674, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.807909604519774, |
|
"grad_norm": 0.3238518566445213, |
|
"learning_rate": 2.2588821335654044e-07, |
|
"loss": 0.0824, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.810734463276836, |
|
"grad_norm": 0.34129160389189483, |
|
"learning_rate": 2.1934194662507736e-07, |
|
"loss": 0.0851, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.8135593220338984, |
|
"grad_norm": 0.2825014150604838, |
|
"learning_rate": 2.1288980818997272e-07, |
|
"loss": 0.077, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.8163841807909604, |
|
"grad_norm": 0.29713126389115424, |
|
"learning_rate": 2.0653192508997222e-07, |
|
"loss": 0.0762, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.8192090395480225, |
|
"grad_norm": 0.300338264829181, |
|
"learning_rate": 2.0026842250799038e-07, |
|
"loss": 0.0878, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.8220338983050848, |
|
"grad_norm": 1.4166868383475169, |
|
"learning_rate": 1.9409942376864333e-07, |
|
"loss": 0.0867, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.8248587570621468, |
|
"grad_norm": 0.25772375662960606, |
|
"learning_rate": 1.8802505033582608e-07, |
|
"loss": 0.0604, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.827683615819209, |
|
"grad_norm": 0.31296870168050195, |
|
"learning_rate": 1.8204542181031572e-07, |
|
"loss": 0.0909, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.8305084745762712, |
|
"grad_norm": 0.3064057159229424, |
|
"learning_rate": 1.7616065592742038e-07, |
|
"loss": 0.0881, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 0.2998465592381362, |
|
"learning_rate": 1.7037086855465902e-07, |
|
"loss": 0.0629, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.8361581920903953, |
|
"grad_norm": 0.29682999878586, |
|
"learning_rate": 1.6467617368947918e-07, |
|
"loss": 0.0786, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8389830508474576, |
|
"grad_norm": 0.2781558427742286, |
|
"learning_rate": 1.5907668345701732e-07, |
|
"loss": 0.0818, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.84180790960452, |
|
"grad_norm": 0.27914990667274464, |
|
"learning_rate": 1.5357250810788316e-07, |
|
"loss": 0.0739, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.844632768361582, |
|
"grad_norm": 0.3438724850417393, |
|
"learning_rate": 1.4816375601599653e-07, |
|
"loss": 0.0723, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.847457627118644, |
|
"grad_norm": 0.31828540808245065, |
|
"learning_rate": 1.4285053367645074e-07, |
|
"loss": 0.077, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.8502824858757063, |
|
"grad_norm": 0.27972916277751064, |
|
"learning_rate": 1.37632945703412e-07, |
|
"loss": 0.0705, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.8531073446327684, |
|
"grad_norm": 0.24716233730443476, |
|
"learning_rate": 1.3251109482806667e-07, |
|
"loss": 0.0489, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.8559322033898304, |
|
"grad_norm": 0.308468550353476, |
|
"learning_rate": 1.2748508189659447e-07, |
|
"loss": 0.0866, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.8587570621468927, |
|
"grad_norm": 0.3092098443706496, |
|
"learning_rate": 1.2255500586818015e-07, |
|
"loss": 0.1055, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.8615819209039548, |
|
"grad_norm": 0.28162343440142174, |
|
"learning_rate": 1.177209638130733e-07, |
|
"loss": 0.072, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.8644067796610169, |
|
"grad_norm": 0.3007912961091226, |
|
"learning_rate": 1.1298305091066664e-07, |
|
"loss": 0.0908, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.8672316384180792, |
|
"grad_norm": 0.3136447204914414, |
|
"learning_rate": 1.0834136044763188e-07, |
|
"loss": 0.0836, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.8700564971751412, |
|
"grad_norm": 0.31357010636279004, |
|
"learning_rate": 1.0379598381607681e-07, |
|
"loss": 0.0807, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.8728813559322033, |
|
"grad_norm": 0.3090649577202879, |
|
"learning_rate": 9.93470105117461e-08, |
|
"loss": 0.0949, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.8757062146892656, |
|
"grad_norm": 0.30061286344301524, |
|
"learning_rate": 9.499452813226284e-08, |
|
"loss": 0.0832, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.8785310734463276, |
|
"grad_norm": 0.3207780749688742, |
|
"learning_rate": 9.073862237539977e-08, |
|
"loss": 0.0922, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.8813559322033897, |
|
"grad_norm": 0.35510559900468425, |
|
"learning_rate": 8.657937703739516e-08, |
|
"loss": 0.0989, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.884180790960452, |
|
"grad_norm": 0.29122472340879935, |
|
"learning_rate": 8.251687401130137e-08, |
|
"loss": 0.0806, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.8870056497175143, |
|
"grad_norm": 0.3387325114735546, |
|
"learning_rate": 7.855119328537109e-08, |
|
"loss": 0.1179, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.8898305084745761, |
|
"grad_norm": 0.3838105969146293, |
|
"learning_rate": 7.468241294148471e-08, |
|
"loss": 0.1056, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.8926553672316384, |
|
"grad_norm": 0.34521981231042703, |
|
"learning_rate": 7.09106091536127e-08, |
|
"loss": 0.0708, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.8954802259887007, |
|
"grad_norm": 0.2860135017170368, |
|
"learning_rate": 6.723585618631456e-08, |
|
"loss": 0.0807, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.8983050847457628, |
|
"grad_norm": 0.25713094736882913, |
|
"learning_rate": 6.365822639327724e-08, |
|
"loss": 0.0596, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.9011299435028248, |
|
"grad_norm": 0.32493231522498606, |
|
"learning_rate": 6.017779021589065e-08, |
|
"loss": 0.0783, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.9039548022598871, |
|
"grad_norm": 0.3195456319023967, |
|
"learning_rate": 5.679461618185944e-08, |
|
"loss": 0.0989, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.9067796610169492, |
|
"grad_norm": 0.28832414052354244, |
|
"learning_rate": 5.350877090385731e-08, |
|
"loss": 0.0842, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.9096045197740112, |
|
"grad_norm": 0.29862804180466584, |
|
"learning_rate": 5.032031907821089e-08, |
|
"loss": 0.0799, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.9124293785310735, |
|
"grad_norm": 0.39261040413818743, |
|
"learning_rate": 4.722932348362852e-08, |
|
"loss": 0.0763, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.9152542372881356, |
|
"grad_norm": 0.28345109997512263, |
|
"learning_rate": 4.423584497996458e-08, |
|
"loss": 0.0623, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.9180790960451977, |
|
"grad_norm": 0.3730843254054893, |
|
"learning_rate": 4.1339942507018225e-08, |
|
"loss": 0.1051, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.92090395480226, |
|
"grad_norm": 0.408781043992154, |
|
"learning_rate": 3.8541673083377086e-08, |
|
"loss": 0.0972, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.923728813559322, |
|
"grad_norm": 0.27235202225360894, |
|
"learning_rate": 3.584109180529205e-08, |
|
"loss": 0.078, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.926553672316384, |
|
"grad_norm": 0.3249573151531456, |
|
"learning_rate": 3.323825184559204e-08, |
|
"loss": 0.0665, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.9293785310734464, |
|
"grad_norm": 0.3917754153996301, |
|
"learning_rate": 3.073320445263817e-08, |
|
"loss": 0.0948, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.9322033898305084, |
|
"grad_norm": 0.3436451083397938, |
|
"learning_rate": 2.8325998949314536e-08, |
|
"loss": 0.1001, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.9350282485875705, |
|
"grad_norm": 0.2962089436476239, |
|
"learning_rate": 2.6016682732057375e-08, |
|
"loss": 0.092, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.9378531073446328, |
|
"grad_norm": 0.31332687230877443, |
|
"learning_rate": 2.3805301269920754e-08, |
|
"loss": 0.0719, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.940677966101695, |
|
"grad_norm": 0.29206089331438057, |
|
"learning_rate": 2.1691898103682885e-08, |
|
"loss": 0.0803, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.943502824858757, |
|
"grad_norm": 0.29588225232531956, |
|
"learning_rate": 1.9676514844987338e-08, |
|
"loss": 0.0746, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.9463276836158192, |
|
"grad_norm": 0.3052178637723924, |
|
"learning_rate": 1.775919117552427e-08, |
|
"loss": 0.0683, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.9491525423728815, |
|
"grad_norm": 0.33628210770097194, |
|
"learning_rate": 1.593996484624938e-08, |
|
"loss": 0.0876, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.9519774011299436, |
|
"grad_norm": 0.41716535023044066, |
|
"learning_rate": 1.42188716766406e-08, |
|
"loss": 0.0797, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.9548022598870056, |
|
"grad_norm": 0.2721830233815887, |
|
"learning_rate": 1.2595945553992572e-08, |
|
"loss": 0.0746, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.957627118644068, |
|
"grad_norm": 0.3093867803026701, |
|
"learning_rate": 1.1071218432749942e-08, |
|
"loss": 0.0927, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.96045197740113, |
|
"grad_norm": 0.28442945395766916, |
|
"learning_rate": 9.6447203338762e-09, |
|
"loss": 0.0787, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.963276836158192, |
|
"grad_norm": 0.30806352104616475, |
|
"learning_rate": 8.316479344266382e-09, |
|
"loss": 0.0888, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.9661016949152543, |
|
"grad_norm": 0.366990265292286, |
|
"learning_rate": 7.0865216161902785e-09, |
|
"loss": 0.1067, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.9689265536723164, |
|
"grad_norm": 0.37187519570368377, |
|
"learning_rate": 5.954871366779525e-09, |
|
"loss": 0.0814, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.9717514124293785, |
|
"grad_norm": 0.3167633384396195, |
|
"learning_rate": 4.921550877550752e-09, |
|
"loss": 0.0687, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.9745762711864407, |
|
"grad_norm": 0.301910108787057, |
|
"learning_rate": 3.9865804939659414e-09, |
|
"loss": 0.0748, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.9774011299435028, |
|
"grad_norm": 0.32697329312245565, |
|
"learning_rate": 3.1499786250321904e-09, |
|
"loss": 0.1215, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.9802259887005649, |
|
"grad_norm": 0.31440661932949104, |
|
"learning_rate": 2.411761742939778e-09, |
|
"loss": 0.0798, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.9830508474576272, |
|
"grad_norm": 0.32934297243481625, |
|
"learning_rate": 1.7719443827368677e-09, |
|
"loss": 0.0759, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.9858757062146892, |
|
"grad_norm": 0.3018134458199549, |
|
"learning_rate": 1.2305391420458502e-09, |
|
"loss": 0.0935, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.9887005649717513, |
|
"grad_norm": 0.2717967961520749, |
|
"learning_rate": 7.875566808107638e-10, |
|
"loss": 0.0621, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.9915254237288136, |
|
"grad_norm": 0.30491685297395554, |
|
"learning_rate": 4.4300572109134965e-10, |
|
"loss": 0.0913, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.9943502824858759, |
|
"grad_norm": 0.2812227615626389, |
|
"learning_rate": 1.9689304688985667e-10, |
|
"loss": 0.0699, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.9971751412429377, |
|
"grad_norm": 0.27084127166875765, |
|
"learning_rate": 4.922350401781461e-11, |
|
"loss": 0.0745, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.29812406633307104, |
|
"learning_rate": 0.0, |
|
"loss": 0.0894, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 708, |
|
"total_flos": 20169300639744.0, |
|
"train_loss": 0.10943256686362675, |
|
"train_runtime": 738.5688, |
|
"train_samples_per_second": 7.666, |
|
"train_steps_per_second": 0.959 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 708, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 50000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 20169300639744.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |