|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6678476527900797, |
|
"eval_steps": 500, |
|
"global_step": 754, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 15.385598322874909, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 0.7627, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 15.42812332406859, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 0.794, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 13.76934599903778, |
|
"learning_rate": 1.5e-06, |
|
"loss": 0.7894, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.9055471186770685, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.7346, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.624179170790118, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.7458, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 37.14544394485457, |
|
"learning_rate": 3e-06, |
|
"loss": 0.8249, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.413192499879626, |
|
"learning_rate": 3.5e-06, |
|
"loss": 0.7692, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.194156755277431, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.7724, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.569279640169995, |
|
"learning_rate": 4.5e-06, |
|
"loss": 0.7851, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 18.113903622060178, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7874, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.486914001687124, |
|
"learning_rate": 4.999997558722919e-06, |
|
"loss": 0.7553, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.280219682440894, |
|
"learning_rate": 4.999990234896445e-06, |
|
"loss": 0.7095, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 4.3413734180304155, |
|
"learning_rate": 4.99997802853488e-06, |
|
"loss": 0.6916, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.756315245615391, |
|
"learning_rate": 4.999960939662063e-06, |
|
"loss": 0.7407, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.090553047874293, |
|
"learning_rate": 4.999938968311371e-06, |
|
"loss": 0.7387, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.8370558847287075, |
|
"learning_rate": 4.9999121145257126e-06, |
|
"loss": 0.7051, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.986658012877664, |
|
"learning_rate": 4.999880378357535e-06, |
|
"loss": 0.6871, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 4.141716122521651, |
|
"learning_rate": 4.9998437598688195e-06, |
|
"loss": 0.6694, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 4.729722439630604, |
|
"learning_rate": 4.9998022591310815e-06, |
|
"loss": 0.716, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.9486336901615497, |
|
"learning_rate": 4.999755876225375e-06, |
|
"loss": 0.6387, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8336874650575745, |
|
"learning_rate": 4.999704611242285e-06, |
|
"loss": 0.6542, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.6724374918638905, |
|
"learning_rate": 4.999648464281934e-06, |
|
"loss": 0.6617, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.941494127880678, |
|
"learning_rate": 4.999587435453979e-06, |
|
"loss": 0.6687, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.6261822206464744, |
|
"learning_rate": 4.999521524877608e-06, |
|
"loss": 0.6634, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.8059947014946305, |
|
"learning_rate": 4.999450732681549e-06, |
|
"loss": 0.6901, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 3.131537494217822, |
|
"learning_rate": 4.999375059004058e-06, |
|
"loss": 0.6407, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.7893212245465837, |
|
"learning_rate": 4.99929450399293e-06, |
|
"loss": 0.6638, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.4411586751746, |
|
"learning_rate": 4.999209067805487e-06, |
|
"loss": 0.6196, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.8807261299944082, |
|
"learning_rate": 4.999118750608591e-06, |
|
"loss": 0.6839, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.879993804839069, |
|
"learning_rate": 4.9990235525786326e-06, |
|
"loss": 0.6484, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.604360711268946, |
|
"learning_rate": 4.998923473901535e-06, |
|
"loss": 0.6313, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.403225544767816, |
|
"learning_rate": 4.9988185147727544e-06, |
|
"loss": 0.6209, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.669567772543462, |
|
"learning_rate": 4.998708675397278e-06, |
|
"loss": 0.6068, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.443946495915797, |
|
"learning_rate": 4.998593955989626e-06, |
|
"loss": 0.6731, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.2104680876118317, |
|
"learning_rate": 4.998474356773845e-06, |
|
"loss": 0.6243, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3602199264043957, |
|
"learning_rate": 4.9983498779835175e-06, |
|
"loss": 0.6649, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.4676911263240844, |
|
"learning_rate": 4.998220519861752e-06, |
|
"loss": 0.6174, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.3419026099030282, |
|
"learning_rate": 4.998086282661188e-06, |
|
"loss": 0.6123, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.14900736954254, |
|
"learning_rate": 4.997947166643993e-06, |
|
"loss": 0.63, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.570907426799795, |
|
"learning_rate": 4.997803172081864e-06, |
|
"loss": 0.6249, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.516952735669967, |
|
"learning_rate": 4.997654299256026e-06, |
|
"loss": 0.6727, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.1600457198543874, |
|
"learning_rate": 4.997500548457231e-06, |
|
"loss": 0.6719, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.2177572033934743, |
|
"learning_rate": 4.997341919985756e-06, |
|
"loss": 0.6148, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.397105205209689, |
|
"learning_rate": 4.997178414151409e-06, |
|
"loss": 0.6167, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.1254940534972167, |
|
"learning_rate": 4.997010031273517e-06, |
|
"loss": 0.6446, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.2113023791837194, |
|
"learning_rate": 4.996836771680937e-06, |
|
"loss": 0.6304, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.386446316275664, |
|
"learning_rate": 4.99665863571205e-06, |
|
"loss": 0.6621, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.1838934384314483, |
|
"learning_rate": 4.996475623714756e-06, |
|
"loss": 0.6214, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.2047933657923586, |
|
"learning_rate": 4.996287736046485e-06, |
|
"loss": 0.6478, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.208809457983808, |
|
"learning_rate": 4.996094973074183e-06, |
|
"loss": 0.6097, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.1318377198138267, |
|
"learning_rate": 4.995897335174322e-06, |
|
"loss": 0.622, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.0673034122993537, |
|
"learning_rate": 4.995694822732893e-06, |
|
"loss": 0.6036, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.195105312645423, |
|
"learning_rate": 4.9954874361454055e-06, |
|
"loss": 0.6052, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.157855029176061, |
|
"learning_rate": 4.995275175816892e-06, |
|
"loss": 0.6455, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.0500405783991043, |
|
"learning_rate": 4.9950580421619e-06, |
|
"loss": 0.6353, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.199629904296075, |
|
"learning_rate": 4.9948360356044965e-06, |
|
"loss": 0.6122, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.186847580161491, |
|
"learning_rate": 4.994609156578267e-06, |
|
"loss": 0.6073, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.0207512037097835, |
|
"learning_rate": 4.994377405526308e-06, |
|
"loss": 0.61, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.3170193964114976, |
|
"learning_rate": 4.994140782901237e-06, |
|
"loss": 0.6322, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.014785890436746, |
|
"learning_rate": 4.9938992891651825e-06, |
|
"loss": 0.6205, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.9538385063221935, |
|
"learning_rate": 4.9936529247897854e-06, |
|
"loss": 0.5992, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.084943826856202, |
|
"learning_rate": 4.993401690256203e-06, |
|
"loss": 0.6148, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.135158856581583, |
|
"learning_rate": 4.9931455860551e-06, |
|
"loss": 0.5937, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.982621418518698, |
|
"learning_rate": 4.992884612686655e-06, |
|
"loss": 0.6091, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.1030931953494956, |
|
"learning_rate": 4.992618770660553e-06, |
|
"loss": 0.6034, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.1994634556563994, |
|
"learning_rate": 4.992348060495989e-06, |
|
"loss": 0.5846, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.410691403277427, |
|
"learning_rate": 4.992072482721669e-06, |
|
"loss": 0.6294, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.9720494401999067, |
|
"learning_rate": 4.991792037875799e-06, |
|
"loss": 0.591, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.147504025949435, |
|
"learning_rate": 4.991506726506094e-06, |
|
"loss": 0.5689, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.1837702519904223, |
|
"learning_rate": 4.991216549169776e-06, |
|
"loss": 0.6422, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.0883865330274958, |
|
"learning_rate": 4.9909215064335655e-06, |
|
"loss": 0.6076, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.20727863923846, |
|
"learning_rate": 4.990621598873687e-06, |
|
"loss": 0.5974, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.0735330806418464, |
|
"learning_rate": 4.990316827075868e-06, |
|
"loss": 0.6809, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.0203203347538774, |
|
"learning_rate": 4.990007191635334e-06, |
|
"loss": 0.6107, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.234889365362174, |
|
"learning_rate": 4.989692693156809e-06, |
|
"loss": 0.6218, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.9902503343433904, |
|
"learning_rate": 4.989373332254516e-06, |
|
"loss": 0.6257, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1041971507252466, |
|
"learning_rate": 4.989049109552173e-06, |
|
"loss": 0.5888, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1151685783302123, |
|
"learning_rate": 4.988720025682995e-06, |
|
"loss": 0.6333, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.9223819269893592, |
|
"learning_rate": 4.988386081289689e-06, |
|
"loss": 0.6442, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.139676463756265, |
|
"learning_rate": 4.988047277024456e-06, |
|
"loss": 0.5966, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.1665820212993068, |
|
"learning_rate": 4.987703613548988e-06, |
|
"loss": 0.603, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.931456975470041, |
|
"learning_rate": 4.987355091534467e-06, |
|
"loss": 0.6122, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.134995092135601, |
|
"learning_rate": 4.987001711661566e-06, |
|
"loss": 0.6213, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.0173352657570818, |
|
"learning_rate": 4.98664347462044e-06, |
|
"loss": 0.5966, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.0816939924571183, |
|
"learning_rate": 4.986280381110737e-06, |
|
"loss": 0.5575, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.0072477771163357, |
|
"learning_rate": 4.985912431841584e-06, |
|
"loss": 0.6225, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.1895945454214507, |
|
"learning_rate": 4.985539627531596e-06, |
|
"loss": 0.6169, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.84518214074801, |
|
"learning_rate": 4.985161968908866e-06, |
|
"loss": 0.6317, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.194209857089938, |
|
"learning_rate": 4.984779456710971e-06, |
|
"loss": 0.6205, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.1604595364123083, |
|
"learning_rate": 4.9843920916849645e-06, |
|
"loss": 0.6176, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.039087518829079, |
|
"learning_rate": 4.9839998745873795e-06, |
|
"loss": 0.5842, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.0148570016863334, |
|
"learning_rate": 4.983602806184225e-06, |
|
"loss": 0.5936, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.073137159272384, |
|
"learning_rate": 4.983200887250982e-06, |
|
"loss": 0.6317, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.045469602089007, |
|
"learning_rate": 4.9827941185726095e-06, |
|
"loss": 0.5338, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.1201743116757417, |
|
"learning_rate": 4.982382500943533e-06, |
|
"loss": 0.6133, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.0637214917996363, |
|
"learning_rate": 4.981966035167654e-06, |
|
"loss": 0.6483, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.155574452675582, |
|
"learning_rate": 4.981544722058336e-06, |
|
"loss": 0.6001, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.9347601392775928, |
|
"learning_rate": 4.981118562438414e-06, |
|
"loss": 0.5954, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.3054537863874756, |
|
"learning_rate": 4.980687557140187e-06, |
|
"loss": 0.6338, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.0421104909837338, |
|
"learning_rate": 4.980251707005417e-06, |
|
"loss": 0.6166, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.023167301994367, |
|
"learning_rate": 4.979811012885329e-06, |
|
"loss": 0.5682, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.0583654213007967, |
|
"learning_rate": 4.979365475640609e-06, |
|
"loss": 0.5759, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.008917223929121, |
|
"learning_rate": 4.9789150961414e-06, |
|
"loss": 0.6324, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.1111479338304306, |
|
"learning_rate": 4.978459875267303e-06, |
|
"loss": 0.5821, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.400366962461983, |
|
"learning_rate": 4.977999813907375e-06, |
|
"loss": 0.5699, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.090668061316384, |
|
"learning_rate": 4.977534912960124e-06, |
|
"loss": 0.5754, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.2103419288491466, |
|
"learning_rate": 4.977065173333515e-06, |
|
"loss": 0.6005, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.1332380447628294, |
|
"learning_rate": 4.9765905959449565e-06, |
|
"loss": 0.6178, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.1372224949542464, |
|
"learning_rate": 4.976111181721309e-06, |
|
"loss": 0.6021, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.636052326949506, |
|
"learning_rate": 4.97562693159888e-06, |
|
"loss": 0.6418, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.1234423477493443, |
|
"learning_rate": 4.975137846523419e-06, |
|
"loss": 0.6231, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.2817790529425315, |
|
"learning_rate": 4.974643927450121e-06, |
|
"loss": 0.5681, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.2605060344304713, |
|
"learning_rate": 4.9741451753436205e-06, |
|
"loss": 0.5803, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.0355236974665876, |
|
"learning_rate": 4.973641591177991e-06, |
|
"loss": 0.6003, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.4343221170301415, |
|
"learning_rate": 4.973133175936743e-06, |
|
"loss": 0.5882, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.2135760843199734, |
|
"learning_rate": 4.972619930612822e-06, |
|
"loss": 0.5886, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.161909448676307, |
|
"learning_rate": 4.972101856208609e-06, |
|
"loss": 0.5792, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.0871148781401927, |
|
"learning_rate": 4.9715789537359126e-06, |
|
"loss": 0.6383, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.1159018206478626, |
|
"learning_rate": 4.971051224215973e-06, |
|
"loss": 0.5865, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.2036428070670375, |
|
"learning_rate": 4.970518668679459e-06, |
|
"loss": 0.5905, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.22262007661876, |
|
"learning_rate": 4.969981288166461e-06, |
|
"loss": 0.5951, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.0713458839382786, |
|
"learning_rate": 4.969439083726496e-06, |
|
"loss": 0.6011, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.0686060725186897, |
|
"learning_rate": 4.9688920564185e-06, |
|
"loss": 0.6038, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.1825376161159964, |
|
"learning_rate": 4.968340207310832e-06, |
|
"loss": 0.6098, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.142436541976576, |
|
"learning_rate": 4.967783537481262e-06, |
|
"loss": 0.6119, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.330044622755397, |
|
"learning_rate": 4.967222048016979e-06, |
|
"loss": 0.6057, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.109116942854107, |
|
"learning_rate": 4.966655740014585e-06, |
|
"loss": 0.5958, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.174219068914296, |
|
"learning_rate": 4.9660846145800914e-06, |
|
"loss": 0.6276, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.135736248304593, |
|
"learning_rate": 4.965508672828918e-06, |
|
"loss": 0.6309, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.2339234058672885, |
|
"learning_rate": 4.964927915885893e-06, |
|
"loss": 0.5879, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.0960660335616224, |
|
"learning_rate": 4.9643423448852455e-06, |
|
"loss": 0.6218, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 1.9468729925472703, |
|
"learning_rate": 4.963751960970609e-06, |
|
"loss": 0.5998, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.1623168252289915, |
|
"learning_rate": 4.9631567652950164e-06, |
|
"loss": 0.6885, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.084420579583794, |
|
"learning_rate": 4.962556759020898e-06, |
|
"loss": 0.5758, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.1082890389844713, |
|
"learning_rate": 4.961951943320078e-06, |
|
"loss": 0.6116, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.006123424806457, |
|
"learning_rate": 4.9613423193737754e-06, |
|
"loss": 0.5708, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.309431970929405, |
|
"learning_rate": 4.960727888372599e-06, |
|
"loss": 0.621, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.226488524758773, |
|
"learning_rate": 4.9601086515165456e-06, |
|
"loss": 0.5896, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.1242070778655253, |
|
"learning_rate": 4.959484610014997e-06, |
|
"loss": 0.624, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.2147491445730516, |
|
"learning_rate": 4.958855765086722e-06, |
|
"loss": 0.6064, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.1818004600393, |
|
"learning_rate": 4.958222117959868e-06, |
|
"loss": 0.6252, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.1094535889409696, |
|
"learning_rate": 4.95758366987196e-06, |
|
"loss": 0.5779, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.2043056809252577, |
|
"learning_rate": 4.9569404220699025e-06, |
|
"loss": 0.6156, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.158056342799238, |
|
"learning_rate": 4.956292375809971e-06, |
|
"loss": 0.5662, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.987581635345228, |
|
"learning_rate": 4.955639532357815e-06, |
|
"loss": 0.6148, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.266145451051948, |
|
"learning_rate": 4.954981892988451e-06, |
|
"loss": 0.5867, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.071082600205798, |
|
"learning_rate": 4.954319458986264e-06, |
|
"loss": 0.5976, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.1615342548575374, |
|
"learning_rate": 4.953652231645002e-06, |
|
"loss": 0.5643, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.145126231371731, |
|
"learning_rate": 4.952980212267773e-06, |
|
"loss": 0.5592, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9161750244434461, |
|
"learning_rate": 4.952303402167047e-06, |
|
"loss": 0.5547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.234370958372018, |
|
"learning_rate": 4.9516218026646475e-06, |
|
"loss": 0.578, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.149553338429868, |
|
"learning_rate": 4.950935415091753e-06, |
|
"loss": 0.5952, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.1021801657048016, |
|
"learning_rate": 4.950244240788895e-06, |
|
"loss": 0.573, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.488711367210497, |
|
"learning_rate": 4.949548281105951e-06, |
|
"loss": 0.5776, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.0302393290147167, |
|
"learning_rate": 4.948847537402145e-06, |
|
"loss": 0.5685, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.1563261797248043, |
|
"learning_rate": 4.948142011046044e-06, |
|
"loss": 0.6185, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.1308303224609997, |
|
"learning_rate": 4.947431703415558e-06, |
|
"loss": 0.6229, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.0988414912992273, |
|
"learning_rate": 4.946716615897932e-06, |
|
"loss": 0.6167, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.3558302474583095, |
|
"learning_rate": 4.9459967498897485e-06, |
|
"loss": 0.5903, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.1505555405055223, |
|
"learning_rate": 4.945272106796919e-06, |
|
"loss": 0.5709, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.0604140956574635, |
|
"learning_rate": 4.94454268803469e-06, |
|
"loss": 0.635, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.3699836246614696, |
|
"learning_rate": 4.943808495027631e-06, |
|
"loss": 0.581, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.9809907136859368, |
|
"learning_rate": 4.9430695292096365e-06, |
|
"loss": 0.5703, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.213101907296851, |
|
"learning_rate": 4.942325792023922e-06, |
|
"loss": 0.5915, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.3778783149383944, |
|
"learning_rate": 4.941577284923025e-06, |
|
"loss": 0.537, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.9283694807512721, |
|
"learning_rate": 4.9408240093687934e-06, |
|
"loss": 0.579, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.083087334039033, |
|
"learning_rate": 4.940065966832392e-06, |
|
"loss": 0.5612, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.314684793845775, |
|
"learning_rate": 4.939303158794294e-06, |
|
"loss": 0.6001, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.131977461745334, |
|
"learning_rate": 4.93853558674428e-06, |
|
"loss": 0.5809, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.1291924932946755, |
|
"learning_rate": 4.937763252181434e-06, |
|
"loss": 0.6216, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.9366549866764742, |
|
"learning_rate": 4.936986156614144e-06, |
|
"loss": 0.5888, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.231889540095555, |
|
"learning_rate": 4.9362043015600934e-06, |
|
"loss": 0.6437, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.0696023557568233, |
|
"learning_rate": 4.9354176885462626e-06, |
|
"loss": 0.5951, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.10974806039572, |
|
"learning_rate": 4.934626319108923e-06, |
|
"loss": 0.5817, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.0633698321381946, |
|
"learning_rate": 4.933830194793636e-06, |
|
"loss": 0.5692, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.0163693967733423, |
|
"learning_rate": 4.933029317155251e-06, |
|
"loss": 0.5322, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.1118176135699813, |
|
"learning_rate": 4.932223687757899e-06, |
|
"loss": 0.5809, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.181431947183138, |
|
"learning_rate": 4.9314133081749906e-06, |
|
"loss": 0.5444, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.2055197469621386, |
|
"learning_rate": 4.930598179989215e-06, |
|
"loss": 0.6063, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.1103699877035638, |
|
"learning_rate": 4.929778304792537e-06, |
|
"loss": 0.5908, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.01692648335164, |
|
"learning_rate": 4.928953684186189e-06, |
|
"loss": 0.5729, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.990744003423107, |
|
"learning_rate": 4.928124319780673e-06, |
|
"loss": 0.5935, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9898687560952446, |
|
"learning_rate": 4.9272902131957555e-06, |
|
"loss": 0.6008, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9499116832570582, |
|
"learning_rate": 4.926451366060465e-06, |
|
"loss": 0.5731, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.8933258467243923, |
|
"learning_rate": 4.925607780013088e-06, |
|
"loss": 0.5822, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9711936623837691, |
|
"learning_rate": 4.924759456701167e-06, |
|
"loss": 0.5433, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.9981254191144715, |
|
"learning_rate": 4.923906397781495e-06, |
|
"loss": 0.5603, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.9489584101682442, |
|
"learning_rate": 4.923048604920115e-06, |
|
"loss": 0.592, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.14587896098926, |
|
"learning_rate": 4.922186079792315e-06, |
|
"loss": 0.5861, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.093505234897306, |
|
"learning_rate": 4.921318824082625e-06, |
|
"loss": 0.5756, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.9726924068956073, |
|
"learning_rate": 4.920446839484814e-06, |
|
"loss": 0.5954, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.0009011296035886, |
|
"learning_rate": 4.919570127701888e-06, |
|
"loss": 0.5185, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.0801246171281993, |
|
"learning_rate": 4.9186886904460826e-06, |
|
"loss": 0.5788, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.7712602468155096, |
|
"learning_rate": 4.917802529438865e-06, |
|
"loss": 0.6637, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.9721040372060654, |
|
"learning_rate": 4.916911646410926e-06, |
|
"loss": 0.5926, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.1199089061376855, |
|
"learning_rate": 4.91601604310218e-06, |
|
"loss": 0.5854, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 1.9518281461372036, |
|
"learning_rate": 4.915115721261759e-06, |
|
"loss": 0.5456, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.1537515435847734, |
|
"learning_rate": 4.9142106826480114e-06, |
|
"loss": 0.6152, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.3461320565666344, |
|
"learning_rate": 4.913300929028498e-06, |
|
"loss": 0.617, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9789785575462193, |
|
"learning_rate": 4.912386462179987e-06, |
|
"loss": 0.5845, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.0705337307209253, |
|
"learning_rate": 4.9114672838884515e-06, |
|
"loss": 0.6062, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9972918925367322, |
|
"learning_rate": 4.910543395949066e-06, |
|
"loss": 0.6318, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.03173534028091, |
|
"learning_rate": 4.9096148001662055e-06, |
|
"loss": 0.64, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.0861416304602356, |
|
"learning_rate": 4.908681498353436e-06, |
|
"loss": 0.5859, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.932510611788884, |
|
"learning_rate": 4.907743492333517e-06, |
|
"loss": 0.5483, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.9618471764126828, |
|
"learning_rate": 4.906800783938395e-06, |
|
"loss": 0.5767, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.3557796360921786, |
|
"learning_rate": 4.905853375009198e-06, |
|
"loss": 0.5934, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.0993364379712784, |
|
"learning_rate": 4.9049012673962385e-06, |
|
"loss": 0.5879, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.2015612636555155, |
|
"learning_rate": 4.903944462959001e-06, |
|
"loss": 0.5598, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.0374544745406062, |
|
"learning_rate": 4.902982963566147e-06, |
|
"loss": 0.577, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.194866218807, |
|
"learning_rate": 4.902016771095506e-06, |
|
"loss": 0.5848, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.2545375351308614, |
|
"learning_rate": 4.901045887434072e-06, |
|
"loss": 0.5846, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.017012770131601, |
|
"learning_rate": 4.900070314478001e-06, |
|
"loss": 0.5651, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.150900326654639, |
|
"learning_rate": 4.899090054132609e-06, |
|
"loss": 0.568, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.0404886979870454, |
|
"learning_rate": 4.898105108312366e-06, |
|
"loss": 0.5277, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.036614254190257, |
|
"learning_rate": 4.897115478940892e-06, |
|
"loss": 0.5754, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.041133008809928, |
|
"learning_rate": 4.896121167950954e-06, |
|
"loss": 0.6294, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.0029503409054885, |
|
"learning_rate": 4.895122177284465e-06, |
|
"loss": 0.5531, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.0303439698174754, |
|
"learning_rate": 4.894118508892474e-06, |
|
"loss": 0.6008, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.899982778272908, |
|
"learning_rate": 4.893110164735167e-06, |
|
"loss": 0.6076, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.170640326694132, |
|
"learning_rate": 4.892097146781862e-06, |
|
"loss": 0.5806, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.961802557992624, |
|
"learning_rate": 4.8910794570110055e-06, |
|
"loss": 0.5456, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.1149182672715807, |
|
"learning_rate": 4.890057097410167e-06, |
|
"loss": 0.5683, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.9988574008443096, |
|
"learning_rate": 4.889030069976038e-06, |
|
"loss": 0.5603, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.137840782586502, |
|
"learning_rate": 4.887998376714424e-06, |
|
"loss": 0.5713, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.2956357234771634, |
|
"learning_rate": 4.886962019640244e-06, |
|
"loss": 0.5635, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.2175517801056346, |
|
"learning_rate": 4.885921000777528e-06, |
|
"loss": 0.631, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.0861966792656546, |
|
"learning_rate": 4.884875322159407e-06, |
|
"loss": 0.5521, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.170862650134145, |
|
"learning_rate": 4.883824985828114e-06, |
|
"loss": 0.5953, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.016871028914906, |
|
"learning_rate": 4.882769993834978e-06, |
|
"loss": 0.5745, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.4069309610367107, |
|
"learning_rate": 4.8817103482404236e-06, |
|
"loss": 0.5752, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.9834780557891722, |
|
"learning_rate": 4.880646051113959e-06, |
|
"loss": 0.5619, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.1221686040256005, |
|
"learning_rate": 4.87957710453418e-06, |
|
"loss": 0.561, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.1497751964139002, |
|
"learning_rate": 4.878503510588764e-06, |
|
"loss": 0.5754, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.8535318318419167, |
|
"learning_rate": 4.877425271374462e-06, |
|
"loss": 0.5551, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.1537345489224404, |
|
"learning_rate": 4.876342388997099e-06, |
|
"loss": 0.544, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.9695512744073471, |
|
"learning_rate": 4.875254865571567e-06, |
|
"loss": 0.6003, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.2550853928957193, |
|
"learning_rate": 4.874162703221823e-06, |
|
"loss": 0.5968, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.0658630166795917, |
|
"learning_rate": 4.873065904080884e-06, |
|
"loss": 0.5658, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.0821280326495524, |
|
"learning_rate": 4.871964470290823e-06, |
|
"loss": 0.5711, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.9833074137024158, |
|
"learning_rate": 4.8708584040027636e-06, |
|
"loss": 0.5899, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.0288963441502195, |
|
"learning_rate": 4.869747707376877e-06, |
|
"loss": 0.5601, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.0970435875726463, |
|
"learning_rate": 4.868632382582378e-06, |
|
"loss": 0.6381, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.1303280408644194, |
|
"learning_rate": 4.86751243179752e-06, |
|
"loss": 0.5495, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.0851781018580584, |
|
"learning_rate": 4.866387857209591e-06, |
|
"loss": 0.5901, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.8310760160854438, |
|
"learning_rate": 4.86525866101491e-06, |
|
"loss": 0.5513, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.199726167537497, |
|
"learning_rate": 4.8641248454188205e-06, |
|
"loss": 0.5873, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.9776691221978735, |
|
"learning_rate": 4.862986412635691e-06, |
|
"loss": 0.6143, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.0663231641830873, |
|
"learning_rate": 4.8618433648889034e-06, |
|
"loss": 0.5937, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.170520506577784, |
|
"learning_rate": 4.860695704410856e-06, |
|
"loss": 0.5374, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.9685756224067419, |
|
"learning_rate": 4.8595434334429535e-06, |
|
"loss": 0.5139, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.9668205539999677, |
|
"learning_rate": 4.8583865542356065e-06, |
|
"loss": 0.5459, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.0793578279258704, |
|
"learning_rate": 4.857225069048226e-06, |
|
"loss": 0.593, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 1.9265474492849337, |
|
"learning_rate": 4.8560589801492165e-06, |
|
"loss": 0.5559, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.8555278122830696, |
|
"learning_rate": 4.854888289815976e-06, |
|
"loss": 0.5949, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.063838630196542, |
|
"learning_rate": 4.853713000334887e-06, |
|
"loss": 0.5712, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.168668910730517, |
|
"learning_rate": 4.852533114001316e-06, |
|
"loss": 0.5475, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.064042820960706, |
|
"learning_rate": 4.8513486331196055e-06, |
|
"loss": 0.5616, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.026751060346143, |
|
"learning_rate": 4.850159560003074e-06, |
|
"loss": 0.5997, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.1228129299875254, |
|
"learning_rate": 4.848965896974006e-06, |
|
"loss": 0.5622, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.9418510365881214, |
|
"learning_rate": 4.847767646363652e-06, |
|
"loss": 0.5741, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.070611833895483, |
|
"learning_rate": 4.846564810512221e-06, |
|
"loss": 0.5729, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.8833621440375596, |
|
"learning_rate": 4.845357391768877e-06, |
|
"loss": 0.5503, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.1022924907055387, |
|
"learning_rate": 4.844145392491735e-06, |
|
"loss": 0.6204, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.024625007813473, |
|
"learning_rate": 4.842928815047856e-06, |
|
"loss": 0.5776, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.9123739071371275, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"loss": 0.5417, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.062879186086598, |
|
"learning_rate": 4.8404819351728336e-06, |
|
"loss": 0.5387, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.9944627549250884, |
|
"learning_rate": 4.8392516375204986e-06, |
|
"loss": 0.5731, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.9859912626846585, |
|
"learning_rate": 4.838016771259037e-06, |
|
"loss": 0.5969, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.043069520519082, |
|
"learning_rate": 4.836777338800168e-06, |
|
"loss": 0.6217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.913212451622778, |
|
"learning_rate": 4.835533342564531e-06, |
|
"loss": 0.5527, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.978858281238778, |
|
"learning_rate": 4.834284784981678e-06, |
|
"loss": 0.5997, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.004628826916504, |
|
"learning_rate": 4.833031668490067e-06, |
|
"loss": 0.551, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.164370107566024, |
|
"learning_rate": 4.8317739955370645e-06, |
|
"loss": 0.5537, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.891772326146366, |
|
"learning_rate": 4.83051176857893e-06, |
|
"loss": 0.6075, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.0553128913886645, |
|
"learning_rate": 4.8292449900808216e-06, |
|
"loss": 0.5854, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.009000622167072, |
|
"learning_rate": 4.827973662516786e-06, |
|
"loss": 0.5503, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.9385043396652537, |
|
"learning_rate": 4.826697788369752e-06, |
|
"loss": 0.5704, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.3263786060073826, |
|
"learning_rate": 4.8254173701315295e-06, |
|
"loss": 0.5604, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.9251504140774536, |
|
"learning_rate": 4.8241324103028055e-06, |
|
"loss": 0.5647, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.9714117964729747, |
|
"learning_rate": 4.822842911393131e-06, |
|
"loss": 0.604, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.034372279161665, |
|
"learning_rate": 4.821548875920927e-06, |
|
"loss": 0.5803, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.9849114644945505, |
|
"learning_rate": 4.8202503064134725e-06, |
|
"loss": 0.5854, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.3435998455971343, |
|
"learning_rate": 4.818947205406902e-06, |
|
"loss": 0.4988, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.0672779732760924, |
|
"learning_rate": 4.8176395754462e-06, |
|
"loss": 0.5734, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.1206384205127544, |
|
"learning_rate": 4.816327419085197e-06, |
|
"loss": 0.563, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.1105254841893095, |
|
"learning_rate": 4.815010738886561e-06, |
|
"loss": 0.5765, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.072546090747287, |
|
"learning_rate": 4.813689537421798e-06, |
|
"loss": 0.6003, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.1131138426394442, |
|
"learning_rate": 4.812363817271243e-06, |
|
"loss": 0.6097, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.9218545344238502, |
|
"learning_rate": 4.811033581024056e-06, |
|
"loss": 0.6272, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.235420687671868, |
|
"learning_rate": 4.809698831278217e-06, |
|
"loss": 0.5519, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.8915062282224397, |
|
"learning_rate": 4.808359570640522e-06, |
|
"loss": 0.5832, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.9185231023206675, |
|
"learning_rate": 4.8070158017265755e-06, |
|
"loss": 0.5854, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.086526046887808, |
|
"learning_rate": 4.805667527160788e-06, |
|
"loss": 0.5314, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.9995370937944454, |
|
"learning_rate": 4.804314749576368e-06, |
|
"loss": 0.5749, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.099313489806141, |
|
"learning_rate": 4.802957471615319e-06, |
|
"loss": 0.5173, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.067736275086448, |
|
"learning_rate": 4.8015956959284346e-06, |
|
"loss": 0.5434, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.005525416579935, |
|
"learning_rate": 4.800229425175294e-06, |
|
"loss": 0.5589, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.172708847484724, |
|
"learning_rate": 4.7988586620242515e-06, |
|
"loss": 0.5919, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.0010542748493823, |
|
"learning_rate": 4.797483409152438e-06, |
|
"loss": 0.5803, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.1169505971764506, |
|
"learning_rate": 4.7961036692457516e-06, |
|
"loss": 0.5763, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.202849419501746, |
|
"learning_rate": 4.794719444998856e-06, |
|
"loss": 0.5691, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.9765013761990564, |
|
"learning_rate": 4.793330739115169e-06, |
|
"loss": 0.5657, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.0404392238791136, |
|
"learning_rate": 4.791937554306863e-06, |
|
"loss": 0.5648, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.0298920886210516, |
|
"learning_rate": 4.790539893294861e-06, |
|
"loss": 0.5353, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.03157486915788, |
|
"learning_rate": 4.789137758808823e-06, |
|
"loss": 0.5716, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.060346338513047, |
|
"learning_rate": 4.787731153587149e-06, |
|
"loss": 0.5502, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.9286831590091769, |
|
"learning_rate": 4.786320080376968e-06, |
|
"loss": 0.5646, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.042346254905274, |
|
"learning_rate": 4.7849045419341376e-06, |
|
"loss": 0.6085, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.0758243469708293, |
|
"learning_rate": 4.7834845410232356e-06, |
|
"loss": 0.5452, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.0454965773706553, |
|
"learning_rate": 4.782060080417553e-06, |
|
"loss": 0.514, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.073931876222572, |
|
"learning_rate": 4.780631162899094e-06, |
|
"loss": 0.5884, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.9699688248650635, |
|
"learning_rate": 4.7791977912585645e-06, |
|
"loss": 0.529, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.9886162974888701, |
|
"learning_rate": 4.7777599682953696e-06, |
|
"loss": 0.5796, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.9579685029739566, |
|
"learning_rate": 4.7763176968176106e-06, |
|
"loss": 0.5553, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.2181861411036086, |
|
"learning_rate": 4.7748709796420735e-06, |
|
"loss": 0.5806, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.0345738930041777, |
|
"learning_rate": 4.773419819594228e-06, |
|
"loss": 0.6059, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.0710385535524902, |
|
"learning_rate": 4.7719642195082224e-06, |
|
"loss": 0.5539, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.1239710444371442, |
|
"learning_rate": 4.770504182226875e-06, |
|
"loss": 0.5655, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.9564631444382952, |
|
"learning_rate": 4.769039710601669e-06, |
|
"loss": 0.5914, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 1.9969926160116234, |
|
"learning_rate": 4.767570807492752e-06, |
|
"loss": 0.55, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9650736880864492, |
|
"learning_rate": 4.766097475768919e-06, |
|
"loss": 0.5804, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.1946368157969194, |
|
"learning_rate": 4.7646197183076236e-06, |
|
"loss": 0.5631, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9834181085585831, |
|
"learning_rate": 4.763137537994955e-06, |
|
"loss": 0.5779, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.1081651164417057, |
|
"learning_rate": 4.7616509377256445e-06, |
|
"loss": 0.5375, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9972027344990544, |
|
"learning_rate": 4.760159920403055e-06, |
|
"loss": 0.5608, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9554967826543683, |
|
"learning_rate": 4.758664488939174e-06, |
|
"loss": 0.5613, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.211716512822424, |
|
"learning_rate": 4.757164646254614e-06, |
|
"loss": 0.5863, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9203184200502181, |
|
"learning_rate": 4.755660395278598e-06, |
|
"loss": 0.5275, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.0355308159742505, |
|
"learning_rate": 4.7541517389489626e-06, |
|
"loss": 0.5304, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.005680103405306, |
|
"learning_rate": 4.752638680212145e-06, |
|
"loss": 0.5782, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.9930094995522492, |
|
"learning_rate": 4.751121222023183e-06, |
|
"loss": 0.5197, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.130907347619711, |
|
"learning_rate": 4.749599367345703e-06, |
|
"loss": 0.5453, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.0380649677356715, |
|
"learning_rate": 4.748073119151923e-06, |
|
"loss": 0.5394, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.02655053696048, |
|
"learning_rate": 4.7465424804226366e-06, |
|
"loss": 0.5359, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.108255877778432, |
|
"learning_rate": 4.745007454147215e-06, |
|
"loss": 0.5262, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.8422966312136684, |
|
"learning_rate": 4.7434680433235986e-06, |
|
"loss": 0.529, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.1387816386921004, |
|
"learning_rate": 4.741924250958289e-06, |
|
"loss": 0.5599, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.2063774820548794, |
|
"learning_rate": 4.740376080066346e-06, |
|
"loss": 0.6014, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.917696303327652, |
|
"learning_rate": 4.738823533671383e-06, |
|
"loss": 0.615, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.0283765999277916, |
|
"learning_rate": 4.737266614805554e-06, |
|
"loss": 0.5802, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.0340264609590437, |
|
"learning_rate": 4.7357053265095575e-06, |
|
"loss": 0.5331, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.102037194450825, |
|
"learning_rate": 4.734139671832622e-06, |
|
"loss": 0.5534, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.4389875670618113, |
|
"learning_rate": 4.732569653832505e-06, |
|
"loss": 0.5637, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.1143521053252012, |
|
"learning_rate": 4.730995275575486e-06, |
|
"loss": 0.6539, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.6240136232872064, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"loss": 0.5515, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.037602072097695, |
|
"learning_rate": 4.727833450598433e-06, |
|
"loss": 0.5609, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.10711733636797, |
|
"learning_rate": 4.72624601005351e-06, |
|
"loss": 0.5719, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.277613433738313, |
|
"learning_rate": 4.724654221601899e-06, |
|
"loss": 0.5815, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.0082624113337824, |
|
"learning_rate": 4.7230580883523955e-06, |
|
"loss": 0.5524, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.8922591374161477, |
|
"learning_rate": 4.721457613422285e-06, |
|
"loss": 0.5981, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.108229047424278, |
|
"learning_rate": 4.7198527999373266e-06, |
|
"loss": 0.57, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.152965480400126, |
|
"learning_rate": 4.718243651031759e-06, |
|
"loss": 0.5996, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.8885994019827148, |
|
"learning_rate": 4.716630169848282e-06, |
|
"loss": 0.5543, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.221396082747074, |
|
"learning_rate": 4.715012359538062e-06, |
|
"loss": 0.5423, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.247525651087526, |
|
"learning_rate": 4.7133902232607145e-06, |
|
"loss": 0.6049, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.905837742487114, |
|
"learning_rate": 4.711763764184309e-06, |
|
"loss": 0.5523, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.117965067814315, |
|
"learning_rate": 4.710132985485355e-06, |
|
"loss": 0.5682, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.1530948606389373, |
|
"learning_rate": 4.7084978903487985e-06, |
|
"loss": 0.5506, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.8738866858316863, |
|
"learning_rate": 4.706858481968017e-06, |
|
"loss": 0.5426, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.9967053512246618, |
|
"learning_rate": 4.705214763544806e-06, |
|
"loss": 0.5555, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.352080896364055, |
|
"learning_rate": 4.703566738289389e-06, |
|
"loss": 0.587, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.031696719881503, |
|
"learning_rate": 4.701914409420392e-06, |
|
"loss": 0.6088, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.140107830595095, |
|
"learning_rate": 4.700257780164849e-06, |
|
"loss": 0.5596, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.125236417141067, |
|
"learning_rate": 4.698596853758194e-06, |
|
"loss": 0.5513, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.8878623518397697, |
|
"learning_rate": 4.696931633444251e-06, |
|
"loss": 0.5557, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.9523463678463824, |
|
"learning_rate": 4.695262122475232e-06, |
|
"loss": 0.5317, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.3748547328434455, |
|
"learning_rate": 4.6935883241117286e-06, |
|
"loss": 0.5733, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.9248854873148575, |
|
"learning_rate": 4.691910241622704e-06, |
|
"loss": 0.5523, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.1731794693383923, |
|
"learning_rate": 4.69022787828549e-06, |
|
"loss": 0.6489, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.996570702327501, |
|
"learning_rate": 4.688541237385781e-06, |
|
"loss": 0.584, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.0272036390008097, |
|
"learning_rate": 4.68685032221762e-06, |
|
"loss": 0.554, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.9986403184037858, |
|
"learning_rate": 4.685155136083401e-06, |
|
"loss": 0.5798, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.24642442330448, |
|
"learning_rate": 4.683455682293863e-06, |
|
"loss": 0.5486, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.916261956844043, |
|
"learning_rate": 4.681751964168071e-06, |
|
"loss": 0.5678, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.1597492287443396, |
|
"learning_rate": 4.680043985033427e-06, |
|
"loss": 0.5801, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.9634034606261326, |
|
"learning_rate": 4.6783317482256506e-06, |
|
"loss": 0.5412, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.0128604293697263, |
|
"learning_rate": 4.676615257088777e-06, |
|
"loss": 0.5538, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.2205659530523976, |
|
"learning_rate": 4.674894514975149e-06, |
|
"loss": 0.494, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.000557085172021, |
|
"learning_rate": 4.673169525245416e-06, |
|
"loss": 0.5459, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.0089256125274826, |
|
"learning_rate": 4.671440291268518e-06, |
|
"loss": 0.5729, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.076112293053539, |
|
"learning_rate": 4.66970681642169e-06, |
|
"loss": 0.5277, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.996445627957894, |
|
"learning_rate": 4.667969104090441e-06, |
|
"loss": 0.5879, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.379165029211644, |
|
"learning_rate": 4.666227157668564e-06, |
|
"loss": 0.5924, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.101190475222136, |
|
"learning_rate": 4.664480980558118e-06, |
|
"loss": 0.6466, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.035159570620747, |
|
"learning_rate": 4.662730576169423e-06, |
|
"loss": 0.5979, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.1034174780447814, |
|
"learning_rate": 4.660975947921058e-06, |
|
"loss": 0.5635, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.131573174129039, |
|
"learning_rate": 4.65921709923985e-06, |
|
"loss": 0.5602, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 1.9282515780121203, |
|
"learning_rate": 4.657454033560868e-06, |
|
"loss": 0.5292, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.922997066030009, |
|
"learning_rate": 4.655686754327419e-06, |
|
"loss": 0.5475, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9692624098665525, |
|
"learning_rate": 4.653915264991035e-06, |
|
"loss": 0.5529, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.976011234185068, |
|
"learning_rate": 4.652139569011475e-06, |
|
"loss": 0.5439, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.909657950321316, |
|
"learning_rate": 4.650359669856711e-06, |
|
"loss": 0.5558, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9134183734362904, |
|
"learning_rate": 4.648575571002926e-06, |
|
"loss": 0.5428, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.067168876792994, |
|
"learning_rate": 4.646787275934501e-06, |
|
"loss": 0.6261, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9358304010171785, |
|
"learning_rate": 4.644994788144017e-06, |
|
"loss": 0.5698, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9671634072657547, |
|
"learning_rate": 4.643198111132241e-06, |
|
"loss": 0.5345, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.0176052011599133, |
|
"learning_rate": 4.641397248408122e-06, |
|
"loss": 0.5028, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9960700483606102, |
|
"learning_rate": 4.639592203488784e-06, |
|
"loss": 0.5253, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.9329472749401087, |
|
"learning_rate": 4.63778297989952e-06, |
|
"loss": 0.615, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9689526846990402, |
|
"learning_rate": 4.6359695811737805e-06, |
|
"loss": 0.5558, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.043494453339269, |
|
"learning_rate": 4.634152010853175e-06, |
|
"loss": 0.5955, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9251519214200417, |
|
"learning_rate": 4.632330272487455e-06, |
|
"loss": 0.5587, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.2049650629169495, |
|
"learning_rate": 4.6305043696345175e-06, |
|
"loss": 0.5633, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.8971004366601951, |
|
"learning_rate": 4.628674305860389e-06, |
|
"loss": 0.5147, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.958131978242853, |
|
"learning_rate": 4.626840084739224e-06, |
|
"loss": 0.558, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.8809187299789303, |
|
"learning_rate": 4.625001709853296e-06, |
|
"loss": 0.6029, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.07376704403877, |
|
"learning_rate": 4.623159184792992e-06, |
|
"loss": 0.5985, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9773215118384355, |
|
"learning_rate": 4.621312513156801e-06, |
|
"loss": 0.5592, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.2454931529711373, |
|
"learning_rate": 4.6194616985513144e-06, |
|
"loss": 0.5265, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.917266484743525, |
|
"learning_rate": 4.617606744591214e-06, |
|
"loss": 0.5579, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.9196448264725143, |
|
"learning_rate": 4.615747654899263e-06, |
|
"loss": 0.5345, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9733157447209138, |
|
"learning_rate": 4.613884433106306e-06, |
|
"loss": 0.528, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.994664364309963, |
|
"learning_rate": 4.612017082851253e-06, |
|
"loss": 0.5489, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8266904473141898, |
|
"learning_rate": 4.610145607781081e-06, |
|
"loss": 0.5411, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.0294108873934364, |
|
"learning_rate": 4.608270011550823e-06, |
|
"loss": 0.5963, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9735002273071562, |
|
"learning_rate": 4.606390297823555e-06, |
|
"loss": 0.5858, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8987568737188125, |
|
"learning_rate": 4.604506470270403e-06, |
|
"loss": 0.493, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9371998611194052, |
|
"learning_rate": 4.6026185325705195e-06, |
|
"loss": 0.521, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8926221916061328, |
|
"learning_rate": 4.60072648841109e-06, |
|
"loss": 0.4922, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8759546163633927, |
|
"learning_rate": 4.598830341487317e-06, |
|
"loss": 0.5487, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.9425705301229708, |
|
"learning_rate": 4.596930095502416e-06, |
|
"loss": 0.5155, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.8718904454318124, |
|
"learning_rate": 4.59502575416761e-06, |
|
"loss": 0.5372, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8361742824749525, |
|
"learning_rate": 4.593117321202117e-06, |
|
"loss": 0.556, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8520540031413573, |
|
"learning_rate": 4.59120480033315e-06, |
|
"loss": 0.6213, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9670746741442957, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"loss": 0.5685, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.969557039139786, |
|
"learning_rate": 4.587367509833543e-06, |
|
"loss": 0.5472, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9873217018861624, |
|
"learning_rate": 4.585442747697218e-06, |
|
"loss": 0.5419, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9508580236237527, |
|
"learning_rate": 4.5835139126460234e-06, |
|
"loss": 0.566, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8929503262145966, |
|
"learning_rate": 4.58158100844702e-06, |
|
"loss": 0.5526, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9394545018501204, |
|
"learning_rate": 4.57964403887521e-06, |
|
"loss": 0.5469, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 2.1045619298179927, |
|
"learning_rate": 4.577703007713538e-06, |
|
"loss": 0.5397, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.8886665443222683, |
|
"learning_rate": 4.575757918752879e-06, |
|
"loss": 0.5174, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.849256286655662, |
|
"learning_rate": 4.573808775792033e-06, |
|
"loss": 0.558, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.89537230772545, |
|
"learning_rate": 4.5718555826377195e-06, |
|
"loss": 0.6155, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.028600611269796, |
|
"learning_rate": 4.569898343104568e-06, |
|
"loss": 0.5639, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.1153787641168273, |
|
"learning_rate": 4.567937061015107e-06, |
|
"loss": 0.5883, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.0217937777574075, |
|
"learning_rate": 4.5659717401997655e-06, |
|
"loss": 0.5936, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.248716610859176, |
|
"learning_rate": 4.564002384496856e-06, |
|
"loss": 0.5539, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.9689879082294663, |
|
"learning_rate": 4.562028997752574e-06, |
|
"loss": 0.5636, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.763292547062648, |
|
"learning_rate": 4.560051583820987e-06, |
|
"loss": 0.5402, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.129235681815295, |
|
"learning_rate": 4.558070146564025e-06, |
|
"loss": 0.5279, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.987329959970642, |
|
"learning_rate": 4.55608468985148e-06, |
|
"loss": 0.5597, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8223595251951752, |
|
"learning_rate": 4.554095217560991e-06, |
|
"loss": 0.5523, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.8945373677348296, |
|
"learning_rate": 4.55210173357804e-06, |
|
"loss": 0.5611, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8010628987468362, |
|
"learning_rate": 4.550104241795946e-06, |
|
"loss": 0.5406, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.7680591979019162, |
|
"learning_rate": 4.548102746115852e-06, |
|
"loss": 0.5392, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9894409183828397, |
|
"learning_rate": 4.546097250446724e-06, |
|
"loss": 0.568, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9527217933389673, |
|
"learning_rate": 4.544087758705338e-06, |
|
"loss": 0.5616, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8813970745759399, |
|
"learning_rate": 4.5420742748162735e-06, |
|
"loss": 0.5857, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9697471415378363, |
|
"learning_rate": 4.540056802711911e-06, |
|
"loss": 0.5563, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.8610261764458738, |
|
"learning_rate": 4.5380353463324135e-06, |
|
"loss": 0.5414, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.0760585222699075, |
|
"learning_rate": 4.536009909625733e-06, |
|
"loss": 0.6113, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9376608369819073, |
|
"learning_rate": 4.533980496547588e-06, |
|
"loss": 0.5567, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9360208325717025, |
|
"learning_rate": 4.5319471110614676e-06, |
|
"loss": 0.5637, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9103146510774847, |
|
"learning_rate": 4.529909757138619e-06, |
|
"loss": 0.5049, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.9645365532954322, |
|
"learning_rate": 4.5278684387580356e-06, |
|
"loss": 0.5424, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.0430691701895065, |
|
"learning_rate": 4.52582315990646e-06, |
|
"loss": 0.547, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.995685349345533, |
|
"learning_rate": 4.523773924578362e-06, |
|
"loss": 0.6005, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9830544751269077, |
|
"learning_rate": 4.521720736775947e-06, |
|
"loss": 0.5563, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8473463212841006, |
|
"learning_rate": 4.519663600509131e-06, |
|
"loss": 0.5913, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8993140839815026, |
|
"learning_rate": 4.5176025197955495e-06, |
|
"loss": 0.5653, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8179551662772986, |
|
"learning_rate": 4.515537498660535e-06, |
|
"loss": 0.5485, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9275228062086758, |
|
"learning_rate": 4.51346854113712e-06, |
|
"loss": 0.5248, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9668428438048349, |
|
"learning_rate": 4.511395651266023e-06, |
|
"loss": 0.5939, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.9602042152930792, |
|
"learning_rate": 4.509318833095642e-06, |
|
"loss": 0.5452, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.8348566721600683, |
|
"learning_rate": 4.507238090682049e-06, |
|
"loss": 0.5514, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 1.938525142403929, |
|
"learning_rate": 4.505153428088979e-06, |
|
"loss": 0.5822, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.008973560332548, |
|
"learning_rate": 4.503064849387822e-06, |
|
"loss": 0.5765, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.8911779425902009, |
|
"learning_rate": 4.500972358657618e-06, |
|
"loss": 0.5465, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9224818772820709, |
|
"learning_rate": 4.4988759599850485e-06, |
|
"loss": 0.5897, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.990817812633161, |
|
"learning_rate": 4.496775657464423e-06, |
|
"loss": 0.5505, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9167562026803746, |
|
"learning_rate": 4.4946714551976795e-06, |
|
"loss": 0.5779, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9388400892712594, |
|
"learning_rate": 4.492563357294369e-06, |
|
"loss": 0.574, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.0140312788131762, |
|
"learning_rate": 4.490451367871655e-06, |
|
"loss": 0.4928, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.074902721101316, |
|
"learning_rate": 4.488335491054296e-06, |
|
"loss": 0.5366, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.8245504149698855, |
|
"learning_rate": 4.486215730974646e-06, |
|
"loss": 0.581, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.1100306515160656, |
|
"learning_rate": 4.4840920917726425e-06, |
|
"loss": 0.5677, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.9560380000004616, |
|
"learning_rate": 4.4819645775958e-06, |
|
"loss": 0.5426, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.721267171163405, |
|
"learning_rate": 4.479833192599198e-06, |
|
"loss": 0.5868, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0001169229847124, |
|
"learning_rate": 4.477697940945478e-06, |
|
"loss": 0.5667, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0111322894409134, |
|
"learning_rate": 4.475558826804833e-06, |
|
"loss": 0.5707, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8179588699061133, |
|
"learning_rate": 4.473415854355e-06, |
|
"loss": 0.5484, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0491236128150345, |
|
"learning_rate": 4.47126902778125e-06, |
|
"loss": 0.5575, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.049676347036571, |
|
"learning_rate": 4.469118351276381e-06, |
|
"loss": 0.5807, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.8999028972772445, |
|
"learning_rate": 4.4669638290407115e-06, |
|
"loss": 0.5447, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.0754807768031687, |
|
"learning_rate": 4.464805465282071e-06, |
|
"loss": 0.503, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9532719169013661, |
|
"learning_rate": 4.462643264215789e-06, |
|
"loss": 0.5304, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.038547881198709, |
|
"learning_rate": 4.460477230064693e-06, |
|
"loss": 0.6116, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.1342568039197136, |
|
"learning_rate": 4.458307367059092e-06, |
|
"loss": 0.5632, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9267024509918977, |
|
"learning_rate": 4.456133679436778e-06, |
|
"loss": 0.5574, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.795213135692931, |
|
"learning_rate": 4.453956171443008e-06, |
|
"loss": 0.5737, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9428252328171443, |
|
"learning_rate": 4.451774847330505e-06, |
|
"loss": 0.5685, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.7903749800219122, |
|
"learning_rate": 4.449589711359439e-06, |
|
"loss": 0.5214, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.111615491479605, |
|
"learning_rate": 4.447400767797429e-06, |
|
"loss": 0.5329, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.936578332165912, |
|
"learning_rate": 4.445208020919531e-06, |
|
"loss": 0.543, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.0005145681262473, |
|
"learning_rate": 4.4430114750082246e-06, |
|
"loss": 0.5593, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9720912009242426, |
|
"learning_rate": 4.4408111343534125e-06, |
|
"loss": 0.5812, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.0486055586452787, |
|
"learning_rate": 4.4386070032524085e-06, |
|
"loss": 0.5563, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.8043262288689983, |
|
"learning_rate": 4.436399086009928e-06, |
|
"loss": 0.4905, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.9608580808640215, |
|
"learning_rate": 4.43418738693808e-06, |
|
"loss": 0.5548, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.008548225584814, |
|
"learning_rate": 4.431971910356363e-06, |
|
"loss": 0.5955, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8974274240345173, |
|
"learning_rate": 4.429752660591648e-06, |
|
"loss": 0.5742, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8257689605722616, |
|
"learning_rate": 4.427529641978181e-06, |
|
"loss": 0.6177, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0327301577551764, |
|
"learning_rate": 4.425302858857563e-06, |
|
"loss": 0.5872, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9539661576324254, |
|
"learning_rate": 4.42307231557875e-06, |
|
"loss": 0.5728, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9346302819034207, |
|
"learning_rate": 4.420838016498043e-06, |
|
"loss": 0.6019, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.1255667417446054, |
|
"learning_rate": 4.418599965979074e-06, |
|
"loss": 0.5981, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.8293805714793054, |
|
"learning_rate": 4.416358168392806e-06, |
|
"loss": 0.5497, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.929762647152706, |
|
"learning_rate": 4.414112628117518e-06, |
|
"loss": 0.5655, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9808758258773635, |
|
"learning_rate": 4.411863349538798e-06, |
|
"loss": 0.5465, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.0413084054198647, |
|
"learning_rate": 4.409610337049537e-06, |
|
"loss": 0.5264, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 1.9506473664088613, |
|
"learning_rate": 4.4073535950499155e-06, |
|
"loss": 0.5284, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.7875399190820846, |
|
"learning_rate": 4.405093127947402e-06, |
|
"loss": 0.5406, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9594159192262046, |
|
"learning_rate": 4.402828940156735e-06, |
|
"loss": 0.573, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.025943836966642, |
|
"learning_rate": 4.400561036099924e-06, |
|
"loss": 0.5227, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9439140060564322, |
|
"learning_rate": 4.398289420206235e-06, |
|
"loss": 0.5802, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.891060025336787, |
|
"learning_rate": 4.396014096912182e-06, |
|
"loss": 0.55, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9575594944193413, |
|
"learning_rate": 4.393735070661521e-06, |
|
"loss": 0.5213, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.024463679893138, |
|
"learning_rate": 4.391452345905239e-06, |
|
"loss": 0.5354, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.825359223217947, |
|
"learning_rate": 4.389165927101549e-06, |
|
"loss": 0.5506, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.0284690208197484, |
|
"learning_rate": 4.386875818715875e-06, |
|
"loss": 0.5763, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 1.9021830177238082, |
|
"learning_rate": 4.3845820252208476e-06, |
|
"loss": 0.5596, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.0000504821060203, |
|
"learning_rate": 4.3822845510962966e-06, |
|
"loss": 0.5701, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.7341340075311633, |
|
"learning_rate": 4.379983400829237e-06, |
|
"loss": 0.5315, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9297447671947465, |
|
"learning_rate": 4.377678578913868e-06, |
|
"loss": 0.5798, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9233069620366818, |
|
"learning_rate": 4.375370089851554e-06, |
|
"loss": 0.5391, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.976671700063146, |
|
"learning_rate": 4.3730579381508254e-06, |
|
"loss": 0.5674, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.914097057045113, |
|
"learning_rate": 4.3707421283273645e-06, |
|
"loss": 0.5367, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.8477362806445459, |
|
"learning_rate": 4.368422664903997e-06, |
|
"loss": 0.5349, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9704477099484594, |
|
"learning_rate": 4.366099552410686e-06, |
|
"loss": 0.501, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9297086500071385, |
|
"learning_rate": 4.363772795384522e-06, |
|
"loss": 0.5352, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9090996748848685, |
|
"learning_rate": 4.36144239836971e-06, |
|
"loss": 0.5457, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.905870882711107, |
|
"learning_rate": 4.3591083659175655e-06, |
|
"loss": 0.5685, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.968618442539214, |
|
"learning_rate": 4.356770702586506e-06, |
|
"loss": 0.5476, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.9431218136805426, |
|
"learning_rate": 4.354429412942038e-06, |
|
"loss": 0.5719, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.0756451350956215, |
|
"learning_rate": 4.3520845015567495e-06, |
|
"loss": 0.5502, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8350117686217275, |
|
"learning_rate": 4.349735973010306e-06, |
|
"loss": 0.5417, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.03495920394236, |
|
"learning_rate": 4.3473838318894324e-06, |
|
"loss": 0.545, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.7864245375307775, |
|
"learning_rate": 4.3450280827879125e-06, |
|
"loss": 0.5242, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9018530036883652, |
|
"learning_rate": 4.342668730306575e-06, |
|
"loss": 0.554, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8575071370513128, |
|
"learning_rate": 4.340305779053286e-06, |
|
"loss": 0.5287, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8480049595126469, |
|
"learning_rate": 4.33793923364294e-06, |
|
"loss": 0.5554, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.103039565778625, |
|
"learning_rate": 4.335569098697454e-06, |
|
"loss": 0.5526, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.8712145108160219, |
|
"learning_rate": 4.33319537884575e-06, |
|
"loss": 0.5472, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9271972466285336, |
|
"learning_rate": 4.330818078723756e-06, |
|
"loss": 0.5827, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.954438973741856, |
|
"learning_rate": 4.328437202974389e-06, |
|
"loss": 0.5433, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.0467264178153726, |
|
"learning_rate": 4.326052756247553e-06, |
|
"loss": 0.5981, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9418055408636266, |
|
"learning_rate": 4.323664743200123e-06, |
|
"loss": 0.5832, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.444044603553196, |
|
"learning_rate": 4.32127316849594e-06, |
|
"loss": 0.5638, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.8791947879326414, |
|
"learning_rate": 4.318878036805802e-06, |
|
"loss": 0.5864, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.872356245946924, |
|
"learning_rate": 4.3164793528074525e-06, |
|
"loss": 0.5337, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.025493213646544, |
|
"learning_rate": 4.3140771211855725e-06, |
|
"loss": 0.5401, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9845857759145742, |
|
"learning_rate": 4.3116713466317745e-06, |
|
"loss": 0.5712, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.9091874317608197, |
|
"learning_rate": 4.309262033844587e-06, |
|
"loss": 0.5337, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.926646558220673, |
|
"learning_rate": 4.30684918752945e-06, |
|
"loss": 0.5787, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.0450560123448165, |
|
"learning_rate": 4.304432812398704e-06, |
|
"loss": 0.5704, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 1.915800332391142, |
|
"learning_rate": 4.302012913171584e-06, |
|
"loss": 0.5194, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9050588229807015, |
|
"learning_rate": 4.299589494574204e-06, |
|
"loss": 0.5104, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9241714112001687, |
|
"learning_rate": 4.297162561339554e-06, |
|
"loss": 0.5388, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8520273210081386, |
|
"learning_rate": 4.294732118207486e-06, |
|
"loss": 0.5363, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.0240180827444205, |
|
"learning_rate": 4.292298169924709e-06, |
|
"loss": 0.5632, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8385436745856445, |
|
"learning_rate": 4.289860721244776e-06, |
|
"loss": 0.542, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.9260618068482396, |
|
"learning_rate": 4.287419776928078e-06, |
|
"loss": 0.5555, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.155290692386073, |
|
"learning_rate": 4.284975341741833e-06, |
|
"loss": 0.5336, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.461077264148098, |
|
"learning_rate": 4.282527420460073e-06, |
|
"loss": 0.5794, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8539810703173831, |
|
"learning_rate": 4.280076017863643e-06, |
|
"loss": 0.5298, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.981150552962984, |
|
"learning_rate": 4.277621138740185e-06, |
|
"loss": 0.5862, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.8768796036679432, |
|
"learning_rate": 4.275162787884132e-06, |
|
"loss": 0.5255, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.022795676637582, |
|
"learning_rate": 4.272700970096696e-06, |
|
"loss": 0.5984, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.835618231704385, |
|
"learning_rate": 4.27023569018586e-06, |
|
"loss": 0.5297, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.853495005213679, |
|
"learning_rate": 4.267766952966369e-06, |
|
"loss": 0.5188, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8841750183665413, |
|
"learning_rate": 4.265294763259721e-06, |
|
"loss": 0.5678, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8013177249236558, |
|
"learning_rate": 4.262819125894156e-06, |
|
"loss": 0.5286, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8320928495052518, |
|
"learning_rate": 4.2603400457046476e-06, |
|
"loss": 0.5341, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.8323864124122828, |
|
"learning_rate": 4.257857527532891e-06, |
|
"loss": 0.5283, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9487038959665601, |
|
"learning_rate": 4.255371576227301e-06, |
|
"loss": 0.5418, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.7875154296015772, |
|
"learning_rate": 4.252882196642993e-06, |
|
"loss": 0.5065, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.089827238376911, |
|
"learning_rate": 4.250389393641778e-06, |
|
"loss": 0.5919, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9078348658003164, |
|
"learning_rate": 4.247893172092157e-06, |
|
"loss": 0.5212, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 1.9952457072102052, |
|
"learning_rate": 4.245393536869303e-06, |
|
"loss": 0.5284, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0728561008210384, |
|
"learning_rate": 4.242890492855056e-06, |
|
"loss": 0.5214, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.97825451090628, |
|
"learning_rate": 4.240384044937919e-06, |
|
"loss": 0.5586, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.85380003580073, |
|
"learning_rate": 4.237874198013037e-06, |
|
"loss": 0.6078, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.8198051628607304, |
|
"learning_rate": 4.235360956982196e-06, |
|
"loss": 0.5677, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.1343351043013183, |
|
"learning_rate": 4.23284432675381e-06, |
|
"loss": 0.5706, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0294462862804896, |
|
"learning_rate": 4.230324312242911e-06, |
|
"loss": 0.5399, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.9618881336969853, |
|
"learning_rate": 4.227800918371145e-06, |
|
"loss": 0.5292, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.9665398714083597, |
|
"learning_rate": 4.225274150066752e-06, |
|
"loss": 0.5414, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0976099857689268, |
|
"learning_rate": 4.222744012264567e-06, |
|
"loss": 0.5204, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.968032018982793, |
|
"learning_rate": 4.220210509906002e-06, |
|
"loss": 0.5622, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.0055542027073523, |
|
"learning_rate": 4.217673647939044e-06, |
|
"loss": 0.5723, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.031612125247833, |
|
"learning_rate": 4.215133431318239e-06, |
|
"loss": 0.5727, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.04253552367063, |
|
"learning_rate": 4.212589865004684e-06, |
|
"loss": 0.5676, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9143447724555291, |
|
"learning_rate": 4.2100429539660205e-06, |
|
"loss": 0.5452, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.1284999811605334, |
|
"learning_rate": 4.20749270317642e-06, |
|
"loss": 0.5679, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9726237378545723, |
|
"learning_rate": 4.204939117616578e-06, |
|
"loss": 0.5514, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.0537722291479583, |
|
"learning_rate": 4.202382202273702e-06, |
|
"loss": 0.5979, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9695944675405062, |
|
"learning_rate": 4.1998219621415035e-06, |
|
"loss": 0.5519, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.1175148159531196, |
|
"learning_rate": 4.197258402220187e-06, |
|
"loss": 0.5437, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9698920488340708, |
|
"learning_rate": 4.19469152751644e-06, |
|
"loss": 0.5765, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.879379971551763, |
|
"learning_rate": 4.192121343043424e-06, |
|
"loss": 0.5219, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.9668215341266202, |
|
"learning_rate": 4.189547853820767e-06, |
|
"loss": 0.4967, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.0264415648360723, |
|
"learning_rate": 4.186971064874547e-06, |
|
"loss": 0.5591, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9996711001240413, |
|
"learning_rate": 4.18439098123729e-06, |
|
"loss": 0.5909, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9209919754307736, |
|
"learning_rate": 4.181807607947954e-06, |
|
"loss": 0.5516, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8120062816345244, |
|
"learning_rate": 4.1792209500519245e-06, |
|
"loss": 0.5112, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9265993932694714, |
|
"learning_rate": 4.176631012601e-06, |
|
"loss": 0.5716, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.7951063568824173, |
|
"learning_rate": 4.1740378006533835e-06, |
|
"loss": 0.5546, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9478736935670538, |
|
"learning_rate": 4.1714413192736756e-06, |
|
"loss": 0.5137, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9166713700159672, |
|
"learning_rate": 4.168841573532859e-06, |
|
"loss": 0.5285, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.903061790874867, |
|
"learning_rate": 4.166238568508294e-06, |
|
"loss": 0.5643, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8709574261812854, |
|
"learning_rate": 4.1636323092837065e-06, |
|
"loss": 0.5531, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.891374469060374, |
|
"learning_rate": 4.161022800949177e-06, |
|
"loss": 0.5386, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.8621023435008923, |
|
"learning_rate": 4.1584100486011315e-06, |
|
"loss": 0.5472, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8927480615848256, |
|
"learning_rate": 4.155794057342333e-06, |
|
"loss": 0.567, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9157957155248084, |
|
"learning_rate": 4.153174832281867e-06, |
|
"loss": 0.5295, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.7900976303440275, |
|
"learning_rate": 4.150552378535138e-06, |
|
"loss": 0.5374, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9233860209522704, |
|
"learning_rate": 4.1479267012238555e-06, |
|
"loss": 0.5673, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.904244620695313, |
|
"learning_rate": 4.145297805476023e-06, |
|
"loss": 0.5674, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8633100020518014, |
|
"learning_rate": 4.142665696425932e-06, |
|
"loss": 0.5717, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.0449274851229764, |
|
"learning_rate": 4.140030379214147e-06, |
|
"loss": 0.5382, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8437126524936716, |
|
"learning_rate": 4.137391858987502e-06, |
|
"loss": 0.5635, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.9476300616110815, |
|
"learning_rate": 4.134750140899082e-06, |
|
"loss": 0.5354, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8187836169409277, |
|
"learning_rate": 4.132105230108221e-06, |
|
"loss": 0.5678, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8325255303792565, |
|
"learning_rate": 4.1294571317804854e-06, |
|
"loss": 0.5497, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.947073088948294, |
|
"learning_rate": 4.12680585108767e-06, |
|
"loss": 0.6005, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9094602677105208, |
|
"learning_rate": 4.1241513932077835e-06, |
|
"loss": 0.5442, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9308069577521967, |
|
"learning_rate": 4.121493763325039e-06, |
|
"loss": 0.4952, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.955225453108231, |
|
"learning_rate": 4.118832966629847e-06, |
|
"loss": 0.5161, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.8884686835300686, |
|
"learning_rate": 4.116169008318798e-06, |
|
"loss": 0.5834, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.851971220446282, |
|
"learning_rate": 4.113501893594662e-06, |
|
"loss": 0.5762, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.982231343732386, |
|
"learning_rate": 4.110831627666372e-06, |
|
"loss": 0.5043, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.8783480932058496, |
|
"learning_rate": 4.108158215749014e-06, |
|
"loss": 0.5202, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.7472053862830499, |
|
"learning_rate": 4.105481663063821e-06, |
|
"loss": 0.5064, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.71435326799849, |
|
"learning_rate": 4.102801974838158e-06, |
|
"loss": 0.5808, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.9383972995582568, |
|
"learning_rate": 4.100119156305514e-06, |
|
"loss": 0.5268, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.7165619283230378, |
|
"learning_rate": 4.097433212705492e-06, |
|
"loss": 0.5376, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8524888535442023, |
|
"learning_rate": 4.094744149283796e-06, |
|
"loss": 0.5388, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.958121956311822, |
|
"learning_rate": 4.092051971292228e-06, |
|
"loss": 0.5273, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8752806971174674, |
|
"learning_rate": 4.089356683988668e-06, |
|
"loss": 0.5283, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.4399117721583465, |
|
"learning_rate": 4.086658292637072e-06, |
|
"loss": 0.5643, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.897865148445396, |
|
"learning_rate": 4.083956802507456e-06, |
|
"loss": 0.5432, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.0947253224544826, |
|
"learning_rate": 4.0812522188758874e-06, |
|
"loss": 0.6738, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.8801252766945993, |
|
"learning_rate": 4.078544547024479e-06, |
|
"loss": 0.5516, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.884681207915535, |
|
"learning_rate": 4.075833792241371e-06, |
|
"loss": 0.5521, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.911314829964074, |
|
"learning_rate": 4.073119959820728e-06, |
|
"loss": 0.5279, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.860637117587055, |
|
"learning_rate": 4.070403055062721e-06, |
|
"loss": 0.5543, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 2.0453601596603157, |
|
"learning_rate": 4.0676830832735245e-06, |
|
"loss": 0.5757, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8114060321351384, |
|
"learning_rate": 4.064960049765304e-06, |
|
"loss": 0.5049, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.959305167631277, |
|
"learning_rate": 4.062233959856202e-06, |
|
"loss": 0.5378, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8509512649844786, |
|
"learning_rate": 4.059504818870332e-06, |
|
"loss": 0.5695, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0120311393374677, |
|
"learning_rate": 4.056772632137762e-06, |
|
"loss": 0.5548, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.185006431209757, |
|
"learning_rate": 4.054037404994516e-06, |
|
"loss": 0.5796, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8639659087725635, |
|
"learning_rate": 4.05129914278255e-06, |
|
"loss": 0.503, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0128366658538726, |
|
"learning_rate": 4.048557850849749e-06, |
|
"loss": 0.5543, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.0493127075126467, |
|
"learning_rate": 4.045813534549917e-06, |
|
"loss": 0.5971, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8943877873256292, |
|
"learning_rate": 4.043066199242762e-06, |
|
"loss": 0.5512, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.8607643797927613, |
|
"learning_rate": 4.04031585029389e-06, |
|
"loss": 0.5755, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 1.933467010931308, |
|
"learning_rate": 4.037562493074792e-06, |
|
"loss": 0.546, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.870898209604796, |
|
"learning_rate": 4.034806132962834e-06, |
|
"loss": 0.5101, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7765005525064146, |
|
"learning_rate": 4.032046775341247e-06, |
|
"loss": 0.535, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.808388020113739, |
|
"learning_rate": 4.029284425599116e-06, |
|
"loss": 0.5532, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9444426383785842, |
|
"learning_rate": 4.026519089131371e-06, |
|
"loss": 0.5804, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8810929458792174, |
|
"learning_rate": 4.023750771338774e-06, |
|
"loss": 0.5023, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7587173598023012, |
|
"learning_rate": 4.020979477627907e-06, |
|
"loss": 0.588, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8616544736960938, |
|
"learning_rate": 4.018205213411169e-06, |
|
"loss": 0.5604, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8517363531329913, |
|
"learning_rate": 4.015427984106759e-06, |
|
"loss": 0.5503, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.7164279131663547, |
|
"learning_rate": 4.012647795138664e-06, |
|
"loss": 0.5353, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.8490922932257532, |
|
"learning_rate": 4.009864651936653e-06, |
|
"loss": 0.5527, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 1.9222471762582807, |
|
"learning_rate": 4.007078559936268e-06, |
|
"loss": 0.5449, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7126406752680576, |
|
"learning_rate": 4.0042895245788035e-06, |
|
"loss": 0.5102, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.7999692875631594, |
|
"learning_rate": 4.001497551311308e-06, |
|
"loss": 0.514, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8482521644616647, |
|
"learning_rate": 3.998702645586565e-06, |
|
"loss": 0.546, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8124842120343776, |
|
"learning_rate": 3.995904812863086e-06, |
|
"loss": 0.5432, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9053654350943952, |
|
"learning_rate": 3.993104058605099e-06, |
|
"loss": 0.6222, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.851530834120678, |
|
"learning_rate": 3.9903003882825396e-06, |
|
"loss": 0.5069, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.824612938648448, |
|
"learning_rate": 3.987493807371033e-06, |
|
"loss": 0.5279, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8322983038942529, |
|
"learning_rate": 3.984684321351895e-06, |
|
"loss": 0.504, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.1601679247075105, |
|
"learning_rate": 3.981871935712112e-06, |
|
"loss": 0.5448, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9324323412240167, |
|
"learning_rate": 3.979056655944335e-06, |
|
"loss": 0.5696, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8887222870071794, |
|
"learning_rate": 3.9762384875468645e-06, |
|
"loss": 0.5147, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.9025483031058836, |
|
"learning_rate": 3.973417436023646e-06, |
|
"loss": 0.5322, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.944754689874286, |
|
"learning_rate": 3.970593506884254e-06, |
|
"loss": 0.564, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8782062559948918, |
|
"learning_rate": 3.9677667056438824e-06, |
|
"loss": 0.5179, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7615090001622373, |
|
"learning_rate": 3.964937037823337e-06, |
|
"loss": 0.52, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.877979446527034, |
|
"learning_rate": 3.962104508949018e-06, |
|
"loss": 0.5611, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8668900126580097, |
|
"learning_rate": 3.9592691245529174e-06, |
|
"loss": 0.5398, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.0467424748632395, |
|
"learning_rate": 3.9564308901726016e-06, |
|
"loss": 0.5429, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.7523480652481473, |
|
"learning_rate": 3.9535898113512046e-06, |
|
"loss": 0.5456, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.9384307177445268, |
|
"learning_rate": 3.950745893637414e-06, |
|
"loss": 0.5298, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.0200307543606266, |
|
"learning_rate": 3.947899142585464e-06, |
|
"loss": 0.5813, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.8825594318661294, |
|
"learning_rate": 3.945049563755119e-06, |
|
"loss": 0.5843, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 1.801304483173922, |
|
"learning_rate": 3.94219716271167e-06, |
|
"loss": 0.5332, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.789336412692842, |
|
"learning_rate": 3.939341945025918e-06, |
|
"loss": 0.5712, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.6764596672056864, |
|
"learning_rate": 3.936483916274163e-06, |
|
"loss": 0.5471, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8160991340297739, |
|
"learning_rate": 3.933623082038199e-06, |
|
"loss": 0.5172, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.9958719154660882, |
|
"learning_rate": 3.930759447905298e-06, |
|
"loss": 0.5243, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.7844190098902166, |
|
"learning_rate": 3.927893019468196e-06, |
|
"loss": 0.5679, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8231700761644845, |
|
"learning_rate": 3.925023802325094e-06, |
|
"loss": 0.5415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8577751348591511, |
|
"learning_rate": 3.922151802079633e-06, |
|
"loss": 0.5451, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.872268020286279, |
|
"learning_rate": 3.919277024340891e-06, |
|
"loss": 0.5805, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.956916033214976, |
|
"learning_rate": 3.916399474723373e-06, |
|
"loss": 0.5142, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8690696320721123, |
|
"learning_rate": 3.913519158846994e-06, |
|
"loss": 0.5377, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.8932224298053513, |
|
"learning_rate": 3.910636082337076e-06, |
|
"loss": 0.5174, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7671002724508906, |
|
"learning_rate": 3.907750250824327e-06, |
|
"loss": 0.5227, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8537234882936333, |
|
"learning_rate": 3.904861669944839e-06, |
|
"loss": 0.5672, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8993796687475375, |
|
"learning_rate": 3.901970345340075e-06, |
|
"loss": 0.5131, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8118617206389966, |
|
"learning_rate": 3.899076282656853e-06, |
|
"loss": 0.5243, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.8195324114535576, |
|
"learning_rate": 3.89617948754734e-06, |
|
"loss": 0.5255, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.777076552111516, |
|
"learning_rate": 3.89327996566904e-06, |
|
"loss": 0.5482, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7960584295638569, |
|
"learning_rate": 3.890377722684782e-06, |
|
"loss": 0.5232, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.0180517293259777, |
|
"learning_rate": 3.887472764262709e-06, |
|
"loss": 0.4988, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.7698597985590767, |
|
"learning_rate": 3.884565096076269e-06, |
|
"loss": 0.4934, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.9593013419554524, |
|
"learning_rate": 3.8816547238042e-06, |
|
"loss": 0.554, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 1.803176799671639, |
|
"learning_rate": 3.878741653130521e-06, |
|
"loss": 0.5058, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8739139669777212, |
|
"learning_rate": 3.875825889744525e-06, |
|
"loss": 0.5291, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7425957572489872, |
|
"learning_rate": 3.872907439340758e-06, |
|
"loss": 0.5132, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7880023308134785, |
|
"learning_rate": 3.86998630761902e-06, |
|
"loss": 0.5388, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.035324802689225, |
|
"learning_rate": 3.867062500284342e-06, |
|
"loss": 0.5225, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7720228048563502, |
|
"learning_rate": 3.864136023046984e-06, |
|
"loss": 0.5535, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.893636721431615, |
|
"learning_rate": 3.861206881622419e-06, |
|
"loss": 0.5445, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9975882991420841, |
|
"learning_rate": 3.8582750817313245e-06, |
|
"loss": 0.498, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8894358056153195, |
|
"learning_rate": 3.855340629099568e-06, |
|
"loss": 0.5262, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.8226831631189866, |
|
"learning_rate": 3.852403529458199e-06, |
|
"loss": 0.5289, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9219589460322386, |
|
"learning_rate": 3.84946378854344e-06, |
|
"loss": 0.5828, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.9524000874112546, |
|
"learning_rate": 3.846521412096665e-06, |
|
"loss": 0.5755, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.7855988589662195, |
|
"learning_rate": 3.8435764058643994e-06, |
|
"loss": 0.508, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.7556968697529176, |
|
"learning_rate": 3.840628775598306e-06, |
|
"loss": 0.5038, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8615629845007688, |
|
"learning_rate": 3.837678527055168e-06, |
|
"loss": 0.5658, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.355106616980178, |
|
"learning_rate": 3.834725665996889e-06, |
|
"loss": 0.6255, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.057901705133853, |
|
"learning_rate": 3.8317701981904655e-06, |
|
"loss": 0.5009, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.8144866213511652, |
|
"learning_rate": 3.828812129407994e-06, |
|
"loss": 0.5378, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.895740992214761, |
|
"learning_rate": 3.825851465426643e-06, |
|
"loss": 0.5414, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.7690202691648218, |
|
"learning_rate": 3.822888212028658e-06, |
|
"loss": 0.5782, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.9910212850942313, |
|
"learning_rate": 3.819922375001334e-06, |
|
"loss": 0.538, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.022977401775343, |
|
"learning_rate": 3.816953960137017e-06, |
|
"loss": 0.5265, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.18942238369997, |
|
"learning_rate": 3.8139829732330833e-06, |
|
"loss": 0.5419, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.0143145051916487, |
|
"learning_rate": 3.8110094200919356e-06, |
|
"loss": 0.5396, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8684895296380082, |
|
"learning_rate": 3.8080333065209885e-06, |
|
"loss": 0.5285, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.899758991227905, |
|
"learning_rate": 3.8050546383326546e-06, |
|
"loss": 0.5392, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.7830347822365242, |
|
"learning_rate": 3.8020734213443392e-06, |
|
"loss": 0.5395, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9688219937316351, |
|
"learning_rate": 3.799089661378423e-06, |
|
"loss": 0.5832, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8380061964557934, |
|
"learning_rate": 3.7961033642622536e-06, |
|
"loss": 0.5182, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9752769027783192, |
|
"learning_rate": 3.793114535828134e-06, |
|
"loss": 0.5189, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.9908258845677271, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"loss": 0.5863, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8419144313470388, |
|
"learning_rate": 3.787129308359963e-06, |
|
"loss": 0.5596, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8578409208981632, |
|
"learning_rate": 3.7841329210151905e-06, |
|
"loss": 0.5757, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8125362585272666, |
|
"learning_rate": 3.7811340257310036e-06, |
|
"loss": 0.5625, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 1.8266843142853604, |
|
"learning_rate": 3.778132628364309e-06, |
|
"loss": 0.5121, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.9286747700189457, |
|
"learning_rate": 3.7751287347769006e-06, |
|
"loss": 0.5856, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8358169963837994, |
|
"learning_rate": 3.772122350835447e-06, |
|
"loss": 0.5363, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.8751145280860322, |
|
"learning_rate": 3.769113482411483e-06, |
|
"loss": 0.5435, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 1.7372022137266947, |
|
"learning_rate": 3.766102135381393e-06, |
|
"loss": 0.5114, |
|
"step": 754 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2258, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 377, |
|
"total_flos": 355094809804800.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|