|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9937952430196484, |
|
"eval_steps": 500, |
|
"global_step": 482, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004136504653567736, |
|
"grad_norm": 8.454320907592773, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.4689, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.008273009307135471, |
|
"grad_norm": 8.755942344665527, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.4625, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.012409513960703205, |
|
"grad_norm": 13.382512092590332, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.4319, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.016546018614270942, |
|
"grad_norm": 11.072649955749512, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.471, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.020682523267838676, |
|
"grad_norm": 4.4571709632873535, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.4341, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02481902792140641, |
|
"grad_norm": 4.237286567687988, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.4637, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.028955532574974147, |
|
"grad_norm": 3.21901535987854, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.4598, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.033092037228541885, |
|
"grad_norm": 2.7905218601226807, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.4174, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.03722854188210962, |
|
"grad_norm": 2.5547449588775635, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.4488, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04136504653567735, |
|
"grad_norm": 2.075817584991455, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.4323, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045501551189245086, |
|
"grad_norm": 1.1852331161499023, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.404, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.04963805584281282, |
|
"grad_norm": 1.370549201965332, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.3687, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05377456049638056, |
|
"grad_norm": 0.7430139780044556, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.3812, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.057911065149948295, |
|
"grad_norm": 0.8032243251800537, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.373, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06204756980351603, |
|
"grad_norm": 2.7111802101135254, |
|
"learning_rate": 6e-06, |
|
"loss": 0.371, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.06618407445708377, |
|
"grad_norm": 0.8430923819541931, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.3891, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0703205791106515, |
|
"grad_norm": 0.6954956650733948, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.376, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.07445708376421924, |
|
"grad_norm": 0.7058322429656982, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.3958, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.07859358841778696, |
|
"grad_norm": 0.5975633859634399, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.3674, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0827300930713547, |
|
"grad_norm": 0.6905612945556641, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.3925, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08686659772492245, |
|
"grad_norm": 0.6662179827690125, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.3837, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09100310237849017, |
|
"grad_norm": 0.9616004824638367, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.3805, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.09513960703205791, |
|
"grad_norm": 1.6762669086456299, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.3661, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.09927611168562564, |
|
"grad_norm": 3.642876148223877, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.3723, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.10341261633919338, |
|
"grad_norm": 22.331893920898438, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4012, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.10754912099276112, |
|
"grad_norm": 4.078958034515381, |
|
"learning_rate": 9.999881857639567e-06, |
|
"loss": 0.4019, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11168562564632885, |
|
"grad_norm": 2.163355827331543, |
|
"learning_rate": 9.999527436141312e-06, |
|
"loss": 0.4275, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.11582213029989659, |
|
"grad_norm": 1.0988469123840332, |
|
"learning_rate": 9.998936752254111e-06, |
|
"loss": 0.3885, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.11995863495346432, |
|
"grad_norm": 1.299137830734253, |
|
"learning_rate": 9.998109833891883e-06, |
|
"loss": 0.388, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.12409513960703206, |
|
"grad_norm": 5.283950328826904, |
|
"learning_rate": 9.997046720132262e-06, |
|
"loss": 0.4219, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1282316442605998, |
|
"grad_norm": 0.8062543869018555, |
|
"learning_rate": 9.995747461214752e-06, |
|
"loss": 0.3589, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13236814891416754, |
|
"grad_norm": 0.7722073793411255, |
|
"learning_rate": 9.994212118538364e-06, |
|
"loss": 0.3486, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.13650465356773525, |
|
"grad_norm": 0.762801468372345, |
|
"learning_rate": 9.992440764658697e-06, |
|
"loss": 0.3676, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.140641158221303, |
|
"grad_norm": 0.745180606842041, |
|
"learning_rate": 9.990433483284527e-06, |
|
"loss": 0.4115, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.14477766287487073, |
|
"grad_norm": 0.8027282953262329, |
|
"learning_rate": 9.988190369273834e-06, |
|
"loss": 0.4001, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.14891416752843847, |
|
"grad_norm": 0.6682556867599487, |
|
"learning_rate": 9.985711528629332e-06, |
|
"loss": 0.3637, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15305067218200621, |
|
"grad_norm": 0.6948946714401245, |
|
"learning_rate": 9.982997078493457e-06, |
|
"loss": 0.3488, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.15718717683557393, |
|
"grad_norm": 0.7381304502487183, |
|
"learning_rate": 9.980047147142824e-06, |
|
"loss": 0.3777, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.16132368148914167, |
|
"grad_norm": 0.6775998473167419, |
|
"learning_rate": 9.976861873982177e-06, |
|
"loss": 0.3904, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1654601861427094, |
|
"grad_norm": 0.8822210431098938, |
|
"learning_rate": 9.973441409537795e-06, |
|
"loss": 0.383, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.16959669079627715, |
|
"grad_norm": 0.657383382320404, |
|
"learning_rate": 9.969785915450368e-06, |
|
"loss": 0.3882, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1737331954498449, |
|
"grad_norm": 0.6214635372161865, |
|
"learning_rate": 9.965895564467381e-06, |
|
"loss": 0.3922, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1778697001034126, |
|
"grad_norm": 0.6440220475196838, |
|
"learning_rate": 9.961770540434931e-06, |
|
"loss": 0.3796, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18200620475698034, |
|
"grad_norm": 0.6130467653274536, |
|
"learning_rate": 9.95741103828905e-06, |
|
"loss": 0.3562, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.18614270941054809, |
|
"grad_norm": 0.6214406490325928, |
|
"learning_rate": 9.952817264046486e-06, |
|
"loss": 0.396, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19027921406411583, |
|
"grad_norm": 0.5837990641593933, |
|
"learning_rate": 9.947989434794973e-06, |
|
"loss": 0.3455, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.19441571871768357, |
|
"grad_norm": 0.6432331800460815, |
|
"learning_rate": 9.942927778682968e-06, |
|
"loss": 0.3791, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.19855222337125128, |
|
"grad_norm": 0.6208926439285278, |
|
"learning_rate": 9.937632534908872e-06, |
|
"loss": 0.4059, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.20268872802481902, |
|
"grad_norm": 0.6121624112129211, |
|
"learning_rate": 9.932103953709724e-06, |
|
"loss": 0.3693, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.20682523267838676, |
|
"grad_norm": 0.5415109395980835, |
|
"learning_rate": 9.926342296349378e-06, |
|
"loss": 0.3192, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2109617373319545, |
|
"grad_norm": 0.5551713109016418, |
|
"learning_rate": 9.920347835106152e-06, |
|
"loss": 0.3563, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.21509824198552224, |
|
"grad_norm": 0.6338883638381958, |
|
"learning_rate": 9.914120853259968e-06, |
|
"loss": 0.3917, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.21923474663908996, |
|
"grad_norm": 0.6104925274848938, |
|
"learning_rate": 9.90766164507896e-06, |
|
"loss": 0.3983, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.2233712512926577, |
|
"grad_norm": 0.592183530330658, |
|
"learning_rate": 9.900970515805564e-06, |
|
"loss": 0.341, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.22750775594622544, |
|
"grad_norm": 0.513282060623169, |
|
"learning_rate": 9.89404778164211e-06, |
|
"loss": 0.3581, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23164426059979318, |
|
"grad_norm": 0.5831045508384705, |
|
"learning_rate": 9.886893769735852e-06, |
|
"loss": 0.3561, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.23578076525336092, |
|
"grad_norm": 0.5578728914260864, |
|
"learning_rate": 9.879508818163536e-06, |
|
"loss": 0.3615, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.23991726990692863, |
|
"grad_norm": 0.6278296709060669, |
|
"learning_rate": 9.871893275915408e-06, |
|
"loss": 0.3675, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.24405377456049637, |
|
"grad_norm": 0.7174540758132935, |
|
"learning_rate": 9.864047502878717e-06, |
|
"loss": 0.3633, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.2481902792140641, |
|
"grad_norm": 0.5807657837867737, |
|
"learning_rate": 9.855971869820726e-06, |
|
"loss": 0.3567, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.25232678386763185, |
|
"grad_norm": 0.5903241038322449, |
|
"learning_rate": 9.847666758371175e-06, |
|
"loss": 0.3864, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.2564632885211996, |
|
"grad_norm": 0.5542154312133789, |
|
"learning_rate": 9.83913256100425e-06, |
|
"loss": 0.3763, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.26059979317476734, |
|
"grad_norm": 0.6011348962783813, |
|
"learning_rate": 9.830369681020043e-06, |
|
"loss": 0.363, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.2647362978283351, |
|
"grad_norm": 0.5147583484649658, |
|
"learning_rate": 9.821378532525479e-06, |
|
"loss": 0.3634, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.2688728024819028, |
|
"grad_norm": 0.5299031734466553, |
|
"learning_rate": 9.812159540414766e-06, |
|
"loss": 0.3703, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.2730093071354705, |
|
"grad_norm": 0.525841474533081, |
|
"learning_rate": 9.802713140349294e-06, |
|
"loss": 0.3592, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.27714581178903824, |
|
"grad_norm": 0.47922268509864807, |
|
"learning_rate": 9.79303977873707e-06, |
|
"loss": 0.3484, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.281282316442606, |
|
"grad_norm": 0.5722537636756897, |
|
"learning_rate": 9.783139912711597e-06, |
|
"loss": 0.3435, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.2854188210961737, |
|
"grad_norm": 0.603398859500885, |
|
"learning_rate": 9.773014010110298e-06, |
|
"loss": 0.3995, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.28955532574974147, |
|
"grad_norm": 0.529694139957428, |
|
"learning_rate": 9.76266254945238e-06, |
|
"loss": 0.3934, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2936918304033092, |
|
"grad_norm": 0.5173729062080383, |
|
"learning_rate": 9.752086019916246e-06, |
|
"loss": 0.3618, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.29782833505687695, |
|
"grad_norm": 0.5737492442131042, |
|
"learning_rate": 9.74128492131636e-06, |
|
"loss": 0.377, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.3019648397104447, |
|
"grad_norm": 0.6340614557266235, |
|
"learning_rate": 9.730259764079636e-06, |
|
"loss": 0.3887, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.30610134436401243, |
|
"grad_norm": 0.5502659678459167, |
|
"learning_rate": 9.719011069221316e-06, |
|
"loss": 0.3749, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.31023784901758017, |
|
"grad_norm": 0.48107632994651794, |
|
"learning_rate": 9.70753936832034e-06, |
|
"loss": 0.3445, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.31437435367114785, |
|
"grad_norm": 0.47837021946907043, |
|
"learning_rate": 9.695845203494242e-06, |
|
"loss": 0.3566, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.3185108583247156, |
|
"grad_norm": 0.5170641541481018, |
|
"learning_rate": 9.683929127373514e-06, |
|
"loss": 0.3878, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.32264736297828334, |
|
"grad_norm": 0.5370326638221741, |
|
"learning_rate": 9.671791703075502e-06, |
|
"loss": 0.3545, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.3267838676318511, |
|
"grad_norm": 0.5362874865531921, |
|
"learning_rate": 9.659433504177786e-06, |
|
"loss": 0.3947, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.3309203722854188, |
|
"grad_norm": 0.5446822047233582, |
|
"learning_rate": 9.646855114691081e-06, |
|
"loss": 0.3777, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.33505687693898656, |
|
"grad_norm": 0.5000081658363342, |
|
"learning_rate": 9.63405712903164e-06, |
|
"loss": 0.3713, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.3391933815925543, |
|
"grad_norm": 0.4431915581226349, |
|
"learning_rate": 9.621040151993153e-06, |
|
"loss": 0.3508, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.34332988624612204, |
|
"grad_norm": 0.51210618019104, |
|
"learning_rate": 9.607804798718182e-06, |
|
"loss": 0.3702, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.3474663908996898, |
|
"grad_norm": 0.49018731713294983, |
|
"learning_rate": 9.59435169466907e-06, |
|
"loss": 0.3796, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.3516028955532575, |
|
"grad_norm": 0.5700220465660095, |
|
"learning_rate": 9.580681475598413e-06, |
|
"loss": 0.3882, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.3557394002068252, |
|
"grad_norm": 0.48753252625465393, |
|
"learning_rate": 9.566794787518986e-06, |
|
"loss": 0.3773, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.35987590486039295, |
|
"grad_norm": 0.47645437717437744, |
|
"learning_rate": 9.552692286673231e-06, |
|
"loss": 0.3478, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.3640124095139607, |
|
"grad_norm": 0.4645499587059021, |
|
"learning_rate": 9.538374639502247e-06, |
|
"loss": 0.3523, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.36814891416752843, |
|
"grad_norm": 0.4936198890209198, |
|
"learning_rate": 9.523842522614285e-06, |
|
"loss": 0.3233, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.37228541882109617, |
|
"grad_norm": 0.47896862030029297, |
|
"learning_rate": 9.509096622752781e-06, |
|
"loss": 0.3583, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.3764219234746639, |
|
"grad_norm": 0.4804452955722809, |
|
"learning_rate": 9.4941376367639e-06, |
|
"loss": 0.3441, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.38055842812823165, |
|
"grad_norm": 0.47014203667640686, |
|
"learning_rate": 9.478966271563614e-06, |
|
"loss": 0.3406, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.3846949327817994, |
|
"grad_norm": 0.5452392101287842, |
|
"learning_rate": 9.463583244104274e-06, |
|
"loss": 0.3658, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.38883143743536713, |
|
"grad_norm": 0.49594131112098694, |
|
"learning_rate": 9.447989281340753e-06, |
|
"loss": 0.3644, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.3929679420889349, |
|
"grad_norm": 0.48177802562713623, |
|
"learning_rate": 9.43218512019608e-06, |
|
"loss": 0.364, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.39710444674250256, |
|
"grad_norm": 0.4789188504219055, |
|
"learning_rate": 9.416171507526615e-06, |
|
"loss": 0.3724, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.4012409513960703, |
|
"grad_norm": 0.5925107598304749, |
|
"learning_rate": 9.399949200086757e-06, |
|
"loss": 0.3799, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.40537745604963804, |
|
"grad_norm": 0.540553092956543, |
|
"learning_rate": 9.383518964493183e-06, |
|
"loss": 0.3913, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.4095139607032058, |
|
"grad_norm": 0.5033954977989197, |
|
"learning_rate": 9.36688157718862e-06, |
|
"loss": 0.3882, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.4136504653567735, |
|
"grad_norm": 0.4835229218006134, |
|
"learning_rate": 9.350037824405151e-06, |
|
"loss": 0.357, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.41778697001034126, |
|
"grad_norm": 0.5028110146522522, |
|
"learning_rate": 9.332988502127063e-06, |
|
"loss": 0.3395, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.421923474663909, |
|
"grad_norm": 0.6103828549385071, |
|
"learning_rate": 9.315734416053223e-06, |
|
"loss": 0.3832, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.42605997931747674, |
|
"grad_norm": 0.4925767481327057, |
|
"learning_rate": 9.298276381559015e-06, |
|
"loss": 0.3414, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.4301964839710445, |
|
"grad_norm": 0.5328059792518616, |
|
"learning_rate": 9.280615223657801e-06, |
|
"loss": 0.3887, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.4343329886246122, |
|
"grad_norm": 0.5046906471252441, |
|
"learning_rate": 9.262751776961936e-06, |
|
"loss": 0.3608, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.4384694932781799, |
|
"grad_norm": 0.4689864218235016, |
|
"learning_rate": 9.24468688564332e-06, |
|
"loss": 0.3734, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.44260599793174765, |
|
"grad_norm": 0.46193334460258484, |
|
"learning_rate": 9.226421403393513e-06, |
|
"loss": 0.3557, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.4467425025853154, |
|
"grad_norm": 0.518205463886261, |
|
"learning_rate": 9.207956193383392e-06, |
|
"loss": 0.3293, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.45087900723888313, |
|
"grad_norm": 0.5061272978782654, |
|
"learning_rate": 9.189292128222355e-06, |
|
"loss": 0.3477, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.4550155118924509, |
|
"grad_norm": 0.46607810258865356, |
|
"learning_rate": 9.170430089917089e-06, |
|
"loss": 0.3978, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.4591520165460186, |
|
"grad_norm": 0.4538101851940155, |
|
"learning_rate": 9.151370969829883e-06, |
|
"loss": 0.3525, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.46328852119958636, |
|
"grad_norm": 0.4456521272659302, |
|
"learning_rate": 9.132115668636512e-06, |
|
"loss": 0.3575, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.4674250258531541, |
|
"grad_norm": 0.5219409465789795, |
|
"learning_rate": 9.112665096283668e-06, |
|
"loss": 0.3703, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.47156153050672184, |
|
"grad_norm": 0.5195448398590088, |
|
"learning_rate": 9.093020171945966e-06, |
|
"loss": 0.3651, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.4756980351602896, |
|
"grad_norm": 0.5239256620407104, |
|
"learning_rate": 9.073181823982495e-06, |
|
"loss": 0.3555, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.47983453981385726, |
|
"grad_norm": 0.4262794852256775, |
|
"learning_rate": 9.05315098989296e-06, |
|
"loss": 0.3303, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.483971044467425, |
|
"grad_norm": 0.4619412422180176, |
|
"learning_rate": 9.032928616273369e-06, |
|
"loss": 0.3612, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.48810754912099275, |
|
"grad_norm": 0.468650758266449, |
|
"learning_rate": 9.012515658771301e-06, |
|
"loss": 0.3725, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.4922440537745605, |
|
"grad_norm": 0.4874132573604584, |
|
"learning_rate": 8.991913082040752e-06, |
|
"loss": 0.3671, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.4963805584281282, |
|
"grad_norm": 0.48114946484565735, |
|
"learning_rate": 8.971121859696539e-06, |
|
"loss": 0.3603, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.500517063081696, |
|
"grad_norm": 0.5342724919319153, |
|
"learning_rate": 8.950142974268295e-06, |
|
"loss": 0.3561, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.5046535677352637, |
|
"grad_norm": 0.5296602845191956, |
|
"learning_rate": 8.928977417154037e-06, |
|
"loss": 0.3552, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.5087900723888314, |
|
"grad_norm": 0.47604137659072876, |
|
"learning_rate": 8.907626188573319e-06, |
|
"loss": 0.3751, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.5129265770423992, |
|
"grad_norm": 0.544127345085144, |
|
"learning_rate": 8.886090297519956e-06, |
|
"loss": 0.39, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.5170630816959669, |
|
"grad_norm": 0.495714396238327, |
|
"learning_rate": 8.864370761714348e-06, |
|
"loss": 0.3764, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.5211995863495347, |
|
"grad_norm": 0.45246466994285583, |
|
"learning_rate": 8.842468607555389e-06, |
|
"loss": 0.3273, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.5253360910031024, |
|
"grad_norm": 0.46964627504348755, |
|
"learning_rate": 8.820384870071951e-06, |
|
"loss": 0.3712, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.5294725956566702, |
|
"grad_norm": 0.5150438547134399, |
|
"learning_rate": 8.79812059287399e-06, |
|
"loss": 0.3676, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.5336091003102379, |
|
"grad_norm": 0.48608115315437317, |
|
"learning_rate": 8.775676828103205e-06, |
|
"loss": 0.3862, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.5377456049638056, |
|
"grad_norm": 0.5238416790962219, |
|
"learning_rate": 8.753054636383336e-06, |
|
"loss": 0.3927, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.5418821096173733, |
|
"grad_norm": 0.4756030738353729, |
|
"learning_rate": 8.730255086770037e-06, |
|
"loss": 0.3429, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.546018614270941, |
|
"grad_norm": 0.46515801548957825, |
|
"learning_rate": 8.707279256700348e-06, |
|
"loss": 0.3367, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.5501551189245087, |
|
"grad_norm": 0.5517006516456604, |
|
"learning_rate": 8.684128231941789e-06, |
|
"loss": 0.3688, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.5542916235780765, |
|
"grad_norm": 0.5072327852249146, |
|
"learning_rate": 8.660803106541044e-06, |
|
"loss": 0.3224, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.5584281282316442, |
|
"grad_norm": 0.4414540231227875, |
|
"learning_rate": 8.637304982772263e-06, |
|
"loss": 0.3166, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.562564632885212, |
|
"grad_norm": 0.5401909351348877, |
|
"learning_rate": 8.613634971084967e-06, |
|
"loss": 0.3697, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.5667011375387797, |
|
"grad_norm": 0.502416729927063, |
|
"learning_rate": 8.589794190051582e-06, |
|
"loss": 0.3647, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.5708376421923474, |
|
"grad_norm": 0.49979519844055176, |
|
"learning_rate": 8.56578376631456e-06, |
|
"loss": 0.3542, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.5749741468459152, |
|
"grad_norm": 0.4783455431461334, |
|
"learning_rate": 8.541604834533159e-06, |
|
"loss": 0.3577, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.5791106514994829, |
|
"grad_norm": 0.4866260886192322, |
|
"learning_rate": 8.51725853732981e-06, |
|
"loss": 0.3567, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.5832471561530507, |
|
"grad_norm": 0.480307012796402, |
|
"learning_rate": 8.492746025236113e-06, |
|
"loss": 0.335, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.5873836608066184, |
|
"grad_norm": 0.4932575821876526, |
|
"learning_rate": 8.468068456638491e-06, |
|
"loss": 0.3411, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.5915201654601862, |
|
"grad_norm": 0.49242812395095825, |
|
"learning_rate": 8.443226997723426e-06, |
|
"loss": 0.3589, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.5956566701137539, |
|
"grad_norm": 0.5170210599899292, |
|
"learning_rate": 8.418222822422348e-06, |
|
"loss": 0.385, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.5997931747673216, |
|
"grad_norm": 0.45948079228401184, |
|
"learning_rate": 8.393057112356181e-06, |
|
"loss": 0.3502, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.6039296794208894, |
|
"grad_norm": 0.47525444626808167, |
|
"learning_rate": 8.367731056779476e-06, |
|
"loss": 0.3387, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.6080661840744571, |
|
"grad_norm": 0.4996655583381653, |
|
"learning_rate": 8.342245852524229e-06, |
|
"loss": 0.3243, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.6122026887280249, |
|
"grad_norm": 0.4717055559158325, |
|
"learning_rate": 8.316602703943315e-06, |
|
"loss": 0.3696, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.6163391933815926, |
|
"grad_norm": 0.5229761600494385, |
|
"learning_rate": 8.290802822853576e-06, |
|
"loss": 0.4026, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.6204756980351603, |
|
"grad_norm": 0.4920945465564728, |
|
"learning_rate": 8.26484742847855e-06, |
|
"loss": 0.3555, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.6246122026887281, |
|
"grad_norm": 0.416532963514328, |
|
"learning_rate": 8.238737747390859e-06, |
|
"loss": 0.3145, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.6287487073422957, |
|
"grad_norm": 0.4948025941848755, |
|
"learning_rate": 8.212475013454249e-06, |
|
"loss": 0.3603, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.6328852119958635, |
|
"grad_norm": 0.4692654013633728, |
|
"learning_rate": 8.186060467765268e-06, |
|
"loss": 0.3541, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.6370217166494312, |
|
"grad_norm": 0.4930100440979004, |
|
"learning_rate": 8.159495358594627e-06, |
|
"loss": 0.328, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.6411582213029989, |
|
"grad_norm": 0.46493637561798096, |
|
"learning_rate": 8.13278094132821e-06, |
|
"loss": 0.3514, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.6452947259565667, |
|
"grad_norm": 0.5059131383895874, |
|
"learning_rate": 8.10591847840774e-06, |
|
"loss": 0.3522, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6494312306101344, |
|
"grad_norm": 0.5415008664131165, |
|
"learning_rate": 8.078909239271127e-06, |
|
"loss": 0.345, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.6535677352637022, |
|
"grad_norm": 0.49019861221313477, |
|
"learning_rate": 8.051754500292479e-06, |
|
"loss": 0.3526, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.6577042399172699, |
|
"grad_norm": 0.4391830563545227, |
|
"learning_rate": 8.024455544721778e-06, |
|
"loss": 0.3368, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.6618407445708376, |
|
"grad_norm": 0.5758143663406372, |
|
"learning_rate": 7.997013662624246e-06, |
|
"loss": 0.3606, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.6659772492244054, |
|
"grad_norm": 0.46434131264686584, |
|
"learning_rate": 7.969430150819372e-06, |
|
"loss": 0.3263, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.6701137538779731, |
|
"grad_norm": 0.5234054923057556, |
|
"learning_rate": 7.941706312819632e-06, |
|
"loss": 0.3635, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.6742502585315409, |
|
"grad_norm": 0.46102845668792725, |
|
"learning_rate": 7.913843458768892e-06, |
|
"loss": 0.3487, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.6783867631851086, |
|
"grad_norm": 0.505272388458252, |
|
"learning_rate": 7.88584290538049e-06, |
|
"loss": 0.3687, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.6825232678386763, |
|
"grad_norm": 0.5247129797935486, |
|
"learning_rate": 7.857705975875015e-06, |
|
"loss": 0.3575, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.6866597724922441, |
|
"grad_norm": 0.4995476007461548, |
|
"learning_rate": 7.829433999917773e-06, |
|
"loss": 0.3583, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.6907962771458118, |
|
"grad_norm": 0.478943407535553, |
|
"learning_rate": 7.801028313555954e-06, |
|
"loss": 0.3539, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.6949327817993796, |
|
"grad_norm": 0.4750828146934509, |
|
"learning_rate": 7.772490259155493e-06, |
|
"loss": 0.3317, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.6990692864529473, |
|
"grad_norm": 0.4754940867424011, |
|
"learning_rate": 7.743821185337634e-06, |
|
"loss": 0.3209, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.703205791106515, |
|
"grad_norm": 0.4950121343135834, |
|
"learning_rate": 7.715022446915195e-06, |
|
"loss": 0.3341, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.7073422957600828, |
|
"grad_norm": 0.48745468258857727, |
|
"learning_rate": 7.686095404828552e-06, |
|
"loss": 0.3602, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.7114788004136504, |
|
"grad_norm": 0.48764947056770325, |
|
"learning_rate": 7.65704142608132e-06, |
|
"loss": 0.3624, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.7156153050672182, |
|
"grad_norm": 0.5114070773124695, |
|
"learning_rate": 7.627861883675748e-06, |
|
"loss": 0.3449, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.7197518097207859, |
|
"grad_norm": 0.4847152829170227, |
|
"learning_rate": 7.598558156547842e-06, |
|
"loss": 0.3318, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.7238883143743536, |
|
"grad_norm": 0.5162774920463562, |
|
"learning_rate": 7.569131629502201e-06, |
|
"loss": 0.3539, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.7280248190279214, |
|
"grad_norm": 0.49352213740348816, |
|
"learning_rate": 7.53958369314657e-06, |
|
"loss": 0.3504, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.7321613236814891, |
|
"grad_norm": 0.4514661133289337, |
|
"learning_rate": 7.509915743826128e-06, |
|
"loss": 0.3602, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.7362978283350569, |
|
"grad_norm": 0.5056818127632141, |
|
"learning_rate": 7.480129183557499e-06, |
|
"loss": 0.3511, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.7404343329886246, |
|
"grad_norm": 0.5009995102882385, |
|
"learning_rate": 7.450225419962498e-06, |
|
"loss": 0.3299, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.7445708376421923, |
|
"grad_norm": 0.5529451966285706, |
|
"learning_rate": 7.4202058662016155e-06, |
|
"loss": 0.3605, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.7487073422957601, |
|
"grad_norm": 0.5108004212379456, |
|
"learning_rate": 7.390071940907222e-06, |
|
"loss": 0.3497, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.7528438469493278, |
|
"grad_norm": 0.45150938630104065, |
|
"learning_rate": 7.3598250681165485e-06, |
|
"loss": 0.347, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.7569803516028956, |
|
"grad_norm": 0.49005162715911865, |
|
"learning_rate": 7.329466677204371e-06, |
|
"loss": 0.3485, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.7611168562564633, |
|
"grad_norm": 0.4927361011505127, |
|
"learning_rate": 7.298998202815474e-06, |
|
"loss": 0.3432, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.765253360910031, |
|
"grad_norm": 0.48336061835289, |
|
"learning_rate": 7.268421084796852e-06, |
|
"loss": 0.3443, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.7693898655635988, |
|
"grad_norm": 0.48736652731895447, |
|
"learning_rate": 7.237736768129663e-06, |
|
"loss": 0.3418, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.7735263702171665, |
|
"grad_norm": 0.4602266252040863, |
|
"learning_rate": 7.206946702860948e-06, |
|
"loss": 0.3322, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.7776628748707343, |
|
"grad_norm": 0.4475662410259247, |
|
"learning_rate": 7.176052344035101e-06, |
|
"loss": 0.3519, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.781799379524302, |
|
"grad_norm": 0.46669697761535645, |
|
"learning_rate": 7.145055151625113e-06, |
|
"loss": 0.3623, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.7859358841778697, |
|
"grad_norm": 0.4729274809360504, |
|
"learning_rate": 7.1139565904635755e-06, |
|
"loss": 0.3517, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.7900723888314375, |
|
"grad_norm": 0.49703437089920044, |
|
"learning_rate": 7.082758130173456e-06, |
|
"loss": 0.3732, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.7942088934850051, |
|
"grad_norm": 0.5119916200637817, |
|
"learning_rate": 7.051461245098654e-06, |
|
"loss": 0.3421, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.7983453981385729, |
|
"grad_norm": 0.4503278434276581, |
|
"learning_rate": 7.020067414234315e-06, |
|
"loss": 0.3342, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.8024819027921406, |
|
"grad_norm": 0.46572044491767883, |
|
"learning_rate": 6.988578121156956e-06, |
|
"loss": 0.3314, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.8066184074457083, |
|
"grad_norm": 0.49221017956733704, |
|
"learning_rate": 6.956994853954342e-06, |
|
"loss": 0.3634, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.8107549120992761, |
|
"grad_norm": 0.5337055921554565, |
|
"learning_rate": 6.925319105155165e-06, |
|
"loss": 0.346, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.8148914167528438, |
|
"grad_norm": 0.4575997591018677, |
|
"learning_rate": 6.8935523716585195e-06, |
|
"loss": 0.3538, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.8190279214064116, |
|
"grad_norm": 0.5041812062263489, |
|
"learning_rate": 6.8616961546631575e-06, |
|
"loss": 0.3548, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.8231644260599793, |
|
"grad_norm": 0.4733670651912689, |
|
"learning_rate": 6.829751959596544e-06, |
|
"loss": 0.3414, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.827300930713547, |
|
"grad_norm": 0.48330968618392944, |
|
"learning_rate": 6.797721296043727e-06, |
|
"loss": 0.325, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8314374353671148, |
|
"grad_norm": 0.4963349997997284, |
|
"learning_rate": 6.765605677675982e-06, |
|
"loss": 0.3858, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.8355739400206825, |
|
"grad_norm": 0.5333994626998901, |
|
"learning_rate": 6.733406622179295e-06, |
|
"loss": 0.3538, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.8397104446742503, |
|
"grad_norm": 0.4624415338039398, |
|
"learning_rate": 6.701125651182631e-06, |
|
"loss": 0.3025, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.843846949327818, |
|
"grad_norm": 0.45845648646354675, |
|
"learning_rate": 6.668764290186039e-06, |
|
"loss": 0.3458, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.8479834539813857, |
|
"grad_norm": 0.5057909488677979, |
|
"learning_rate": 6.6363240684885465e-06, |
|
"loss": 0.33, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.8521199586349535, |
|
"grad_norm": 0.5474227666854858, |
|
"learning_rate": 6.603806519115899e-06, |
|
"loss": 0.3386, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.8562564632885212, |
|
"grad_norm": 0.5117132067680359, |
|
"learning_rate": 6.571213178748112e-06, |
|
"loss": 0.3775, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.860392967942089, |
|
"grad_norm": 0.4669731557369232, |
|
"learning_rate": 6.538545587646854e-06, |
|
"loss": 0.3575, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.8645294725956567, |
|
"grad_norm": 0.4318840503692627, |
|
"learning_rate": 6.50580528958265e-06, |
|
"loss": 0.3201, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.8686659772492245, |
|
"grad_norm": 0.5034843683242798, |
|
"learning_rate": 6.47299383176194e-06, |
|
"loss": 0.3169, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.8728024819027922, |
|
"grad_norm": 0.5146070122718811, |
|
"learning_rate": 6.440112764753956e-06, |
|
"loss": 0.3653, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.8769389865563598, |
|
"grad_norm": 0.49277129769325256, |
|
"learning_rate": 6.4071636424174435e-06, |
|
"loss": 0.3485, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.8810754912099276, |
|
"grad_norm": 0.4620700776576996, |
|
"learning_rate": 6.374148021827237e-06, |
|
"loss": 0.3525, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.8852119958634953, |
|
"grad_norm": 0.5235023498535156, |
|
"learning_rate": 6.341067463200678e-06, |
|
"loss": 0.3638, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.889348500517063, |
|
"grad_norm": 0.4999266564846039, |
|
"learning_rate": 6.307923529823876e-06, |
|
"loss": 0.3692, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.8934850051706308, |
|
"grad_norm": 0.46116530895233154, |
|
"learning_rate": 6.2747177879778424e-06, |
|
"loss": 0.3316, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.8976215098241985, |
|
"grad_norm": 0.4651578664779663, |
|
"learning_rate": 6.241451806864465e-06, |
|
"loss": 0.3176, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.9017580144777663, |
|
"grad_norm": 0.45744726061820984, |
|
"learning_rate": 6.208127158532358e-06, |
|
"loss": 0.3261, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.905894519131334, |
|
"grad_norm": 0.4478837549686432, |
|
"learning_rate": 6.174745417802563e-06, |
|
"loss": 0.3357, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.9100310237849017, |
|
"grad_norm": 0.49428898096084595, |
|
"learning_rate": 6.141308162194141e-06, |
|
"loss": 0.321, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.9141675284384695, |
|
"grad_norm": 0.4366098642349243, |
|
"learning_rate": 6.1078169718496164e-06, |
|
"loss": 0.3132, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.9183040330920372, |
|
"grad_norm": 0.5066491365432739, |
|
"learning_rate": 6.074273429460296e-06, |
|
"loss": 0.3342, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.922440537745605, |
|
"grad_norm": 0.4254951775074005, |
|
"learning_rate": 6.040679120191491e-06, |
|
"loss": 0.3089, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.9265770423991727, |
|
"grad_norm": 0.46807774901390076, |
|
"learning_rate": 6.007035631607605e-06, |
|
"loss": 0.3182, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.9307135470527405, |
|
"grad_norm": 0.4610796570777893, |
|
"learning_rate": 5.9733445535970915e-06, |
|
"loss": 0.3239, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.9348500517063082, |
|
"grad_norm": 0.5245600342750549, |
|
"learning_rate": 5.939607478297347e-06, |
|
"loss": 0.3818, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.9389865563598759, |
|
"grad_norm": 0.45463472604751587, |
|
"learning_rate": 5.905826000019458e-06, |
|
"loss": 0.3109, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.9431230610134437, |
|
"grad_norm": 0.46084877848625183, |
|
"learning_rate": 5.8720017151728526e-06, |
|
"loss": 0.3475, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.9472595656670114, |
|
"grad_norm": 0.46296611428260803, |
|
"learning_rate": 5.838136222189874e-06, |
|
"loss": 0.3343, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.9513960703205792, |
|
"grad_norm": 0.458286315202713, |
|
"learning_rate": 5.804231121450235e-06, |
|
"loss": 0.3454, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.9555325749741469, |
|
"grad_norm": 0.42349058389663696, |
|
"learning_rate": 5.770288015205385e-06, |
|
"loss": 0.329, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.9596690796277145, |
|
"grad_norm": 0.4541251063346863, |
|
"learning_rate": 5.736308507502805e-06, |
|
"loss": 0.3296, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.9638055842812823, |
|
"grad_norm": 0.4887123107910156, |
|
"learning_rate": 5.702294204110191e-06, |
|
"loss": 0.3374, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.96794208893485, |
|
"grad_norm": 0.46135684847831726, |
|
"learning_rate": 5.668246712439579e-06, |
|
"loss": 0.3426, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.9720785935884177, |
|
"grad_norm": 0.4848094582557678, |
|
"learning_rate": 5.634167641471383e-06, |
|
"loss": 0.3626, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.9762150982419855, |
|
"grad_norm": 0.4424203932285309, |
|
"learning_rate": 5.600058601678357e-06, |
|
"loss": 0.302, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.9803516028955532, |
|
"grad_norm": 0.46382346749305725, |
|
"learning_rate": 5.5659212049494915e-06, |
|
"loss": 0.3357, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.984488107549121, |
|
"grad_norm": 0.4296742379665375, |
|
"learning_rate": 5.531757064513837e-06, |
|
"loss": 0.3162, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.9886246122026887, |
|
"grad_norm": 0.42605388164520264, |
|
"learning_rate": 5.4975677948642704e-06, |
|
"loss": 0.3204, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.9927611168562565, |
|
"grad_norm": 0.4539097547531128, |
|
"learning_rate": 5.4633550116812e-06, |
|
"loss": 0.327, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.9968976215098242, |
|
"grad_norm": 0.4806179404258728, |
|
"learning_rate": 5.429120331756208e-06, |
|
"loss": 0.3469, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.001034126163392, |
|
"grad_norm": 0.4494527280330658, |
|
"learning_rate": 5.394865372915656e-06, |
|
"loss": 0.3304, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.0051706308169597, |
|
"grad_norm": 0.5063448548316956, |
|
"learning_rate": 5.360591753944221e-06, |
|
"loss": 0.2792, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.0093071354705274, |
|
"grad_norm": 0.47153183817863464, |
|
"learning_rate": 5.3263010945083994e-06, |
|
"loss": 0.2593, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.0134436401240952, |
|
"grad_norm": 0.5729573369026184, |
|
"learning_rate": 5.291995015079969e-06, |
|
"loss": 0.2884, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.017580144777663, |
|
"grad_norm": 0.5748021602630615, |
|
"learning_rate": 5.257675136859415e-06, |
|
"loss": 0.2852, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.0217166494312306, |
|
"grad_norm": 0.49926644563674927, |
|
"learning_rate": 5.223343081699302e-06, |
|
"loss": 0.2947, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.0258531540847984, |
|
"grad_norm": 0.5036705732345581, |
|
"learning_rate": 5.189000472027645e-06, |
|
"loss": 0.2747, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.0299896587383661, |
|
"grad_norm": 0.557823896408081, |
|
"learning_rate": 5.1546489307712345e-06, |
|
"loss": 0.2724, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.0341261633919339, |
|
"grad_norm": 0.49561646580696106, |
|
"learning_rate": 5.1202900812789346e-06, |
|
"loss": 0.263, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0382626680455016, |
|
"grad_norm": 0.46465885639190674, |
|
"learning_rate": 5.085925547244978e-06, |
|
"loss": 0.263, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.0423991726990693, |
|
"grad_norm": 0.5004085302352905, |
|
"learning_rate": 5.051556952632235e-06, |
|
"loss": 0.2831, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.046535677352637, |
|
"grad_norm": 0.5784794688224792, |
|
"learning_rate": 5.0171859215954575e-06, |
|
"loss": 0.2835, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.0506721820062048, |
|
"grad_norm": 0.4798305332660675, |
|
"learning_rate": 4.982814078404543e-06, |
|
"loss": 0.2382, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.0548086866597726, |
|
"grad_norm": 0.47284895181655884, |
|
"learning_rate": 4.948443047367767e-06, |
|
"loss": 0.2491, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.0589451913133403, |
|
"grad_norm": 0.4997791051864624, |
|
"learning_rate": 4.9140744527550225e-06, |
|
"loss": 0.2484, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.063081695966908, |
|
"grad_norm": 0.4812958836555481, |
|
"learning_rate": 4.879709918721067e-06, |
|
"loss": 0.2674, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.0672182006204758, |
|
"grad_norm": 0.4800451099872589, |
|
"learning_rate": 4.845351069228767e-06, |
|
"loss": 0.2625, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.0713547052740435, |
|
"grad_norm": 0.5013061165809631, |
|
"learning_rate": 4.8109995279723556e-06, |
|
"loss": 0.2739, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.0754912099276113, |
|
"grad_norm": 0.5202277898788452, |
|
"learning_rate": 4.776656918300699e-06, |
|
"loss": 0.2857, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.079627714581179, |
|
"grad_norm": 0.46747156977653503, |
|
"learning_rate": 4.742324863140587e-06, |
|
"loss": 0.2902, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.0837642192347468, |
|
"grad_norm": 0.4724840223789215, |
|
"learning_rate": 4.70800498492003e-06, |
|
"loss": 0.2845, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.0879007238883145, |
|
"grad_norm": 0.5077059864997864, |
|
"learning_rate": 4.673698905491602e-06, |
|
"loss": 0.297, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.092037228541882, |
|
"grad_norm": 0.4432675540447235, |
|
"learning_rate": 4.639408246055781e-06, |
|
"loss": 0.2286, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.0961737331954498, |
|
"grad_norm": 0.4326833188533783, |
|
"learning_rate": 4.605134627084345e-06, |
|
"loss": 0.2418, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.1003102378490175, |
|
"grad_norm": 0.4976271092891693, |
|
"learning_rate": 4.570879668243792e-06, |
|
"loss": 0.2825, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.1044467425025852, |
|
"grad_norm": 0.4635002613067627, |
|
"learning_rate": 4.536644988318802e-06, |
|
"loss": 0.2503, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.108583247156153, |
|
"grad_norm": 0.4908175766468048, |
|
"learning_rate": 4.502432205135731e-06, |
|
"loss": 0.298, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.1127197518097207, |
|
"grad_norm": 0.4961640238761902, |
|
"learning_rate": 4.468242935486164e-06, |
|
"loss": 0.2696, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.1168562564632885, |
|
"grad_norm": 0.49413740634918213, |
|
"learning_rate": 4.434078795050509e-06, |
|
"loss": 0.2938, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.1209927611168562, |
|
"grad_norm": 0.48604297637939453, |
|
"learning_rate": 4.3999413983216434e-06, |
|
"loss": 0.2884, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.125129265770424, |
|
"grad_norm": 0.4502314329147339, |
|
"learning_rate": 4.365832358528618e-06, |
|
"loss": 0.2514, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.1292657704239917, |
|
"grad_norm": 0.46243977546691895, |
|
"learning_rate": 4.331753287560423e-06, |
|
"loss": 0.2473, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.1334022750775594, |
|
"grad_norm": 0.48582252860069275, |
|
"learning_rate": 4.29770579588981e-06, |
|
"loss": 0.2926, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.1375387797311272, |
|
"grad_norm": 0.4945797622203827, |
|
"learning_rate": 4.263691492497197e-06, |
|
"loss": 0.2803, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.141675284384695, |
|
"grad_norm": 0.5017898082733154, |
|
"learning_rate": 4.229711984794614e-06, |
|
"loss": 0.2695, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.1458117890382626, |
|
"grad_norm": 0.44951367378234863, |
|
"learning_rate": 4.195768878549766e-06, |
|
"loss": 0.2548, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.1499482936918304, |
|
"grad_norm": 0.4264715611934662, |
|
"learning_rate": 4.161863777810128e-06, |
|
"loss": 0.2304, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.1540847983453981, |
|
"grad_norm": 0.4864782392978668, |
|
"learning_rate": 4.127998284827148e-06, |
|
"loss": 0.2883, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.1582213029989659, |
|
"grad_norm": 0.48877304792404175, |
|
"learning_rate": 4.094173999980544e-06, |
|
"loss": 0.2696, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1623578076525336, |
|
"grad_norm": 0.4845278859138489, |
|
"learning_rate": 4.060392521702655e-06, |
|
"loss": 0.2696, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.1664943123061013, |
|
"grad_norm": 0.4687557816505432, |
|
"learning_rate": 4.026655446402912e-06, |
|
"loss": 0.2242, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.170630816959669, |
|
"grad_norm": 0.4510751962661743, |
|
"learning_rate": 3.9929643683923965e-06, |
|
"loss": 0.2534, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.1747673216132368, |
|
"grad_norm": 0.456969678401947, |
|
"learning_rate": 3.9593208798085094e-06, |
|
"loss": 0.239, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.1789038262668046, |
|
"grad_norm": 0.5285021066665649, |
|
"learning_rate": 3.9257265705397065e-06, |
|
"loss": 0.2706, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.1830403309203723, |
|
"grad_norm": 0.5108174085617065, |
|
"learning_rate": 3.892183028150384e-06, |
|
"loss": 0.292, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.18717683557394, |
|
"grad_norm": 0.4737439751625061, |
|
"learning_rate": 3.8586918378058595e-06, |
|
"loss": 0.2666, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.1913133402275078, |
|
"grad_norm": 0.46854445338249207, |
|
"learning_rate": 3.8252545821974385e-06, |
|
"loss": 0.2473, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.1954498448810755, |
|
"grad_norm": 0.5152525305747986, |
|
"learning_rate": 3.791872841467643e-06, |
|
"loss": 0.2787, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.1995863495346433, |
|
"grad_norm": 0.4602268636226654, |
|
"learning_rate": 3.758548193135536e-06, |
|
"loss": 0.2447, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.203722854188211, |
|
"grad_norm": 0.4676779806613922, |
|
"learning_rate": 3.7252822120221592e-06, |
|
"loss": 0.2715, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.2078593588417788, |
|
"grad_norm": 0.48289844393730164, |
|
"learning_rate": 3.6920764701761263e-06, |
|
"loss": 0.283, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.2119958634953465, |
|
"grad_norm": 0.4726490080356598, |
|
"learning_rate": 3.6589325367993243e-06, |
|
"loss": 0.2807, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.2161323681489142, |
|
"grad_norm": 0.5170783996582031, |
|
"learning_rate": 3.625851978172765e-06, |
|
"loss": 0.2636, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.220268872802482, |
|
"grad_norm": 0.46776092052459717, |
|
"learning_rate": 3.59283635758256e-06, |
|
"loss": 0.2457, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.2244053774560497, |
|
"grad_norm": 0.45310357213020325, |
|
"learning_rate": 3.5598872352460457e-06, |
|
"loss": 0.2538, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.2285418821096175, |
|
"grad_norm": 0.4700476825237274, |
|
"learning_rate": 3.527006168238061e-06, |
|
"loss": 0.2722, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.2326783867631852, |
|
"grad_norm": 0.4869045913219452, |
|
"learning_rate": 3.4941947104173514e-06, |
|
"loss": 0.2695, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.236814891416753, |
|
"grad_norm": 0.4840319752693176, |
|
"learning_rate": 3.4614544123531476e-06, |
|
"loss": 0.2671, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.2409513960703205, |
|
"grad_norm": 0.4536275565624237, |
|
"learning_rate": 3.428786821251888e-06, |
|
"loss": 0.2512, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2450879007238882, |
|
"grad_norm": 0.4808495342731476, |
|
"learning_rate": 3.3961934808841023e-06, |
|
"loss": 0.2531, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.249224405377456, |
|
"grad_norm": 0.48514485359191895, |
|
"learning_rate": 3.363675931511455e-06, |
|
"loss": 0.2695, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.2533609100310237, |
|
"grad_norm": 0.48374173045158386, |
|
"learning_rate": 3.331235709813962e-06, |
|
"loss": 0.2706, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.2574974146845914, |
|
"grad_norm": 0.4591769278049469, |
|
"learning_rate": 3.29887434881737e-06, |
|
"loss": 0.2578, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.2616339193381592, |
|
"grad_norm": 0.4645506739616394, |
|
"learning_rate": 3.2665933778207082e-06, |
|
"loss": 0.2717, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.265770423991727, |
|
"grad_norm": 0.5053009986877441, |
|
"learning_rate": 3.234394322324019e-06, |
|
"loss": 0.2713, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.2699069286452946, |
|
"grad_norm": 0.46575117111206055, |
|
"learning_rate": 3.2022787039562745e-06, |
|
"loss": 0.2445, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.2740434332988624, |
|
"grad_norm": 0.4733026623725891, |
|
"learning_rate": 3.170248040403457e-06, |
|
"loss": 0.2602, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.2781799379524301, |
|
"grad_norm": 0.4547727406024933, |
|
"learning_rate": 3.138303845336844e-06, |
|
"loss": 0.2545, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.2823164426059979, |
|
"grad_norm": 0.5043481588363647, |
|
"learning_rate": 3.1064476283414818e-06, |
|
"loss": 0.2848, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.2864529472595656, |
|
"grad_norm": 0.49556779861450195, |
|
"learning_rate": 3.074680894844837e-06, |
|
"loss": 0.2659, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.2905894519131333, |
|
"grad_norm": 0.4662742614746094, |
|
"learning_rate": 3.04300514604566e-06, |
|
"loss": 0.2696, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.294725956566701, |
|
"grad_norm": 0.46650293469429016, |
|
"learning_rate": 3.011421878843044e-06, |
|
"loss": 0.2573, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.2988624612202688, |
|
"grad_norm": 0.471865177154541, |
|
"learning_rate": 2.9799325857656856e-06, |
|
"loss": 0.2598, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.3029989658738366, |
|
"grad_norm": 0.49665728211402893, |
|
"learning_rate": 2.948538754901349e-06, |
|
"loss": 0.285, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.3071354705274043, |
|
"grad_norm": 0.47322675585746765, |
|
"learning_rate": 2.917241869826545e-06, |
|
"loss": 0.2523, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.311271975180972, |
|
"grad_norm": 0.4811559021472931, |
|
"learning_rate": 2.8860434095364266e-06, |
|
"loss": 0.2762, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.3154084798345398, |
|
"grad_norm": 0.48528894782066345, |
|
"learning_rate": 2.8549448483748888e-06, |
|
"loss": 0.2812, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.3195449844881075, |
|
"grad_norm": 0.47023531794548035, |
|
"learning_rate": 2.8239476559649013e-06, |
|
"loss": 0.2857, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.3236814891416753, |
|
"grad_norm": 0.4679359793663025, |
|
"learning_rate": 2.7930532971390543e-06, |
|
"loss": 0.2639, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.327817993795243, |
|
"grad_norm": 0.4885619580745697, |
|
"learning_rate": 2.762263231870339e-06, |
|
"loss": 0.2919, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.3319544984488108, |
|
"grad_norm": 0.4528388977050781, |
|
"learning_rate": 2.7315789152031504e-06, |
|
"loss": 0.2491, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.3360910031023785, |
|
"grad_norm": 0.43351301550865173, |
|
"learning_rate": 2.7010017971845267e-06, |
|
"loss": 0.2334, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.3402275077559462, |
|
"grad_norm": 0.4791943430900574, |
|
"learning_rate": 2.6705333227956304e-06, |
|
"loss": 0.2759, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.344364012409514, |
|
"grad_norm": 0.42758721113204956, |
|
"learning_rate": 2.6401749318834528e-06, |
|
"loss": 0.2574, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.3485005170630817, |
|
"grad_norm": 0.4758831858634949, |
|
"learning_rate": 2.609928059092779e-06, |
|
"loss": 0.2459, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.3526370217166495, |
|
"grad_norm": 0.45820891857147217, |
|
"learning_rate": 2.579794133798388e-06, |
|
"loss": 0.2678, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.3567735263702172, |
|
"grad_norm": 0.4830312132835388, |
|
"learning_rate": 2.549774580037504e-06, |
|
"loss": 0.2627, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.360910031023785, |
|
"grad_norm": 0.48166197538375854, |
|
"learning_rate": 2.5198708164425046e-06, |
|
"loss": 0.2524, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.3650465356773527, |
|
"grad_norm": 0.48581820726394653, |
|
"learning_rate": 2.4900842561738736e-06, |
|
"loss": 0.2527, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.3691830403309204, |
|
"grad_norm": 0.49055343866348267, |
|
"learning_rate": 2.4604163068534313e-06, |
|
"loss": 0.2541, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.3733195449844882, |
|
"grad_norm": 0.4789126217365265, |
|
"learning_rate": 2.4308683704978e-06, |
|
"loss": 0.2597, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.377456049638056, |
|
"grad_norm": 0.5069748759269714, |
|
"learning_rate": 2.401441843452159e-06, |
|
"loss": 0.2842, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.3815925542916236, |
|
"grad_norm": 0.49611082673072815, |
|
"learning_rate": 2.372138116324254e-06, |
|
"loss": 0.2648, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.3857290589451914, |
|
"grad_norm": 0.4773513376712799, |
|
"learning_rate": 2.342958573918682e-06, |
|
"loss": 0.2846, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.3898655635987591, |
|
"grad_norm": 0.49834293127059937, |
|
"learning_rate": 2.3139045951714473e-06, |
|
"loss": 0.288, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.3940020682523269, |
|
"grad_norm": 0.46547529101371765, |
|
"learning_rate": 2.2849775530848057e-06, |
|
"loss": 0.242, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.3981385729058946, |
|
"grad_norm": 0.46302109956741333, |
|
"learning_rate": 2.256178814662368e-06, |
|
"loss": 0.2553, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.4022750775594623, |
|
"grad_norm": 0.5011090636253357, |
|
"learning_rate": 2.227509740844508e-06, |
|
"loss": 0.281, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.40641158221303, |
|
"grad_norm": 0.43727773427963257, |
|
"learning_rate": 2.198971686444047e-06, |
|
"loss": 0.2409, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.4105480868665978, |
|
"grad_norm": 0.4928194582462311, |
|
"learning_rate": 2.1705660000822286e-06, |
|
"loss": 0.299, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.4146845915201656, |
|
"grad_norm": 0.4691973328590393, |
|
"learning_rate": 2.1422940241249875e-06, |
|
"loss": 0.2552, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.4188210961737333, |
|
"grad_norm": 0.4593028724193573, |
|
"learning_rate": 2.1141570946195106e-06, |
|
"loss": 0.255, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.422957600827301, |
|
"grad_norm": 0.4884447753429413, |
|
"learning_rate": 2.086156541231109e-06, |
|
"loss": 0.2601, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.4270941054808688, |
|
"grad_norm": 0.5032863616943359, |
|
"learning_rate": 2.0582936871803692e-06, |
|
"loss": 0.2888, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.4312306101344365, |
|
"grad_norm": 0.46444255113601685, |
|
"learning_rate": 2.0305698491806297e-06, |
|
"loss": 0.2402, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.4353671147880043, |
|
"grad_norm": 0.4745306074619293, |
|
"learning_rate": 2.0029863373757553e-06, |
|
"loss": 0.2665, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.4395036194415718, |
|
"grad_norm": 0.4591853618621826, |
|
"learning_rate": 1.9755444552782228e-06, |
|
"loss": 0.2209, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.4436401240951395, |
|
"grad_norm": 0.4660813808441162, |
|
"learning_rate": 1.948245499707523e-06, |
|
"loss": 0.2559, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.4477766287487073, |
|
"grad_norm": 0.4885343909263611, |
|
"learning_rate": 1.9210907607288728e-06, |
|
"loss": 0.281, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.451913133402275, |
|
"grad_norm": 0.4733608365058899, |
|
"learning_rate": 1.8940815215922609e-06, |
|
"loss": 0.2762, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.4560496380558428, |
|
"grad_norm": 0.46303790807724, |
|
"learning_rate": 1.867219058671791e-06, |
|
"loss": 0.2626, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 1.4601861427094105, |
|
"grad_norm": 0.4606141149997711, |
|
"learning_rate": 1.8405046414053728e-06, |
|
"loss": 0.2434, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 1.4643226473629782, |
|
"grad_norm": 0.4833987057209015, |
|
"learning_rate": 1.8139395322347335e-06, |
|
"loss": 0.2546, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 1.468459152016546, |
|
"grad_norm": 0.44073909521102905, |
|
"learning_rate": 1.787524986545753e-06, |
|
"loss": 0.2511, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 1.4725956566701137, |
|
"grad_norm": 0.45749855041503906, |
|
"learning_rate": 1.7612622526091406e-06, |
|
"loss": 0.2391, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 1.4767321613236815, |
|
"grad_norm": 0.4941197633743286, |
|
"learning_rate": 1.7351525715214512e-06, |
|
"loss": 0.2607, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 1.4808686659772492, |
|
"grad_norm": 0.4428948760032654, |
|
"learning_rate": 1.709197177146425e-06, |
|
"loss": 0.2477, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 1.485005170630817, |
|
"grad_norm": 0.5241663455963135, |
|
"learning_rate": 1.6833972960566868e-06, |
|
"loss": 0.258, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 1.4891416752843847, |
|
"grad_norm": 0.48429349064826965, |
|
"learning_rate": 1.6577541474757712e-06, |
|
"loss": 0.2709, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.4932781799379524, |
|
"grad_norm": 0.49429482221603394, |
|
"learning_rate": 1.6322689432205252e-06, |
|
"loss": 0.2787, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 1.4974146845915202, |
|
"grad_norm": 0.487251341342926, |
|
"learning_rate": 1.6069428876438203e-06, |
|
"loss": 0.2612, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.501551189245088, |
|
"grad_norm": 0.4490973949432373, |
|
"learning_rate": 1.5817771775776508e-06, |
|
"loss": 0.2516, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 1.5056876938986556, |
|
"grad_norm": 0.48428958654403687, |
|
"learning_rate": 1.5567730022765753e-06, |
|
"loss": 0.2773, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 1.5098241985522234, |
|
"grad_norm": 0.4671989381313324, |
|
"learning_rate": 1.5319315433615101e-06, |
|
"loss": 0.267, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 1.5139607032057911, |
|
"grad_norm": 0.5202347636222839, |
|
"learning_rate": 1.5072539747638887e-06, |
|
"loss": 0.294, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 1.5180972078593589, |
|
"grad_norm": 0.45074182748794556, |
|
"learning_rate": 1.482741462670193e-06, |
|
"loss": 0.2363, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 1.5222337125129266, |
|
"grad_norm": 0.5174707174301147, |
|
"learning_rate": 1.4583951654668416e-06, |
|
"loss": 0.2767, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.5263702171664943, |
|
"grad_norm": 0.4842411279678345, |
|
"learning_rate": 1.434216233685441e-06, |
|
"loss": 0.2858, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 1.530506721820062, |
|
"grad_norm": 0.4314868450164795, |
|
"learning_rate": 1.4102058099484188e-06, |
|
"loss": 0.2356, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.5346432264736298, |
|
"grad_norm": 0.47764474153518677, |
|
"learning_rate": 1.3863650289150338e-06, |
|
"loss": 0.2632, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 1.5387797311271976, |
|
"grad_norm": 0.4491686522960663, |
|
"learning_rate": 1.3626950172277398e-06, |
|
"loss": 0.2443, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 1.542916235780765, |
|
"grad_norm": 0.4948718249797821, |
|
"learning_rate": 1.3391968934589573e-06, |
|
"loss": 0.2772, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 1.5470527404343328, |
|
"grad_norm": 0.4696827828884125, |
|
"learning_rate": 1.3158717680582128e-06, |
|
"loss": 0.2568, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 1.5511892450879006, |
|
"grad_norm": 0.43921521306037903, |
|
"learning_rate": 1.292720743299654e-06, |
|
"loss": 0.229, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.5553257497414683, |
|
"grad_norm": 0.4529230296611786, |
|
"learning_rate": 1.2697449132299649e-06, |
|
"loss": 0.2445, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 1.559462254395036, |
|
"grad_norm": 0.4936879873275757, |
|
"learning_rate": 1.2469453636166645e-06, |
|
"loss": 0.2579, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 1.5635987590486038, |
|
"grad_norm": 0.48198625445365906, |
|
"learning_rate": 1.224323171896797e-06, |
|
"loss": 0.2542, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 1.5677352637021715, |
|
"grad_norm": 0.4886031448841095, |
|
"learning_rate": 1.201879407126012e-06, |
|
"loss": 0.2707, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 1.5718717683557393, |
|
"grad_norm": 0.47718653082847595, |
|
"learning_rate": 1.1796151299280483e-06, |
|
"loss": 0.2747, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.576008273009307, |
|
"grad_norm": 0.46224093437194824, |
|
"learning_rate": 1.1575313924446123e-06, |
|
"loss": 0.247, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 1.5801447776628748, |
|
"grad_norm": 0.4610908329486847, |
|
"learning_rate": 1.1356292382856531e-06, |
|
"loss": 0.2624, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 1.5842812823164425, |
|
"grad_norm": 0.46388930082321167, |
|
"learning_rate": 1.113909702480046e-06, |
|
"loss": 0.2485, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.5884177869700102, |
|
"grad_norm": 0.4562687575817108, |
|
"learning_rate": 1.0923738114266824e-06, |
|
"loss": 0.2503, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.592554291623578, |
|
"grad_norm": 0.44876885414123535, |
|
"learning_rate": 1.0710225828459642e-06, |
|
"loss": 0.2453, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.5966907962771457, |
|
"grad_norm": 0.45502784848213196, |
|
"learning_rate": 1.0498570257317075e-06, |
|
"loss": 0.2595, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.6008273009307135, |
|
"grad_norm": 0.47724854946136475, |
|
"learning_rate": 1.028878140303462e-06, |
|
"loss": 0.2541, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.6049638055842812, |
|
"grad_norm": 0.45897573232650757, |
|
"learning_rate": 1.008086917959249e-06, |
|
"loss": 0.2628, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.609100310237849, |
|
"grad_norm": 0.4865526258945465, |
|
"learning_rate": 9.874843412286994e-07, |
|
"loss": 0.2693, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.6132368148914167, |
|
"grad_norm": 0.46964144706726074, |
|
"learning_rate": 9.670713837266322e-07, |
|
"loss": 0.2498, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.6173733195449844, |
|
"grad_norm": 0.42305079102516174, |
|
"learning_rate": 9.46849010107041e-07, |
|
"loss": 0.2262, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.6215098241985522, |
|
"grad_norm": 0.4819132089614868, |
|
"learning_rate": 9.26818176017506e-07, |
|
"loss": 0.2617, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.62564632885212, |
|
"grad_norm": 0.4843488037586212, |
|
"learning_rate": 9.069798280540348e-07, |
|
"loss": 0.2636, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.6297828335056876, |
|
"grad_norm": 0.4789119064807892, |
|
"learning_rate": 8.87334903716332e-07, |
|
"loss": 0.2869, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.6339193381592554, |
|
"grad_norm": 0.42331403493881226, |
|
"learning_rate": 8.678843313634894e-07, |
|
"loss": 0.2192, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.6380558428128231, |
|
"grad_norm": 0.45914411544799805, |
|
"learning_rate": 8.486290301701183e-07, |
|
"loss": 0.2654, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.6421923474663909, |
|
"grad_norm": 0.4775830805301666, |
|
"learning_rate": 8.295699100829124e-07, |
|
"loss": 0.2434, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.6463288521199586, |
|
"grad_norm": 0.5007808804512024, |
|
"learning_rate": 8.107078717776457e-07, |
|
"loss": 0.2697, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.6504653567735263, |
|
"grad_norm": 0.4754742681980133, |
|
"learning_rate": 7.920438066166097e-07, |
|
"loss": 0.2626, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.654601861427094, |
|
"grad_norm": 0.46346259117126465, |
|
"learning_rate": 7.735785966064885e-07, |
|
"loss": 0.2268, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.6587383660806618, |
|
"grad_norm": 0.4413525462150574, |
|
"learning_rate": 7.553131143566822e-07, |
|
"loss": 0.2373, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.6628748707342296, |
|
"grad_norm": 0.447625994682312, |
|
"learning_rate": 7.372482230380657e-07, |
|
"loss": 0.2546, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.6670113753877973, |
|
"grad_norm": 0.4605792462825775, |
|
"learning_rate": 7.193847763421991e-07, |
|
"loss": 0.2656, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.671147880041365, |
|
"grad_norm": 0.4576088786125183, |
|
"learning_rate": 7.017236184409859e-07, |
|
"loss": 0.2576, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.6752843846949328, |
|
"grad_norm": 0.5075780153274536, |
|
"learning_rate": 6.842655839467787e-07, |
|
"loss": 0.3023, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.6794208893485005, |
|
"grad_norm": 0.4650248885154724, |
|
"learning_rate": 6.670114978729392e-07, |
|
"loss": 0.2753, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.6835573940020683, |
|
"grad_norm": 0.4480326175689697, |
|
"learning_rate": 6.499621755948487e-07, |
|
"loss": 0.2448, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.687693898655636, |
|
"grad_norm": 0.48435285687446594, |
|
"learning_rate": 6.331184228113801e-07, |
|
"loss": 0.2729, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.6918304033092038, |
|
"grad_norm": 0.4679297208786011, |
|
"learning_rate": 6.164810355068179e-07, |
|
"loss": 0.2394, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.6959669079627715, |
|
"grad_norm": 0.5232973694801331, |
|
"learning_rate": 6.000507999132444e-07, |
|
"loss": 0.2761, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.7001034126163392, |
|
"grad_norm": 0.43717169761657715, |
|
"learning_rate": 5.838284924733866e-07, |
|
"loss": 0.2476, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.704239917269907, |
|
"grad_norm": 0.4989730417728424, |
|
"learning_rate": 5.678148798039213e-07, |
|
"loss": 0.2723, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.7083764219234747, |
|
"grad_norm": 0.4776909649372101, |
|
"learning_rate": 5.520107186592477e-07, |
|
"loss": 0.2394, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.7125129265770425, |
|
"grad_norm": 0.49704718589782715, |
|
"learning_rate": 5.364167558957267e-07, |
|
"loss": 0.2674, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.7166494312306102, |
|
"grad_norm": 0.5080196857452393, |
|
"learning_rate": 5.210337284363876e-07, |
|
"loss": 0.2846, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.720785935884178, |
|
"grad_norm": 0.5011091828346252, |
|
"learning_rate": 5.058623632361004e-07, |
|
"loss": 0.276, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.7249224405377457, |
|
"grad_norm": 0.4899991750717163, |
|
"learning_rate": 4.909033772472204e-07, |
|
"loss": 0.2465, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.7290589451913134, |
|
"grad_norm": 0.47677579522132874, |
|
"learning_rate": 4.7615747738571636e-07, |
|
"loss": 0.2547, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.7331954498448812, |
|
"grad_norm": 0.4679698050022125, |
|
"learning_rate": 4.6162536049775387e-07, |
|
"loss": 0.2687, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.737331954498449, |
|
"grad_norm": 0.4611322283744812, |
|
"learning_rate": 4.473077133267684e-07, |
|
"loss": 0.2517, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.7414684591520166, |
|
"grad_norm": 0.45688915252685547, |
|
"learning_rate": 4.3320521248101487e-07, |
|
"loss": 0.2449, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.7456049638055844, |
|
"grad_norm": 0.44202756881713867, |
|
"learning_rate": 4.193185244015879e-07, |
|
"loss": 0.2274, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.7497414684591521, |
|
"grad_norm": 0.488298237323761, |
|
"learning_rate": 4.0564830533093014e-07, |
|
"loss": 0.2706, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.7538779731127199, |
|
"grad_norm": 0.44502395391464233, |
|
"learning_rate": 3.9219520128182087e-07, |
|
"loss": 0.2343, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.7580144777662876, |
|
"grad_norm": 0.4559187889099121, |
|
"learning_rate": 3.789598480068479e-07, |
|
"loss": 0.2477, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.7621509824198553, |
|
"grad_norm": 0.43528175354003906, |
|
"learning_rate": 3.659428709683621e-07, |
|
"loss": 0.2279, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.766287487073423, |
|
"grad_norm": 0.47880756855010986, |
|
"learning_rate": 3.531448853089192e-07, |
|
"loss": 0.2631, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.7704239917269908, |
|
"grad_norm": 0.49789199233055115, |
|
"learning_rate": 3.40566495822216e-07, |
|
"loss": 0.2925, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.7745604963805586, |
|
"grad_norm": 0.4378401041030884, |
|
"learning_rate": 3.2820829692449984e-07, |
|
"loss": 0.227, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.7786970010341263, |
|
"grad_norm": 0.4724928140640259, |
|
"learning_rate": 3.160708726264855e-07, |
|
"loss": 0.2657, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.782833505687694, |
|
"grad_norm": 0.43662911653518677, |
|
"learning_rate": 3.0415479650575783e-07, |
|
"loss": 0.2399, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.7869700103412618, |
|
"grad_norm": 0.46386146545410156, |
|
"learning_rate": 2.9246063167965963e-07, |
|
"loss": 0.2447, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.7911065149948295, |
|
"grad_norm": 0.47366079688072205, |
|
"learning_rate": 2.809889307786856e-07, |
|
"loss": 0.2449, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.795243019648397, |
|
"grad_norm": 0.4846685826778412, |
|
"learning_rate": 2.697402359203638e-07, |
|
"loss": 0.2559, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.7993795243019648, |
|
"grad_norm": 0.4788161516189575, |
|
"learning_rate": 2.587150786836407e-07, |
|
"loss": 0.2749, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.8035160289555325, |
|
"grad_norm": 0.49820560216903687, |
|
"learning_rate": 2.4791398008375545e-07, |
|
"loss": 0.2748, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.8076525336091003, |
|
"grad_norm": 0.45833131670951843, |
|
"learning_rate": 2.3733745054762059e-07, |
|
"loss": 0.2293, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.811789038262668, |
|
"grad_norm": 0.5000050067901611, |
|
"learning_rate": 2.2698598988970422e-07, |
|
"loss": 0.2634, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.8159255429162358, |
|
"grad_norm": 0.45837461948394775, |
|
"learning_rate": 2.1686008728840301e-07, |
|
"loss": 0.2525, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.8200620475698035, |
|
"grad_norm": 0.4396543800830841, |
|
"learning_rate": 2.0696022126293126e-07, |
|
"loss": 0.2374, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.8241985522233712, |
|
"grad_norm": 0.4914761483669281, |
|
"learning_rate": 1.9728685965070604e-07, |
|
"loss": 0.2992, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.828335056876939, |
|
"grad_norm": 0.5126286745071411, |
|
"learning_rate": 1.8784045958523623e-07, |
|
"loss": 0.2795, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.8324715615305067, |
|
"grad_norm": 0.44213420152664185, |
|
"learning_rate": 1.786214674745218e-07, |
|
"loss": 0.2247, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.8366080661840745, |
|
"grad_norm": 0.4569559693336487, |
|
"learning_rate": 1.6963031897995863e-07, |
|
"loss": 0.2451, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.8407445708376422, |
|
"grad_norm": 0.4845653474330902, |
|
"learning_rate": 1.6086743899575042e-07, |
|
"loss": 0.2818, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.84488107549121, |
|
"grad_norm": 0.4564604163169861, |
|
"learning_rate": 1.523332416288259e-07, |
|
"loss": 0.2539, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.8490175801447777, |
|
"grad_norm": 0.4548117518424988, |
|
"learning_rate": 1.4402813017927396e-07, |
|
"loss": 0.2554, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.8531540847983454, |
|
"grad_norm": 0.4759480655193329, |
|
"learning_rate": 1.3595249712128334e-07, |
|
"loss": 0.2661, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.8572905894519132, |
|
"grad_norm": 0.46541112661361694, |
|
"learning_rate": 1.28106724084594e-07, |
|
"loss": 0.2486, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.861427094105481, |
|
"grad_norm": 0.4635773003101349, |
|
"learning_rate": 1.2049118183646403e-07, |
|
"loss": 0.2653, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.8655635987590486, |
|
"grad_norm": 0.44061151146888733, |
|
"learning_rate": 1.1310623026414891e-07, |
|
"loss": 0.2255, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.8697001034126164, |
|
"grad_norm": 0.45572927594184875, |
|
"learning_rate": 1.059522183578926e-07, |
|
"loss": 0.2533, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.8738366080661841, |
|
"grad_norm": 0.4822574853897095, |
|
"learning_rate": 9.902948419443669e-08, |
|
"loss": 0.2767, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.8779731127197516, |
|
"grad_norm": 0.4398654103279114, |
|
"learning_rate": 9.233835492104326e-08, |
|
"loss": 0.2492, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.8821096173733194, |
|
"grad_norm": 0.4548628032207489, |
|
"learning_rate": 8.587914674003384e-08, |
|
"loss": 0.254, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.8862461220268871, |
|
"grad_norm": 0.45040181279182434, |
|
"learning_rate": 7.965216489384919e-08, |
|
"loss": 0.2721, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.8903826266804549, |
|
"grad_norm": 0.47080284357070923, |
|
"learning_rate": 7.365770365062308e-08, |
|
"loss": 0.2718, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.8945191313340226, |
|
"grad_norm": 0.48404160141944885, |
|
"learning_rate": 6.789604629027614e-08, |
|
"loss": 0.2924, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.8986556359875904, |
|
"grad_norm": 0.46306654810905457, |
|
"learning_rate": 6.236746509112824e-08, |
|
"loss": 0.2531, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.902792140641158, |
|
"grad_norm": 0.4330954849720001, |
|
"learning_rate": 5.707222131703216e-08, |
|
"loss": 0.2388, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.9069286452947258, |
|
"grad_norm": 0.46021175384521484, |
|
"learning_rate": 5.201056520502734e-08, |
|
"loss": 0.2468, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.9110651499482936, |
|
"grad_norm": 0.5022516250610352, |
|
"learning_rate": 4.718273595351486e-08, |
|
"loss": 0.263, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.9152016546018613, |
|
"grad_norm": 0.47739377617836, |
|
"learning_rate": 4.25889617109515e-08, |
|
"loss": 0.2718, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.919338159255429, |
|
"grad_norm": 0.4588397741317749, |
|
"learning_rate": 3.8229459565070074e-08, |
|
"loss": 0.2412, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.9234746639089968, |
|
"grad_norm": 0.4719136953353882, |
|
"learning_rate": 3.410443553262033e-08, |
|
"loss": 0.2722, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.9276111685625645, |
|
"grad_norm": 0.4567975401878357, |
|
"learning_rate": 3.0214084549632925e-08, |
|
"loss": 0.2536, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.9317476732161323, |
|
"grad_norm": 0.4981779158115387, |
|
"learning_rate": 2.6558590462207322e-08, |
|
"loss": 0.27, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.9358841778697, |
|
"grad_norm": 0.4779011309146881, |
|
"learning_rate": 2.3138126017822614e-08, |
|
"loss": 0.2707, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.9400206825232678, |
|
"grad_norm": 0.4619957506656647, |
|
"learning_rate": 1.99528528571763e-08, |
|
"loss": 0.2516, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.9441571871768355, |
|
"grad_norm": 0.47019270062446594, |
|
"learning_rate": 1.7002921506544812e-08, |
|
"loss": 0.2762, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.9482936918304032, |
|
"grad_norm": 0.48734498023986816, |
|
"learning_rate": 1.4288471370669244e-08, |
|
"loss": 0.2779, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.952430196483971, |
|
"grad_norm": 0.5020056366920471, |
|
"learning_rate": 1.1809630726167808e-08, |
|
"loss": 0.2731, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.9565667011375387, |
|
"grad_norm": 0.4687701165676117, |
|
"learning_rate": 9.566516715474594e-09, |
|
"loss": 0.2584, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.9607032057911065, |
|
"grad_norm": 0.4735799729824066, |
|
"learning_rate": 7.559235341302872e-09, |
|
"loss": 0.2663, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.9648397104446742, |
|
"grad_norm": 0.4657973349094391, |
|
"learning_rate": 5.787881461636891e-09, |
|
"loss": 0.2597, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.968976215098242, |
|
"grad_norm": 0.43754643201828003, |
|
"learning_rate": 4.252538785248228e-09, |
|
"loss": 0.2198, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.9731127197518097, |
|
"grad_norm": 0.45479777455329895, |
|
"learning_rate": 2.9532798677395226e-09, |
|
"loss": 0.2456, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.9772492244053774, |
|
"grad_norm": 0.4745938181877136, |
|
"learning_rate": 1.8901661081172084e-09, |
|
"loss": 0.2719, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.9813857290589452, |
|
"grad_norm": 0.4496646225452423, |
|
"learning_rate": 1.0632477458888401e-09, |
|
"loss": 0.2545, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.985522233712513, |
|
"grad_norm": 0.5044782757759094, |
|
"learning_rate": 4.725638586894344e-10, |
|
"loss": 0.2904, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.9896587383660806, |
|
"grad_norm": 0.45781707763671875, |
|
"learning_rate": 1.1814236043405924e-10, |
|
"loss": 0.2429, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.9937952430196484, |
|
"grad_norm": 0.4693934917449951, |
|
"learning_rate": 0.0, |
|
"loss": 0.2602, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.9937952430196484, |
|
"step": 482, |
|
"total_flos": 3.6089290785072087e+18, |
|
"train_loss": 0.3119487636316861, |
|
"train_runtime": 2571.753, |
|
"train_samples_per_second": 24.051, |
|
"train_steps_per_second": 0.187 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 482, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.6089290785072087e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|