{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998222222222222,
  "eval_steps": 5000,
  "global_step": 4218,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0023703703703703703,
      "grad_norm": 0.542885959148407,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.4997,
      "step": 10
    },
    {
      "epoch": 0.004740740740740741,
      "grad_norm": 0.44538265466690063,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.4925,
      "step": 20
    },
    {
      "epoch": 0.0071111111111111115,
      "grad_norm": 0.4945567548274994,
      "learning_rate": 3e-06,
      "loss": 1.4985,
      "step": 30
    },
    {
      "epoch": 0.009481481481481481,
      "grad_norm": 0.4560663402080536,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.4031,
      "step": 40
    },
    {
      "epoch": 0.011851851851851851,
      "grad_norm": 0.4691298007965088,
      "learning_rate": 5e-06,
      "loss": 1.4175,
      "step": 50
    },
    {
      "epoch": 0.014222222222222223,
      "grad_norm": 0.44202300906181335,
      "learning_rate": 6e-06,
      "loss": 1.4337,
      "step": 60
    },
    {
      "epoch": 0.016592592592592593,
      "grad_norm": 0.5069476366043091,
      "learning_rate": 7.000000000000001e-06,
      "loss": 1.4629,
      "step": 70
    },
    {
      "epoch": 0.018962962962962963,
      "grad_norm": 0.4806945025920868,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.5706,
      "step": 80
    },
    {
      "epoch": 0.021333333333333333,
      "grad_norm": 0.5269841551780701,
      "learning_rate": 9e-06,
      "loss": 1.5625,
      "step": 90
    },
    {
      "epoch": 0.023703703703703703,
      "grad_norm": 0.37831586599349976,
      "learning_rate": 1e-05,
      "loss": 1.4083,
      "step": 100
    },
    {
      "epoch": 0.026074074074074072,
      "grad_norm": 0.442981094121933,
      "learning_rate": 1.1000000000000001e-05,
      "loss": 1.3799,
      "step": 110
    },
    {
      "epoch": 0.028444444444444446,
      "grad_norm": 0.47675761580467224,
      "learning_rate": 1.2e-05,
      "loss": 1.5356,
      "step": 120
    },
    {
      "epoch": 0.030814814814814816,
      "grad_norm": 0.5033993721008301,
      "learning_rate": 1.3000000000000001e-05,
      "loss": 1.504,
      "step": 130
    },
    {
      "epoch": 0.033185185185185186,
      "grad_norm": 0.4628155827522278,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 1.3474,
      "step": 140
    },
    {
      "epoch": 0.035555555555555556,
      "grad_norm": 0.41637757420539856,
      "learning_rate": 1.5e-05,
      "loss": 1.4352,
      "step": 150
    },
    {
      "epoch": 0.037925925925925925,
      "grad_norm": 0.5029244422912598,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 1.3224,
      "step": 160
    },
    {
      "epoch": 0.040296296296296295,
      "grad_norm": 0.6434731483459473,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 1.5611,
      "step": 170
    },
    {
      "epoch": 0.042666666666666665,
      "grad_norm": 0.42424359917640686,
      "learning_rate": 1.8e-05,
      "loss": 1.4191,
      "step": 180
    },
    {
      "epoch": 0.045037037037037035,
      "grad_norm": 0.4729703962802887,
      "learning_rate": 1.9e-05,
      "loss": 1.3284,
      "step": 190
    },
    {
      "epoch": 0.047407407407407405,
      "grad_norm": 0.48806190490722656,
      "learning_rate": 2e-05,
      "loss": 1.48,
      "step": 200
    },
    {
      "epoch": 0.049777777777777775,
      "grad_norm": 0.4987320303916931,
      "learning_rate": 2.1e-05,
      "loss": 1.4535,
      "step": 210
    },
    {
      "epoch": 0.052148148148148145,
      "grad_norm": 0.46912866830825806,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 1.4846,
      "step": 220
    },
    {
      "epoch": 0.05451851851851852,
      "grad_norm": 0.4369196593761444,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 1.5564,
      "step": 230
    },
    {
      "epoch": 0.05688888888888889,
      "grad_norm": 0.48074963688850403,
      "learning_rate": 2.4e-05,
      "loss": 1.3142,
      "step": 240
    },
    {
      "epoch": 0.05925925925925926,
      "grad_norm": 0.450253427028656,
      "learning_rate": 2.5e-05,
      "loss": 1.3877,
      "step": 250
    },
    {
      "epoch": 0.06162962962962963,
      "grad_norm": 0.4517356753349304,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 1.3969,
      "step": 260
    },
    {
      "epoch": 0.064,
      "grad_norm": 0.47781577706336975,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 1.5352,
      "step": 270
    },
    {
      "epoch": 0.06637037037037037,
      "grad_norm": 0.5579633712768555,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 1.5436,
      "step": 280
    },
    {
      "epoch": 0.06874074074074074,
      "grad_norm": 0.4838034510612488,
      "learning_rate": 2.9e-05,
      "loss": 1.3564,
      "step": 290
    },
    {
      "epoch": 0.07111111111111111,
      "grad_norm": 0.5685828328132629,
      "learning_rate": 3e-05,
      "loss": 1.4429,
      "step": 300
    },
    {
      "epoch": 0.07348148148148148,
      "grad_norm": 0.5230541229248047,
      "learning_rate": 3.1e-05,
      "loss": 1.3933,
      "step": 310
    },
    {
      "epoch": 0.07585185185185185,
      "grad_norm": 0.45525529980659485,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 1.4224,
      "step": 320
    },
    {
      "epoch": 0.07822222222222222,
      "grad_norm": 0.47926583886146545,
      "learning_rate": 3.3e-05,
      "loss": 1.4466,
      "step": 330
    },
    {
      "epoch": 0.08059259259259259,
      "grad_norm": 0.38689500093460083,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 1.5105,
      "step": 340
    },
    {
      "epoch": 0.08296296296296296,
      "grad_norm": 0.4488411843776703,
      "learning_rate": 3.5e-05,
      "loss": 1.3392,
      "step": 350
    },
    {
      "epoch": 0.08533333333333333,
      "grad_norm": 0.5381152033805847,
      "learning_rate": 3.6e-05,
      "loss": 1.375,
      "step": 360
    },
    {
      "epoch": 0.0877037037037037,
      "grad_norm": 0.5788478255271912,
      "learning_rate": 3.7e-05,
      "loss": 1.3522,
      "step": 370
    },
    {
      "epoch": 0.09007407407407407,
      "grad_norm": 0.501133143901825,
      "learning_rate": 3.8e-05,
      "loss": 1.3294,
      "step": 380
    },
    {
      "epoch": 0.09244444444444444,
      "grad_norm": 0.5300689935684204,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 1.5623,
      "step": 390
    },
    {
      "epoch": 0.09481481481481481,
      "grad_norm": 0.5409078001976013,
      "learning_rate": 4e-05,
      "loss": 1.4098,
      "step": 400
    },
    {
      "epoch": 0.09718518518518518,
      "grad_norm": 0.5598166584968567,
      "learning_rate": 4.1e-05,
      "loss": 1.4104,
      "step": 410
    },
    {
      "epoch": 0.09955555555555555,
      "grad_norm": 0.5656659603118896,
      "learning_rate": 4.2e-05,
      "loss": 1.3782,
      "step": 420
    },
    {
      "epoch": 0.10192592592592592,
      "grad_norm": 0.5094364881515503,
      "learning_rate": 4.3e-05,
      "loss": 1.6233,
      "step": 430
    },
    {
      "epoch": 0.10429629629629629,
      "grad_norm": 0.5540050268173218,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 1.512,
      "step": 440
    },
    {
      "epoch": 0.10666666666666667,
      "grad_norm": 0.5693063139915466,
      "learning_rate": 4.5e-05,
      "loss": 1.641,
      "step": 450
    },
    {
      "epoch": 0.10903703703703704,
      "grad_norm": 0.5556958913803101,
      "learning_rate": 4.600000000000001e-05,
      "loss": 1.4532,
      "step": 460
    },
    {
      "epoch": 0.11140740740740741,
      "grad_norm": 0.5052928924560547,
      "learning_rate": 4.7e-05,
      "loss": 1.4863,
      "step": 470
    },
    {
      "epoch": 0.11377777777777778,
      "grad_norm": 0.5321051478385925,
      "learning_rate": 4.8e-05,
      "loss": 1.353,
      "step": 480
    },
    {
      "epoch": 0.11614814814814815,
      "grad_norm": 0.658074140548706,
      "learning_rate": 4.9e-05,
      "loss": 1.563,
      "step": 490
    },
    {
      "epoch": 0.11851851851851852,
      "grad_norm": 0.4817732870578766,
      "learning_rate": 5e-05,
      "loss": 1.5347,
      "step": 500
    },
    {
      "epoch": 0.12088888888888889,
      "grad_norm": 0.6876205801963806,
      "learning_rate": 4.9865519096288324e-05,
      "loss": 1.6524,
      "step": 510
    },
    {
      "epoch": 0.12325925925925926,
      "grad_norm": 0.5238626003265381,
      "learning_rate": 4.973103819257665e-05,
      "loss": 1.5766,
      "step": 520
    },
    {
      "epoch": 0.12562962962962962,
      "grad_norm": 0.4588116705417633,
      "learning_rate": 4.959655728886498e-05,
      "loss": 1.4258,
      "step": 530
    },
    {
      "epoch": 0.128,
      "grad_norm": 0.529692530632019,
      "learning_rate": 4.946207638515331e-05,
      "loss": 1.5574,
      "step": 540
    },
    {
      "epoch": 0.13037037037037036,
      "grad_norm": 0.475524365901947,
      "learning_rate": 4.932759548144163e-05,
      "loss": 1.377,
      "step": 550
    },
    {
      "epoch": 0.13274074074074074,
      "grad_norm": 0.48722413182258606,
      "learning_rate": 4.919311457772996e-05,
      "loss": 1.3156,
      "step": 560
    },
    {
      "epoch": 0.1351111111111111,
      "grad_norm": 0.6309683322906494,
      "learning_rate": 4.905863367401829e-05,
      "loss": 1.5581,
      "step": 570
    },
    {
      "epoch": 0.13748148148148148,
      "grad_norm": 0.5029247999191284,
      "learning_rate": 4.892415277030662e-05,
      "loss": 1.5968,
      "step": 580
    },
    {
      "epoch": 0.13985185185185184,
      "grad_norm": 0.421310156583786,
      "learning_rate": 4.878967186659494e-05,
      "loss": 1.4881,
      "step": 590
    },
    {
      "epoch": 0.14222222222222222,
      "grad_norm": 0.5082572102546692,
      "learning_rate": 4.865519096288327e-05,
      "loss": 1.6574,
      "step": 600
    },
    {
      "epoch": 0.1445925925925926,
      "grad_norm": 0.5082793235778809,
      "learning_rate": 4.85207100591716e-05,
      "loss": 1.3606,
      "step": 610
    },
    {
      "epoch": 0.14696296296296296,
      "grad_norm": 0.5169036388397217,
      "learning_rate": 4.838622915545993e-05,
      "loss": 1.4457,
      "step": 620
    },
    {
      "epoch": 0.14933333333333335,
      "grad_norm": 0.5509771704673767,
      "learning_rate": 4.825174825174825e-05,
      "loss": 1.4754,
      "step": 630
    },
    {
      "epoch": 0.1517037037037037,
      "grad_norm": 0.630851149559021,
      "learning_rate": 4.811726734803658e-05,
      "loss": 1.5453,
      "step": 640
    },
    {
      "epoch": 0.15407407407407409,
      "grad_norm": 0.5343595147132874,
      "learning_rate": 4.798278644432491e-05,
      "loss": 1.4822,
      "step": 650
    },
    {
      "epoch": 0.15644444444444444,
      "grad_norm": 0.5070016980171204,
      "learning_rate": 4.7848305540613237e-05,
      "loss": 1.3835,
      "step": 660
    },
    {
      "epoch": 0.15881481481481483,
      "grad_norm": 0.6097332835197449,
      "learning_rate": 4.771382463690156e-05,
      "loss": 1.546,
      "step": 670
    },
    {
      "epoch": 0.16118518518518518,
      "grad_norm": 0.5894319415092468,
      "learning_rate": 4.757934373318989e-05,
      "loss": 1.3605,
      "step": 680
    },
    {
      "epoch": 0.16355555555555557,
      "grad_norm": 0.4879942238330841,
      "learning_rate": 4.7444862829478216e-05,
      "loss": 1.4391,
      "step": 690
    },
    {
      "epoch": 0.16592592592592592,
      "grad_norm": 0.49390801787376404,
      "learning_rate": 4.7310381925766545e-05,
      "loss": 1.2228,
      "step": 700
    },
    {
      "epoch": 0.1682962962962963,
      "grad_norm": 0.6193021535873413,
      "learning_rate": 4.717590102205487e-05,
      "loss": 1.7474,
      "step": 710
    },
    {
      "epoch": 0.17066666666666666,
      "grad_norm": 0.4410654604434967,
      "learning_rate": 4.7041420118343196e-05,
      "loss": 1.4255,
      "step": 720
    },
    {
      "epoch": 0.17303703703703704,
      "grad_norm": 0.5690642595291138,
      "learning_rate": 4.6906939214631525e-05,
      "loss": 1.4252,
      "step": 730
    },
    {
      "epoch": 0.1754074074074074,
      "grad_norm": 0.4561966359615326,
      "learning_rate": 4.6772458310919854e-05,
      "loss": 1.4214,
      "step": 740
    },
    {
      "epoch": 0.17777777777777778,
      "grad_norm": 0.5871363282203674,
      "learning_rate": 4.6637977407208176e-05,
      "loss": 1.5272,
      "step": 750
    },
    {
      "epoch": 0.18014814814814814,
      "grad_norm": 0.5293774604797363,
      "learning_rate": 4.6503496503496505e-05,
      "loss": 1.5507,
      "step": 760
    },
    {
      "epoch": 0.18251851851851852,
      "grad_norm": 0.483826220035553,
      "learning_rate": 4.636901559978483e-05,
      "loss": 1.4058,
      "step": 770
    },
    {
      "epoch": 0.18488888888888888,
      "grad_norm": 0.551902174949646,
      "learning_rate": 4.623453469607316e-05,
      "loss": 1.3707,
      "step": 780
    },
    {
      "epoch": 0.18725925925925926,
      "grad_norm": 0.5492023825645447,
      "learning_rate": 4.6100053792361484e-05,
      "loss": 1.4901,
      "step": 790
    },
    {
      "epoch": 0.18962962962962962,
      "grad_norm": 0.5409772396087646,
      "learning_rate": 4.596557288864981e-05,
      "loss": 1.6356,
      "step": 800
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.5469648838043213,
      "learning_rate": 4.583109198493814e-05,
      "loss": 1.4574,
      "step": 810
    },
    {
      "epoch": 0.19437037037037036,
      "grad_norm": 0.5523713827133179,
      "learning_rate": 4.569661108122647e-05,
      "loss": 1.5304,
      "step": 820
    },
    {
      "epoch": 0.19674074074074074,
      "grad_norm": 0.4884456992149353,
      "learning_rate": 4.556213017751479e-05,
      "loss": 1.4173,
      "step": 830
    },
    {
      "epoch": 0.1991111111111111,
      "grad_norm": 0.5865374803543091,
      "learning_rate": 4.542764927380312e-05,
      "loss": 1.4178,
      "step": 840
    },
    {
      "epoch": 0.20148148148148148,
      "grad_norm": 0.5571750402450562,
      "learning_rate": 4.529316837009145e-05,
      "loss": 1.5356,
      "step": 850
    },
    {
      "epoch": 0.20385185185185184,
      "grad_norm": 0.567616879940033,
      "learning_rate": 4.515868746637978e-05,
      "loss": 1.4496,
      "step": 860
    },
    {
      "epoch": 0.20622222222222222,
      "grad_norm": 0.5077497959136963,
      "learning_rate": 4.50242065626681e-05,
      "loss": 1.4757,
      "step": 870
    },
    {
      "epoch": 0.20859259259259258,
      "grad_norm": 0.5118802189826965,
      "learning_rate": 4.488972565895643e-05,
      "loss": 1.3845,
      "step": 880
    },
    {
      "epoch": 0.21096296296296296,
      "grad_norm": 0.43292248249053955,
      "learning_rate": 4.475524475524476e-05,
      "loss": 1.421,
      "step": 890
    },
    {
      "epoch": 0.21333333333333335,
      "grad_norm": 0.5365243554115295,
      "learning_rate": 4.462076385153308e-05,
      "loss": 1.4586,
      "step": 900
    },
    {
      "epoch": 0.2157037037037037,
      "grad_norm": 0.4912022054195404,
      "learning_rate": 4.448628294782141e-05,
      "loss": 1.5385,
      "step": 910
    },
    {
      "epoch": 0.2180740740740741,
      "grad_norm": 0.5855193734169006,
      "learning_rate": 4.435180204410974e-05,
      "loss": 1.5718,
      "step": 920
    },
    {
      "epoch": 0.22044444444444444,
      "grad_norm": 0.5224360227584839,
      "learning_rate": 4.421732114039807e-05,
      "loss": 1.3853,
      "step": 930
    },
    {
      "epoch": 0.22281481481481483,
      "grad_norm": 0.4283509850502014,
      "learning_rate": 4.408284023668639e-05,
      "loss": 1.3758,
      "step": 940
    },
    {
      "epoch": 0.22518518518518518,
      "grad_norm": 0.44806018471717834,
      "learning_rate": 4.394835933297472e-05,
      "loss": 1.4089,
      "step": 950
    },
    {
      "epoch": 0.22755555555555557,
      "grad_norm": 0.4234403967857361,
      "learning_rate": 4.381387842926305e-05,
      "loss": 1.321,
      "step": 960
    },
    {
      "epoch": 0.22992592592592592,
      "grad_norm": 0.45570847392082214,
      "learning_rate": 4.3679397525551376e-05,
      "loss": 1.384,
      "step": 970
    },
    {
      "epoch": 0.2322962962962963,
      "grad_norm": 0.6098482608795166,
      "learning_rate": 4.35449166218397e-05,
      "loss": 1.5165,
      "step": 980
    },
    {
      "epoch": 0.23466666666666666,
      "grad_norm": 0.47981974482536316,
      "learning_rate": 4.341043571812803e-05,
      "loss": 1.4827,
      "step": 990
    },
    {
      "epoch": 0.23703703703703705,
      "grad_norm": 0.567845344543457,
      "learning_rate": 4.3275954814416356e-05,
      "loss": 1.4494,
      "step": 1000
    },
    {
      "epoch": 0.2394074074074074,
      "grad_norm": 0.5508958697319031,
      "learning_rate": 4.3141473910704685e-05,
      "loss": 1.5681,
      "step": 1010
    },
    {
      "epoch": 0.24177777777777779,
      "grad_norm": 0.6119508743286133,
      "learning_rate": 4.300699300699301e-05,
      "loss": 1.4522,
      "step": 1020
    },
    {
      "epoch": 0.24414814814814814,
      "grad_norm": 0.654909074306488,
      "learning_rate": 4.2872512103281336e-05,
      "loss": 1.6317,
      "step": 1030
    },
    {
      "epoch": 0.24651851851851853,
      "grad_norm": 0.5818801522254944,
      "learning_rate": 4.2738031199569664e-05,
      "loss": 1.4917,
      "step": 1040
    },
    {
      "epoch": 0.24888888888888888,
      "grad_norm": 0.5295186638832092,
      "learning_rate": 4.260355029585799e-05,
      "loss": 1.6393,
      "step": 1050
    },
    {
      "epoch": 0.25125925925925924,
      "grad_norm": 0.5558478832244873,
      "learning_rate": 4.2469069392146315e-05,
      "loss": 1.5607,
      "step": 1060
    },
    {
      "epoch": 0.25362962962962965,
      "grad_norm": 0.5266067385673523,
      "learning_rate": 4.2334588488434644e-05,
      "loss": 1.4366,
      "step": 1070
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.4949641823768616,
      "learning_rate": 4.220010758472297e-05,
      "loss": 1.4392,
      "step": 1080
    },
    {
      "epoch": 0.25837037037037036,
      "grad_norm": 0.48148399591445923,
      "learning_rate": 4.20656266810113e-05,
      "loss": 1.5301,
      "step": 1090
    },
    {
      "epoch": 0.2607407407407407,
      "grad_norm": 0.5564059615135193,
      "learning_rate": 4.1931145777299624e-05,
      "loss": 1.3645,
      "step": 1100
    },
    {
      "epoch": 0.26311111111111113,
      "grad_norm": 0.6419994235038757,
      "learning_rate": 4.179666487358795e-05,
      "loss": 1.4543,
      "step": 1110
    },
    {
      "epoch": 0.2654814814814815,
      "grad_norm": 0.5205827355384827,
      "learning_rate": 4.166218396987628e-05,
      "loss": 1.5358,
      "step": 1120
    },
    {
      "epoch": 0.26785185185185184,
      "grad_norm": 0.45430988073349,
      "learning_rate": 4.152770306616461e-05,
      "loss": 1.5483,
      "step": 1130
    },
    {
      "epoch": 0.2702222222222222,
      "grad_norm": 0.5467645525932312,
      "learning_rate": 4.139322216245293e-05,
      "loss": 1.4702,
      "step": 1140
    },
    {
      "epoch": 0.2725925925925926,
      "grad_norm": 0.47259363532066345,
      "learning_rate": 4.125874125874126e-05,
      "loss": 1.451,
      "step": 1150
    },
    {
      "epoch": 0.27496296296296296,
      "grad_norm": 0.48951438069343567,
      "learning_rate": 4.112426035502959e-05,
      "loss": 1.4095,
      "step": 1160
    },
    {
      "epoch": 0.2773333333333333,
      "grad_norm": 0.601701021194458,
      "learning_rate": 4.098977945131792e-05,
      "loss": 1.6848,
      "step": 1170
    },
    {
      "epoch": 0.2797037037037037,
      "grad_norm": 0.5379857420921326,
      "learning_rate": 4.085529854760624e-05,
      "loss": 1.3598,
      "step": 1180
    },
    {
      "epoch": 0.2820740740740741,
      "grad_norm": 0.6498066186904907,
      "learning_rate": 4.072081764389457e-05,
      "loss": 1.4655,
      "step": 1190
    },
    {
      "epoch": 0.28444444444444444,
      "grad_norm": 0.5294344425201416,
      "learning_rate": 4.05863367401829e-05,
      "loss": 1.3851,
      "step": 1200
    },
    {
      "epoch": 0.2868148148148148,
      "grad_norm": 0.5410310626029968,
      "learning_rate": 4.045185583647123e-05,
      "loss": 1.5091,
      "step": 1210
    },
    {
      "epoch": 0.2891851851851852,
      "grad_norm": 0.5395278334617615,
      "learning_rate": 4.031737493275955e-05,
      "loss": 1.3487,
      "step": 1220
    },
    {
      "epoch": 0.29155555555555557,
      "grad_norm": 0.637909471988678,
      "learning_rate": 4.018289402904788e-05,
      "loss": 1.5848,
      "step": 1230
    },
    {
      "epoch": 0.2939259259259259,
      "grad_norm": 0.4254130721092224,
      "learning_rate": 4.004841312533621e-05,
      "loss": 1.4186,
      "step": 1240
    },
    {
      "epoch": 0.2962962962962963,
      "grad_norm": 0.5799821019172668,
      "learning_rate": 3.9913932221624536e-05,
      "loss": 1.4289,
      "step": 1250
    },
    {
      "epoch": 0.2986666666666667,
      "grad_norm": 0.43250229954719543,
      "learning_rate": 3.977945131791286e-05,
      "loss": 1.5458,
      "step": 1260
    },
    {
      "epoch": 0.30103703703703705,
      "grad_norm": 0.4722803831100464,
      "learning_rate": 3.964497041420119e-05,
      "loss": 1.4716,
      "step": 1270
    },
    {
      "epoch": 0.3034074074074074,
      "grad_norm": 0.5686700940132141,
      "learning_rate": 3.9510489510489516e-05,
      "loss": 1.6452,
      "step": 1280
    },
    {
      "epoch": 0.30577777777777776,
      "grad_norm": 0.493028461933136,
      "learning_rate": 3.9376008606777844e-05,
      "loss": 1.4516,
      "step": 1290
    },
    {
      "epoch": 0.30814814814814817,
      "grad_norm": 0.5100602507591248,
      "learning_rate": 3.9241527703066166e-05,
      "loss": 1.3366,
      "step": 1300
    },
    {
      "epoch": 0.3105185185185185,
      "grad_norm": 0.6535771489143372,
      "learning_rate": 3.910704679935449e-05,
      "loss": 1.4312,
      "step": 1310
    },
    {
      "epoch": 0.3128888888888889,
      "grad_norm": 0.48823079466819763,
      "learning_rate": 3.8972565895642824e-05,
      "loss": 1.4888,
      "step": 1320
    },
    {
      "epoch": 0.31525925925925924,
      "grad_norm": 0.4459994435310364,
      "learning_rate": 3.8838084991931146e-05,
      "loss": 1.4637,
      "step": 1330
    },
    {
      "epoch": 0.31762962962962965,
      "grad_norm": 0.5344628691673279,
      "learning_rate": 3.8703604088219475e-05,
      "loss": 1.6118,
      "step": 1340
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.44893643260002136,
      "learning_rate": 3.85691231845078e-05,
      "loss": 1.4521,
      "step": 1350
    },
    {
      "epoch": 0.32237037037037036,
      "grad_norm": 0.4381811022758484,
      "learning_rate": 3.8434642280796126e-05,
      "loss": 1.5751,
      "step": 1360
    },
    {
      "epoch": 0.3247407407407407,
      "grad_norm": 0.5791207551956177,
      "learning_rate": 3.8300161377084455e-05,
      "loss": 1.5563,
      "step": 1370
    },
    {
      "epoch": 0.32711111111111113,
      "grad_norm": 0.5584151148796082,
      "learning_rate": 3.8165680473372784e-05,
      "loss": 1.3673,
      "step": 1380
    },
    {
      "epoch": 0.3294814814814815,
      "grad_norm": 0.6110686659812927,
      "learning_rate": 3.8031199569661106e-05,
      "loss": 1.5949,
      "step": 1390
    },
    {
      "epoch": 0.33185185185185184,
      "grad_norm": 0.5519852638244629,
      "learning_rate": 3.7896718665949434e-05,
      "loss": 1.5133,
      "step": 1400
    },
    {
      "epoch": 0.3342222222222222,
      "grad_norm": 0.5732788443565369,
      "learning_rate": 3.776223776223776e-05,
      "loss": 1.4601,
      "step": 1410
    },
    {
      "epoch": 0.3365925925925926,
      "grad_norm": 0.5789920091629028,
      "learning_rate": 3.762775685852609e-05,
      "loss": 1.6461,
      "step": 1420
    },
    {
      "epoch": 0.33896296296296297,
      "grad_norm": 0.592776358127594,
      "learning_rate": 3.7493275954814414e-05,
      "loss": 1.5558,
      "step": 1430
    },
    {
      "epoch": 0.3413333333333333,
      "grad_norm": 0.5435842871665955,
      "learning_rate": 3.735879505110274e-05,
      "loss": 1.5095,
      "step": 1440
    },
    {
      "epoch": 0.3437037037037037,
      "grad_norm": 0.6474444270133972,
      "learning_rate": 3.722431414739107e-05,
      "loss": 1.429,
      "step": 1450
    },
    {
      "epoch": 0.3460740740740741,
      "grad_norm": 0.4926964342594147,
      "learning_rate": 3.70898332436794e-05,
      "loss": 1.4543,
      "step": 1460
    },
    {
      "epoch": 0.34844444444444445,
      "grad_norm": 0.5748719573020935,
      "learning_rate": 3.695535233996772e-05,
      "loss": 1.5844,
      "step": 1470
    },
    {
      "epoch": 0.3508148148148148,
      "grad_norm": 0.5535377264022827,
      "learning_rate": 3.682087143625605e-05,
      "loss": 1.5348,
      "step": 1480
    },
    {
      "epoch": 0.35318518518518516,
      "grad_norm": 0.4644632339477539,
      "learning_rate": 3.668639053254438e-05,
      "loss": 1.3157,
      "step": 1490
    },
    {
      "epoch": 0.35555555555555557,
      "grad_norm": 0.5858569145202637,
      "learning_rate": 3.655190962883271e-05,
      "loss": 1.4678,
      "step": 1500
    },
    {
      "epoch": 0.3579259259259259,
      "grad_norm": 0.5428529381752014,
      "learning_rate": 3.641742872512103e-05,
      "loss": 1.5561,
      "step": 1510
    },
    {
      "epoch": 0.3602962962962963,
      "grad_norm": 0.5255948901176453,
      "learning_rate": 3.628294782140936e-05,
      "loss": 1.5802,
      "step": 1520
    },
    {
      "epoch": 0.3626666666666667,
      "grad_norm": 0.534137487411499,
      "learning_rate": 3.614846691769769e-05,
      "loss": 1.5142,
      "step": 1530
    },
    {
      "epoch": 0.36503703703703705,
      "grad_norm": 0.5558648705482483,
      "learning_rate": 3.601398601398602e-05,
      "loss": 1.5613,
      "step": 1540
    },
    {
      "epoch": 0.3674074074074074,
      "grad_norm": 0.5890410542488098,
      "learning_rate": 3.587950511027434e-05,
      "loss": 1.4679,
      "step": 1550
    },
    {
      "epoch": 0.36977777777777776,
      "grad_norm": 0.4830753803253174,
      "learning_rate": 3.574502420656267e-05,
      "loss": 1.4698,
      "step": 1560
    },
    {
      "epoch": 0.3721481481481482,
      "grad_norm": 0.5191047787666321,
      "learning_rate": 3.5610543302851e-05,
      "loss": 1.528,
      "step": 1570
    },
    {
      "epoch": 0.37451851851851853,
      "grad_norm": 0.5316727161407471,
      "learning_rate": 3.5476062399139326e-05,
      "loss": 1.4427,
      "step": 1580
    },
    {
      "epoch": 0.3768888888888889,
      "grad_norm": 0.553815484046936,
      "learning_rate": 3.534158149542765e-05,
      "loss": 1.548,
      "step": 1590
    },
    {
      "epoch": 0.37925925925925924,
      "grad_norm": 0.47779569029808044,
      "learning_rate": 3.520710059171598e-05,
      "loss": 1.4533,
      "step": 1600
    },
    {
      "epoch": 0.38162962962962965,
      "grad_norm": 0.5595371127128601,
      "learning_rate": 3.5072619688004306e-05,
      "loss": 1.4503,
      "step": 1610
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.5166143774986267,
      "learning_rate": 3.4938138784292635e-05,
      "loss": 1.3783,
      "step": 1620
    },
    {
      "epoch": 0.38637037037037036,
      "grad_norm": 0.6249716877937317,
      "learning_rate": 3.480365788058096e-05,
      "loss": 1.4494,
      "step": 1630
    },
    {
      "epoch": 0.3887407407407407,
      "grad_norm": 0.484937846660614,
      "learning_rate": 3.4669176976869286e-05,
      "loss": 1.421,
      "step": 1640
    },
    {
      "epoch": 0.39111111111111113,
      "grad_norm": 0.5464750528335571,
      "learning_rate": 3.4534696073157615e-05,
      "loss": 1.266,
      "step": 1650
    },
    {
      "epoch": 0.3934814814814815,
      "grad_norm": 0.48874956369400024,
      "learning_rate": 3.440021516944594e-05,
      "loss": 1.5355,
      "step": 1660
    },
    {
      "epoch": 0.39585185185185184,
      "grad_norm": 0.47555652260780334,
      "learning_rate": 3.4265734265734265e-05,
      "loss": 1.5799,
      "step": 1670
    },
    {
      "epoch": 0.3982222222222222,
      "grad_norm": 0.49769505858421326,
      "learning_rate": 3.4131253362022594e-05,
      "loss": 1.3748,
      "step": 1680
    },
    {
      "epoch": 0.4005925925925926,
      "grad_norm": 0.4664982259273529,
      "learning_rate": 3.399677245831092e-05,
      "loss": 1.4894,
      "step": 1690
    },
    {
      "epoch": 0.40296296296296297,
      "grad_norm": 0.5216518044471741,
      "learning_rate": 3.3862291554599245e-05,
      "loss": 1.4645,
      "step": 1700
    },
    {
      "epoch": 0.4053333333333333,
      "grad_norm": 0.6157680749893188,
      "learning_rate": 3.3727810650887574e-05,
      "loss": 1.4002,
      "step": 1710
    },
    {
      "epoch": 0.4077037037037037,
      "grad_norm": 0.5828937888145447,
      "learning_rate": 3.35933297471759e-05,
      "loss": 1.4816,
      "step": 1720
    },
    {
      "epoch": 0.4100740740740741,
      "grad_norm": 0.5792407989501953,
      "learning_rate": 3.345884884346423e-05,
      "loss": 1.3557,
      "step": 1730
    },
    {
      "epoch": 0.41244444444444445,
      "grad_norm": 0.4985092580318451,
      "learning_rate": 3.3324367939752554e-05,
      "loss": 1.4572,
      "step": 1740
    },
    {
      "epoch": 0.4148148148148148,
      "grad_norm": 0.5901199579238892,
      "learning_rate": 3.318988703604088e-05,
      "loss": 1.5292,
      "step": 1750
    },
    {
      "epoch": 0.41718518518518516,
      "grad_norm": 0.5087295174598694,
      "learning_rate": 3.305540613232921e-05,
      "loss": 1.3405,
      "step": 1760
    },
    {
      "epoch": 0.41955555555555557,
      "grad_norm": 0.5455463528633118,
      "learning_rate": 3.292092522861754e-05,
      "loss": 1.4262,
      "step": 1770
    },
    {
      "epoch": 0.4219259259259259,
      "grad_norm": 0.46563345193862915,
      "learning_rate": 3.278644432490586e-05,
      "loss": 1.4328,
      "step": 1780
    },
    {
      "epoch": 0.4242962962962963,
      "grad_norm": 0.545524537563324,
      "learning_rate": 3.265196342119419e-05,
      "loss": 1.4826,
      "step": 1790
    },
    {
      "epoch": 0.4266666666666667,
      "grad_norm": 0.4182009994983673,
      "learning_rate": 3.251748251748252e-05,
      "loss": 1.4909,
      "step": 1800
    },
    {
      "epoch": 0.42903703703703705,
      "grad_norm": 0.39127054810523987,
      "learning_rate": 3.238300161377085e-05,
      "loss": 1.4389,
      "step": 1810
    },
    {
      "epoch": 0.4314074074074074,
      "grad_norm": 0.46866652369499207,
      "learning_rate": 3.224852071005917e-05,
      "loss": 1.3992,
      "step": 1820
    },
    {
      "epoch": 0.43377777777777776,
      "grad_norm": 0.5216823816299438,
      "learning_rate": 3.21140398063475e-05,
      "loss": 1.3525,
      "step": 1830
    },
    {
      "epoch": 0.4361481481481482,
      "grad_norm": 0.49909713864326477,
      "learning_rate": 3.197955890263583e-05,
      "loss": 1.5491,
      "step": 1840
    },
    {
      "epoch": 0.43851851851851853,
      "grad_norm": 0.4957892596721649,
      "learning_rate": 3.184507799892416e-05,
      "loss": 1.4723,
      "step": 1850
    },
    {
      "epoch": 0.4408888888888889,
      "grad_norm": 0.518822431564331,
      "learning_rate": 3.171059709521248e-05,
      "loss": 1.535,
      "step": 1860
    },
    {
      "epoch": 0.44325925925925924,
      "grad_norm": 0.6380564570426941,
      "learning_rate": 3.157611619150081e-05,
      "loss": 1.5652,
      "step": 1870
    },
    {
      "epoch": 0.44562962962962965,
      "grad_norm": 0.49906617403030396,
      "learning_rate": 3.144163528778914e-05,
      "loss": 1.3624,
      "step": 1880
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.5234742760658264,
      "learning_rate": 3.1307154384077466e-05,
      "loss": 1.4192,
      "step": 1890
    },
    {
      "epoch": 0.45037037037037037,
      "grad_norm": 0.5430870056152344,
      "learning_rate": 3.117267348036579e-05,
      "loss": 1.4674,
      "step": 1900
    },
    {
      "epoch": 0.4527407407407407,
      "grad_norm": 0.5488291382789612,
      "learning_rate": 3.103819257665412e-05,
      "loss": 1.4759,
      "step": 1910
    },
    {
      "epoch": 0.45511111111111113,
      "grad_norm": 0.4655541181564331,
      "learning_rate": 3.0903711672942446e-05,
      "loss": 1.4559,
      "step": 1920
    },
    {
      "epoch": 0.4574814814814815,
      "grad_norm": 0.442128449678421,
      "learning_rate": 3.0769230769230774e-05,
      "loss": 1.3129,
      "step": 1930
    },
    {
      "epoch": 0.45985185185185184,
      "grad_norm": 0.5909174084663391,
      "learning_rate": 3.0634749865519096e-05,
      "loss": 1.5915,
      "step": 1940
    },
    {
      "epoch": 0.4622222222222222,
      "grad_norm": 0.41102078557014465,
      "learning_rate": 3.0500268961807425e-05,
      "loss": 1.3717,
      "step": 1950
    },
    {
      "epoch": 0.4645925925925926,
      "grad_norm": 0.5467662811279297,
      "learning_rate": 3.036578805809575e-05,
      "loss": 1.4838,
      "step": 1960
    },
    {
      "epoch": 0.46696296296296297,
      "grad_norm": 0.6555057764053345,
      "learning_rate": 3.023130715438408e-05,
      "loss": 1.4289,
      "step": 1970
    },
    {
      "epoch": 0.4693333333333333,
      "grad_norm": 0.4430755078792572,
      "learning_rate": 3.0096826250672405e-05,
      "loss": 1.5556,
      "step": 1980
    },
    {
      "epoch": 0.4717037037037037,
      "grad_norm": 0.48016276955604553,
      "learning_rate": 2.9962345346960734e-05,
      "loss": 1.5461,
      "step": 1990
    },
    {
      "epoch": 0.4740740740740741,
      "grad_norm": 0.5283887982368469,
      "learning_rate": 2.982786444324906e-05,
      "loss": 1.4598,
      "step": 2000
    },
    {
      "epoch": 0.47644444444444445,
      "grad_norm": 0.5336430668830872,
      "learning_rate": 2.9693383539537388e-05,
      "loss": 1.5837,
      "step": 2010
    },
    {
      "epoch": 0.4788148148148148,
      "grad_norm": 0.39814135432243347,
      "learning_rate": 2.9558902635825713e-05,
      "loss": 1.3496,
      "step": 2020
    },
    {
      "epoch": 0.48118518518518516,
      "grad_norm": 0.6095125079154968,
      "learning_rate": 2.9424421732114042e-05,
      "loss": 1.5925,
      "step": 2030
    },
    {
      "epoch": 0.48355555555555557,
      "grad_norm": 0.5880560874938965,
      "learning_rate": 2.9289940828402368e-05,
      "loss": 1.3603,
      "step": 2040
    },
    {
      "epoch": 0.48592592592592593,
      "grad_norm": 0.5470516085624695,
      "learning_rate": 2.9155459924690697e-05,
      "loss": 1.5022,
      "step": 2050
    },
    {
      "epoch": 0.4882962962962963,
      "grad_norm": 0.45742228627204895,
      "learning_rate": 2.9020979020979022e-05,
      "loss": 1.4699,
      "step": 2060
    },
    {
      "epoch": 0.49066666666666664,
      "grad_norm": 0.5314275622367859,
      "learning_rate": 2.888649811726735e-05,
      "loss": 1.5043,
      "step": 2070
    },
    {
      "epoch": 0.49303703703703705,
      "grad_norm": 0.5969755053520203,
      "learning_rate": 2.8752017213555676e-05,
      "loss": 1.4709,
      "step": 2080
    },
    {
      "epoch": 0.4954074074074074,
      "grad_norm": 0.5115885138511658,
      "learning_rate": 2.8617536309844002e-05,
      "loss": 1.4031,
      "step": 2090
    },
    {
      "epoch": 0.49777777777777776,
      "grad_norm": 0.5907914042472839,
      "learning_rate": 2.848305540613233e-05,
      "loss": 1.3509,
      "step": 2100
    },
    {
      "epoch": 0.5001481481481481,
      "grad_norm": 0.48430949449539185,
      "learning_rate": 2.8348574502420656e-05,
      "loss": 1.4393,
      "step": 2110
    },
    {
      "epoch": 0.5025185185185185,
      "grad_norm": 0.5502893328666687,
      "learning_rate": 2.8214093598708985e-05,
      "loss": 1.5571,
      "step": 2120
    },
    {
      "epoch": 0.5048888888888889,
      "grad_norm": 0.48268720507621765,
      "learning_rate": 2.807961269499731e-05,
      "loss": 1.3895,
      "step": 2130
    },
    {
      "epoch": 0.5072592592592593,
      "grad_norm": 0.6141895651817322,
      "learning_rate": 2.794513179128564e-05,
      "loss": 1.5278,
      "step": 2140
    },
    {
      "epoch": 0.5096296296296297,
      "grad_norm": 0.48447638750076294,
      "learning_rate": 2.7810650887573965e-05,
      "loss": 1.456,
      "step": 2150
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.4536721408367157,
      "learning_rate": 2.7676169983862293e-05,
      "loss": 1.4259,
      "step": 2160
    },
    {
      "epoch": 0.5143703703703704,
      "grad_norm": 0.5519189238548279,
      "learning_rate": 2.754168908015062e-05,
      "loss": 1.508,
      "step": 2170
    },
    {
      "epoch": 0.5167407407407407,
      "grad_norm": 0.4641801416873932,
      "learning_rate": 2.7407208176438948e-05,
      "loss": 1.5087,
      "step": 2180
    },
    {
      "epoch": 0.5191111111111111,
      "grad_norm": 0.5566359162330627,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 1.4994,
      "step": 2190
    },
    {
      "epoch": 0.5214814814814814,
      "grad_norm": 0.5316601991653442,
      "learning_rate": 2.7138246369015602e-05,
      "loss": 1.375,
      "step": 2200
    },
    {
      "epoch": 0.5238518518518519,
      "grad_norm": 0.545514702796936,
      "learning_rate": 2.7003765465303927e-05,
      "loss": 1.4449,
      "step": 2210
    },
    {
      "epoch": 0.5262222222222223,
      "grad_norm": 0.5452851057052612,
      "learning_rate": 2.6869284561592256e-05,
      "loss": 1.639,
      "step": 2220
    },
    {
      "epoch": 0.5285925925925926,
      "grad_norm": 0.5291896462440491,
      "learning_rate": 2.673480365788058e-05,
      "loss": 1.3638,
      "step": 2230
    },
    {
      "epoch": 0.530962962962963,
      "grad_norm": 0.4708302319049835,
      "learning_rate": 2.660032275416891e-05,
      "loss": 1.1973,
      "step": 2240
    },
    {
      "epoch": 0.5333333333333333,
      "grad_norm": 0.4936722218990326,
      "learning_rate": 2.6465841850457236e-05,
      "loss": 1.543,
      "step": 2250
    },
    {
      "epoch": 0.5357037037037037,
      "grad_norm": 0.5722488760948181,
      "learning_rate": 2.6331360946745565e-05,
      "loss": 1.531,
      "step": 2260
    },
    {
      "epoch": 0.538074074074074,
      "grad_norm": 0.5386027097702026,
      "learning_rate": 2.619688004303389e-05,
      "loss": 1.4335,
      "step": 2270
    },
    {
      "epoch": 0.5404444444444444,
      "grad_norm": 0.5803340673446655,
      "learning_rate": 2.606239913932222e-05,
      "loss": 1.4049,
      "step": 2280
    },
    {
      "epoch": 0.5428148148148149,
      "grad_norm": 0.3970150053501129,
      "learning_rate": 2.5927918235610544e-05,
      "loss": 1.3923,
      "step": 2290
    },
    {
      "epoch": 0.5451851851851852,
      "grad_norm": 0.45682525634765625,
      "learning_rate": 2.5793437331898873e-05,
      "loss": 1.4838,
      "step": 2300
    },
    {
      "epoch": 0.5475555555555556,
      "grad_norm": 0.5088069438934326,
      "learning_rate": 2.56589564281872e-05,
      "loss": 1.4416,
      "step": 2310
    },
    {
      "epoch": 0.5499259259259259,
      "grad_norm": 0.5557109713554382,
      "learning_rate": 2.5524475524475528e-05,
      "loss": 1.4099,
      "step": 2320
    },
    {
      "epoch": 0.5522962962962963,
      "grad_norm": 0.4954288601875305,
      "learning_rate": 2.5389994620763853e-05,
      "loss": 1.3828,
      "step": 2330
    },
    {
      "epoch": 0.5546666666666666,
      "grad_norm": 0.5320334434509277,
      "learning_rate": 2.5255513717052182e-05,
      "loss": 1.3686,
      "step": 2340
    },
    {
      "epoch": 0.557037037037037,
      "grad_norm": 0.511646032333374,
      "learning_rate": 2.5121032813340507e-05,
      "loss": 1.4752,
      "step": 2350
    },
    {
      "epoch": 0.5594074074074074,
      "grad_norm": 0.4852311909198761,
      "learning_rate": 2.4986551909628833e-05,
      "loss": 1.5352,
      "step": 2360
    },
    {
      "epoch": 0.5617777777777778,
      "grad_norm": 0.5558280944824219,
      "learning_rate": 2.485207100591716e-05,
      "loss": 1.3955,
      "step": 2370
    },
    {
      "epoch": 0.5641481481481482,
      "grad_norm": 0.5369210243225098,
      "learning_rate": 2.4717590102205487e-05,
      "loss": 1.4265,
      "step": 2380
    },
    {
      "epoch": 0.5665185185185185,
      "grad_norm": 0.5134137868881226,
      "learning_rate": 2.4583109198493816e-05,
      "loss": 1.3688,
      "step": 2390
    },
    {
      "epoch": 0.5688888888888889,
      "grad_norm": 0.47109952569007874,
      "learning_rate": 2.444862829478214e-05,
      "loss": 1.4207,
      "step": 2400
    },
    {
      "epoch": 0.5712592592592592,
      "grad_norm": 0.4982026517391205,
      "learning_rate": 2.431414739107047e-05,
      "loss": 1.4249,
      "step": 2410
    },
    {
      "epoch": 0.5736296296296296,
      "grad_norm": 0.5209967494010925,
      "learning_rate": 2.4179666487358796e-05,
      "loss": 1.4708,
      "step": 2420
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.5762905478477478,
      "learning_rate": 2.4045185583647124e-05,
      "loss": 1.4194,
      "step": 2430
    },
    {
      "epoch": 0.5783703703703704,
      "grad_norm": 0.4918428659439087,
      "learning_rate": 2.391070467993545e-05,
      "loss": 1.6279,
      "step": 2440
    },
    {
      "epoch": 0.5807407407407408,
      "grad_norm": 0.5050658583641052,
      "learning_rate": 2.377622377622378e-05,
      "loss": 1.5029,
      "step": 2450
    },
    {
      "epoch": 0.5831111111111111,
      "grad_norm": 0.49715667963027954,
      "learning_rate": 2.3641742872512104e-05,
      "loss": 1.4228,
      "step": 2460
    },
    {
      "epoch": 0.5854814814814815,
      "grad_norm": 0.4800516664981842,
      "learning_rate": 2.3507261968800433e-05,
      "loss": 1.3595,
      "step": 2470
    },
    {
      "epoch": 0.5878518518518518,
      "grad_norm": 0.5617285966873169,
      "learning_rate": 2.337278106508876e-05,
      "loss": 1.5974,
      "step": 2480
    },
    {
      "epoch": 0.5902222222222222,
      "grad_norm": 0.5133258700370789,
      "learning_rate": 2.3238300161377087e-05,
      "loss": 1.4437,
      "step": 2490
    },
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 0.5644205212593079,
      "learning_rate": 2.3103819257665413e-05,
      "loss": 1.4633,
      "step": 2500
    },
    {
      "epoch": 0.5949629629629629,
      "grad_norm": 0.5411229133605957,
      "learning_rate": 2.296933835395374e-05,
      "loss": 1.3376,
      "step": 2510
    },
    {
      "epoch": 0.5973333333333334,
      "grad_norm": 0.5555963516235352,
      "learning_rate": 2.2834857450242067e-05,
      "loss": 1.5361,
      "step": 2520
    },
    {
      "epoch": 0.5997037037037037,
      "grad_norm": 0.48606014251708984,
      "learning_rate": 2.2700376546530396e-05,
      "loss": 1.4386,
      "step": 2530
    },
    {
      "epoch": 0.6020740740740741,
      "grad_norm": 0.4902474582195282,
      "learning_rate": 2.256589564281872e-05,
      "loss": 1.4536,
      "step": 2540
    },
    {
      "epoch": 0.6044444444444445,
      "grad_norm": 0.5565341114997864,
      "learning_rate": 2.243141473910705e-05,
      "loss": 1.4801,
      "step": 2550
    },
    {
      "epoch": 0.6068148148148148,
      "grad_norm": 0.5383167862892151,
      "learning_rate": 2.2296933835395375e-05,
      "loss": 1.3528,
      "step": 2560
    },
    {
      "epoch": 0.6091851851851852,
      "grad_norm": 0.4610041379928589,
      "learning_rate": 2.2162452931683704e-05,
      "loss": 1.5063,
      "step": 2570
    },
    {
      "epoch": 0.6115555555555555,
      "grad_norm": 0.5119171142578125,
      "learning_rate": 2.202797202797203e-05,
      "loss": 1.4559,
      "step": 2580
    },
    {
      "epoch": 0.6139259259259259,
      "grad_norm": 0.46622559428215027,
      "learning_rate": 2.189349112426036e-05,
      "loss": 1.485,
      "step": 2590
    },
    {
      "epoch": 0.6162962962962963,
      "grad_norm": 0.5610603094100952,
      "learning_rate": 2.1759010220548684e-05,
      "loss": 1.3863,
      "step": 2600
    },
    {
      "epoch": 0.6186666666666667,
      "grad_norm": 0.5185586214065552,
      "learning_rate": 2.162452931683701e-05,
      "loss": 1.4574,
      "step": 2610
    },
    {
      "epoch": 0.621037037037037,
      "grad_norm": 0.5091121196746826,
      "learning_rate": 2.1490048413125338e-05,
      "loss": 1.3745,
      "step": 2620
    },
    {
      "epoch": 0.6234074074074074,
      "grad_norm": 0.39684295654296875,
      "learning_rate": 2.1355567509413664e-05,
      "loss": 1.5785,
      "step": 2630
    },
    {
      "epoch": 0.6257777777777778,
      "grad_norm": 0.5499323606491089,
      "learning_rate": 2.1221086605701993e-05,
      "loss": 1.4926,
      "step": 2640
    },
    {
      "epoch": 0.6281481481481481,
      "grad_norm": 0.448824942111969,
      "learning_rate": 2.1086605701990318e-05,
      "loss": 1.5199,
      "step": 2650
    },
    {
      "epoch": 0.6305185185185185,
      "grad_norm": 0.5647756457328796,
      "learning_rate": 2.0952124798278647e-05,
      "loss": 1.493,
      "step": 2660
    },
    {
      "epoch": 0.6328888888888888,
      "grad_norm": 0.5426878929138184,
      "learning_rate": 2.0817643894566972e-05,
      "loss": 1.4232,
      "step": 2670
    },
    {
      "epoch": 0.6352592592592593,
      "grad_norm": 0.5105384588241577,
      "learning_rate": 2.0683162990855298e-05,
      "loss": 1.5136,
      "step": 2680
    },
    {
      "epoch": 0.6376296296296297,
      "grad_norm": 0.5488259792327881,
      "learning_rate": 2.0548682087143627e-05,
      "loss": 1.4877,
      "step": 2690
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.47102248668670654,
      "learning_rate": 2.0414201183431952e-05,
      "loss": 1.2976,
      "step": 2700
    },
    {
      "epoch": 0.6423703703703704,
      "grad_norm": 0.4708435535430908,
      "learning_rate": 2.027972027972028e-05,
      "loss": 1.4773,
      "step": 2710
    },
    {
      "epoch": 0.6447407407407407,
      "grad_norm": 0.5073569416999817,
      "learning_rate": 2.0145239376008606e-05,
      "loss": 1.3551,
      "step": 2720
    },
    {
      "epoch": 0.6471111111111111,
      "grad_norm": 0.4934346377849579,
      "learning_rate": 2.0010758472296935e-05,
      "loss": 1.2423,
      "step": 2730
    },
    {
      "epoch": 0.6494814814814814,
      "grad_norm": 0.5030198097229004,
      "learning_rate": 1.987627756858526e-05,
      "loss": 1.4272,
      "step": 2740
    },
    {
      "epoch": 0.6518518518518519,
      "grad_norm": 0.4713825583457947,
      "learning_rate": 1.974179666487359e-05,
      "loss": 1.4898,
      "step": 2750
    },
    {
      "epoch": 0.6542222222222223,
      "grad_norm": 0.430649995803833,
      "learning_rate": 1.9607315761161915e-05,
      "loss": 1.3365,
      "step": 2760
    },
    {
      "epoch": 0.6565925925925926,
      "grad_norm": 0.58051598072052,
      "learning_rate": 1.9472834857450244e-05,
      "loss": 1.4567,
      "step": 2770
    },
    {
      "epoch": 0.658962962962963,
      "grad_norm": 0.46255138516426086,
      "learning_rate": 1.933835395373857e-05,
      "loss": 1.5205,
      "step": 2780
    },
    {
      "epoch": 0.6613333333333333,
      "grad_norm": 0.5674681663513184,
      "learning_rate": 1.9203873050026898e-05,
      "loss": 1.3594,
      "step": 2790
    },
    {
      "epoch": 0.6637037037037037,
      "grad_norm": 0.5063351392745972,
      "learning_rate": 1.9069392146315223e-05,
      "loss": 1.2901,
      "step": 2800
    },
    {
      "epoch": 0.666074074074074,
      "grad_norm": 0.4963226914405823,
      "learning_rate": 1.8934911242603552e-05,
      "loss": 1.4437,
      "step": 2810
    },
    {
      "epoch": 0.6684444444444444,
      "grad_norm": 0.5070900917053223,
      "learning_rate": 1.8800430338891878e-05,
      "loss": 1.376,
      "step": 2820
    },
    {
      "epoch": 0.6708148148148149,
      "grad_norm": 0.5724377036094666,
      "learning_rate": 1.8665949435180206e-05,
      "loss": 1.5226,
      "step": 2830
    },
    {
      "epoch": 0.6731851851851852,
      "grad_norm": 0.5261855125427246,
      "learning_rate": 1.8531468531468532e-05,
      "loss": 1.438,
      "step": 2840
    },
    {
      "epoch": 0.6755555555555556,
      "grad_norm": 0.5292350053787231,
      "learning_rate": 1.839698762775686e-05,
      "loss": 1.4071,
      "step": 2850
    },
    {
      "epoch": 0.6779259259259259,
      "grad_norm": 0.4596816301345825,
      "learning_rate": 1.8262506724045186e-05,
      "loss": 1.337,
      "step": 2860
    },
    {
      "epoch": 0.6802962962962963,
      "grad_norm": 0.5225928425788879,
      "learning_rate": 1.8128025820333515e-05,
      "loss": 1.4363,
      "step": 2870
    },
    {
      "epoch": 0.6826666666666666,
      "grad_norm": 0.49359938502311707,
      "learning_rate": 1.799354491662184e-05,
      "loss": 1.3388,
      "step": 2880
    },
    {
      "epoch": 0.685037037037037,
      "grad_norm": 0.5156022906303406,
      "learning_rate": 1.785906401291017e-05,
      "loss": 1.4515,
      "step": 2890
    },
    {
      "epoch": 0.6874074074074074,
      "grad_norm": 0.5047289133071899,
      "learning_rate": 1.7724583109198495e-05,
      "loss": 1.4584,
      "step": 2900
    },
    {
      "epoch": 0.6897777777777778,
      "grad_norm": 0.4975475072860718,
      "learning_rate": 1.7590102205486824e-05,
      "loss": 1.5042,
      "step": 2910
    },
    {
      "epoch": 0.6921481481481482,
      "grad_norm": 0.5997641086578369,
      "learning_rate": 1.745562130177515e-05,
      "loss": 1.5658,
      "step": 2920
    },
    {
      "epoch": 0.6945185185185185,
      "grad_norm": 0.5376483201980591,
      "learning_rate": 1.7321140398063478e-05,
      "loss": 1.3429,
      "step": 2930
    },
    {
      "epoch": 0.6968888888888889,
      "grad_norm": 0.4973870813846588,
      "learning_rate": 1.7186659494351803e-05,
      "loss": 1.4287,
      "step": 2940
    },
    {
      "epoch": 0.6992592592592592,
      "grad_norm": 0.5504077076911926,
      "learning_rate": 1.7052178590640132e-05,
      "loss": 1.5161,
      "step": 2950
    },
    {
      "epoch": 0.7016296296296296,
      "grad_norm": 0.4603710174560547,
      "learning_rate": 1.6917697686928457e-05,
      "loss": 1.463,
      "step": 2960
    },
    {
      "epoch": 0.704,
      "grad_norm": 0.5116856694221497,
      "learning_rate": 1.6783216783216786e-05,
      "loss": 1.3862,
      "step": 2970
    },
    {
      "epoch": 0.7063703703703703,
      "grad_norm": 0.49981990456581116,
      "learning_rate": 1.6648735879505112e-05,
      "loss": 1.4209,
      "step": 2980
    },
    {
      "epoch": 0.7087407407407408,
      "grad_norm": 0.5085658431053162,
      "learning_rate": 1.651425497579344e-05,
      "loss": 1.4031,
      "step": 2990
    },
    {
      "epoch": 0.7111111111111111,
      "grad_norm": 0.5499709844589233,
      "learning_rate": 1.6379774072081766e-05,
      "loss": 1.5093,
      "step": 3000
    },
    {
      "epoch": 0.7134814814814815,
      "grad_norm": 0.5997831225395203,
      "learning_rate": 1.6245293168370095e-05,
      "loss": 1.517,
      "step": 3010
    },
    {
      "epoch": 0.7158518518518519,
      "grad_norm": 0.5795171856880188,
      "learning_rate": 1.611081226465842e-05,
      "loss": 1.4279,
      "step": 3020
    },
    {
      "epoch": 0.7182222222222222,
      "grad_norm": 0.5227158069610596,
      "learning_rate": 1.5976331360946746e-05,
      "loss": 1.5101,
      "step": 3030
    },
    {
      "epoch": 0.7205925925925926,
      "grad_norm": 0.6352266669273376,
      "learning_rate": 1.5841850457235075e-05,
      "loss": 1.4849,
      "step": 3040
    },
    {
      "epoch": 0.7229629629629629,
      "grad_norm": 0.6044921278953552,
      "learning_rate": 1.57073695535234e-05,
      "loss": 1.5239,
      "step": 3050
    },
    {
      "epoch": 0.7253333333333334,
      "grad_norm": 0.4992562532424927,
      "learning_rate": 1.557288864981173e-05,
      "loss": 1.37,
      "step": 3060
    },
    {
      "epoch": 0.7277037037037037,
      "grad_norm": 0.5439409017562866,
      "learning_rate": 1.5438407746100054e-05,
      "loss": 1.3724,
      "step": 3070
    },
    {
      "epoch": 0.7300740740740741,
      "grad_norm": 0.5415698289871216,
      "learning_rate": 1.530392684238838e-05,
      "loss": 1.4562,
      "step": 3080
    },
    {
      "epoch": 0.7324444444444445,
      "grad_norm": 0.4034167230129242,
      "learning_rate": 1.5169445938676707e-05,
      "loss": 1.3212,
      "step": 3090
    },
    {
      "epoch": 0.7348148148148148,
      "grad_norm": 0.5307872891426086,
      "learning_rate": 1.5034965034965034e-05,
      "loss": 1.4165,
      "step": 3100
    },
    {
      "epoch": 0.7371851851851852,
      "grad_norm": 0.5271874666213989,
      "learning_rate": 1.4900484131253361e-05,
      "loss": 1.4077,
      "step": 3110
    },
    {
      "epoch": 0.7395555555555555,
      "grad_norm": 0.5995745658874512,
      "learning_rate": 1.4766003227541688e-05,
      "loss": 1.4732,
      "step": 3120
    },
    {
      "epoch": 0.7419259259259259,
      "grad_norm": 0.45358097553253174,
      "learning_rate": 1.4631522323830015e-05,
      "loss": 1.4083,
      "step": 3130
    },
    {
      "epoch": 0.7442962962962963,
      "grad_norm": 0.47864630818367004,
      "learning_rate": 1.4497041420118343e-05,
      "loss": 1.4363,
      "step": 3140
    },
    {
      "epoch": 0.7466666666666667,
      "grad_norm": 0.46107572317123413,
      "learning_rate": 1.436256051640667e-05,
      "loss": 1.5163,
      "step": 3150
    },
    {
      "epoch": 0.7490370370370371,
      "grad_norm": 0.5413241386413574,
      "learning_rate": 1.4228079612694997e-05,
      "loss": 1.3225,
      "step": 3160
    },
    {
      "epoch": 0.7514074074074074,
      "grad_norm": 0.4649742841720581,
      "learning_rate": 1.4093598708983324e-05,
      "loss": 1.4101,
      "step": 3170
    },
    {
      "epoch": 0.7537777777777778,
      "grad_norm": 0.5219136476516724,
      "learning_rate": 1.3959117805271651e-05,
      "loss": 1.4523,
      "step": 3180
    },
    {
      "epoch": 0.7561481481481481,
      "grad_norm": 0.5591155886650085,
      "learning_rate": 1.3824636901559978e-05,
      "loss": 1.5966,
      "step": 3190
    },
    {
      "epoch": 0.7585185185185185,
      "grad_norm": 0.5293004512786865,
      "learning_rate": 1.3690155997848305e-05,
      "loss": 1.4613,
      "step": 3200
    },
    {
      "epoch": 0.7608888888888888,
      "grad_norm": 0.46828821301460266,
      "learning_rate": 1.3555675094136632e-05,
      "loss": 1.5187,
      "step": 3210
    },
    {
      "epoch": 0.7632592592592593,
      "grad_norm": 0.4590572416782379,
      "learning_rate": 1.342119419042496e-05,
      "loss": 1.4418,
      "step": 3220
    },
    {
      "epoch": 0.7656296296296297,
      "grad_norm": 0.6020212769508362,
      "learning_rate": 1.3286713286713287e-05,
      "loss": 1.5557,
      "step": 3230
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.542536199092865,
      "learning_rate": 1.3152232383001614e-05,
      "loss": 1.3689,
      "step": 3240
    },
    {
      "epoch": 0.7703703703703704,
      "grad_norm": 0.5394562482833862,
      "learning_rate": 1.3017751479289941e-05,
      "loss": 1.4272,
      "step": 3250
    },
    {
      "epoch": 0.7727407407407407,
      "grad_norm": 0.38109496235847473,
      "learning_rate": 1.2883270575578268e-05,
      "loss": 1.419,
      "step": 3260
    },
    {
      "epoch": 0.7751111111111111,
      "grad_norm": 0.4617583155632019,
      "learning_rate": 1.2748789671866595e-05,
      "loss": 1.3547,
      "step": 3270
    },
    {
      "epoch": 0.7774814814814814,
      "grad_norm": 0.5739762783050537,
      "learning_rate": 1.2614308768154922e-05,
      "loss": 1.5191,
      "step": 3280
    },
    {
      "epoch": 0.7798518518518519,
      "grad_norm": 0.42539921402931213,
      "learning_rate": 1.247982786444325e-05,
      "loss": 1.3347,
      "step": 3290
    },
    {
      "epoch": 0.7822222222222223,
      "grad_norm": 0.5273600816726685,
      "learning_rate": 1.2345346960731577e-05,
      "loss": 1.3758,
      "step": 3300
    },
    {
      "epoch": 0.7845925925925926,
      "grad_norm": 0.4796091616153717,
      "learning_rate": 1.2210866057019904e-05,
      "loss": 1.3903,
      "step": 3310
    },
    {
      "epoch": 0.786962962962963,
      "grad_norm": 0.46542009711265564,
      "learning_rate": 1.2076385153308231e-05,
      "loss": 1.406,
      "step": 3320
    },
    {
      "epoch": 0.7893333333333333,
      "grad_norm": 0.47980108857154846,
      "learning_rate": 1.1941904249596558e-05,
      "loss": 1.3991,
      "step": 3330
    },
    {
      "epoch": 0.7917037037037037,
      "grad_norm": 0.507736086845398,
      "learning_rate": 1.1807423345884885e-05,
      "loss": 1.4583,
      "step": 3340
    },
    {
      "epoch": 0.794074074074074,
      "grad_norm": 0.5380430817604065,
      "learning_rate": 1.1672942442173212e-05,
      "loss": 1.2621,
      "step": 3350
    },
    {
      "epoch": 0.7964444444444444,
      "grad_norm": 0.6689913272857666,
      "learning_rate": 1.153846153846154e-05,
      "loss": 1.5355,
      "step": 3360
    },
    {
      "epoch": 0.7988148148148149,
      "grad_norm": 0.5129537582397461,
      "learning_rate": 1.1403980634749865e-05,
      "loss": 1.5895,
      "step": 3370
    },
    {
      "epoch": 0.8011851851851852,
      "grad_norm": 0.5240408182144165,
      "learning_rate": 1.1269499731038192e-05,
      "loss": 1.4896,
      "step": 3380
    },
    {
      "epoch": 0.8035555555555556,
      "grad_norm": 0.5004174709320068,
      "learning_rate": 1.113501882732652e-05,
      "loss": 1.4303,
      "step": 3390
    },
    {
      "epoch": 0.8059259259259259,
      "grad_norm": 0.46890896558761597,
      "learning_rate": 1.1000537923614846e-05,
      "loss": 1.3536,
      "step": 3400
    },
    {
      "epoch": 0.8082962962962963,
      "grad_norm": 0.5150523781776428,
      "learning_rate": 1.0866057019903174e-05,
      "loss": 1.3952,
      "step": 3410
    },
    {
      "epoch": 0.8106666666666666,
      "grad_norm": 0.4322206676006317,
      "learning_rate": 1.07315761161915e-05,
      "loss": 1.2252,
      "step": 3420
    },
    {
      "epoch": 0.813037037037037,
      "grad_norm": 0.5782944560050964,
      "learning_rate": 1.0597095212479828e-05,
      "loss": 1.4982,
      "step": 3430
    },
    {
      "epoch": 0.8154074074074074,
      "grad_norm": 0.6032952666282654,
      "learning_rate": 1.0462614308768155e-05,
      "loss": 1.4614,
      "step": 3440
    },
    {
      "epoch": 0.8177777777777778,
      "grad_norm": 0.453756183385849,
      "learning_rate": 1.0328133405056482e-05,
      "loss": 1.5017,
      "step": 3450
    },
    {
      "epoch": 0.8201481481481482,
      "grad_norm": 0.5315883159637451,
      "learning_rate": 1.019365250134481e-05,
      "loss": 1.5354,
      "step": 3460
    },
    {
      "epoch": 0.8225185185185185,
      "grad_norm": 0.5065041184425354,
      "learning_rate": 1.0059171597633136e-05,
      "loss": 1.4245,
      "step": 3470
    },
    {
      "epoch": 0.8248888888888889,
      "grad_norm": 0.542103111743927,
      "learning_rate": 9.924690693921463e-06,
      "loss": 1.3599,
      "step": 3480
    },
    {
      "epoch": 0.8272592592592592,
      "grad_norm": 0.5330160856246948,
      "learning_rate": 9.79020979020979e-06,
      "loss": 1.3502,
      "step": 3490
    },
    {
      "epoch": 0.8296296296296296,
      "grad_norm": 0.4731038212776184,
      "learning_rate": 9.655728886498118e-06,
      "loss": 1.2931,
      "step": 3500
    },
    {
      "epoch": 0.832,
      "grad_norm": 0.4719734787940979,
      "learning_rate": 9.521247982786445e-06,
      "loss": 1.3185,
      "step": 3510
    },
    {
      "epoch": 0.8343703703703703,
      "grad_norm": 0.5551607012748718,
      "learning_rate": 9.386767079074772e-06,
      "loss": 1.4045,
      "step": 3520
    },
    {
      "epoch": 0.8367407407407408,
      "grad_norm": 0.5661736130714417,
      "learning_rate": 9.252286175363099e-06,
      "loss": 1.6005,
      "step": 3530
    },
    {
      "epoch": 0.8391111111111111,
      "grad_norm": 0.5772873759269714,
      "learning_rate": 9.117805271651426e-06,
      "loss": 1.3975,
      "step": 3540
    },
    {
      "epoch": 0.8414814814814815,
      "grad_norm": 0.5180752873420715,
      "learning_rate": 8.983324367939753e-06,
      "loss": 1.5147,
      "step": 3550
    },
    {
      "epoch": 0.8438518518518519,
      "grad_norm": 0.5256723165512085,
      "learning_rate": 8.84884346422808e-06,
      "loss": 1.4673,
      "step": 3560
    },
    {
      "epoch": 0.8462222222222222,
      "grad_norm": 0.4829583168029785,
      "learning_rate": 8.714362560516406e-06,
      "loss": 1.5102,
      "step": 3570
    },
    {
      "epoch": 0.8485925925925926,
      "grad_norm": 0.5027347207069397,
      "learning_rate": 8.579881656804733e-06,
      "loss": 1.6514,
      "step": 3580
    },
    {
      "epoch": 0.8509629629629629,
      "grad_norm": 0.5117186903953552,
      "learning_rate": 8.44540075309306e-06,
      "loss": 1.4451,
      "step": 3590
    },
    {
      "epoch": 0.8533333333333334,
      "grad_norm": 0.4994155466556549,
      "learning_rate": 8.310919849381387e-06,
      "loss": 1.3408,
      "step": 3600
    },
    {
      "epoch": 0.8557037037037037,
      "grad_norm": 0.4399481415748596,
      "learning_rate": 8.176438945669715e-06,
      "loss": 1.4044,
      "step": 3610
    },
    {
      "epoch": 0.8580740740740741,
      "grad_norm": 0.49395203590393066,
      "learning_rate": 8.041958041958042e-06,
      "loss": 1.3895,
      "step": 3620
    },
    {
      "epoch": 0.8604444444444445,
      "grad_norm": 0.4922611713409424,
      "learning_rate": 7.907477138246369e-06,
      "loss": 1.5503,
      "step": 3630
    },
    {
      "epoch": 0.8628148148148148,
      "grad_norm": 0.5255241990089417,
      "learning_rate": 7.772996234534696e-06,
      "loss": 1.4736,
      "step": 3640
    },
    {
      "epoch": 0.8651851851851852,
      "grad_norm": 0.4713379442691803,
      "learning_rate": 7.638515330823023e-06,
      "loss": 1.4395,
      "step": 3650
    },
    {
      "epoch": 0.8675555555555555,
      "grad_norm": 0.4290190637111664,
      "learning_rate": 7.50403442711135e-06,
      "loss": 1.4265,
      "step": 3660
    },
    {
      "epoch": 0.8699259259259259,
      "grad_norm": 0.5157113075256348,
      "learning_rate": 7.369553523399677e-06,
      "loss": 1.4363,
      "step": 3670
    },
    {
      "epoch": 0.8722962962962963,
      "grad_norm": 0.5694654583930969,
      "learning_rate": 7.2350726196880045e-06,
      "loss": 1.4602,
      "step": 3680
    },
    {
      "epoch": 0.8746666666666667,
      "grad_norm": 0.47969937324523926,
      "learning_rate": 7.100591715976332e-06,
      "loss": 1.2716,
      "step": 3690
    },
    {
      "epoch": 0.8770370370370371,
      "grad_norm": 0.4939590096473694,
      "learning_rate": 6.966110812264659e-06,
|
"loss": 1.4356, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8794074074074074, |
|
"grad_norm": 0.4829910695552826, |
|
"learning_rate": 6.831629908552986e-06, |
|
"loss": 1.4482, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8817777777777778, |
|
"grad_norm": 0.473178505897522, |
|
"learning_rate": 6.697149004841313e-06, |
|
"loss": 1.3772, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8841481481481481, |
|
"grad_norm": 0.5327422618865967, |
|
"learning_rate": 6.56266810112964e-06, |
|
"loss": 1.5707, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8865185185185185, |
|
"grad_norm": 0.4069652259349823, |
|
"learning_rate": 6.428187197417967e-06, |
|
"loss": 1.3501, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 0.5368005037307739, |
|
"learning_rate": 6.2937062937062944e-06, |
|
"loss": 1.6791, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8912592592592593, |
|
"grad_norm": 0.5622044801712036, |
|
"learning_rate": 6.159225389994621e-06, |
|
"loss": 1.2727, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8936296296296297, |
|
"grad_norm": 0.5526837110519409, |
|
"learning_rate": 6.024744486282948e-06, |
|
"loss": 1.478, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.896, |
|
"grad_norm": 0.5442282557487488, |
|
"learning_rate": 5.890263582571275e-06, |
|
"loss": 1.3917, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.8983703703703704, |
|
"grad_norm": 0.5711065530776978, |
|
"learning_rate": 5.755782678859602e-06, |
|
"loss": 1.4748, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 0.5068963766098022, |
|
"learning_rate": 5.621301775147929e-06, |
|
"loss": 1.3375, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9031111111111111, |
|
"grad_norm": 0.6032804846763611, |
|
"learning_rate": 5.486820871436256e-06, |
|
"loss": 1.553, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9054814814814814, |
|
"grad_norm": 0.4718996286392212, |
|
"learning_rate": 5.352339967724583e-06, |
|
"loss": 1.4586, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9078518518518518, |
|
"grad_norm": 0.583185076713562, |
|
"learning_rate": 5.21785906401291e-06, |
|
"loss": 1.4294, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9102222222222223, |
|
"grad_norm": 0.5688157677650452, |
|
"learning_rate": 5.083378160301237e-06, |
|
"loss": 1.3434, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9125925925925926, |
|
"grad_norm": 0.43667495250701904, |
|
"learning_rate": 4.948897256589564e-06, |
|
"loss": 1.5107, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.914962962962963, |
|
"grad_norm": 0.5924187302589417, |
|
"learning_rate": 4.814416352877891e-06, |
|
"loss": 1.3558, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9173333333333333, |
|
"grad_norm": 0.538318932056427, |
|
"learning_rate": 4.679935449166218e-06, |
|
"loss": 1.379, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9197037037037037, |
|
"grad_norm": 0.4840611219406128, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 1.2776, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.922074074074074, |
|
"grad_norm": 0.5927128195762634, |
|
"learning_rate": 4.410973641742873e-06, |
|
"loss": 1.4598, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9244444444444444, |
|
"grad_norm": 0.5672274827957153, |
|
"learning_rate": 4.2764927380312e-06, |
|
"loss": 1.467, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9268148148148149, |
|
"grad_norm": 0.5424569249153137, |
|
"learning_rate": 4.142011834319527e-06, |
|
"loss": 1.532, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9291851851851852, |
|
"grad_norm": 0.5921524167060852, |
|
"learning_rate": 4.007530930607853e-06, |
|
"loss": 1.5244, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9315555555555556, |
|
"grad_norm": 0.4295691251754761, |
|
"learning_rate": 3.87305002689618e-06, |
|
"loss": 1.4358, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9339259259259259, |
|
"grad_norm": 0.5117975473403931, |
|
"learning_rate": 3.738569123184508e-06, |
|
"loss": 1.4241, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9362962962962963, |
|
"grad_norm": 0.5273323655128479, |
|
"learning_rate": 3.604088219472835e-06, |
|
"loss": 1.3188, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9386666666666666, |
|
"grad_norm": 0.4947817325592041, |
|
"learning_rate": 3.469607315761162e-06, |
|
"loss": 1.444, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.941037037037037, |
|
"grad_norm": 0.5852746367454529, |
|
"learning_rate": 3.3351264120494893e-06, |
|
"loss": 1.4334, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9434074074074074, |
|
"grad_norm": 0.5183681845664978, |
|
"learning_rate": 3.2006455083378165e-06, |
|
"loss": 1.4422, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9457777777777778, |
|
"grad_norm": 0.5193647146224976, |
|
"learning_rate": 3.066164604626143e-06, |
|
"loss": 1.3769, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 0.5597278475761414, |
|
"learning_rate": 2.9316837009144703e-06, |
|
"loss": 1.5406, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9505185185185185, |
|
"grad_norm": 0.5173184275627136, |
|
"learning_rate": 2.7972027972027974e-06, |
|
"loss": 1.5082, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9528888888888889, |
|
"grad_norm": 0.5205141305923462, |
|
"learning_rate": 2.6627218934911246e-06, |
|
"loss": 1.4971, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.9552592592592593, |
|
"grad_norm": 0.48843199014663696, |
|
"learning_rate": 2.5282409897794517e-06, |
|
"loss": 1.4113, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9576296296296296, |
|
"grad_norm": 0.5854997634887695, |
|
"learning_rate": 2.3937600860677784e-06, |
|
"loss": 1.5666, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.5264609456062317, |
|
"learning_rate": 2.2592791823561056e-06, |
|
"loss": 1.3334, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9623703703703703, |
|
"grad_norm": 0.5044777989387512, |
|
"learning_rate": 2.1247982786444327e-06, |
|
"loss": 1.4251, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9647407407407408, |
|
"grad_norm": 0.5678290128707886, |
|
"learning_rate": 1.99031737493276e-06, |
|
"loss": 1.4359, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9671111111111111, |
|
"grad_norm": 0.5562720894813538, |
|
"learning_rate": 1.8558364712210868e-06, |
|
"loss": 1.4333, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9694814814814815, |
|
"grad_norm": 0.5118197798728943, |
|
"learning_rate": 1.7213555675094137e-06, |
|
"loss": 1.4257, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9718518518518519, |
|
"grad_norm": 0.4556100070476532, |
|
"learning_rate": 1.5868746637977408e-06, |
|
"loss": 1.4414, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9742222222222222, |
|
"grad_norm": 0.5370482802391052, |
|
"learning_rate": 1.452393760086068e-06, |
|
"loss": 1.6166, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9765925925925926, |
|
"grad_norm": 0.5851370096206665, |
|
"learning_rate": 1.3179128563743949e-06, |
|
"loss": 1.5649, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9789629629629629, |
|
"grad_norm": 0.5804405212402344, |
|
"learning_rate": 1.183431952662722e-06, |
|
"loss": 1.3898, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9813333333333333, |
|
"grad_norm": 0.5411353707313538, |
|
"learning_rate": 1.0489510489510491e-06, |
|
"loss": 1.4885, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9837037037037037, |
|
"grad_norm": 0.5271933078765869, |
|
"learning_rate": 9.14470145239376e-07, |
|
"loss": 1.3816, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9860740740740741, |
|
"grad_norm": 0.5039179921150208, |
|
"learning_rate": 7.799892415277031e-07, |
|
"loss": 1.2985, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9884444444444445, |
|
"grad_norm": 0.47758767008781433, |
|
"learning_rate": 6.455083378160301e-07, |
|
"loss": 1.4848, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9908148148148148, |
|
"grad_norm": 0.5355851054191589, |
|
"learning_rate": 5.110274341043572e-07, |
|
"loss": 1.5264, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9931851851851852, |
|
"grad_norm": 0.5918956398963928, |
|
"learning_rate": 3.7654653039268424e-07, |
|
"loss": 1.4337, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 0.4990881085395813, |
|
"learning_rate": 2.4206562668101127e-07, |
|
"loss": 1.3851, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9979259259259259, |
|
"grad_norm": 0.49343588948249817, |
|
"learning_rate": 1.0758472296933835e-07, |
|
"loss": 1.5421, |
|
"step": 4210 |
|
} |
|
  ],
  "logging_steps": 10,
  "max_steps": 4218,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.660058442305372e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}