|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 42248, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0023669759515243323, |
|
"grad_norm": 0.36582449078559875, |
|
"learning_rate": 5.858508151611091e-08, |
|
"loss": 2.8441, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.004733951903048665, |
|
"grad_norm": 0.36473265290260315, |
|
"learning_rate": 1.1657839453205907e-07, |
|
"loss": 2.8263, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.007100927854572997, |
|
"grad_norm": 0.3368377685546875, |
|
"learning_rate": 1.7516347604817e-07, |
|
"loss": 2.8367, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.00946790380609733, |
|
"grad_norm": 0.3552389442920685, |
|
"learning_rate": 2.3434032606444363e-07, |
|
"loss": 2.8383, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.011834879757621663, |
|
"grad_norm": 0.4023584723472595, |
|
"learning_rate": 2.9351717608071723e-07, |
|
"loss": 2.8136, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.014201855709145995, |
|
"grad_norm": 0.27697062492370605, |
|
"learning_rate": 3.526940260969909e-07, |
|
"loss": 2.7922, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.016568831660670327, |
|
"grad_norm": 0.35471972823143005, |
|
"learning_rate": 4.1187087611326455e-07, |
|
"loss": 2.7994, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01893580761219466, |
|
"grad_norm": 0.31292667984962463, |
|
"learning_rate": 4.710477261295382e-07, |
|
"loss": 2.7759, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.021302783563718994, |
|
"grad_norm": 0.453121542930603, |
|
"learning_rate": 5.302245761458118e-07, |
|
"loss": 2.7526, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.023669759515243326, |
|
"grad_norm": 0.33600056171417236, |
|
"learning_rate": 5.894014261620854e-07, |
|
"loss": 2.7212, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.026036735466767658, |
|
"grad_norm": 0.3094422221183777, |
|
"learning_rate": 6.485782761783591e-07, |
|
"loss": 2.6944, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.02840371141829199, |
|
"grad_norm": 0.3267682194709778, |
|
"learning_rate": 7.077551261946328e-07, |
|
"loss": 2.6648, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03077068736981632, |
|
"grad_norm": 0.5763485431671143, |
|
"learning_rate": 7.669319762109063e-07, |
|
"loss": 2.6594, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03313766332134065, |
|
"grad_norm": 0.2788572609424591, |
|
"learning_rate": 8.255170577270173e-07, |
|
"loss": 2.6552, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.035504639272864985, |
|
"grad_norm": 0.38050368428230286, |
|
"learning_rate": 8.846939077432909e-07, |
|
"loss": 2.6319, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.03787161522438932, |
|
"grad_norm": 0.29289504885673523, |
|
"learning_rate": 9.438707577595646e-07, |
|
"loss": 2.6371, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.040238591175913656, |
|
"grad_norm": 0.20580381155014038, |
|
"learning_rate": 1.0030476077758381e-06, |
|
"loss": 2.6054, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04260556712743799, |
|
"grad_norm": 0.2935289442539215, |
|
"learning_rate": 1.0622244577921118e-06, |
|
"loss": 2.5963, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04497254307896232, |
|
"grad_norm": 0.2953510582447052, |
|
"learning_rate": 1.1214013078083855e-06, |
|
"loss": 2.6056, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.04733951903048665, |
|
"grad_norm": 0.3077057898044586, |
|
"learning_rate": 1.180578157824659e-06, |
|
"loss": 2.5864, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.04970649498201098, |
|
"grad_norm": 0.25115400552749634, |
|
"learning_rate": 1.2397550078409327e-06, |
|
"loss": 2.5911, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.052073470933535315, |
|
"grad_norm": 0.2623751759529114, |
|
"learning_rate": 1.2989318578572062e-06, |
|
"loss": 2.5591, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05444044688505965, |
|
"grad_norm": 0.30447134375572205, |
|
"learning_rate": 1.35810870787348e-06, |
|
"loss": 2.5668, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.05680742283658398, |
|
"grad_norm": 0.2120353877544403, |
|
"learning_rate": 1.4172855578897537e-06, |
|
"loss": 2.5521, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.05917439878810831, |
|
"grad_norm": 0.23940175771713257, |
|
"learning_rate": 1.4764624079060272e-06, |
|
"loss": 2.5594, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06154137473963264, |
|
"grad_norm": 0.2214510440826416, |
|
"learning_rate": 1.5356392579223009e-06, |
|
"loss": 2.5458, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06390835069115698, |
|
"grad_norm": 0.22601068019866943, |
|
"learning_rate": 1.5948161079385746e-06, |
|
"loss": 2.538, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.0662753266426813, |
|
"grad_norm": 0.23850201070308685, |
|
"learning_rate": 1.6539929579548483e-06, |
|
"loss": 2.532, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.06864230259420565, |
|
"grad_norm": 0.20834830403327942, |
|
"learning_rate": 1.7131698079711218e-06, |
|
"loss": 2.5285, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.07100927854572997, |
|
"grad_norm": 0.21344949305057526, |
|
"learning_rate": 1.7723466579873955e-06, |
|
"loss": 2.5185, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07337625449725431, |
|
"grad_norm": 0.21799206733703613, |
|
"learning_rate": 1.8315235080036692e-06, |
|
"loss": 2.5192, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07574323044877863, |
|
"grad_norm": 0.21602454781532288, |
|
"learning_rate": 1.8907003580199425e-06, |
|
"loss": 2.5086, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.07811020640030297, |
|
"grad_norm": 0.2055075764656067, |
|
"learning_rate": 1.9498772080362162e-06, |
|
"loss": 2.5068, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.08047718235182731, |
|
"grad_norm": 0.21900290250778198, |
|
"learning_rate": 2.00905405805249e-06, |
|
"loss": 2.5144, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.08284415830335164, |
|
"grad_norm": 0.2083442062139511, |
|
"learning_rate": 2.0682309080687637e-06, |
|
"loss": 2.5137, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08521113425487598, |
|
"grad_norm": 0.21810264885425568, |
|
"learning_rate": 2.127407758085037e-06, |
|
"loss": 2.5075, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.0875781102064003, |
|
"grad_norm": 0.2033359259366989, |
|
"learning_rate": 2.186584608101311e-06, |
|
"loss": 2.4976, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.08994508615792464, |
|
"grad_norm": 0.20291608572006226, |
|
"learning_rate": 2.2457614581175846e-06, |
|
"loss": 2.5054, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09231206210944896, |
|
"grad_norm": 0.21681128442287445, |
|
"learning_rate": 2.3049383081338585e-06, |
|
"loss": 2.5165, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.0946790380609733, |
|
"grad_norm": 0.23095227777957916, |
|
"learning_rate": 2.3641151581501316e-06, |
|
"loss": 2.4983, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09704601401249763, |
|
"grad_norm": 0.23442834615707397, |
|
"learning_rate": 2.4232920081664055e-06, |
|
"loss": 2.4923, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.09941298996402197, |
|
"grad_norm": 0.22967080771923065, |
|
"learning_rate": 2.482468858182679e-06, |
|
"loss": 2.4899, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.10177996591554629, |
|
"grad_norm": 0.22393766045570374, |
|
"learning_rate": 2.5416457081989525e-06, |
|
"loss": 2.4939, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.10414694186707063, |
|
"grad_norm": 0.23877893388271332, |
|
"learning_rate": 2.6008225582152264e-06, |
|
"loss": 2.4974, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.10651391781859497, |
|
"grad_norm": 0.26197168231010437, |
|
"learning_rate": 2.6599994082315e-06, |
|
"loss": 2.4773, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1088808937701193, |
|
"grad_norm": 0.2509444057941437, |
|
"learning_rate": 2.719176258247774e-06, |
|
"loss": 2.4774, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11124786972164363, |
|
"grad_norm": 0.22168482840061188, |
|
"learning_rate": 2.7783531082640474e-06, |
|
"loss": 2.49, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.11361484567316796, |
|
"grad_norm": 0.24707303941249847, |
|
"learning_rate": 2.8375299582803213e-06, |
|
"loss": 2.468, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.1159818216246923, |
|
"grad_norm": 0.2593018412590027, |
|
"learning_rate": 2.8967068082965944e-06, |
|
"loss": 2.4749, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.11834879757621662, |
|
"grad_norm": 0.22931291162967682, |
|
"learning_rate": 2.955883658312868e-06, |
|
"loss": 2.4787, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12071577352774096, |
|
"grad_norm": 0.2900484502315521, |
|
"learning_rate": 3.015060508329142e-06, |
|
"loss": 2.4705, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.12308274947926529, |
|
"grad_norm": 0.2222159057855606, |
|
"learning_rate": 3.0742373583454153e-06, |
|
"loss": 2.4629, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.1254497254307896, |
|
"grad_norm": 0.2193671613931656, |
|
"learning_rate": 3.1334142083616892e-06, |
|
"loss": 2.4742, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.12781670138231396, |
|
"grad_norm": 0.22836729884147644, |
|
"learning_rate": 3.1925910583779627e-06, |
|
"loss": 2.4879, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.1301836773338383, |
|
"grad_norm": 0.2218533158302307, |
|
"learning_rate": 3.2517679083942367e-06, |
|
"loss": 2.4636, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.1325506532853626, |
|
"grad_norm": 0.252085417509079, |
|
"learning_rate": 3.31094475841051e-06, |
|
"loss": 2.4727, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.13491762923688697, |
|
"grad_norm": 0.26298022270202637, |
|
"learning_rate": 3.3701216084267837e-06, |
|
"loss": 2.4638, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.1372846051884113, |
|
"grad_norm": 0.23198895156383514, |
|
"learning_rate": 3.429298458443057e-06, |
|
"loss": 2.4597, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.13965158113993562, |
|
"grad_norm": 0.2724401354789734, |
|
"learning_rate": 3.488475308459331e-06, |
|
"loss": 2.4671, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.14201855709145994, |
|
"grad_norm": 0.22617186605930328, |
|
"learning_rate": 3.5476521584756046e-06, |
|
"loss": 2.4665, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.1443855330429843, |
|
"grad_norm": 0.24222290515899658, |
|
"learning_rate": 3.6068290084918785e-06, |
|
"loss": 2.4729, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.14675250899450862, |
|
"grad_norm": 0.23433572053909302, |
|
"learning_rate": 3.666005858508152e-06, |
|
"loss": 2.4512, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.14911948494603294, |
|
"grad_norm": 0.23977671563625336, |
|
"learning_rate": 3.725182708524426e-06, |
|
"loss": 2.464, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.15148646089755727, |
|
"grad_norm": 0.23321278393268585, |
|
"learning_rate": 3.784359558540699e-06, |
|
"loss": 2.4798, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.15385343684908162, |
|
"grad_norm": 0.27208179235458374, |
|
"learning_rate": 3.843536408556973e-06, |
|
"loss": 2.4705, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.15622041280060595, |
|
"grad_norm": 0.23790614306926727, |
|
"learning_rate": 3.902713258573246e-06, |
|
"loss": 2.4555, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.15858738875213027, |
|
"grad_norm": 0.2843892276287079, |
|
"learning_rate": 3.96189010858952e-06, |
|
"loss": 2.4725, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.16095436470365462, |
|
"grad_norm": 0.2643658220767975, |
|
"learning_rate": 4.021066958605794e-06, |
|
"loss": 2.4687, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.16332134065517895, |
|
"grad_norm": 0.29611462354660034, |
|
"learning_rate": 4.080243808622068e-06, |
|
"loss": 2.4594, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.16568831660670327, |
|
"grad_norm": 0.2879164218902588, |
|
"learning_rate": 4.139420658638341e-06, |
|
"loss": 2.4642, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.1680552925582276, |
|
"grad_norm": 0.27046066522598267, |
|
"learning_rate": 4.198597508654615e-06, |
|
"loss": 2.4689, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.17042226850975195, |
|
"grad_norm": 0.24744383990764618, |
|
"learning_rate": 4.257774358670888e-06, |
|
"loss": 2.4526, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.17278924446127628, |
|
"grad_norm": 0.2348434180021286, |
|
"learning_rate": 4.316951208687162e-06, |
|
"loss": 2.4539, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.1751562204128006, |
|
"grad_norm": 0.295792818069458, |
|
"learning_rate": 4.376128058703436e-06, |
|
"loss": 2.4491, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.17752319636432493, |
|
"grad_norm": 0.2649165093898773, |
|
"learning_rate": 4.435304908719709e-06, |
|
"loss": 2.449, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.17989017231584928, |
|
"grad_norm": 0.23758557438850403, |
|
"learning_rate": 4.494481758735983e-06, |
|
"loss": 2.455, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.1822571482673736, |
|
"grad_norm": 0.27746689319610596, |
|
"learning_rate": 4.553658608752257e-06, |
|
"loss": 2.4493, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.18462412421889793, |
|
"grad_norm": 0.2592689096927643, |
|
"learning_rate": 4.6128354587685306e-06, |
|
"loss": 2.4571, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.18699110017042228, |
|
"grad_norm": 0.2735172510147095, |
|
"learning_rate": 4.672012308784804e-06, |
|
"loss": 2.4539, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.1893580761219466, |
|
"grad_norm": 0.2739349603652954, |
|
"learning_rate": 4.7311891588010776e-06, |
|
"loss": 2.4604, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.19172505207347093, |
|
"grad_norm": 0.271176815032959, |
|
"learning_rate": 4.790366008817351e-06, |
|
"loss": 2.448, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.19409202802499526, |
|
"grad_norm": 0.2696959674358368, |
|
"learning_rate": 4.8495428588336246e-06, |
|
"loss": 2.4563, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.1964590039765196, |
|
"grad_norm": 0.30911239981651306, |
|
"learning_rate": 4.9087197088498985e-06, |
|
"loss": 2.4614, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.19882597992804393, |
|
"grad_norm": 0.2745211720466614, |
|
"learning_rate": 4.967896558866172e-06, |
|
"loss": 2.4462, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.20119295587956826, |
|
"grad_norm": 0.29566124081611633, |
|
"learning_rate": 5.0270734088824455e-06, |
|
"loss": 2.451, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.20355993183109258, |
|
"grad_norm": 0.28213486075401306, |
|
"learning_rate": 5.086250258898719e-06, |
|
"loss": 2.4407, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.20592690778261694, |
|
"grad_norm": 0.2758745849132538, |
|
"learning_rate": 5.145427108914993e-06, |
|
"loss": 2.4438, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.20829388373414126, |
|
"grad_norm": 0.2921348810195923, |
|
"learning_rate": 5.204603958931267e-06, |
|
"loss": 2.449, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.21066085968566559, |
|
"grad_norm": 0.26501932740211487, |
|
"learning_rate": 5.26378080894754e-06, |
|
"loss": 2.4486, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.21302783563718994, |
|
"grad_norm": 0.2748875617980957, |
|
"learning_rate": 5.322957658963814e-06, |
|
"loss": 2.4424, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.21539481158871426, |
|
"grad_norm": 0.28109443187713623, |
|
"learning_rate": 5.382134508980087e-06, |
|
"loss": 2.4513, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.2177617875402386, |
|
"grad_norm": 0.27431926131248474, |
|
"learning_rate": 5.44131135899636e-06, |
|
"loss": 2.4437, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.2201287634917629, |
|
"grad_norm": 0.2729012668132782, |
|
"learning_rate": 5.500488209012634e-06, |
|
"loss": 2.4538, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.22249573944328727, |
|
"grad_norm": 0.2898072600364685, |
|
"learning_rate": 5.559665059028908e-06, |
|
"loss": 2.4546, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.2248627153948116, |
|
"grad_norm": 0.3519386649131775, |
|
"learning_rate": 5.618841909045182e-06, |
|
"loss": 2.4462, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.22722969134633592, |
|
"grad_norm": 0.2779889404773712, |
|
"learning_rate": 5.678018759061455e-06, |
|
"loss": 2.443, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.22959666729786024, |
|
"grad_norm": 0.2758658826351166, |
|
"learning_rate": 5.737195609077729e-06, |
|
"loss": 2.4392, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.2319636432493846, |
|
"grad_norm": 0.3754834532737732, |
|
"learning_rate": 5.796372459094003e-06, |
|
"loss": 2.4352, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.23433061920090892, |
|
"grad_norm": 0.27345120906829834, |
|
"learning_rate": 5.855549309110277e-06, |
|
"loss": 2.4523, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.23669759515243324, |
|
"grad_norm": 0.32833969593048096, |
|
"learning_rate": 5.91472615912655e-06, |
|
"loss": 2.4497, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.2390645711039576, |
|
"grad_norm": 0.2878655791282654, |
|
"learning_rate": 5.973903009142824e-06, |
|
"loss": 2.451, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.24143154705548192, |
|
"grad_norm": 0.31419286131858826, |
|
"learning_rate": 6.033079859159098e-06, |
|
"loss": 2.4429, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.24379852300700625, |
|
"grad_norm": 0.2996383607387543, |
|
"learning_rate": 6.092256709175372e-06, |
|
"loss": 2.4349, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.24616549895853057, |
|
"grad_norm": 0.308442085981369, |
|
"learning_rate": 6.151433559191645e-06, |
|
"loss": 2.4495, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.24853247491005492, |
|
"grad_norm": 0.2972429394721985, |
|
"learning_rate": 6.210610409207919e-06, |
|
"loss": 2.433, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.2508994508615792, |
|
"grad_norm": 0.30551430583000183, |
|
"learning_rate": 6.269787259224191e-06, |
|
"loss": 2.447, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.2532664268131036, |
|
"grad_norm": 0.3082588016986847, |
|
"learning_rate": 6.328964109240465e-06, |
|
"loss": 2.4458, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.2556334027646279, |
|
"grad_norm": 0.29121455550193787, |
|
"learning_rate": 6.388140959256739e-06, |
|
"loss": 2.4208, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.2580003787161522, |
|
"grad_norm": 0.32775169610977173, |
|
"learning_rate": 6.447317809273013e-06, |
|
"loss": 2.4263, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.2603673546676766, |
|
"grad_norm": 0.32109200954437256, |
|
"learning_rate": 6.506494659289286e-06, |
|
"loss": 2.4385, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.26273433061920093, |
|
"grad_norm": 0.4912450313568115, |
|
"learning_rate": 6.56567150930556e-06, |
|
"loss": 2.4331, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.2651013065707252, |
|
"grad_norm": 0.30363771319389343, |
|
"learning_rate": 6.624848359321834e-06, |
|
"loss": 2.4339, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.2674682825222496, |
|
"grad_norm": 0.30812105536460876, |
|
"learning_rate": 6.684025209338108e-06, |
|
"loss": 2.4373, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.26983525847377393, |
|
"grad_norm": 0.3601232171058655, |
|
"learning_rate": 6.743202059354381e-06, |
|
"loss": 2.4292, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.27220223442529823, |
|
"grad_norm": 0.3195793926715851, |
|
"learning_rate": 6.802378909370655e-06, |
|
"loss": 2.438, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.2745692103768226, |
|
"grad_norm": 0.31187400221824646, |
|
"learning_rate": 6.861555759386929e-06, |
|
"loss": 2.4413, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.2769361863283469, |
|
"grad_norm": 0.3234810531139374, |
|
"learning_rate": 6.920732609403203e-06, |
|
"loss": 2.4502, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.27930316227987123, |
|
"grad_norm": 0.3229145109653473, |
|
"learning_rate": 6.979909459419476e-06, |
|
"loss": 2.4369, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.2816701382313956, |
|
"grad_norm": 0.30176299810409546, |
|
"learning_rate": 7.03908630943575e-06, |
|
"loss": 2.439, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.2840371141829199, |
|
"grad_norm": 0.3238876461982727, |
|
"learning_rate": 7.0982631594520235e-06, |
|
"loss": 2.4441, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.28640409013444423, |
|
"grad_norm": 0.3230147063732147, |
|
"learning_rate": 7.157440009468296e-06, |
|
"loss": 2.4395, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.2887710660859686, |
|
"grad_norm": 0.33063408732414246, |
|
"learning_rate": 7.21661685948457e-06, |
|
"loss": 2.4332, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.2911380420374929, |
|
"grad_norm": 0.32114726305007935, |
|
"learning_rate": 7.275793709500844e-06, |
|
"loss": 2.4301, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.29350501798901724, |
|
"grad_norm": 0.4075353741645813, |
|
"learning_rate": 7.3349705595171175e-06, |
|
"loss": 2.4333, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.2958719939405416, |
|
"grad_norm": 0.3239745497703552, |
|
"learning_rate": 7.394147409533391e-06, |
|
"loss": 2.4323, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.2982389698920659, |
|
"grad_norm": 0.4447726011276245, |
|
"learning_rate": 7.4533242595496645e-06, |
|
"loss": 2.4321, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.30060594584359024, |
|
"grad_norm": 0.3478521406650543, |
|
"learning_rate": 7.5125011095659385e-06, |
|
"loss": 2.4246, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.30297292179511454, |
|
"grad_norm": 0.35203248262405396, |
|
"learning_rate": 7.571677959582212e-06, |
|
"loss": 2.425, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.3053398977466389, |
|
"grad_norm": 0.328659325838089, |
|
"learning_rate": 7.630854809598486e-06, |
|
"loss": 2.4367, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.30770687369816324, |
|
"grad_norm": 0.3298031985759735, |
|
"learning_rate": 7.69003165961476e-06, |
|
"loss": 2.4273, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.31007384964968754, |
|
"grad_norm": 0.3143956661224365, |
|
"learning_rate": 7.749208509631032e-06, |
|
"loss": 2.4292, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.3124408256012119, |
|
"grad_norm": 0.33441880345344543, |
|
"learning_rate": 7.808385359647306e-06, |
|
"loss": 2.437, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.31480780155273624, |
|
"grad_norm": 0.335602730512619, |
|
"learning_rate": 7.86756220966358e-06, |
|
"loss": 2.438, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.31717477750426054, |
|
"grad_norm": 0.3256273865699768, |
|
"learning_rate": 7.926739059679854e-06, |
|
"loss": 2.4324, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.3195417534557849, |
|
"grad_norm": 0.3533662259578705, |
|
"learning_rate": 7.985915909696128e-06, |
|
"loss": 2.4312, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.32190872940730925, |
|
"grad_norm": 0.34541791677474976, |
|
"learning_rate": 8.0450927597124e-06, |
|
"loss": 2.4294, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.32427570535883354, |
|
"grad_norm": 0.33559226989746094, |
|
"learning_rate": 8.104269609728674e-06, |
|
"loss": 2.4206, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.3266426813103579, |
|
"grad_norm": 0.34667766094207764, |
|
"learning_rate": 8.163446459744948e-06, |
|
"loss": 2.4289, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.3290096572618822, |
|
"grad_norm": 0.3094275891780853, |
|
"learning_rate": 8.222623309761222e-06, |
|
"loss": 2.4335, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.33137663321340655, |
|
"grad_norm": 0.32228076457977295, |
|
"learning_rate": 8.281800159777496e-06, |
|
"loss": 2.4348, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.3337436091649309, |
|
"grad_norm": 0.3154647946357727, |
|
"learning_rate": 8.34097700979377e-06, |
|
"loss": 2.4195, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.3361105851164552, |
|
"grad_norm": 0.380206823348999, |
|
"learning_rate": 8.400153859810042e-06, |
|
"loss": 2.4257, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.33847756106797955, |
|
"grad_norm": 0.32707059383392334, |
|
"learning_rate": 8.459330709826316e-06, |
|
"loss": 2.4279, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.3408445370195039, |
|
"grad_norm": 0.3562242090702057, |
|
"learning_rate": 8.51850755984259e-06, |
|
"loss": 2.4433, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.3432115129710282, |
|
"grad_norm": 0.3338697552680969, |
|
"learning_rate": 8.577684409858864e-06, |
|
"loss": 2.4378, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.34557848892255255, |
|
"grad_norm": 0.3395216166973114, |
|
"learning_rate": 8.636861259875138e-06, |
|
"loss": 2.4274, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.3479454648740769, |
|
"grad_norm": 0.32426705956459045, |
|
"learning_rate": 8.696038109891412e-06, |
|
"loss": 2.4268, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.3503124408256012, |
|
"grad_norm": 0.3478586673736572, |
|
"learning_rate": 8.755214959907686e-06, |
|
"loss": 2.4247, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.35267941677712555, |
|
"grad_norm": 0.3790106475353241, |
|
"learning_rate": 8.81439180992396e-06, |
|
"loss": 2.4372, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.35504639272864985, |
|
"grad_norm": 0.3437531888484955, |
|
"learning_rate": 8.873568659940232e-06, |
|
"loss": 2.4193, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.3574133686801742, |
|
"grad_norm": 0.3627135753631592, |
|
"learning_rate": 8.932745509956506e-06, |
|
"loss": 2.4343, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.35978034463169856, |
|
"grad_norm": 0.3435176610946655, |
|
"learning_rate": 8.99192235997278e-06, |
|
"loss": 2.4231, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.36214732058322285, |
|
"grad_norm": 0.3540484309196472, |
|
"learning_rate": 9.051099209989052e-06, |
|
"loss": 2.426, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.3645142965347472, |
|
"grad_norm": 0.3281879723072052, |
|
"learning_rate": 9.110276060005326e-06, |
|
"loss": 2.4262, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.36688127248627156, |
|
"grad_norm": 0.419574499130249, |
|
"learning_rate": 9.1694529100216e-06, |
|
"loss": 2.4103, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.36924824843779586, |
|
"grad_norm": 0.38810306787490845, |
|
"learning_rate": 9.228629760037874e-06, |
|
"loss": 2.4288, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.3716152243893202, |
|
"grad_norm": 0.3265315592288971, |
|
"learning_rate": 9.287214841553986e-06, |
|
"loss": 2.431, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.37398220034084456, |
|
"grad_norm": 0.3964623510837555, |
|
"learning_rate": 9.346391691570258e-06, |
|
"loss": 2.4272, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.37634917629236886, |
|
"grad_norm": 0.3374871611595154, |
|
"learning_rate": 9.405568541586532e-06, |
|
"loss": 2.4326, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.3787161522438932, |
|
"grad_norm": 0.34002941846847534, |
|
"learning_rate": 9.464745391602806e-06, |
|
"loss": 2.4256, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.3810831281954175, |
|
"grad_norm": 0.3714279234409332, |
|
"learning_rate": 9.52392224161908e-06, |
|
"loss": 2.4202, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.38345010414694186, |
|
"grad_norm": 0.343189537525177, |
|
"learning_rate": 9.583099091635353e-06, |
|
"loss": 2.4168, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.3858170800984662, |
|
"grad_norm": 0.33741703629493713, |
|
"learning_rate": 9.642275941651626e-06, |
|
"loss": 2.4185, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.3881840560499905, |
|
"grad_norm": 0.3652304708957672, |
|
"learning_rate": 9.7014527916679e-06, |
|
"loss": 2.4272, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.39055103200151486, |
|
"grad_norm": 0.3449861407279968, |
|
"learning_rate": 9.760629641684174e-06, |
|
"loss": 2.4048, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.3929180079530392, |
|
"grad_norm": 0.344180703163147, |
|
"learning_rate": 9.819806491700447e-06, |
|
"loss": 2.4201, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.3952849839045635, |
|
"grad_norm": 0.328961044549942, |
|
"learning_rate": 9.878983341716721e-06, |
|
"loss": 2.4252, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.39765195985608787, |
|
"grad_norm": 0.3466714918613434, |
|
"learning_rate": 9.938160191732995e-06, |
|
"loss": 2.4082, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.4000189358076122, |
|
"grad_norm": 0.3624398112297058, |
|
"learning_rate": 9.99733704174927e-06, |
|
"loss": 2.4275, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.4023859117591365, |
|
"grad_norm": 0.35927194356918335, |
|
"learning_rate": 1.0056513891765543e-05, |
|
"loss": 2.4183, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.40475288771066087, |
|
"grad_norm": 0.3643719255924225, |
|
"learning_rate": 1.0115690741781815e-05, |
|
"loss": 2.4299, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.40711986366218517, |
|
"grad_norm": 0.3489636182785034, |
|
"learning_rate": 1.017486759179809e-05, |
|
"loss": 2.4105, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.4094868396137095, |
|
"grad_norm": 0.3617055118083954, |
|
"learning_rate": 1.0234044441814363e-05, |
|
"loss": 2.4262, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.41185381556523387, |
|
"grad_norm": 0.3670959174633026, |
|
"learning_rate": 1.0293221291830637e-05, |
|
"loss": 2.4253, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.41422079151675817, |
|
"grad_norm": 0.4054628610610962, |
|
"learning_rate": 1.0352398141846911e-05, |
|
"loss": 2.4165, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.4165877674682825, |
|
"grad_norm": 0.32820406556129456, |
|
"learning_rate": 1.0411574991863185e-05, |
|
"loss": 2.4156, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.4189547434198069, |
|
"grad_norm": 0.3387589752674103, |
|
"learning_rate": 1.0470751841879459e-05, |
|
"loss": 2.4273, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.42132171937133117, |
|
"grad_norm": 0.3759928047657013, |
|
"learning_rate": 1.0529928691895733e-05, |
|
"loss": 2.4311, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.4236886953228555, |
|
"grad_norm": 0.38023602962493896, |
|
"learning_rate": 1.0589105541912005e-05, |
|
"loss": 2.4243, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.4260556712743799, |
|
"grad_norm": 0.34721675515174866, |
|
"learning_rate": 1.0648282391928279e-05, |
|
"loss": 2.4188, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.4284226472259042, |
|
"grad_norm": 0.34966644644737244, |
|
"learning_rate": 1.0707459241944551e-05, |
|
"loss": 2.4086, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.4307896231774285, |
|
"grad_norm": 0.38616931438446045, |
|
"learning_rate": 1.0766636091960825e-05, |
|
"loss": 2.412, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.4331565991289528, |
|
"grad_norm": 0.3381541967391968, |
|
"learning_rate": 1.0825812941977099e-05, |
|
"loss": 2.414, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.4355235750804772, |
|
"grad_norm": 0.4827527105808258, |
|
"learning_rate": 1.0884989791993373e-05, |
|
"loss": 2.4125, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.43789055103200153, |
|
"grad_norm": 0.3514668941497803, |
|
"learning_rate": 1.0944166642009645e-05, |
|
"loss": 2.4137, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.4402575269835258, |
|
"grad_norm": 0.3542225956916809, |
|
"learning_rate": 1.100334349202592e-05, |
|
"loss": 2.4087, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.4426245029350502, |
|
"grad_norm": 0.40214431285858154, |
|
"learning_rate": 1.1062520342042193e-05, |
|
"loss": 2.4242, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.44499147888657453, |
|
"grad_norm": 0.34530532360076904, |
|
"learning_rate": 1.1121697192058467e-05, |
|
"loss": 2.4115, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.44735845483809883, |
|
"grad_norm": 0.3892427384853363, |
|
"learning_rate": 1.1180874042074741e-05, |
|
"loss": 2.4158, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.4497254307896232, |
|
"grad_norm": 0.3698406219482422, |
|
"learning_rate": 1.1240050892091015e-05, |
|
"loss": 2.4136, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.45209240674114753, |
|
"grad_norm": 0.3435867726802826, |
|
"learning_rate": 1.1299227742107289e-05, |
|
"loss": 2.4181, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.45445938269267183, |
|
"grad_norm": 0.3343878388404846, |
|
"learning_rate": 1.1358404592123563e-05, |
|
"loss": 2.4123, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.4568263586441962, |
|
"grad_norm": 0.3319224417209625, |
|
"learning_rate": 1.1417581442139835e-05, |
|
"loss": 2.4179, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.4591933345957205, |
|
"grad_norm": 0.36949145793914795, |
|
"learning_rate": 1.1476758292156109e-05, |
|
"loss": 2.4288, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.46156031054724483, |
|
"grad_norm": 0.33672720193862915, |
|
"learning_rate": 1.1535935142172383e-05, |
|
"loss": 2.4283, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.4639272864987692, |
|
"grad_norm": 0.36359962821006775, |
|
"learning_rate": 1.1595111992188657e-05, |
|
"loss": 2.4104, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.4662942624502935, |
|
"grad_norm": 0.357768714427948, |
|
"learning_rate": 1.165428884220493e-05, |
|
"loss": 2.4005, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.46866123840181784, |
|
"grad_norm": 0.35632389783859253, |
|
"learning_rate": 1.1713465692221205e-05, |
|
"loss": 2.4156, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.4710282143533422, |
|
"grad_norm": 0.35454291105270386, |
|
"learning_rate": 1.1772642542237479e-05, |
|
"loss": 2.4075, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.4733951903048665, |
|
"grad_norm": 0.337933212518692, |
|
"learning_rate": 1.1831819392253752e-05, |
|
"loss": 2.4119, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.47576216625639084, |
|
"grad_norm": 0.36804336309432983, |
|
"learning_rate": 1.1890996242270025e-05, |
|
"loss": 2.4112, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.4781291422079152, |
|
"grad_norm": 0.3589170575141907, |
|
"learning_rate": 1.1950173092286299e-05, |
|
"loss": 2.4111, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.4804961181594395, |
|
"grad_norm": 0.4138932228088379, |
|
"learning_rate": 1.2009349942302573e-05, |
|
"loss": 2.4147, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.48286309411096384, |
|
"grad_norm": 0.37294042110443115, |
|
"learning_rate": 1.2068526792318846e-05, |
|
"loss": 2.4199, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.48523007006248814, |
|
"grad_norm": 0.34787285327911377, |
|
"learning_rate": 1.212770364233512e-05, |
|
"loss": 2.4125, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.4875970460140125, |
|
"grad_norm": 0.33219948410987854, |
|
"learning_rate": 1.2186880492351394e-05, |
|
"loss": 2.4046, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.48996402196553684, |
|
"grad_norm": 0.3547484278678894, |
|
"learning_rate": 1.2246057342367668e-05, |
|
"loss": 2.4178, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.49233099791706114, |
|
"grad_norm": 0.33837926387786865, |
|
"learning_rate": 1.2305234192383942e-05, |
|
"loss": 2.403, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.4946979738685855, |
|
"grad_norm": 0.35077232122421265, |
|
"learning_rate": 1.2364411042400214e-05, |
|
"loss": 2.4139, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.49706494982010985, |
|
"grad_norm": 0.3571261167526245, |
|
"learning_rate": 1.2422996123916324e-05, |
|
"loss": 2.4001, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.49943192577163414, |
|
"grad_norm": 0.36656296253204346, |
|
"learning_rate": 1.2482172973932598e-05, |
|
"loss": 2.406, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5017989017231584, |
|
"grad_norm": 0.3557038903236389, |
|
"learning_rate": 1.2541349823948872e-05, |
|
"loss": 2.41, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.5041658776746828, |
|
"grad_norm": 0.361907035112381, |
|
"learning_rate": 1.2600526673965146e-05, |
|
"loss": 2.4106, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.5065328536262071, |
|
"grad_norm": 0.34070518612861633, |
|
"learning_rate": 1.2659703523981418e-05, |
|
"loss": 2.4121, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.5088998295777315, |
|
"grad_norm": 0.35266879200935364, |
|
"learning_rate": 1.2718880373997692e-05, |
|
"loss": 2.4051, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.5112668055292559, |
|
"grad_norm": 0.39729219675064087, |
|
"learning_rate": 1.2778057224013966e-05, |
|
"loss": 2.4004, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.5136337814807802, |
|
"grad_norm": 0.34886813163757324, |
|
"learning_rate": 1.283723407403024e-05, |
|
"loss": 2.4171, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.5160007574323044, |
|
"grad_norm": 0.33244648575782776, |
|
"learning_rate": 1.2896410924046514e-05, |
|
"loss": 2.3979, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.5183677333838288, |
|
"grad_norm": 0.3533230423927307, |
|
"learning_rate": 1.2955587774062788e-05, |
|
"loss": 2.4039, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.5207347093353532, |
|
"grad_norm": 0.3643980920314789, |
|
"learning_rate": 1.3014764624079062e-05, |
|
"loss": 2.417, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.5231016852868775, |
|
"grad_norm": 0.3681216835975647, |
|
"learning_rate": 1.3073941474095336e-05, |
|
"loss": 2.4028, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.5254686612384019, |
|
"grad_norm": 0.3376631438732147, |
|
"learning_rate": 1.3133118324111608e-05, |
|
"loss": 2.4044, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.5278356371899261, |
|
"grad_norm": 0.3588080108165741, |
|
"learning_rate": 1.3192295174127882e-05, |
|
"loss": 2.4152, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.5302026131414505, |
|
"grad_norm": 0.35474061965942383, |
|
"learning_rate": 1.3251472024144156e-05, |
|
"loss": 2.3962, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.5325695890929748, |
|
"grad_norm": 0.36065080761909485, |
|
"learning_rate": 1.331064887416043e-05, |
|
"loss": 2.4035, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.5349365650444992, |
|
"grad_norm": 0.34817591309547424, |
|
"learning_rate": 1.3369825724176704e-05, |
|
"loss": 2.4108, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.5373035409960235, |
|
"grad_norm": 0.33565661311149597, |
|
"learning_rate": 1.3429002574192978e-05, |
|
"loss": 2.403, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.5396705169475479, |
|
"grad_norm": 0.34676095843315125, |
|
"learning_rate": 1.3488179424209252e-05, |
|
"loss": 2.4056, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.5420374928990721, |
|
"grad_norm": 0.3674164116382599, |
|
"learning_rate": 1.3547356274225526e-05, |
|
"loss": 2.4061, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.5444044688505965, |
|
"grad_norm": 0.3376142978668213, |
|
"learning_rate": 1.3605941355741634e-05, |
|
"loss": 2.4158, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.5467714448021208, |
|
"grad_norm": 0.3908544182777405, |
|
"learning_rate": 1.3665118205757908e-05, |
|
"loss": 2.4022, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.5491384207536452, |
|
"grad_norm": 0.38587990403175354, |
|
"learning_rate": 1.3724295055774182e-05, |
|
"loss": 2.4171, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.5515053967051695, |
|
"grad_norm": 0.3695133924484253, |
|
"learning_rate": 1.3783471905790456e-05, |
|
"loss": 2.3997, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.5538723726566938, |
|
"grad_norm": 0.3392127454280853, |
|
"learning_rate": 1.384264875580673e-05, |
|
"loss": 2.4157, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.5562393486082181, |
|
"grad_norm": 0.3664696216583252, |
|
"learning_rate": 1.3901825605823004e-05, |
|
"loss": 2.4123, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.5586063245597425, |
|
"grad_norm": 0.3691762387752533, |
|
"learning_rate": 1.3961002455839276e-05, |
|
"loss": 2.3994, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.5609733005112668, |
|
"grad_norm": 0.3565746247768402, |
|
"learning_rate": 1.402017930585555e-05, |
|
"loss": 2.4027, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.5633402764627912, |
|
"grad_norm": 0.3518475890159607, |
|
"learning_rate": 1.4079356155871824e-05, |
|
"loss": 2.3937, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.5657072524143155, |
|
"grad_norm": 0.34867557883262634, |
|
"learning_rate": 1.4138533005888098e-05, |
|
"loss": 2.4, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.5680742283658398, |
|
"grad_norm": 0.35145652294158936, |
|
"learning_rate": 1.4197709855904371e-05, |
|
"loss": 2.4044, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.5704412043173641, |
|
"grad_norm": 0.3380683958530426, |
|
"learning_rate": 1.4256886705920645e-05, |
|
"loss": 2.4139, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.5728081802688885, |
|
"grad_norm": 0.3554782569408417, |
|
"learning_rate": 1.431606355593692e-05, |
|
"loss": 2.395, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.5751751562204128, |
|
"grad_norm": 0.39881500601768494, |
|
"learning_rate": 1.4375240405953193e-05, |
|
"loss": 2.3942, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.5775421321719372, |
|
"grad_norm": 0.37088507413864136, |
|
"learning_rate": 1.4434417255969465e-05, |
|
"loss": 2.4092, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.5799091081234614, |
|
"grad_norm": 0.3711656630039215, |
|
"learning_rate": 1.449359410598574e-05, |
|
"loss": 2.4184, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.5822760840749858, |
|
"grad_norm": 0.33910948038101196, |
|
"learning_rate": 1.4552770956002013e-05, |
|
"loss": 2.3916, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.5846430600265101, |
|
"grad_norm": 0.35600873827934265, |
|
"learning_rate": 1.4611947806018287e-05, |
|
"loss": 2.4008, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.5870100359780345, |
|
"grad_norm": 0.35309475660324097, |
|
"learning_rate": 1.4671124656034561e-05, |
|
"loss": 2.3979, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.5893770119295588, |
|
"grad_norm": 0.3425716459751129, |
|
"learning_rate": 1.4730301506050835e-05, |
|
"loss": 2.4015, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.5917439878810832, |
|
"grad_norm": 0.3652407228946686, |
|
"learning_rate": 1.4789478356067109e-05, |
|
"loss": 2.3957, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.5941109638326074, |
|
"grad_norm": 0.3365596830844879, |
|
"learning_rate": 1.4848655206083383e-05, |
|
"loss": 2.3913, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.5964779397841318, |
|
"grad_norm": 0.35885608196258545, |
|
"learning_rate": 1.4907832056099655e-05, |
|
"loss": 2.3903, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.5988449157356561, |
|
"grad_norm": 0.38684821128845215, |
|
"learning_rate": 1.4966417137615765e-05, |
|
"loss": 2.3876, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.6012118916871805, |
|
"grad_norm": 0.3497035503387451, |
|
"learning_rate": 1.5025593987632039e-05, |
|
"loss": 2.3874, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.6035788676387048, |
|
"grad_norm": 0.3431876599788666, |
|
"learning_rate": 1.5084770837648313e-05, |
|
"loss": 2.39, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.6059458435902291, |
|
"grad_norm": 0.35600966215133667, |
|
"learning_rate": 1.5143947687664587e-05, |
|
"loss": 2.4009, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.6083128195417534, |
|
"grad_norm": 0.33623310923576355, |
|
"learning_rate": 1.5203124537680861e-05, |
|
"loss": 2.3981, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.6106797954932778, |
|
"grad_norm": 0.33237648010253906, |
|
"learning_rate": 1.5262301387697135e-05, |
|
"loss": 2.4036, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.6130467714448021, |
|
"grad_norm": 0.35398033261299133, |
|
"learning_rate": 1.532147823771341e-05, |
|
"loss": 2.3988, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.6154137473963265, |
|
"grad_norm": 0.47366973757743835, |
|
"learning_rate": 1.5380655087729683e-05, |
|
"loss": 2.4013, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.6177807233478508, |
|
"grad_norm": 0.339417427778244, |
|
"learning_rate": 1.5439831937745957e-05, |
|
"loss": 2.4069, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.6201476992993751, |
|
"grad_norm": 0.3327637016773224, |
|
"learning_rate": 1.5499008787762227e-05, |
|
"loss": 2.3921, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.6225146752508994, |
|
"grad_norm": 0.3412494659423828, |
|
"learning_rate": 1.55581856377785e-05, |
|
"loss": 2.379, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.6248816512024238, |
|
"grad_norm": 0.3637641668319702, |
|
"learning_rate": 1.5617362487794775e-05, |
|
"loss": 2.3911, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.6272486271539481, |
|
"grad_norm": 0.4117577373981476, |
|
"learning_rate": 1.567653933781105e-05, |
|
"loss": 2.391, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.6296156031054725, |
|
"grad_norm": 0.3605392575263977, |
|
"learning_rate": 1.5735716187827323e-05, |
|
"loss": 2.3961, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.6319825790569967, |
|
"grad_norm": 0.35646742582321167, |
|
"learning_rate": 1.5794893037843597e-05, |
|
"loss": 2.3969, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.6343495550085211, |
|
"grad_norm": 0.3432878851890564, |
|
"learning_rate": 1.585406988785987e-05, |
|
"loss": 2.3939, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.6367165309600454, |
|
"grad_norm": 0.3541545569896698, |
|
"learning_rate": 1.5913246737876145e-05, |
|
"loss": 2.4079, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.6390835069115698, |
|
"grad_norm": 0.3709736168384552, |
|
"learning_rate": 1.597242358789242e-05, |
|
"loss": 2.4119, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.6414504828630941, |
|
"grad_norm": 0.32629159092903137, |
|
"learning_rate": 1.6031600437908692e-05, |
|
"loss": 2.3905, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.6438174588146185, |
|
"grad_norm": 0.4810309410095215, |
|
"learning_rate": 1.6090777287924966e-05, |
|
"loss": 2.3926, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.6461844347661427, |
|
"grad_norm": 0.37358030676841736, |
|
"learning_rate": 1.614995413794124e-05, |
|
"loss": 2.3836, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.6485514107176671, |
|
"grad_norm": 0.36473044753074646, |
|
"learning_rate": 1.6209130987957514e-05, |
|
"loss": 2.39, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.6509183866691914, |
|
"grad_norm": 0.32987740635871887, |
|
"learning_rate": 1.6268307837973788e-05, |
|
"loss": 2.3925, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.6532853626207158, |
|
"grad_norm": 0.34442269802093506, |
|
"learning_rate": 1.6327484687990062e-05, |
|
"loss": 2.4023, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.6556523385722401, |
|
"grad_norm": 0.3745739161968231, |
|
"learning_rate": 1.6386661538006333e-05, |
|
"loss": 2.4047, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.6580193145237644, |
|
"grad_norm": 0.3746493458747864, |
|
"learning_rate": 1.6445838388022607e-05, |
|
"loss": 2.4005, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.6603862904752887, |
|
"grad_norm": 0.32949355244636536, |
|
"learning_rate": 1.650501523803888e-05, |
|
"loss": 2.3875, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.6627532664268131, |
|
"grad_norm": 0.331719309091568, |
|
"learning_rate": 1.6564192088055154e-05, |
|
"loss": 2.3876, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.6651202423783374, |
|
"grad_norm": 0.34970593452453613, |
|
"learning_rate": 1.662336893807143e-05, |
|
"loss": 2.3995, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.6674872183298618, |
|
"grad_norm": 0.3494050204753876, |
|
"learning_rate": 1.6682545788087702e-05, |
|
"loss": 2.3852, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.6698541942813862, |
|
"grad_norm": 0.31740233302116394, |
|
"learning_rate": 1.6741722638103973e-05, |
|
"loss": 2.3953, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.6722211702329104, |
|
"grad_norm": 0.3360515236854553, |
|
"learning_rate": 1.6800899488120247e-05, |
|
"loss": 2.3911, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.6745881461844347, |
|
"grad_norm": 0.3421274721622467, |
|
"learning_rate": 1.685948456963636e-05, |
|
"loss": 2.404, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.6769551221359591, |
|
"grad_norm": 0.33647575974464417, |
|
"learning_rate": 1.6918661419652632e-05, |
|
"loss": 2.3986, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.6793220980874835, |
|
"grad_norm": 0.33582180738449097, |
|
"learning_rate": 1.6977838269668906e-05, |
|
"loss": 2.3948, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.6816890740390078, |
|
"grad_norm": 0.34744688868522644, |
|
"learning_rate": 1.703701511968518e-05, |
|
"loss": 2.3921, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.684056049990532, |
|
"grad_norm": 0.3513332009315491, |
|
"learning_rate": 1.7096191969701454e-05, |
|
"loss": 2.397, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.6864230259420564, |
|
"grad_norm": 0.35616153478622437, |
|
"learning_rate": 1.7155368819717728e-05, |
|
"loss": 2.3922, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.6887900018935808, |
|
"grad_norm": 0.3601691424846649, |
|
"learning_rate": 1.7214545669734002e-05, |
|
"loss": 2.3886, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.6911569778451051, |
|
"grad_norm": 0.3415214419364929, |
|
"learning_rate": 1.7273722519750276e-05, |
|
"loss": 2.3836, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.6935239537966295, |
|
"grad_norm": 0.3496253788471222, |
|
"learning_rate": 1.733289936976655e-05, |
|
"loss": 2.3832, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.6958909297481538, |
|
"grad_norm": 0.32848358154296875, |
|
"learning_rate": 1.7392076219782824e-05, |
|
"loss": 2.3839, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.698257905699678, |
|
"grad_norm": 0.3362344801425934, |
|
"learning_rate": 1.7451253069799098e-05, |
|
"loss": 2.3878, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.7006248816512024, |
|
"grad_norm": 0.34034013748168945, |
|
"learning_rate": 1.751042991981537e-05, |
|
"loss": 2.3841, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.7029918576027268, |
|
"grad_norm": 0.34850838780403137, |
|
"learning_rate": 1.7569606769831646e-05, |
|
"loss": 2.3893, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.7053588335542511, |
|
"grad_norm": 0.34481024742126465, |
|
"learning_rate": 1.762878361984792e-05, |
|
"loss": 2.3746, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.7077258095057755, |
|
"grad_norm": 0.319324254989624, |
|
"learning_rate": 1.768796046986419e-05, |
|
"loss": 2.3909, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.7100927854572997, |
|
"grad_norm": 0.3310067057609558, |
|
"learning_rate": 1.7747137319880464e-05, |
|
"loss": 2.3859, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.712459761408824, |
|
"grad_norm": 0.34449535608291626, |
|
"learning_rate": 1.7806314169896738e-05, |
|
"loss": 2.4031, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.7148267373603484, |
|
"grad_norm": 0.36738091707229614, |
|
"learning_rate": 1.7865491019913012e-05, |
|
"loss": 2.3877, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.7171937133118728, |
|
"grad_norm": 0.3570147752761841, |
|
"learning_rate": 1.7924667869929286e-05, |
|
"loss": 2.3921, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.7195606892633971, |
|
"grad_norm": 0.32705631852149963, |
|
"learning_rate": 1.798384471994556e-05, |
|
"loss": 2.3844, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.7219276652149215, |
|
"grad_norm": 0.3508467972278595, |
|
"learning_rate": 1.804302156996183e-05, |
|
"loss": 2.375, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.7242946411664457, |
|
"grad_norm": 0.3959505558013916, |
|
"learning_rate": 1.8102198419978104e-05, |
|
"loss": 2.3893, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.7266616171179701, |
|
"grad_norm": 0.3338560163974762, |
|
"learning_rate": 1.8161375269994378e-05, |
|
"loss": 2.3803, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.7290285930694944, |
|
"grad_norm": 0.3438529968261719, |
|
"learning_rate": 1.8220552120010652e-05, |
|
"loss": 2.383, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.7313955690210188, |
|
"grad_norm": 0.34159713983535767, |
|
"learning_rate": 1.8279728970026926e-05, |
|
"loss": 2.381, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.7337625449725431, |
|
"grad_norm": 0.38974571228027344, |
|
"learning_rate": 1.83389058200432e-05, |
|
"loss": 2.3779, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.7361295209240674, |
|
"grad_norm": 0.3364710211753845, |
|
"learning_rate": 1.8398082670059474e-05, |
|
"loss": 2.3846, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.7384964968755917, |
|
"grad_norm": 0.39294859766960144, |
|
"learning_rate": 1.8457259520075748e-05, |
|
"loss": 2.3857, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.7408634728271161, |
|
"grad_norm": 0.35359159111976624, |
|
"learning_rate": 1.851643637009202e-05, |
|
"loss": 2.3821, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.7432304487786404, |
|
"grad_norm": 0.37089574337005615, |
|
"learning_rate": 1.8575613220108295e-05, |
|
"loss": 2.394, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.7455974247301648, |
|
"grad_norm": 0.32074281573295593, |
|
"learning_rate": 1.863479007012457e-05, |
|
"loss": 2.3854, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.7479644006816891, |
|
"grad_norm": 0.3406684696674347, |
|
"learning_rate": 1.8693966920140843e-05, |
|
"loss": 2.3822, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.7503313766332134, |
|
"grad_norm": 0.3442894220352173, |
|
"learning_rate": 1.8753143770157117e-05, |
|
"loss": 2.3782, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.7526983525847377, |
|
"grad_norm": 0.3537774682044983, |
|
"learning_rate": 1.881172885167323e-05, |
|
"loss": 2.3846, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.7550653285362621, |
|
"grad_norm": 0.31586501002311707, |
|
"learning_rate": 1.8870905701689503e-05, |
|
"loss": 2.3876, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.7574323044877864, |
|
"grad_norm": 0.35079076886177063, |
|
"learning_rate": 1.8930082551705777e-05, |
|
"loss": 2.3891, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.7597992804393108, |
|
"grad_norm": 0.3363019824028015, |
|
"learning_rate": 1.8989259401722047e-05, |
|
"loss": 2.3933, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.762166256390835, |
|
"grad_norm": 0.32039549946784973, |
|
"learning_rate": 1.904843625173832e-05, |
|
"loss": 2.3585, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.7645332323423594, |
|
"grad_norm": 0.33742275834083557, |
|
"learning_rate": 1.9107613101754595e-05, |
|
"loss": 2.3809, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.7669002082938837, |
|
"grad_norm": 0.3437131941318512, |
|
"learning_rate": 1.916678995177087e-05, |
|
"loss": 2.3811, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.7692671842454081, |
|
"grad_norm": 0.3589881658554077, |
|
"learning_rate": 1.9225966801787143e-05, |
|
"loss": 2.3763, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.7716341601969324, |
|
"grad_norm": 0.36550530791282654, |
|
"learning_rate": 1.9285143651803414e-05, |
|
"loss": 2.3804, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.7740011361484568, |
|
"grad_norm": 0.3241026699542999, |
|
"learning_rate": 1.9344320501819688e-05, |
|
"loss": 2.3908, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.776368112099981, |
|
"grad_norm": 0.33091387152671814, |
|
"learning_rate": 1.940349735183596e-05, |
|
"loss": 2.378, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.7787350880515054, |
|
"grad_norm": 0.31871795654296875, |
|
"learning_rate": 1.9462674201852235e-05, |
|
"loss": 2.3837, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.7811020640030297, |
|
"grad_norm": 0.331828773021698, |
|
"learning_rate": 1.952185105186851e-05, |
|
"loss": 2.3774, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.7834690399545541, |
|
"grad_norm": 0.33192068338394165, |
|
"learning_rate": 1.9581027901884783e-05, |
|
"loss": 2.3812, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.7858360159060784, |
|
"grad_norm": 0.3415600657463074, |
|
"learning_rate": 1.9640204751901057e-05, |
|
"loss": 2.3754, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.7882029918576027, |
|
"grad_norm": 0.30927810072898865, |
|
"learning_rate": 1.969938160191733e-05, |
|
"loss": 2.3844, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.790569967809127, |
|
"grad_norm": 0.3214524984359741, |
|
"learning_rate": 1.9758558451933605e-05, |
|
"loss": 2.3678, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.7929369437606514, |
|
"grad_norm": 0.3286936581134796, |
|
"learning_rate": 1.981773530194988e-05, |
|
"loss": 2.3848, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.7953039197121757, |
|
"grad_norm": 0.33375072479248047, |
|
"learning_rate": 1.9876912151966153e-05, |
|
"loss": 2.3737, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.7976708956637001, |
|
"grad_norm": 0.3241300582885742, |
|
"learning_rate": 1.9936089001982427e-05, |
|
"loss": 2.3662, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.8000378716152244, |
|
"grad_norm": 0.34323224425315857, |
|
"learning_rate": 1.99952658519987e-05, |
|
"loss": 2.3809, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.8024048475667487, |
|
"grad_norm": 0.3225324749946594, |
|
"learning_rate": 1.9994152275965527e-05, |
|
"loss": 2.3724, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.804771823518273, |
|
"grad_norm": 0.3365699350833893, |
|
"learning_rate": 1.997453922456623e-05, |
|
"loss": 2.3759, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.8071387994697974, |
|
"grad_norm": 0.32580050826072693, |
|
"learning_rate": 1.994114372491635e-05, |
|
"loss": 2.3733, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.8095057754213217, |
|
"grad_norm": 0.3402758836746216, |
|
"learning_rate": 1.989455103627163e-05, |
|
"loss": 2.3742, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.8118727513728461, |
|
"grad_norm": 0.3205104470252991, |
|
"learning_rate": 1.983388438172617e-05, |
|
"loss": 2.3704, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.8142397273243703, |
|
"grad_norm": 0.3125210404396057, |
|
"learning_rate": 1.975962963057375e-05, |
|
"loss": 2.3652, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.8166067032758947, |
|
"grad_norm": 0.3083760142326355, |
|
"learning_rate": 1.9671889385274698e-05, |
|
"loss": 2.3782, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.818973679227419, |
|
"grad_norm": 0.3169231116771698, |
|
"learning_rate": 1.9570784882044856e-05, |
|
"loss": 2.3826, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.8213406551789434, |
|
"grad_norm": 0.30974331498146057, |
|
"learning_rate": 1.945645582333587e-05, |
|
"loss": 2.3741, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.8237076311304677, |
|
"grad_norm": 0.34712207317352295, |
|
"learning_rate": 1.93290601847995e-05, |
|
"loss": 2.3839, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.8260746070819921, |
|
"grad_norm": 0.3297557234764099, |
|
"learning_rate": 1.918877399700279e-05, |
|
"loss": 2.3762, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.8284415830335163, |
|
"grad_norm": 0.3331148326396942, |
|
"learning_rate": 1.9035791102195484e-05, |
|
"loss": 2.3759, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.8308085589850407, |
|
"grad_norm": 0.3134233057498932, |
|
"learning_rate": 1.8870322886466053e-05, |
|
"loss": 2.3715, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.833175534936565, |
|
"grad_norm": 0.3077858090400696, |
|
"learning_rate": 1.8692597987656205e-05, |
|
"loss": 2.3652, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.8355425108880894, |
|
"grad_norm": 0.3141195476055145, |
|
"learning_rate": 1.8502861979437626e-05, |
|
"loss": 2.3677, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.8379094868396137, |
|
"grad_norm": 0.3238203525543213, |
|
"learning_rate": 1.8301377031987363e-05, |
|
"loss": 2.368, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.840276462791138, |
|
"grad_norm": 0.32180941104888916, |
|
"learning_rate": 1.8088421549730826e-05, |
|
"loss": 2.3654, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.8426434387426623, |
|
"grad_norm": 0.3173375427722931, |
|
"learning_rate": 1.7864289786652865e-05, |
|
"loss": 2.3708, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.8450104146941867, |
|
"grad_norm": 0.3098245859146118, |
|
"learning_rate": 1.762929143970854e-05, |
|
"loss": 2.3847, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.847377390645711, |
|
"grad_norm": 0.3169116675853729, |
|
"learning_rate": 1.7383751220895348e-05, |
|
"loss": 2.3849, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.8497443665972354, |
|
"grad_norm": 0.2940201461315155, |
|
"learning_rate": 1.7128008408578232e-05, |
|
"loss": 2.3777, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.8521113425487598, |
|
"grad_norm": 0.3399713635444641, |
|
"learning_rate": 1.686241637868734e-05, |
|
"loss": 2.3686, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.854478318500284, |
|
"grad_norm": 0.319431871175766, |
|
"learning_rate": 1.658734211643625e-05, |
|
"loss": 2.3656, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.8568452944518083, |
|
"grad_norm": 0.3360809087753296, |
|
"learning_rate": 1.6303165709235443e-05, |
|
"loss": 2.3782, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.8592122704033327, |
|
"grad_norm": 0.3362599015235901, |
|
"learning_rate": 1.6010279821501603e-05, |
|
"loss": 2.3838, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.861579246354857, |
|
"grad_norm": 0.32247358560562134, |
|
"learning_rate": 1.5709089152088488e-05, |
|
"loss": 2.3708, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.8639462223063814, |
|
"grad_norm": 0.3041239380836487, |
|
"learning_rate": 1.5400009875089087e-05, |
|
"loss": 2.3754, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.8663131982579056, |
|
"grad_norm": 0.3329671323299408, |
|
"learning_rate": 1.5083469064781687e-05, |
|
"loss": 2.3611, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.86868017420943, |
|
"grad_norm": 0.3081216812133789, |
|
"learning_rate": 1.475990410551448e-05, |
|
"loss": 2.3697, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.8710471501609544, |
|
"grad_norm": 0.3056845963001251, |
|
"learning_rate": 1.4429762087344101e-05, |
|
"loss": 2.3602, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.8734141261124787, |
|
"grad_norm": 0.3348017632961273, |
|
"learning_rate": 1.4093499188263166e-05, |
|
"loss": 2.3688, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.8757811020640031, |
|
"grad_norm": 0.3079584240913391, |
|
"learning_rate": 1.3751580043870465e-05, |
|
"loss": 2.3741, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.8781480780155274, |
|
"grad_norm": 0.33923518657684326, |
|
"learning_rate": 1.3407972225319847e-05, |
|
"loss": 2.3628, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.8805150539670517, |
|
"grad_norm": 0.325127512216568, |
|
"learning_rate": 1.3056209752459611e-05, |
|
"loss": 2.3621, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.882882029918576, |
|
"grad_norm": 0.32378092408180237, |
|
"learning_rate": 1.270022432234713e-05, |
|
"loss": 2.3662, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.8852490058701004, |
|
"grad_norm": 0.3274565637111664, |
|
"learning_rate": 1.2340507822442868e-05, |
|
"loss": 2.3665, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.8876159818216247, |
|
"grad_norm": 0.33395031094551086, |
|
"learning_rate": 1.1977557295661108e-05, |
|
"loss": 2.3616, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.8899829577731491, |
|
"grad_norm": 0.3172805607318878, |
|
"learning_rate": 1.1611874253574492e-05, |
|
"loss": 2.3676, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.8923499337246733, |
|
"grad_norm": 0.34134411811828613, |
|
"learning_rate": 1.1243963983443936e-05, |
|
"loss": 2.361, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.8947169096761977, |
|
"grad_norm": 0.3161686658859253, |
|
"learning_rate": 1.0874334850031435e-05, |
|
"loss": 2.3653, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.897083885627722, |
|
"grad_norm": 0.31952333450317383, |
|
"learning_rate": 1.0503497593160507e-05, |
|
"loss": 2.3689, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.8994508615792464, |
|
"grad_norm": 0.3199727237224579, |
|
"learning_rate": 1.0131964621994832e-05, |
|
"loss": 2.3679, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.9018178375307707, |
|
"grad_norm": 0.3248251676559448, |
|
"learning_rate": 9.760249307010301e-06, |
|
"loss": 2.3718, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.9041848134822951, |
|
"grad_norm": 0.32015731930732727, |
|
"learning_rate": 9.388865270638724e-06, |
|
"loss": 2.3594, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.9065517894338193, |
|
"grad_norm": 0.336444228887558, |
|
"learning_rate": 9.018325677563413e-06, |
|
"loss": 2.3677, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.9089187653853437, |
|
"grad_norm": 0.3233816623687744, |
|
"learning_rate": 8.649142525647271e-06, |
|
"loss": 2.3651, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.911285741336868, |
|
"grad_norm": 0.3261754512786865, |
|
"learning_rate": 8.281825938473116e-06, |
|
"loss": 2.3586, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.9136527172883924, |
|
"grad_norm": 0.30703264474868774, |
|
"learning_rate": 7.916883460473865e-06, |
|
"loss": 2.3668, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.9160196932399167, |
|
"grad_norm": 0.3308265507221222, |
|
"learning_rate": 7.554819355626455e-06, |
|
"loss": 2.3536, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.918386669191441, |
|
"grad_norm": 0.35222479701042175, |
|
"learning_rate": 7.196133910678582e-06, |
|
"loss": 2.3635, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.9207536451429653, |
|
"grad_norm": 0.3244943618774414, |
|
"learning_rate": 6.841322743871041e-06, |
|
"loss": 2.3705, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.9231206210944897, |
|
"grad_norm": 0.3116552233695984, |
|
"learning_rate": 6.490876120110827e-06, |
|
"loss": 2.3611, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.925487597046014, |
|
"grad_norm": 0.33249032497406006, |
|
"learning_rate": 6.145278273541281e-06, |
|
"loss": 2.3585, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.9278545729975384, |
|
"grad_norm": 0.3132554888725281, |
|
"learning_rate": 5.805006738445294e-06, |
|
"loss": 2.368, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.9302215489490627, |
|
"grad_norm": 0.3264056444168091, |
|
"learning_rate": 5.4705316894061765e-06, |
|
"loss": 2.3635, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.932588524900587, |
|
"grad_norm": 0.3539658188819885, |
|
"learning_rate": 5.142315291637857e-06, |
|
"loss": 2.3624, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.9349555008521113, |
|
"grad_norm": 0.32513052225112915, |
|
"learning_rate": 4.823991412773918e-06, |
|
"loss": 2.3714, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.9373224768036357, |
|
"grad_norm": 0.3148360252380371, |
|
"learning_rate": 4.509569863501355e-06, |
|
"loss": 2.3587, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.93968945275516, |
|
"grad_norm": 0.33431729674339294, |
|
"learning_rate": 4.202734786899464e-06, |
|
"loss": 2.3719, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.9420564287066844, |
|
"grad_norm": 0.3179948031902313, |
|
"learning_rate": 3.903910156293686e-06, |
|
"loss": 2.3668, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.9444234046582086, |
|
"grad_norm": 0.3322046101093292, |
|
"learning_rate": 3.613508876472357e-06, |
|
"loss": 2.3645, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.946790380609733, |
|
"grad_norm": 0.328708291053772, |
|
"learning_rate": 3.331932213150203e-06, |
|
"loss": 2.3592, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.9491573565612573, |
|
"grad_norm": 0.31054648756980896, |
|
"learning_rate": 3.0595692385142717e-06, |
|
"loss": 2.373, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.9515243325127817, |
|
"grad_norm": 0.3367001414299011, |
|
"learning_rate": 2.79679629361839e-06, |
|
"loss": 2.3614, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.953891308464306, |
|
"grad_norm": 0.32708635926246643, |
|
"learning_rate": 2.543976468369088e-06, |
|
"loss": 2.3541, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.9562582844158304, |
|
"grad_norm": 0.32008349895477295, |
|
"learning_rate": 2.301459099821417e-06, |
|
"loss": 2.3742, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.9586252603673546, |
|
"grad_norm": 0.3486650288105011, |
|
"learning_rate": 2.0695792894779788e-06, |
|
"loss": 2.3553, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.960992236318879, |
|
"grad_norm": 0.32483014464378357, |
|
"learning_rate": 1.8486574402580858e-06, |
|
"loss": 2.3573, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.9633592122704033, |
|
"grad_norm": 0.3283023536205292, |
|
"learning_rate": 1.6389988137769153e-06, |
|
"loss": 2.3715, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.9657261882219277, |
|
"grad_norm": 0.3350400924682617, |
|
"learning_rate": 1.4408931085463206e-06, |
|
"loss": 2.3757, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.968093164173452, |
|
"grad_norm": 0.32112061977386475, |
|
"learning_rate": 1.2564174493396274e-06, |
|
"loss": 2.3816, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.9704601401249763, |
|
"grad_norm": 0.3097105324268341, |
|
"learning_rate": 1.0821003902626947e-06, |
|
"loss": 2.365, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.9728271160765006, |
|
"grad_norm": 0.3082149028778076, |
|
"learning_rate": 9.201057540173219e-07, |
|
"loss": 2.3691, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.975194092028025, |
|
"grad_norm": 0.31467440724372864, |
|
"learning_rate": 7.706573787819616e-07, |
|
"loss": 2.3787, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.9775610679795493, |
|
"grad_norm": 0.3216908872127533, |
|
"learning_rate": 6.339617667770615e-07, |
|
"loss": 2.3821, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.9799280439310737, |
|
"grad_norm": 0.32618415355682373, |
|
"learning_rate": 5.102077989279552e-07, |
|
"loss": 2.3609, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.982295019882598, |
|
"grad_norm": 0.32504287362098694, |
|
"learning_rate": 3.9956647387621507e-07, |
|
"loss": 2.3646, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.9846619958341223, |
|
"grad_norm": 0.31874212622642517, |
|
"learning_rate": 3.0219067170006445e-07, |
|
"loss": 2.3579, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.9870289717856466, |
|
"grad_norm": 0.34173473715782166, |
|
"learning_rate": 2.182149426703606e-07, |
|
"loss": 2.3719, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.989395947737171, |
|
"grad_norm": 0.32773253321647644, |
|
"learning_rate": 1.4775532133402547e-07, |
|
"loss": 2.3625, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.9917629236886953, |
|
"grad_norm": 0.34616851806640625, |
|
"learning_rate": 9.090916618180623e-08, |
|
"loss": 2.3645, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.9941298996402197, |
|
"grad_norm": 0.3184524476528168, |
|
"learning_rate": 4.775502512193164e-08, |
|
"loss": 2.3658, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.9964968755917439, |
|
"grad_norm": 0.349345862865448, |
|
"learning_rate": 1.835252694552425e-08, |
|
"loss": 2.3604, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.9988638515432683, |
|
"grad_norm": 0.33537670969963074, |
|
"learning_rate": 2.742298933747778e-09, |
|
"loss": 2.3624, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 42248, |
|
"total_flos": 6.236990962447417e+18, |
|
"train_loss": 2.428118334464832, |
|
"train_runtime": 22646.6509, |
|
"train_samples_per_second": 29.848, |
|
"train_steps_per_second": 1.866 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 42248, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.236990962447417e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|