|
{ |
|
"best_metric": 0.9220191240310669, |
|
"best_model_checkpoint": "/home/datta0/models/lora_final/Qwen2-7B_magiccoder_default/checkpoint-4", |
|
"epoch": 0.99836867862969, |
|
"eval_steps": 4, |
|
"global_step": 153, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0065252854812398045, |
|
"grad_norm": 6.940319538116455, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.8051, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.013050570962479609, |
|
"grad_norm": 5.597632884979248, |
|
"learning_rate": 0.00015, |
|
"loss": 0.9302, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.026101141924959218, |
|
"grad_norm": 4.383840084075928, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8215, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.026101141924959218, |
|
"eval_loss": 0.9220191240310669, |
|
"eval_runtime": 24.7526, |
|
"eval_samples_per_second": 19.756, |
|
"eval_steps_per_second": 2.505, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03915171288743882, |
|
"grad_norm": 3.2939515113830566, |
|
"learning_rate": 0.00029986665273697545, |
|
"loss": 0.8629, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.052202283849918436, |
|
"grad_norm": 49.209835052490234, |
|
"learning_rate": 0.0002994668480344693, |
|
"loss": 0.9247, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.052202283849918436, |
|
"eval_loss": 0.9779874086380005, |
|
"eval_runtime": 24.7429, |
|
"eval_samples_per_second": 19.763, |
|
"eval_steps_per_second": 2.506, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06525285481239804, |
|
"grad_norm": 2.9049675464630127, |
|
"learning_rate": 0.0002988012967306524, |
|
"loss": 0.9425, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07830342577487764, |
|
"grad_norm": 2.8929295539855957, |
|
"learning_rate": 0.000297871182151455, |
|
"loss": 0.9611, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07830342577487764, |
|
"eval_loss": 0.9693307876586914, |
|
"eval_runtime": 24.7094, |
|
"eval_samples_per_second": 19.79, |
|
"eval_steps_per_second": 2.509, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09135399673735727, |
|
"grad_norm": 2.5417490005493164, |
|
"learning_rate": 0.00029667815800665635, |
|
"loss": 0.9792, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10440456769983687, |
|
"grad_norm": 2.707855224609375, |
|
"learning_rate": 0.0002952243454496488, |
|
"loss": 0.9392, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10440456769983687, |
|
"eval_loss": 0.9866985082626343, |
|
"eval_runtime": 24.6246, |
|
"eval_samples_per_second": 19.858, |
|
"eval_steps_per_second": 2.518, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11745513866231648, |
|
"grad_norm": 7.373922348022461, |
|
"learning_rate": 0.0002935123293061047, |
|
"loss": 0.9393, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13050570962479607, |
|
"grad_norm": 2.5633223056793213, |
|
"learning_rate": 0.0002915451534782506, |
|
"loss": 1.0135, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13050570962479607, |
|
"eval_loss": 1.0108040571212769, |
|
"eval_runtime": 24.6129, |
|
"eval_samples_per_second": 19.868, |
|
"eval_steps_per_second": 2.519, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14355628058727568, |
|
"grad_norm": 2.2816696166992188, |
|
"learning_rate": 0.0002893263155329204, |
|
"loss": 1.0003, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1566068515497553, |
|
"grad_norm": 2.6352310180664062, |
|
"learning_rate": 0.00028685976048300875, |
|
"loss": 0.9152, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1566068515497553, |
|
"eval_loss": 1.0166871547698975, |
|
"eval_runtime": 24.4896, |
|
"eval_samples_per_second": 19.968, |
|
"eval_steps_per_second": 2.532, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16965742251223492, |
|
"grad_norm": 2.428823232650757, |
|
"learning_rate": 0.00028414987377338235, |
|
"loss": 1.0468, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.18270799347471453, |
|
"grad_norm": 2.590581178665161, |
|
"learning_rate": 0.0002812014734837191, |
|
"loss": 0.9298, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.18270799347471453, |
|
"eval_loss": 1.0250943899154663, |
|
"eval_runtime": 54.7083, |
|
"eval_samples_per_second": 8.938, |
|
"eval_steps_per_second": 1.133, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19575856443719414, |
|
"grad_norm": 2.5896878242492676, |
|
"learning_rate": 0.0002780198017621379, |
|
"loss": 1.0095, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20880913539967375, |
|
"grad_norm": 2.394001007080078, |
|
"learning_rate": 0.00027461051550485116, |
|
"loss": 1.0625, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20880913539967375, |
|
"eval_loss": 1.0349429845809937, |
|
"eval_runtime": 54.8183, |
|
"eval_samples_per_second": 8.92, |
|
"eval_steps_per_second": 1.131, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.22185970636215335, |
|
"grad_norm": 2.3402562141418457, |
|
"learning_rate": 0.00027097967629840906, |
|
"loss": 0.9817, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23491027732463296, |
|
"grad_norm": 2.0935347080230713, |
|
"learning_rate": 0.0002671337396424204, |
|
"loss": 0.9695, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23491027732463296, |
|
"eval_loss": 1.0332014560699463, |
|
"eval_runtime": 55.2086, |
|
"eval_samples_per_second": 8.857, |
|
"eval_steps_per_second": 1.123, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24796084828711257, |
|
"grad_norm": 1.9977389574050903, |
|
"learning_rate": 0.00026307954347190983, |
|
"loss": 0.9429, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.26101141924959215, |
|
"grad_norm": 2.104321241378784, |
|
"learning_rate": 0.00025882429599971866, |
|
"loss": 1.0104, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26101141924959215, |
|
"eval_loss": 1.0390156507492065, |
|
"eval_runtime": 55.2819, |
|
"eval_samples_per_second": 8.846, |
|
"eval_steps_per_second": 1.122, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2740619902120718, |
|
"grad_norm": 1.943311095237732, |
|
"learning_rate": 0.0002543755629005657, |
|
"loss": 0.9952, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.28711256117455136, |
|
"grad_norm": 2.2244155406951904, |
|
"learning_rate": 0.0002497412538595537, |
|
"loss": 1.0721, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28711256117455136, |
|
"eval_loss": 1.0405514240264893, |
|
"eval_runtime": 55.1226, |
|
"eval_samples_per_second": 8.871, |
|
"eval_steps_per_second": 1.125, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.300163132137031, |
|
"grad_norm": 2.213677406311035, |
|
"learning_rate": 0.00024492960850903755, |
|
"loss": 0.9997, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.3132137030995106, |
|
"grad_norm": 2.108431100845337, |
|
"learning_rate": 0.00023994918177885902, |
|
"loss": 1.0397, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3132137030995106, |
|
"eval_loss": 1.0448977947235107, |
|
"eval_runtime": 55.076, |
|
"eval_samples_per_second": 8.879, |
|
"eval_steps_per_second": 1.126, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.3262642740619902, |
|
"grad_norm": 2.3814570903778076, |
|
"learning_rate": 0.0002348088286859938, |
|
"loss": 1.0839, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33931484502446985, |
|
"grad_norm": 2.261181116104126, |
|
"learning_rate": 0.00022951768859065402, |
|
"loss": 0.9623, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.33931484502446985, |
|
"eval_loss": 1.0447765588760376, |
|
"eval_runtime": 55.341, |
|
"eval_samples_per_second": 8.836, |
|
"eval_steps_per_second": 1.12, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3523654159869494, |
|
"grad_norm": 2.159951686859131, |
|
"learning_rate": 0.0002240851689468395, |
|
"loss": 0.9753, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.36541598694942906, |
|
"grad_norm": 2.21645188331604, |
|
"learning_rate": 0.00021852092857622808, |
|
"loss": 0.9735, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.36541598694942906, |
|
"eval_loss": 1.0435727834701538, |
|
"eval_runtime": 54.9591, |
|
"eval_samples_per_second": 8.898, |
|
"eval_steps_per_second": 1.128, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.37846655791190864, |
|
"grad_norm": 2.0440618991851807, |
|
"learning_rate": 0.00021283486049514277, |
|
"loss": 1.051, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3915171288743883, |
|
"grad_norm": 2.2410900592803955, |
|
"learning_rate": 0.00020703707432513004, |
|
"loss": 1.0016, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3915171288743883, |
|
"eval_loss": 1.043070673942566, |
|
"eval_runtime": 55.0676, |
|
"eval_samples_per_second": 8.88, |
|
"eval_steps_per_second": 1.126, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40456769983686786, |
|
"grad_norm": 2.09979248046875, |
|
"learning_rate": 0.00020113787831842152, |
|
"loss": 0.9375, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4176182707993475, |
|
"grad_norm": 2.2402355670928955, |
|
"learning_rate": 0.0001951477610302378, |
|
"loss": 1.0557, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4176182707993475, |
|
"eval_loss": 1.0401309728622437, |
|
"eval_runtime": 55.6078, |
|
"eval_samples_per_second": 8.794, |
|
"eval_steps_per_second": 1.115, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.43066884176182707, |
|
"grad_norm": 2.0318586826324463, |
|
"learning_rate": 0.0001890773726705198, |
|
"loss": 1.0214, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4437194127243067, |
|
"grad_norm": 2.138606309890747, |
|
"learning_rate": 0.00018293750616824443, |
|
"loss": 1.0377, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4437194127243067, |
|
"eval_loss": 1.0372790098190308, |
|
"eval_runtime": 24.7257, |
|
"eval_samples_per_second": 19.777, |
|
"eval_steps_per_second": 2.508, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4567699836867863, |
|
"grad_norm": 1.7595700025558472, |
|
"learning_rate": 0.00017673907798199052, |
|
"loss": 1.0546, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4698205546492659, |
|
"grad_norm": 1.987815022468567, |
|
"learning_rate": 0.000170493108690874, |
|
"loss": 1.0022, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4698205546492659, |
|
"eval_loss": 1.0360997915267944, |
|
"eval_runtime": 24.7474, |
|
"eval_samples_per_second": 19.76, |
|
"eval_steps_per_second": 2.505, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4828711256117455, |
|
"grad_norm": 1.9406994581222534, |
|
"learning_rate": 0.00016421070340036023, |
|
"loss": 1.0372, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.49592169657422513, |
|
"grad_norm": 2.0799319744110107, |
|
"learning_rate": 0.00015790303199779193, |
|
"loss": 1.0193, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49592169657422513, |
|
"eval_loss": 1.0328214168548584, |
|
"eval_runtime": 24.7131, |
|
"eval_samples_per_second": 19.787, |
|
"eval_steps_per_second": 2.509, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5089722675367048, |
|
"grad_norm": 2.057676315307617, |
|
"learning_rate": 0.00015158130929273695, |
|
"loss": 0.9597, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5220228384991843, |
|
"grad_norm": 2.00854754447937, |
|
"learning_rate": 0.00014525677507746615, |
|
"loss": 0.9806, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5220228384991843, |
|
"eval_loss": 1.0301356315612793, |
|
"eval_runtime": 24.7052, |
|
"eval_samples_per_second": 19.793, |
|
"eval_steps_per_second": 2.51, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5350734094616639, |
|
"grad_norm": 1.9844895601272583, |
|
"learning_rate": 0.00013894067414301314, |
|
"loss": 1.068, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5481239804241436, |
|
"grad_norm": 1.9492027759552002, |
|
"learning_rate": 0.0001326442362863458, |
|
"loss": 1.0542, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5481239804241436, |
|
"eval_loss": 1.0262655019760132, |
|
"eval_runtime": 24.6275, |
|
"eval_samples_per_second": 19.856, |
|
"eval_steps_per_second": 2.518, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5611745513866232, |
|
"grad_norm": 1.8868807554244995, |
|
"learning_rate": 0.00012637865634419735, |
|
"loss": 1.0136, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5742251223491027, |
|
"grad_norm": 1.9024137258529663, |
|
"learning_rate": 0.00012015507428905507, |
|
"loss": 0.9692, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5742251223491027, |
|
"eval_loss": 1.024366021156311, |
|
"eval_runtime": 24.5294, |
|
"eval_samples_per_second": 19.935, |
|
"eval_steps_per_second": 2.528, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5872756933115824, |
|
"grad_norm": 2.2372443675994873, |
|
"learning_rate": 0.00011398455542269575, |
|
"loss": 0.9305, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.600326264274062, |
|
"grad_norm": 1.8708783388137817, |
|
"learning_rate": 0.00010787807070248305, |
|
"loss": 1.0464, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.600326264274062, |
|
"eval_loss": 1.0215392112731934, |
|
"eval_runtime": 24.4525, |
|
"eval_samples_per_second": 19.998, |
|
"eval_steps_per_second": 2.536, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6133768352365416, |
|
"grad_norm": 2.0300116539001465, |
|
"learning_rate": 0.00010184647723540557, |
|
"loss": 0.9709, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6264274061990212, |
|
"grad_norm": 2.0198493003845215, |
|
"learning_rate": 9.590049897453668e-05, |
|
"loss": 0.9771, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6264274061990212, |
|
"eval_loss": 1.01658034324646, |
|
"eval_runtime": 53.8396, |
|
"eval_samples_per_second": 9.083, |
|
"eval_steps_per_second": 1.152, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6394779771615008, |
|
"grad_norm": 1.8200911283493042, |
|
"learning_rate": 9.005070765223768e-05, |
|
"loss": 1.0565, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6525285481239804, |
|
"grad_norm": 2.173635721206665, |
|
"learning_rate": 8.430750398400308e-05, |
|
"loss": 1.0659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6525285481239804, |
|
"eval_loss": 1.0145906209945679, |
|
"eval_runtime": 55.4651, |
|
"eval_samples_per_second": 8.816, |
|
"eval_steps_per_second": 1.118, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6655791190864601, |
|
"grad_norm": 1.9142309427261353, |
|
"learning_rate": 7.868109917636821e-05, |
|
"loss": 0.9761, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6786296900489397, |
|
"grad_norm": 1.9679898023605347, |
|
"learning_rate": 7.318149677175675e-05, |
|
"loss": 0.9476, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6786296900489397, |
|
"eval_loss": 1.0106278657913208, |
|
"eval_runtime": 55.6719, |
|
"eval_samples_per_second": 8.784, |
|
"eval_steps_per_second": 1.114, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6916802610114192, |
|
"grad_norm": 1.9258702993392944, |
|
"learning_rate": 6.781847486254697e-05, |
|
"loss": 0.963, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7047308319738989, |
|
"grad_norm": 2.029904842376709, |
|
"learning_rate": 6.260156870598071e-05, |
|
"loss": 0.983, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7047308319738989, |
|
"eval_loss": 1.0074269771575928, |
|
"eval_runtime": 57.0045, |
|
"eval_samples_per_second": 8.578, |
|
"eval_steps_per_second": 1.088, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7177814029363785, |
|
"grad_norm": 1.779940128326416, |
|
"learning_rate": 5.7540053770823644e-05, |
|
"loss": 0.9698, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7308319738988581, |
|
"grad_norm": 2.0144851207733154, |
|
"learning_rate": 5.264292924592073e-05, |
|
"loss": 0.9585, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7308319738988581, |
|
"eval_loss": 1.0034711360931396, |
|
"eval_runtime": 57.5133, |
|
"eval_samples_per_second": 8.502, |
|
"eval_steps_per_second": 1.078, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7438825448613376, |
|
"grad_norm": 1.9726147651672363, |
|
"learning_rate": 4.791890203996634e-05, |
|
"loss": 0.9865, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7569331158238173, |
|
"grad_norm": 1.7042125463485718, |
|
"learning_rate": 4.3376371300938786e-05, |
|
"loss": 0.9193, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7569331158238173, |
|
"eval_loss": 0.9996815323829651, |
|
"eval_runtime": 57.7466, |
|
"eval_samples_per_second": 8.468, |
|
"eval_steps_per_second": 1.074, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7699836867862969, |
|
"grad_norm": 1.8329825401306152, |
|
"learning_rate": 3.9023413482721426e-05, |
|
"loss": 0.9742, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7830342577487766, |
|
"grad_norm": 1.861943006515503, |
|
"learning_rate": 3.4867767985462507e-05, |
|
"loss": 0.9041, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7830342577487766, |
|
"eval_loss": 0.9974753260612488, |
|
"eval_runtime": 55.1354, |
|
"eval_samples_per_second": 8.869, |
|
"eval_steps_per_second": 1.125, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7960848287112561, |
|
"grad_norm": 1.8173584938049316, |
|
"learning_rate": 3.09168233952042e-05, |
|
"loss": 1.026, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8091353996737357, |
|
"grad_norm": 1.79753839969635, |
|
"learning_rate": 2.717760434724613e-05, |
|
"loss": 0.9697, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8091353996737357, |
|
"eval_loss": 0.9954367876052856, |
|
"eval_runtime": 55.5077, |
|
"eval_samples_per_second": 8.81, |
|
"eval_steps_per_second": 1.117, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8221859706362153, |
|
"grad_norm": 1.7292028665542603, |
|
"learning_rate": 2.3656759036600187e-05, |
|
"loss": 0.9747, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.835236541598695, |
|
"grad_norm": 1.9664617776870728, |
|
"learning_rate": 2.0360547397742523e-05, |
|
"loss": 0.9464, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.835236541598695, |
|
"eval_loss": 0.9932743906974792, |
|
"eval_runtime": 57.4493, |
|
"eval_samples_per_second": 8.512, |
|
"eval_steps_per_second": 1.079, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8482871125611745, |
|
"grad_norm": 1.82283616065979, |
|
"learning_rate": 1.7294829974678338e-05, |
|
"loss": 0.9256, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8613376835236541, |
|
"grad_norm": 1.9917670488357544, |
|
"learning_rate": 1.4465057501108546e-05, |
|
"loss": 1.0252, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8613376835236541, |
|
"eval_loss": 0.9916940927505493, |
|
"eval_runtime": 55.6062, |
|
"eval_samples_per_second": 8.794, |
|
"eval_steps_per_second": 1.115, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8743882544861338, |
|
"grad_norm": 1.8504716157913208, |
|
"learning_rate": 1.1876261209224314e-05, |
|
"loss": 0.9374, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8874388254486134, |
|
"grad_norm": 1.6590113639831543, |
|
"learning_rate": 9.533043884359615e-06, |
|
"loss": 0.9665, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8874388254486134, |
|
"eval_loss": 0.9909241199493408, |
|
"eval_runtime": 24.7544, |
|
"eval_samples_per_second": 19.754, |
|
"eval_steps_per_second": 2.505, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9004893964110929, |
|
"grad_norm": 1.7258245944976807, |
|
"learning_rate": 7.439571681407053e-06, |
|
"loss": 1.0069, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9135399673735726, |
|
"grad_norm": 1.87185537815094, |
|
"learning_rate": 5.59956671754635e-06, |
|
"loss": 0.9948, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9135399673735726, |
|
"eval_loss": 0.9903515577316284, |
|
"eval_runtime": 24.7378, |
|
"eval_samples_per_second": 19.767, |
|
"eval_steps_per_second": 2.506, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9265905383360522, |
|
"grad_norm": 1.9415644407272339, |
|
"learning_rate": 4.016300454455945e-06, |
|
"loss": 1.0008, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9396411092985318, |
|
"grad_norm": 1.9181973934173584, |
|
"learning_rate": 2.692587881773478e-06, |
|
"loss": 0.946, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9396411092985318, |
|
"eval_loss": 0.9896851778030396, |
|
"eval_runtime": 24.7297, |
|
"eval_samples_per_second": 19.774, |
|
"eval_steps_per_second": 2.507, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9526916802610114, |
|
"grad_norm": 1.8300237655639648, |
|
"learning_rate": 1.6307825121469164e-06, |
|
"loss": 0.9866, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.965742251223491, |
|
"grad_norm": 1.893951177597046, |
|
"learning_rate": 8.327721967749779e-07, |
|
"loss": 1.0095, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.965742251223491, |
|
"eval_loss": 0.9895658493041992, |
|
"eval_runtime": 24.6594, |
|
"eval_samples_per_second": 19.83, |
|
"eval_steps_per_second": 2.514, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9787928221859706, |
|
"grad_norm": 1.895480990409851, |
|
"learning_rate": 2.9997576887660913e-07, |
|
"loss": 0.9295, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9918433931484503, |
|
"grad_norm": 1.8694380521774292, |
|
"learning_rate": 3.334052105728458e-08, |
|
"loss": 0.9675, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9918433931484503, |
|
"eval_loss": 0.9894064664840698, |
|
"eval_runtime": 24.5775, |
|
"eval_samples_per_second": 19.896, |
|
"eval_steps_per_second": 2.523, |
|
"step": 152 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 153, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.85963932651946e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|