|
{ |
|
"best_metric": 1.2662755250930786, |
|
"best_model_checkpoint": "/home/datta0/models/lora_final/Meta-Llama-3-8B_magiccoder_reverse/checkpoint-152", |
|
"epoch": 0.9846153846153847, |
|
"eval_steps": 4, |
|
"global_step": 152, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006477732793522267, |
|
"grad_norm": 7.972510814666748, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3889, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012955465587044534, |
|
"grad_norm": 8.65783977508545, |
|
"learning_rate": 0.00015, |
|
"loss": 1.5051, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"grad_norm": 8.450983047485352, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2593, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"eval_loss": 1.4180008172988892, |
|
"eval_runtime": 26.5474, |
|
"eval_samples_per_second": 18.646, |
|
"eval_steps_per_second": 2.335, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.038866396761133605, |
|
"grad_norm": 10.967939376831055, |
|
"learning_rate": 0.00029986842451482874, |
|
"loss": 1.4591, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"grad_norm": 9.95240306854248, |
|
"learning_rate": 0.0002994739288874256, |
|
"loss": 1.4127, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"eval_loss": 1.395808458328247, |
|
"eval_runtime": 26.5564, |
|
"eval_samples_per_second": 18.64, |
|
"eval_steps_per_second": 2.335, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06477732793522267, |
|
"grad_norm": 7.500934600830078, |
|
"learning_rate": 0.0002988172051971717, |
|
"loss": 1.3336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"grad_norm": 10.566226959228516, |
|
"learning_rate": 0.0002978994055605757, |
|
"loss": 1.3964, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"eval_loss": 1.3813759088516235, |
|
"eval_runtime": 26.5682, |
|
"eval_samples_per_second": 18.631, |
|
"eval_steps_per_second": 2.334, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09068825910931175, |
|
"grad_norm": 6.47328519821167, |
|
"learning_rate": 0.0002967221401100708, |
|
"loss": 1.4443, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"grad_norm": 6.948887348175049, |
|
"learning_rate": 0.00029528747416929463, |
|
"loss": 1.3824, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"eval_loss": 1.3679261207580566, |
|
"eval_runtime": 26.4518, |
|
"eval_samples_per_second": 18.713, |
|
"eval_steps_per_second": 2.344, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11659919028340081, |
|
"grad_norm": 6.1149091720581055, |
|
"learning_rate": 0.00029359792462981004, |
|
"loss": 1.3267, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"grad_norm": 8.199859619140625, |
|
"learning_rate": 0.00029165645553562214, |
|
"loss": 1.4044, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"eval_loss": 1.3733282089233398, |
|
"eval_runtime": 26.4323, |
|
"eval_samples_per_second": 18.727, |
|
"eval_steps_per_second": 2.346, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14251012145748987, |
|
"grad_norm": 12.22414779663086, |
|
"learning_rate": 0.00028946647288323766, |
|
"loss": 1.4047, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"grad_norm": 7.005305290222168, |
|
"learning_rate": 0.0002870318186463901, |
|
"loss": 1.3092, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"eval_loss": 1.363875150680542, |
|
"eval_runtime": 26.302, |
|
"eval_samples_per_second": 18.82, |
|
"eval_steps_per_second": 2.357, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 12.295991897583008, |
|
"learning_rate": 0.0002843567640359119, |
|
"loss": 1.3037, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"grad_norm": 10.879226684570312, |
|
"learning_rate": 0.0002814460020065795, |
|
"loss": 1.3823, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"eval_loss": 1.4074506759643555, |
|
"eval_runtime": 52.504, |
|
"eval_samples_per_second": 9.428, |
|
"eval_steps_per_second": 1.181, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19433198380566802, |
|
"grad_norm": 18.48771095275879, |
|
"learning_rate": 0.000278304639024076, |
|
"loss": 1.414, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"grad_norm": 12.401382446289062, |
|
"learning_rate": 0.00027493818610651487, |
|
"loss": 1.3878, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"eval_loss": 1.3768298625946045, |
|
"eval_runtime": 52.6159, |
|
"eval_samples_per_second": 9.408, |
|
"eval_steps_per_second": 1.178, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2202429149797571, |
|
"grad_norm": 9.700746536254883, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 1.4156, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"grad_norm": 18.97345542907715, |
|
"learning_rate": 0.00026755401859887595, |
|
"loss": 1.3653, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"eval_loss": 1.3592467308044434, |
|
"eval_runtime": 52.6554, |
|
"eval_samples_per_second": 9.401, |
|
"eval_steps_per_second": 1.177, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 4.934860706329346, |
|
"learning_rate": 0.00026354925834776345, |
|
"loss": 1.2749, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"grad_norm": 9.491981506347656, |
|
"learning_rate": 0.0002593452941132117, |
|
"loss": 1.4395, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"eval_loss": 1.3594356775283813, |
|
"eval_runtime": 50.9109, |
|
"eval_samples_per_second": 9.723, |
|
"eval_steps_per_second": 1.218, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2720647773279352, |
|
"grad_norm": 8.243218421936035, |
|
"learning_rate": 0.0002549495010770048, |
|
"loss": 1.3816, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"grad_norm": 7.602082252502441, |
|
"learning_rate": 0.0002503695909538287, |
|
"loss": 1.3805, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"eval_loss": 1.34442937374115, |
|
"eval_runtime": 52.904, |
|
"eval_samples_per_second": 9.357, |
|
"eval_steps_per_second": 1.172, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2979757085020243, |
|
"grad_norm": 5.125858783721924, |
|
"learning_rate": 0.0002456135984623034, |
|
"loss": 1.3314, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"grad_norm": 4.09897518157959, |
|
"learning_rate": 0.00024068986722935624, |
|
"loss": 1.2631, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"eval_loss": 1.3353846073150635, |
|
"eval_runtime": 52.5161, |
|
"eval_samples_per_second": 9.426, |
|
"eval_steps_per_second": 1.181, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.32388663967611336, |
|
"grad_norm": 6.234148025512695, |
|
"learning_rate": 0.00023560703515266478, |
|
"loss": 1.3797, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 7.174957275390625, |
|
"learning_rate": 0.00023037401924684946, |
|
"loss": 1.3346, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"eval_loss": 1.3409483432769775, |
|
"eval_runtime": 52.7077, |
|
"eval_samples_per_second": 9.391, |
|
"eval_steps_per_second": 1.176, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3497975708502024, |
|
"grad_norm": 6.94388484954834, |
|
"learning_rate": 0.000225, |
|
"loss": 1.2991, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"grad_norm": 5.7069621086120605, |
|
"learning_rate": 0.00021949440526797926, |
|
"loss": 1.3776, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"eval_loss": 1.3367204666137695, |
|
"eval_runtime": 52.524, |
|
"eval_samples_per_second": 9.424, |
|
"eval_steps_per_second": 1.18, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3757085020242915, |
|
"grad_norm": 3.7093050479888916, |
|
"learning_rate": 0.00021386689373476087, |
|
"loss": 1.2811, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"grad_norm": 4.604982376098633, |
|
"learning_rate": 0.00020812733796781542, |
|
"loss": 1.3407, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"eval_loss": 1.3290596008300781, |
|
"eval_runtime": 53.0771, |
|
"eval_samples_per_second": 9.326, |
|
"eval_steps_per_second": 1.168, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40161943319838056, |
|
"grad_norm": 4.813035011291504, |
|
"learning_rate": 0.00020228580709827227, |
|
"loss": 1.3081, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"grad_norm": 5.186892509460449, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 1.3939, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"eval_loss": 1.3306726217269897, |
|
"eval_runtime": 52.8006, |
|
"eval_samples_per_second": 9.375, |
|
"eval_steps_per_second": 1.174, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.42753036437246966, |
|
"grad_norm": 4.551775932312012, |
|
"learning_rate": 0.00019033797309228983, |
|
"loss": 1.2994, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"grad_norm": 4.376237392425537, |
|
"learning_rate": 0.00018425263051659836, |
|
"loss": 1.2555, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"eval_loss": 1.325662612915039, |
|
"eval_runtime": 26.5626, |
|
"eval_samples_per_second": 18.635, |
|
"eval_steps_per_second": 2.334, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4534412955465587, |
|
"grad_norm": 3.780867338180542, |
|
"learning_rate": 0.0001781071971878587, |
|
"loss": 1.3114, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"grad_norm": 4.040090560913086, |
|
"learning_rate": 0.00017191245428436173, |
|
"loss": 1.3227, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"eval_loss": 1.3216441869735718, |
|
"eval_runtime": 26.5607, |
|
"eval_samples_per_second": 18.637, |
|
"eval_steps_per_second": 2.334, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.47935222672064776, |
|
"grad_norm": 4.139721393585205, |
|
"learning_rate": 0.000165679269490148, |
|
"loss": 1.292, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 3.38668155670166, |
|
"learning_rate": 0.000159418577929397, |
|
"loss": 1.2664, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"eval_loss": 1.323757290840149, |
|
"eval_runtime": 26.5608, |
|
"eval_samples_per_second": 18.636, |
|
"eval_steps_per_second": 2.334, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 5.189659118652344, |
|
"learning_rate": 0.00015314136298250354, |
|
"loss": 1.3246, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"grad_norm": 4.31968355178833, |
|
"learning_rate": 0.00014685863701749646, |
|
"loss": 1.3542, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"eval_loss": 1.3156659603118896, |
|
"eval_runtime": 26.5254, |
|
"eval_samples_per_second": 18.661, |
|
"eval_steps_per_second": 2.337, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5311740890688259, |
|
"grad_norm": 3.1360971927642822, |
|
"learning_rate": 0.000140581422070603, |
|
"loss": 1.3407, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"grad_norm": 4.838674068450928, |
|
"learning_rate": 0.000134320730509852, |
|
"loss": 1.2873, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"eval_loss": 1.3166635036468506, |
|
"eval_runtime": 26.4764, |
|
"eval_samples_per_second": 18.696, |
|
"eval_steps_per_second": 2.342, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.557085020242915, |
|
"grad_norm": 3.8474254608154297, |
|
"learning_rate": 0.00012808754571563827, |
|
"loss": 1.3014, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"grad_norm": 4.670653820037842, |
|
"learning_rate": 0.00012189280281214126, |
|
"loss": 1.3065, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"eval_loss": 1.310920238494873, |
|
"eval_runtime": 26.3754, |
|
"eval_samples_per_second": 18.768, |
|
"eval_steps_per_second": 2.351, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.582995951417004, |
|
"grad_norm": 2.9593794345855713, |
|
"learning_rate": 0.00011574736948340163, |
|
"loss": 1.2962, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"grad_norm": 4.062413215637207, |
|
"learning_rate": 0.00010966202690771014, |
|
"loss": 1.4021, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"eval_loss": 1.3056426048278809, |
|
"eval_runtime": 26.2485, |
|
"eval_samples_per_second": 18.858, |
|
"eval_steps_per_second": 2.362, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6089068825910932, |
|
"grad_norm": 3.7216620445251465, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 1.2427, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"grad_norm": 3.2399404048919678, |
|
"learning_rate": 9.771419290172773e-05, |
|
"loss": 1.2277, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"eval_loss": 1.3020737171173096, |
|
"eval_runtime": 52.6322, |
|
"eval_samples_per_second": 9.405, |
|
"eval_steps_per_second": 1.178, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6348178137651822, |
|
"grad_norm": 3.834902286529541, |
|
"learning_rate": 9.187266203218456e-05, |
|
"loss": 1.3496, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"grad_norm": 3.2897722721099854, |
|
"learning_rate": 8.613310626523909e-05, |
|
"loss": 1.3014, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"eval_loss": 1.2999552488327026, |
|
"eval_runtime": 54.2992, |
|
"eval_samples_per_second": 9.116, |
|
"eval_steps_per_second": 1.142, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6607287449392713, |
|
"grad_norm": 3.6135973930358887, |
|
"learning_rate": 8.050559473202077e-05, |
|
"loss": 1.3298, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"grad_norm": 3.015789270401001, |
|
"learning_rate": 7.500000000000002e-05, |
|
"loss": 1.2138, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"eval_loss": 1.291136384010315, |
|
"eval_runtime": 51.4763, |
|
"eval_samples_per_second": 9.616, |
|
"eval_steps_per_second": 1.204, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6866396761133603, |
|
"grad_norm": 4.030185699462891, |
|
"learning_rate": 6.962598075315046e-05, |
|
"loss": 1.3277, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"grad_norm": 3.46356463432312, |
|
"learning_rate": 6.439296484733525e-05, |
|
"loss": 1.2367, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"eval_loss": 1.2903276681900024, |
|
"eval_runtime": 52.7315, |
|
"eval_samples_per_second": 9.387, |
|
"eval_steps_per_second": 1.176, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7125506072874493, |
|
"grad_norm": 2.628225326538086, |
|
"learning_rate": 5.931013277064377e-05, |
|
"loss": 1.2445, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"grad_norm": 3.025768280029297, |
|
"learning_rate": 5.4386401537696536e-05, |
|
"loss": 1.2501, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"eval_loss": 1.2843749523162842, |
|
"eval_runtime": 49.1826, |
|
"eval_samples_per_second": 10.065, |
|
"eval_steps_per_second": 1.261, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 3.5894341468811035, |
|
"learning_rate": 4.963040904617131e-05, |
|
"loss": 1.2593, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"grad_norm": 2.926424503326416, |
|
"learning_rate": 4.5050498922995166e-05, |
|
"loss": 1.2942, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"eval_loss": 1.2818949222564697, |
|
"eval_runtime": 53.1275, |
|
"eval_samples_per_second": 9.317, |
|
"eval_steps_per_second": 1.167, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7643724696356275, |
|
"grad_norm": 2.722059726715088, |
|
"learning_rate": 4.06547058867883e-05, |
|
"loss": 1.2031, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"grad_norm": 3.154292583465576, |
|
"learning_rate": 3.645074165223655e-05, |
|
"loss": 1.2762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"eval_loss": 1.2779730558395386, |
|
"eval_runtime": 52.538, |
|
"eval_samples_per_second": 9.422, |
|
"eval_steps_per_second": 1.18, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7902834008097166, |
|
"grad_norm": 2.4806008338928223, |
|
"learning_rate": 3.2445981401124035e-05, |
|
"loss": 1.2446, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"grad_norm": 3.3211684226989746, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 1.2871, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"eval_loss": 1.2753314971923828, |
|
"eval_runtime": 52.7282, |
|
"eval_samples_per_second": 9.388, |
|
"eval_steps_per_second": 1.176, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8161943319838056, |
|
"grad_norm": 2.937831163406372, |
|
"learning_rate": 2.5061813893485085e-05, |
|
"loss": 1.3213, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"grad_norm": 2.3290576934814453, |
|
"learning_rate": 2.169536097592401e-05, |
|
"loss": 1.2829, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"eval_loss": 1.2724393606185913, |
|
"eval_runtime": 52.6961, |
|
"eval_samples_per_second": 9.393, |
|
"eval_steps_per_second": 1.177, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 2.81623911857605, |
|
"learning_rate": 1.8553997993420495e-05, |
|
"loss": 1.2818, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"grad_norm": 4.030830383300781, |
|
"learning_rate": 1.5643235964088064e-05, |
|
"loss": 1.272, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"eval_loss": 1.269489049911499, |
|
"eval_runtime": 52.9327, |
|
"eval_samples_per_second": 9.351, |
|
"eval_steps_per_second": 1.171, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8680161943319838, |
|
"grad_norm": 2.359203815460205, |
|
"learning_rate": 1.2968181353609852e-05, |
|
"loss": 1.2634, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"grad_norm": 3.011812925338745, |
|
"learning_rate": 1.0533527116762296e-05, |
|
"loss": 1.2242, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"eval_loss": 1.2684744596481323, |
|
"eval_runtime": 26.5394, |
|
"eval_samples_per_second": 18.652, |
|
"eval_steps_per_second": 2.336, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8939271255060729, |
|
"grad_norm": 3.0021610260009766, |
|
"learning_rate": 8.343544464377849e-06, |
|
"loss": 1.2567, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"grad_norm": 2.6871109008789062, |
|
"learning_rate": 6.402075370189913e-06, |
|
"loss": 1.253, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"eval_loss": 1.2677539587020874, |
|
"eval_runtime": 26.5802, |
|
"eval_samples_per_second": 18.623, |
|
"eval_steps_per_second": 2.333, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9198380566801619, |
|
"grad_norm": 2.9042418003082275, |
|
"learning_rate": 4.712525830705338e-06, |
|
"loss": 1.2273, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"grad_norm": 2.7735819816589355, |
|
"learning_rate": 3.2778598899291465e-06, |
|
"loss": 1.2116, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"eval_loss": 1.26712167263031, |
|
"eval_runtime": 26.5685, |
|
"eval_samples_per_second": 18.631, |
|
"eval_steps_per_second": 2.334, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.945748987854251, |
|
"grad_norm": 3.1122586727142334, |
|
"learning_rate": 2.100594439424269e-06, |
|
"loss": 1.2549, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"grad_norm": 2.9394922256469727, |
|
"learning_rate": 1.1827948028283352e-06, |
|
"loss": 1.2356, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"eval_loss": 1.2666473388671875, |
|
"eval_runtime": 26.5192, |
|
"eval_samples_per_second": 18.666, |
|
"eval_steps_per_second": 2.338, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.97165991902834, |
|
"grad_norm": 3.1697134971618652, |
|
"learning_rate": 5.260711125743444e-07, |
|
"loss": 1.2141, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"grad_norm": 2.3742239475250244, |
|
"learning_rate": 1.315754851712425e-07, |
|
"loss": 1.2087, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.9846153846153847, |
|
"eval_loss": 1.2662755250930786, |
|
"eval_runtime": 26.4031, |
|
"eval_samples_per_second": 18.748, |
|
"eval_steps_per_second": 2.348, |
|
"step": 152 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 154, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.140047273385001e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|