|
{ |
|
"best_metric": 1.265087604522705, |
|
"best_model_checkpoint": "/home/datta0/models/lora_final/Meta-Llama-3-8B_magiccoder_ortho/checkpoint-148", |
|
"epoch": 0.9587044534412955, |
|
"eval_steps": 4, |
|
"global_step": 148, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.006477732793522267, |
|
"grad_norm": 4.835215091705322, |
|
"learning_rate": 7.5e-05, |
|
"loss": 1.3887, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.012955465587044534, |
|
"grad_norm": 5.698163032531738, |
|
"learning_rate": 0.00015, |
|
"loss": 1.5047, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"grad_norm": 3.3888368606567383, |
|
"learning_rate": 0.0003, |
|
"loss": 1.2463, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.025910931174089068, |
|
"eval_loss": 1.403336763381958, |
|
"eval_runtime": 26.4071, |
|
"eval_samples_per_second": 18.745, |
|
"eval_steps_per_second": 2.348, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.038866396761133605, |
|
"grad_norm": 4.008037567138672, |
|
"learning_rate": 0.00029986842451482874, |
|
"loss": 1.4275, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"grad_norm": 3.188356637954712, |
|
"learning_rate": 0.0002994739288874256, |
|
"loss": 1.3247, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.051821862348178135, |
|
"eval_loss": 1.3321858644485474, |
|
"eval_runtime": 26.4909, |
|
"eval_samples_per_second": 18.686, |
|
"eval_steps_per_second": 2.34, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06477732793522267, |
|
"grad_norm": 3.098771572113037, |
|
"learning_rate": 0.0002988172051971717, |
|
"loss": 1.2629, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"grad_norm": 2.1870386600494385, |
|
"learning_rate": 0.0002978994055605757, |
|
"loss": 1.2964, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07773279352226721, |
|
"eval_loss": 1.3196189403533936, |
|
"eval_runtime": 26.5166, |
|
"eval_samples_per_second": 18.668, |
|
"eval_steps_per_second": 2.338, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09068825910931175, |
|
"grad_norm": 3.890054225921631, |
|
"learning_rate": 0.0002967221401100708, |
|
"loss": 1.3716, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"grad_norm": 2.7932441234588623, |
|
"learning_rate": 0.00029528747416929463, |
|
"loss": 1.3289, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.10364372469635627, |
|
"eval_loss": 1.322401762008667, |
|
"eval_runtime": 26.4113, |
|
"eval_samples_per_second": 18.742, |
|
"eval_steps_per_second": 2.347, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11659919028340081, |
|
"grad_norm": 2.2833335399627686, |
|
"learning_rate": 0.00029359792462981004, |
|
"loss": 1.2691, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"grad_norm": 2.577002763748169, |
|
"learning_rate": 0.00029165645553562214, |
|
"loss": 1.3483, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12955465587044535, |
|
"eval_loss": 1.3099651336669922, |
|
"eval_runtime": 26.4006, |
|
"eval_samples_per_second": 18.75, |
|
"eval_steps_per_second": 2.348, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14251012145748987, |
|
"grad_norm": 2.350156784057617, |
|
"learning_rate": 0.00028946647288323766, |
|
"loss": 1.3168, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"grad_norm": 2.7228236198425293, |
|
"learning_rate": 0.0002870318186463901, |
|
"loss": 1.2491, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15546558704453442, |
|
"eval_loss": 1.3193615674972534, |
|
"eval_runtime": 26.2281, |
|
"eval_samples_per_second": 18.873, |
|
"eval_steps_per_second": 2.364, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 2.790332078933716, |
|
"learning_rate": 0.0002843567640359119, |
|
"loss": 1.249, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"grad_norm": 2.422020196914673, |
|
"learning_rate": 0.0002814460020065795, |
|
"loss": 1.3072, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1813765182186235, |
|
"eval_loss": 1.3144476413726807, |
|
"eval_runtime": 63.9332, |
|
"eval_samples_per_second": 7.742, |
|
"eval_steps_per_second": 0.97, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.19433198380566802, |
|
"grad_norm": 1.7604172229766846, |
|
"learning_rate": 0.000278304639024076, |
|
"loss": 1.3043, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"grad_norm": 1.9333935976028442, |
|
"learning_rate": 0.00027493818610651487, |
|
"loss": 1.2865, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.20728744939271254, |
|
"eval_loss": 1.3123451471328735, |
|
"eval_runtime": 65.1552, |
|
"eval_samples_per_second": 7.597, |
|
"eval_steps_per_second": 0.952, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2202429149797571, |
|
"grad_norm": 2.5974128246307373, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 1.3347, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"grad_norm": 2.198709726333618, |
|
"learning_rate": 0.00026755401859887595, |
|
"loss": 1.3102, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.23319838056680162, |
|
"eval_loss": 1.3170647621154785, |
|
"eval_runtime": 62.3447, |
|
"eval_samples_per_second": 7.94, |
|
"eval_steps_per_second": 0.994, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 2.3173534870147705, |
|
"learning_rate": 0.00026354925834776345, |
|
"loss": 1.2321, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"grad_norm": 3.5600650310516357, |
|
"learning_rate": 0.0002593452941132117, |
|
"loss": 1.3752, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2591093117408907, |
|
"eval_loss": 1.3158267736434937, |
|
"eval_runtime": 62.0865, |
|
"eval_samples_per_second": 7.973, |
|
"eval_steps_per_second": 0.999, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2720647773279352, |
|
"grad_norm": 2.1025493144989014, |
|
"learning_rate": 0.0002549495010770048, |
|
"loss": 1.3306, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"grad_norm": 2.076892137527466, |
|
"learning_rate": 0.0002503695909538287, |
|
"loss": 1.3244, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.28502024291497974, |
|
"eval_loss": 1.3114184141159058, |
|
"eval_runtime": 62.2069, |
|
"eval_samples_per_second": 7.957, |
|
"eval_steps_per_second": 0.997, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.2979757085020243, |
|
"grad_norm": 2.493210792541504, |
|
"learning_rate": 0.0002456135984623034, |
|
"loss": 1.2883, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"grad_norm": 1.9219117164611816, |
|
"learning_rate": 0.00024068986722935624, |
|
"loss": 1.2311, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.31093117408906884, |
|
"eval_loss": 1.3117666244506836, |
|
"eval_runtime": 62.4372, |
|
"eval_samples_per_second": 7.928, |
|
"eval_steps_per_second": 0.993, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.32388663967611336, |
|
"grad_norm": 2.240204095840454, |
|
"learning_rate": 0.00023560703515266478, |
|
"loss": 1.3414, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 2.00461483001709, |
|
"learning_rate": 0.00023037401924684946, |
|
"loss": 1.2911, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3368421052631579, |
|
"eval_loss": 1.3134574890136719, |
|
"eval_runtime": 62.0904, |
|
"eval_samples_per_second": 7.972, |
|
"eval_steps_per_second": 0.999, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3497975708502024, |
|
"grad_norm": 1.787941575050354, |
|
"learning_rate": 0.000225, |
|
"loss": 1.2649, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"grad_norm": 2.0428550243377686, |
|
"learning_rate": 0.00021949440526797926, |
|
"loss": 1.3409, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.362753036437247, |
|
"eval_loss": 1.3078958988189697, |
|
"eval_runtime": 62.252, |
|
"eval_samples_per_second": 7.952, |
|
"eval_steps_per_second": 0.996, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3757085020242915, |
|
"grad_norm": 1.9100394248962402, |
|
"learning_rate": 0.00021386689373476087, |
|
"loss": 1.2472, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"grad_norm": 1.7520307302474976, |
|
"learning_rate": 0.00020812733796781542, |
|
"loss": 1.3069, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38866396761133604, |
|
"eval_loss": 1.3043341636657715, |
|
"eval_runtime": 62.3545, |
|
"eval_samples_per_second": 7.938, |
|
"eval_steps_per_second": 0.994, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.40161943319838056, |
|
"grad_norm": 2.4810237884521484, |
|
"learning_rate": 0.00020228580709827227, |
|
"loss": 1.2735, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"grad_norm": 2.136749744415283, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 1.362, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4145748987854251, |
|
"eval_loss": 1.3125, |
|
"eval_runtime": 62.3299, |
|
"eval_samples_per_second": 7.942, |
|
"eval_steps_per_second": 0.995, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.42753036437246966, |
|
"grad_norm": 2.069849967956543, |
|
"learning_rate": 0.00019033797309228983, |
|
"loss": 1.2799, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"grad_norm": 1.8651994466781616, |
|
"learning_rate": 0.00018425263051659836, |
|
"loss": 1.2206, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4404858299595142, |
|
"eval_loss": 1.3050696849822998, |
|
"eval_runtime": 26.5576, |
|
"eval_samples_per_second": 18.639, |
|
"eval_steps_per_second": 2.335, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.4534412955465587, |
|
"grad_norm": 1.9996711015701294, |
|
"learning_rate": 0.0001781071971878587, |
|
"loss": 1.2901, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"grad_norm": 1.663189172744751, |
|
"learning_rate": 0.00017191245428436173, |
|
"loss": 1.2838, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.46639676113360323, |
|
"eval_loss": 1.2985526323318481, |
|
"eval_runtime": 26.5419, |
|
"eval_samples_per_second": 18.65, |
|
"eval_steps_per_second": 2.336, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.47935222672064776, |
|
"grad_norm": 1.6311547756195068, |
|
"learning_rate": 0.000165679269490148, |
|
"loss": 1.2626, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"grad_norm": 1.993211269378662, |
|
"learning_rate": 0.000159418577929397, |
|
"loss": 1.2348, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.49230769230769234, |
|
"eval_loss": 1.3072912693023682, |
|
"eval_runtime": 26.5378, |
|
"eval_samples_per_second": 18.653, |
|
"eval_steps_per_second": 2.336, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 1.840496301651001, |
|
"learning_rate": 0.00015314136298250354, |
|
"loss": 1.3011, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"grad_norm": 1.9565261602401733, |
|
"learning_rate": 0.00014685863701749646, |
|
"loss": 1.3171, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5182186234817814, |
|
"eval_loss": 1.2922303676605225, |
|
"eval_runtime": 26.4704, |
|
"eval_samples_per_second": 18.7, |
|
"eval_steps_per_second": 2.342, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5311740890688259, |
|
"grad_norm": 1.8801251649856567, |
|
"learning_rate": 0.000140581422070603, |
|
"loss": 1.3119, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"grad_norm": 1.784315824508667, |
|
"learning_rate": 0.000134320730509852, |
|
"loss": 1.2556, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5441295546558704, |
|
"eval_loss": 1.296497106552124, |
|
"eval_runtime": 26.4835, |
|
"eval_samples_per_second": 18.691, |
|
"eval_steps_per_second": 2.341, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.557085020242915, |
|
"grad_norm": 2.0044546127319336, |
|
"learning_rate": 0.00012808754571563827, |
|
"loss": 1.2803, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"grad_norm": 2.1428680419921875, |
|
"learning_rate": 0.00012189280281214126, |
|
"loss": 1.2803, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5700404858299595, |
|
"eval_loss": 1.291109561920166, |
|
"eval_runtime": 26.3248, |
|
"eval_samples_per_second": 18.804, |
|
"eval_steps_per_second": 2.355, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.582995951417004, |
|
"grad_norm": 1.4893293380737305, |
|
"learning_rate": 0.00011574736948340163, |
|
"loss": 1.2737, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"grad_norm": 1.5611094236373901, |
|
"learning_rate": 0.00010966202690771014, |
|
"loss": 1.3796, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5959514170040486, |
|
"eval_loss": 1.2853686809539795, |
|
"eval_runtime": 26.1611, |
|
"eval_samples_per_second": 18.921, |
|
"eval_steps_per_second": 2.37, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6089068825910932, |
|
"grad_norm": 1.776159405708313, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 1.218, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"grad_norm": 1.4612406492233276, |
|
"learning_rate": 9.771419290172773e-05, |
|
"loss": 1.2047, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6218623481781377, |
|
"eval_loss": 1.2870514392852783, |
|
"eval_runtime": 61.6553, |
|
"eval_samples_per_second": 8.029, |
|
"eval_steps_per_second": 1.006, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6348178137651822, |
|
"grad_norm": 2.135633707046509, |
|
"learning_rate": 9.187266203218456e-05, |
|
"loss": 1.325, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"grad_norm": 1.6593787670135498, |
|
"learning_rate": 8.613310626523909e-05, |
|
"loss": 1.2821, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"eval_loss": 1.2865735292434692, |
|
"eval_runtime": 62.3493, |
|
"eval_samples_per_second": 7.939, |
|
"eval_steps_per_second": 0.994, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6607287449392713, |
|
"grad_norm": 1.7937084436416626, |
|
"learning_rate": 8.050559473202077e-05, |
|
"loss": 1.3041, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"grad_norm": 1.7992043495178223, |
|
"learning_rate": 7.500000000000002e-05, |
|
"loss": 1.2012, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6736842105263158, |
|
"eval_loss": 1.2837588787078857, |
|
"eval_runtime": 62.4051, |
|
"eval_samples_per_second": 7.932, |
|
"eval_steps_per_second": 0.994, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.6866396761133603, |
|
"grad_norm": 1.7913408279418945, |
|
"learning_rate": 6.962598075315046e-05, |
|
"loss": 1.3132, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"grad_norm": 1.61056387424469, |
|
"learning_rate": 6.439296484733525e-05, |
|
"loss": 1.2116, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6995951417004048, |
|
"eval_loss": 1.2799283266067505, |
|
"eval_runtime": 62.0965, |
|
"eval_samples_per_second": 7.971, |
|
"eval_steps_per_second": 0.998, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7125506072874493, |
|
"grad_norm": 1.418599247932434, |
|
"learning_rate": 5.931013277064377e-05, |
|
"loss": 1.232, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"grad_norm": 1.4885016679763794, |
|
"learning_rate": 5.4386401537696536e-05, |
|
"loss": 1.23, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.725506072874494, |
|
"eval_loss": 1.2750165462493896, |
|
"eval_runtime": 61.731, |
|
"eval_samples_per_second": 8.019, |
|
"eval_steps_per_second": 1.004, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7384615384615385, |
|
"grad_norm": 1.9909800291061401, |
|
"learning_rate": 4.963040904617131e-05, |
|
"loss": 1.2355, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"grad_norm": 1.4655224084854126, |
|
"learning_rate": 4.5050498922995166e-05, |
|
"loss": 1.2679, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.751417004048583, |
|
"eval_loss": 1.2715320587158203, |
|
"eval_runtime": 61.9746, |
|
"eval_samples_per_second": 7.987, |
|
"eval_steps_per_second": 1.0, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7643724696356275, |
|
"grad_norm": 1.7242943048477173, |
|
"learning_rate": 4.06547058867883e-05, |
|
"loss": 1.1916, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"grad_norm": 1.4455363750457764, |
|
"learning_rate": 3.645074165223655e-05, |
|
"loss": 1.2573, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7773279352226721, |
|
"eval_loss": 1.2714204788208008, |
|
"eval_runtime": 61.4581, |
|
"eval_samples_per_second": 8.054, |
|
"eval_steps_per_second": 1.009, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7902834008097166, |
|
"grad_norm": 1.4860985279083252, |
|
"learning_rate": 3.2445981401124035e-05, |
|
"loss": 1.232, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"grad_norm": 1.4141103029251099, |
|
"learning_rate": 2.8647450843757897e-05, |
|
"loss": 1.2802, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8032388663967611, |
|
"eval_loss": 1.2692365646362305, |
|
"eval_runtime": 61.9164, |
|
"eval_samples_per_second": 7.995, |
|
"eval_steps_per_second": 1.001, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8161943319838056, |
|
"grad_norm": 1.710113763809204, |
|
"learning_rate": 2.5061813893485085e-05, |
|
"loss": 1.3136, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"grad_norm": 1.509994626045227, |
|
"learning_rate": 2.169536097592401e-05, |
|
"loss": 1.2772, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8291497975708502, |
|
"eval_loss": 1.2681289911270142, |
|
"eval_runtime": 62.1335, |
|
"eval_samples_per_second": 7.967, |
|
"eval_steps_per_second": 0.998, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8421052631578947, |
|
"grad_norm": 1.5493632555007935, |
|
"learning_rate": 1.8553997993420495e-05, |
|
"loss": 1.2752, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"grad_norm": 1.5303385257720947, |
|
"learning_rate": 1.5643235964088064e-05, |
|
"loss": 1.2594, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8550607287449393, |
|
"eval_loss": 1.2668787240982056, |
|
"eval_runtime": 62.3532, |
|
"eval_samples_per_second": 7.939, |
|
"eval_steps_per_second": 0.994, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8680161943319838, |
|
"grad_norm": 1.4996320009231567, |
|
"learning_rate": 1.2968181353609852e-05, |
|
"loss": 1.2546, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"grad_norm": 1.4413329362869263, |
|
"learning_rate": 1.0533527116762296e-05, |
|
"loss": 1.218, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8809716599190284, |
|
"eval_loss": 1.266615390777588, |
|
"eval_runtime": 26.5779, |
|
"eval_samples_per_second": 18.625, |
|
"eval_steps_per_second": 2.333, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.8939271255060729, |
|
"grad_norm": 1.725917100906372, |
|
"learning_rate": 8.343544464377849e-06, |
|
"loss": 1.2472, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"grad_norm": 1.4963750839233398, |
|
"learning_rate": 6.402075370189913e-06, |
|
"loss": 1.2391, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9068825910931174, |
|
"eval_loss": 1.265771746635437, |
|
"eval_runtime": 26.5434, |
|
"eval_samples_per_second": 18.649, |
|
"eval_steps_per_second": 2.336, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9198380566801619, |
|
"grad_norm": 1.4187538623809814, |
|
"learning_rate": 4.712525830705338e-06, |
|
"loss": 1.2176, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"grad_norm": 1.6049160957336426, |
|
"learning_rate": 3.2778598899291465e-06, |
|
"loss": 1.2084, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9327935222672065, |
|
"eval_loss": 1.2656446695327759, |
|
"eval_runtime": 26.5096, |
|
"eval_samples_per_second": 18.672, |
|
"eval_steps_per_second": 2.339, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.945748987854251, |
|
"grad_norm": 1.41164231300354, |
|
"learning_rate": 2.100594439424269e-06, |
|
"loss": 1.2457, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"grad_norm": 1.5624706745147705, |
|
"learning_rate": 1.1827948028283352e-06, |
|
"loss": 1.2245, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9587044534412955, |
|
"eval_loss": 1.265087604522705, |
|
"eval_runtime": 26.4922, |
|
"eval_samples_per_second": 18.685, |
|
"eval_steps_per_second": 2.34, |
|
"step": 148 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 154, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.028697725758341e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|