{ "best_metric": 3.1026012897491455, "best_model_checkpoint": "./model_tweets_2020_Q3_75/checkpoint-1408000", "epoch": 20.28620453565723, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "eval_loss": 3.451251983642578, "eval_runtime": 109.3752, "eval_samples_per_second": 910.865, "eval_steps_per_second": 56.932, "step": 8000 }, { "epoch": 0.14, "learning_rate": 4.0726666666666665e-07, "loss": 3.6645, "step": 16000 }, { "epoch": 0.14, "eval_loss": 3.379037380218506, "eval_runtime": 110.7092, "eval_samples_per_second": 899.889, "eval_steps_per_second": 56.246, "step": 16000 }, { "epoch": 0.2, "eval_loss": 3.322641611099243, "eval_runtime": 110.5211, "eval_samples_per_second": 901.42, "eval_steps_per_second": 56.342, "step": 24000 }, { "epoch": 0.27, "learning_rate": 4.0453333333333336e-07, "loss": 3.4678, "step": 32000 }, { "epoch": 0.27, "eval_loss": 3.290071964263916, "eval_runtime": 110.2146, "eval_samples_per_second": 903.927, "eval_steps_per_second": 56.499, "step": 32000 }, { "epoch": 0.34, "eval_loss": 3.2590460777282715, "eval_runtime": 110.8956, "eval_samples_per_second": 898.376, "eval_steps_per_second": 56.152, "step": 40000 }, { "epoch": 0.41, "learning_rate": 4.018e-07, "loss": 3.4118, "step": 48000 }, { "epoch": 0.41, "eval_loss": 3.2533791065216064, "eval_runtime": 111.7727, "eval_samples_per_second": 891.327, "eval_steps_per_second": 55.711, "step": 48000 }, { "epoch": 0.47, "eval_loss": 3.235682725906372, "eval_runtime": 111.5419, "eval_samples_per_second": 893.171, "eval_steps_per_second": 55.827, "step": 56000 }, { "epoch": 0.54, "learning_rate": 3.9906666666666667e-07, "loss": 3.3843, "step": 64000 }, { "epoch": 0.54, "eval_loss": 3.222811460494995, "eval_runtime": 111.4969, "eval_samples_per_second": 893.532, "eval_steps_per_second": 55.849, "step": 64000 }, { "epoch": 0.61, "eval_loss": 3.233112096786499, "eval_runtime": 111.9875, "eval_samples_per_second": 889.617, "eval_steps_per_second": 55.604, "step": 72000 }, { "epoch": 0.68, "learning_rate": 3.963333333333333e-07, "loss": 3.3633, "step": 80000 }, { "epoch": 0.68, "eval_loss": 3.204684019088745, "eval_runtime": 111.4416, "eval_samples_per_second": 893.975, "eval_steps_per_second": 55.877, "step": 80000 }, { "epoch": 0.74, "eval_loss": 3.213789463043213, "eval_runtime": 111.3931, "eval_samples_per_second": 894.364, "eval_steps_per_second": 55.901, "step": 88000 }, { "epoch": 0.81, "learning_rate": 3.936e-07, "loss": 3.3474, "step": 96000 }, { "epoch": 0.81, "eval_loss": 3.2049667835235596, "eval_runtime": 111.0388, "eval_samples_per_second": 897.218, "eval_steps_per_second": 56.08, "step": 96000 }, { "epoch": 0.88, "eval_loss": 3.2050797939300537, "eval_runtime": 111.9364, "eval_samples_per_second": 890.024, "eval_steps_per_second": 55.63, "step": 104000 }, { "epoch": 0.95, "learning_rate": 3.908666666666667e-07, "loss": 3.3414, "step": 112000 }, { "epoch": 0.95, "eval_loss": 3.1929500102996826, "eval_runtime": 111.7306, "eval_samples_per_second": 891.663, "eval_steps_per_second": 55.732, "step": 112000 }, { "epoch": 1.01, "eval_loss": 3.200173854827881, "eval_runtime": 112.2148, "eval_samples_per_second": 887.815, "eval_steps_per_second": 55.492, "step": 120000 }, { "epoch": 1.08, "learning_rate": 3.8813333333333334e-07, "loss": 3.335, "step": 128000 }, { "epoch": 1.08, "eval_loss": 3.1920297145843506, "eval_runtime": 112.5389, "eval_samples_per_second": 885.258, "eval_steps_per_second": 55.332, "step": 128000 }, { "epoch": 1.15, "eval_loss": 3.1913902759552, "eval_runtime": 113.3938, "eval_samples_per_second": 878.584, "eval_steps_per_second": 54.915, "step": 136000 }, { "epoch": 1.22, "learning_rate": 3.854e-07, "loss": 3.3283, "step": 144000 }, { "epoch": 1.22, "eval_loss": 3.1852760314941406, "eval_runtime": 112.5608, "eval_samples_per_second": 885.086, "eval_steps_per_second": 55.321, "step": 144000 }, { "epoch": 1.28, "eval_loss": 3.1824891567230225, "eval_runtime": 111.9288, "eval_samples_per_second": 890.084, "eval_steps_per_second": 55.634, "step": 152000 }, { "epoch": 1.35, "learning_rate": 3.8266666666666665e-07, "loss": 3.3276, "step": 160000 }, { "epoch": 1.35, "eval_loss": 3.1827123165130615, "eval_runtime": 111.957, "eval_samples_per_second": 889.86, "eval_steps_per_second": 55.62, "step": 160000 }, { "epoch": 1.42, "eval_loss": 3.175551176071167, "eval_runtime": 111.803, "eval_samples_per_second": 891.086, "eval_steps_per_second": 55.696, "step": 168000 }, { "epoch": 1.49, "learning_rate": 3.799333333333333e-07, "loss": 3.323, "step": 176000 }, { "epoch": 1.49, "eval_loss": 3.186506509780884, "eval_runtime": 111.1821, "eval_samples_per_second": 896.061, "eval_steps_per_second": 56.007, "step": 176000 }, { "epoch": 1.56, "eval_loss": 3.174856662750244, "eval_runtime": 111.9657, "eval_samples_per_second": 889.79, "eval_steps_per_second": 55.615, "step": 184000 }, { "epoch": 1.62, "learning_rate": 3.772e-07, "loss": 3.3275, "step": 192000 }, { "epoch": 1.62, "eval_loss": 3.1782140731811523, "eval_runtime": 112.4385, "eval_samples_per_second": 886.049, "eval_steps_per_second": 55.381, "step": 192000 }, { "epoch": 1.69, "eval_loss": 3.167551040649414, "eval_runtime": 110.6744, "eval_samples_per_second": 900.172, "eval_steps_per_second": 56.264, "step": 200000 }, { "epoch": 1.76, "learning_rate": 3.7446666666666667e-07, "loss": 3.3309, "step": 208000 }, { "epoch": 1.76, "eval_loss": 3.1831653118133545, "eval_runtime": 111.0316, "eval_samples_per_second": 897.276, "eval_steps_per_second": 56.083, "step": 208000 }, { "epoch": 1.83, "eval_loss": 3.1743879318237305, "eval_runtime": 111.5589, "eval_samples_per_second": 893.035, "eval_steps_per_second": 55.818, "step": 216000 }, { "epoch": 1.89, "learning_rate": 3.7173333333333333e-07, "loss": 3.3166, "step": 224000 }, { "epoch": 1.89, "eval_loss": 3.164480447769165, "eval_runtime": 112.2934, "eval_samples_per_second": 887.194, "eval_steps_per_second": 55.453, "step": 224000 }, { "epoch": 1.96, "eval_loss": 3.177034616470337, "eval_runtime": 111.2262, "eval_samples_per_second": 895.706, "eval_steps_per_second": 55.985, "step": 232000 }, { "epoch": 2.03, "learning_rate": 3.69e-07, "loss": 3.3206, "step": 240000 }, { "epoch": 2.03, "eval_loss": 3.165634870529175, "eval_runtime": 110.3932, "eval_samples_per_second": 902.465, "eval_steps_per_second": 56.407, "step": 240000 }, { "epoch": 2.1, "eval_loss": 3.1560590267181396, "eval_runtime": 111.4599, "eval_samples_per_second": 893.828, "eval_steps_per_second": 55.868, "step": 248000 }, { "epoch": 2.16, "learning_rate": 3.6626666666666664e-07, "loss": 3.3228, "step": 256000 }, { "epoch": 2.16, "eval_loss": 3.1664586067199707, "eval_runtime": 111.3781, "eval_samples_per_second": 894.485, "eval_steps_per_second": 55.909, "step": 256000 }, { "epoch": 2.23, "eval_loss": 3.165701389312744, "eval_runtime": 112.0697, "eval_samples_per_second": 888.965, "eval_steps_per_second": 55.564, "step": 264000 }, { "epoch": 2.3, "learning_rate": 3.6353333333333335e-07, "loss": 3.3208, "step": 272000 }, { "epoch": 2.3, "eval_loss": 3.169295072555542, "eval_runtime": 112.0597, "eval_samples_per_second": 889.044, "eval_steps_per_second": 55.569, "step": 272000 }, { "epoch": 2.37, "eval_loss": 3.1777946949005127, "eval_runtime": 111.8143, "eval_samples_per_second": 890.995, "eval_steps_per_second": 55.691, "step": 280000 }, { "epoch": 2.43, "learning_rate": 3.608e-07, "loss": 3.3106, "step": 288000 }, { "epoch": 2.43, "eval_loss": 3.1760342121124268, "eval_runtime": 111.7773, "eval_samples_per_second": 891.29, "eval_steps_per_second": 55.709, "step": 288000 }, { "epoch": 2.5, "eval_loss": 3.1663529872894287, "eval_runtime": 111.6908, "eval_samples_per_second": 891.98, "eval_steps_per_second": 55.752, "step": 296000 }, { "epoch": 2.57, "learning_rate": 3.5806666666666666e-07, "loss": 3.3189, "step": 304000 }, { "epoch": 2.57, "eval_loss": 3.167732000350952, "eval_runtime": 111.5586, "eval_samples_per_second": 893.037, "eval_steps_per_second": 55.818, "step": 304000 }, { "epoch": 2.64, "eval_loss": 3.160001277923584, "eval_runtime": 111.7384, "eval_samples_per_second": 891.6, "eval_steps_per_second": 55.728, "step": 312000 }, { "epoch": 2.7, "learning_rate": 3.553333333333333e-07, "loss": 3.319, "step": 320000 }, { "epoch": 2.7, "eval_loss": 3.1570096015930176, "eval_runtime": 111.8911, "eval_samples_per_second": 890.384, "eval_steps_per_second": 55.652, "step": 320000 }, { "epoch": 2.77, "eval_loss": 3.169872760772705, "eval_runtime": 110.7472, "eval_samples_per_second": 899.58, "eval_steps_per_second": 56.227, "step": 328000 }, { "epoch": 2.84, "learning_rate": 3.5259999999999997e-07, "loss": 3.3236, "step": 336000 }, { "epoch": 2.84, "eval_loss": 3.1577506065368652, "eval_runtime": 111.5, "eval_samples_per_second": 893.507, "eval_steps_per_second": 55.848, "step": 336000 }, { "epoch": 2.91, "eval_loss": 3.1665139198303223, "eval_runtime": 111.674, "eval_samples_per_second": 892.115, "eval_steps_per_second": 55.761, "step": 344000 }, { "epoch": 2.98, "learning_rate": 3.498666666666667e-07, "loss": 3.3205, "step": 352000 }, { "epoch": 2.98, "eval_loss": 3.1557507514953613, "eval_runtime": 111.4997, "eval_samples_per_second": 893.509, "eval_steps_per_second": 55.848, "step": 352000 }, { "epoch": 3.04, "eval_loss": 3.167837381362915, "eval_runtime": 111.1802, "eval_samples_per_second": 896.077, "eval_steps_per_second": 56.008, "step": 360000 }, { "epoch": 3.11, "learning_rate": 3.4713333333333333e-07, "loss": 3.3114, "step": 368000 }, { "epoch": 3.11, "eval_loss": 3.159724473953247, "eval_runtime": 111.0206, "eval_samples_per_second": 897.365, "eval_steps_per_second": 56.089, "step": 368000 }, { "epoch": 3.18, "eval_loss": 3.161818265914917, "eval_runtime": 111.1411, "eval_samples_per_second": 896.392, "eval_steps_per_second": 56.028, "step": 376000 }, { "epoch": 3.25, "learning_rate": 3.444e-07, "loss": 3.3067, "step": 384000 }, { "epoch": 3.25, "eval_loss": 3.158372640609741, "eval_runtime": 112.1096, "eval_samples_per_second": 888.649, "eval_steps_per_second": 55.544, "step": 384000 }, { "epoch": 3.31, "eval_loss": 3.1597392559051514, "eval_runtime": 112.0977, "eval_samples_per_second": 888.742, "eval_steps_per_second": 55.55, "step": 392000 }, { "epoch": 3.38, "learning_rate": 3.416666666666667e-07, "loss": 3.314, "step": 400000 }, { "epoch": 3.38, "eval_loss": 3.1565074920654297, "eval_runtime": 111.7357, "eval_samples_per_second": 891.622, "eval_steps_per_second": 55.73, "step": 400000 }, { "epoch": 3.45, "eval_loss": 3.161231517791748, "eval_runtime": 111.0757, "eval_samples_per_second": 896.92, "eval_steps_per_second": 56.061, "step": 408000 }, { "epoch": 3.52, "learning_rate": 3.3893333333333335e-07, "loss": 3.3183, "step": 416000 }, { "epoch": 3.52, "eval_loss": 3.163727283477783, "eval_runtime": 111.1047, "eval_samples_per_second": 896.685, "eval_steps_per_second": 56.046, "step": 416000 }, { "epoch": 3.58, "eval_loss": 3.1568877696990967, "eval_runtime": 111.7771, "eval_samples_per_second": 891.292, "eval_steps_per_second": 55.709, "step": 424000 }, { "epoch": 3.65, "learning_rate": 3.3619999999999995e-07, "loss": 3.318, "step": 432000 }, { "epoch": 3.65, "eval_loss": 3.157625198364258, "eval_runtime": 111.9872, "eval_samples_per_second": 889.62, "eval_steps_per_second": 55.605, "step": 432000 }, { "epoch": 3.72, "eval_loss": 3.163891077041626, "eval_runtime": 113.1182, "eval_samples_per_second": 880.725, "eval_steps_per_second": 55.049, "step": 440000 }, { "epoch": 3.79, "learning_rate": 3.3346666666666666e-07, "loss": 3.3114, "step": 448000 }, { "epoch": 3.79, "eval_loss": 3.1459975242614746, "eval_runtime": 113.0006, "eval_samples_per_second": 881.641, "eval_steps_per_second": 55.106, "step": 448000 }, { "epoch": 3.85, "eval_loss": 3.161134719848633, "eval_runtime": 112.6662, "eval_samples_per_second": 884.258, "eval_steps_per_second": 55.269, "step": 456000 }, { "epoch": 3.92, "learning_rate": 3.307333333333333e-07, "loss": 3.3068, "step": 464000 }, { "epoch": 3.92, "eval_loss": 3.158674716949463, "eval_runtime": 111.6387, "eval_samples_per_second": 892.396, "eval_steps_per_second": 55.778, "step": 464000 }, { "epoch": 3.99, "eval_loss": 3.154172897338867, "eval_runtime": 111.5221, "eval_samples_per_second": 893.33, "eval_steps_per_second": 55.836, "step": 472000 }, { "epoch": 4.06, "learning_rate": 3.28e-07, "loss": 3.3166, "step": 480000 }, { "epoch": 4.06, "eval_loss": 3.142169237136841, "eval_runtime": 112.2969, "eval_samples_per_second": 887.166, "eval_steps_per_second": 55.451, "step": 480000 }, { "epoch": 4.12, "eval_loss": 3.160404920578003, "eval_runtime": 112.5525, "eval_samples_per_second": 885.152, "eval_steps_per_second": 55.325, "step": 488000 }, { "epoch": 4.19, "learning_rate": 3.252666666666667e-07, "loss": 3.3057, "step": 496000 }, { "epoch": 4.19, "eval_loss": 3.1586716175079346, "eval_runtime": 112.4095, "eval_samples_per_second": 886.278, "eval_steps_per_second": 55.396, "step": 496000 }, { "epoch": 4.26, "eval_loss": 3.1575887203216553, "eval_runtime": 113.1006, "eval_samples_per_second": 880.862, "eval_steps_per_second": 55.057, "step": 504000 }, { "epoch": 4.33, "learning_rate": 3.2253333333333334e-07, "loss": 3.3176, "step": 512000 }, { "epoch": 4.33, "eval_loss": 3.160233974456787, "eval_runtime": 111.8966, "eval_samples_per_second": 890.34, "eval_steps_per_second": 55.65, "step": 512000 }, { "epoch": 4.4, "eval_loss": 3.154435157775879, "eval_runtime": 112.2439, "eval_samples_per_second": 887.585, "eval_steps_per_second": 55.477, "step": 520000 }, { "epoch": 4.46, "learning_rate": 3.198e-07, "loss": 3.3126, "step": 528000 }, { "epoch": 4.46, "eval_loss": 3.1477601528167725, "eval_runtime": 112.0306, "eval_samples_per_second": 889.275, "eval_steps_per_second": 55.583, "step": 528000 }, { "epoch": 4.53, "eval_loss": 3.1520204544067383, "eval_runtime": 111.7071, "eval_samples_per_second": 891.85, "eval_steps_per_second": 55.744, "step": 536000 }, { "epoch": 4.6, "learning_rate": 3.1706666666666665e-07, "loss": 3.3044, "step": 544000 }, { "epoch": 4.6, "eval_loss": 3.158066749572754, "eval_runtime": 111.3952, "eval_samples_per_second": 894.348, "eval_steps_per_second": 55.9, "step": 544000 }, { "epoch": 4.67, "eval_loss": 3.1625027656555176, "eval_runtime": 111.6253, "eval_samples_per_second": 892.503, "eval_steps_per_second": 55.785, "step": 552000 }, { "epoch": 4.73, "learning_rate": 3.1433333333333336e-07, "loss": 3.3118, "step": 560000 }, { "epoch": 4.73, "eval_loss": 3.1510021686553955, "eval_runtime": 111.8688, "eval_samples_per_second": 890.561, "eval_steps_per_second": 55.663, "step": 560000 }, { "epoch": 4.8, "eval_loss": 3.154784917831421, "eval_runtime": 112.8767, "eval_samples_per_second": 882.609, "eval_steps_per_second": 55.166, "step": 568000 }, { "epoch": 4.87, "learning_rate": 3.116e-07, "loss": 3.3085, "step": 576000 }, { "epoch": 4.87, "eval_loss": 3.153918743133545, "eval_runtime": 113.0859, "eval_samples_per_second": 880.977, "eval_steps_per_second": 55.064, "step": 576000 }, { "epoch": 4.94, "eval_loss": 3.1503121852874756, "eval_runtime": 111.3015, "eval_samples_per_second": 895.1, "eval_steps_per_second": 55.947, "step": 584000 }, { "epoch": 5.0, "learning_rate": 3.0886666666666667e-07, "loss": 3.3014, "step": 592000 }, { "epoch": 5.0, "eval_loss": 3.1503639221191406, "eval_runtime": 112.1576, "eval_samples_per_second": 888.268, "eval_steps_per_second": 55.52, "step": 592000 }, { "epoch": 5.07, "eval_loss": 3.1534154415130615, "eval_runtime": 112.02, "eval_samples_per_second": 889.359, "eval_steps_per_second": 55.588, "step": 600000 }, { "epoch": 5.14, "learning_rate": 3.061333333333333e-07, "loss": 3.3115, "step": 608000 }, { "epoch": 5.14, "eval_loss": 3.155081033706665, "eval_runtime": 112.107, "eval_samples_per_second": 888.669, "eval_steps_per_second": 55.545, "step": 608000 }, { "epoch": 5.21, "eval_loss": 3.1493077278137207, "eval_runtime": 112.698, "eval_samples_per_second": 884.009, "eval_steps_per_second": 55.254, "step": 616000 }, { "epoch": 5.27, "learning_rate": 3.034e-07, "loss": 3.3079, "step": 624000 }, { "epoch": 5.27, "eval_loss": 3.1427001953125, "eval_runtime": 112.7088, "eval_samples_per_second": 883.924, "eval_steps_per_second": 55.249, "step": 624000 }, { "epoch": 5.34, "eval_loss": 3.1499979496002197, "eval_runtime": 113.2934, "eval_samples_per_second": 879.363, "eval_steps_per_second": 54.964, "step": 632000 }, { "epoch": 5.41, "learning_rate": 3.0066666666666663e-07, "loss": 3.3138, "step": 640000 }, { "epoch": 5.41, "eval_loss": 3.1546239852905273, "eval_runtime": 112.2247, "eval_samples_per_second": 887.737, "eval_steps_per_second": 55.487, "step": 640000 }, { "epoch": 5.48, "eval_loss": 3.1481516361236572, "eval_runtime": 112.549, "eval_samples_per_second": 885.179, "eval_steps_per_second": 55.327, "step": 648000 }, { "epoch": 5.54, "learning_rate": 2.9793333333333334e-07, "loss": 3.3096, "step": 656000 }, { "epoch": 5.54, "eval_loss": 3.1346185207366943, "eval_runtime": 112.618, "eval_samples_per_second": 884.637, "eval_steps_per_second": 55.293, "step": 656000 }, { "epoch": 5.61, "eval_loss": 3.1328086853027344, "eval_runtime": 112.4516, "eval_samples_per_second": 885.946, "eval_steps_per_second": 55.375, "step": 664000 }, { "epoch": 5.68, "learning_rate": 2.952e-07, "loss": 3.3121, "step": 672000 }, { "epoch": 5.68, "eval_loss": 3.14998197555542, "eval_runtime": 113.0205, "eval_samples_per_second": 881.486, "eval_steps_per_second": 55.096, "step": 672000 }, { "epoch": 5.75, "eval_loss": 3.131186008453369, "eval_runtime": 112.1192, "eval_samples_per_second": 888.573, "eval_steps_per_second": 55.539, "step": 680000 }, { "epoch": 5.82, "learning_rate": 2.9246666666666665e-07, "loss": 3.3195, "step": 688000 }, { "epoch": 5.82, "eval_loss": 3.143950939178467, "eval_runtime": 114.0106, "eval_samples_per_second": 873.831, "eval_steps_per_second": 54.618, "step": 688000 }, { "epoch": 5.88, "eval_loss": 3.1190884113311768, "eval_runtime": 113.1962, "eval_samples_per_second": 880.118, "eval_steps_per_second": 55.011, "step": 696000 }, { "epoch": 5.95, "learning_rate": 2.897333333333333e-07, "loss": 3.3091, "step": 704000 }, { "epoch": 5.95, "eval_loss": 3.139669179916382, "eval_runtime": 112.4234, "eval_samples_per_second": 886.168, "eval_steps_per_second": 55.389, "step": 704000 }, { "epoch": 6.02, "eval_loss": 3.1484687328338623, "eval_runtime": 113.6323, "eval_samples_per_second": 876.74, "eval_steps_per_second": 54.8, "step": 712000 }, { "epoch": 6.09, "learning_rate": 2.8699999999999996e-07, "loss": 3.3089, "step": 720000 }, { "epoch": 6.09, "eval_loss": 3.134004592895508, "eval_runtime": 112.7952, "eval_samples_per_second": 883.247, "eval_steps_per_second": 55.206, "step": 720000 }, { "epoch": 6.15, "eval_loss": 3.1384811401367188, "eval_runtime": 112.7828, "eval_samples_per_second": 883.344, "eval_steps_per_second": 55.212, "step": 728000 }, { "epoch": 6.22, "learning_rate": 2.8426666666666667e-07, "loss": 3.3062, "step": 736000 }, { "epoch": 6.22, "eval_loss": 3.1357834339141846, "eval_runtime": 113.0764, "eval_samples_per_second": 881.05, "eval_steps_per_second": 55.069, "step": 736000 }, { "epoch": 6.29, "eval_loss": 3.1295759677886963, "eval_runtime": 113.0992, "eval_samples_per_second": 880.873, "eval_steps_per_second": 55.058, "step": 744000 }, { "epoch": 6.36, "learning_rate": 2.815333333333333e-07, "loss": 3.3102, "step": 752000 }, { "epoch": 6.36, "eval_loss": 3.1260080337524414, "eval_runtime": 113.4791, "eval_samples_per_second": 877.924, "eval_steps_per_second": 54.874, "step": 752000 }, { "epoch": 6.42, "eval_loss": 3.142832040786743, "eval_runtime": 113.2715, "eval_samples_per_second": 879.533, "eval_steps_per_second": 54.974, "step": 760000 }, { "epoch": 6.49, "learning_rate": 2.7880000000000003e-07, "loss": 3.3088, "step": 768000 }, { "epoch": 6.49, "eval_loss": 3.137244939804077, "eval_runtime": 112.7233, "eval_samples_per_second": 883.81, "eval_steps_per_second": 55.241, "step": 768000 }, { "epoch": 6.56, "eval_loss": 3.1404080390930176, "eval_runtime": 112.6566, "eval_samples_per_second": 884.333, "eval_steps_per_second": 55.274, "step": 776000 }, { "epoch": 6.63, "learning_rate": 2.7606666666666664e-07, "loss": 3.3096, "step": 784000 }, { "epoch": 6.63, "eval_loss": 3.1362013816833496, "eval_runtime": 112.5766, "eval_samples_per_second": 884.962, "eval_steps_per_second": 55.313, "step": 784000 }, { "epoch": 6.69, "eval_loss": 3.1407511234283447, "eval_runtime": 113.0016, "eval_samples_per_second": 881.633, "eval_steps_per_second": 55.105, "step": 792000 }, { "epoch": 6.76, "learning_rate": 2.733333333333333e-07, "loss": 3.3079, "step": 800000 }, { "epoch": 6.76, "eval_loss": 3.134976625442505, "eval_runtime": 112.2018, "eval_samples_per_second": 887.918, "eval_steps_per_second": 55.498, "step": 800000 }, { "epoch": 6.83, "eval_loss": 3.146075487136841, "eval_runtime": 112.9151, "eval_samples_per_second": 882.309, "eval_steps_per_second": 55.148, "step": 808000 }, { "epoch": 6.9, "learning_rate": 2.706e-07, "loss": 3.3099, "step": 816000 }, { "epoch": 6.9, "eval_loss": 3.142042398452759, "eval_runtime": 112.8773, "eval_samples_per_second": 882.604, "eval_steps_per_second": 55.166, "step": 816000 }, { "epoch": 6.96, "eval_loss": 3.121290922164917, "eval_runtime": 112.4177, "eval_samples_per_second": 886.213, "eval_steps_per_second": 55.392, "step": 824000 }, { "epoch": 7.03, "learning_rate": 2.6786666666666666e-07, "loss": 3.3015, "step": 832000 }, { "epoch": 7.03, "eval_loss": 3.136622905731201, "eval_runtime": 112.757, "eval_samples_per_second": 883.546, "eval_steps_per_second": 55.225, "step": 832000 }, { "epoch": 7.1, "eval_loss": 3.1400632858276367, "eval_runtime": 112.5633, "eval_samples_per_second": 885.067, "eval_steps_per_second": 55.32, "step": 840000 }, { "epoch": 7.17, "learning_rate": 2.651333333333333e-07, "loss": 3.3045, "step": 848000 }, { "epoch": 7.17, "eval_loss": 3.129455804824829, "eval_runtime": 112.7781, "eval_samples_per_second": 883.381, "eval_steps_per_second": 55.215, "step": 848000 }, { "epoch": 7.24, "eval_loss": 3.1323471069335938, "eval_runtime": 113.1417, "eval_samples_per_second": 880.542, "eval_steps_per_second": 55.037, "step": 856000 }, { "epoch": 7.3, "learning_rate": 2.624e-07, "loss": 3.3085, "step": 864000 }, { "epoch": 7.3, "eval_loss": 3.1367900371551514, "eval_runtime": 112.7893, "eval_samples_per_second": 883.293, "eval_steps_per_second": 55.209, "step": 864000 }, { "epoch": 7.37, "eval_loss": 3.1274642944335938, "eval_runtime": 113.8432, "eval_samples_per_second": 875.116, "eval_steps_per_second": 54.698, "step": 872000 }, { "epoch": 7.44, "learning_rate": 2.596666666666667e-07, "loss": 3.3061, "step": 880000 }, { "epoch": 7.44, "eval_loss": 3.1325767040252686, "eval_runtime": 112.5258, "eval_samples_per_second": 885.362, "eval_steps_per_second": 55.338, "step": 880000 }, { "epoch": 7.51, "eval_loss": 3.137669801712036, "eval_runtime": 112.5494, "eval_samples_per_second": 885.175, "eval_steps_per_second": 55.327, "step": 888000 }, { "epoch": 7.57, "learning_rate": 2.5693333333333333e-07, "loss": 3.309, "step": 896000 }, { "epoch": 7.57, "eval_loss": 3.1406917572021484, "eval_runtime": 112.4096, "eval_samples_per_second": 886.276, "eval_steps_per_second": 55.396, "step": 896000 }, { "epoch": 7.64, "eval_loss": 3.132387399673462, "eval_runtime": 113.3694, "eval_samples_per_second": 878.773, "eval_steps_per_second": 54.927, "step": 904000 }, { "epoch": 7.71, "learning_rate": 2.542e-07, "loss": 3.3024, "step": 912000 }, { "epoch": 7.71, "eval_loss": 3.1187102794647217, "eval_runtime": 112.3891, "eval_samples_per_second": 886.438, "eval_steps_per_second": 55.406, "step": 912000 }, { "epoch": 7.78, "eval_loss": 3.1514384746551514, "eval_runtime": 112.9329, "eval_samples_per_second": 882.17, "eval_steps_per_second": 55.139, "step": 920000 }, { "epoch": 7.84, "learning_rate": 2.5146666666666664e-07, "loss": 3.2955, "step": 928000 }, { "epoch": 7.84, "eval_loss": 3.135131359100342, "eval_runtime": 113.0723, "eval_samples_per_second": 881.082, "eval_steps_per_second": 55.071, "step": 928000 }, { "epoch": 7.91, "eval_loss": 3.1307849884033203, "eval_runtime": 112.9789, "eval_samples_per_second": 881.811, "eval_steps_per_second": 55.117, "step": 936000 }, { "epoch": 7.98, "learning_rate": 2.4873333333333335e-07, "loss": 3.3122, "step": 944000 }, { "epoch": 7.98, "eval_loss": 3.1404905319213867, "eval_runtime": 113.3801, "eval_samples_per_second": 878.69, "eval_steps_per_second": 54.921, "step": 944000 }, { "epoch": 8.05, "eval_loss": 3.129053831100464, "eval_runtime": 113.1579, "eval_samples_per_second": 880.416, "eval_steps_per_second": 55.029, "step": 952000 }, { "epoch": 8.11, "learning_rate": 2.46e-07, "loss": 3.304, "step": 960000 }, { "epoch": 8.11, "eval_loss": 3.1244165897369385, "eval_runtime": 113.5289, "eval_samples_per_second": 877.539, "eval_steps_per_second": 54.849, "step": 960000 }, { "epoch": 8.18, "eval_loss": 3.1409430503845215, "eval_runtime": 113.3422, "eval_samples_per_second": 878.984, "eval_steps_per_second": 54.94, "step": 968000 }, { "epoch": 8.25, "learning_rate": 2.4326666666666666e-07, "loss": 3.3046, "step": 976000 }, { "epoch": 8.25, "eval_loss": 3.135524272918701, "eval_runtime": 112.7434, "eval_samples_per_second": 883.653, "eval_steps_per_second": 55.232, "step": 976000 }, { "epoch": 8.32, "eval_loss": 3.141561269760132, "eval_runtime": 113.0047, "eval_samples_per_second": 881.609, "eval_steps_per_second": 55.104, "step": 984000 }, { "epoch": 8.38, "learning_rate": 2.405333333333333e-07, "loss": 3.3022, "step": 992000 }, { "epoch": 8.38, "eval_loss": 3.1258225440979004, "eval_runtime": 113.3174, "eval_samples_per_second": 879.177, "eval_steps_per_second": 54.952, "step": 992000 }, { "epoch": 8.45, "eval_loss": 3.1332101821899414, "eval_runtime": 113.3836, "eval_samples_per_second": 878.663, "eval_steps_per_second": 54.92, "step": 1000000 }, { "epoch": 8.52, "learning_rate": 2.3779999999999997e-07, "loss": 3.3004, "step": 1008000 }, { "epoch": 8.52, "eval_loss": 3.143005847930908, "eval_runtime": 113.3372, "eval_samples_per_second": 879.023, "eval_steps_per_second": 54.942, "step": 1008000 }, { "epoch": 8.59, "eval_loss": 3.1281683444976807, "eval_runtime": 113.2531, "eval_samples_per_second": 879.675, "eval_steps_per_second": 54.983, "step": 1016000 }, { "epoch": 8.66, "learning_rate": 2.3506666666666668e-07, "loss": 3.3045, "step": 1024000 }, { "epoch": 8.66, "eval_loss": 3.1286985874176025, "eval_runtime": 112.5841, "eval_samples_per_second": 884.903, "eval_steps_per_second": 55.31, "step": 1024000 }, { "epoch": 8.72, "eval_loss": 3.1368112564086914, "eval_runtime": 113.404, "eval_samples_per_second": 878.505, "eval_steps_per_second": 54.91, "step": 1032000 }, { "epoch": 8.79, "learning_rate": 2.3233333333333334e-07, "loss": 3.3047, "step": 1040000 }, { "epoch": 8.79, "eval_loss": 3.136190891265869, "eval_runtime": 113.3739, "eval_samples_per_second": 878.738, "eval_steps_per_second": 54.924, "step": 1040000 }, { "epoch": 8.86, "eval_loss": 3.1267800331115723, "eval_runtime": 113.3548, "eval_samples_per_second": 878.886, "eval_steps_per_second": 54.934, "step": 1048000 }, { "epoch": 8.93, "learning_rate": 2.2960000000000002e-07, "loss": 3.3044, "step": 1056000 }, { "epoch": 8.93, "eval_loss": 3.1329193115234375, "eval_runtime": 113.2679, "eval_samples_per_second": 879.56, "eval_steps_per_second": 54.976, "step": 1056000 }, { "epoch": 8.99, "eval_loss": 3.124464273452759, "eval_runtime": 112.1804, "eval_samples_per_second": 888.087, "eval_steps_per_second": 55.509, "step": 1064000 }, { "epoch": 9.06, "learning_rate": 2.2686666666666667e-07, "loss": 3.2961, "step": 1072000 }, { "epoch": 9.06, "eval_loss": 3.127128839492798, "eval_runtime": 112.9944, "eval_samples_per_second": 881.69, "eval_steps_per_second": 55.109, "step": 1072000 }, { "epoch": 9.13, "eval_loss": 3.130047559738159, "eval_runtime": 113.791, "eval_samples_per_second": 875.518, "eval_steps_per_second": 54.723, "step": 1080000 }, { "epoch": 9.2, "learning_rate": 2.2413333333333333e-07, "loss": 3.2999, "step": 1088000 }, { "epoch": 9.2, "eval_loss": 3.136892080307007, "eval_runtime": 113.0086, "eval_samples_per_second": 881.579, "eval_steps_per_second": 55.102, "step": 1088000 }, { "epoch": 9.26, "eval_loss": 3.1424949169158936, "eval_runtime": 113.6203, "eval_samples_per_second": 876.833, "eval_steps_per_second": 54.805, "step": 1096000 }, { "epoch": 9.33, "learning_rate": 2.214e-07, "loss": 3.3012, "step": 1104000 }, { "epoch": 9.33, "eval_loss": 3.121316432952881, "eval_runtime": 113.7295, "eval_samples_per_second": 875.991, "eval_steps_per_second": 54.753, "step": 1104000 }, { "epoch": 9.4, "eval_loss": 3.1285130977630615, "eval_runtime": 114.4392, "eval_samples_per_second": 870.558, "eval_steps_per_second": 54.413, "step": 1112000 }, { "epoch": 9.47, "learning_rate": 2.1866666666666667e-07, "loss": 3.3008, "step": 1120000 }, { "epoch": 9.47, "eval_loss": 3.135331869125366, "eval_runtime": 114.4873, "eval_samples_per_second": 870.193, "eval_steps_per_second": 54.39, "step": 1120000 }, { "epoch": 9.53, "eval_loss": 3.136654853820801, "eval_runtime": 113.6856, "eval_samples_per_second": 876.329, "eval_steps_per_second": 54.774, "step": 1128000 }, { "epoch": 9.6, "learning_rate": 2.1593333333333332e-07, "loss": 3.3028, "step": 1136000 }, { "epoch": 9.6, "eval_loss": 3.129446029663086, "eval_runtime": 113.0995, "eval_samples_per_second": 880.871, "eval_steps_per_second": 55.058, "step": 1136000 }, { "epoch": 9.67, "eval_loss": 3.133976459503174, "eval_runtime": 113.1955, "eval_samples_per_second": 880.124, "eval_steps_per_second": 55.011, "step": 1144000 }, { "epoch": 9.74, "learning_rate": 2.132e-07, "loss": 3.3043, "step": 1152000 }, { "epoch": 9.74, "eval_loss": 3.1329877376556396, "eval_runtime": 113.2026, "eval_samples_per_second": 880.068, "eval_steps_per_second": 55.008, "step": 1152000 }, { "epoch": 9.8, "eval_loss": 3.13797664642334, "eval_runtime": 113.0427, "eval_samples_per_second": 881.313, "eval_steps_per_second": 55.085, "step": 1160000 }, { "epoch": 9.87, "learning_rate": 2.1046666666666666e-07, "loss": 3.2976, "step": 1168000 }, { "epoch": 9.87, "eval_loss": 3.119840145111084, "eval_runtime": 113.6008, "eval_samples_per_second": 876.983, "eval_steps_per_second": 54.815, "step": 1168000 }, { "epoch": 9.94, "eval_loss": 3.128972053527832, "eval_runtime": 114.1658, "eval_samples_per_second": 872.643, "eval_steps_per_second": 54.543, "step": 1176000 }, { "epoch": 10.01, "learning_rate": 2.0773333333333334e-07, "loss": 3.3048, "step": 1184000 }, { "epoch": 10.01, "eval_loss": 3.1457955837249756, "eval_runtime": 113.9721, "eval_samples_per_second": 874.126, "eval_steps_per_second": 54.636, "step": 1184000 }, { "epoch": 10.08, "eval_loss": 3.1274356842041016, "eval_runtime": 114.22, "eval_samples_per_second": 872.229, "eval_steps_per_second": 54.518, "step": 1192000 }, { "epoch": 10.14, "learning_rate": 2.05e-07, "loss": 3.3038, "step": 1200000 }, { "epoch": 10.14, "eval_loss": 3.1180973052978516, "eval_runtime": 114.324, "eval_samples_per_second": 871.436, "eval_steps_per_second": 54.468, "step": 1200000 }, { "epoch": 10.21, "eval_loss": 3.127936601638794, "eval_runtime": 113.9464, "eval_samples_per_second": 874.324, "eval_steps_per_second": 54.649, "step": 1208000 }, { "epoch": 10.28, "learning_rate": 2.0226666666666668e-07, "loss": 3.3066, "step": 1216000 }, { "epoch": 10.28, "eval_loss": 3.122974395751953, "eval_runtime": 115.0259, "eval_samples_per_second": 866.118, "eval_steps_per_second": 54.136, "step": 1216000 }, { "epoch": 10.35, "eval_loss": 3.1319711208343506, "eval_runtime": 114.7784, "eval_samples_per_second": 867.986, "eval_steps_per_second": 54.252, "step": 1224000 }, { "epoch": 10.41, "learning_rate": 1.9953333333333333e-07, "loss": 3.3019, "step": 1232000 }, { "epoch": 10.41, "eval_loss": 3.1202657222747803, "eval_runtime": 115.5672, "eval_samples_per_second": 862.061, "eval_steps_per_second": 53.882, "step": 1232000 }, { "epoch": 10.48, "eval_loss": 3.134918451309204, "eval_runtime": 114.8028, "eval_samples_per_second": 867.801, "eval_steps_per_second": 54.241, "step": 1240000 }, { "epoch": 10.55, "learning_rate": 1.968e-07, "loss": 3.3037, "step": 1248000 }, { "epoch": 10.55, "eval_loss": 3.132294178009033, "eval_runtime": 115.0121, "eval_samples_per_second": 866.222, "eval_steps_per_second": 54.142, "step": 1248000 }, { "epoch": 10.62, "eval_loss": 3.134295701980591, "eval_runtime": 114.186, "eval_samples_per_second": 872.489, "eval_steps_per_second": 54.534, "step": 1256000 }, { "epoch": 10.68, "learning_rate": 1.9406666666666667e-07, "loss": 3.2868, "step": 1264000 }, { "epoch": 10.68, "eval_loss": 3.1262283325195312, "eval_runtime": 114.6026, "eval_samples_per_second": 869.317, "eval_steps_per_second": 54.336, "step": 1264000 }, { "epoch": 10.75, "eval_loss": 3.1265833377838135, "eval_runtime": 114.6327, "eval_samples_per_second": 869.089, "eval_steps_per_second": 54.321, "step": 1272000 }, { "epoch": 10.82, "learning_rate": 1.9133333333333333e-07, "loss": 3.3033, "step": 1280000 }, { "epoch": 10.82, "eval_loss": 3.1282565593719482, "eval_runtime": 113.7083, "eval_samples_per_second": 876.154, "eval_steps_per_second": 54.763, "step": 1280000 }, { "epoch": 10.89, "eval_loss": 3.1290106773376465, "eval_runtime": 113.8399, "eval_samples_per_second": 875.141, "eval_steps_per_second": 54.7, "step": 1288000 }, { "epoch": 10.95, "learning_rate": 1.886e-07, "loss": 3.2984, "step": 1296000 }, { "epoch": 10.95, "eval_loss": 3.1177093982696533, "eval_runtime": 114.6951, "eval_samples_per_second": 868.616, "eval_steps_per_second": 54.292, "step": 1296000 }, { "epoch": 11.02, "eval_loss": 3.123425245285034, "eval_runtime": 113.7751, "eval_samples_per_second": 875.64, "eval_steps_per_second": 54.731, "step": 1304000 }, { "epoch": 11.09, "learning_rate": 1.8586666666666666e-07, "loss": 3.2982, "step": 1312000 }, { "epoch": 11.09, "eval_loss": 3.1309823989868164, "eval_runtime": 114.1207, "eval_samples_per_second": 872.988, "eval_steps_per_second": 54.565, "step": 1312000 }, { "epoch": 11.16, "eval_loss": 3.1408894062042236, "eval_runtime": 113.7168, "eval_samples_per_second": 876.089, "eval_steps_per_second": 54.759, "step": 1320000 }, { "epoch": 11.23, "learning_rate": 1.8313333333333332e-07, "loss": 3.303, "step": 1328000 }, { "epoch": 11.23, "eval_loss": 3.132986545562744, "eval_runtime": 114.412, "eval_samples_per_second": 870.766, "eval_steps_per_second": 54.426, "step": 1328000 }, { "epoch": 11.29, "eval_loss": 3.1281206607818604, "eval_runtime": 114.0643, "eval_samples_per_second": 873.42, "eval_steps_per_second": 54.592, "step": 1336000 }, { "epoch": 11.36, "learning_rate": 1.804e-07, "loss": 3.2976, "step": 1344000 }, { "epoch": 11.36, "eval_loss": 3.1286239624023438, "eval_runtime": 114.1147, "eval_samples_per_second": 873.034, "eval_steps_per_second": 54.568, "step": 1344000 }, { "epoch": 11.43, "eval_loss": 3.1282992362976074, "eval_runtime": 114.374, "eval_samples_per_second": 871.055, "eval_steps_per_second": 54.444, "step": 1352000 }, { "epoch": 11.5, "learning_rate": 1.7766666666666666e-07, "loss": 3.2923, "step": 1360000 }, { "epoch": 11.5, "eval_loss": 3.114553451538086, "eval_runtime": 112.5162, "eval_samples_per_second": 885.437, "eval_steps_per_second": 55.343, "step": 1360000 }, { "epoch": 11.56, "eval_loss": 3.1387319564819336, "eval_runtime": 113.9184, "eval_samples_per_second": 874.539, "eval_steps_per_second": 54.662, "step": 1368000 }, { "epoch": 11.63, "learning_rate": 1.7493333333333334e-07, "loss": 3.2988, "step": 1376000 }, { "epoch": 11.63, "eval_loss": 3.1278181076049805, "eval_runtime": 114.7175, "eval_samples_per_second": 868.447, "eval_steps_per_second": 54.281, "step": 1376000 }, { "epoch": 11.7, "eval_loss": 3.1225082874298096, "eval_runtime": 113.872, "eval_samples_per_second": 874.894, "eval_steps_per_second": 54.684, "step": 1384000 }, { "epoch": 11.77, "learning_rate": 1.722e-07, "loss": 3.299, "step": 1392000 }, { "epoch": 11.77, "eval_loss": 3.1341497898101807, "eval_runtime": 113.9675, "eval_samples_per_second": 874.161, "eval_steps_per_second": 54.638, "step": 1392000 }, { "epoch": 11.83, "eval_loss": 3.1210529804229736, "eval_runtime": 113.6828, "eval_samples_per_second": 876.351, "eval_steps_per_second": 54.775, "step": 1400000 }, { "epoch": 11.9, "learning_rate": 1.6946666666666668e-07, "loss": 3.2993, "step": 1408000 }, { "epoch": 11.9, "eval_loss": 3.1026012897491455, "eval_runtime": 114.1409, "eval_samples_per_second": 872.834, "eval_steps_per_second": 54.555, "step": 1408000 }, { "epoch": 11.97, "eval_loss": 3.1222946643829346, "eval_runtime": 113.2607, "eval_samples_per_second": 879.617, "eval_steps_per_second": 54.979, "step": 1416000 }, { "epoch": 12.04, "learning_rate": 1.6673333333333333e-07, "loss": 3.2942, "step": 1424000 }, { "epoch": 12.04, "eval_loss": 3.1199705600738525, "eval_runtime": 114.2703, "eval_samples_per_second": 871.845, "eval_steps_per_second": 54.494, "step": 1424000 }, { "epoch": 12.1, "eval_loss": 3.1245763301849365, "eval_runtime": 114.7753, "eval_samples_per_second": 868.009, "eval_steps_per_second": 54.254, "step": 1432000 }, { "epoch": 12.17, "learning_rate": 1.64e-07, "loss": 3.3062, "step": 1440000 }, { "epoch": 12.17, "eval_loss": 3.1325275897979736, "eval_runtime": 114.1964, "eval_samples_per_second": 872.409, "eval_steps_per_second": 54.529, "step": 1440000 }, { "epoch": 12.24, "eval_loss": 3.138754367828369, "eval_runtime": 113.4408, "eval_samples_per_second": 878.22, "eval_steps_per_second": 54.892, "step": 1448000 }, { "epoch": 12.31, "learning_rate": 1.6126666666666667e-07, "loss": 3.297, "step": 1456000 }, { "epoch": 12.31, "eval_loss": 3.1370742321014404, "eval_runtime": 114.3725, "eval_samples_per_second": 871.066, "eval_steps_per_second": 54.445, "step": 1456000 }, { "epoch": 12.37, "eval_loss": 3.1272239685058594, "eval_runtime": 114.3424, "eval_samples_per_second": 871.295, "eval_steps_per_second": 54.459, "step": 1464000 }, { "epoch": 12.44, "learning_rate": 1.5853333333333332e-07, "loss": 3.3033, "step": 1472000 }, { "epoch": 12.44, "eval_loss": 3.1231026649475098, "eval_runtime": 113.6228, "eval_samples_per_second": 876.813, "eval_steps_per_second": 54.804, "step": 1472000 }, { "epoch": 12.51, "eval_loss": 3.131573438644409, "eval_runtime": 114.5622, "eval_samples_per_second": 869.623, "eval_steps_per_second": 54.355, "step": 1480000 }, { "epoch": 12.58, "learning_rate": 1.558e-07, "loss": 3.291, "step": 1488000 }, { "epoch": 12.58, "eval_loss": 3.139345169067383, "eval_runtime": 114.0858, "eval_samples_per_second": 873.255, "eval_steps_per_second": 54.582, "step": 1488000 }, { "epoch": 12.65, "eval_loss": 3.1269216537475586, "eval_runtime": 114.7525, "eval_samples_per_second": 868.182, "eval_steps_per_second": 54.265, "step": 1496000 }, { "epoch": 12.71, "learning_rate": 1.5306666666666666e-07, "loss": 3.3054, "step": 1504000 }, { "epoch": 12.71, "eval_loss": 3.1363420486450195, "eval_runtime": 114.3108, "eval_samples_per_second": 871.536, "eval_steps_per_second": 54.474, "step": 1504000 }, { "epoch": 12.78, "eval_loss": 3.1249115467071533, "eval_runtime": 114.4828, "eval_samples_per_second": 870.227, "eval_steps_per_second": 54.392, "step": 1512000 }, { "epoch": 12.85, "learning_rate": 1.5033333333333332e-07, "loss": 3.2908, "step": 1520000 }, { "epoch": 12.85, "eval_loss": 3.1309752464294434, "eval_runtime": 114.385, "eval_samples_per_second": 870.97, "eval_steps_per_second": 54.439, "step": 1520000 }, { "epoch": 12.92, "eval_loss": 3.121305465698242, "eval_runtime": 115.3916, "eval_samples_per_second": 863.373, "eval_steps_per_second": 53.964, "step": 1528000 }, { "epoch": 12.98, "learning_rate": 1.476e-07, "loss": 3.2987, "step": 1536000 }, { "epoch": 12.98, "eval_loss": 3.122337818145752, "eval_runtime": 115.0493, "eval_samples_per_second": 865.942, "eval_steps_per_second": 54.125, "step": 1536000 }, { "epoch": 13.05, "eval_loss": 3.1133577823638916, "eval_runtime": 113.7876, "eval_samples_per_second": 875.544, "eval_steps_per_second": 54.725, "step": 1544000 }, { "epoch": 13.12, "learning_rate": 1.4486666666666665e-07, "loss": 3.2965, "step": 1552000 }, { "epoch": 13.12, "eval_loss": 3.1168410778045654, "eval_runtime": 113.5718, "eval_samples_per_second": 877.207, "eval_steps_per_second": 54.829, "step": 1552000 }, { "epoch": 13.19, "eval_loss": 3.1230275630950928, "eval_runtime": 113.4112, "eval_samples_per_second": 878.449, "eval_steps_per_second": 54.906, "step": 1560000 }, { "epoch": 13.25, "learning_rate": 1.4213333333333334e-07, "loss": 3.2931, "step": 1568000 }, { "epoch": 13.25, "eval_loss": 3.113243818283081, "eval_runtime": 114.3885, "eval_samples_per_second": 870.944, "eval_steps_per_second": 54.437, "step": 1568000 }, { "epoch": 13.32, "eval_loss": 3.119607925415039, "eval_runtime": 114.327, "eval_samples_per_second": 871.412, "eval_steps_per_second": 54.467, "step": 1576000 }, { "epoch": 13.39, "learning_rate": 1.3940000000000002e-07, "loss": 3.301, "step": 1584000 }, { "epoch": 13.39, "eval_loss": 3.1286561489105225, "eval_runtime": 114.3561, "eval_samples_per_second": 871.191, "eval_steps_per_second": 54.453, "step": 1584000 }, { "epoch": 13.46, "eval_loss": 3.114452838897705, "eval_runtime": 114.7858, "eval_samples_per_second": 867.929, "eval_steps_per_second": 54.249, "step": 1592000 }, { "epoch": 13.52, "learning_rate": 1.3666666666666665e-07, "loss": 3.3004, "step": 1600000 }, { "epoch": 13.52, "eval_loss": 3.129112482070923, "eval_runtime": 115.2447, "eval_samples_per_second": 864.473, "eval_steps_per_second": 54.033, "step": 1600000 }, { "epoch": 13.59, "eval_loss": 3.1145238876342773, "eval_runtime": 114.3883, "eval_samples_per_second": 870.946, "eval_steps_per_second": 54.437, "step": 1608000 }, { "epoch": 13.66, "learning_rate": 1.3393333333333333e-07, "loss": 3.2992, "step": 1616000 }, { "epoch": 13.66, "eval_loss": 3.129173755645752, "eval_runtime": 114.4593, "eval_samples_per_second": 870.405, "eval_steps_per_second": 54.404, "step": 1616000 }, { "epoch": 13.73, "eval_loss": 3.124779462814331, "eval_runtime": 114.1908, "eval_samples_per_second": 872.452, "eval_steps_per_second": 54.532, "step": 1624000 }, { "epoch": 13.79, "learning_rate": 1.312e-07, "loss": 3.2974, "step": 1632000 }, { "epoch": 13.79, "eval_loss": 3.1315438747406006, "eval_runtime": 114.5685, "eval_samples_per_second": 869.576, "eval_steps_per_second": 54.352, "step": 1632000 }, { "epoch": 13.86, "eval_loss": 3.111248254776001, "eval_runtime": 114.8609, "eval_samples_per_second": 867.362, "eval_steps_per_second": 54.213, "step": 1640000 }, { "epoch": 13.93, "learning_rate": 1.2846666666666667e-07, "loss": 3.2993, "step": 1648000 }, { "epoch": 13.93, "eval_loss": 3.121676206588745, "eval_runtime": 115.6258, "eval_samples_per_second": 861.625, "eval_steps_per_second": 53.855, "step": 1648000 }, { "epoch": 14.0, "eval_loss": 3.136202812194824, "eval_runtime": 115.2064, "eval_samples_per_second": 864.761, "eval_steps_per_second": 54.051, "step": 1656000 }, { "epoch": 14.07, "learning_rate": 1.2573333333333332e-07, "loss": 3.2934, "step": 1664000 }, { "epoch": 14.07, "eval_loss": 3.1199350357055664, "eval_runtime": 114.9755, "eval_samples_per_second": 866.498, "eval_steps_per_second": 54.159, "step": 1664000 }, { "epoch": 14.13, "eval_loss": 3.1276044845581055, "eval_runtime": 114.7877, "eval_samples_per_second": 867.916, "eval_steps_per_second": 54.248, "step": 1672000 }, { "epoch": 14.2, "learning_rate": 1.23e-07, "loss": 3.2964, "step": 1680000 }, { "epoch": 14.2, "eval_loss": 3.1164281368255615, "eval_runtime": 115.6081, "eval_samples_per_second": 861.756, "eval_steps_per_second": 53.863, "step": 1680000 }, { "epoch": 14.27, "eval_loss": 3.117210865020752, "eval_runtime": 114.8184, "eval_samples_per_second": 867.683, "eval_steps_per_second": 54.233, "step": 1688000 }, { "epoch": 14.34, "learning_rate": 1.2026666666666666e-07, "loss": 3.305, "step": 1696000 }, { "epoch": 14.34, "eval_loss": 3.1319870948791504, "eval_runtime": 114.4707, "eval_samples_per_second": 870.319, "eval_steps_per_second": 54.398, "step": 1696000 }, { "epoch": 14.4, "eval_loss": 3.1268680095672607, "eval_runtime": 115.2616, "eval_samples_per_second": 864.347, "eval_steps_per_second": 54.025, "step": 1704000 }, { "epoch": 14.47, "learning_rate": 1.1753333333333334e-07, "loss": 3.3022, "step": 1712000 }, { "epoch": 14.47, "eval_loss": 3.1107068061828613, "eval_runtime": 115.3427, "eval_samples_per_second": 863.739, "eval_steps_per_second": 53.987, "step": 1712000 }, { "epoch": 14.54, "eval_loss": 3.1096973419189453, "eval_runtime": 115.7245, "eval_samples_per_second": 860.89, "eval_steps_per_second": 53.809, "step": 1720000 }, { "epoch": 14.61, "learning_rate": 1.1480000000000001e-07, "loss": 3.2969, "step": 1728000 }, { "epoch": 14.61, "eval_loss": 3.117579460144043, "eval_runtime": 114.9075, "eval_samples_per_second": 867.01, "eval_steps_per_second": 54.191, "step": 1728000 }, { "epoch": 14.67, "eval_loss": 3.1282224655151367, "eval_runtime": 114.9382, "eval_samples_per_second": 866.779, "eval_steps_per_second": 54.177, "step": 1736000 }, { "epoch": 14.74, "learning_rate": 1.1206666666666666e-07, "loss": 3.2976, "step": 1744000 }, { "epoch": 14.74, "eval_loss": 3.1195032596588135, "eval_runtime": 114.3423, "eval_samples_per_second": 871.296, "eval_steps_per_second": 54.459, "step": 1744000 }, { "epoch": 14.81, "eval_loss": 3.115382432937622, "eval_runtime": 115.6972, "eval_samples_per_second": 861.093, "eval_steps_per_second": 53.822, "step": 1752000 }, { "epoch": 14.88, "learning_rate": 1.0933333333333333e-07, "loss": 3.3004, "step": 1760000 }, { "epoch": 14.88, "eval_loss": 3.114680767059326, "eval_runtime": 114.7064, "eval_samples_per_second": 868.53, "eval_steps_per_second": 54.286, "step": 1760000 }, { "epoch": 14.94, "eval_loss": 3.109429359436035, "eval_runtime": 121.4142, "eval_samples_per_second": 820.547, "eval_steps_per_second": 51.287, "step": 1768000 }, { "epoch": 15.01, "learning_rate": 1.066e-07, "loss": 3.2908, "step": 1776000 }, { "epoch": 15.01, "eval_loss": 3.1313207149505615, "eval_runtime": 122.4119, "eval_samples_per_second": 813.859, "eval_steps_per_second": 50.869, "step": 1776000 }, { "epoch": 15.08, "eval_loss": 3.128021001815796, "eval_runtime": 121.5867, "eval_samples_per_second": 819.382, "eval_steps_per_second": 51.214, "step": 1784000 }, { "epoch": 15.15, "learning_rate": 1.0386666666666667e-07, "loss": 3.2896, "step": 1792000 }, { "epoch": 15.15, "eval_loss": 3.130439043045044, "eval_runtime": 123.1458, "eval_samples_per_second": 809.009, "eval_steps_per_second": 50.566, "step": 1792000 }, { "epoch": 15.21, "eval_loss": 3.1329095363616943, "eval_runtime": 122.3076, "eval_samples_per_second": 814.553, "eval_steps_per_second": 50.913, "step": 1800000 }, { "epoch": 15.28, "learning_rate": 1.0113333333333334e-07, "loss": 3.3061, "step": 1808000 }, { "epoch": 15.28, "eval_loss": 3.119783401489258, "eval_runtime": 122.902, "eval_samples_per_second": 810.614, "eval_steps_per_second": 50.666, "step": 1808000 }, { "epoch": 15.35, "eval_loss": 3.1258275508880615, "eval_runtime": 124.0345, "eval_samples_per_second": 803.212, "eval_steps_per_second": 50.204, "step": 1816000 }, { "epoch": 15.42, "learning_rate": 9.84e-08, "loss": 3.3056, "step": 1824000 }, { "epoch": 15.42, "eval_loss": 3.125251531600952, "eval_runtime": 122.5875, "eval_samples_per_second": 812.693, "eval_steps_per_second": 50.796, "step": 1824000 }, { "epoch": 15.49, "eval_loss": 3.1200268268585205, "eval_runtime": 123.5094, "eval_samples_per_second": 806.627, "eval_steps_per_second": 50.417, "step": 1832000 }, { "epoch": 15.55, "learning_rate": 9.566666666666666e-08, "loss": 3.2921, "step": 1840000 }, { "epoch": 15.55, "eval_loss": 3.138437032699585, "eval_runtime": 122.6438, "eval_samples_per_second": 812.32, "eval_steps_per_second": 50.773, "step": 1840000 }, { "epoch": 15.62, "eval_loss": 3.1225225925445557, "eval_runtime": 123.7318, "eval_samples_per_second": 805.177, "eval_steps_per_second": 50.327, "step": 1848000 }, { "epoch": 15.69, "learning_rate": 9.293333333333333e-08, "loss": 3.2895, "step": 1856000 }, { "epoch": 15.69, "eval_loss": 3.128382921218872, "eval_runtime": 123.4511, "eval_samples_per_second": 807.008, "eval_steps_per_second": 50.441, "step": 1856000 }, { "epoch": 15.76, "eval_loss": 3.1200921535491943, "eval_runtime": 122.7543, "eval_samples_per_second": 811.589, "eval_steps_per_second": 50.727, "step": 1864000 }, { "epoch": 15.82, "learning_rate": 9.02e-08, "loss": 3.293, "step": 1872000 }, { "epoch": 15.82, "eval_loss": 3.1255955696105957, "eval_runtime": 123.1108, "eval_samples_per_second": 809.239, "eval_steps_per_second": 50.58, "step": 1872000 }, { "epoch": 15.89, "eval_loss": 3.116579055786133, "eval_runtime": 123.1245, "eval_samples_per_second": 809.149, "eval_steps_per_second": 50.575, "step": 1880000 }, { "epoch": 15.96, "learning_rate": 8.746666666666667e-08, "loss": 3.2963, "step": 1888000 }, { "epoch": 15.96, "eval_loss": 3.1218485832214355, "eval_runtime": 123.513, "eval_samples_per_second": 806.603, "eval_steps_per_second": 50.416, "step": 1888000 }, { "epoch": 16.03, "eval_loss": 3.1192948818206787, "eval_runtime": 123.2874, "eval_samples_per_second": 808.079, "eval_steps_per_second": 50.508, "step": 1896000 }, { "epoch": 16.09, "learning_rate": 8.473333333333334e-08, "loss": 3.2908, "step": 1904000 }, { "epoch": 16.09, "eval_loss": 3.12041974067688, "eval_runtime": 121.8605, "eval_samples_per_second": 817.541, "eval_steps_per_second": 51.099, "step": 1904000 }, { "epoch": 16.16, "eval_loss": 3.132479429244995, "eval_runtime": 121.752, "eval_samples_per_second": 818.27, "eval_steps_per_second": 51.145, "step": 1912000 }, { "epoch": 16.23, "learning_rate": 8.2e-08, "loss": 3.3039, "step": 1920000 }, { "epoch": 16.23, "eval_loss": 3.1090898513793945, "eval_runtime": 121.4862, "eval_samples_per_second": 820.06, "eval_steps_per_second": 51.257, "step": 1920000 }, { "epoch": 16.3, "eval_loss": 3.125005006790161, "eval_runtime": 121.872, "eval_samples_per_second": 817.464, "eval_steps_per_second": 51.095, "step": 1928000 }, { "epoch": 16.36, "learning_rate": 7.926666666666666e-08, "loss": 3.3011, "step": 1936000 }, { "epoch": 16.36, "eval_loss": 3.121675968170166, "eval_runtime": 121.4859, "eval_samples_per_second": 820.062, "eval_steps_per_second": 51.257, "step": 1936000 }, { "epoch": 16.43, "eval_loss": 3.120821237564087, "eval_runtime": 122.7757, "eval_samples_per_second": 811.447, "eval_steps_per_second": 50.719, "step": 1944000 }, { "epoch": 16.5, "learning_rate": 7.653333333333333e-08, "loss": 3.3003, "step": 1952000 }, { "epoch": 16.5, "eval_loss": 3.1109042167663574, "eval_runtime": 122.9588, "eval_samples_per_second": 810.239, "eval_steps_per_second": 50.643, "step": 1952000 }, { "epoch": 16.57, "eval_loss": 3.125174045562744, "eval_runtime": 123.2276, "eval_samples_per_second": 808.471, "eval_steps_per_second": 50.533, "step": 1960000 }, { "epoch": 16.63, "learning_rate": 7.38e-08, "loss": 3.3012, "step": 1968000 }, { "epoch": 16.63, "eval_loss": 3.112320899963379, "eval_runtime": 123.5751, "eval_samples_per_second": 806.198, "eval_steps_per_second": 50.39, "step": 1968000 }, { "epoch": 16.7, "eval_loss": 3.121267080307007, "eval_runtime": 123.2561, "eval_samples_per_second": 808.285, "eval_steps_per_second": 50.521, "step": 1976000 }, { "epoch": 16.77, "learning_rate": 7.106666666666667e-08, "loss": 3.2885, "step": 1984000 }, { "epoch": 16.77, "eval_loss": 3.1219470500946045, "eval_runtime": 124.2602, "eval_samples_per_second": 801.753, "eval_steps_per_second": 50.113, "step": 1984000 }, { "epoch": 16.84, "eval_loss": 3.1254475116729736, "eval_runtime": 123.4006, "eval_samples_per_second": 807.338, "eval_steps_per_second": 50.462, "step": 1992000 }, { "epoch": 16.91, "learning_rate": 6.833333333333332e-08, "loss": 3.2982, "step": 2000000 }, { "epoch": 16.91, "eval_loss": 3.1259801387786865, "eval_runtime": 123.6708, "eval_samples_per_second": 805.574, "eval_steps_per_second": 50.351, "step": 2000000 }, { "epoch": 16.97, "eval_loss": 3.1166510581970215, "eval_runtime": 123.5064, "eval_samples_per_second": 806.646, "eval_steps_per_second": 50.418, "step": 2008000 }, { "epoch": 17.04, "learning_rate": 6.56e-08, "loss": 3.2962, "step": 2016000 }, { "epoch": 17.04, "eval_loss": 3.108151435852051, "eval_runtime": 122.3197, "eval_samples_per_second": 814.472, "eval_steps_per_second": 50.908, "step": 2016000 }, { "epoch": 17.11, "eval_loss": 3.120389699935913, "eval_runtime": 123.9621, "eval_samples_per_second": 803.681, "eval_steps_per_second": 50.233, "step": 2024000 }, { "epoch": 17.18, "learning_rate": 6.286666666666666e-08, "loss": 3.2889, "step": 2032000 }, { "epoch": 17.18, "eval_loss": 3.1235997676849365, "eval_runtime": 124.4028, "eval_samples_per_second": 800.834, "eval_steps_per_second": 50.055, "step": 2032000 }, { "epoch": 17.24, "eval_loss": 3.13246488571167, "eval_runtime": 123.0626, "eval_samples_per_second": 809.555, "eval_steps_per_second": 50.6, "step": 2040000 }, { "epoch": 17.31, "learning_rate": 6.013333333333333e-08, "loss": 3.2892, "step": 2048000 }, { "epoch": 17.31, "eval_loss": 3.1200194358825684, "eval_runtime": 124.0744, "eval_samples_per_second": 802.954, "eval_steps_per_second": 50.188, "step": 2048000 }, { "epoch": 17.38, "eval_loss": 3.1231026649475098, "eval_runtime": 122.7547, "eval_samples_per_second": 811.586, "eval_steps_per_second": 50.727, "step": 2056000 }, { "epoch": 17.45, "learning_rate": 5.7400000000000004e-08, "loss": 3.3028, "step": 2064000 }, { "epoch": 17.45, "eval_loss": 3.1202361583709717, "eval_runtime": 122.376, "eval_samples_per_second": 814.098, "eval_steps_per_second": 50.884, "step": 2064000 }, { "epoch": 17.51, "eval_loss": 3.1188881397247314, "eval_runtime": 123.9465, "eval_samples_per_second": 803.782, "eval_steps_per_second": 50.239, "step": 2072000 }, { "epoch": 17.58, "learning_rate": 5.4666666666666666e-08, "loss": 3.2889, "step": 2080000 }, { "epoch": 17.58, "eval_loss": 3.1336753368377686, "eval_runtime": 123.6532, "eval_samples_per_second": 805.689, "eval_steps_per_second": 50.359, "step": 2080000 }, { "epoch": 17.65, "eval_loss": 3.1155591011047363, "eval_runtime": 124.2421, "eval_samples_per_second": 801.87, "eval_steps_per_second": 50.12, "step": 2088000 }, { "epoch": 17.72, "learning_rate": 5.1933333333333335e-08, "loss": 3.2985, "step": 2096000 }, { "epoch": 17.72, "eval_loss": 3.1258046627044678, "eval_runtime": 123.2939, "eval_samples_per_second": 808.037, "eval_steps_per_second": 50.505, "step": 2096000 }, { "epoch": 17.78, "eval_loss": 3.1358370780944824, "eval_runtime": 123.3415, "eval_samples_per_second": 807.725, "eval_steps_per_second": 50.486, "step": 2104000 }, { "epoch": 17.85, "learning_rate": 4.92e-08, "loss": 3.2949, "step": 2112000 }, { "epoch": 17.85, "eval_loss": 3.1270527839660645, "eval_runtime": 115.5858, "eval_samples_per_second": 861.922, "eval_steps_per_second": 53.873, "step": 2112000 }, { "epoch": 17.92, "eval_loss": 3.1249983310699463, "eval_runtime": 114.4039, "eval_samples_per_second": 870.827, "eval_steps_per_second": 54.43, "step": 2120000 }, { "epoch": 17.99, "learning_rate": 4.6466666666666666e-08, "loss": 3.2987, "step": 2128000 }, { "epoch": 17.99, "eval_loss": 3.124422550201416, "eval_runtime": 113.825, "eval_samples_per_second": 875.256, "eval_steps_per_second": 54.707, "step": 2128000 }, { "epoch": 18.05, "eval_loss": 3.1221253871917725, "eval_runtime": 113.4341, "eval_samples_per_second": 878.272, "eval_steps_per_second": 54.895, "step": 2136000 }, { "epoch": 18.12, "learning_rate": 4.3733333333333335e-08, "loss": 3.2884, "step": 2144000 }, { "epoch": 18.12, "eval_loss": 3.1197779178619385, "eval_runtime": 114.383, "eval_samples_per_second": 870.986, "eval_steps_per_second": 54.44, "step": 2144000 }, { "epoch": 18.19, "eval_loss": 3.1169650554656982, "eval_runtime": 115.2342, "eval_samples_per_second": 864.553, "eval_steps_per_second": 54.038, "step": 2152000 }, { "epoch": 18.26, "learning_rate": 4.1e-08, "loss": 3.2918, "step": 2160000 }, { "epoch": 18.26, "eval_loss": 3.1158599853515625, "eval_runtime": 114.9399, "eval_samples_per_second": 866.766, "eval_steps_per_second": 54.176, "step": 2160000 }, { "epoch": 18.33, "eval_loss": 3.1153085231781006, "eval_runtime": 114.0048, "eval_samples_per_second": 873.876, "eval_steps_per_second": 54.621, "step": 2168000 }, { "epoch": 18.39, "learning_rate": 3.8266666666666665e-08, "loss": 3.2995, "step": 2176000 }, { "epoch": 18.39, "eval_loss": 3.120265483856201, "eval_runtime": 114.5155, "eval_samples_per_second": 869.979, "eval_steps_per_second": 54.377, "step": 2176000 }, { "epoch": 18.46, "eval_loss": 3.110717535018921, "eval_runtime": 114.5457, "eval_samples_per_second": 869.749, "eval_steps_per_second": 54.363, "step": 2184000 }, { "epoch": 18.53, "learning_rate": 3.5533333333333334e-08, "loss": 3.3003, "step": 2192000 }, { "epoch": 18.53, "eval_loss": 3.1211767196655273, "eval_runtime": 114.2326, "eval_samples_per_second": 872.133, "eval_steps_per_second": 54.512, "step": 2192000 }, { "epoch": 18.6, "eval_loss": 3.133021593093872, "eval_runtime": 123.3425, "eval_samples_per_second": 807.718, "eval_steps_per_second": 50.485, "step": 2200000 }, { "epoch": 18.66, "learning_rate": 3.28e-08, "loss": 3.2921, "step": 2208000 }, { "epoch": 18.66, "eval_loss": 3.1160311698913574, "eval_runtime": 123.8562, "eval_samples_per_second": 804.369, "eval_steps_per_second": 50.276, "step": 2208000 }, { "epoch": 18.73, "eval_loss": 3.1191678047180176, "eval_runtime": 124.221, "eval_samples_per_second": 802.006, "eval_steps_per_second": 50.128, "step": 2216000 }, { "epoch": 18.8, "learning_rate": 3.0066666666666665e-08, "loss": 3.293, "step": 2224000 }, { "epoch": 18.8, "eval_loss": 3.1164309978485107, "eval_runtime": 123.2955, "eval_samples_per_second": 808.026, "eval_steps_per_second": 50.505, "step": 2224000 }, { "epoch": 18.87, "eval_loss": 3.1224827766418457, "eval_runtime": 124.8121, "eval_samples_per_second": 798.208, "eval_steps_per_second": 49.891, "step": 2232000 }, { "epoch": 18.93, "learning_rate": 2.7333333333333333e-08, "loss": 3.2969, "step": 2240000 }, { "epoch": 18.93, "eval_loss": 3.1243343353271484, "eval_runtime": 125.6337, "eval_samples_per_second": 792.988, "eval_steps_per_second": 49.565, "step": 2240000 }, { "epoch": 19.0, "eval_loss": 3.115158796310425, "eval_runtime": 123.9293, "eval_samples_per_second": 803.894, "eval_steps_per_second": 50.246, "step": 2248000 }, { "epoch": 19.07, "learning_rate": 2.46e-08, "loss": 3.2891, "step": 2256000 }, { "epoch": 19.07, "eval_loss": 3.132289171218872, "eval_runtime": 125.9455, "eval_samples_per_second": 791.025, "eval_steps_per_second": 49.442, "step": 2256000 }, { "epoch": 19.14, "eval_loss": 3.1076977252960205, "eval_runtime": 125.1878, "eval_samples_per_second": 795.812, "eval_steps_per_second": 49.741, "step": 2264000 }, { "epoch": 19.2, "learning_rate": 2.1866666666666667e-08, "loss": 3.2903, "step": 2272000 }, { "epoch": 19.2, "eval_loss": 3.134789228439331, "eval_runtime": 114.7197, "eval_samples_per_second": 868.43, "eval_steps_per_second": 54.28, "step": 2272000 }, { "epoch": 19.27, "eval_loss": 3.120239019393921, "eval_runtime": 115.2866, "eval_samples_per_second": 864.159, "eval_steps_per_second": 54.013, "step": 2280000 }, { "epoch": 19.34, "learning_rate": 1.9133333333333333e-08, "loss": 3.2986, "step": 2288000 }, { "epoch": 19.34, "eval_loss": 3.122042655944824, "eval_runtime": 114.9192, "eval_samples_per_second": 866.922, "eval_steps_per_second": 54.186, "step": 2288000 }, { "epoch": 19.41, "eval_loss": 3.12359881401062, "eval_runtime": 114.2736, "eval_samples_per_second": 871.82, "eval_steps_per_second": 54.492, "step": 2296000 }, { "epoch": 19.47, "learning_rate": 1.64e-08, "loss": 3.293, "step": 2304000 }, { "epoch": 19.47, "eval_loss": 3.122392177581787, "eval_runtime": 114.9052, "eval_samples_per_second": 867.028, "eval_steps_per_second": 54.192, "step": 2304000 }, { "epoch": 19.54, "eval_loss": 3.1246843338012695, "eval_runtime": 115.1423, "eval_samples_per_second": 865.242, "eval_steps_per_second": 54.081, "step": 2312000 }, { "epoch": 19.61, "learning_rate": 1.3666666666666667e-08, "loss": 3.299, "step": 2320000 }, { "epoch": 19.61, "eval_loss": 3.1234774589538574, "eval_runtime": 114.9644, "eval_samples_per_second": 866.581, "eval_steps_per_second": 54.165, "step": 2320000 }, { "epoch": 19.68, "eval_loss": 3.120058059692383, "eval_runtime": 114.2563, "eval_samples_per_second": 871.952, "eval_steps_per_second": 54.5, "step": 2328000 }, { "epoch": 19.75, "learning_rate": 1.0933333333333334e-08, "loss": 3.2898, "step": 2336000 }, { "epoch": 19.75, "eval_loss": 3.1162607669830322, "eval_runtime": 114.3246, "eval_samples_per_second": 871.431, "eval_steps_per_second": 54.468, "step": 2336000 }, { "epoch": 19.81, "eval_loss": 3.1289384365081787, "eval_runtime": 113.9183, "eval_samples_per_second": 874.539, "eval_steps_per_second": 54.662, "step": 2344000 }, { "epoch": 19.88, "learning_rate": 8.2e-09, "loss": 3.2956, "step": 2352000 }, { "epoch": 19.88, "eval_loss": 3.1197969913482666, "eval_runtime": 114.6787, "eval_samples_per_second": 868.74, "eval_steps_per_second": 54.3, "step": 2352000 }, { "epoch": 19.95, "eval_loss": 3.1250617504119873, "eval_runtime": 114.9296, "eval_samples_per_second": 866.844, "eval_steps_per_second": 54.181, "step": 2360000 }, { "epoch": 20.02, "learning_rate": 5.466666666666667e-09, "loss": 3.2926, "step": 2368000 }, { "epoch": 20.02, "eval_loss": 3.1086537837982178, "eval_runtime": 115.0292, "eval_samples_per_second": 866.093, "eval_steps_per_second": 54.134, "step": 2368000 }, { "epoch": 20.08, "eval_loss": 3.109729051589966, "eval_runtime": 115.2619, "eval_samples_per_second": 864.345, "eval_steps_per_second": 54.025, "step": 2376000 }, { "epoch": 20.15, "learning_rate": 2.7333333333333334e-09, "loss": 3.2958, "step": 2384000 }, { "epoch": 20.15, "eval_loss": 3.126241445541382, "eval_runtime": 114.8392, "eval_samples_per_second": 867.526, "eval_steps_per_second": 54.224, "step": 2384000 }, { "epoch": 20.22, "eval_loss": 3.1308016777038574, "eval_runtime": 115.2099, "eval_samples_per_second": 864.735, "eval_steps_per_second": 54.049, "step": 2392000 }, { "epoch": 20.29, "learning_rate": 0.0, "loss": 3.2862, "step": 2400000 }, { "epoch": 20.29, "eval_loss": 3.1129300594329834, "eval_runtime": 115.5611, "eval_samples_per_second": 862.107, "eval_steps_per_second": 53.885, "step": 2400000 }, { "epoch": 20.29, "step": 2400000, "total_flos": 7.704255639100524e+17, "train_loss": 3.30909029296875, "train_runtime": 182182.8159, "train_samples_per_second": 210.777, "train_steps_per_second": 13.174 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 21, "save_steps": 32000, "total_flos": 7.704255639100524e+17, "trial_name": null, "trial_params": null }