|
{ |
|
"best_metric": 2.256277322769165, |
|
"best_model_checkpoint": "./model_tweets_2020_Q4_25/checkpoint-1952000", |
|
"epoch": 6.73682319488226, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 2.580249547958374, |
|
"eval_runtime": 320.9634, |
|
"eval_samples_per_second": 934.686, |
|
"eval_steps_per_second": 58.418, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 2.8151, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 2.488163471221924, |
|
"eval_runtime": 321.5644, |
|
"eval_samples_per_second": 932.939, |
|
"eval_steps_per_second": 58.309, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.429165840148926, |
|
"eval_runtime": 321.2729, |
|
"eval_samples_per_second": 933.786, |
|
"eval_steps_per_second": 58.362, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.5636, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.3980140686035156, |
|
"eval_runtime": 321.6728, |
|
"eval_samples_per_second": 932.625, |
|
"eval_steps_per_second": 58.289, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.3799262046813965, |
|
"eval_runtime": 323.0695, |
|
"eval_samples_per_second": 928.593, |
|
"eval_steps_per_second": 58.037, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.4947, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.3665478229522705, |
|
"eval_runtime": 322.6194, |
|
"eval_samples_per_second": 929.888, |
|
"eval_steps_per_second": 58.118, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.345531940460205, |
|
"eval_runtime": 322.4114, |
|
"eval_samples_per_second": 930.488, |
|
"eval_steps_per_second": 58.156, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.473, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 2.341932773590088, |
|
"eval_runtime": 324.4552, |
|
"eval_samples_per_second": 924.627, |
|
"eval_steps_per_second": 57.789, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.3307127952575684, |
|
"eval_runtime": 322.5941, |
|
"eval_samples_per_second": 929.961, |
|
"eval_steps_per_second": 58.123, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.4512, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.3288769721984863, |
|
"eval_runtime": 322.3925, |
|
"eval_samples_per_second": 930.543, |
|
"eval_steps_per_second": 58.159, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.325032949447632, |
|
"eval_runtime": 322.736, |
|
"eval_samples_per_second": 929.552, |
|
"eval_steps_per_second": 58.097, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.4421, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.318911075592041, |
|
"eval_runtime": 323.2095, |
|
"eval_samples_per_second": 928.19, |
|
"eval_steps_per_second": 58.012, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.3199880123138428, |
|
"eval_runtime": 323.2363, |
|
"eval_samples_per_second": 928.114, |
|
"eval_steps_per_second": 58.007, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.4354, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.3154587745666504, |
|
"eval_runtime": 323.95, |
|
"eval_samples_per_second": 926.069, |
|
"eval_steps_per_second": 57.879, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.313781976699829, |
|
"eval_runtime": 324.5922, |
|
"eval_samples_per_second": 924.237, |
|
"eval_steps_per_second": 57.765, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.4324, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.305436372756958, |
|
"eval_runtime": 323.2003, |
|
"eval_samples_per_second": 928.217, |
|
"eval_steps_per_second": 58.014, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 2.302849054336548, |
|
"eval_runtime": 323.3577, |
|
"eval_samples_per_second": 927.765, |
|
"eval_steps_per_second": 57.985, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.4253, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.3029212951660156, |
|
"eval_runtime": 324.8316, |
|
"eval_samples_per_second": 923.555, |
|
"eval_steps_per_second": 57.722, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.3006043434143066, |
|
"eval_runtime": 323.2225, |
|
"eval_samples_per_second": 928.153, |
|
"eval_steps_per_second": 58.01, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.4156, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 2.300135612487793, |
|
"eval_runtime": 323.6582, |
|
"eval_samples_per_second": 926.904, |
|
"eval_steps_per_second": 57.931, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.298043727874756, |
|
"eval_runtime": 322.8658, |
|
"eval_samples_per_second": 929.179, |
|
"eval_steps_per_second": 58.074, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.4165, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.291269063949585, |
|
"eval_runtime": 323.9312, |
|
"eval_samples_per_second": 926.122, |
|
"eval_steps_per_second": 57.883, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.297363519668579, |
|
"eval_runtime": 323.2402, |
|
"eval_samples_per_second": 928.102, |
|
"eval_steps_per_second": 58.006, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.4131, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.2906086444854736, |
|
"eval_runtime": 323.5876, |
|
"eval_samples_per_second": 927.106, |
|
"eval_steps_per_second": 57.944, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.2908411026000977, |
|
"eval_runtime": 324.835, |
|
"eval_samples_per_second": 923.546, |
|
"eval_steps_per_second": 57.722, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.407, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 2.289541482925415, |
|
"eval_runtime": 323.2737, |
|
"eval_samples_per_second": 928.006, |
|
"eval_steps_per_second": 58.0, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.2865185737609863, |
|
"eval_runtime": 323.7161, |
|
"eval_samples_per_second": 926.738, |
|
"eval_steps_per_second": 57.921, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.4153, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.2913596630096436, |
|
"eval_runtime": 323.8117, |
|
"eval_samples_per_second": 926.464, |
|
"eval_steps_per_second": 57.904, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.280600070953369, |
|
"eval_runtime": 324.7681, |
|
"eval_samples_per_second": 923.736, |
|
"eval_steps_per_second": 57.734, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.4011, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.2818994522094727, |
|
"eval_runtime": 324.8269, |
|
"eval_samples_per_second": 923.569, |
|
"eval_steps_per_second": 57.723, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 2.2854413986206055, |
|
"eval_runtime": 324.8244, |
|
"eval_samples_per_second": 923.576, |
|
"eval_steps_per_second": 57.724, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.4087, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.283675193786621, |
|
"eval_runtime": 326.3862, |
|
"eval_samples_per_second": 919.157, |
|
"eval_steps_per_second": 57.447, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.286595106124878, |
|
"eval_runtime": 327.6717, |
|
"eval_samples_per_second": 915.551, |
|
"eval_steps_per_second": 57.222, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.4059, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 2.285534143447876, |
|
"eval_runtime": 326.6584, |
|
"eval_samples_per_second": 918.391, |
|
"eval_steps_per_second": 57.399, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.28678560256958, |
|
"eval_runtime": 329.4988, |
|
"eval_samples_per_second": 910.474, |
|
"eval_steps_per_second": 56.905, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.4086, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.277035713195801, |
|
"eval_runtime": 327.402, |
|
"eval_samples_per_second": 916.305, |
|
"eval_steps_per_second": 57.269, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 2.2788984775543213, |
|
"eval_runtime": 328.7295, |
|
"eval_samples_per_second": 912.604, |
|
"eval_steps_per_second": 57.038, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.4093, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.2792067527770996, |
|
"eval_runtime": 328.6343, |
|
"eval_samples_per_second": 912.869, |
|
"eval_steps_per_second": 57.054, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.2796542644500732, |
|
"eval_runtime": 328.9041, |
|
"eval_samples_per_second": 912.12, |
|
"eval_steps_per_second": 57.007, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.4036, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 2.2794368267059326, |
|
"eval_runtime": 327.0881, |
|
"eval_samples_per_second": 917.184, |
|
"eval_steps_per_second": 57.324, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.2767865657806396, |
|
"eval_runtime": 325.4813, |
|
"eval_samples_per_second": 921.712, |
|
"eval_steps_per_second": 57.607, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.4063, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.28360652923584, |
|
"eval_runtime": 326.0539, |
|
"eval_samples_per_second": 920.093, |
|
"eval_steps_per_second": 57.506, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 2.2808754444122314, |
|
"eval_runtime": 324.6753, |
|
"eval_samples_per_second": 924.0, |
|
"eval_steps_per_second": 57.75, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.4047, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 2.280778408050537, |
|
"eval_runtime": 325.3269, |
|
"eval_samples_per_second": 922.149, |
|
"eval_steps_per_second": 57.634, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.28403377532959, |
|
"eval_runtime": 325.2468, |
|
"eval_samples_per_second": 922.377, |
|
"eval_steps_per_second": 57.649, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.4084, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 2.279930591583252, |
|
"eval_runtime": 327.9631, |
|
"eval_samples_per_second": 914.737, |
|
"eval_steps_per_second": 57.171, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 2.272570848464966, |
|
"eval_runtime": 327.8275, |
|
"eval_samples_per_second": 915.115, |
|
"eval_steps_per_second": 57.195, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.4041, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.2823517322540283, |
|
"eval_runtime": 328.4584, |
|
"eval_samples_per_second": 913.358, |
|
"eval_steps_per_second": 57.085, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 2.278149127960205, |
|
"eval_runtime": 326.9556, |
|
"eval_samples_per_second": 917.556, |
|
"eval_steps_per_second": 57.347, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.4034, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 2.275142192840576, |
|
"eval_runtime": 326.8439, |
|
"eval_samples_per_second": 917.869, |
|
"eval_steps_per_second": 57.367, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.2760984897613525, |
|
"eval_runtime": 325.9846, |
|
"eval_samples_per_second": 920.289, |
|
"eval_steps_per_second": 57.518, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.3951, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 2.2731635570526123, |
|
"eval_runtime": 326.1395, |
|
"eval_samples_per_second": 919.852, |
|
"eval_steps_per_second": 57.491, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 2.2709577083587646, |
|
"eval_runtime": 326.1973, |
|
"eval_samples_per_second": 919.689, |
|
"eval_steps_per_second": 57.481, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.409, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.277972936630249, |
|
"eval_runtime": 325.3949, |
|
"eval_samples_per_second": 921.957, |
|
"eval_steps_per_second": 57.622, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 2.2714641094207764, |
|
"eval_runtime": 325.6353, |
|
"eval_samples_per_second": 921.276, |
|
"eval_steps_per_second": 57.58, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.3985, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 2.279003620147705, |
|
"eval_runtime": 326.3983, |
|
"eval_samples_per_second": 919.122, |
|
"eval_steps_per_second": 57.445, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.276561737060547, |
|
"eval_runtime": 326.5381, |
|
"eval_samples_per_second": 918.729, |
|
"eval_steps_per_second": 57.421, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.4016, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 2.2744641304016113, |
|
"eval_runtime": 326.438, |
|
"eval_samples_per_second": 919.011, |
|
"eval_steps_per_second": 57.438, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 2.2719147205352783, |
|
"eval_runtime": 326.1182, |
|
"eval_samples_per_second": 919.912, |
|
"eval_steps_per_second": 57.494, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.3978, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.2755250930786133, |
|
"eval_runtime": 326.0946, |
|
"eval_samples_per_second": 919.978, |
|
"eval_steps_per_second": 57.499, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 2.269918203353882, |
|
"eval_runtime": 326.8772, |
|
"eval_samples_per_second": 917.776, |
|
"eval_steps_per_second": 57.361, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.406, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 2.282317876815796, |
|
"eval_runtime": 325.8019, |
|
"eval_samples_per_second": 920.805, |
|
"eval_steps_per_second": 57.55, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.2735817432403564, |
|
"eval_runtime": 326.0969, |
|
"eval_samples_per_second": 919.972, |
|
"eval_steps_per_second": 57.498, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.3958, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.2728230953216553, |
|
"eval_runtime": 326.2067, |
|
"eval_samples_per_second": 919.662, |
|
"eval_steps_per_second": 57.479, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 2.2762703895568848, |
|
"eval_runtime": 326.3243, |
|
"eval_samples_per_second": 919.331, |
|
"eval_steps_per_second": 57.458, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.406, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.2780961990356445, |
|
"eval_runtime": 325.8653, |
|
"eval_samples_per_second": 920.626, |
|
"eval_steps_per_second": 57.539, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 2.2722842693328857, |
|
"eval_runtime": 326.0044, |
|
"eval_samples_per_second": 920.233, |
|
"eval_steps_per_second": 57.515, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.4, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 2.273293972015381, |
|
"eval_runtime": 326.966, |
|
"eval_samples_per_second": 917.527, |
|
"eval_steps_per_second": 57.345, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.271476984024048, |
|
"eval_runtime": 326.8892, |
|
"eval_samples_per_second": 917.742, |
|
"eval_steps_per_second": 57.359, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.3998, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.271629810333252, |
|
"eval_runtime": 326.5264, |
|
"eval_samples_per_second": 918.762, |
|
"eval_steps_per_second": 57.423, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 2.27506422996521, |
|
"eval_runtime": 326.712, |
|
"eval_samples_per_second": 918.24, |
|
"eval_steps_per_second": 57.39, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.4017, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.274268865585327, |
|
"eval_runtime": 326.6112, |
|
"eval_samples_per_second": 918.523, |
|
"eval_steps_per_second": 57.408, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.2739031314849854, |
|
"eval_runtime": 326.4511, |
|
"eval_samples_per_second": 918.974, |
|
"eval_steps_per_second": 57.436, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.4019, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 2.275505542755127, |
|
"eval_runtime": 329.4605, |
|
"eval_samples_per_second": 910.58, |
|
"eval_steps_per_second": 56.911, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.269094228744507, |
|
"eval_runtime": 327.3789, |
|
"eval_samples_per_second": 916.369, |
|
"eval_steps_per_second": 57.273, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.398, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 2.2705538272857666, |
|
"eval_runtime": 327.1271, |
|
"eval_samples_per_second": 917.075, |
|
"eval_steps_per_second": 57.317, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 2.270341634750366, |
|
"eval_runtime": 326.9286, |
|
"eval_samples_per_second": 917.632, |
|
"eval_steps_per_second": 57.352, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.4027, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.2657225131988525, |
|
"eval_runtime": 326.8016, |
|
"eval_samples_per_second": 917.988, |
|
"eval_steps_per_second": 57.374, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.267418146133423, |
|
"eval_runtime": 326.6227, |
|
"eval_samples_per_second": 918.491, |
|
"eval_steps_per_second": 57.406, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.4, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.2748591899871826, |
|
"eval_runtime": 326.8527, |
|
"eval_samples_per_second": 917.845, |
|
"eval_steps_per_second": 57.365, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.2713701725006104, |
|
"eval_runtime": 326.3767, |
|
"eval_samples_per_second": 919.183, |
|
"eval_steps_per_second": 57.449, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.4046, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.2694690227508545, |
|
"eval_runtime": 326.9136, |
|
"eval_samples_per_second": 917.674, |
|
"eval_steps_per_second": 57.355, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 2.2724227905273438, |
|
"eval_runtime": 326.9654, |
|
"eval_samples_per_second": 917.528, |
|
"eval_steps_per_second": 57.346, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.4033, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.2697391510009766, |
|
"eval_runtime": 326.8958, |
|
"eval_samples_per_second": 917.724, |
|
"eval_steps_per_second": 57.358, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 2.2697041034698486, |
|
"eval_runtime": 326.8461, |
|
"eval_samples_per_second": 917.863, |
|
"eval_steps_per_second": 57.366, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.3981, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.267427444458008, |
|
"eval_runtime": 327.9149, |
|
"eval_samples_per_second": 914.872, |
|
"eval_steps_per_second": 57.179, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.266889810562134, |
|
"eval_runtime": 327.4325, |
|
"eval_samples_per_second": 916.219, |
|
"eval_steps_per_second": 57.264, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.4029, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 2.275509834289551, |
|
"eval_runtime": 327.0353, |
|
"eval_samples_per_second": 917.332, |
|
"eval_steps_per_second": 57.333, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.2664170265197754, |
|
"eval_runtime": 329.3443, |
|
"eval_samples_per_second": 910.901, |
|
"eval_steps_per_second": 56.931, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.4046, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.2758920192718506, |
|
"eval_runtime": 328.0111, |
|
"eval_samples_per_second": 914.603, |
|
"eval_steps_per_second": 57.163, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 2.2689473628997803, |
|
"eval_runtime": 327.8597, |
|
"eval_samples_per_second": 915.026, |
|
"eval_steps_per_second": 57.189, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.4056, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.2710442543029785, |
|
"eval_runtime": 327.6707, |
|
"eval_samples_per_second": 915.553, |
|
"eval_steps_per_second": 57.222, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.2743895053863525, |
|
"eval_runtime": 326.938, |
|
"eval_samples_per_second": 917.605, |
|
"eval_steps_per_second": 57.35, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.4036, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 2.265347719192505, |
|
"eval_runtime": 327.8639, |
|
"eval_samples_per_second": 915.014, |
|
"eval_steps_per_second": 57.188, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 2.264220952987671, |
|
"eval_runtime": 328.2384, |
|
"eval_samples_per_second": 913.97, |
|
"eval_steps_per_second": 57.123, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.3961, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.2702980041503906, |
|
"eval_runtime": 328.1483, |
|
"eval_samples_per_second": 914.221, |
|
"eval_steps_per_second": 57.139, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 2.2682902812957764, |
|
"eval_runtime": 327.4533, |
|
"eval_samples_per_second": 916.161, |
|
"eval_steps_per_second": 57.26, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.3939, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 2.2746386528015137, |
|
"eval_runtime": 327.8678, |
|
"eval_samples_per_second": 915.003, |
|
"eval_steps_per_second": 57.188, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.2666993141174316, |
|
"eval_runtime": 329.1807, |
|
"eval_samples_per_second": 911.353, |
|
"eval_steps_per_second": 56.96, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.3998, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 2.268972396850586, |
|
"eval_runtime": 328.4073, |
|
"eval_samples_per_second": 913.5, |
|
"eval_steps_per_second": 57.094, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 2.2696826457977295, |
|
"eval_runtime": 329.554, |
|
"eval_samples_per_second": 910.321, |
|
"eval_steps_per_second": 56.895, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.3921, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.268064498901367, |
|
"eval_runtime": 328.2902, |
|
"eval_samples_per_second": 913.826, |
|
"eval_steps_per_second": 57.114, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 2.27397084236145, |
|
"eval_runtime": 328.4539, |
|
"eval_samples_per_second": 913.37, |
|
"eval_steps_per_second": 57.086, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.4011, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 2.270357608795166, |
|
"eval_runtime": 328.9931, |
|
"eval_samples_per_second": 911.873, |
|
"eval_steps_per_second": 56.992, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.2666330337524414, |
|
"eval_runtime": 328.6018, |
|
"eval_samples_per_second": 912.959, |
|
"eval_steps_per_second": 57.06, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.3948, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 2.2689247131347656, |
|
"eval_runtime": 328.0791, |
|
"eval_samples_per_second": 914.414, |
|
"eval_steps_per_second": 57.151, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 2.2741663455963135, |
|
"eval_runtime": 329.8118, |
|
"eval_samples_per_second": 909.61, |
|
"eval_steps_per_second": 56.851, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.3957, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.2755067348480225, |
|
"eval_runtime": 329.6482, |
|
"eval_samples_per_second": 910.061, |
|
"eval_steps_per_second": 56.879, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 2.268922805786133, |
|
"eval_runtime": 328.948, |
|
"eval_samples_per_second": 911.998, |
|
"eval_steps_per_second": 57.0, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.3971, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 2.271690607070923, |
|
"eval_runtime": 328.8273, |
|
"eval_samples_per_second": 912.333, |
|
"eval_steps_per_second": 57.021, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.2689971923828125, |
|
"eval_runtime": 329.7312, |
|
"eval_samples_per_second": 909.832, |
|
"eval_steps_per_second": 56.864, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.3982, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 2.264453649520874, |
|
"eval_runtime": 329.0657, |
|
"eval_samples_per_second": 911.672, |
|
"eval_steps_per_second": 56.98, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 2.2726194858551025, |
|
"eval_runtime": 328.4591, |
|
"eval_samples_per_second": 913.356, |
|
"eval_steps_per_second": 57.085, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.4005, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.262789011001587, |
|
"eval_runtime": 329.0087, |
|
"eval_samples_per_second": 911.83, |
|
"eval_steps_per_second": 56.989, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 2.2725658416748047, |
|
"eval_runtime": 331.131, |
|
"eval_samples_per_second": 905.986, |
|
"eval_steps_per_second": 56.624, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.4037, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 2.2759974002838135, |
|
"eval_runtime": 329.3386, |
|
"eval_samples_per_second": 910.917, |
|
"eval_steps_per_second": 56.932, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.2662434577941895, |
|
"eval_runtime": 331.0495, |
|
"eval_samples_per_second": 906.209, |
|
"eval_steps_per_second": 56.638, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.4031, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 2.272948741912842, |
|
"eval_runtime": 329.451, |
|
"eval_samples_per_second": 910.606, |
|
"eval_steps_per_second": 56.913, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 2.270596742630005, |
|
"eval_runtime": 328.9394, |
|
"eval_samples_per_second": 912.022, |
|
"eval_steps_per_second": 57.001, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.4025, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.2684247493743896, |
|
"eval_runtime": 328.8064, |
|
"eval_samples_per_second": 912.391, |
|
"eval_steps_per_second": 57.024, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 2.2634849548339844, |
|
"eval_runtime": 329.3927, |
|
"eval_samples_per_second": 910.767, |
|
"eval_steps_per_second": 56.923, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.409, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 2.2605979442596436, |
|
"eval_runtime": 330.2691, |
|
"eval_samples_per_second": 908.35, |
|
"eval_steps_per_second": 56.772, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.2664294242858887, |
|
"eval_runtime": 334.9875, |
|
"eval_samples_per_second": 895.556, |
|
"eval_steps_per_second": 55.972, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.4085, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 2.2646701335906982, |
|
"eval_runtime": 332.6305, |
|
"eval_samples_per_second": 901.902, |
|
"eval_steps_per_second": 56.369, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 2.265587329864502, |
|
"eval_runtime": 330.3094, |
|
"eval_samples_per_second": 908.239, |
|
"eval_steps_per_second": 56.765, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.3971, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.265507221221924, |
|
"eval_runtime": 332.5509, |
|
"eval_samples_per_second": 902.118, |
|
"eval_steps_per_second": 56.382, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 2.2681467533111572, |
|
"eval_runtime": 329.8973, |
|
"eval_samples_per_second": 909.374, |
|
"eval_steps_per_second": 56.836, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.3946, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 2.267101526260376, |
|
"eval_runtime": 329.91, |
|
"eval_samples_per_second": 909.339, |
|
"eval_steps_per_second": 56.834, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.2659785747528076, |
|
"eval_runtime": 332.3096, |
|
"eval_samples_per_second": 902.772, |
|
"eval_steps_per_second": 56.423, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.4063, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 2.2696707248687744, |
|
"eval_runtime": 329.9244, |
|
"eval_samples_per_second": 909.299, |
|
"eval_steps_per_second": 56.831, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 2.2705624103546143, |
|
"eval_runtime": 330.8986, |
|
"eval_samples_per_second": 906.622, |
|
"eval_steps_per_second": 56.664, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.399, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.2625114917755127, |
|
"eval_runtime": 330.8268, |
|
"eval_samples_per_second": 906.819, |
|
"eval_steps_per_second": 56.676, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 2.26986026763916, |
|
"eval_runtime": 330.4632, |
|
"eval_samples_per_second": 907.817, |
|
"eval_steps_per_second": 56.739, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.4024, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 2.2622313499450684, |
|
"eval_runtime": 331.2446, |
|
"eval_samples_per_second": 905.675, |
|
"eval_steps_per_second": 56.605, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.269458293914795, |
|
"eval_runtime": 330.8485, |
|
"eval_samples_per_second": 906.759, |
|
"eval_steps_per_second": 56.672, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.4035, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 2.2699954509735107, |
|
"eval_runtime": 332.9859, |
|
"eval_samples_per_second": 900.939, |
|
"eval_steps_per_second": 56.309, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 2.262361526489258, |
|
"eval_runtime": 333.2535, |
|
"eval_samples_per_second": 900.216, |
|
"eval_steps_per_second": 56.263, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.4061, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.2690372467041016, |
|
"eval_runtime": 332.4767, |
|
"eval_samples_per_second": 902.319, |
|
"eval_steps_per_second": 56.395, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_loss": 2.265334367752075, |
|
"eval_runtime": 333.384, |
|
"eval_samples_per_second": 899.863, |
|
"eval_steps_per_second": 56.241, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.4044, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 2.267867088317871, |
|
"eval_runtime": 332.4491, |
|
"eval_samples_per_second": 902.394, |
|
"eval_steps_per_second": 56.4, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.2657666206359863, |
|
"eval_runtime": 337.9264, |
|
"eval_samples_per_second": 887.767, |
|
"eval_steps_per_second": 55.485, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.3996, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 2.2680134773254395, |
|
"eval_runtime": 335.9795, |
|
"eval_samples_per_second": 892.912, |
|
"eval_steps_per_second": 55.807, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 2.26682186126709, |
|
"eval_runtime": 332.0277, |
|
"eval_samples_per_second": 903.539, |
|
"eval_steps_per_second": 56.471, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.3943, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.2689149379730225, |
|
"eval_runtime": 332.3397, |
|
"eval_samples_per_second": 902.691, |
|
"eval_steps_per_second": 56.418, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"eval_loss": 2.2701900005340576, |
|
"eval_runtime": 333.2287, |
|
"eval_samples_per_second": 900.283, |
|
"eval_steps_per_second": 56.268, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.3948, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 2.2652790546417236, |
|
"eval_runtime": 332.3733, |
|
"eval_samples_per_second": 902.6, |
|
"eval_steps_per_second": 56.412, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.262141466140747, |
|
"eval_runtime": 332.7579, |
|
"eval_samples_per_second": 901.556, |
|
"eval_steps_per_second": 56.347, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.4047, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_loss": 2.272305488586426, |
|
"eval_runtime": 332.21, |
|
"eval_samples_per_second": 903.043, |
|
"eval_steps_per_second": 56.44, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 2.271768808364868, |
|
"eval_runtime": 334.301, |
|
"eval_samples_per_second": 897.395, |
|
"eval_steps_per_second": 56.087, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.4057, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.266768217086792, |
|
"eval_runtime": 331.8859, |
|
"eval_samples_per_second": 903.925, |
|
"eval_steps_per_second": 56.495, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 2.264948844909668, |
|
"eval_runtime": 333.4261, |
|
"eval_samples_per_second": 899.75, |
|
"eval_steps_per_second": 56.234, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.3901, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_loss": 2.2699382305145264, |
|
"eval_runtime": 334.7905, |
|
"eval_samples_per_second": 896.083, |
|
"eval_steps_per_second": 56.005, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.2682831287384033, |
|
"eval_runtime": 335.082, |
|
"eval_samples_per_second": 895.303, |
|
"eval_steps_per_second": 55.956, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.3942, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 2.2679033279418945, |
|
"eval_runtime": 333.2769, |
|
"eval_samples_per_second": 900.152, |
|
"eval_steps_per_second": 56.26, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_loss": 2.264688014984131, |
|
"eval_runtime": 335.8312, |
|
"eval_samples_per_second": 893.306, |
|
"eval_steps_per_second": 55.832, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.4052, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.265596866607666, |
|
"eval_runtime": 333.6068, |
|
"eval_samples_per_second": 899.262, |
|
"eval_steps_per_second": 56.204, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 2.267854690551758, |
|
"eval_runtime": 333.2939, |
|
"eval_samples_per_second": 900.107, |
|
"eval_steps_per_second": 56.257, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.401, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 2.268515110015869, |
|
"eval_runtime": 332.5102, |
|
"eval_samples_per_second": 902.228, |
|
"eval_steps_per_second": 56.389, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.26540207862854, |
|
"eval_runtime": 332.9978, |
|
"eval_samples_per_second": 900.907, |
|
"eval_steps_per_second": 56.307, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.4012, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_loss": 2.260671854019165, |
|
"eval_runtime": 333.82, |
|
"eval_samples_per_second": 898.688, |
|
"eval_steps_per_second": 56.168, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_loss": 2.2668306827545166, |
|
"eval_runtime": 334.7781, |
|
"eval_samples_per_second": 896.116, |
|
"eval_steps_per_second": 56.007, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.4015, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.267199754714966, |
|
"eval_runtime": 333.9129, |
|
"eval_samples_per_second": 898.438, |
|
"eval_steps_per_second": 56.152, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"eval_loss": 2.268502712249756, |
|
"eval_runtime": 334.246, |
|
"eval_samples_per_second": 897.542, |
|
"eval_steps_per_second": 56.096, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.4039, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 2.267529010772705, |
|
"eval_runtime": 333.8135, |
|
"eval_samples_per_second": 898.705, |
|
"eval_steps_per_second": 56.169, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.2702226638793945, |
|
"eval_runtime": 336.4463, |
|
"eval_samples_per_second": 891.673, |
|
"eval_steps_per_second": 55.73, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.3927, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 2.268892526626587, |
|
"eval_runtime": 334.6454, |
|
"eval_samples_per_second": 896.471, |
|
"eval_steps_per_second": 56.029, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 2.2673678398132324, |
|
"eval_runtime": 334.3792, |
|
"eval_samples_per_second": 897.185, |
|
"eval_steps_per_second": 56.074, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.3998, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.2693703174591064, |
|
"eval_runtime": 336.7748, |
|
"eval_samples_per_second": 890.803, |
|
"eval_steps_per_second": 55.675, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 2.264862298965454, |
|
"eval_runtime": 336.6189, |
|
"eval_samples_per_second": 891.216, |
|
"eval_steps_per_second": 55.701, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.404, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_loss": 2.263476848602295, |
|
"eval_runtime": 333.0441, |
|
"eval_samples_per_second": 900.782, |
|
"eval_steps_per_second": 56.299, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.2680845260620117, |
|
"eval_runtime": 333.2221, |
|
"eval_samples_per_second": 900.301, |
|
"eval_steps_per_second": 56.269, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.4023, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"eval_loss": 2.260050058364868, |
|
"eval_runtime": 333.6835, |
|
"eval_samples_per_second": 899.056, |
|
"eval_steps_per_second": 56.191, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_loss": 2.2660913467407227, |
|
"eval_runtime": 334.5678, |
|
"eval_samples_per_second": 896.679, |
|
"eval_steps_per_second": 56.042, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.393, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.261288642883301, |
|
"eval_runtime": 334.5524, |
|
"eval_samples_per_second": 896.721, |
|
"eval_steps_per_second": 56.045, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.271660327911377, |
|
"eval_runtime": 334.4275, |
|
"eval_samples_per_second": 897.055, |
|
"eval_steps_per_second": 56.066, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.402, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 2.2671592235565186, |
|
"eval_runtime": 333.6753, |
|
"eval_samples_per_second": 899.078, |
|
"eval_steps_per_second": 56.192, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.263709545135498, |
|
"eval_runtime": 333.67, |
|
"eval_samples_per_second": 899.092, |
|
"eval_steps_per_second": 56.193, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.4047, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.2704622745513916, |
|
"eval_runtime": 336.6456, |
|
"eval_samples_per_second": 891.145, |
|
"eval_steps_per_second": 55.697, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 2.2682485580444336, |
|
"eval_runtime": 337.2045, |
|
"eval_samples_per_second": 889.668, |
|
"eval_steps_per_second": 55.604, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.4045, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.2630040645599365, |
|
"eval_runtime": 335.66, |
|
"eval_samples_per_second": 893.761, |
|
"eval_steps_per_second": 55.86, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 2.269909143447876, |
|
"eval_runtime": 336.6708, |
|
"eval_samples_per_second": 891.078, |
|
"eval_steps_per_second": 55.692, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.3973, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"eval_loss": 2.2578797340393066, |
|
"eval_runtime": 335.7138, |
|
"eval_samples_per_second": 893.618, |
|
"eval_steps_per_second": 55.851, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.2601444721221924, |
|
"eval_runtime": 334.2559, |
|
"eval_samples_per_second": 897.516, |
|
"eval_steps_per_second": 56.095, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.399, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 2.26086688041687, |
|
"eval_runtime": 334.4066, |
|
"eval_samples_per_second": 897.112, |
|
"eval_steps_per_second": 56.069, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 2.269728660583496, |
|
"eval_runtime": 334.0805, |
|
"eval_samples_per_second": 897.987, |
|
"eval_steps_per_second": 56.124, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.399, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.2630419731140137, |
|
"eval_runtime": 334.5552, |
|
"eval_samples_per_second": 896.713, |
|
"eval_steps_per_second": 56.045, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 2.2658443450927734, |
|
"eval_runtime": 336.5508, |
|
"eval_samples_per_second": 891.396, |
|
"eval_steps_per_second": 55.712, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.3995, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 2.265606641769409, |
|
"eval_runtime": 335.2841, |
|
"eval_samples_per_second": 894.763, |
|
"eval_steps_per_second": 55.923, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.2688894271850586, |
|
"eval_runtime": 337.311, |
|
"eval_samples_per_second": 889.387, |
|
"eval_steps_per_second": 55.587, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.3929, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 2.2678134441375732, |
|
"eval_runtime": 337.3214, |
|
"eval_samples_per_second": 889.359, |
|
"eval_steps_per_second": 55.585, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 2.2694430351257324, |
|
"eval_runtime": 336.6085, |
|
"eval_samples_per_second": 891.243, |
|
"eval_steps_per_second": 55.703, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.404, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.2631914615631104, |
|
"eval_runtime": 337.5687, |
|
"eval_samples_per_second": 888.708, |
|
"eval_steps_per_second": 55.544, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 2.2656803131103516, |
|
"eval_runtime": 336.4606, |
|
"eval_samples_per_second": 891.635, |
|
"eval_steps_per_second": 55.727, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.3932, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 2.2641873359680176, |
|
"eval_runtime": 335.6292, |
|
"eval_samples_per_second": 893.844, |
|
"eval_steps_per_second": 55.865, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.260714054107666, |
|
"eval_runtime": 335.5993, |
|
"eval_samples_per_second": 893.923, |
|
"eval_steps_per_second": 55.87, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.3985, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 2.2634730339050293, |
|
"eval_runtime": 335.566, |
|
"eval_samples_per_second": 894.012, |
|
"eval_steps_per_second": 55.876, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_loss": 2.2645463943481445, |
|
"eval_runtime": 337.3641, |
|
"eval_samples_per_second": 889.247, |
|
"eval_steps_per_second": 55.578, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.3997, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.2654054164886475, |
|
"eval_runtime": 336.173, |
|
"eval_samples_per_second": 892.398, |
|
"eval_steps_per_second": 55.775, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"eval_loss": 2.2672231197357178, |
|
"eval_runtime": 336.1452, |
|
"eval_samples_per_second": 892.472, |
|
"eval_steps_per_second": 55.779, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.396, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_loss": 2.2665934562683105, |
|
"eval_runtime": 336.5057, |
|
"eval_samples_per_second": 891.515, |
|
"eval_steps_per_second": 55.72, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.2708349227905273, |
|
"eval_runtime": 335.6471, |
|
"eval_samples_per_second": 893.796, |
|
"eval_steps_per_second": 55.862, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.4012, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 2.2706656455993652, |
|
"eval_runtime": 335.6113, |
|
"eval_samples_per_second": 893.891, |
|
"eval_steps_per_second": 55.868, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_loss": 2.2683677673339844, |
|
"eval_runtime": 335.9133, |
|
"eval_samples_per_second": 893.087, |
|
"eval_steps_per_second": 55.818, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.4074, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.2676126956939697, |
|
"eval_runtime": 336.2793, |
|
"eval_samples_per_second": 892.116, |
|
"eval_steps_per_second": 55.757, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 2.2657711505889893, |
|
"eval_runtime": 336.5159, |
|
"eval_samples_per_second": 891.488, |
|
"eval_steps_per_second": 55.718, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.3965, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_loss": 2.2716164588928223, |
|
"eval_runtime": 335.6672, |
|
"eval_samples_per_second": 893.742, |
|
"eval_steps_per_second": 55.859, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.2655858993530273, |
|
"eval_runtime": 335.9521, |
|
"eval_samples_per_second": 892.984, |
|
"eval_steps_per_second": 55.812, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.4021, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"eval_loss": 2.2689690589904785, |
|
"eval_runtime": 336.4235, |
|
"eval_samples_per_second": 891.733, |
|
"eval_steps_per_second": 55.733, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_loss": 2.265604257583618, |
|
"eval_runtime": 337.1771, |
|
"eval_samples_per_second": 889.74, |
|
"eval_steps_per_second": 55.609, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.3981, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.2659354209899902, |
|
"eval_runtime": 337.0582, |
|
"eval_samples_per_second": 890.054, |
|
"eval_steps_per_second": 55.628, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 2.2666890621185303, |
|
"eval_runtime": 336.7986, |
|
"eval_samples_per_second": 890.74, |
|
"eval_steps_per_second": 55.671, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.3974, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 2.2654528617858887, |
|
"eval_runtime": 338.6552, |
|
"eval_samples_per_second": 885.857, |
|
"eval_steps_per_second": 55.366, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.2675693035125732, |
|
"eval_runtime": 336.4191, |
|
"eval_samples_per_second": 891.745, |
|
"eval_steps_per_second": 55.734, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.3964, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 2.265490770339966, |
|
"eval_runtime": 338.7304, |
|
"eval_samples_per_second": 885.66, |
|
"eval_steps_per_second": 55.354, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_loss": 2.2635693550109863, |
|
"eval_runtime": 337.2341, |
|
"eval_samples_per_second": 889.59, |
|
"eval_steps_per_second": 55.599, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.3933, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.267894983291626, |
|
"eval_runtime": 337.1638, |
|
"eval_samples_per_second": 889.775, |
|
"eval_steps_per_second": 55.611, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_loss": 2.266650438308716, |
|
"eval_runtime": 337.1959, |
|
"eval_samples_per_second": 889.69, |
|
"eval_steps_per_second": 55.606, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.4066, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 2.264688730239868, |
|
"eval_runtime": 338.0924, |
|
"eval_samples_per_second": 887.331, |
|
"eval_steps_per_second": 55.458, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.265735149383545, |
|
"eval_runtime": 338.8846, |
|
"eval_samples_per_second": 885.257, |
|
"eval_steps_per_second": 55.329, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.4027, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 2.2628121376037598, |
|
"eval_runtime": 337.9881, |
|
"eval_samples_per_second": 887.605, |
|
"eval_steps_per_second": 55.475, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_loss": 2.2642323970794678, |
|
"eval_runtime": 339.1796, |
|
"eval_samples_per_second": 884.487, |
|
"eval_steps_per_second": 55.28, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.4029, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.2676889896392822, |
|
"eval_runtime": 338.3313, |
|
"eval_samples_per_second": 886.705, |
|
"eval_steps_per_second": 55.419, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 2.2704169750213623, |
|
"eval_runtime": 340.3735, |
|
"eval_samples_per_second": 881.385, |
|
"eval_steps_per_second": 55.087, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.3958, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 2.2650022506713867, |
|
"eval_runtime": 337.884, |
|
"eval_samples_per_second": 887.879, |
|
"eval_steps_per_second": 55.492, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.265009880065918, |
|
"eval_runtime": 339.0311, |
|
"eval_samples_per_second": 884.875, |
|
"eval_steps_per_second": 55.305, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.4054, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 2.2680423259735107, |
|
"eval_runtime": 338.3773, |
|
"eval_samples_per_second": 886.584, |
|
"eval_steps_per_second": 55.412, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 2.2601048946380615, |
|
"eval_runtime": 338.8902, |
|
"eval_samples_per_second": 885.243, |
|
"eval_steps_per_second": 55.328, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.3984, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.267129898071289, |
|
"eval_runtime": 341.218, |
|
"eval_samples_per_second": 879.203, |
|
"eval_steps_per_second": 54.95, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"eval_loss": 2.263897657394409, |
|
"eval_runtime": 339.0811, |
|
"eval_samples_per_second": 884.744, |
|
"eval_steps_per_second": 55.296, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.4005, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"eval_loss": 2.262948989868164, |
|
"eval_runtime": 338.4625, |
|
"eval_samples_per_second": 886.361, |
|
"eval_steps_per_second": 55.398, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.2656354904174805, |
|
"eval_runtime": 339.1914, |
|
"eval_samples_per_second": 884.456, |
|
"eval_steps_per_second": 55.279, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.3962, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_loss": 2.2646210193634033, |
|
"eval_runtime": 339.4764, |
|
"eval_samples_per_second": 883.714, |
|
"eval_steps_per_second": 55.232, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"eval_loss": 2.2571327686309814, |
|
"eval_runtime": 340.4494, |
|
"eval_samples_per_second": 881.188, |
|
"eval_steps_per_second": 55.074, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.4033, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 2.2689077854156494, |
|
"eval_runtime": 339.6348, |
|
"eval_samples_per_second": 883.302, |
|
"eval_steps_per_second": 55.206, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 2.263167381286621, |
|
"eval_runtime": 340.3091, |
|
"eval_samples_per_second": 881.552, |
|
"eval_steps_per_second": 55.097, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.4064, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 2.2632765769958496, |
|
"eval_runtime": 342.5582, |
|
"eval_samples_per_second": 875.764, |
|
"eval_steps_per_second": 54.735, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.2693655490875244, |
|
"eval_runtime": 342.7491, |
|
"eval_samples_per_second": 875.276, |
|
"eval_steps_per_second": 54.705, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.3967, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 2.2685184478759766, |
|
"eval_runtime": 342.158, |
|
"eval_samples_per_second": 876.788, |
|
"eval_steps_per_second": 54.799, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_loss": 2.2636401653289795, |
|
"eval_runtime": 341.2652, |
|
"eval_samples_per_second": 879.082, |
|
"eval_steps_per_second": 54.943, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.4002, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.268721103668213, |
|
"eval_runtime": 343.2554, |
|
"eval_samples_per_second": 873.985, |
|
"eval_steps_per_second": 54.624, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 2.263157844543457, |
|
"eval_runtime": 341.2197, |
|
"eval_samples_per_second": 879.199, |
|
"eval_steps_per_second": 54.95, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.4045, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"eval_loss": 2.262470006942749, |
|
"eval_runtime": 342.6853, |
|
"eval_samples_per_second": 875.439, |
|
"eval_steps_per_second": 54.715, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.267735242843628, |
|
"eval_runtime": 346.6665, |
|
"eval_samples_per_second": 865.385, |
|
"eval_steps_per_second": 54.087, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.4096, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 2.256277322769165, |
|
"eval_runtime": 340.6214, |
|
"eval_samples_per_second": 880.743, |
|
"eval_steps_per_second": 55.046, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 2.264164447784424, |
|
"eval_runtime": 341.931, |
|
"eval_samples_per_second": 877.37, |
|
"eval_steps_per_second": 54.836, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.4004, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.269155979156494, |
|
"eval_runtime": 342.3742, |
|
"eval_samples_per_second": 876.234, |
|
"eval_steps_per_second": 54.765, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 2.2696123123168945, |
|
"eval_runtime": 345.6816, |
|
"eval_samples_per_second": 867.851, |
|
"eval_steps_per_second": 54.241, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.4065, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 2.2579238414764404, |
|
"eval_runtime": 341.8896, |
|
"eval_samples_per_second": 877.476, |
|
"eval_steps_per_second": 54.842, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.266026020050049, |
|
"eval_runtime": 344.4173, |
|
"eval_samples_per_second": 871.036, |
|
"eval_steps_per_second": 54.44, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.4025, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 2.2654054164886475, |
|
"eval_runtime": 342.2708, |
|
"eval_samples_per_second": 876.499, |
|
"eval_steps_per_second": 54.781, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_loss": 2.2706494331359863, |
|
"eval_runtime": 341.5445, |
|
"eval_samples_per_second": 878.363, |
|
"eval_steps_per_second": 54.898, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.3993, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.270448684692383, |
|
"eval_runtime": 340.9974, |
|
"eval_samples_per_second": 879.772, |
|
"eval_steps_per_second": 54.986, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 2.2663590908050537, |
|
"eval_runtime": 340.7056, |
|
"eval_samples_per_second": 880.526, |
|
"eval_steps_per_second": 55.033, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.4034, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 2.2659454345703125, |
|
"eval_runtime": 341.9489, |
|
"eval_samples_per_second": 877.324, |
|
"eval_steps_per_second": 54.833, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.268005609512329, |
|
"eval_runtime": 340.8655, |
|
"eval_samples_per_second": 880.113, |
|
"eval_steps_per_second": 55.007, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.4004, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 2.2611002922058105, |
|
"eval_runtime": 340.9511, |
|
"eval_samples_per_second": 879.891, |
|
"eval_steps_per_second": 54.993, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 2.264587879180908, |
|
"eval_runtime": 342.5116, |
|
"eval_samples_per_second": 875.883, |
|
"eval_steps_per_second": 54.743, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.4025, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.268247604370117, |
|
"eval_runtime": 343.4269, |
|
"eval_samples_per_second": 873.548, |
|
"eval_steps_per_second": 54.597, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 2.264587640762329, |
|
"eval_runtime": 341.3392, |
|
"eval_samples_per_second": 878.891, |
|
"eval_steps_per_second": 54.931, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.4063, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 2.2597994804382324, |
|
"eval_runtime": 343.1178, |
|
"eval_samples_per_second": 874.335, |
|
"eval_steps_per_second": 54.646, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.267334461212158, |
|
"eval_runtime": 344.4059, |
|
"eval_samples_per_second": 871.065, |
|
"eval_steps_per_second": 54.442, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.4071, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_loss": 2.264587879180908, |
|
"eval_runtime": 342.5952, |
|
"eval_samples_per_second": 875.669, |
|
"eval_steps_per_second": 54.729, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_loss": 2.2672042846679688, |
|
"eval_runtime": 342.3657, |
|
"eval_samples_per_second": 876.256, |
|
"eval_steps_per_second": 54.766, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.401, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.2647833824157715, |
|
"eval_runtime": 343.2309, |
|
"eval_samples_per_second": 874.047, |
|
"eval_steps_per_second": 54.628, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_loss": 2.2654144763946533, |
|
"eval_runtime": 344.1951, |
|
"eval_samples_per_second": 871.599, |
|
"eval_steps_per_second": 54.475, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.402, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 2.2664010524749756, |
|
"eval_runtime": 342.7081, |
|
"eval_samples_per_second": 875.381, |
|
"eval_steps_per_second": 54.711, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.2682883739471436, |
|
"eval_runtime": 342.1336, |
|
"eval_samples_per_second": 876.851, |
|
"eval_steps_per_second": 54.803, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.4004, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"eval_loss": 2.261821985244751, |
|
"eval_runtime": 343.7815, |
|
"eval_samples_per_second": 872.647, |
|
"eval_steps_per_second": 54.54, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_loss": 2.2668938636779785, |
|
"eval_runtime": 344.1074, |
|
"eval_samples_per_second": 871.821, |
|
"eval_steps_per_second": 54.489, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.4001, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.2630324363708496, |
|
"eval_runtime": 341.9786, |
|
"eval_samples_per_second": 877.248, |
|
"eval_steps_per_second": 54.828, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 2.2631518840789795, |
|
"eval_runtime": 341.9226, |
|
"eval_samples_per_second": 877.391, |
|
"eval_steps_per_second": 54.837, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.4046, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 2.26960825920105, |
|
"eval_runtime": 344.2789, |
|
"eval_samples_per_second": 871.387, |
|
"eval_steps_per_second": 54.462, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.2641026973724365, |
|
"eval_runtime": 343.3436, |
|
"eval_samples_per_second": 873.76, |
|
"eval_steps_per_second": 54.61, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.405, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 2.262655735015869, |
|
"eval_runtime": 344.8039, |
|
"eval_samples_per_second": 870.06, |
|
"eval_steps_per_second": 54.379, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_loss": 2.268143653869629, |
|
"eval_runtime": 343.934, |
|
"eval_samples_per_second": 872.26, |
|
"eval_steps_per_second": 54.516, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.4063, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.2603704929351807, |
|
"eval_runtime": 342.6448, |
|
"eval_samples_per_second": 875.542, |
|
"eval_steps_per_second": 54.721, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"eval_loss": 2.271454095840454, |
|
"eval_runtime": 343.324, |
|
"eval_samples_per_second": 873.81, |
|
"eval_steps_per_second": 54.613, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.3991, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_loss": 2.268319606781006, |
|
"eval_runtime": 342.6834, |
|
"eval_samples_per_second": 875.444, |
|
"eval_steps_per_second": 54.715, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.265730857849121, |
|
"eval_runtime": 346.275, |
|
"eval_samples_per_second": 866.363, |
|
"eval_steps_per_second": 54.148, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.405, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 2.2645092010498047, |
|
"eval_runtime": 343.3622, |
|
"eval_samples_per_second": 873.713, |
|
"eval_steps_per_second": 54.607, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"eval_loss": 2.2676303386688232, |
|
"eval_runtime": 343.161, |
|
"eval_samples_per_second": 874.225, |
|
"eval_steps_per_second": 54.639, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.3941, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.270566463470459, |
|
"eval_runtime": 344.7989, |
|
"eval_samples_per_second": 870.072, |
|
"eval_steps_per_second": 54.38, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 2.259324312210083, |
|
"eval_runtime": 344.3396, |
|
"eval_samples_per_second": 871.233, |
|
"eval_steps_per_second": 54.452, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.4041, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_loss": 2.267908811569214, |
|
"eval_runtime": 344.2377, |
|
"eval_samples_per_second": 871.491, |
|
"eval_steps_per_second": 54.468, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.2643110752105713, |
|
"eval_runtime": 343.3047, |
|
"eval_samples_per_second": 873.859, |
|
"eval_steps_per_second": 54.616, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.4001, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 2.2728431224823, |
|
"eval_runtime": 343.644, |
|
"eval_samples_per_second": 872.996, |
|
"eval_steps_per_second": 54.562, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"eval_loss": 2.263103485107422, |
|
"eval_runtime": 343.0897, |
|
"eval_samples_per_second": 874.407, |
|
"eval_steps_per_second": 54.65, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.3983, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.263552188873291, |
|
"eval_runtime": 344.7078, |
|
"eval_samples_per_second": 870.302, |
|
"eval_steps_per_second": 54.394, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 2.262969732284546, |
|
"eval_runtime": 343.3199, |
|
"eval_samples_per_second": 873.821, |
|
"eval_steps_per_second": 54.614, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.4003, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_loss": 2.2662770748138428, |
|
"eval_runtime": 344.4313, |
|
"eval_samples_per_second": 871.001, |
|
"eval_steps_per_second": 54.438, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.264718770980835, |
|
"eval_runtime": 344.3318, |
|
"eval_samples_per_second": 871.253, |
|
"eval_steps_per_second": 54.453, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.3981, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 2.2669222354888916, |
|
"eval_runtime": 344.4268, |
|
"eval_samples_per_second": 871.012, |
|
"eval_steps_per_second": 54.438, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"eval_loss": 2.266000509262085, |
|
"eval_runtime": 344.4815, |
|
"eval_samples_per_second": 870.874, |
|
"eval_steps_per_second": 54.43, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.3951, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.2692267894744873, |
|
"eval_runtime": 344.0579, |
|
"eval_samples_per_second": 871.946, |
|
"eval_steps_per_second": 54.497, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"eval_loss": 2.264406442642212, |
|
"eval_runtime": 344.7783, |
|
"eval_samples_per_second": 870.124, |
|
"eval_steps_per_second": 54.383, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.4013, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 2.2610132694244385, |
|
"eval_runtime": 344.5393, |
|
"eval_samples_per_second": 870.728, |
|
"eval_steps_per_second": 54.42, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.26550555229187, |
|
"eval_runtime": 344.5292, |
|
"eval_samples_per_second": 870.754, |
|
"eval_steps_per_second": 54.422, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.4, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 2.25915789604187, |
|
"eval_runtime": 344.8958, |
|
"eval_samples_per_second": 869.828, |
|
"eval_steps_per_second": 54.364, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"eval_loss": 2.266591787338257, |
|
"eval_runtime": 344.5939, |
|
"eval_samples_per_second": 870.59, |
|
"eval_steps_per_second": 54.412, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.0, |
|
"loss": 2.3975, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.2684991359710693, |
|
"eval_runtime": 344.5329, |
|
"eval_samples_per_second": 870.744, |
|
"eval_steps_per_second": 54.422, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"step": 2400000, |
|
"total_flos": 8.367702695823237e+17, |
|
"train_loss": 2.4076748518880207, |
|
"train_runtime": 247856.7094, |
|
"train_samples_per_second": 154.928, |
|
"train_steps_per_second": 9.683 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 7, |
|
"save_steps": 32000, |
|
"total_flos": 8.367702695823237e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|