|
{ |
|
"best_metric": 2.4417428970336914, |
|
"best_model_checkpoint": "./model_tweets_2020_Q1_50/checkpoint-1888000", |
|
"epoch": 9.834292176820574, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.89373779296875, |
|
"eval_runtime": 220.7068, |
|
"eval_samples_per_second": 931.145, |
|
"eval_steps_per_second": 58.199, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.0726666666666665e-07, |
|
"loss": 3.073, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.76598858833313, |
|
"eval_runtime": 221.0774, |
|
"eval_samples_per_second": 929.584, |
|
"eval_steps_per_second": 58.102, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.7232513427734375, |
|
"eval_runtime": 221.6288, |
|
"eval_samples_per_second": 927.271, |
|
"eval_steps_per_second": 57.957, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0453333333333336e-07, |
|
"loss": 2.8244, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.687758207321167, |
|
"eval_runtime": 220.1262, |
|
"eval_samples_per_second": 933.601, |
|
"eval_steps_per_second": 58.353, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.6519503593444824, |
|
"eval_runtime": 220.495, |
|
"eval_samples_per_second": 932.039, |
|
"eval_steps_per_second": 58.255, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.018e-07, |
|
"loss": 2.7542, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.63004469871521, |
|
"eval_runtime": 220.2803, |
|
"eval_samples_per_second": 932.948, |
|
"eval_steps_per_second": 58.312, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.613522529602051, |
|
"eval_runtime": 221.2317, |
|
"eval_samples_per_second": 928.936, |
|
"eval_steps_per_second": 58.061, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.9906666666666667e-07, |
|
"loss": 2.7083, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 2.6067709922790527, |
|
"eval_runtime": 220.3177, |
|
"eval_samples_per_second": 932.789, |
|
"eval_steps_per_second": 58.302, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.5854294300079346, |
|
"eval_runtime": 220.8061, |
|
"eval_samples_per_second": 930.726, |
|
"eval_steps_per_second": 58.173, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.963333333333333e-07, |
|
"loss": 2.6752, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.575528860092163, |
|
"eval_runtime": 221.8521, |
|
"eval_samples_per_second": 926.338, |
|
"eval_steps_per_second": 57.899, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.5720720291137695, |
|
"eval_runtime": 221.4472, |
|
"eval_samples_per_second": 928.032, |
|
"eval_steps_per_second": 58.005, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.936e-07, |
|
"loss": 2.6657, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.5709290504455566, |
|
"eval_runtime": 220.9006, |
|
"eval_samples_per_second": 930.328, |
|
"eval_steps_per_second": 58.148, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.5656096935272217, |
|
"eval_runtime": 220.5433, |
|
"eval_samples_per_second": 931.835, |
|
"eval_steps_per_second": 58.243, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.908666666666667e-07, |
|
"loss": 2.6534, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 2.5558407306671143, |
|
"eval_runtime": 221.6371, |
|
"eval_samples_per_second": 927.236, |
|
"eval_steps_per_second": 57.955, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.5495829582214355, |
|
"eval_runtime": 220.7733, |
|
"eval_samples_per_second": 930.864, |
|
"eval_steps_per_second": 58.182, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 3.8813333333333334e-07, |
|
"loss": 2.646, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.547106981277466, |
|
"eval_runtime": 221.6448, |
|
"eval_samples_per_second": 927.204, |
|
"eval_steps_per_second": 57.953, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.5408244132995605, |
|
"eval_runtime": 221.6302, |
|
"eval_samples_per_second": 927.265, |
|
"eval_steps_per_second": 57.957, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.854e-07, |
|
"loss": 2.625, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.531517744064331, |
|
"eval_runtime": 223.6683, |
|
"eval_samples_per_second": 918.816, |
|
"eval_steps_per_second": 57.429, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 2.5364675521850586, |
|
"eval_runtime": 224.1465, |
|
"eval_samples_per_second": 916.856, |
|
"eval_steps_per_second": 57.306, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.8266666666666665e-07, |
|
"loss": 2.6222, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 2.5372273921966553, |
|
"eval_runtime": 221.7325, |
|
"eval_samples_per_second": 926.837, |
|
"eval_steps_per_second": 57.93, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 2.534186363220215, |
|
"eval_runtime": 222.3525, |
|
"eval_samples_per_second": 924.253, |
|
"eval_steps_per_second": 57.769, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.799333333333333e-07, |
|
"loss": 2.6256, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.5308265686035156, |
|
"eval_runtime": 221.538, |
|
"eval_samples_per_second": 927.651, |
|
"eval_steps_per_second": 57.981, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 2.5311617851257324, |
|
"eval_runtime": 224.2919, |
|
"eval_samples_per_second": 916.261, |
|
"eval_steps_per_second": 57.269, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.772e-07, |
|
"loss": 2.6074, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 2.522848129272461, |
|
"eval_runtime": 224.4507, |
|
"eval_samples_per_second": 915.613, |
|
"eval_steps_per_second": 57.229, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_loss": 2.529161214828491, |
|
"eval_runtime": 222.7477, |
|
"eval_samples_per_second": 922.613, |
|
"eval_steps_per_second": 57.666, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.7446666666666667e-07, |
|
"loss": 2.6071, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 2.5295047760009766, |
|
"eval_runtime": 223.0891, |
|
"eval_samples_per_second": 921.201, |
|
"eval_steps_per_second": 57.578, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 2.523491621017456, |
|
"eval_runtime": 221.4007, |
|
"eval_samples_per_second": 928.227, |
|
"eval_steps_per_second": 58.017, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.7173333333333333e-07, |
|
"loss": 2.5955, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 2.5219199657440186, |
|
"eval_runtime": 221.3605, |
|
"eval_samples_per_second": 928.395, |
|
"eval_steps_per_second": 58.028, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 2.5190882682800293, |
|
"eval_runtime": 221.1449, |
|
"eval_samples_per_second": 929.3, |
|
"eval_steps_per_second": 58.084, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.69e-07, |
|
"loss": 2.6036, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.517120361328125, |
|
"eval_runtime": 220.9198, |
|
"eval_samples_per_second": 930.247, |
|
"eval_steps_per_second": 58.143, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 2.5102434158325195, |
|
"eval_runtime": 221.7647, |
|
"eval_samples_per_second": 926.703, |
|
"eval_steps_per_second": 57.922, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.6626666666666664e-07, |
|
"loss": 2.6046, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 2.5070137977600098, |
|
"eval_runtime": 221.6584, |
|
"eval_samples_per_second": 927.147, |
|
"eval_steps_per_second": 57.95, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.5109376907348633, |
|
"eval_runtime": 221.3382, |
|
"eval_samples_per_second": 928.489, |
|
"eval_steps_per_second": 58.033, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.6353333333333335e-07, |
|
"loss": 2.5892, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.5104565620422363, |
|
"eval_runtime": 222.1683, |
|
"eval_samples_per_second": 925.019, |
|
"eval_steps_per_second": 57.817, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.508704423904419, |
|
"eval_runtime": 222.9629, |
|
"eval_samples_per_second": 921.723, |
|
"eval_steps_per_second": 57.61, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.608e-07, |
|
"loss": 2.5929, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.509392738342285, |
|
"eval_runtime": 223.3494, |
|
"eval_samples_per_second": 920.128, |
|
"eval_steps_per_second": 57.511, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.508585214614868, |
|
"eval_runtime": 222.7314, |
|
"eval_samples_per_second": 922.681, |
|
"eval_steps_per_second": 57.67, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.5806666666666666e-07, |
|
"loss": 2.5857, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.4991345405578613, |
|
"eval_runtime": 223.3332, |
|
"eval_samples_per_second": 920.195, |
|
"eval_steps_per_second": 57.515, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.508927822113037, |
|
"eval_runtime": 224.1404, |
|
"eval_samples_per_second": 916.881, |
|
"eval_steps_per_second": 57.308, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.553333333333333e-07, |
|
"loss": 2.5828, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.501734972000122, |
|
"eval_runtime": 223.1146, |
|
"eval_samples_per_second": 921.096, |
|
"eval_steps_per_second": 57.571, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 2.503918409347534, |
|
"eval_runtime": 223.3327, |
|
"eval_samples_per_second": 920.196, |
|
"eval_steps_per_second": 57.515, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.5259999999999997e-07, |
|
"loss": 2.5812, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.5064587593078613, |
|
"eval_runtime": 224.1587, |
|
"eval_samples_per_second": 916.806, |
|
"eval_steps_per_second": 57.303, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.508263111114502, |
|
"eval_runtime": 222.503, |
|
"eval_samples_per_second": 923.628, |
|
"eval_steps_per_second": 57.73, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.498666666666667e-07, |
|
"loss": 2.5775, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 2.509936571121216, |
|
"eval_runtime": 223.039, |
|
"eval_samples_per_second": 921.408, |
|
"eval_steps_per_second": 57.591, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.5078811645507812, |
|
"eval_runtime": 221.7646, |
|
"eval_samples_per_second": 926.703, |
|
"eval_steps_per_second": 57.922, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.4713333333333333e-07, |
|
"loss": 2.5711, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 2.4922046661376953, |
|
"eval_runtime": 223.0544, |
|
"eval_samples_per_second": 921.345, |
|
"eval_steps_per_second": 57.587, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 2.5012030601501465, |
|
"eval_runtime": 222.0392, |
|
"eval_samples_per_second": 925.557, |
|
"eval_steps_per_second": 57.85, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.444e-07, |
|
"loss": 2.5797, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 2.49989914894104, |
|
"eval_runtime": 223.8829, |
|
"eval_samples_per_second": 917.935, |
|
"eval_steps_per_second": 57.374, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 2.4881107807159424, |
|
"eval_runtime": 222.4413, |
|
"eval_samples_per_second": 923.884, |
|
"eval_steps_per_second": 57.746, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.416666666666667e-07, |
|
"loss": 2.5718, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 2.4960451126098633, |
|
"eval_runtime": 222.8741, |
|
"eval_samples_per_second": 922.09, |
|
"eval_steps_per_second": 57.633, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 2.490837574005127, |
|
"eval_runtime": 222.3679, |
|
"eval_samples_per_second": 924.189, |
|
"eval_steps_per_second": 57.765, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.3893333333333335e-07, |
|
"loss": 2.5627, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 2.4970648288726807, |
|
"eval_runtime": 223.472, |
|
"eval_samples_per_second": 919.623, |
|
"eval_steps_per_second": 57.479, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 2.4916465282440186, |
|
"eval_runtime": 222.5109, |
|
"eval_samples_per_second": 923.595, |
|
"eval_steps_per_second": 57.728, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.3619999999999995e-07, |
|
"loss": 2.5641, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 2.4971389770507812, |
|
"eval_runtime": 222.1533, |
|
"eval_samples_per_second": 925.082, |
|
"eval_steps_per_second": 57.82, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 2.495426654815674, |
|
"eval_runtime": 223.2728, |
|
"eval_samples_per_second": 920.444, |
|
"eval_steps_per_second": 57.531, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.3346666666666666e-07, |
|
"loss": 2.5633, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 2.485994815826416, |
|
"eval_runtime": 222.7264, |
|
"eval_samples_per_second": 922.702, |
|
"eval_steps_per_second": 57.672, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 2.4893651008605957, |
|
"eval_runtime": 223.4251, |
|
"eval_samples_per_second": 919.816, |
|
"eval_steps_per_second": 57.491, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.307333333333333e-07, |
|
"loss": 2.5676, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 2.489337205886841, |
|
"eval_runtime": 222.9423, |
|
"eval_samples_per_second": 921.808, |
|
"eval_steps_per_second": 57.616, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 2.4883553981781006, |
|
"eval_runtime": 223.2404, |
|
"eval_samples_per_second": 920.577, |
|
"eval_steps_per_second": 57.539, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.28e-07, |
|
"loss": 2.5687, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 2.4921038150787354, |
|
"eval_runtime": 223.8809, |
|
"eval_samples_per_second": 917.943, |
|
"eval_steps_per_second": 57.374, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.4873294830322266, |
|
"eval_runtime": 222.8771, |
|
"eval_samples_per_second": 922.078, |
|
"eval_steps_per_second": 57.633, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.252666666666667e-07, |
|
"loss": 2.5633, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 2.4919497966766357, |
|
"eval_runtime": 222.6439, |
|
"eval_samples_per_second": 923.043, |
|
"eval_steps_per_second": 57.693, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 2.482137441635132, |
|
"eval_runtime": 222.747, |
|
"eval_samples_per_second": 922.616, |
|
"eval_steps_per_second": 57.666, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.2253333333333334e-07, |
|
"loss": 2.5547, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 2.490872621536255, |
|
"eval_runtime": 222.6765, |
|
"eval_samples_per_second": 922.908, |
|
"eval_steps_per_second": 57.685, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 2.4818356037139893, |
|
"eval_runtime": 223.7166, |
|
"eval_samples_per_second": 918.617, |
|
"eval_steps_per_second": 57.416, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.198e-07, |
|
"loss": 2.5617, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.4854869842529297, |
|
"eval_runtime": 223.7715, |
|
"eval_samples_per_second": 918.392, |
|
"eval_steps_per_second": 57.402, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 2.48504638671875, |
|
"eval_runtime": 223.6654, |
|
"eval_samples_per_second": 918.828, |
|
"eval_steps_per_second": 57.43, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.1706666666666665e-07, |
|
"loss": 2.5569, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 2.480282783508301, |
|
"eval_runtime": 222.7744, |
|
"eval_samples_per_second": 922.503, |
|
"eval_steps_per_second": 57.659, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.4775896072387695, |
|
"eval_runtime": 223.0018, |
|
"eval_samples_per_second": 921.562, |
|
"eval_steps_per_second": 57.6, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.1433333333333336e-07, |
|
"loss": 2.5535, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.4824471473693848, |
|
"eval_runtime": 223.1733, |
|
"eval_samples_per_second": 920.854, |
|
"eval_steps_per_second": 57.556, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 2.4821510314941406, |
|
"eval_runtime": 224.1586, |
|
"eval_samples_per_second": 916.806, |
|
"eval_steps_per_second": 57.303, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.116e-07, |
|
"loss": 2.5534, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.476337432861328, |
|
"eval_runtime": 223.4733, |
|
"eval_samples_per_second": 919.618, |
|
"eval_steps_per_second": 57.479, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.47969388961792, |
|
"eval_runtime": 224.2217, |
|
"eval_samples_per_second": 916.548, |
|
"eval_steps_per_second": 57.287, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.0886666666666667e-07, |
|
"loss": 2.5583, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.4872305393218994, |
|
"eval_runtime": 224.237, |
|
"eval_samples_per_second": 916.486, |
|
"eval_steps_per_second": 57.283, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.4812192916870117, |
|
"eval_runtime": 222.6272, |
|
"eval_samples_per_second": 923.113, |
|
"eval_steps_per_second": 57.697, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.061333333333333e-07, |
|
"loss": 2.5545, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.474827527999878, |
|
"eval_runtime": 223.4042, |
|
"eval_samples_per_second": 919.902, |
|
"eval_steps_per_second": 57.497, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 2.4735865592956543, |
|
"eval_runtime": 224.1504, |
|
"eval_samples_per_second": 916.84, |
|
"eval_steps_per_second": 57.305, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.034e-07, |
|
"loss": 2.5561, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.4714128971099854, |
|
"eval_runtime": 223.2085, |
|
"eval_samples_per_second": 920.709, |
|
"eval_steps_per_second": 57.547, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.485759973526001, |
|
"eval_runtime": 222.8361, |
|
"eval_samples_per_second": 922.247, |
|
"eval_steps_per_second": 57.643, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.0066666666666663e-07, |
|
"loss": 2.5384, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 2.482938289642334, |
|
"eval_runtime": 223.4494, |
|
"eval_samples_per_second": 919.716, |
|
"eval_steps_per_second": 57.485, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 2.47662091255188, |
|
"eval_runtime": 222.9171, |
|
"eval_samples_per_second": 921.912, |
|
"eval_steps_per_second": 57.622, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.9793333333333334e-07, |
|
"loss": 2.541, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.4835963249206543, |
|
"eval_runtime": 223.4062, |
|
"eval_samples_per_second": 919.894, |
|
"eval_steps_per_second": 57.496, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 2.465118408203125, |
|
"eval_runtime": 226.1239, |
|
"eval_samples_per_second": 908.838, |
|
"eval_steps_per_second": 56.805, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.952e-07, |
|
"loss": 2.5439, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 2.4797005653381348, |
|
"eval_runtime": 224.1173, |
|
"eval_samples_per_second": 916.975, |
|
"eval_steps_per_second": 57.314, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 2.4702000617980957, |
|
"eval_runtime": 223.8532, |
|
"eval_samples_per_second": 918.057, |
|
"eval_steps_per_second": 57.381, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.9246666666666665e-07, |
|
"loss": 2.5597, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 2.475144386291504, |
|
"eval_runtime": 223.4589, |
|
"eval_samples_per_second": 919.677, |
|
"eval_steps_per_second": 57.483, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 2.474367618560791, |
|
"eval_runtime": 222.9092, |
|
"eval_samples_per_second": 921.945, |
|
"eval_steps_per_second": 57.624, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.897333333333333e-07, |
|
"loss": 2.5491, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 2.4756221771240234, |
|
"eval_runtime": 223.5443, |
|
"eval_samples_per_second": 919.325, |
|
"eval_steps_per_second": 57.461, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 2.4731247425079346, |
|
"eval_runtime": 223.5397, |
|
"eval_samples_per_second": 919.345, |
|
"eval_steps_per_second": 57.462, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.8699999999999996e-07, |
|
"loss": 2.5505, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 2.475615978240967, |
|
"eval_runtime": 223.941, |
|
"eval_samples_per_second": 917.697, |
|
"eval_steps_per_second": 57.359, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 2.4703986644744873, |
|
"eval_runtime": 224.1288, |
|
"eval_samples_per_second": 916.928, |
|
"eval_steps_per_second": 57.311, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 2.8426666666666667e-07, |
|
"loss": 2.5432, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_loss": 2.4762611389160156, |
|
"eval_runtime": 223.7009, |
|
"eval_samples_per_second": 918.682, |
|
"eval_steps_per_second": 57.42, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 2.4743261337280273, |
|
"eval_runtime": 224.407, |
|
"eval_samples_per_second": 915.791, |
|
"eval_steps_per_second": 57.24, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.815333333333333e-07, |
|
"loss": 2.5485, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 2.4626660346984863, |
|
"eval_runtime": 224.0612, |
|
"eval_samples_per_second": 917.205, |
|
"eval_steps_per_second": 57.328, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"eval_loss": 2.471444606781006, |
|
"eval_runtime": 223.4318, |
|
"eval_samples_per_second": 919.788, |
|
"eval_steps_per_second": 57.49, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 2.7880000000000003e-07, |
|
"loss": 2.5482, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 2.4684672355651855, |
|
"eval_runtime": 224.1026, |
|
"eval_samples_per_second": 917.035, |
|
"eval_steps_per_second": 57.317, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 2.4672694206237793, |
|
"eval_runtime": 224.9545, |
|
"eval_samples_per_second": 913.562, |
|
"eval_steps_per_second": 57.1, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.7606666666666664e-07, |
|
"loss": 2.5411, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_loss": 2.4726006984710693, |
|
"eval_runtime": 224.2901, |
|
"eval_samples_per_second": 916.269, |
|
"eval_steps_per_second": 57.27, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_loss": 2.476133108139038, |
|
"eval_runtime": 224.2831, |
|
"eval_samples_per_second": 916.297, |
|
"eval_steps_per_second": 57.271, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.733333333333333e-07, |
|
"loss": 2.5407, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 2.4611737728118896, |
|
"eval_runtime": 223.8119, |
|
"eval_samples_per_second": 918.226, |
|
"eval_steps_per_second": 57.392, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_loss": 2.4742894172668457, |
|
"eval_runtime": 224.8606, |
|
"eval_samples_per_second": 913.944, |
|
"eval_steps_per_second": 57.124, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 2.706e-07, |
|
"loss": 2.5307, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"eval_loss": 2.469853401184082, |
|
"eval_runtime": 224.177, |
|
"eval_samples_per_second": 916.731, |
|
"eval_steps_per_second": 57.298, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 2.4721498489379883, |
|
"eval_runtime": 223.5611, |
|
"eval_samples_per_second": 919.256, |
|
"eval_steps_per_second": 57.456, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 2.6786666666666666e-07, |
|
"loss": 2.5391, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"eval_loss": 2.461381435394287, |
|
"eval_runtime": 224.3794, |
|
"eval_samples_per_second": 915.904, |
|
"eval_steps_per_second": 57.247, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.4641225337982178, |
|
"eval_runtime": 224.9887, |
|
"eval_samples_per_second": 913.423, |
|
"eval_steps_per_second": 57.092, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.651333333333333e-07, |
|
"loss": 2.5378, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 2.4652435779571533, |
|
"eval_runtime": 225.299, |
|
"eval_samples_per_second": 912.166, |
|
"eval_steps_per_second": 57.013, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 2.4640512466430664, |
|
"eval_runtime": 224.7841, |
|
"eval_samples_per_second": 914.255, |
|
"eval_steps_per_second": 57.144, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.624e-07, |
|
"loss": 2.5399, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.469067096710205, |
|
"eval_runtime": 225.0567, |
|
"eval_samples_per_second": 913.148, |
|
"eval_steps_per_second": 57.075, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.4611856937408447, |
|
"eval_runtime": 224.6227, |
|
"eval_samples_per_second": 914.912, |
|
"eval_steps_per_second": 57.185, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 2.596666666666667e-07, |
|
"loss": 2.5412, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 2.469621419906616, |
|
"eval_runtime": 225.3239, |
|
"eval_samples_per_second": 912.065, |
|
"eval_steps_per_second": 57.007, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.4638073444366455, |
|
"eval_runtime": 224.7878, |
|
"eval_samples_per_second": 914.24, |
|
"eval_steps_per_second": 57.143, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 2.5693333333333333e-07, |
|
"loss": 2.5389, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.4658303260803223, |
|
"eval_runtime": 224.4987, |
|
"eval_samples_per_second": 915.417, |
|
"eval_steps_per_second": 57.216, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 2.4725189208984375, |
|
"eval_runtime": 226.711, |
|
"eval_samples_per_second": 906.484, |
|
"eval_steps_per_second": 56.658, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.542e-07, |
|
"loss": 2.5325, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.46415114402771, |
|
"eval_runtime": 225.3655, |
|
"eval_samples_per_second": 911.896, |
|
"eval_steps_per_second": 56.996, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.4599404335021973, |
|
"eval_runtime": 224.7087, |
|
"eval_samples_per_second": 914.562, |
|
"eval_steps_per_second": 57.163, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 2.5146666666666664e-07, |
|
"loss": 2.5351, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 2.4616599082946777, |
|
"eval_runtime": 226.6966, |
|
"eval_samples_per_second": 906.542, |
|
"eval_steps_per_second": 56.662, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.464627265930176, |
|
"eval_runtime": 224.9933, |
|
"eval_samples_per_second": 913.405, |
|
"eval_steps_per_second": 57.091, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 2.4873333333333335e-07, |
|
"loss": 2.522, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.4665021896362305, |
|
"eval_runtime": 225.0978, |
|
"eval_samples_per_second": 912.981, |
|
"eval_steps_per_second": 57.064, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_loss": 2.4761972427368164, |
|
"eval_runtime": 224.2641, |
|
"eval_samples_per_second": 916.375, |
|
"eval_steps_per_second": 57.276, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 2.46e-07, |
|
"loss": 2.5331, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 2.4668779373168945, |
|
"eval_runtime": 225.069, |
|
"eval_samples_per_second": 913.098, |
|
"eval_steps_per_second": 57.071, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.4549825191497803, |
|
"eval_runtime": 224.677, |
|
"eval_samples_per_second": 914.691, |
|
"eval_steps_per_second": 57.171, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.4326666666666666e-07, |
|
"loss": 2.5276, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.466226577758789, |
|
"eval_runtime": 224.6009, |
|
"eval_samples_per_second": 915.001, |
|
"eval_steps_per_second": 57.19, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 2.464536190032959, |
|
"eval_runtime": 224.76, |
|
"eval_samples_per_second": 914.353, |
|
"eval_steps_per_second": 57.15, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.405333333333333e-07, |
|
"loss": 2.5206, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 2.4586591720581055, |
|
"eval_runtime": 225.6548, |
|
"eval_samples_per_second": 910.727, |
|
"eval_steps_per_second": 56.923, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 2.47253680229187, |
|
"eval_runtime": 225.9081, |
|
"eval_samples_per_second": 909.706, |
|
"eval_steps_per_second": 56.859, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.3779999999999997e-07, |
|
"loss": 2.5294, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 2.458824634552002, |
|
"eval_runtime": 224.8489, |
|
"eval_samples_per_second": 913.991, |
|
"eval_steps_per_second": 57.127, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_loss": 2.4591076374053955, |
|
"eval_runtime": 225.8271, |
|
"eval_samples_per_second": 910.032, |
|
"eval_steps_per_second": 56.88, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 2.3506666666666668e-07, |
|
"loss": 2.5312, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 2.4680891036987305, |
|
"eval_runtime": 226.319, |
|
"eval_samples_per_second": 908.055, |
|
"eval_steps_per_second": 56.756, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 2.4624712467193604, |
|
"eval_runtime": 226.6472, |
|
"eval_samples_per_second": 906.74, |
|
"eval_steps_per_second": 56.674, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.3233333333333334e-07, |
|
"loss": 2.525, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"eval_loss": 2.4659371376037598, |
|
"eval_runtime": 225.5287, |
|
"eval_samples_per_second": 911.237, |
|
"eval_steps_per_second": 56.955, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"eval_loss": 2.460909843444824, |
|
"eval_runtime": 225.5374, |
|
"eval_samples_per_second": 911.201, |
|
"eval_steps_per_second": 56.953, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 2.2960000000000002e-07, |
|
"loss": 2.5318, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 2.4571011066436768, |
|
"eval_runtime": 225.1138, |
|
"eval_samples_per_second": 912.916, |
|
"eval_steps_per_second": 57.06, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 2.4581968784332275, |
|
"eval_runtime": 226.7154, |
|
"eval_samples_per_second": 906.467, |
|
"eval_steps_per_second": 56.657, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.2686666666666667e-07, |
|
"loss": 2.5332, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 2.456618547439575, |
|
"eval_runtime": 225.422, |
|
"eval_samples_per_second": 911.668, |
|
"eval_steps_per_second": 56.982, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 2.4587738513946533, |
|
"eval_runtime": 226.533, |
|
"eval_samples_per_second": 907.197, |
|
"eval_steps_per_second": 56.703, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.2413333333333333e-07, |
|
"loss": 2.5168, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 2.4606146812438965, |
|
"eval_runtime": 226.9924, |
|
"eval_samples_per_second": 905.361, |
|
"eval_steps_per_second": 56.588, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"eval_loss": 2.4597506523132324, |
|
"eval_runtime": 228.0241, |
|
"eval_samples_per_second": 901.264, |
|
"eval_steps_per_second": 56.332, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 2.214e-07, |
|
"loss": 2.5181, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_loss": 2.454252004623413, |
|
"eval_runtime": 228.6537, |
|
"eval_samples_per_second": 898.783, |
|
"eval_steps_per_second": 56.177, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_loss": 2.4619953632354736, |
|
"eval_runtime": 226.293, |
|
"eval_samples_per_second": 908.159, |
|
"eval_steps_per_second": 56.763, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 2.1866666666666667e-07, |
|
"loss": 2.5246, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"eval_loss": 2.4638657569885254, |
|
"eval_runtime": 228.163, |
|
"eval_samples_per_second": 900.716, |
|
"eval_steps_per_second": 56.297, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"eval_loss": 2.4556171894073486, |
|
"eval_runtime": 228.3656, |
|
"eval_samples_per_second": 899.917, |
|
"eval_steps_per_second": 56.248, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.1593333333333332e-07, |
|
"loss": 2.5318, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.457075595855713, |
|
"eval_runtime": 227.349, |
|
"eval_samples_per_second": 903.941, |
|
"eval_steps_per_second": 56.499, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_loss": 2.4636013507843018, |
|
"eval_runtime": 228.1517, |
|
"eval_samples_per_second": 900.76, |
|
"eval_steps_per_second": 56.3, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.132e-07, |
|
"loss": 2.512, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.4567556381225586, |
|
"eval_runtime": 228.1066, |
|
"eval_samples_per_second": 900.938, |
|
"eval_steps_per_second": 56.311, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 2.4644010066986084, |
|
"eval_runtime": 228.916, |
|
"eval_samples_per_second": 897.753, |
|
"eval_steps_per_second": 56.112, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.1046666666666666e-07, |
|
"loss": 2.5174, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"eval_loss": 2.4528720378875732, |
|
"eval_runtime": 228.2634, |
|
"eval_samples_per_second": 900.32, |
|
"eval_steps_per_second": 56.273, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 2.4613921642303467, |
|
"eval_runtime": 228.3765, |
|
"eval_samples_per_second": 899.874, |
|
"eval_steps_per_second": 56.245, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.0773333333333334e-07, |
|
"loss": 2.5196, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.463758707046509, |
|
"eval_runtime": 227.0091, |
|
"eval_samples_per_second": 905.294, |
|
"eval_steps_per_second": 56.584, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.453406572341919, |
|
"eval_runtime": 227.3538, |
|
"eval_samples_per_second": 903.921, |
|
"eval_steps_per_second": 56.498, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.05e-07, |
|
"loss": 2.5248, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.4553115367889404, |
|
"eval_runtime": 227.0142, |
|
"eval_samples_per_second": 905.274, |
|
"eval_steps_per_second": 56.582, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 2.453683853149414, |
|
"eval_runtime": 226.8684, |
|
"eval_samples_per_second": 905.855, |
|
"eval_steps_per_second": 56.619, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.0226666666666668e-07, |
|
"loss": 2.5201, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_loss": 2.4578709602355957, |
|
"eval_runtime": 226.9695, |
|
"eval_samples_per_second": 905.452, |
|
"eval_steps_per_second": 56.594, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.4524765014648438, |
|
"eval_runtime": 226.5657, |
|
"eval_samples_per_second": 907.066, |
|
"eval_steps_per_second": 56.694, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 1.9953333333333333e-07, |
|
"loss": 2.5164, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.4645235538482666, |
|
"eval_runtime": 227.3689, |
|
"eval_samples_per_second": 903.861, |
|
"eval_steps_per_second": 56.494, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 2.447993040084839, |
|
"eval_runtime": 228.8072, |
|
"eval_samples_per_second": 898.18, |
|
"eval_steps_per_second": 56.139, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.968e-07, |
|
"loss": 2.5186, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_loss": 2.4605581760406494, |
|
"eval_runtime": 229.2086, |
|
"eval_samples_per_second": 896.607, |
|
"eval_steps_per_second": 56.041, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.4623043537139893, |
|
"eval_runtime": 229.8264, |
|
"eval_samples_per_second": 894.197, |
|
"eval_steps_per_second": 55.89, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.9406666666666667e-07, |
|
"loss": 2.5123, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"eval_loss": 2.456583261489868, |
|
"eval_runtime": 230.427, |
|
"eval_samples_per_second": 891.866, |
|
"eval_steps_per_second": 55.744, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"eval_loss": 2.464402437210083, |
|
"eval_runtime": 229.1803, |
|
"eval_samples_per_second": 896.717, |
|
"eval_steps_per_second": 56.048, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.9133333333333333e-07, |
|
"loss": 2.5233, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 2.457606792449951, |
|
"eval_runtime": 227.9824, |
|
"eval_samples_per_second": 901.429, |
|
"eval_steps_per_second": 56.342, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 2.451943874359131, |
|
"eval_runtime": 229.374, |
|
"eval_samples_per_second": 895.96, |
|
"eval_steps_per_second": 56.0, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 1.886e-07, |
|
"loss": 2.513, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 2.456979513168335, |
|
"eval_runtime": 228.051, |
|
"eval_samples_per_second": 901.158, |
|
"eval_steps_per_second": 56.325, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 2.462719202041626, |
|
"eval_runtime": 228.489, |
|
"eval_samples_per_second": 899.43, |
|
"eval_steps_per_second": 56.217, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 1.8586666666666666e-07, |
|
"loss": 2.5226, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 2.449977397918701, |
|
"eval_runtime": 227.9952, |
|
"eval_samples_per_second": 901.379, |
|
"eval_steps_per_second": 56.339, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 2.4563188552856445, |
|
"eval_runtime": 227.2759, |
|
"eval_samples_per_second": 904.231, |
|
"eval_steps_per_second": 56.517, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 1.8313333333333332e-07, |
|
"loss": 2.5222, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_loss": 2.4521265029907227, |
|
"eval_runtime": 226.8418, |
|
"eval_samples_per_second": 905.962, |
|
"eval_steps_per_second": 56.625, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_loss": 2.4591453075408936, |
|
"eval_runtime": 226.8374, |
|
"eval_samples_per_second": 905.98, |
|
"eval_steps_per_second": 56.626, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 1.804e-07, |
|
"loss": 2.5191, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_loss": 2.4508602619171143, |
|
"eval_runtime": 228.6931, |
|
"eval_samples_per_second": 898.628, |
|
"eval_steps_per_second": 56.167, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 2.455850124359131, |
|
"eval_runtime": 228.2295, |
|
"eval_samples_per_second": 900.453, |
|
"eval_steps_per_second": 56.281, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 1.7766666666666666e-07, |
|
"loss": 2.5243, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"eval_loss": 2.4501898288726807, |
|
"eval_runtime": 228.7596, |
|
"eval_samples_per_second": 898.367, |
|
"eval_steps_per_second": 56.151, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"eval_loss": 2.4514639377593994, |
|
"eval_runtime": 227.2719, |
|
"eval_samples_per_second": 904.247, |
|
"eval_steps_per_second": 56.518, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 1.7493333333333334e-07, |
|
"loss": 2.5157, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_loss": 2.4562854766845703, |
|
"eval_runtime": 227.9532, |
|
"eval_samples_per_second": 901.545, |
|
"eval_steps_per_second": 56.349, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_loss": 2.452606678009033, |
|
"eval_runtime": 227.4532, |
|
"eval_samples_per_second": 903.527, |
|
"eval_steps_per_second": 56.473, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1.722e-07, |
|
"loss": 2.5162, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 2.458620071411133, |
|
"eval_runtime": 228.2374, |
|
"eval_samples_per_second": 900.422, |
|
"eval_steps_per_second": 56.279, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"eval_loss": 2.458387613296509, |
|
"eval_runtime": 228.0105, |
|
"eval_samples_per_second": 901.318, |
|
"eval_steps_per_second": 56.335, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 1.6946666666666668e-07, |
|
"loss": 2.5169, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 2.454158067703247, |
|
"eval_runtime": 227.4312, |
|
"eval_samples_per_second": 903.614, |
|
"eval_steps_per_second": 56.479, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_loss": 2.460242986679077, |
|
"eval_runtime": 228.5958, |
|
"eval_samples_per_second": 899.01, |
|
"eval_steps_per_second": 56.191, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 1.6673333333333333e-07, |
|
"loss": 2.5127, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"eval_loss": 2.458707809448242, |
|
"eval_runtime": 228.0452, |
|
"eval_samples_per_second": 901.181, |
|
"eval_steps_per_second": 56.327, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 2.452913284301758, |
|
"eval_runtime": 227.4908, |
|
"eval_samples_per_second": 903.377, |
|
"eval_steps_per_second": 56.464, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 1.64e-07, |
|
"loss": 2.5144, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 2.462021827697754, |
|
"eval_runtime": 229.4885, |
|
"eval_samples_per_second": 895.513, |
|
"eval_steps_per_second": 55.972, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.450927972793579, |
|
"eval_runtime": 227.9748, |
|
"eval_samples_per_second": 901.459, |
|
"eval_steps_per_second": 56.344, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 1.6126666666666667e-07, |
|
"loss": 2.5175, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_loss": 2.4503204822540283, |
|
"eval_runtime": 227.5178, |
|
"eval_samples_per_second": 903.27, |
|
"eval_steps_per_second": 56.457, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.4545462131500244, |
|
"eval_runtime": 227.7963, |
|
"eval_samples_per_second": 902.165, |
|
"eval_steps_per_second": 56.388, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 1.5853333333333332e-07, |
|
"loss": 2.5147, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.4440090656280518, |
|
"eval_runtime": 227.8162, |
|
"eval_samples_per_second": 902.087, |
|
"eval_steps_per_second": 56.383, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.457670211791992, |
|
"eval_runtime": 228.5245, |
|
"eval_samples_per_second": 899.291, |
|
"eval_steps_per_second": 56.208, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.558e-07, |
|
"loss": 2.5128, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 2.456602096557617, |
|
"eval_runtime": 230.1502, |
|
"eval_samples_per_second": 892.939, |
|
"eval_steps_per_second": 55.811, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.449889659881592, |
|
"eval_runtime": 228.3041, |
|
"eval_samples_per_second": 900.159, |
|
"eval_steps_per_second": 56.263, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.5306666666666666e-07, |
|
"loss": 2.5168, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 2.4480044841766357, |
|
"eval_runtime": 228.2508, |
|
"eval_samples_per_second": 900.369, |
|
"eval_steps_per_second": 56.276, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.4436299800872803, |
|
"eval_runtime": 229.3638, |
|
"eval_samples_per_second": 896.0, |
|
"eval_steps_per_second": 56.003, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.5033333333333332e-07, |
|
"loss": 2.5225, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.446739912033081, |
|
"eval_runtime": 228.4899, |
|
"eval_samples_per_second": 899.427, |
|
"eval_steps_per_second": 56.217, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 2.4519920349121094, |
|
"eval_runtime": 228.2075, |
|
"eval_samples_per_second": 900.54, |
|
"eval_steps_per_second": 56.286, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.476e-07, |
|
"loss": 2.5135, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_loss": 2.4535210132598877, |
|
"eval_runtime": 228.7342, |
|
"eval_samples_per_second": 898.466, |
|
"eval_steps_per_second": 56.157, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.4462831020355225, |
|
"eval_runtime": 229.9473, |
|
"eval_samples_per_second": 893.727, |
|
"eval_steps_per_second": 55.861, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 1.4486666666666665e-07, |
|
"loss": 2.5161, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 2.4556400775909424, |
|
"eval_runtime": 228.5872, |
|
"eval_samples_per_second": 899.044, |
|
"eval_steps_per_second": 56.193, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 2.4604580402374268, |
|
"eval_runtime": 229.1233, |
|
"eval_samples_per_second": 896.941, |
|
"eval_steps_per_second": 56.062, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.4213333333333334e-07, |
|
"loss": 2.5144, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 2.4516451358795166, |
|
"eval_runtime": 229.9726, |
|
"eval_samples_per_second": 893.628, |
|
"eval_steps_per_second": 55.854, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 2.4487648010253906, |
|
"eval_runtime": 229.4253, |
|
"eval_samples_per_second": 895.76, |
|
"eval_steps_per_second": 55.988, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.3940000000000002e-07, |
|
"loss": 2.5209, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_loss": 2.4525067806243896, |
|
"eval_runtime": 228.8527, |
|
"eval_samples_per_second": 898.001, |
|
"eval_steps_per_second": 56.128, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 2.450185537338257, |
|
"eval_runtime": 230.8087, |
|
"eval_samples_per_second": 890.391, |
|
"eval_steps_per_second": 55.652, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.3666666666666665e-07, |
|
"loss": 2.5102, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 2.453780174255371, |
|
"eval_runtime": 229.4733, |
|
"eval_samples_per_second": 895.573, |
|
"eval_steps_per_second": 55.976, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"eval_loss": 2.4490787982940674, |
|
"eval_runtime": 229.059, |
|
"eval_samples_per_second": 897.192, |
|
"eval_steps_per_second": 56.077, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.3393333333333333e-07, |
|
"loss": 2.5176, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"eval_loss": 2.452752113342285, |
|
"eval_runtime": 228.4962, |
|
"eval_samples_per_second": 899.402, |
|
"eval_steps_per_second": 56.215, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 2.44599986076355, |
|
"eval_runtime": 228.5114, |
|
"eval_samples_per_second": 899.342, |
|
"eval_steps_per_second": 56.212, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 1.312e-07, |
|
"loss": 2.5208, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 2.4484992027282715, |
|
"eval_runtime": 230.1605, |
|
"eval_samples_per_second": 892.899, |
|
"eval_steps_per_second": 55.809, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_loss": 2.451284646987915, |
|
"eval_runtime": 229.1401, |
|
"eval_samples_per_second": 896.875, |
|
"eval_steps_per_second": 56.057, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 1.2846666666666667e-07, |
|
"loss": 2.5064, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 2.451927900314331, |
|
"eval_runtime": 229.3071, |
|
"eval_samples_per_second": 896.222, |
|
"eval_steps_per_second": 56.017, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_loss": 2.449305295944214, |
|
"eval_runtime": 231.2204, |
|
"eval_samples_per_second": 888.806, |
|
"eval_steps_per_second": 55.553, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 1.2573333333333332e-07, |
|
"loss": 2.5111, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"eval_loss": 2.4505178928375244, |
|
"eval_runtime": 230.2462, |
|
"eval_samples_per_second": 892.566, |
|
"eval_steps_per_second": 55.788, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": 2.4501988887786865, |
|
"eval_runtime": 229.973, |
|
"eval_samples_per_second": 893.627, |
|
"eval_steps_per_second": 55.854, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 1.23e-07, |
|
"loss": 2.5141, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 2.4560253620147705, |
|
"eval_runtime": 229.6465, |
|
"eval_samples_per_second": 894.897, |
|
"eval_steps_per_second": 55.934, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"eval_loss": 2.4499940872192383, |
|
"eval_runtime": 229.0726, |
|
"eval_samples_per_second": 897.139, |
|
"eval_steps_per_second": 56.074, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.2026666666666666e-07, |
|
"loss": 2.5089, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_loss": 2.4512550830841064, |
|
"eval_runtime": 228.4897, |
|
"eval_samples_per_second": 899.428, |
|
"eval_steps_per_second": 56.217, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 2.4418201446533203, |
|
"eval_runtime": 229.377, |
|
"eval_samples_per_second": 895.949, |
|
"eval_steps_per_second": 56.0, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.1753333333333334e-07, |
|
"loss": 2.5174, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_loss": 2.447690010070801, |
|
"eval_runtime": 231.0137, |
|
"eval_samples_per_second": 889.601, |
|
"eval_steps_per_second": 55.603, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 2.450817584991455, |
|
"eval_runtime": 231.6212, |
|
"eval_samples_per_second": 887.268, |
|
"eval_steps_per_second": 55.457, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.1480000000000001e-07, |
|
"loss": 2.5198, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_loss": 2.448648691177368, |
|
"eval_runtime": 230.9308, |
|
"eval_samples_per_second": 889.92, |
|
"eval_steps_per_second": 55.623, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.4577322006225586, |
|
"eval_runtime": 230.4865, |
|
"eval_samples_per_second": 891.636, |
|
"eval_steps_per_second": 55.73, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 1.1206666666666666e-07, |
|
"loss": 2.4974, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_loss": 2.4416255950927734, |
|
"eval_runtime": 229.8237, |
|
"eval_samples_per_second": 894.207, |
|
"eval_steps_per_second": 55.891, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_loss": 2.4549336433410645, |
|
"eval_runtime": 229.7571, |
|
"eval_samples_per_second": 894.466, |
|
"eval_steps_per_second": 55.907, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 1.0933333333333333e-07, |
|
"loss": 2.5016, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 2.455679416656494, |
|
"eval_runtime": 230.1335, |
|
"eval_samples_per_second": 893.003, |
|
"eval_steps_per_second": 55.815, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.4531571865081787, |
|
"eval_runtime": 231.3847, |
|
"eval_samples_per_second": 888.175, |
|
"eval_steps_per_second": 55.514, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.066e-07, |
|
"loss": 2.5112, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.445054531097412, |
|
"eval_runtime": 231.2999, |
|
"eval_samples_per_second": 888.5, |
|
"eval_steps_per_second": 55.534, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 2.460723638534546, |
|
"eval_runtime": 230.196, |
|
"eval_samples_per_second": 892.761, |
|
"eval_steps_per_second": 55.8, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 1.0386666666666667e-07, |
|
"loss": 2.5172, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.4451537132263184, |
|
"eval_runtime": 231.1406, |
|
"eval_samples_per_second": 889.112, |
|
"eval_steps_per_second": 55.572, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 2.4426777362823486, |
|
"eval_runtime": 230.7159, |
|
"eval_samples_per_second": 890.749, |
|
"eval_steps_per_second": 55.675, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.0113333333333334e-07, |
|
"loss": 2.5089, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.4511077404022217, |
|
"eval_runtime": 231.5975, |
|
"eval_samples_per_second": 887.359, |
|
"eval_steps_per_second": 55.463, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.4440526962280273, |
|
"eval_runtime": 231.4447, |
|
"eval_samples_per_second": 887.944, |
|
"eval_steps_per_second": 55.499, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 9.84e-08, |
|
"loss": 2.5136, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"eval_loss": 2.4492361545562744, |
|
"eval_runtime": 231.7181, |
|
"eval_samples_per_second": 886.896, |
|
"eval_steps_per_second": 55.434, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 2.4523823261260986, |
|
"eval_runtime": 231.3659, |
|
"eval_samples_per_second": 888.247, |
|
"eval_steps_per_second": 55.518, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 9.566666666666666e-08, |
|
"loss": 2.509, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"eval_loss": 2.451181411743164, |
|
"eval_runtime": 230.8127, |
|
"eval_samples_per_second": 890.376, |
|
"eval_steps_per_second": 55.651, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 2.4528069496154785, |
|
"eval_runtime": 230.6096, |
|
"eval_samples_per_second": 891.16, |
|
"eval_steps_per_second": 55.7, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 9.293333333333333e-08, |
|
"loss": 2.5157, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 2.4439537525177, |
|
"eval_runtime": 233.382, |
|
"eval_samples_per_second": 880.573, |
|
"eval_steps_per_second": 55.039, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_loss": 2.4401602745056152, |
|
"eval_runtime": 231.584, |
|
"eval_samples_per_second": 887.41, |
|
"eval_steps_per_second": 55.466, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 9.02e-08, |
|
"loss": 2.5181, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_loss": 2.4537830352783203, |
|
"eval_runtime": 230.4518, |
|
"eval_samples_per_second": 891.77, |
|
"eval_steps_per_second": 55.738, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 2.4480724334716797, |
|
"eval_runtime": 229.9532, |
|
"eval_samples_per_second": 893.703, |
|
"eval_steps_per_second": 55.859, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 8.746666666666667e-08, |
|
"loss": 2.5145, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_loss": 2.4417428970336914, |
|
"eval_runtime": 231.464, |
|
"eval_samples_per_second": 887.87, |
|
"eval_steps_per_second": 55.495, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"eval_loss": 2.4512147903442383, |
|
"eval_runtime": 231.0711, |
|
"eval_samples_per_second": 889.38, |
|
"eval_steps_per_second": 55.589, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 8.473333333333334e-08, |
|
"loss": 2.5013, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 2.45603084564209, |
|
"eval_runtime": 231.877, |
|
"eval_samples_per_second": 886.289, |
|
"eval_steps_per_second": 55.396, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_loss": 2.4508955478668213, |
|
"eval_runtime": 230.4147, |
|
"eval_samples_per_second": 891.913, |
|
"eval_steps_per_second": 55.747, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 8.2e-08, |
|
"loss": 2.5064, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_loss": 2.447256565093994, |
|
"eval_runtime": 231.4505, |
|
"eval_samples_per_second": 887.922, |
|
"eval_steps_per_second": 55.498, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"eval_loss": 2.457575559616089, |
|
"eval_runtime": 232.2387, |
|
"eval_samples_per_second": 884.908, |
|
"eval_steps_per_second": 55.309, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 7.926666666666666e-08, |
|
"loss": 2.5068, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"eval_loss": 2.4460949897766113, |
|
"eval_runtime": 230.6448, |
|
"eval_samples_per_second": 891.024, |
|
"eval_steps_per_second": 55.692, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"eval_loss": 2.4451067447662354, |
|
"eval_runtime": 231.6713, |
|
"eval_samples_per_second": 887.076, |
|
"eval_steps_per_second": 55.445, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 7.653333333333333e-08, |
|
"loss": 2.5152, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.442117214202881, |
|
"eval_runtime": 231.4315, |
|
"eval_samples_per_second": 887.995, |
|
"eval_steps_per_second": 55.502, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 2.4458179473876953, |
|
"eval_runtime": 230.6413, |
|
"eval_samples_per_second": 891.037, |
|
"eval_steps_per_second": 55.693, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 7.38e-08, |
|
"loss": 2.5025, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_loss": 2.4532368183135986, |
|
"eval_runtime": 230.9812, |
|
"eval_samples_per_second": 889.726, |
|
"eval_steps_per_second": 55.611, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_loss": 2.4541139602661133, |
|
"eval_runtime": 231.1965, |
|
"eval_samples_per_second": 888.898, |
|
"eval_steps_per_second": 55.559, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 7.106666666666667e-08, |
|
"loss": 2.5151, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"eval_loss": 2.4499058723449707, |
|
"eval_runtime": 231.2124, |
|
"eval_samples_per_second": 888.836, |
|
"eval_steps_per_second": 55.555, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_loss": 2.4501264095306396, |
|
"eval_runtime": 231.2241, |
|
"eval_samples_per_second": 888.791, |
|
"eval_steps_per_second": 55.552, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 6.833333333333332e-08, |
|
"loss": 2.5138, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 2.444784641265869, |
|
"eval_runtime": 231.6831, |
|
"eval_samples_per_second": 887.031, |
|
"eval_steps_per_second": 55.442, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"eval_loss": 2.4562456607818604, |
|
"eval_runtime": 231.974, |
|
"eval_samples_per_second": 885.918, |
|
"eval_steps_per_second": 55.373, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 6.56e-08, |
|
"loss": 2.5039, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 2.4612646102905273, |
|
"eval_runtime": 234.4229, |
|
"eval_samples_per_second": 876.663, |
|
"eval_steps_per_second": 54.794, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 2.4471163749694824, |
|
"eval_runtime": 233.3806, |
|
"eval_samples_per_second": 880.579, |
|
"eval_steps_per_second": 55.039, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 6.286666666666666e-08, |
|
"loss": 2.5055, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 2.445026159286499, |
|
"eval_runtime": 233.3418, |
|
"eval_samples_per_second": 880.725, |
|
"eval_steps_per_second": 55.048, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 2.4492921829223633, |
|
"eval_runtime": 232.3875, |
|
"eval_samples_per_second": 884.342, |
|
"eval_steps_per_second": 55.274, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 6.013333333333333e-08, |
|
"loss": 2.5085, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 2.448164224624634, |
|
"eval_runtime": 233.4578, |
|
"eval_samples_per_second": 880.288, |
|
"eval_steps_per_second": 55.021, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.4571895599365234, |
|
"eval_runtime": 235.4355, |
|
"eval_samples_per_second": 872.893, |
|
"eval_steps_per_second": 54.558, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 5.7400000000000004e-08, |
|
"loss": 2.5114, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_loss": 2.444307804107666, |
|
"eval_runtime": 234.4924, |
|
"eval_samples_per_second": 876.404, |
|
"eval_steps_per_second": 54.778, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 2.445603132247925, |
|
"eval_runtime": 234.6223, |
|
"eval_samples_per_second": 875.919, |
|
"eval_steps_per_second": 54.748, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 5.4666666666666666e-08, |
|
"loss": 2.5132, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 2.4528441429138184, |
|
"eval_runtime": 234.3887, |
|
"eval_samples_per_second": 876.791, |
|
"eval_steps_per_second": 54.802, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.449744939804077, |
|
"eval_runtime": 233.1003, |
|
"eval_samples_per_second": 881.638, |
|
"eval_steps_per_second": 55.105, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 5.1933333333333335e-08, |
|
"loss": 2.5072, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.4547877311706543, |
|
"eval_runtime": 232.2237, |
|
"eval_samples_per_second": 884.966, |
|
"eval_steps_per_second": 55.313, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 2.4547617435455322, |
|
"eval_runtime": 232.0067, |
|
"eval_samples_per_second": 885.794, |
|
"eval_steps_per_second": 55.365, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 4.92e-08, |
|
"loss": 2.504, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"eval_loss": 2.444261312484741, |
|
"eval_runtime": 232.4079, |
|
"eval_samples_per_second": 884.264, |
|
"eval_steps_per_second": 55.269, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 2.445204734802246, |
|
"eval_runtime": 233.2645, |
|
"eval_samples_per_second": 881.017, |
|
"eval_steps_per_second": 55.066, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 4.6466666666666666e-08, |
|
"loss": 2.5128, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 2.4509565830230713, |
|
"eval_runtime": 233.1857, |
|
"eval_samples_per_second": 881.315, |
|
"eval_steps_per_second": 55.085, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_loss": 2.447999954223633, |
|
"eval_runtime": 233.2452, |
|
"eval_samples_per_second": 881.09, |
|
"eval_steps_per_second": 55.071, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 4.3733333333333335e-08, |
|
"loss": 2.5133, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 2.4470479488372803, |
|
"eval_runtime": 234.9529, |
|
"eval_samples_per_second": 874.686, |
|
"eval_steps_per_second": 54.671, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"eval_loss": 2.4436631202697754, |
|
"eval_runtime": 234.9836, |
|
"eval_samples_per_second": 874.572, |
|
"eval_steps_per_second": 54.663, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 4.1e-08, |
|
"loss": 2.5067, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 2.444672107696533, |
|
"eval_runtime": 234.3233, |
|
"eval_samples_per_second": 877.036, |
|
"eval_steps_per_second": 54.817, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"eval_loss": 2.453118085861206, |
|
"eval_runtime": 233.5384, |
|
"eval_samples_per_second": 879.984, |
|
"eval_steps_per_second": 55.002, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 3.8266666666666665e-08, |
|
"loss": 2.4996, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"eval_loss": 2.447479009628296, |
|
"eval_runtime": 235.7844, |
|
"eval_samples_per_second": 871.601, |
|
"eval_steps_per_second": 54.478, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_loss": 2.4438347816467285, |
|
"eval_runtime": 233.6193, |
|
"eval_samples_per_second": 879.679, |
|
"eval_steps_per_second": 54.983, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 3.5533333333333334e-08, |
|
"loss": 2.5123, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_loss": 2.4552195072174072, |
|
"eval_runtime": 235.201, |
|
"eval_samples_per_second": 873.763, |
|
"eval_steps_per_second": 54.613, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 2.4441311359405518, |
|
"eval_runtime": 234.6948, |
|
"eval_samples_per_second": 875.648, |
|
"eval_steps_per_second": 54.731, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 3.28e-08, |
|
"loss": 2.5044, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_loss": 2.4438366889953613, |
|
"eval_runtime": 233.1145, |
|
"eval_samples_per_second": 881.584, |
|
"eval_steps_per_second": 55.102, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_loss": 2.453371286392212, |
|
"eval_runtime": 234.9783, |
|
"eval_samples_per_second": 874.592, |
|
"eval_steps_per_second": 54.665, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 3.0066666666666665e-08, |
|
"loss": 2.5068, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"eval_loss": 2.449671745300293, |
|
"eval_runtime": 232.7881, |
|
"eval_samples_per_second": 882.82, |
|
"eval_steps_per_second": 55.179, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"eval_loss": 2.444044828414917, |
|
"eval_runtime": 233.2255, |
|
"eval_samples_per_second": 881.164, |
|
"eval_steps_per_second": 55.075, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 2.7333333333333333e-08, |
|
"loss": 2.5165, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"eval_loss": 2.457695722579956, |
|
"eval_runtime": 234.2621, |
|
"eval_samples_per_second": 877.265, |
|
"eval_steps_per_second": 54.832, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_loss": 2.4506990909576416, |
|
"eval_runtime": 232.4001, |
|
"eval_samples_per_second": 884.294, |
|
"eval_steps_per_second": 55.271, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 2.46e-08, |
|
"loss": 2.5087, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"eval_loss": 2.4494166374206543, |
|
"eval_runtime": 233.0606, |
|
"eval_samples_per_second": 881.788, |
|
"eval_steps_per_second": 55.114, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"eval_loss": 2.4393150806427, |
|
"eval_runtime": 234.1923, |
|
"eval_samples_per_second": 877.527, |
|
"eval_steps_per_second": 54.848, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 2.1866666666666667e-08, |
|
"loss": 2.5036, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_loss": 2.4486756324768066, |
|
"eval_runtime": 233.5876, |
|
"eval_samples_per_second": 879.798, |
|
"eval_steps_per_second": 54.99, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 2.442298173904419, |
|
"eval_runtime": 233.1053, |
|
"eval_samples_per_second": 881.619, |
|
"eval_steps_per_second": 55.104, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1.9133333333333333e-08, |
|
"loss": 2.5086, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_loss": 2.4455623626708984, |
|
"eval_runtime": 232.7856, |
|
"eval_samples_per_second": 882.83, |
|
"eval_steps_per_second": 55.18, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"eval_loss": 2.449575185775757, |
|
"eval_runtime": 234.5471, |
|
"eval_samples_per_second": 876.199, |
|
"eval_steps_per_second": 54.765, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 1.64e-08, |
|
"loss": 2.5034, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"eval_loss": 2.4498891830444336, |
|
"eval_runtime": 232.7935, |
|
"eval_samples_per_second": 882.8, |
|
"eval_steps_per_second": 55.178, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 2.4432790279388428, |
|
"eval_runtime": 233.6332, |
|
"eval_samples_per_second": 879.627, |
|
"eval_steps_per_second": 54.979, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 1.3666666666666667e-08, |
|
"loss": 2.5099, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 2.4534084796905518, |
|
"eval_runtime": 233.6601, |
|
"eval_samples_per_second": 879.525, |
|
"eval_steps_per_second": 54.973, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"eval_loss": 2.4494857788085938, |
|
"eval_runtime": 233.4959, |
|
"eval_samples_per_second": 880.144, |
|
"eval_steps_per_second": 55.012, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 1.0933333333333334e-08, |
|
"loss": 2.5065, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 2.4510202407836914, |
|
"eval_runtime": 233.7041, |
|
"eval_samples_per_second": 879.36, |
|
"eval_steps_per_second": 54.963, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 2.4512877464294434, |
|
"eval_runtime": 237.5888, |
|
"eval_samples_per_second": 864.982, |
|
"eval_steps_per_second": 54.064, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"learning_rate": 8.2e-09, |
|
"loss": 2.502, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.64, |
|
"eval_loss": 2.451225996017456, |
|
"eval_runtime": 233.4207, |
|
"eval_samples_per_second": 880.427, |
|
"eval_steps_per_second": 55.029, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 2.4469268321990967, |
|
"eval_runtime": 234.7938, |
|
"eval_samples_per_second": 875.279, |
|
"eval_steps_per_second": 54.708, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.466666666666667e-09, |
|
"loss": 2.5043, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 2.4544479846954346, |
|
"eval_runtime": 234.2869, |
|
"eval_samples_per_second": 877.173, |
|
"eval_steps_per_second": 54.826, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"eval_loss": 2.4492740631103516, |
|
"eval_runtime": 234.1805, |
|
"eval_samples_per_second": 877.571, |
|
"eval_steps_per_second": 54.851, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 2.7333333333333334e-09, |
|
"loss": 2.5068, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.453711748123169, |
|
"eval_runtime": 233.0608, |
|
"eval_samples_per_second": 881.787, |
|
"eval_steps_per_second": 55.114, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 2.4386837482452393, |
|
"eval_runtime": 234.1662, |
|
"eval_samples_per_second": 877.625, |
|
"eval_steps_per_second": 54.854, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"learning_rate": 0.0, |
|
"loss": 2.5118, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"eval_loss": 2.4494030475616455, |
|
"eval_runtime": 234.0187, |
|
"eval_samples_per_second": 878.178, |
|
"eval_steps_per_second": 54.889, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 9.83, |
|
"step": 2400000, |
|
"total_flos": 7.305293129309786e+17, |
|
"train_loss": 2.5438934391276042, |
|
"train_runtime": 220778.1092, |
|
"train_samples_per_second": 173.93, |
|
"train_steps_per_second": 10.871 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 10, |
|
"save_steps": 32000, |
|
"total_flos": 7.305293129309786e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|