|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.983050847457627, |
|
"eval_steps": 500, |
|
"global_step": 396, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009996066923030483, |
|
"loss": 2.8429, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009984273879759713, |
|
"loss": 1.7586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009964639423366442, |
|
"loss": 1.7084, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009937194443381972, |
|
"loss": 1.7022, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009901982117093786, |
|
"loss": 1.436, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000985905784161771, |
|
"loss": 1.5345, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009808489146745465, |
|
"loss": 1.5542, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009750355588704727, |
|
"loss": 1.5173, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000968474862499881, |
|
"loss": 1.4094, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009611771470522907, |
|
"loss": 1.6779, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0009531538935183251, |
|
"loss": 1.593, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0009444177243274617, |
|
"loss": 1.8026, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0009349823834900395, |
|
"loss": 1.1646, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0009248627149747573, |
|
"loss": 1.4566, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0009140746393556853, |
|
"loss": 1.3848, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0009026351287655293, |
|
"loss": 1.6641, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0008905621801945467, |
|
"loss": 1.2664, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0008778747871771292, |
|
"loss": 1.4103, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0008645929099105886, |
|
"loss": 1.3391, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0008507374438531607, |
|
"loss": 1.3329, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0008363301868506264, |
|
"loss": 1.4708, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0008213938048432696, |
|
"loss": 1.5326, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0008059517962071233, |
|
"loss": 1.5317, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0007900284547855992, |
|
"loss": 1.2481, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0007736488316696662, |
|
"loss": 1.4773, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0007568386957867032, |
|
"loss": 1.2638, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0007396244933600284, |
|
"loss": 1.2867, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0007220333063028871, |
|
"loss": 1.0248, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0007040928096123516, |
|
"loss": 1.0408, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0006858312278301637, |
|
"loss": 1.1847, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0006672772906390176, |
|
"loss": 0.9884, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0006484601876641375, |
|
"loss": 1.0086, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0006294095225512603, |
|
"loss": 1.0438, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0006101552663932703, |
|
"loss": 0.9305, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005907277105787513, |
|
"loss": 1.0619, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0005711574191366427, |
|
"loss": 1.5694, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0005514751806519673, |
|
"loss": 1.0704, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0005317119598282822, |
|
"loss": 1.1666, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0005118988487730537, |
|
"loss": 0.9914, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.000492067018082596, |
|
"loss": 1.0523, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00047224766780353, |
|
"loss": 0.9491, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0004524719783479088, |
|
"loss": 1.041, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0004327710614392341, |
|
"loss": 1.237, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00041317591116653486, |
|
"loss": 1.0301, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00039371735522351166, |
|
"loss": 1.0687, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00037442600640946044, |
|
"loss": 0.9959, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0003553322144682737, |
|
"loss": 0.9951, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0003364660183412892, |
|
"loss": 1.1158, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0003178570989091028, |
|
"loss": 1.2224, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00029953473229669324, |
|
"loss": 1.0617, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0002815277438153203, |
|
"loss": 0.9955, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0002638644626136587, |
|
"loss": 0.9129, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00024657267710950857, |
|
"loss": 1.0105, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0002296795912722014, |
|
"loss": 0.7617, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00021321178182447708, |
|
"loss": 0.629, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00019719515643116677, |
|
"loss": 0.7202, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00018165491294045593, |
|
"loss": 0.7211, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00016661549974185424, |
|
"loss": 0.6464, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00015210057730323618, |
|
"loss": 0.6799, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0001381329809474649, |
|
"loss": 0.6218, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00012473468492715895, |
|
"loss": 0.7277, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00011192676785412154, |
|
"loss": 0.6244, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 9.972937953781985e-05, |
|
"loss": 0.6352, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 8.816170928508365e-05, |
|
"loss": 0.6462, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 7.724195571089787e-05, |
|
"loss": 0.6377, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 6.698729810778065e-05, |
|
"loss": 0.7165, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 5.741386941879179e-05, |
|
"loss": 0.6948, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.853673085668947e-05, |
|
"loss": 0.6849, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 4.036984820916722e-05, |
|
"loss": 0.6742, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.292606986744667e-05, |
|
"loss": 0.7078, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.6217106612792528e-05, |
|
"loss": 0.6499, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.025351319275137e-05, |
|
"loss": 0.6878, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.5044671716097413e-05, |
|
"loss": 0.5469, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.0598776892610684e-05, |
|
"loss": 0.7233, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.9228231409067535e-06, |
|
"loss": 0.7482, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.02259358460233e-06, |
|
"loss": 0.6865, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.9026509541272275e-06, |
|
"loss": 0.6953, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.663304084960185e-07, |
|
"loss": 0.671, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.573428833345769e-08, |
|
"loss": 0.6152, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"step": 396, |
|
"total_flos": 1.0914041201688576e+17, |
|
"train_loss": 1.0966757915236733, |
|
"train_runtime": 4531.1008, |
|
"train_samples_per_second": 0.703, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 396, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 1.0914041201688576e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|