|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.603550295857988,
  "eval_steps": 5,
  "global_step": 110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11834319526627218,
      "grad_norm": 0.11314600706100464,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 2.6482,
      "step": 5
    },
    {
      "epoch": 0.11834319526627218,
      "eval_loss": 2.5026204586029053,
      "eval_runtime": 267.0476,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 5
    },
    {
      "epoch": 0.23668639053254437,
      "grad_norm": 0.12746678292751312,
      "learning_rate": 1.992981096013517e-05,
      "loss": 2.6479,
      "step": 10
    },
    {
      "epoch": 0.23668639053254437,
      "eval_loss": 2.496476888656616,
      "eval_runtime": 267.3526,
      "eval_samples_per_second": 0.606,
      "eval_steps_per_second": 0.153,
      "step": 10
    },
    {
      "epoch": 0.35502958579881655,
      "grad_norm": 0.15371470153331757,
      "learning_rate": 1.964635581908359e-05,
      "loss": 2.6197,
      "step": 15
    },
    {
      "epoch": 0.35502958579881655,
      "eval_loss": 2.4890758991241455,
      "eval_runtime": 268.0217,
      "eval_samples_per_second": 0.604,
      "eval_steps_per_second": 0.153,
      "step": 15
    },
    {
      "epoch": 0.47337278106508873,
      "grad_norm": 0.12974606454372406,
      "learning_rate": 1.9151456172430186e-05,
      "loss": 2.592,
      "step": 20
    },
    {
      "epoch": 0.47337278106508873,
      "eval_loss": 2.4806125164031982,
      "eval_runtime": 267.5318,
      "eval_samples_per_second": 0.606,
      "eval_steps_per_second": 0.153,
      "step": 20
    },
    {
      "epoch": 0.591715976331361,
      "grad_norm": 0.14419519901275635,
      "learning_rate": 1.845596003501826e-05,
      "loss": 2.6054,
      "step": 25
    },
    {
      "epoch": 0.591715976331361,
      "eval_loss": 2.4711341857910156,
      "eval_runtime": 266.6209,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 25
    },
    {
      "epoch": 0.7100591715976331,
      "grad_norm": 0.148014634847641,
      "learning_rate": 1.7575112421616203e-05,
      "loss": 2.5873,
      "step": 30
    },
    {
      "epoch": 0.7100591715976331,
      "eval_loss": 2.4612033367156982,
      "eval_runtime": 265.6543,
      "eval_samples_per_second": 0.61,
      "eval_steps_per_second": 0.154,
      "step": 30
    },
    {
      "epoch": 0.8284023668639053,
      "grad_norm": 0.1544329673051834,
      "learning_rate": 1.6528221181905217e-05,
      "loss": 2.6077,
      "step": 35
    },
    {
      "epoch": 0.8284023668639053,
      "eval_loss": 2.4514548778533936,
      "eval_runtime": 267.0157,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 35
    },
    {
      "epoch": 0.9467455621301775,
      "grad_norm": 0.14721575379371643,
      "learning_rate": 1.533823377964791e-05,
      "loss": 2.584,
      "step": 40
    },
    {
      "epoch": 0.9467455621301775,
      "eval_loss": 2.442168951034546,
      "eval_runtime": 266.406,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 40
    },
    {
      "epoch": 1.0650887573964498,
      "grad_norm": 0.15120001137256622,
      "learning_rate": 1.4031234292879726e-05,
      "loss": 2.5831,
      "step": 45
    },
    {
      "epoch": 1.0650887573964498,
      "eval_loss": 2.433633327484131,
      "eval_runtime": 266.429,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 45
    },
    {
      "epoch": 1.183431952662722,
      "grad_norm": 0.1626676619052887,
      "learning_rate": 1.2635871660690677e-05,
      "loss": 2.5507,
      "step": 50
    },
    {
      "epoch": 1.183431952662722,
      "eval_loss": 2.425856828689575,
      "eval_runtime": 266.5163,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 50
    },
    {
      "epoch": 1.301775147928994,
      "grad_norm": 0.15653495490550995,
      "learning_rate": 1.1182731709213658e-05,
      "loss": 2.5446,
      "step": 55
    },
    {
      "epoch": 1.301775147928994,
      "eval_loss": 2.4187350273132324,
      "eval_runtime": 266.7761,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 55
    },
    {
      "epoch": 1.4201183431952662,
      "grad_norm": 0.18180669844150543,
      "learning_rate": 9.703666721774403e-06,
      "loss": 2.5259,
      "step": 60
    },
    {
      "epoch": 1.4201183431952662,
      "eval_loss": 2.412436008453369,
      "eval_runtime": 267.1031,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.153,
      "step": 60
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.1720452457666397,
      "learning_rate": 8.231097248774273e-06,
      "loss": 2.5576,
      "step": 65
    },
    {
      "epoch": 1.5384615384615383,
      "eval_loss": 2.407033920288086,
      "eval_runtime": 266.8119,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 65
    },
    {
      "epoch": 1.6568047337278107,
      "grad_norm": 0.1620107889175415,
      "learning_rate": 6.797301461371626e-06,
      "loss": 2.5783,
      "step": 70
    },
    {
      "epoch": 1.6568047337278107,
      "eval_loss": 2.402543783187866,
      "eval_runtime": 267.0918,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 70
    },
    {
      "epoch": 1.7751479289940828,
      "grad_norm": 0.1715419441461563,
      "learning_rate": 5.43370762606287e-06,
      "loss": 2.5542,
      "step": 75
    },
    {
      "epoch": 1.7751479289940828,
      "eval_loss": 2.398886203765869,
      "eval_runtime": 267.0041,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 75
    },
    {
      "epoch": 1.893491124260355,
      "grad_norm": 0.16874942183494568,
      "learning_rate": 4.170205208855281e-06,
      "loss": 2.5091,
      "step": 80
    },
    {
      "epoch": 1.893491124260355,
      "eval_loss": 2.3959767818450928,
      "eval_runtime": 266.8823,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 80
    },
    {
      "epoch": 2.0118343195266273,
      "grad_norm": 0.16341295838356018,
      "learning_rate": 3.0344897093700333e-06,
      "loss": 2.528,
      "step": 85
    },
    {
      "epoch": 2.0118343195266273,
      "eval_loss": 2.393803596496582,
      "eval_runtime": 266.9041,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 85
    },
    {
      "epoch": 2.1301775147928996,
      "grad_norm": 0.16567575931549072,
      "learning_rate": 2.0514555858664663e-06,
      "loss": 2.5251,
      "step": 90
    },
    {
      "epoch": 2.1301775147928996,
      "eval_loss": 2.3922572135925293,
      "eval_runtime": 267.9022,
      "eval_samples_per_second": 0.605,
      "eval_steps_per_second": 0.153,
      "step": 90
    },
    {
      "epoch": 2.2485207100591715,
      "grad_norm": 0.16893596947193146,
      "learning_rate": 1.2426505780436326e-06,
      "loss": 2.5235,
      "step": 95
    },
    {
      "epoch": 2.2485207100591715,
      "eval_loss": 2.3912644386291504,
      "eval_runtime": 266.3428,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 95
    },
    {
      "epoch": 2.366863905325444,
      "grad_norm": 0.16508392989635468,
      "learning_rate": 6.258033886587911e-07,
      "loss": 2.5154,
      "step": 100
    },
    {
      "epoch": 2.366863905325444,
      "eval_loss": 2.3907008171081543,
      "eval_runtime": 266.5099,
      "eval_samples_per_second": 0.608,
      "eval_steps_per_second": 0.154,
      "step": 100
    },
    {
      "epoch": 2.485207100591716,
      "grad_norm": 0.15974663197994232,
      "learning_rate": 2.1443507700495968e-07,
      "loss": 2.5333,
      "step": 105
    },
    {
      "epoch": 2.485207100591716,
      "eval_loss": 2.3904457092285156,
      "eval_runtime": 265.5021,
      "eval_samples_per_second": 0.61,
      "eval_steps_per_second": 0.154,
      "step": 105
    },
    {
      "epoch": 2.603550295857988,
      "grad_norm": 0.17541921138763428,
      "learning_rate": 1.7562682356786488e-08,
      "loss": 2.5098,
      "step": 110
    },
    {
      "epoch": 2.603550295857988,
      "eval_loss": 2.390380382537842,
      "eval_runtime": 266.7873,
      "eval_samples_per_second": 0.607,
      "eval_steps_per_second": 0.154,
      "step": 110
    }
  ],
  "logging_steps": 5,
  "max_steps": 112,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 5,
  "total_flos": 5.736198700007424e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|