|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.6859016393442623, |
|
"global_step": 49152, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004984426897459585, |
|
"loss": 0.5454, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.5005695223808289, |
|
"eval_runtime": 32.9622, |
|
"eval_samples_per_second": 104.271, |
|
"eval_steps_per_second": 6.523, |
|
"step": 4096 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004934687023955681, |
|
"loss": 0.4849, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 0.480915367603302, |
|
"eval_runtime": 32.9785, |
|
"eval_samples_per_second": 104.219, |
|
"eval_steps_per_second": 6.519, |
|
"step": 8192 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.000485144849673373, |
|
"loss": 0.4518, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 0.46660953760147095, |
|
"eval_runtime": 32.8208, |
|
"eval_samples_per_second": 104.72, |
|
"eval_steps_per_second": 6.551, |
|
"step": 12288 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0004735848873631612, |
|
"loss": 0.4282, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.46097490191459656, |
|
"eval_runtime": 32.8311, |
|
"eval_samples_per_second": 104.687, |
|
"eval_steps_per_second": 6.549, |
|
"step": 16384 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0004589518403420676, |
|
"loss": 0.4145, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.45063599944114685, |
|
"eval_runtime": 32.8912, |
|
"eval_samples_per_second": 104.496, |
|
"eval_steps_per_second": 6.537, |
|
"step": 20480 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0004414445597486605, |
|
"loss": 0.399, |
|
"step": 24576 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 0.44468095898628235, |
|
"eval_runtime": 32.706, |
|
"eval_samples_per_second": 105.088, |
|
"eval_steps_per_second": 6.574, |
|
"step": 24576 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00042130386669061293, |
|
"loss": 0.3882, |
|
"step": 28672 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.44857361912727356, |
|
"eval_runtime": 32.9754, |
|
"eval_samples_per_second": 104.229, |
|
"eval_steps_per_second": 6.52, |
|
"step": 28672 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0003988010477498867, |
|
"loss": 0.3767, |
|
"step": 32768 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.44354742765426636, |
|
"eval_runtime": 32.8219, |
|
"eval_samples_per_second": 104.717, |
|
"eval_steps_per_second": 6.55, |
|
"step": 32768 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0003742566178542921, |
|
"loss": 0.3676, |
|
"step": 36864 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.43940743803977966, |
|
"eval_runtime": 32.9279, |
|
"eval_samples_per_second": 104.38, |
|
"eval_steps_per_second": 6.529, |
|
"step": 36864 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0003479963856008823, |
|
"loss": 0.3577, |
|
"step": 40960 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.4323909878730774, |
|
"eval_runtime": 33.0718, |
|
"eval_samples_per_second": 103.926, |
|
"eval_steps_per_second": 6.501, |
|
"step": 40960 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0003203943839704654, |
|
"loss": 0.3517, |
|
"step": 45056 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 0.43262797594070435, |
|
"eval_runtime": 32.8978, |
|
"eval_samples_per_second": 104.475, |
|
"eval_steps_per_second": 6.535, |
|
"step": 45056 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.000291817008494138, |
|
"loss": 0.3427, |
|
"step": 49152 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 0.4303751289844513, |
|
"eval_runtime": 32.7306, |
|
"eval_samples_per_second": 105.009, |
|
"eval_steps_per_second": 6.569, |
|
"step": 49152 |
|
} |
|
], |
|
"max_steps": 109800, |
|
"num_train_epochs": 6, |
|
"total_flos": 2.28488651552981e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|