|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 19.9984, |
|
"global_step": 6240, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.3489128649234772, |
|
"eval_mse": 0.34891289472579956, |
|
"eval_runtime": 4.7891, |
|
"eval_samples_per_second": 208.807, |
|
"eval_steps_per_second": 26.101, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.331812181114852e-05, |
|
"loss": 0.3355, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.23772406578063965, |
|
"eval_mse": 0.23772406578063965, |
|
"eval_runtime": 4.8126, |
|
"eval_samples_per_second": 207.789, |
|
"eval_steps_per_second": 25.974, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.2444075644016266, |
|
"eval_mse": 0.2444075644016266, |
|
"eval_runtime": 4.8651, |
|
"eval_samples_per_second": 205.548, |
|
"eval_steps_per_second": 25.693, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.9544766252686105e-05, |
|
"loss": 0.1182, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.3121347725391388, |
|
"eval_mse": 0.3121347725391388, |
|
"eval_runtime": 4.9119, |
|
"eval_samples_per_second": 203.588, |
|
"eval_steps_per_second": 25.448, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 3.577141069422369e-05, |
|
"loss": 0.0651, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.22353506088256836, |
|
"eval_mse": 0.22353507578372955, |
|
"eval_runtime": 4.9339, |
|
"eval_samples_per_second": 202.678, |
|
"eval_steps_per_second": 25.335, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.19977863132953644, |
|
"eval_mse": 0.19977861642837524, |
|
"eval_runtime": 4.7835, |
|
"eval_samples_per_second": 209.051, |
|
"eval_steps_per_second": 26.131, |
|
"step": 1872 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 3.199805513576128e-05, |
|
"loss": 0.0498, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.2336214929819107, |
|
"eval_mse": 0.2336214929819107, |
|
"eval_runtime": 4.7754, |
|
"eval_samples_per_second": 209.407, |
|
"eval_steps_per_second": 26.176, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.21180056035518646, |
|
"eval_mse": 0.21180060505867004, |
|
"eval_runtime": 4.9099, |
|
"eval_samples_per_second": 203.671, |
|
"eval_steps_per_second": 25.459, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 2.822469957729886e-05, |
|
"loss": 0.0358, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.22979474067687988, |
|
"eval_mse": 0.22979475557804108, |
|
"eval_runtime": 4.8765, |
|
"eval_samples_per_second": 205.065, |
|
"eval_steps_per_second": 25.633, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 2.445134401883645e-05, |
|
"loss": 0.0279, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.2303524762392044, |
|
"eval_mse": 0.2303524762392044, |
|
"eval_runtime": 4.9464, |
|
"eval_samples_per_second": 202.165, |
|
"eval_steps_per_second": 25.271, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.21912191808223724, |
|
"eval_mse": 0.21912193298339844, |
|
"eval_runtime": 4.8937, |
|
"eval_samples_per_second": 204.345, |
|
"eval_steps_per_second": 25.543, |
|
"step": 3432 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 2.0677988460374033e-05, |
|
"loss": 0.0236, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.20294061303138733, |
|
"eval_mse": 0.20294061303138733, |
|
"eval_runtime": 4.6751, |
|
"eval_samples_per_second": 213.901, |
|
"eval_steps_per_second": 26.738, |
|
"step": 3744 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 1.6904632901911617e-05, |
|
"loss": 0.0195, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.21014319360256195, |
|
"eval_mse": 0.21014319360256195, |
|
"eval_runtime": 4.8248, |
|
"eval_samples_per_second": 207.262, |
|
"eval_steps_per_second": 25.908, |
|
"step": 4056 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.2216099053621292, |
|
"eval_mse": 0.2216099053621292, |
|
"eval_runtime": 4.7789, |
|
"eval_samples_per_second": 209.252, |
|
"eval_steps_per_second": 26.157, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 14.42, |
|
"learning_rate": 1.3131277343449203e-05, |
|
"loss": 0.0156, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.21098460257053375, |
|
"eval_mse": 0.21098460257053375, |
|
"eval_runtime": 4.7959, |
|
"eval_samples_per_second": 208.509, |
|
"eval_steps_per_second": 26.064, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.21815571188926697, |
|
"eval_mse": 0.21815571188926697, |
|
"eval_runtime": 4.9214, |
|
"eval_samples_per_second": 203.196, |
|
"eval_steps_per_second": 25.4, |
|
"step": 4992 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 9.357921784986788e-06, |
|
"loss": 0.0127, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.21420633792877197, |
|
"eval_mse": 0.21420633792877197, |
|
"eval_runtime": 4.7835, |
|
"eval_samples_per_second": 209.054, |
|
"eval_steps_per_second": 26.132, |
|
"step": 5304 |
|
}, |
|
{ |
|
"epoch": 17.63, |
|
"learning_rate": 5.5845662265243735e-06, |
|
"loss": 0.0109, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.2126861810684204, |
|
"eval_mse": 0.2126861810684204, |
|
"eval_runtime": 4.8516, |
|
"eval_samples_per_second": 206.117, |
|
"eval_steps_per_second": 25.765, |
|
"step": 5616 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.21384698152542114, |
|
"eval_mse": 0.21384698152542114, |
|
"eval_runtime": 4.9235, |
|
"eval_samples_per_second": 203.109, |
|
"eval_steps_per_second": 25.389, |
|
"step": 5928 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 1.8112106680619593e-06, |
|
"loss": 0.0094, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.2143700271844864, |
|
"eval_mse": 0.2143700271844864, |
|
"eval_runtime": 4.7927, |
|
"eval_samples_per_second": 208.649, |
|
"eval_steps_per_second": 26.081, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 6240, |
|
"total_flos": 6577191107414016.0, |
|
"train_loss": 0.05834093816005267, |
|
"train_runtime": 2383.5567, |
|
"train_samples_per_second": 41.954, |
|
"train_steps_per_second": 2.618 |
|
} |
|
], |
|
"max_steps": 6240, |
|
"num_train_epochs": 20, |
|
"total_flos": 6577191107414016.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|