ruanchaves's picture
Upload trainer_state.json with huggingface_hub
50b8f1d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 19.9984,
"global_step": 6240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.3489128649234772,
"eval_mse": 0.34891289472579956,
"eval_runtime": 4.7891,
"eval_samples_per_second": 208.807,
"eval_steps_per_second": 26.101,
"step": 312
},
{
"epoch": 1.6,
"learning_rate": 4.331812181114852e-05,
"loss": 0.3355,
"step": 500
},
{
"epoch": 2.0,
"eval_loss": 0.23772406578063965,
"eval_mse": 0.23772406578063965,
"eval_runtime": 4.8126,
"eval_samples_per_second": 207.789,
"eval_steps_per_second": 25.974,
"step": 624
},
{
"epoch": 3.0,
"eval_loss": 0.2444075644016266,
"eval_mse": 0.2444075644016266,
"eval_runtime": 4.8651,
"eval_samples_per_second": 205.548,
"eval_steps_per_second": 25.693,
"step": 936
},
{
"epoch": 3.2,
"learning_rate": 3.9544766252686105e-05,
"loss": 0.1182,
"step": 1000
},
{
"epoch": 4.0,
"eval_loss": 0.3121347725391388,
"eval_mse": 0.3121347725391388,
"eval_runtime": 4.9119,
"eval_samples_per_second": 203.588,
"eval_steps_per_second": 25.448,
"step": 1248
},
{
"epoch": 4.81,
"learning_rate": 3.577141069422369e-05,
"loss": 0.0651,
"step": 1500
},
{
"epoch": 5.0,
"eval_loss": 0.22353506088256836,
"eval_mse": 0.22353507578372955,
"eval_runtime": 4.9339,
"eval_samples_per_second": 202.678,
"eval_steps_per_second": 25.335,
"step": 1560
},
{
"epoch": 6.0,
"eval_loss": 0.19977863132953644,
"eval_mse": 0.19977861642837524,
"eval_runtime": 4.7835,
"eval_samples_per_second": 209.051,
"eval_steps_per_second": 26.131,
"step": 1872
},
{
"epoch": 6.41,
"learning_rate": 3.199805513576128e-05,
"loss": 0.0498,
"step": 2000
},
{
"epoch": 7.0,
"eval_loss": 0.2336214929819107,
"eval_mse": 0.2336214929819107,
"eval_runtime": 4.7754,
"eval_samples_per_second": 209.407,
"eval_steps_per_second": 26.176,
"step": 2184
},
{
"epoch": 8.0,
"eval_loss": 0.21180056035518646,
"eval_mse": 0.21180060505867004,
"eval_runtime": 4.9099,
"eval_samples_per_second": 203.671,
"eval_steps_per_second": 25.459,
"step": 2496
},
{
"epoch": 8.01,
"learning_rate": 2.822469957729886e-05,
"loss": 0.0358,
"step": 2500
},
{
"epoch": 9.0,
"eval_loss": 0.22979474067687988,
"eval_mse": 0.22979475557804108,
"eval_runtime": 4.8765,
"eval_samples_per_second": 205.065,
"eval_steps_per_second": 25.633,
"step": 2808
},
{
"epoch": 9.61,
"learning_rate": 2.445134401883645e-05,
"loss": 0.0279,
"step": 3000
},
{
"epoch": 10.0,
"eval_loss": 0.2303524762392044,
"eval_mse": 0.2303524762392044,
"eval_runtime": 4.9464,
"eval_samples_per_second": 202.165,
"eval_steps_per_second": 25.271,
"step": 3120
},
{
"epoch": 11.0,
"eval_loss": 0.21912191808223724,
"eval_mse": 0.21912193298339844,
"eval_runtime": 4.8937,
"eval_samples_per_second": 204.345,
"eval_steps_per_second": 25.543,
"step": 3432
},
{
"epoch": 11.22,
"learning_rate": 2.0677988460374033e-05,
"loss": 0.0236,
"step": 3500
},
{
"epoch": 12.0,
"eval_loss": 0.20294061303138733,
"eval_mse": 0.20294061303138733,
"eval_runtime": 4.6751,
"eval_samples_per_second": 213.901,
"eval_steps_per_second": 26.738,
"step": 3744
},
{
"epoch": 12.82,
"learning_rate": 1.6904632901911617e-05,
"loss": 0.0195,
"step": 4000
},
{
"epoch": 13.0,
"eval_loss": 0.21014319360256195,
"eval_mse": 0.21014319360256195,
"eval_runtime": 4.8248,
"eval_samples_per_second": 207.262,
"eval_steps_per_second": 25.908,
"step": 4056
},
{
"epoch": 14.0,
"eval_loss": 0.2216099053621292,
"eval_mse": 0.2216099053621292,
"eval_runtime": 4.7789,
"eval_samples_per_second": 209.252,
"eval_steps_per_second": 26.157,
"step": 4368
},
{
"epoch": 14.42,
"learning_rate": 1.3131277343449203e-05,
"loss": 0.0156,
"step": 4500
},
{
"epoch": 15.0,
"eval_loss": 0.21098460257053375,
"eval_mse": 0.21098460257053375,
"eval_runtime": 4.7959,
"eval_samples_per_second": 208.509,
"eval_steps_per_second": 26.064,
"step": 4680
},
{
"epoch": 16.0,
"eval_loss": 0.21815571188926697,
"eval_mse": 0.21815571188926697,
"eval_runtime": 4.9214,
"eval_samples_per_second": 203.196,
"eval_steps_per_second": 25.4,
"step": 4992
},
{
"epoch": 16.03,
"learning_rate": 9.357921784986788e-06,
"loss": 0.0127,
"step": 5000
},
{
"epoch": 17.0,
"eval_loss": 0.21420633792877197,
"eval_mse": 0.21420633792877197,
"eval_runtime": 4.7835,
"eval_samples_per_second": 209.054,
"eval_steps_per_second": 26.132,
"step": 5304
},
{
"epoch": 17.63,
"learning_rate": 5.5845662265243735e-06,
"loss": 0.0109,
"step": 5500
},
{
"epoch": 18.0,
"eval_loss": 0.2126861810684204,
"eval_mse": 0.2126861810684204,
"eval_runtime": 4.8516,
"eval_samples_per_second": 206.117,
"eval_steps_per_second": 25.765,
"step": 5616
},
{
"epoch": 19.0,
"eval_loss": 0.21384698152542114,
"eval_mse": 0.21384698152542114,
"eval_runtime": 4.9235,
"eval_samples_per_second": 203.109,
"eval_steps_per_second": 25.389,
"step": 5928
},
{
"epoch": 19.23,
"learning_rate": 1.8112106680619593e-06,
"loss": 0.0094,
"step": 6000
},
{
"epoch": 20.0,
"eval_loss": 0.2143700271844864,
"eval_mse": 0.2143700271844864,
"eval_runtime": 4.7927,
"eval_samples_per_second": 208.649,
"eval_steps_per_second": 26.081,
"step": 6240
},
{
"epoch": 20.0,
"step": 6240,
"total_flos": 6577191107414016.0,
"train_loss": 0.05834093816005267,
"train_runtime": 2383.5567,
"train_samples_per_second": 41.954,
"train_steps_per_second": 2.618
}
],
"max_steps": 6240,
"num_train_epochs": 20,
"total_flos": 6577191107414016.0,
"trial_name": null,
"trial_params": null
}