AlekseyKorshuk's picture
End of training
c5fd10b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"global_step": 1470,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9e-07,
"loss": 2.4836,
"perplexity": 11.98433044208569,
"step": 1
},
{
"epoch": 1.0,
"learning_rate": 9e-07,
"loss": 2.5819,
"perplexity": 13.222236558089785,
"step": 98
},
{
"epoch": 1.0,
"eval_runtime": 0.0033,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 98
},
{
"epoch": 2.0,
"learning_rate": 9e-07,
"loss": 2.1794,
"perplexity": 8.841000066982355,
"step": 196
},
{
"epoch": 2.0,
"eval_runtime": 0.0031,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 196
},
{
"epoch": 3.0,
"learning_rate": 9e-07,
"loss": 1.8523,
"perplexity": 6.374463942130527,
"step": 294
},
{
"epoch": 3.0,
"eval_runtime": 0.0024,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 294
},
{
"epoch": 4.0,
"learning_rate": 9e-07,
"loss": 1.5639,
"perplexity": 4.777416885923455,
"step": 392
},
{
"epoch": 4.0,
"eval_runtime": 0.0026,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 392
},
{
"epoch": 5.0,
"learning_rate": 9e-07,
"loss": 1.3052,
"perplexity": 3.688426705282456,
"step": 490
},
{
"epoch": 5.0,
"eval_runtime": 0.0028,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 490
},
{
"epoch": 6.0,
"learning_rate": 9e-07,
"loss": 1.0804,
"perplexity": 2.945857658491727,
"step": 588
},
{
"epoch": 6.0,
"eval_runtime": 0.0025,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 588
},
{
"epoch": 7.0,
"learning_rate": 9e-07,
"loss": 0.8983,
"perplexity": 2.455425337981329,
"step": 686
},
{
"epoch": 7.0,
"eval_runtime": 0.0031,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 686
},
{
"epoch": 8.0,
"learning_rate": 9e-07,
"loss": 0.7469,
"perplexity": 2.1104474782431386,
"step": 784
},
{
"epoch": 8.0,
"eval_runtime": 0.0028,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 784
},
{
"epoch": 9.0,
"learning_rate": 9e-07,
"loss": 0.6196,
"perplexity": 1.8581846193240203,
"step": 882
},
{
"epoch": 9.0,
"eval_runtime": 0.0024,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 882
},
{
"epoch": 10.0,
"learning_rate": 9e-07,
"loss": 0.5154,
"perplexity": 1.6743080911071826,
"step": 980
},
{
"epoch": 10.0,
"eval_runtime": 0.0034,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 980
},
{
"epoch": 11.0,
"learning_rate": 9e-07,
"loss": 0.4301,
"perplexity": 1.53741125698718,
"step": 1078
},
{
"epoch": 11.0,
"eval_runtime": 0.0026,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 1078
},
{
"epoch": 12.0,
"learning_rate": 9e-07,
"loss": 0.3466,
"perplexity": 1.4142509118505286,
"step": 1176
},
{
"epoch": 12.0,
"eval_runtime": 0.0032,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 1176
},
{
"epoch": 13.0,
"learning_rate": 9e-07,
"loss": 0.2822,
"perplexity": 1.3260439022481316,
"step": 1274
},
{
"epoch": 13.0,
"eval_runtime": 0.0031,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 1274
},
{
"epoch": 14.0,
"learning_rate": 9e-07,
"loss": 0.2263,
"perplexity": 1.2539517943943168,
"step": 1372
},
{
"epoch": 14.0,
"eval_runtime": 0.0032,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 1372
},
{
"epoch": 15.0,
"learning_rate": 9e-07,
"loss": 0.1815,
"perplexity": 1.1990145367097138,
"step": 1470
},
{
"epoch": 15.0,
"eval_runtime": 0.0022,
"eval_samples_per_second": 0.0,
"eval_steps_per_second": 0.0,
"step": 1470
},
{
"epoch": 15.0,
"step": 1470,
"total_flos": 49376758071296.0,
"train_loss": 0.9872670257983565,
"train_runtime": 36788.8507,
"train_samples_per_second": 0.159,
"train_steps_per_second": 0.04
}
],
"max_steps": 1470,
"num_train_epochs": 15,
"total_flos": 49376758071296.0,
"trial_name": null,
"trial_params": null
}