{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 16,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "grad_norm": 6.53125,
      "learning_rate": 0.0001,
      "loss": 6.4013,
      "step": 1
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.84375,
      "learning_rate": 0.000178183148246803,
      "loss": 6.2911,
      "step": 5
    },
    {
      "epoch": 2.5,
      "grad_norm": 2.765625,
      "learning_rate": 7.774790660436858e-05,
      "loss": 6.0613,
      "step": 10
    },
    {
      "epoch": 3.75,
      "grad_norm": 2.5625,
      "learning_rate": 2.5072087818176382e-06,
      "loss": 5.9747,
      "step": 15
    },
    {
      "epoch": 4.0,
      "step": 16,
      "total_flos": 535126081536000.0,
      "train_loss": 6.083620756864548,
      "train_runtime": 28.0153,
      "train_samples_per_second": 35.838,
      "train_steps_per_second": 0.571
    }
  ],
  "logging_steps": 5,
  "max_steps": 16,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 535126081536000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}