|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.014311270125223614, |
|
"eval_steps": 500, |
|
"global_step": 40, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0035778175313059034, |
|
"grad_norm": 0.5825825929641724, |
|
"learning_rate": 0.00019928443649373882, |
|
"loss": 1.248, |
|
"num_input_tokens_seen": 6646, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007155635062611807, |
|
"grad_norm": 0.5380188822746277, |
|
"learning_rate": 0.00019856887298747765, |
|
"loss": 0.5478, |
|
"num_input_tokens_seen": 13063, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01073345259391771, |
|
"grad_norm": 0.3872911036014557, |
|
"learning_rate": 0.00019785330948121648, |
|
"loss": 0.5135, |
|
"num_input_tokens_seen": 19512, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014311270125223614, |
|
"grad_norm": 0.4991438686847687, |
|
"learning_rate": 0.0001971377459749553, |
|
"loss": 0.5092, |
|
"num_input_tokens_seen": 26884, |
|
"step": 40 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2795, |
|
"num_input_tokens_seen": 26884, |
|
"num_train_epochs": 1, |
|
"save_steps": 20, |
|
"total_flos": 604526222057472.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|