|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.16, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008, |
|
"grad_norm": 0.8485889434814453, |
|
"learning_rate": 0.0001999964908278481, |
|
"loss": 1.2049, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.016, |
|
"grad_norm": 0.47789862751960754, |
|
"learning_rate": 0.00019998596355767805, |
|
"loss": 0.9333, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024, |
|
"grad_norm": 1.017558217048645, |
|
"learning_rate": 0.00019996841892833, |
|
"loss": 0.8671, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.032, |
|
"grad_norm": 0.6610977053642273, |
|
"learning_rate": 0.00019994385817114646, |
|
"loss": 0.7979, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6075429320335388, |
|
"learning_rate": 0.00019991228300988585, |
|
"loss": 0.7662, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.048, |
|
"grad_norm": 0.6595763564109802, |
|
"learning_rate": 0.00019987369566060176, |
|
"loss": 0.7929, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.056, |
|
"grad_norm": 0.6968618035316467, |
|
"learning_rate": 0.00019982809883148722, |
|
"loss": 0.7683, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.064, |
|
"grad_norm": 0.4889592230319977, |
|
"learning_rate": 0.00019977549572268468, |
|
"loss": 0.8667, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.072, |
|
"grad_norm": 0.6651108264923096, |
|
"learning_rate": 0.0001997158900260614, |
|
"loss": 0.8446, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5898510217666626, |
|
"learning_rate": 0.00019964928592495045, |
|
"loss": 0.9051, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.088, |
|
"grad_norm": 0.4398016035556793, |
|
"learning_rate": 0.00019957568809385694, |
|
"loss": 0.7235, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.096, |
|
"grad_norm": 0.6901968121528625, |
|
"learning_rate": 0.00019949510169813003, |
|
"loss": 0.8169, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.104, |
|
"grad_norm": 0.6267213225364685, |
|
"learning_rate": 0.00019940753239360047, |
|
"loss": 0.8266, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.112, |
|
"grad_norm": 0.48524895310401917, |
|
"learning_rate": 0.00019931298632618356, |
|
"loss": 0.758, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.5294132232666016, |
|
"learning_rate": 0.0001992114701314478, |
|
"loss": 0.7759, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.128, |
|
"grad_norm": 0.48957982659339905, |
|
"learning_rate": 0.0001991029909341493, |
|
"loss": 0.7797, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.136, |
|
"grad_norm": 0.645412802696228, |
|
"learning_rate": 0.00019898755634773158, |
|
"loss": 0.7437, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.144, |
|
"grad_norm": 0.43297675251960754, |
|
"learning_rate": 0.0001988651744737914, |
|
"loss": 0.8043, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.152, |
|
"grad_norm": 0.5513920783996582, |
|
"learning_rate": 0.00019873585390151003, |
|
"loss": 0.7701, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.8462435007095337, |
|
"learning_rate": 0.0001985996037070505, |
|
"loss": 0.709, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1875, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6310561256570880.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|