{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.2467532467532467,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16233766233766234,
      "grad_norm": 0.4960825741291046,
      "learning_rate": 5e-06,
      "loss": 2.0091,
      "step": 50
    },
    {
      "epoch": 0.3246753246753247,
      "grad_norm": 0.612104058265686,
      "learning_rate": 1e-05,
      "loss": 1.9859,
      "step": 100
    },
    {
      "epoch": 0.487012987012987,
      "grad_norm": 0.972963273525238,
      "learning_rate": 1.5e-05,
      "loss": 1.8634,
      "step": 150
    },
    {
      "epoch": 0.6493506493506493,
      "grad_norm": 1.1691817045211792,
      "learning_rate": 2e-05,
      "loss": 1.6265,
      "step": 200
    },
    {
      "epoch": 0.8116883116883117,
      "grad_norm": 0.9815682768821716,
      "learning_rate": 2.5e-05,
      "loss": 1.3582,
      "step": 250
    },
    {
      "epoch": 0.974025974025974,
      "grad_norm": 0.9641402959823608,
      "learning_rate": 3e-05,
      "loss": 1.2325,
      "step": 300
    },
    {
      "epoch": 1.1363636363636362,
      "grad_norm": 1.0225356817245483,
      "learning_rate": 3.5e-05,
      "loss": 1.1779,
      "step": 350
    },
    {
      "epoch": 1.2987012987012987,
      "grad_norm": 1.0966838598251343,
      "learning_rate": 4e-05,
      "loss": 1.1259,
      "step": 400
    },
    {
      "epoch": 1.4610389610389611,
      "grad_norm": 1.287178874015808,
      "learning_rate": 4.5e-05,
      "loss": 1.1133,
      "step": 450
    },
    {
      "epoch": 1.6233766233766234,
      "grad_norm": 1.6265290975570679,
      "learning_rate": 5e-05,
      "loss": 1.092,
      "step": 500
    },
    {
      "epoch": 1.7857142857142856,
      "grad_norm": 1.639063835144043,
      "learning_rate": 4.877641290737884e-05,
      "loss": 1.0809,
      "step": 550
    },
    {
      "epoch": 1.948051948051948,
      "grad_norm": 1.2419629096984863,
      "learning_rate": 4.522542485937369e-05,
      "loss": 1.0611,
      "step": 600
    },
    {
      "epoch": 2.1103896103896105,
      "grad_norm": 1.6913366317749023,
      "learning_rate": 3.969463130731183e-05,
      "loss": 1.0453,
      "step": 650
    },
    {
      "epoch": 2.2727272727272725,
      "grad_norm": 1.5554989576339722,
      "learning_rate": 3.272542485937369e-05,
      "loss": 1.0318,
      "step": 700
    },
    {
      "epoch": 2.435064935064935,
      "grad_norm": 1.399939775466919,
      "learning_rate": 2.5e-05,
      "loss": 1.0313,
      "step": 750
    },
    {
      "epoch": 2.5974025974025974,
      "grad_norm": 1.5082526206970215,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 1.0219,
      "step": 800
    },
    {
      "epoch": 2.75974025974026,
      "grad_norm": 1.4758871793746948,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 1.0223,
      "step": 850
    },
    {
      "epoch": 2.9220779220779223,
      "grad_norm": 1.4590177536010742,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 1.0271,
      "step": 900
    },
    {
      "epoch": 3.0844155844155843,
      "grad_norm": 1.366603136062622,
      "learning_rate": 1.2235870926211619e-06,
      "loss": 1.0161,
      "step": 950
    },
    {
      "epoch": 3.2467532467532467,
      "grad_norm": 1.26113760471344,
      "learning_rate": 0.0,
      "loss": 1.0168,
      "step": 1000
    }
  ],
  "logging_steps": 50,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.378177990475776e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}