{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 1830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.273224043715847,
      "grad_norm": 1.0141575336456299,
      "learning_rate": 0.00019995065603657316,
      "loss": 1.8945,
      "step": 50
    },
    {
      "epoch": 0.546448087431694,
      "grad_norm": 0.9003917574882507,
      "learning_rate": 0.00019980267284282717,
      "loss": 1.3737,
      "step": 100
    },
    {
      "epoch": 0.819672131147541,
      "grad_norm": 0.855492889881134,
      "learning_rate": 0.00019955619646030802,
      "loss": 1.3506,
      "step": 150
    },
    {
      "epoch": 1.092896174863388,
      "grad_norm": 1.1789681911468506,
      "learning_rate": 0.0001992114701314478,
      "loss": 1.2946,
      "step": 200
    },
    {
      "epoch": 1.366120218579235,
      "grad_norm": 0.9413456320762634,
      "learning_rate": 0.00019876883405951377,
      "loss": 1.2914,
      "step": 250
    },
    {
      "epoch": 1.639344262295082,
      "grad_norm": 0.8401021957397461,
      "learning_rate": 0.0001982287250728689,
      "loss": 1.2756,
      "step": 300
    },
    {
      "epoch": 1.9125683060109289,
      "grad_norm": 0.9392536878585815,
      "learning_rate": 0.00019759167619387476,
      "loss": 1.2785,
      "step": 350
    },
    {
      "epoch": 2.185792349726776,
      "grad_norm": 0.918136477470398,
      "learning_rate": 0.0001968583161128631,
      "loss": 1.2199,
      "step": 400
    },
    {
      "epoch": 2.459016393442623,
      "grad_norm": 0.9809663891792297,
      "learning_rate": 0.0001960293685676943,
      "loss": 1.201,
      "step": 450
    },
    {
      "epoch": 2.73224043715847,
      "grad_norm": 1.0254710912704468,
      "learning_rate": 0.00019510565162951537,
      "loss": 1.1842,
      "step": 500
    },
    {
      "epoch": 3.0054644808743167,
      "grad_norm": 1.1089431047439575,
      "learning_rate": 0.00019408807689542257,
      "loss": 1.1819,
      "step": 550
    },
    {
      "epoch": 3.278688524590164,
      "grad_norm": 1.2321062088012695,
      "learning_rate": 0.00019297764858882514,
      "loss": 1.1113,
      "step": 600
    },
    {
      "epoch": 3.551912568306011,
      "grad_norm": 1.0911256074905396,
      "learning_rate": 0.00019177546256839812,
      "loss": 1.1212,
      "step": 650
    },
    {
      "epoch": 3.8251366120218577,
      "grad_norm": 1.1500061750411987,
      "learning_rate": 0.00019048270524660196,
      "loss": 1.1247,
      "step": 700
    },
    {
      "epoch": 4.098360655737705,
      "grad_norm": 1.259513258934021,
      "learning_rate": 0.0001891006524188368,
      "loss": 1.0826,
      "step": 750
    },
    {
      "epoch": 4.371584699453552,
      "grad_norm": 1.377414345741272,
      "learning_rate": 0.00018763066800438636,
      "loss": 1.0593,
      "step": 800
    },
    {
      "epoch": 4.644808743169399,
      "grad_norm": 1.2397098541259766,
      "learning_rate": 0.0001860742027003944,
      "loss": 1.0414,
      "step": 850
    },
    {
      "epoch": 4.918032786885246,
      "grad_norm": 1.2820392847061157,
      "learning_rate": 0.00018443279255020152,
      "loss": 1.0601,
      "step": 900
    },
    {
      "epoch": 5.191256830601093,
      "grad_norm": 1.6708155870437622,
      "learning_rate": 0.00018270805742745617,
      "loss": 0.973,
      "step": 950
    },
    {
      "epoch": 5.46448087431694,
      "grad_norm": 1.546794056892395,
      "learning_rate": 0.00018090169943749476,
      "loss": 0.9904,
      "step": 1000
    },
    {
      "epoch": 5.737704918032787,
      "grad_norm": 1.437908411026001,
      "learning_rate": 0.00017901550123756906,
      "loss": 0.9863,
      "step": 1050
    },
    {
      "epoch": 6.0109289617486334,
      "grad_norm": 1.4555143117904663,
      "learning_rate": 0.00017705132427757895,
      "loss": 0.9768,
      "step": 1100
    },
    {
      "epoch": 6.284153005464481,
      "grad_norm": 1.494957447052002,
      "learning_rate": 0.00017501110696304596,
      "loss": 0.8969,
      "step": 1150
    },
    {
      "epoch": 6.557377049180328,
      "grad_norm": 1.4257054328918457,
      "learning_rate": 0.00017289686274214118,
      "loss": 0.9207,
      "step": 1200
    },
    {
      "epoch": 6.830601092896175,
      "grad_norm": 1.6431266069412231,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.9116,
      "step": 1250
    },
    {
      "epoch": 7.103825136612022,
      "grad_norm": 1.4786570072174072,
      "learning_rate": 0.00016845471059286887,
      "loss": 0.8975,
      "step": 1300
    },
    {
      "epoch": 7.377049180327869,
      "grad_norm": 1.5059996843338013,
      "learning_rate": 0.00016613118653236518,
      "loss": 0.8519,
      "step": 1350
    },
    {
      "epoch": 7.6502732240437155,
      "grad_norm": 1.5110268592834473,
      "learning_rate": 0.000163742398974869,
      "loss": 0.8471,
      "step": 1400
    },
    {
      "epoch": 7.923497267759563,
      "grad_norm": 1.6930420398712158,
      "learning_rate": 0.00016129070536529766,
      "loss": 0.8544,
      "step": 1450
    },
    {
      "epoch": 8.19672131147541,
      "grad_norm": 1.8286707401275635,
      "learning_rate": 0.00015877852522924732,
      "loss": 0.8102,
      "step": 1500
    },
    {
      "epoch": 8.469945355191257,
      "grad_norm": 1.4673559665679932,
      "learning_rate": 0.00015620833778521307,
      "loss": 0.7986,
      "step": 1550
    },
    {
      "epoch": 8.743169398907105,
      "grad_norm": 1.6546106338500977,
      "learning_rate": 0.00015358267949789966,
      "loss": 0.7985,
      "step": 1600
    },
    {
      "epoch": 9.01639344262295,
      "grad_norm": 1.7138121128082275,
      "learning_rate": 0.00015090414157503714,
      "loss": 0.8194,
      "step": 1650
    },
    {
      "epoch": 9.289617486338798,
      "grad_norm": 1.5631183385849,
      "learning_rate": 0.00014817536741017152,
      "loss": 0.7317,
      "step": 1700
    },
    {
      "epoch": 9.562841530054644,
      "grad_norm": 1.936880111694336,
      "learning_rate": 0.00014539904997395468,
      "loss": 0.7479,
      "step": 1750
    },
    {
      "epoch": 9.836065573770492,
      "grad_norm": 1.6515196561813354,
      "learning_rate": 0.00014257792915650728,
      "loss": 0.7435,
      "step": 1800
    }
  ],
  "logging_steps": 50,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 28,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.968783346244608e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}