{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3433, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029129041654529567, "grad_norm": 0.13301080465316772, "learning_rate": 2.0000000000000003e-06, "loss": 2.4859, "step": 100 }, { "epoch": 0.058258083309059135, "grad_norm": 0.24819637835025787, "learning_rate": 4.000000000000001e-06, "loss": 2.5086, "step": 200 }, { "epoch": 0.08738712496358869, "grad_norm": 0.2222064584493637, "learning_rate": 6e-06, "loss": 2.4653, "step": 300 }, { "epoch": 0.11651616661811827, "grad_norm": 0.3736318349838257, "learning_rate": 8.000000000000001e-06, "loss": 2.3876, "step": 400 }, { "epoch": 0.14564520827264782, "grad_norm": 0.44497305154800415, "learning_rate": 1e-05, "loss": 2.3407, "step": 500 }, { "epoch": 0.17477424992717738, "grad_norm": 0.5166358351707458, "learning_rate": 1.2e-05, "loss": 2.2951, "step": 600 }, { "epoch": 0.20390329158170697, "grad_norm": 0.5752297043800354, "learning_rate": 1.4e-05, "loss": 2.2502, "step": 700 }, { "epoch": 0.23303233323623654, "grad_norm": 0.8210733532905579, "learning_rate": 1.6000000000000003e-05, "loss": 2.1539, "step": 800 }, { "epoch": 0.2621613748907661, "grad_norm": 0.9511260390281677, "learning_rate": 1.8e-05, "loss": 2.1106, "step": 900 }, { "epoch": 0.29129041654529564, "grad_norm": 0.9485260248184204, "learning_rate": 2e-05, "loss": 2.0728, "step": 1000 }, { "epoch": 0.3204194581998252, "grad_norm": 1.2509729862213135, "learning_rate": 1.9916750424140173e-05, "loss": 1.9686, "step": 1100 }, { "epoch": 0.34954849985435477, "grad_norm": 1.201899766921997, "learning_rate": 1.9668387794936857e-05, "loss": 1.9571, "step": 1200 }, { "epoch": 0.37867754150888433, "grad_norm": 1.625450849533081, "learning_rate": 1.9259047329098173e-05, "loss": 1.9467, "step": 1300 }, { "epoch": 0.40780658316341395, "grad_norm": 1.0702261924743652, "learning_rate": 1.869554451065679e-05, "loss": 1.93, "step": 1400 }, { "epoch": 0.4369356248179435, "grad_norm": 1.2770276069641113, "learning_rate": 1.7987261613738918e-05, "loss": 1.9, "step": 1500 }, { "epoch": 0.4660646664724731, "grad_norm": 1.1146663427352905, "learning_rate": 1.7145991488495997e-05, "loss": 1.8802, "step": 1600 }, { "epoch": 0.49519370812700264, "grad_norm": 1.2624766826629639, "learning_rate": 1.618574121115003e-05, "loss": 1.8791, "step": 1700 }, { "epoch": 0.5243227497815321, "grad_norm": 1.325671672821045, "learning_rate": 1.5122498867362679e-05, "loss": 1.8353, "step": 1800 }, { "epoch": 0.5534517914360617, "grad_norm": 1.1726603507995605, "learning_rate": 1.397396735196525e-05, "loss": 1.9057, "step": 1900 }, { "epoch": 0.5825808330905913, "grad_norm": 1.0342259407043457, "learning_rate": 1.2759269617261439e-05, "loss": 1.8641, "step": 2000 }, { "epoch": 0.6117098747451208, "grad_norm": 1.2554254531860352, "learning_rate": 1.1498630277493646e-05, "loss": 1.8813, "step": 2100 }, { "epoch": 0.6408389163996504, "grad_norm": 1.3348976373672485, "learning_rate": 1.0213038870731443e-05, "loss": 1.8919, "step": 2200 }, { "epoch": 0.66996795805418, "grad_norm": 1.320646047592163, "learning_rate": 8.923900384843229e-06, "loss": 1.8163, "step": 2300 }, { "epoch": 0.6990969997087095, "grad_norm": 1.3995234966278076, "learning_rate": 7.65267886626396e-06, "loss": 1.8456, "step": 2400 }, { "epoch": 0.7282260413632391, "grad_norm": 1.6419973373413086, "learning_rate": 6.420540045442756e-06, "loss": 1.8961, "step": 2500 }, { "epoch": 0.7573550830177687, "grad_norm": 1.266167402267456, "learning_rate": 5.247998929226381e-06, "loss": 1.8257, "step": 2600 }, { "epoch": 0.7864841246722983, "grad_norm": 1.2374173402786255, "learning_rate": 4.154578227735479e-06, "loss": 1.842, "step": 2700 }, { "epoch": 0.8156131663268279, "grad_norm": 1.1420338153839111, "learning_rate": 3.1584833028971385e-06, "loss": 1.8826, "step": 2800 }, { "epoch": 0.8447422079813575, "grad_norm": 1.4649367332458496, "learning_rate": 2.276299050713151e-06, "loss": 1.8156, "step": 2900 }, { "epoch": 0.873871249635887, "grad_norm": 1.246354341506958, "learning_rate": 1.522713764148417e-06, "loss": 1.8338, "step": 3000 }, { "epoch": 0.9030002912904166, "grad_norm": 1.3653801679611206, "learning_rate": 9.102745742990871e-07, "loss": 1.8364, "step": 3100 }, { "epoch": 0.9321293329449462, "grad_norm": 1.972670555114746, "learning_rate": 4.4917854172413567e-07, "loss": 1.8059, "step": 3200 }, { "epoch": 0.9612583745994757, "grad_norm": 1.2663543224334717, "learning_rate": 1.4710287625206654e-07, "loss": 1.8462, "step": 3300 }, { "epoch": 0.9903874162540053, "grad_norm": 1.3993514776229858, "learning_rate": 9.077112088505813e-09, "loss": 1.8679, "step": 3400 }, { "epoch": 1.0, "step": 3433, "total_flos": 6.253123043328e+16, "train_loss": 1.9996433352507346, "train_runtime": 916.5503, "train_samples_per_second": 7.491, "train_steps_per_second": 3.746 } ], "logging_steps": 100, "max_steps": 3433, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.253123043328e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }