{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3337505214851898, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008343763037129746, "grad_norm": 0.7562159895896912, "learning_rate": 0.0001951951951951952, "loss": 1.9181, "step": 25 }, { "epoch": 0.016687526074259492, "grad_norm": 0.8098325133323669, "learning_rate": 0.0001901901901901902, "loss": 1.1849, "step": 50 }, { "epoch": 0.025031289111389236, "grad_norm": 0.3777216374874115, "learning_rate": 0.0001851851851851852, "loss": 1.6916, "step": 75 }, { "epoch": 0.033375052148518984, "grad_norm": 0.5206697583198547, "learning_rate": 0.00018018018018018018, "loss": 1.1422, "step": 100 }, { "epoch": 0.041718815185648725, "grad_norm": 0.38817787170410156, "learning_rate": 0.0001751751751751752, "loss": 1.6667, "step": 125 }, { "epoch": 0.05006257822277847, "grad_norm": 0.6388385891914368, "learning_rate": 0.0001701701701701702, "loss": 1.2033, "step": 150 }, { "epoch": 0.05840634125990822, "grad_norm": 0.4120958149433136, "learning_rate": 0.00016516516516516518, "loss": 1.725, "step": 175 }, { "epoch": 0.06675010429703797, "grad_norm": 0.46837368607521057, "learning_rate": 0.00016016016016016018, "loss": 1.065, "step": 200 }, { "epoch": 0.07509386733416772, "grad_norm": 0.4679395854473114, "learning_rate": 0.00015515515515515516, "loss": 1.7601, "step": 225 }, { "epoch": 0.08343763037129745, "grad_norm": 0.380135178565979, "learning_rate": 0.00015015015015015014, "loss": 1.0251, "step": 250 }, { "epoch": 0.0917813934084272, "grad_norm": 0.3675175905227661, "learning_rate": 0.00014514514514514515, "loss": 1.6459, "step": 275 }, { "epoch": 0.10012515644555695, "grad_norm": 0.48725131154060364, "learning_rate": 0.00014014014014014013, "loss": 1.0373, "step": 300 }, { "epoch": 0.1084689194826867, "grad_norm": 0.42459967732429504, "learning_rate": 0.00013513513513513514, "loss": 1.6939, "step": 325 }, { "epoch": 0.11681268251981644, "grad_norm": 0.44615626335144043, "learning_rate": 0.00013013013013013014, "loss": 1.1265, "step": 350 }, { "epoch": 0.1251564455569462, "grad_norm": 0.3729555904865265, "learning_rate": 0.00012512512512512512, "loss": 1.7411, "step": 375 }, { "epoch": 0.13350020859407594, "grad_norm": 0.42633363604545593, "learning_rate": 0.00012012012012012013, "loss": 1.1436, "step": 400 }, { "epoch": 0.14184397163120568, "grad_norm": 0.4313684105873108, "learning_rate": 0.00011511511511511512, "loss": 1.6521, "step": 425 }, { "epoch": 0.15018773466833543, "grad_norm": 0.3195270597934723, "learning_rate": 0.00011011011011011012, "loss": 0.984, "step": 450 }, { "epoch": 0.15853149770546515, "grad_norm": 0.279301255941391, "learning_rate": 0.00010510510510510511, "loss": 1.6523, "step": 475 }, { "epoch": 0.1668752607425949, "grad_norm": 0.32661929726600647, "learning_rate": 0.00010010010010010012, "loss": 1.0151, "step": 500 }, { "epoch": 0.1668752607425949, "eval_loss": 1.5524340867996216, "eval_runtime": 511.702, "eval_samples_per_second": 2.931, "eval_steps_per_second": 0.367, "step": 500 }, { "epoch": 0.17521902377972465, "grad_norm": 0.27320751547813416, "learning_rate": 9.50950950950951e-05, "loss": 1.6524, "step": 525 }, { "epoch": 0.1835627868168544, "grad_norm": 0.393206387758255, "learning_rate": 9.009009009009009e-05, "loss": 0.9658, "step": 550 }, { "epoch": 0.19190654985398414, "grad_norm": 0.29037702083587646, "learning_rate": 8.50850850850851e-05, "loss": 1.6467, "step": 575 }, { "epoch": 0.2002503128911139, "grad_norm": 0.3599018156528473, "learning_rate": 8.008008008008009e-05, "loss": 1.0368, "step": 600 }, { "epoch": 0.20859407592824364, "grad_norm": 0.397064208984375, "learning_rate": 7.507507507507507e-05, "loss": 1.7412, "step": 625 }, { "epoch": 0.2169378389653734, "grad_norm": 0.3856523334980011, "learning_rate": 7.007007007007007e-05, "loss": 1.1053, "step": 650 }, { "epoch": 0.22528160200250313, "grad_norm": 0.30977582931518555, "learning_rate": 6.506506506506507e-05, "loss": 1.7623, "step": 675 }, { "epoch": 0.23362536503963288, "grad_norm": 0.4029249846935272, "learning_rate": 6.0060060060060066e-05, "loss": 1.1681, "step": 700 }, { "epoch": 0.24196912807676263, "grad_norm": 0.3093183636665344, "learning_rate": 5.505505505505506e-05, "loss": 1.7267, "step": 725 }, { "epoch": 0.2503128911138924, "grad_norm": 0.45136523246765137, "learning_rate": 5.005005005005006e-05, "loss": 1.1256, "step": 750 }, { "epoch": 0.2586566541510221, "grad_norm": 0.2787851393222809, "learning_rate": 4.5045045045045046e-05, "loss": 1.7174, "step": 775 }, { "epoch": 0.2670004171881519, "grad_norm": 0.33277878165245056, "learning_rate": 4.0040040040040046e-05, "loss": 0.9848, "step": 800 }, { "epoch": 0.2753441802252816, "grad_norm": 0.3014131784439087, "learning_rate": 3.503503503503503e-05, "loss": 1.6547, "step": 825 }, { "epoch": 0.28368794326241137, "grad_norm": 0.27079567313194275, "learning_rate": 3.0030030030030033e-05, "loss": 1.1372, "step": 850 }, { "epoch": 0.2920317062995411, "grad_norm": 0.2874036729335785, "learning_rate": 2.502502502502503e-05, "loss": 1.7384, "step": 875 }, { "epoch": 0.30037546933667086, "grad_norm": 0.38741400837898254, "learning_rate": 2.0020020020020023e-05, "loss": 1.0731, "step": 900 }, { "epoch": 0.3087192323738006, "grad_norm": 0.3522486090660095, "learning_rate": 1.5015015015015016e-05, "loss": 1.6537, "step": 925 }, { "epoch": 0.3170629954109303, "grad_norm": 0.41225603222846985, "learning_rate": 1.0010010010010011e-05, "loss": 1.0646, "step": 950 }, { "epoch": 0.32540675844806005, "grad_norm": 0.33286789059638977, "learning_rate": 5.005005005005006e-06, "loss": 1.6279, "step": 975 }, { "epoch": 0.3337505214851898, "grad_norm": 0.405441552400589, "learning_rate": 0.0, "loss": 1.0594, "step": 1000 }, { "epoch": 0.3337505214851898, "eval_loss": 1.5415141582489014, "eval_runtime": 511.169, "eval_samples_per_second": 2.934, "eval_steps_per_second": 0.368, "step": 1000 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.341797200653312e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }