{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 2.5629515647888184, "learning_rate": 0.0001, "loss": 2.2932, "step": 10 }, { "epoch": 0.04, "grad_norm": 1.5660052299499512, "learning_rate": 0.0001, "loss": 1.1775, "step": 20 }, { "epoch": 0.06, "grad_norm": 1.7450891733169556, "learning_rate": 0.0001, "loss": 1.048, "step": 30 }, { "epoch": 0.08, "grad_norm": 1.785325050354004, "learning_rate": 0.0001, "loss": 1.0855, "step": 40 }, { "epoch": 0.1, "grad_norm": 2.3252322673797607, "learning_rate": 0.0001, "loss": 0.9265, "step": 50 }, { "epoch": 0.12, "grad_norm": 1.5729879140853882, "learning_rate": 0.0001, "loss": 0.9402, "step": 60 }, { "epoch": 0.14, "grad_norm": 1.5131912231445312, "learning_rate": 0.0001, "loss": 0.9867, "step": 70 }, { "epoch": 0.16, "grad_norm": 0.9407297372817993, "learning_rate": 0.0001, "loss": 0.8494, "step": 80 }, { "epoch": 0.18, "grad_norm": 1.085139513015747, "learning_rate": 0.0001, "loss": 0.8612, "step": 90 }, { "epoch": 0.2, "grad_norm": 1.0694725513458252, "learning_rate": 0.0001, "loss": 1.0641, "step": 100 }, { "epoch": 0.2, "eval_loss": 0.8292067050933838, "eval_runtime": 53.3995, "eval_samples_per_second": 9.363, "eval_steps_per_second": 4.682, "step": 100 }, { "epoch": 0.22, "grad_norm": 1.7079308032989502, "learning_rate": 0.0001, "loss": 0.8869, "step": 110 }, { "epoch": 0.24, "grad_norm": 1.5450018644332886, "learning_rate": 0.0001, "loss": 0.9056, "step": 120 }, { "epoch": 0.26, "grad_norm": 1.4224787950515747, "learning_rate": 0.0001, "loss": 0.8426, "step": 130 }, { "epoch": 0.28, "grad_norm": 2.317502737045288, "learning_rate": 0.0001, "loss": 1.1121, "step": 140 }, { "epoch": 0.3, "grad_norm": 1.5484294891357422, "learning_rate": 0.0001, "loss": 0.7657, "step": 150 }, { "epoch": 0.32, "grad_norm": 1.1524784564971924, "learning_rate": 0.0001, "loss": 0.9395, "step": 160 }, { "epoch": 0.34, "grad_norm": 1.3373467922210693, "learning_rate": 0.0001, "loss": 0.8741, "step": 170 }, { "epoch": 0.36, "grad_norm": 0.813785195350647, "learning_rate": 0.0001, "loss": 0.807, "step": 180 }, { "epoch": 0.38, "grad_norm": 1.3544365167617798, "learning_rate": 0.0001, "loss": 0.8619, "step": 190 }, { "epoch": 0.4, "grad_norm": 1.4264659881591797, "learning_rate": 0.0001, "loss": 0.7061, "step": 200 }, { "epoch": 0.4, "eval_loss": 0.7746801972389221, "eval_runtime": 53.3696, "eval_samples_per_second": 9.369, "eval_steps_per_second": 4.684, "step": 200 }, { "epoch": 0.42, "grad_norm": 1.9158445596694946, "learning_rate": 0.0001, "loss": 0.8594, "step": 210 }, { "epoch": 0.44, "grad_norm": 0.9574500322341919, "learning_rate": 0.0001, "loss": 0.8221, "step": 220 }, { "epoch": 0.46, "grad_norm": 1.2509143352508545, "learning_rate": 0.0001, "loss": 0.7963, "step": 230 }, { "epoch": 0.48, "grad_norm": 1.714758038520813, "learning_rate": 0.0001, "loss": 0.7244, "step": 240 }, { "epoch": 0.5, "grad_norm": 1.070537805557251, "learning_rate": 0.0001, "loss": 0.784, "step": 250 }, { "epoch": 0.52, "grad_norm": 3.861132860183716, "learning_rate": 0.0001, "loss": 0.8176, "step": 260 }, { "epoch": 0.54, "grad_norm": 1.2554326057434082, "learning_rate": 0.0001, "loss": 0.8537, "step": 270 }, { "epoch": 0.56, "grad_norm": 0.9133288264274597, "learning_rate": 0.0001, "loss": 0.7829, "step": 280 }, { "epoch": 0.58, "grad_norm": 1.508233666419983, "learning_rate": 0.0001, "loss": 0.9645, "step": 290 }, { "epoch": 0.6, "grad_norm": 1.0573885440826416, "learning_rate": 0.0001, "loss": 0.7912, "step": 300 }, { "epoch": 0.6, "eval_loss": 0.7474251985549927, "eval_runtime": 53.3631, "eval_samples_per_second": 9.37, "eval_steps_per_second": 4.685, "step": 300 }, { "epoch": 0.62, "grad_norm": 1.4665676355361938, "learning_rate": 0.0001, "loss": 0.8004, "step": 310 }, { "epoch": 0.64, "grad_norm": 2.0176584720611572, "learning_rate": 0.0001, "loss": 0.8841, "step": 320 }, { "epoch": 0.66, "grad_norm": 2.208796501159668, "learning_rate": 0.0001, "loss": 0.8888, "step": 330 }, { "epoch": 0.68, "grad_norm": 1.3332535028457642, "learning_rate": 0.0001, "loss": 0.7775, "step": 340 }, { "epoch": 0.7, "grad_norm": 2.814683675765991, "learning_rate": 0.0001, "loss": 0.7112, "step": 350 }, { "epoch": 0.72, "grad_norm": 1.540370225906372, "learning_rate": 0.0001, "loss": 0.8344, "step": 360 }, { "epoch": 0.74, "grad_norm": 1.5666872262954712, "learning_rate": 0.0001, "loss": 0.7647, "step": 370 }, { "epoch": 0.76, "grad_norm": 1.2998279333114624, "learning_rate": 0.0001, "loss": 0.8868, "step": 380 }, { "epoch": 0.78, "grad_norm": 1.2700724601745605, "learning_rate": 0.0001, "loss": 0.8204, "step": 390 }, { "epoch": 0.8, "grad_norm": 1.0913368463516235, "learning_rate": 0.0001, "loss": 0.7203, "step": 400 }, { "epoch": 0.8, "eval_loss": 0.7208259105682373, "eval_runtime": 53.3297, "eval_samples_per_second": 9.376, "eval_steps_per_second": 4.688, "step": 400 }, { "epoch": 0.82, "grad_norm": 1.0129300355911255, "learning_rate": 0.0001, "loss": 0.6732, "step": 410 }, { "epoch": 0.84, "grad_norm": 1.3054537773132324, "learning_rate": 0.0001, "loss": 0.7467, "step": 420 }, { "epoch": 0.86, "grad_norm": 1.4525396823883057, "learning_rate": 0.0001, "loss": 0.8085, "step": 430 }, { "epoch": 0.88, "grad_norm": 1.1094130277633667, "learning_rate": 0.0001, "loss": 0.7971, "step": 440 }, { "epoch": 0.9, "grad_norm": 1.422092318534851, "learning_rate": 0.0001, "loss": 0.7452, "step": 450 }, { "epoch": 0.92, "grad_norm": 1.9026339054107666, "learning_rate": 0.0001, "loss": 0.9912, "step": 460 }, { "epoch": 0.94, "grad_norm": 1.0568705797195435, "learning_rate": 0.0001, "loss": 0.7249, "step": 470 }, { "epoch": 0.96, "grad_norm": 1.3295648097991943, "learning_rate": 0.0001, "loss": 0.6573, "step": 480 }, { "epoch": 0.98, "grad_norm": 1.215010404586792, "learning_rate": 0.0001, "loss": 0.8908, "step": 490 }, { "epoch": 1.0, "grad_norm": 0.9312228560447693, "learning_rate": 0.0001, "loss": 0.714, "step": 500 }, { "epoch": 1.0, "eval_loss": 0.7121768593788147, "eval_runtime": 53.3521, "eval_samples_per_second": 9.372, "eval_steps_per_second": 4.686, "step": 500 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.294978471477248e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }