{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9969945902624725, "eval_steps": 16, "global_step": 311, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05129232618713685, "grad_norm": 0.5048500299453735, "learning_rate": 0.0015, "loss": 1.1504, "step": 16 }, { "epoch": 0.1025846523742737, "grad_norm": 0.389893114566803, "learning_rate": 0.0011094003924504584, "loss": 3.0392, "step": 32 }, { "epoch": 0.15387697856141053, "grad_norm": 0.2868100106716156, "learning_rate": 0.0007427813527082075, "loss": 0.8441, "step": 48 }, { "epoch": 0.2051693047485474, "grad_norm": 0.2573494613170624, "learning_rate": 0.0005962847939999439, "loss": 0.783, "step": 64 }, { "epoch": 0.25646163093568425, "grad_norm": 0.31569787859916687, "learning_rate": 0.000512147519731584, "loss": 0.7637, "step": 80 }, { "epoch": 0.30775395712282105, "grad_norm": 0.2564464211463928, "learning_rate": 0.0004558423058385518, "loss": 0.719, "step": 96 }, { "epoch": 0.3590462833099579, "grad_norm": 0.32603177428245544, "learning_rate": 0.0004147806778921701, "loss": 0.7037, "step": 112 }, { "epoch": 0.4103386094970948, "grad_norm": 0.2662847340106964, "learning_rate": 0.0003831305140884606, "loss": 0.6794, "step": 128 }, { "epoch": 0.46163093568423164, "grad_norm": 0.3353310525417328, "learning_rate": 0.00035777087639996636, "loss": 0.6737, "step": 144 }, { "epoch": 0.5129232618713685, "grad_norm": 0.26287367939949036, "learning_rate": 0.00033686076842660763, "loss": 0.6561, "step": 160 }, { "epoch": 0.5642155880585054, "grad_norm": 0.3288561999797821, "learning_rate": 0.00031923475378704884, "loss": 0.6359, "step": 176 }, { "epoch": 0.6155079142456421, "grad_norm": 0.35450485348701477, "learning_rate": 0.0003041143685078822, "loss": 0.612, "step": 192 }, { "epoch": 0.666800240432779, "grad_norm": 0.29516109824180603, "learning_rate": 0.00029095718698132317, "loss": 0.6057, "step": 208 }, { "epoch": 0.7180925666199158, "grad_norm": 0.38677069544792175, "learning_rate": 0.00027937211830783126, "loss": 0.5943, "step": 224 }, { "epoch": 0.7693848928070527, "grad_norm": 0.3125530481338501, "learning_rate": 0.000269069117598525, "loss": 0.5635, "step": 240 }, { "epoch": 0.8206772189941896, "grad_norm": 0.3307824730873108, "learning_rate": 0.00025982792098465233, "loss": 0.5629, "step": 256 }, { "epoch": 0.8719695451813264, "grad_norm": 0.31122493743896484, "learning_rate": 0.0002514778453847726, "loss": 0.5582, "step": 272 }, { "epoch": 0.9232618713684633, "grad_norm": 0.3087589144706726, "learning_rate": 0.00024388430433987693, "loss": 0.5364, "step": 288 }, { "epoch": 0.9745541975556001, "grad_norm": 0.3315638601779938, "learning_rate": 0.00023693955110363693, "loss": 0.5412, "step": 304 }, { "epoch": 0.9969945902624725, "step": 311, "total_flos": 3.158660236722569e+18, "train_loss": 0.7963475859050199, "train_runtime": 2372.2052, "train_samples_per_second": 16.831, "train_steps_per_second": 0.131 } ], "logging_steps": 16, "max_steps": 311, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 16, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.158660236722569e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }