{ "best_metric": NaN, "best_model_checkpoint": "miner_id_24/checkpoint-100", "epoch": 0.07216308858019123, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036081544290095615, "eval_loss": NaN, "eval_runtime": 196.169, "eval_samples_per_second": 23.796, "eval_steps_per_second": 5.949, "step": 1 }, { "epoch": 0.0036081544290095615, "grad_norm": 0.0, "learning_rate": 0.0002, "loss": 0.0, "step": 10 }, { "epoch": 0.007216308858019123, "grad_norm": 0.0, "learning_rate": 0.0001998582695676762, "loss": 0.0, "step": 20 }, { "epoch": 0.010824463287028685, "grad_norm": 0.0, "learning_rate": 0.00019943348002101371, "loss": 0.0, "step": 30 }, { "epoch": 0.014432617716038246, "grad_norm": 0.0, "learning_rate": 0.00019872683547213446, "loss": 0.0, "step": 40 }, { "epoch": 0.018040772145047807, "grad_norm": 0.0, "learning_rate": 0.00019774033898178667, "loss": 0.0, "step": 50 }, { "epoch": 0.02164892657405737, "grad_norm": 0.0, "learning_rate": 0.0001964767868814516, "loss": 0.0, "step": 60 }, { "epoch": 0.025257081003066933, "grad_norm": 0.0, "learning_rate": 0.00019493976084683813, "loss": 0.0, "step": 70 }, { "epoch": 0.028865235432076492, "grad_norm": 0.0, "learning_rate": 0.00019313361774523385, "loss": 0.0, "step": 80 }, { "epoch": 0.032473389861086055, "grad_norm": 0.0, "learning_rate": 0.00019106347728549135, "loss": 0.0, "step": 90 }, { "epoch": 0.036081544290095614, "grad_norm": 0.0, "learning_rate": 0.00018873520750565718, "loss": 0.0, "step": 100 }, { "epoch": 0.036081544290095614, "eval_loss": NaN, "eval_runtime": 197.2789, "eval_samples_per_second": 23.662, "eval_steps_per_second": 5.915, "step": 100 }, { "epoch": 0.03968969871910518, "grad_norm": 0.0, "learning_rate": 0.0001861554081393806, "loss": 0.0, "step": 110 }, { "epoch": 0.04329785314811474, "grad_norm": 0.0, "learning_rate": 0.0001833313919082515, "loss": 0.0, "step": 120 }, { "epoch": 0.0469060075771243, "grad_norm": 0.0, "learning_rate": 0.00018027116379309638, "loss": 0.0, "step": 130 }, { "epoch": 0.050514162006133866, "grad_norm": 0.0, "learning_rate": 0.00017698339834299061, "loss": 0.0, "step": 140 }, { "epoch": 0.054122316435143425, "grad_norm": 0.0, "learning_rate": 0.00017347741508630672, "loss": 0.0, "step": 150 }, { "epoch": 0.057730470864152984, "grad_norm": 0.0, "learning_rate": 0.0001697631521134985, "loss": 0.0, "step": 160 }, { "epoch": 0.06133862529316255, "grad_norm": 0.0, "learning_rate": 0.00016585113790650388, "loss": 0.0, "step": 170 }, { "epoch": 0.06494677972217211, "grad_norm": 0.0, "learning_rate": 0.0001617524614946192, "loss": 0.0, "step": 180 }, { "epoch": 0.06855493415118168, "grad_norm": 0.0, "learning_rate": 0.0001574787410214407, "loss": 0.0, "step": 190 }, { "epoch": 0.07216308858019123, "grad_norm": 0.0, "learning_rate": 0.00015304209081197425, "loss": 0.0, "step": 200 }, { "epoch": 0.07216308858019123, "eval_loss": NaN, "eval_runtime": 197.0795, "eval_samples_per_second": 23.686, "eval_steps_per_second": 5.921, "step": 200 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.32707468181504e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }