{ "best_metric": 0.5989376306533813, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.0638297872340425, "eval_steps": 25, "global_step": 36, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0851063829787234, "grad_norm": 8.839696884155273, "learning_rate": 5e-05, "loss": 2.6857, "step": 1 }, { "epoch": 0.0851063829787234, "eval_loss": 3.0855712890625, "eval_runtime": 0.5388, "eval_samples_per_second": 146.629, "eval_steps_per_second": 18.561, "step": 1 }, { "epoch": 0.1702127659574468, "grad_norm": 10.66001033782959, "learning_rate": 0.0001, "loss": 2.8787, "step": 2 }, { "epoch": 0.2553191489361702, "grad_norm": 7.742804527282715, "learning_rate": 9.978670881475172e-05, "loss": 2.4614, "step": 3 }, { "epoch": 0.3404255319148936, "grad_norm": 5.67116117477417, "learning_rate": 9.91486549841951e-05, "loss": 2.1595, "step": 4 }, { "epoch": 0.425531914893617, "grad_norm": 4.944119453430176, "learning_rate": 9.809128215864097e-05, "loss": 1.6716, "step": 5 }, { "epoch": 0.5106382978723404, "grad_norm": 4.608202934265137, "learning_rate": 9.662361147021779e-05, "loss": 1.3835, "step": 6 }, { "epoch": 0.5957446808510638, "grad_norm": 3.0601212978363037, "learning_rate": 9.475816456775313e-05, "loss": 1.0948, "step": 7 }, { "epoch": 0.6808510638297872, "grad_norm": 2.5471537113189697, "learning_rate": 9.251085678648072e-05, "loss": 0.8284, "step": 8 }, { "epoch": 0.7659574468085106, "grad_norm": 1.931269884109497, "learning_rate": 8.9900861364012e-05, "loss": 0.8407, "step": 9 }, { "epoch": 0.851063829787234, "grad_norm": 2.042515516281128, "learning_rate": 8.695044586103296e-05, "loss": 0.9586, "step": 10 }, { "epoch": 0.9361702127659575, "grad_norm": 2.114428758621216, "learning_rate": 8.368478218232787e-05, "loss": 0.6138, "step": 11 }, { "epoch": 1.0212765957446808, "grad_norm": 2.6110360622406006, "learning_rate": 8.013173181896283e-05, "loss": 0.8813, "step": 12 }, { "epoch": 1.1063829787234043, "grad_norm": 1.4234488010406494, "learning_rate": 7.63216081438678e-05, "loss": 0.7682, "step": 13 }, { "epoch": 1.1914893617021276, "grad_norm": 1.7401140928268433, "learning_rate": 7.228691778882693e-05, "loss": 0.6967, "step": 14 }, { "epoch": 1.2765957446808511, "grad_norm": 1.493860125541687, "learning_rate": 6.806208330935766e-05, "loss": 0.5233, "step": 15 }, { "epoch": 1.3617021276595744, "grad_norm": 1.8245048522949219, "learning_rate": 6.368314950360415e-05, "loss": 0.7296, "step": 16 }, { "epoch": 1.4468085106382977, "grad_norm": 1.2929282188415527, "learning_rate": 5.918747589082853e-05, "loss": 0.5208, "step": 17 }, { "epoch": 1.5319148936170213, "grad_norm": 1.2650213241577148, "learning_rate": 5.4613417973165106e-05, "loss": 0.529, "step": 18 }, { "epoch": 1.6170212765957448, "grad_norm": 1.466747760772705, "learning_rate": 5e-05, "loss": 0.5967, "step": 19 }, { "epoch": 1.702127659574468, "grad_norm": 1.1956787109375, "learning_rate": 4.5386582026834906e-05, "loss": 0.4666, "step": 20 }, { "epoch": 1.7872340425531914, "grad_norm": 1.869742751121521, "learning_rate": 4.0812524109171476e-05, "loss": 0.7583, "step": 21 }, { "epoch": 1.872340425531915, "grad_norm": 1.5331084728240967, "learning_rate": 3.631685049639586e-05, "loss": 0.5254, "step": 22 }, { "epoch": 1.9574468085106385, "grad_norm": 2.150675058364868, "learning_rate": 3.1937916690642356e-05, "loss": 0.544, "step": 23 }, { "epoch": 2.0425531914893615, "grad_norm": 1.9073337316513062, "learning_rate": 2.771308221117309e-05, "loss": 0.716, "step": 24 }, { "epoch": 2.127659574468085, "grad_norm": 1.7541600465774536, "learning_rate": 2.3678391856132204e-05, "loss": 0.6128, "step": 25 }, { "epoch": 2.127659574468085, "eval_loss": 0.5989376306533813, "eval_runtime": 0.536, "eval_samples_per_second": 147.401, "eval_steps_per_second": 18.658, "step": 25 }, { "epoch": 2.2127659574468086, "grad_norm": 1.5322821140289307, "learning_rate": 1.9868268181037185e-05, "loss": 0.4246, "step": 26 }, { "epoch": 2.297872340425532, "grad_norm": 1.5346146821975708, "learning_rate": 1.631521781767214e-05, "loss": 0.526, "step": 27 }, { "epoch": 2.382978723404255, "grad_norm": 1.4425619840621948, "learning_rate": 1.3049554138967051e-05, "loss": 0.4616, "step": 28 }, { "epoch": 2.4680851063829787, "grad_norm": 1.0223990678787231, "learning_rate": 1.0099138635988026e-05, "loss": 0.4379, "step": 29 }, { "epoch": 2.5531914893617023, "grad_norm": 1.4977924823760986, "learning_rate": 7.489143213519301e-06, "loss": 0.5751, "step": 30 }, { "epoch": 2.6382978723404253, "grad_norm": 1.4635932445526123, "learning_rate": 5.241835432246889e-06, "loss": 0.5067, "step": 31 }, { "epoch": 2.723404255319149, "grad_norm": 1.7609386444091797, "learning_rate": 3.376388529782215e-06, "loss": 0.4576, "step": 32 }, { "epoch": 2.8085106382978724, "grad_norm": 1.5040547847747803, "learning_rate": 1.908717841359048e-06, "loss": 0.5874, "step": 33 }, { "epoch": 2.8936170212765955, "grad_norm": 1.596747875213623, "learning_rate": 8.513450158049108e-07, "loss": 0.4815, "step": 34 }, { "epoch": 2.978723404255319, "grad_norm": 2.097698450088501, "learning_rate": 2.1329118524827662e-07, "loss": 0.4825, "step": 35 }, { "epoch": 3.0638297872340425, "grad_norm": 2.1862497329711914, "learning_rate": 0.0, "loss": 0.6099, "step": 36 } ], "logging_steps": 1, "max_steps": 36, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.126166571037491e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }