{ "best_metric": 1.1700222492218018, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.6872852233676976, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013745704467353952, "grad_norm": 8.521697044372559, "learning_rate": 5e-05, "loss": 1.7568, "step": 1 }, { "epoch": 0.013745704467353952, "eval_loss": 1.69869065284729, "eval_runtime": 23.7639, "eval_samples_per_second": 20.62, "eval_steps_per_second": 2.609, "step": 1 }, { "epoch": 0.027491408934707903, "grad_norm": 8.721661567687988, "learning_rate": 0.0001, "loss": 1.5639, "step": 2 }, { "epoch": 0.041237113402061855, "grad_norm": 9.311981201171875, "learning_rate": 9.989294616193017e-05, "loss": 1.6337, "step": 3 }, { "epoch": 0.054982817869415807, "grad_norm": 7.9015936851501465, "learning_rate": 9.957224306869053e-05, "loss": 1.4346, "step": 4 }, { "epoch": 0.06872852233676977, "grad_norm": 7.836700439453125, "learning_rate": 9.903926402016153e-05, "loss": 1.4628, "step": 5 }, { "epoch": 0.08247422680412371, "grad_norm": 5.524428844451904, "learning_rate": 9.829629131445342e-05, "loss": 1.3585, "step": 6 }, { "epoch": 0.09621993127147767, "grad_norm": 5.041913032531738, "learning_rate": 9.73465064747553e-05, "loss": 1.3979, "step": 7 }, { "epoch": 0.10996563573883161, "grad_norm": 5.0521440505981445, "learning_rate": 9.619397662556435e-05, "loss": 1.4176, "step": 8 }, { "epoch": 0.12371134020618557, "grad_norm": 5.009284019470215, "learning_rate": 9.484363707663442e-05, "loss": 1.3873, "step": 9 }, { "epoch": 0.13745704467353953, "grad_norm": 5.2991719245910645, "learning_rate": 9.330127018922194e-05, "loss": 1.3692, "step": 10 }, { "epoch": 0.15120274914089346, "grad_norm": 6.43788480758667, "learning_rate": 9.157348061512727e-05, "loss": 1.421, "step": 11 }, { "epoch": 0.16494845360824742, "grad_norm": 6.886988639831543, "learning_rate": 8.966766701456177e-05, "loss": 1.3988, "step": 12 }, { "epoch": 0.17869415807560138, "grad_norm": 8.61909294128418, "learning_rate": 8.759199037394887e-05, "loss": 1.5297, "step": 13 }, { "epoch": 0.19243986254295534, "grad_norm": 9.367788314819336, "learning_rate": 8.535533905932738e-05, "loss": 1.3699, "step": 14 }, { "epoch": 0.20618556701030927, "grad_norm": 6.352292060852051, "learning_rate": 8.296729075500344e-05, "loss": 1.1778, "step": 15 }, { "epoch": 0.21993127147766323, "grad_norm": 4.758786201477051, "learning_rate": 8.043807145043604e-05, "loss": 1.161, "step": 16 }, { "epoch": 0.23367697594501718, "grad_norm": 4.461093425750732, "learning_rate": 7.777851165098012e-05, "loss": 1.1936, "step": 17 }, { "epoch": 0.24742268041237114, "grad_norm": 4.39322566986084, "learning_rate": 7.500000000000001e-05, "loss": 1.1752, "step": 18 }, { "epoch": 0.2611683848797251, "grad_norm": 3.388389825820923, "learning_rate": 7.211443451095007e-05, "loss": 1.1414, "step": 19 }, { "epoch": 0.27491408934707906, "grad_norm": 3.6625843048095703, "learning_rate": 6.91341716182545e-05, "loss": 1.2274, "step": 20 }, { "epoch": 0.28865979381443296, "grad_norm": 3.5239598751068115, "learning_rate": 6.607197326515808e-05, "loss": 1.2471, "step": 21 }, { "epoch": 0.3024054982817869, "grad_norm": 4.69722318649292, "learning_rate": 6.294095225512603e-05, "loss": 1.2665, "step": 22 }, { "epoch": 0.3161512027491409, "grad_norm": 3.7608370780944824, "learning_rate": 5.9754516100806423e-05, "loss": 1.2757, "step": 23 }, { "epoch": 0.32989690721649484, "grad_norm": 5.575406551361084, "learning_rate": 5.6526309611002594e-05, "loss": 1.3511, "step": 24 }, { "epoch": 0.3436426116838488, "grad_norm": 8.046637535095215, "learning_rate": 5.327015646150716e-05, "loss": 1.3011, "step": 25 }, { "epoch": 0.3436426116838488, "eval_loss": 1.222367763519287, "eval_runtime": 23.7465, "eval_samples_per_second": 20.635, "eval_steps_per_second": 2.611, "step": 25 }, { "epoch": 0.35738831615120276, "grad_norm": 8.447471618652344, "learning_rate": 5e-05, "loss": 1.3699, "step": 26 }, { "epoch": 0.3711340206185567, "grad_norm": 6.330043315887451, "learning_rate": 4.6729843538492847e-05, "loss": 1.2511, "step": 27 }, { "epoch": 0.3848797250859107, "grad_norm": 4.982043266296387, "learning_rate": 4.347369038899744e-05, "loss": 1.1805, "step": 28 }, { "epoch": 0.39862542955326463, "grad_norm": 5.331606864929199, "learning_rate": 4.0245483899193595e-05, "loss": 1.1521, "step": 29 }, { "epoch": 0.41237113402061853, "grad_norm": 3.5054099559783936, "learning_rate": 3.705904774487396e-05, "loss": 1.1404, "step": 30 }, { "epoch": 0.4261168384879725, "grad_norm": 4.3697614669799805, "learning_rate": 3.392802673484193e-05, "loss": 1.1858, "step": 31 }, { "epoch": 0.43986254295532645, "grad_norm": 3.5054900646209717, "learning_rate": 3.086582838174551e-05, "loss": 1.1712, "step": 32 }, { "epoch": 0.4536082474226804, "grad_norm": 2.989226818084717, "learning_rate": 2.7885565489049946e-05, "loss": 1.0911, "step": 33 }, { "epoch": 0.46735395189003437, "grad_norm": 3.1136281490325928, "learning_rate": 2.500000000000001e-05, "loss": 1.1126, "step": 34 }, { "epoch": 0.48109965635738833, "grad_norm": 3.8040785789489746, "learning_rate": 2.2221488349019903e-05, "loss": 1.2245, "step": 35 }, { "epoch": 0.4948453608247423, "grad_norm": 4.323892116546631, "learning_rate": 1.9561928549563968e-05, "loss": 1.111, "step": 36 }, { "epoch": 0.5085910652920962, "grad_norm": 4.997296333312988, "learning_rate": 1.703270924499656e-05, "loss": 1.1923, "step": 37 }, { "epoch": 0.5223367697594502, "grad_norm": 5.276499271392822, "learning_rate": 1.4644660940672627e-05, "loss": 1.4023, "step": 38 }, { "epoch": 0.5360824742268041, "grad_norm": 3.8192012310028076, "learning_rate": 1.2408009626051137e-05, "loss": 1.2573, "step": 39 }, { "epoch": 0.5498281786941581, "grad_norm": 3.393066883087158, "learning_rate": 1.0332332985438248e-05, "loss": 1.2258, "step": 40 }, { "epoch": 0.563573883161512, "grad_norm": 2.6232097148895264, "learning_rate": 8.426519384872733e-06, "loss": 1.0416, "step": 41 }, { "epoch": 0.5773195876288659, "grad_norm": 2.9142274856567383, "learning_rate": 6.698729810778065e-06, "loss": 0.9755, "step": 42 }, { "epoch": 0.5910652920962199, "grad_norm": 3.119406223297119, "learning_rate": 5.156362923365588e-06, "loss": 1.1864, "step": 43 }, { "epoch": 0.6048109965635738, "grad_norm": 2.5969648361206055, "learning_rate": 3.8060233744356633e-06, "loss": 1.132, "step": 44 }, { "epoch": 0.6185567010309279, "grad_norm": 2.6044764518737793, "learning_rate": 2.653493525244721e-06, "loss": 1.1857, "step": 45 }, { "epoch": 0.6323024054982818, "grad_norm": 2.9976484775543213, "learning_rate": 1.70370868554659e-06, "loss": 1.1866, "step": 46 }, { "epoch": 0.6460481099656358, "grad_norm": 3.2721643447875977, "learning_rate": 9.607359798384785e-07, "loss": 1.1646, "step": 47 }, { "epoch": 0.6597938144329897, "grad_norm": 4.761203289031982, "learning_rate": 4.277569313094809e-07, "loss": 1.2401, "step": 48 }, { "epoch": 0.6735395189003437, "grad_norm": 4.534084796905518, "learning_rate": 1.0705383806982606e-07, "loss": 1.129, "step": 49 }, { "epoch": 0.6872852233676976, "grad_norm": 8.279170036315918, "learning_rate": 0.0, "loss": 1.2175, "step": 50 }, { "epoch": 0.6872852233676976, "eval_loss": 1.1700222492218018, "eval_runtime": 23.7368, "eval_samples_per_second": 20.643, "eval_steps_per_second": 2.612, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.975488880578396e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }