{ "best_metric": 4.000638484954834, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.02107532032091965, "eval_steps": 25, "global_step": 66, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00031932303516544927, "grad_norm": 5.508306503295898, "learning_rate": 5e-05, "loss": 3.4843, "step": 1 }, { "epoch": 0.00031932303516544927, "eval_loss": 6.9587225914001465, "eval_runtime": 14.4472, "eval_samples_per_second": 3.461, "eval_steps_per_second": 3.461, "step": 1 }, { "epoch": 0.0006386460703308985, "grad_norm": 6.429923057556152, "learning_rate": 0.0001, "loss": 4.189, "step": 2 }, { "epoch": 0.0009579691054963477, "grad_norm": 5.022256374359131, "learning_rate": 9.994579552923277e-05, "loss": 3.7926, "step": 3 }, { "epoch": 0.001277292140661797, "grad_norm": 3.6086912155151367, "learning_rate": 9.978331270024886e-05, "loss": 3.3853, "step": 4 }, { "epoch": 0.0015966151758272462, "grad_norm": 3.7427756786346436, "learning_rate": 9.951294294841516e-05, "loss": 3.2209, "step": 5 }, { "epoch": 0.0019159382109926954, "grad_norm": 3.153134822845459, "learning_rate": 9.913533761814537e-05, "loss": 3.0261, "step": 6 }, { "epoch": 0.0022352612461581448, "grad_norm": 3.2068698406219482, "learning_rate": 9.865140639375449e-05, "loss": 3.2692, "step": 7 }, { "epoch": 0.002554584281323594, "grad_norm": 3.194871664047241, "learning_rate": 9.80623151079494e-05, "loss": 3.5115, "step": 8 }, { "epoch": 0.002873907316489043, "grad_norm": 2.743932008743286, "learning_rate": 9.736948293323593e-05, "loss": 3.4101, "step": 9 }, { "epoch": 0.0031932303516544924, "grad_norm": 2.6319997310638428, "learning_rate": 9.657457896300791e-05, "loss": 3.1293, "step": 10 }, { "epoch": 0.003512553386819942, "grad_norm": 3.2812702655792236, "learning_rate": 9.567951819055496e-05, "loss": 3.353, "step": 11 }, { "epoch": 0.0038318764219853908, "grad_norm": 3.206648111343384, "learning_rate": 9.468645689567598e-05, "loss": 3.7939, "step": 12 }, { "epoch": 0.00415119945715084, "grad_norm": 3.4260575771331787, "learning_rate": 9.359778745001225e-05, "loss": 3.4403, "step": 13 }, { "epoch": 0.0044705224923162895, "grad_norm": 3.34879732131958, "learning_rate": 9.241613255361455e-05, "loss": 3.7215, "step": 14 }, { "epoch": 0.004789845527481739, "grad_norm": 3.991157054901123, "learning_rate": 9.114433891662902e-05, "loss": 3.7424, "step": 15 }, { "epoch": 0.005109168562647188, "grad_norm": 11.184669494628906, "learning_rate": 8.978547040132317e-05, "loss": 4.1454, "step": 16 }, { "epoch": 0.005428491597812638, "grad_norm": 7.331747531890869, "learning_rate": 8.834280064097317e-05, "loss": 3.4989, "step": 17 }, { "epoch": 0.005747814632978086, "grad_norm": 7.6846513748168945, "learning_rate": 8.681980515339464e-05, "loss": 3.1464, "step": 18 }, { "epoch": 0.0060671376681435355, "grad_norm": 5.366702079772949, "learning_rate": 8.522015296811584e-05, "loss": 3.1253, "step": 19 }, { "epoch": 0.006386460703308985, "grad_norm": 4.334587097167969, "learning_rate": 8.354769778736406e-05, "loss": 3.2567, "step": 20 }, { "epoch": 0.006705783738474434, "grad_norm": 4.938718318939209, "learning_rate": 8.180646870215952e-05, "loss": 4.0654, "step": 21 }, { "epoch": 0.007025106773639884, "grad_norm": 4.045793533325195, "learning_rate": 8.000066048588211e-05, "loss": 3.8355, "step": 22 }, { "epoch": 0.007344429808805333, "grad_norm": 10.687291145324707, "learning_rate": 7.813462348869497e-05, "loss": 2.3498, "step": 23 }, { "epoch": 0.0076637528439707815, 
"grad_norm": 5.829076766967773, "learning_rate": 7.62128531571699e-05, "loss": 1.7846, "step": 24 }, { "epoch": 0.007983075879136232, "grad_norm": 4.1889424324035645, "learning_rate": 7.42399792043627e-05, "loss": 1.9843, "step": 25 }, { "epoch": 0.007983075879136232, "eval_loss": 3.5693531036376953, "eval_runtime": 14.6152, "eval_samples_per_second": 3.421, "eval_steps_per_second": 3.421, "step": 25 }, { "epoch": 0.00830239891430168, "grad_norm": 5.976337909698486, "learning_rate": 7.222075445642904e-05, "loss": 2.8225, "step": 26 }, { "epoch": 0.00862172194946713, "grad_norm": 7.094808101654053, "learning_rate": 7.01600434026499e-05, "loss": 2.3825, "step": 27 }, { "epoch": 0.008941044984632579, "grad_norm": 13.234283447265625, "learning_rate": 6.80628104764508e-05, "loss": 3.6424, "step": 28 }, { "epoch": 0.009260368019798028, "grad_norm": 13.326498985290527, "learning_rate": 6.593410809564689e-05, "loss": 4.4563, "step": 29 }, { "epoch": 0.009579691054963478, "grad_norm": 9.808164596557617, "learning_rate": 6.377906449072578e-05, "loss": 4.4519, "step": 30 }, { "epoch": 0.009899014090128926, "grad_norm": 5.980898857116699, "learning_rate": 6.160287135049127e-05, "loss": 4.3998, "step": 31 }, { "epoch": 0.010218337125294377, "grad_norm": 4.547826766967773, "learning_rate": 5.941077131483025e-05, "loss": 4.0411, "step": 32 }, { "epoch": 0.010537660160459825, "grad_norm": 4.85554313659668, "learning_rate": 5.720804534473382e-05, "loss": 4.4836, "step": 33 }, { "epoch": 0.010856983195625275, "grad_norm": 4.402012348175049, "learning_rate": 5.500000000000001e-05, "loss": 4.2954, "step": 34 }, { "epoch": 0.011176306230790724, "grad_norm": 5.091377258300781, "learning_rate": 5.27919546552662e-05, "loss": 4.311, "step": 35 }, { "epoch": 0.011495629265956172, "grad_norm": 5.752288341522217, "learning_rate": 5.058922868516978e-05, "loss": 4.2278, "step": 36 }, { "epoch": 0.011814952301121623, "grad_norm": 6.53019905090332, "learning_rate": 4.839712864950873e-05, "loss": 3.4952, "step": 37 }, { "epoch": 0.012134275336287071, "grad_norm": 5.096629619598389, "learning_rate": 4.6220935509274235e-05, "loss": 3.7458, "step": 38 }, { "epoch": 0.012453598371452521, "grad_norm": 5.266465187072754, "learning_rate": 4.406589190435313e-05, "loss": 3.6961, "step": 39 }, { "epoch": 0.01277292140661797, "grad_norm": 5.592081069946289, "learning_rate": 4.19371895235492e-05, "loss": 3.4962, "step": 40 }, { "epoch": 0.013092244441783418, "grad_norm": 4.7566609382629395, "learning_rate": 3.98399565973501e-05, "loss": 3.6898, "step": 41 }, { "epoch": 0.013411567476948869, "grad_norm": 3.937934637069702, "learning_rate": 3.777924554357096e-05, "loss": 3.7817, "step": 42 }, { "epoch": 0.013730890512114317, "grad_norm": 4.427155494689941, "learning_rate": 3.576002079563732e-05, "loss": 3.7729, "step": 43 }, { "epoch": 0.014050213547279767, "grad_norm": 5.204694747924805, "learning_rate": 3.378714684283011e-05, "loss": 4.3146, "step": 44 }, { "epoch": 0.014369536582445216, "grad_norm": 6.423009872436523, "learning_rate": 3.186537651130503e-05, "loss": 3.687, "step": 45 }, { "epoch": 0.014688859617610666, "grad_norm": 6.450584888458252, "learning_rate": 2.9999339514117912e-05, "loss": 3.7906, "step": 46 }, { "epoch": 0.015008182652776115, "grad_norm": 5.856997013092041, "learning_rate": 2.8193531297840503e-05, "loss": 3.8841, "step": 47 }, { "epoch": 0.015327505687941563, "grad_norm": 6.467741012573242, "learning_rate": 2.645230221263596e-05, "loss": 3.3666, "step": 48 }, { "epoch": 0.015646828723107013, "grad_norm": 
8.139485359191895, "learning_rate": 2.4779847031884175e-05, "loss": 3.674, "step": 49 }, { "epoch": 0.015966151758272464, "grad_norm": 13.278364181518555, "learning_rate": 2.3180194846605367e-05, "loss": 3.9755, "step": 50 }, { "epoch": 0.015966151758272464, "eval_loss": 4.000638484954834, "eval_runtime": 14.6038, "eval_samples_per_second": 3.424, "eval_steps_per_second": 3.424, "step": 50 }, { "epoch": 0.01628547479343791, "grad_norm": 18.63812255859375, "learning_rate": 2.165719935902685e-05, "loss": 3.9371, "step": 51 }, { "epoch": 0.01660479782860336, "grad_norm": 15.783567428588867, "learning_rate": 2.0214529598676836e-05, "loss": 3.4496, "step": 52 }, { "epoch": 0.01692412086376881, "grad_norm": 16.15505599975586, "learning_rate": 1.8855661083370986e-05, "loss": 3.1263, "step": 53 }, { "epoch": 0.01724344389893426, "grad_norm": 12.968737602233887, "learning_rate": 1.758386744638546e-05, "loss": 3.0368, "step": 54 }, { "epoch": 0.017562766934099708, "grad_norm": 9.380385398864746, "learning_rate": 1.6402212549987762e-05, "loss": 2.9326, "step": 55 }, { "epoch": 0.017882089969265158, "grad_norm": 7.317039489746094, "learning_rate": 1.531354310432403e-05, "loss": 2.5816, "step": 56 }, { "epoch": 0.01820141300443061, "grad_norm": 4.853557109832764, "learning_rate": 1.4320481809445051e-05, "loss": 2.6577, "step": 57 }, { "epoch": 0.018520736039596055, "grad_norm": 4.9439239501953125, "learning_rate": 1.3425421036992098e-05, "loss": 3.1326, "step": 58 }, { "epoch": 0.018840059074761505, "grad_norm": 4.261263847351074, "learning_rate": 1.2630517066764069e-05, "loss": 2.8784, "step": 59 }, { "epoch": 0.019159382109926956, "grad_norm": 3.9718527793884277, "learning_rate": 1.1937684892050604e-05, "loss": 3.032, "step": 60 }, { "epoch": 0.019478705145092406, "grad_norm": 3.4298315048217773, "learning_rate": 1.1348593606245522e-05, "loss": 2.6723, "step": 61 }, { "epoch": 0.019798028180257853, "grad_norm": 3.560950994491577, "learning_rate": 1.0864662381854632e-05, "loss": 2.7488, "step": 62 }, { "epoch": 0.020117351215423303, "grad_norm": 3.4607114791870117, "learning_rate": 1.0487057051584856e-05, "loss": 2.5312, "step": 63 }, { "epoch": 0.020436674250588753, "grad_norm": 3.4417977333068848, "learning_rate": 1.0216687299751144e-05, "loss": 3.2348, "step": 64 }, { "epoch": 0.0207559972857542, "grad_norm": 3.5609071254730225, "learning_rate": 1.0054204470767243e-05, "loss": 3.0496, "step": 65 }, { "epoch": 0.02107532032091965, "grad_norm": 5.164323806762695, "learning_rate": 1e-05, "loss": 2.4215, "step": 66 } ], "logging_steps": 1, "max_steps": 66, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.969468656718971e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }