{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.13123359580052493, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 4.396, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.826919265136599e-07, "loss": 4.21, "step": 2 }, { "epoch": 0.0, "learning_rate": 6.065523528528873e-07, "loss": 4.3296, "step": 3 }, { "epoch": 0.01, "learning_rate": 7.653838530273198e-07, "loss": 4.334, "step": 4 }, { "epoch": 0.01, "learning_rate": 8.885831358586367e-07, "loss": 4.3884, "step": 5 }, { "epoch": 0.01, "learning_rate": 9.892442793665471e-07, "loss": 4.32, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.07435206352983e-06, "loss": 4.2667, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.1480757795409794e-06, "loss": 4.2037, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.2131047057057746e-06, "loss": 4.4414, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.2712750623722968e-06, "loss": 4.1682, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.3238965507785759e-06, "loss": 4.2204, "step": 11 }, { "epoch": 0.02, "learning_rate": 1.371936205880207e-06, "loss": 4.2052, "step": 12 }, { "epoch": 0.02, "learning_rate": 1.4161284046830791e-06, "loss": 4.1135, "step": 13 }, { "epoch": 0.02, "learning_rate": 1.4570439900434897e-06, "loss": 3.9597, "step": 14 }, { "epoch": 0.02, "learning_rate": 1.495135488711524e-06, "loss": 3.8066, "step": 15 }, { "epoch": 0.02, "learning_rate": 1.5307677060546395e-06, "loss": 4.2143, "step": 16 }, { "epoch": 0.02, "learning_rate": 1.5642390292710904e-06, "loss": 4.0728, "step": 17 }, { "epoch": 0.02, "learning_rate": 1.5957966322194344e-06, "loss": 4.3082, "step": 18 }, { "epoch": 0.02, "learning_rate": 1.6256475638101063e-06, "loss": 4.2929, "step": 19 }, { "epoch": 0.03, "learning_rate": 1.6539669888859565e-06, "loss": 3.6756, "step": 20 }, { "epoch": 0.03, "learning_rate": 1.6809044163827172e-06, "loss": 4.0849, "step": 21 }, { "epoch": 0.03, "learning_rate": 1.706588477292236e-06, "loss": 4.216, "step": 22 }, { "epoch": 0.03, "learning_rate": 1.7311306396673579e-06, "loss": 4.128, "step": 23 }, { "epoch": 0.03, "learning_rate": 1.7546281323938668e-06, "loss": 3.9854, "step": 24 }, { "epoch": 0.03, "learning_rate": 1.7771662717172735e-06, "loss": 4.1697, "step": 25 }, { "epoch": 0.03, "learning_rate": 1.7988203311967389e-06, "loss": 4.0979, "step": 26 }, { "epoch": 0.04, "learning_rate": 1.8196570585586616e-06, "loss": 3.9488, "step": 27 }, { "epoch": 0.04, "learning_rate": 1.8397359165571497e-06, "loss": 4.0495, "step": 28 }, { "epoch": 0.04, "learning_rate": 1.859110105992117e-06, "loss": 4.101, "step": 29 }, { "epoch": 0.04, "learning_rate": 1.8778274152251838e-06, "loss": 3.8262, "step": 30 }, { "epoch": 0.04, "learning_rate": 1.8959309303488189e-06, "loss": 3.9416, "step": 31 }, { "epoch": 0.04, "learning_rate": 1.913459632568299e-06, "loss": 3.7671, "step": 32 }, { "epoch": 0.04, "learning_rate": 1.9304489036314633e-06, "loss": 4.0306, "step": 33 }, { "epoch": 0.04, "learning_rate": 1.9469309557847503e-06, "loss": 3.9613, "step": 34 }, { "epoch": 0.05, "learning_rate": 1.962935199388467e-06, "loss": 3.7291, "step": 35 }, { "epoch": 0.05, "learning_rate": 1.9784885587330943e-06, "loss": 3.793, "step": 36 }, { "epoch": 0.05, "learning_rate": 1.993615744575612e-06, "loss": 3.6288, "step": 37 }, { "epoch": 0.05, "learning_rate": 2.0083394903237663e-06, "loss": 3.9299, "step": 38 }, { "epoch": 0.05, "learning_rate": 2.022680757535966e-06, "loss": 3.8319, "step": 39 }, { "epoch": 0.05, "learning_rate": 2.0366589153996162e-06, "loss": 3.7277, "step": 40 }, { "epoch": 0.05, "learning_rate": 2.050291898044415e-06, "loss": 3.7845, "step": 41 }, { "epoch": 0.06, "learning_rate": 2.063596342896377e-06, "loss": 3.5937, "step": 42 }, { "epoch": 0.06, "learning_rate": 2.076587712750118e-06, "loss": 3.5983, "step": 43 }, { "epoch": 0.06, "learning_rate": 2.0892804038058957e-06, "loss": 3.7483, "step": 44 }, { "epoch": 0.06, "learning_rate": 2.101687841564411e-06, "loss": 3.6843, "step": 45 }, { "epoch": 0.06, "learning_rate": 2.113822566181018e-06, "loss": 3.7431, "step": 46 }, { "epoch": 0.06, "learning_rate": 2.1256963086398127e-06, "loss": 3.5885, "step": 47 }, { "epoch": 0.06, "learning_rate": 2.1373200589075267e-06, "loss": 3.6473, "step": 48 }, { "epoch": 0.06, "learning_rate": 2.14870412705966e-06, "loss": 3.3018, "step": 49 }, { "epoch": 0.07, "learning_rate": 2.159858198230933e-06, "loss": 3.5044, "step": 50 }, { "epoch": 0.07, "learning_rate": 2.170791382123977e-06, "loss": 3.7595, "step": 51 }, { "epoch": 0.07, "learning_rate": 2.181512257710399e-06, "loss": 3.5821, "step": 52 }, { "epoch": 0.07, "learning_rate": 2.192028913673789e-06, "loss": 3.752, "step": 53 }, { "epoch": 0.07, "learning_rate": 2.2023489850723216e-06, "loss": 3.5635, "step": 54 }, { "epoch": 0.07, "learning_rate": 2.2124796866372125e-06, "loss": 3.5492, "step": 55 }, { "epoch": 0.07, "learning_rate": 2.22242784307081e-06, "loss": 3.4708, "step": 56 }, { "epoch": 0.07, "learning_rate": 2.2321999166629935e-06, "loss": 3.4797, "step": 57 }, { "epoch": 0.08, "learning_rate": 2.2418020325057765e-06, "loss": 3.5858, "step": 58 }, { "epoch": 0.08, "learning_rate": 2.251240001552474e-06, "loss": 3.5595, "step": 59 }, { "epoch": 0.08, "learning_rate": 2.2605193417388435e-06, "loss": 3.5123, "step": 60 }, { "epoch": 0.08, "learning_rate": 2.269645297358435e-06, "loss": 3.4325, "step": 61 }, { "epoch": 0.08, "learning_rate": 2.278622856862479e-06, "loss": 3.4439, "step": 62 }, { "epoch": 0.08, "learning_rate": 2.2874567692356042e-06, "loss": 3.4072, "step": 63 }, { "epoch": 0.08, "learning_rate": 2.2961515590819588e-06, "loss": 3.6176, "step": 64 }, { "epoch": 0.09, "learning_rate": 2.3047115405417158e-06, "loss": 3.5513, "step": 65 }, { "epoch": 0.09, "learning_rate": 2.313140830145123e-06, "loss": 3.5387, "step": 66 }, { "epoch": 0.09, "learning_rate": 2.3214433586999722e-06, "loss": 3.3643, "step": 67 }, { "epoch": 0.09, "learning_rate": 2.32962288229841e-06, "loss": 3.329, "step": 68 }, { "epoch": 0.09, "learning_rate": 2.3376829925202453e-06, "loss": 3.3631, "step": 69 }, { "epoch": 0.09, "learning_rate": 2.3456271259021266e-06, "loss": 3.505, "step": 70 }, { "epoch": 0.09, "learning_rate": 2.3534585727350773e-06, "loss": 3.3134, "step": 71 }, { "epoch": 0.09, "learning_rate": 2.361180485246754e-06, "loss": 3.4492, "step": 72 }, { "epoch": 0.1, "learning_rate": 2.3687958852193585e-06, "loss": 3.2279, "step": 73 }, { "epoch": 0.1, "learning_rate": 2.376307671089272e-06, "loss": 3.3059, "step": 74 }, { "epoch": 0.1, "learning_rate": 2.3837186245701603e-06, "loss": 3.4335, "step": 75 }, { "epoch": 0.1, "learning_rate": 2.391031416837426e-06, "loss": 3.3363, "step": 76 }, { "epoch": 0.1, "learning_rate": 2.398248614308406e-06, "loss": 3.2824, "step": 77 }, { "epoch": 0.1, "learning_rate": 2.4053726840496263e-06, "loss": 3.3852, "step": 78 }, { "epoch": 0.1, "learning_rate": 2.4124059988396154e-06, "loss": 3.5303, "step": 79 }, { "epoch": 0.1, "learning_rate": 2.419350841913276e-06, "loss": 3.3088, "step": 80 }, { "epoch": 0.11, "learning_rate": 2.4262094114115493e-06, "loss": 3.3774, "step": 81 }, { "epoch": 0.11, "learning_rate": 2.4329838245580745e-06, "loss": 3.3502, "step": 82 }, { "epoch": 0.11, "learning_rate": 2.4396761215827013e-06, "loss": 3.3218, "step": 83 }, { "epoch": 0.11, "learning_rate": 2.4462882694100367e-06, "loss": 3.4322, "step": 84 }, { "epoch": 0.11, "learning_rate": 2.452822165129727e-06, "loss": 3.297, "step": 85 }, { "epoch": 0.11, "learning_rate": 2.4592796392637773e-06, "loss": 3.297, "step": 86 }, { "epoch": 0.11, "learning_rate": 2.4656624588450042e-06, "loss": 3.3604, "step": 87 }, { "epoch": 0.12, "learning_rate": 2.471972330319556e-06, "loss": 3.2638, "step": 88 }, { "epoch": 0.12, "learning_rate": 2.478210902285443e-06, "loss": 3.186, "step": 89 }, { "epoch": 0.12, "learning_rate": 2.484379768078071e-06, "loss": 3.3819, "step": 90 }, { "epoch": 0.12, "learning_rate": 2.490480468212909e-06, "loss": 3.362, "step": 91 }, { "epoch": 0.12, "learning_rate": 2.4965144926946778e-06, "loss": 3.3608, "step": 92 }, { "epoch": 0.12, "learning_rate": 2.5024832832017063e-06, "loss": 3.2796, "step": 93 }, { "epoch": 0.12, "learning_rate": 2.5083882351534724e-06, "loss": 3.2159, "step": 94 }, { "epoch": 0.12, "learning_rate": 2.514230699668743e-06, "loss": 3.2004, "step": 95 }, { "epoch": 0.13, "learning_rate": 2.5200119854211865e-06, "loss": 3.1361, "step": 96 }, { "epoch": 0.13, "learning_rate": 2.5257333603988366e-06, "loss": 3.354, "step": 97 }, { "epoch": 0.13, "learning_rate": 2.5313960535733198e-06, "loss": 3.1423, "step": 98 }, { "epoch": 0.13, "learning_rate": 2.5370012564843503e-06, "loss": 3.2997, "step": 99 }, { "epoch": 0.13, "learning_rate": 2.5425501247445936e-06, "loss": 3.3779, "step": 100 } ], "logging_steps": 1.0, "max_steps": 762, "num_train_epochs": 1, "save_steps": 50, "total_flos": 0.0, "trial_name": null, "trial_params": null }