{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.87719298245614, "eval_steps": 500, "global_step": 420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23391812865497075, "grad_norm": 0.36140677332878113, "learning_rate": 4.77807122597034e-05, "loss": 1.7972, "step": 10 }, { "epoch": 0.4678362573099415, "grad_norm": 0.33242133259773254, "learning_rate": 3.232056928191376e-05, "loss": 1.6893, "step": 20 }, { "epoch": 0.7017543859649122, "grad_norm": 0.2246789187192917, "learning_rate": 1.1892317911069212e-05, "loss": 1.6413, "step": 30 }, { "epoch": 0.935672514619883, "grad_norm": 0.2739926278591156, "learning_rate": 8.066763266625282e-07, "loss": 1.6134, "step": 40 }, { "epoch": 1.1871345029239766, "grad_norm": 0.37965312600135803, "learning_rate": 3.848943205739711e-05, "loss": 1.4769, "step": 50 }, { "epoch": 1.4210526315789473, "grad_norm": 0.3006901741027832, "learning_rate": 3.219473788427984e-05, "loss": 1.4539, "step": 60 }, { "epoch": 1.654970760233918, "grad_norm": 0.27228423953056335, "learning_rate": 2.604226177226137e-05, "loss": 1.5344, "step": 70 }, { "epoch": 1.8888888888888888, "grad_norm": 0.26753801107406616, "learning_rate": 1.9146971351147655e-05, "loss": 1.4816, "step": 80 }, { "epoch": 2.1228070175438596, "grad_norm": 0.266255259513855, "learning_rate": 1.270117540713368e-05, "loss": 1.4132, "step": 90 }, { "epoch": 2.3567251461988303, "grad_norm": 0.3016515076160431, "learning_rate": 7.1998911101617575e-06, "loss": 1.3255, "step": 100 }, { "epoch": 2.590643274853801, "grad_norm": 0.27352389693260193, "learning_rate": 3.0656000602372558e-06, "loss": 1.3132, "step": 110 }, { "epoch": 2.824561403508772, "grad_norm": 0.27444812655448914, "learning_rate": 6.158030087068001e-07, "loss": 1.3476, "step": 120 }, { "epoch": 3.0935672514619883, "grad_norm": 0.3967718183994293, "learning_rate": 4.388136440446337e-05, "loss": 1.1837, "step": 130 }, { "epoch": 3.327485380116959, "grad_norm": 0.4026094377040863, "learning_rate": 4.245592045215182e-05, "loss": 1.3114, "step": 140 }, { "epoch": 3.56140350877193, "grad_norm": 0.3775029182434082, "learning_rate": 4.0909970437009096e-05, "loss": 1.2437, "step": 150 }, { "epoch": 3.7953216374269005, "grad_norm": 0.40222153067588806, "learning_rate": 3.925418674667405e-05, "loss": 1.2936, "step": 160 }, { "epoch": 4.029239766081871, "grad_norm": 0.4361186921596527, "learning_rate": 3.7500000000000003e-05, "loss": 1.3328, "step": 170 }, { "epoch": 4.2631578947368425, "grad_norm": 0.4647097885608673, "learning_rate": 3.565952013635635e-05, "loss": 1.1235, "step": 180 }, { "epoch": 4.497076023391813, "grad_norm": 0.417835533618927, "learning_rate": 3.374545281527538e-05, "loss": 1.0464, "step": 190 }, { "epoch": 4.730994152046784, "grad_norm": 0.42690905928611755, "learning_rate": 3.177101170357513e-05, "loss": 1.1242, "step": 200 }, { "epoch": 4.964912280701754, "grad_norm": 0.4406892955303192, "learning_rate": 2.9749827255479755e-05, "loss": 1.0463, "step": 210 }, { "epoch": 5.1988304093567255, "grad_norm": 0.4422934353351593, "learning_rate": 2.769585261546897e-05, "loss": 0.9407, "step": 220 }, { "epoch": 5.432748538011696, "grad_norm": 0.46315091848373413, "learning_rate": 2.5623267293451826e-05, "loss": 0.8377, "step": 230 }, { "epoch": 5.666666666666667, "grad_norm": 0.5057820081710815, "learning_rate": 2.3546379277238107e-05, "loss": 0.9125, "step": 240 }, { "epoch": 5.900584795321637, "grad_norm": 0.5013360381126404, "learning_rate": 2.1479526258069087e-05, "loss": 0.9502, "step": 250 }, { "epoch": 6.1345029239766085, "grad_norm": 0.7791084051132202, "learning_rate": 1.9436976651092144e-05, "loss": 0.8194, "step": 260 }, { "epoch": 6.368421052631579, "grad_norm": 0.4837506115436554, "learning_rate": 1.7432831094079355e-05, "loss": 0.7167, "step": 270 }, { "epoch": 6.60233918128655, "grad_norm": 0.5172699689865112, "learning_rate": 1.5480925104388762e-05, "loss": 0.7622, "step": 280 }, { "epoch": 6.83625730994152, "grad_norm": 0.5458669662475586, "learning_rate": 1.3594733566170926e-05, "loss": 0.767, "step": 290 }, { "epoch": 7.0701754385964914, "grad_norm": 0.46376243233680725, "learning_rate": 1.1787277707188616e-05, "loss": 0.7493, "step": 300 }, { "epoch": 7.304093567251462, "grad_norm": 0.5310298800468445, "learning_rate": 1.0071035207430352e-05, "loss": 0.6112, "step": 310 }, { "epoch": 7.538011695906433, "grad_norm": 0.49246639013290405, "learning_rate": 8.45785406007852e-06, "loss": 0.6109, "step": 320 }, { "epoch": 7.771929824561403, "grad_norm": 0.5299611687660217, "learning_rate": 6.958870779488447e-06, "loss": 0.6453, "step": 330 }, { "epoch": 8.005847953216374, "grad_norm": 0.4647846519947052, "learning_rate": 5.584433520825541e-06, "loss": 0.6664, "step": 340 }, { "epoch": 8.239766081871345, "grad_norm": 0.4858466386795044, "learning_rate": 4.344030642100133e-06, "loss": 0.5755, "step": 350 }, { "epoch": 8.473684210526315, "grad_norm": 0.4467305839061737, "learning_rate": 3.2462252017684797e-06, "loss": 0.5977, "step": 360 }, { "epoch": 8.707602339181287, "grad_norm": 0.47269728779792786, "learning_rate": 2.298595844092377e-06, "loss": 0.5428, "step": 370 }, { "epoch": 8.941520467836257, "grad_norm": 0.4306669235229492, "learning_rate": 1.5076844803522922e-06, "loss": 0.5528, "step": 380 }, { "epoch": 9.175438596491228, "grad_norm": 0.42190077900886536, "learning_rate": 8.78951127094127e-07, "loss": 0.5825, "step": 390 }, { "epoch": 9.409356725146198, "grad_norm": 0.4262978434562683, "learning_rate": 4.16736213181515e-07, "loss": 0.5498, "step": 400 }, { "epoch": 9.64327485380117, "grad_norm": 0.4205697774887085, "learning_rate": 1.2423061586496477e-07, "loss": 0.5318, "step": 410 }, { "epoch": 9.87719298245614, "grad_norm": 0.4037964940071106, "learning_rate": 3.453632722358324e-09, "loss": 0.5299, "step": 420 }, { "epoch": 9.87719298245614, "step": 420, "total_flos": 6.211299136336036e+17, "train_loss": 0.5820885260899862, "train_runtime": 3633.6741, "train_samples_per_second": 3.762, "train_steps_per_second": 0.116 } ], "logging_steps": 10, "max_steps": 420, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.211299136336036e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }