{ "best_metric": 0.420650452375412, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 1.694915254237288, "eval_steps": 50, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03389830508474576, "grad_norm": 0.6644117832183838, "learning_rate": 1.0500000000000001e-05, "loss": 1.2728, "step": 1 }, { "epoch": 0.03389830508474576, "eval_loss": 1.325829029083252, "eval_runtime": 0.8828, "eval_samples_per_second": 450.827, "eval_steps_per_second": 14.725, "step": 1 }, { "epoch": 0.06779661016949153, "grad_norm": 0.7109208106994629, "learning_rate": 2.1000000000000002e-05, "loss": 1.3144, "step": 2 }, { "epoch": 0.1016949152542373, "grad_norm": 0.715027928352356, "learning_rate": 3.15e-05, "loss": 1.3375, "step": 3 }, { "epoch": 0.13559322033898305, "grad_norm": 0.7128281593322754, "learning_rate": 4.2000000000000004e-05, "loss": 1.326, "step": 4 }, { "epoch": 0.1694915254237288, "grad_norm": 0.719323992729187, "learning_rate": 5.25e-05, "loss": 1.3235, "step": 5 }, { "epoch": 0.2033898305084746, "grad_norm": 0.7542026042938232, "learning_rate": 6.3e-05, "loss": 1.348, "step": 6 }, { "epoch": 0.23728813559322035, "grad_norm": 0.6400342583656311, "learning_rate": 7.35e-05, "loss": 1.2302, "step": 7 }, { "epoch": 0.2711864406779661, "grad_norm": 0.6019027829170227, "learning_rate": 8.400000000000001e-05, "loss": 1.1686, "step": 8 }, { "epoch": 0.3050847457627119, "grad_norm": 0.5761123299598694, "learning_rate": 9.45e-05, "loss": 1.0889, "step": 9 }, { "epoch": 0.3389830508474576, "grad_norm": 0.5195322036743164, "learning_rate": 0.000105, "loss": 1.0254, "step": 10 }, { "epoch": 0.3728813559322034, "grad_norm": 0.5309574604034424, "learning_rate": 0.00010495849335443335, "loss": 0.9455, "step": 11 }, { "epoch": 0.4067796610169492, "grad_norm": 0.5846827030181885, "learning_rate": 0.00010483403904827154, "loss": 0.886, "step": 12 }, { "epoch": 0.4406779661016949, "grad_norm": 0.5385061502456665, "learning_rate": 0.0001046268338693536, "loss": 0.8475, "step": 13 }, { "epoch": 0.4745762711864407, "grad_norm": 0.34226641058921814, "learning_rate": 0.0001043372054516575, "loss": 0.7105, "step": 14 }, { "epoch": 0.5084745762711864, "grad_norm": 0.5354804992675781, "learning_rate": 0.0001039656117572434, "loss": 0.681, "step": 15 }, { "epoch": 0.5423728813559322, "grad_norm": 0.7758191227912903, "learning_rate": 0.00010351264035212153, "loss": 0.6268, "step": 16 }, { "epoch": 0.576271186440678, "grad_norm": 0.4772910475730896, "learning_rate": 0.00010297900747718958, "loss": 0.6068, "step": 17 }, { "epoch": 0.6101694915254238, "grad_norm": 0.39711692929267883, "learning_rate": 0.0001023655569157086, "loss": 0.5645, "step": 18 }, { "epoch": 0.6440677966101694, "grad_norm": 0.6140002608299255, "learning_rate": 0.00010167325865910821, "loss": 0.5802, "step": 19 }, { "epoch": 0.6779661016949152, "grad_norm": 0.24760758876800537, "learning_rate": 0.00010090320737323084, "loss": 0.5746, "step": 20 }, { "epoch": 0.711864406779661, "grad_norm": 0.20960871875286102, "learning_rate": 0.00010005662066743998, "loss": 0.5206, "step": 21 }, { "epoch": 0.7457627118644068, "grad_norm": 0.20185863971710205, "learning_rate": 9.913483716932943e-05, "loss": 0.489, "step": 22 }, { "epoch": 0.7796610169491526, "grad_norm": 0.1934608370065689, "learning_rate": 9.81393144080781e-05, "loss": 0.4544, "step": 23 }, { "epoch": 0.8135593220338984, "grad_norm": 0.2110586315393448, "learning_rate": 9.707162650979662e-05, "loss": 
0.4844, "step": 24 }, { "epoch": 0.847457627118644, "grad_norm": 0.4501343071460724, "learning_rate": 9.593346170851051e-05, "loss": 0.4708, "step": 25 }, { "epoch": 0.8813559322033898, "grad_norm": 0.8199589848518372, "learning_rate": 9.472661967671516e-05, "loss": 0.5096, "step": 26 }, { "epoch": 0.9152542372881356, "grad_norm": 0.42125675082206726, "learning_rate": 9.345300867972365e-05, "loss": 0.4668, "step": 27 }, { "epoch": 0.9491525423728814, "grad_norm": 0.1957239806652069, "learning_rate": 9.211464255830708e-05, "loss": 0.4379, "step": 28 }, { "epoch": 0.9830508474576272, "grad_norm": 0.46031802892684937, "learning_rate": 9.071363754439846e-05, "loss": 0.4519, "step": 29 }, { "epoch": 1.0169491525423728, "grad_norm": 0.7945327758789062, "learning_rate": 8.925220891489483e-05, "loss": 0.7032, "step": 30 }, { "epoch": 1.0508474576271187, "grad_norm": 0.13100884854793549, "learning_rate": 8.773266748884944e-05, "loss": 0.4579, "step": 31 }, { "epoch": 1.0847457627118644, "grad_norm": 0.3228919804096222, "learning_rate": 8.615741597359156e-05, "loss": 0.4398, "step": 32 }, { "epoch": 1.11864406779661, "grad_norm": 0.35424545407295227, "learning_rate": 8.452894516555253e-05, "loss": 0.4465, "step": 33 }, { "epoch": 1.152542372881356, "grad_norm": 0.22786343097686768, "learning_rate": 8.284983001180455e-05, "loss": 0.4068, "step": 34 }, { "epoch": 1.1864406779661016, "grad_norm": 0.16856859624385834, "learning_rate": 8.112272553854005e-05, "loss": 0.4109, "step": 35 }, { "epoch": 1.2203389830508475, "grad_norm": 0.29634520411491394, "learning_rate": 7.935036265292968e-05, "loss": 0.4125, "step": 36 }, { "epoch": 1.2542372881355932, "grad_norm": 0.2798968553543091, "learning_rate": 7.753554382499657e-05, "loss": 0.4788, "step": 37 }, { "epoch": 1.288135593220339, "grad_norm": 0.11876481026411057, "learning_rate": 7.568113865633538e-05, "loss": 0.4387, "step": 38 }, { "epoch": 1.3220338983050848, "grad_norm": 0.08868218213319778, "learning_rate": 7.379007934268217e-05, "loss": 0.4091, "step": 39 }, { "epoch": 1.3559322033898304, "grad_norm": 0.22629207372665405, "learning_rate": 7.18653560375104e-05, "loss": 0.4052, "step": 40 }, { "epoch": 1.3898305084745763, "grad_norm": 0.3182011842727661, "learning_rate": 6.991001212398357e-05, "loss": 0.4155, "step": 41 }, { "epoch": 1.423728813559322, "grad_norm": 0.48684900999069214, "learning_rate": 6.792713940274086e-05, "loss": 0.3487, "step": 42 }, { "epoch": 1.457627118644068, "grad_norm": 0.7674040198326111, "learning_rate": 6.591987320312492e-05, "loss": 0.5046, "step": 43 }, { "epoch": 1.4915254237288136, "grad_norm": 0.5276405811309814, "learning_rate": 6.38913874255817e-05, "loss": 0.4545, "step": 44 }, { "epoch": 1.5254237288135593, "grad_norm": 0.12091302126646042, "learning_rate": 6.184488952307141e-05, "loss": 0.4387, "step": 45 }, { "epoch": 1.559322033898305, "grad_norm": 0.17123517394065857, "learning_rate": 5.9783615429426096e-05, "loss": 0.4108, "step": 46 }, { "epoch": 1.5932203389830508, "grad_norm": 0.4185921251773834, "learning_rate": 5.7710824442673244e-05, "loss": 0.407, "step": 47 }, { "epoch": 1.6271186440677967, "grad_norm": 0.5745218992233276, "learning_rate": 5.562979407141554e-05, "loss": 0.4002, "step": 48 }, { "epoch": 1.6610169491525424, "grad_norm": 0.09879444539546967, "learning_rate": 5.3543814852416186e-05, "loss": 0.4583, "step": 49 }, { "epoch": 1.694915254237288, "grad_norm": 0.29606881737709045, "learning_rate": 5.145618514758382e-05, "loss": 0.4399, "step": 50 }, { "epoch": 1.694915254237288, "eval_loss": 
0.420650452375412, "eval_runtime": 1.3705, "eval_samples_per_second": 290.407, "eval_steps_per_second": 9.486, "step": 50 } ], "logging_steps": 1, "max_steps": 89, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.7475518160896e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }