{
  "best_metric": 1.3308466672897339,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.013775646379157448,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00027551292758314896,
      "grad_norm": 4.985652446746826,
      "learning_rate": 5e-05,
      "loss": 3.345,
      "step": 1
    },
    {
      "epoch": 0.00027551292758314896,
      "eval_loss": 3.855706214904785,
      "eval_runtime": 2.3104,
      "eval_samples_per_second": 21.641,
      "eval_steps_per_second": 5.627,
      "step": 1
    },
    {
      "epoch": 0.0005510258551662979,
      "grad_norm": 6.339283466339111,
      "learning_rate": 0.0001,
      "loss": 3.6077,
      "step": 2
    },
    {
      "epoch": 0.0008265387827494468,
      "grad_norm": 5.504337310791016,
      "learning_rate": 9.990365154573717e-05,
      "loss": 3.3122,
      "step": 3
    },
    {
      "epoch": 0.0011020517103325959,
      "grad_norm": 2.902787685394287,
      "learning_rate": 9.961501876182148e-05,
      "loss": 2.654,
      "step": 4
    },
    {
      "epoch": 0.0013775646379157447,
      "grad_norm": 2.803334951400757,
      "learning_rate": 9.913533761814537e-05,
      "loss": 2.2916,
      "step": 5
    },
    {
      "epoch": 0.0016530775654988936,
      "grad_norm": 2.3478095531463623,
      "learning_rate": 9.846666218300807e-05,
      "loss": 2.2673,
      "step": 6
    },
    {
      "epoch": 0.0019285904930820426,
      "grad_norm": 1.7243198156356812,
      "learning_rate": 9.761185582727977e-05,
      "loss": 2.1345,
      "step": 7
    },
    {
      "epoch": 0.0022041034206651917,
      "grad_norm": 1.657291293144226,
      "learning_rate": 9.657457896300791e-05,
      "loss": 2.1614,
      "step": 8
    },
    {
      "epoch": 0.0024796163482483403,
      "grad_norm": 1.478399395942688,
      "learning_rate": 9.535927336897098e-05,
      "loss": 1.9236,
      "step": 9
    },
    {
      "epoch": 0.0027551292758314894,
      "grad_norm": 1.503095030784607,
      "learning_rate": 9.397114317029975e-05,
      "loss": 1.9579,
      "step": 10
    },
    {
      "epoch": 0.0030306422034146385,
      "grad_norm": 1.3471025228500366,
      "learning_rate": 9.241613255361455e-05,
      "loss": 1.9872,
      "step": 11
    },
    {
      "epoch": 0.003306155130997787,
      "grad_norm": 1.953307867050171,
      "learning_rate": 9.070090031310558e-05,
      "loss": 1.7656,
      "step": 12
    },
    {
      "epoch": 0.003581668058580936,
      "grad_norm": 1.4133480787277222,
      "learning_rate": 8.883279133655399e-05,
      "loss": 2.2645,
      "step": 13
    },
    {
      "epoch": 0.0038571809861640853,
      "grad_norm": 1.2810511589050293,
      "learning_rate": 8.681980515339464e-05,
      "loss": 2.3035,
      "step": 14
    },
    {
      "epoch": 0.004132693913747234,
      "grad_norm": 1.3834335803985596,
      "learning_rate": 8.467056167950311e-05,
      "loss": 2.0962,
      "step": 15
    },
    {
      "epoch": 0.004408206841330383,
      "grad_norm": 1.2161914110183716,
      "learning_rate": 8.239426430539243e-05,
      "loss": 2.0253,
      "step": 16
    },
    {
      "epoch": 0.004683719768913532,
      "grad_norm": 1.2627516984939575,
      "learning_rate": 8.000066048588211e-05,
      "loss": 1.8418,
      "step": 17
    },
    {
      "epoch": 0.004959232696496681,
      "grad_norm": 1.310968041419983,
      "learning_rate": 7.75e-05,
      "loss": 1.8342,
      "step": 18
    },
    {
      "epoch": 0.00523474562407983,
      "grad_norm": 1.149875283241272,
      "learning_rate": 7.490299105985507e-05,
      "loss": 1.8164,
      "step": 19
    },
    {
      "epoch": 0.005510258551662979,
      "grad_norm": 1.3373379707336426,
      "learning_rate": 7.222075445642904e-05,
      "loss": 1.798,
      "step": 20
    },
    {
      "epoch": 0.0057857714792461275,
      "grad_norm": 1.3559110164642334,
      "learning_rate": 6.946477593864228e-05,
      "loss": 1.7143,
      "step": 21
    },
    {
      "epoch": 0.006061284406829277,
      "grad_norm": 1.158542275428772,
      "learning_rate": 6.664685702961344e-05,
      "loss": 1.7487,
      "step": 22
    },
    {
      "epoch": 0.006336797334412426,
      "grad_norm": 1.2300978899002075,
      "learning_rate": 6.377906449072578e-05,
      "loss": 1.6934,
      "step": 23
    },
    {
      "epoch": 0.006612310261995574,
      "grad_norm": 1.8234623670578003,
      "learning_rate": 6.087367864990233e-05,
      "loss": 1.6099,
      "step": 24
    },
    {
      "epoch": 0.006887823189578724,
      "grad_norm": 1.70737886428833,
      "learning_rate": 5.794314081535644e-05,
      "loss": 1.7116,
      "step": 25
    },
    {
      "epoch": 0.006887823189578724,
      "eval_loss": 1.4621353149414062,
      "eval_runtime": 2.2624,
      "eval_samples_per_second": 22.1,
      "eval_steps_per_second": 5.746,
      "step": 25
    },
    {
      "epoch": 0.007163336117161872,
      "grad_norm": 1.08351731300354,
      "learning_rate": 5.500000000000001e-05,
      "loss": 2.1669,
      "step": 26
    },
    {
      "epoch": 0.007438849044745021,
      "grad_norm": 1.1508958339691162,
      "learning_rate": 5.205685918464356e-05,
      "loss": 1.796,
      "step": 27
    },
    {
      "epoch": 0.0077143619723281705,
      "grad_norm": 1.2080681324005127,
      "learning_rate": 4.912632135009769e-05,
      "loss": 1.879,
      "step": 28
    },
    {
      "epoch": 0.00798987489991132,
      "grad_norm": 1.3611031770706177,
      "learning_rate": 4.6220935509274235e-05,
      "loss": 1.7371,
      "step": 29
    },
    {
      "epoch": 0.008265387827494468,
      "grad_norm": 1.3392175436019897,
      "learning_rate": 4.3353142970386564e-05,
      "loss": 1.6612,
      "step": 30
    },
    {
      "epoch": 0.008540900755077617,
      "grad_norm": 1.2826539278030396,
      "learning_rate": 4.053522406135775e-05,
      "loss": 1.68,
      "step": 31
    },
    {
      "epoch": 0.008816413682660767,
      "grad_norm": 1.4184712171554565,
      "learning_rate": 3.777924554357096e-05,
      "loss": 1.6392,
      "step": 32
    },
    {
      "epoch": 0.009091926610243915,
      "grad_norm": 1.42243230342865,
      "learning_rate": 3.509700894014496e-05,
      "loss": 1.5867,
      "step": 33
    },
    {
      "epoch": 0.009367439537827064,
      "grad_norm": 1.5949103832244873,
      "learning_rate": 3.250000000000001e-05,
      "loss": 1.4738,
      "step": 34
    },
    {
      "epoch": 0.009642952465410214,
      "grad_norm": 1.6676265001296997,
      "learning_rate": 2.9999339514117912e-05,
      "loss": 1.596,
      "step": 35
    },
    {
      "epoch": 0.009918465392993361,
      "grad_norm": 1.4162265062332153,
      "learning_rate": 2.760573569460757e-05,
      "loss": 1.5618,
      "step": 36
    },
    {
      "epoch": 0.010193978320576511,
      "grad_norm": 1.7664705514907837,
      "learning_rate": 2.53294383204969e-05,
      "loss": 1.3765,
      "step": 37
    },
    {
      "epoch": 0.01046949124815966,
      "grad_norm": 1.2374845743179321,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 2.0416,
      "step": 38
    },
    {
      "epoch": 0.010745004175742808,
      "grad_norm": 1.4492584466934204,
      "learning_rate": 2.1167208663446025e-05,
      "loss": 1.9999,
      "step": 39
    },
    {
      "epoch": 0.011020517103325958,
      "grad_norm": 1.5911473035812378,
      "learning_rate": 1.9299099686894423e-05,
      "loss": 1.8249,
      "step": 40
    },
    {
      "epoch": 0.011296030030909107,
      "grad_norm": 1.7549965381622314,
      "learning_rate": 1.758386744638546e-05,
      "loss": 1.7741,
      "step": 41
    },
    {
      "epoch": 0.011571542958492255,
      "grad_norm": 1.734220027923584,
      "learning_rate": 1.602885682970026e-05,
      "loss": 1.5942,
      "step": 42
    },
    {
      "epoch": 0.011847055886075404,
      "grad_norm": 1.8675172328948975,
      "learning_rate": 1.464072663102903e-05,
      "loss": 1.5419,
      "step": 43
    },
    {
      "epoch": 0.012122568813658554,
      "grad_norm": 1.4374147653579712,
      "learning_rate": 1.3425421036992098e-05,
      "loss": 1.5869,
      "step": 44
    },
    {
      "epoch": 0.012398081741241702,
      "grad_norm": 1.2492765188217163,
      "learning_rate": 1.2388144172720251e-05,
      "loss": 1.5914,
      "step": 45
    },
    {
      "epoch": 0.012673594668824851,
      "grad_norm": 1.2149417400360107,
      "learning_rate": 1.1533337816991932e-05,
      "loss": 1.535,
      "step": 46
    },
    {
      "epoch": 0.012949107596408,
      "grad_norm": 1.3839102983474731,
      "learning_rate": 1.0864662381854632e-05,
      "loss": 1.4046,
      "step": 47
    },
    {
      "epoch": 0.013224620523991149,
      "grad_norm": 1.3655040264129639,
      "learning_rate": 1.0384981238178534e-05,
      "loss": 1.5681,
      "step": 48
    },
    {
      "epoch": 0.013500133451574298,
      "grad_norm": 1.9632959365844727,
      "learning_rate": 1.0096348454262845e-05,
      "loss": 1.3139,
      "step": 49
    },
    {
      "epoch": 0.013775646379157448,
      "grad_norm": 2.0195958614349365,
      "learning_rate": 1e-05,
      "loss": 1.4892,
      "step": 50
    },
    {
      "epoch": 0.013775646379157448,
      "eval_loss": 1.3308466672897339,
      "eval_runtime": 2.2559,
      "eval_samples_per_second": 22.164,
      "eval_steps_per_second": 5.763,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.601881470140416e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}