|
{ |
|
"best_metric": 11.051278114318848, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-25", |
|
"epoch": 0.08156606851549755, |
|
"eval_steps": 5, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0032626427406199023, |
|
"grad_norm": 0.9323341846466064, |
|
"learning_rate": 2e-05, |
|
"loss": 44.3182, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0032626427406199023, |
|
"eval_loss": 11.076725006103516, |
|
"eval_runtime": 1.4226, |
|
"eval_samples_per_second": 91.379, |
|
"eval_steps_per_second": 45.69, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0065252854812398045, |
|
"grad_norm": 1.062995195388794, |
|
"learning_rate": 4e-05, |
|
"loss": 44.2994, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.009787928221859706, |
|
"grad_norm": 1.0153011083602905, |
|
"learning_rate": 6e-05, |
|
"loss": 44.2882, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.013050570962479609, |
|
"grad_norm": 0.991928219795227, |
|
"learning_rate": 8e-05, |
|
"loss": 44.3067, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01631321370309951, |
|
"grad_norm": 0.9939742684364319, |
|
"learning_rate": 0.0001, |
|
"loss": 44.2967, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01631321370309951, |
|
"eval_loss": 11.075981140136719, |
|
"eval_runtime": 0.7294, |
|
"eval_samples_per_second": 178.229, |
|
"eval_steps_per_second": 89.115, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01957585644371941, |
|
"grad_norm": 0.976222574710846, |
|
"learning_rate": 0.00012, |
|
"loss": 44.2904, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.022838499184339316, |
|
"grad_norm": 1.0003511905670166, |
|
"learning_rate": 0.00014, |
|
"loss": 44.2975, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.026101141924959218, |
|
"grad_norm": 1.0365238189697266, |
|
"learning_rate": 0.00016, |
|
"loss": 44.2978, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02936378466557912, |
|
"grad_norm": 1.065255880355835, |
|
"learning_rate": 0.00018, |
|
"loss": 44.2773, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.03262642740619902, |
|
"grad_norm": 0.9979621767997742, |
|
"learning_rate": 0.0002, |
|
"loss": 44.3153, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03262642740619902, |
|
"eval_loss": 11.069252967834473, |
|
"eval_runtime": 0.724, |
|
"eval_samples_per_second": 179.563, |
|
"eval_steps_per_second": 89.781, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03588907014681892, |
|
"grad_norm": 1.0355585813522339, |
|
"learning_rate": 0.00019781476007338058, |
|
"loss": 44.2574, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03915171288743882, |
|
"grad_norm": 0.9954984188079834, |
|
"learning_rate": 0.0001913545457642601, |
|
"loss": 44.2634, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.04241435562805873, |
|
"grad_norm": 1.012833595275879, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 44.2439, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04567699836867863, |
|
"grad_norm": 1.0343108177185059, |
|
"learning_rate": 0.00016691306063588583, |
|
"loss": 44.2441, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.048939641109298535, |
|
"grad_norm": 0.9532853960990906, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 44.2595, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.048939641109298535, |
|
"eval_loss": 11.059078216552734, |
|
"eval_runtime": 0.8276, |
|
"eval_samples_per_second": 157.081, |
|
"eval_steps_per_second": 78.541, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.052202283849918436, |
|
"grad_norm": 1.0444512367248535, |
|
"learning_rate": 0.00013090169943749476, |
|
"loss": 44.234, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05546492659053834, |
|
"grad_norm": 1.0665297508239746, |
|
"learning_rate": 0.00011045284632676536, |
|
"loss": 44.2096, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05872756933115824, |
|
"grad_norm": 1.0649420022964478, |
|
"learning_rate": 8.954715367323468e-05, |
|
"loss": 44.1898, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.06199021207177814, |
|
"grad_norm": 1.1307687759399414, |
|
"learning_rate": 6.909830056250527e-05, |
|
"loss": 44.2054, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.06525285481239804, |
|
"grad_norm": 0.9438048601150513, |
|
"learning_rate": 5.000000000000002e-05, |
|
"loss": 44.2332, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06525285481239804, |
|
"eval_loss": 11.052212715148926, |
|
"eval_runtime": 0.8374, |
|
"eval_samples_per_second": 155.247, |
|
"eval_steps_per_second": 77.623, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06851549755301795, |
|
"grad_norm": 1.0551722049713135, |
|
"learning_rate": 3.308693936411421e-05, |
|
"loss": 44.1937, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.07177814029363784, |
|
"grad_norm": 0.9978577494621277, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 44.2144, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.07504078303425775, |
|
"grad_norm": 1.056475043296814, |
|
"learning_rate": 8.645454235739903e-06, |
|
"loss": 44.2002, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07830342577487764, |
|
"grad_norm": 1.0541832447052002, |
|
"learning_rate": 2.1852399266194314e-06, |
|
"loss": 44.2104, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.08156606851549755, |
|
"grad_norm": 1.0341196060180664, |
|
"learning_rate": 0.0, |
|
"loss": 44.2035, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08156606851549755, |
|
"eval_loss": 11.051278114318848, |
|
"eval_runtime": 0.7873, |
|
"eval_samples_per_second": 165.123, |
|
"eval_steps_per_second": 82.562, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 2, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 20919091200.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|