|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9969945902624725, |
|
"eval_steps": 16, |
|
"global_step": 311, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05129232618713685, |
|
"grad_norm": 0.5048500299453735, |
|
"learning_rate": 0.0015, |
|
"loss": 1.1504, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1025846523742737, |
|
"grad_norm": 0.389893114566803, |
|
"learning_rate": 0.0011094003924504584, |
|
"loss": 3.0392, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.15387697856141053, |
|
"grad_norm": 0.2868100106716156, |
|
"learning_rate": 0.0007427813527082075, |
|
"loss": 0.8441, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2051693047485474, |
|
"grad_norm": 0.2573494613170624, |
|
"learning_rate": 0.0005962847939999439, |
|
"loss": 0.783, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.25646163093568425, |
|
"grad_norm": 0.31569787859916687, |
|
"learning_rate": 0.000512147519731584, |
|
"loss": 0.7637, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.30775395712282105, |
|
"grad_norm": 0.2564464211463928, |
|
"learning_rate": 0.0004558423058385518, |
|
"loss": 0.719, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.3590462833099579, |
|
"grad_norm": 0.32603177428245544, |
|
"learning_rate": 0.0004147806778921701, |
|
"loss": 0.7037, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.4103386094970948, |
|
"grad_norm": 0.2662847340106964, |
|
"learning_rate": 0.0003831305140884606, |
|
"loss": 0.6794, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.46163093568423164, |
|
"grad_norm": 0.3353310525417328, |
|
"learning_rate": 0.00035777087639996636, |
|
"loss": 0.6737, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.5129232618713685, |
|
"grad_norm": 0.26287367939949036, |
|
"learning_rate": 0.00033686076842660763, |
|
"loss": 0.6561, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.5642155880585054, |
|
"grad_norm": 0.3288561999797821, |
|
"learning_rate": 0.00031923475378704884, |
|
"loss": 0.6359, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.6155079142456421, |
|
"grad_norm": 0.35450485348701477, |
|
"learning_rate": 0.0003041143685078822, |
|
"loss": 0.612, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.666800240432779, |
|
"grad_norm": 0.29516109824180603, |
|
"learning_rate": 0.00029095718698132317, |
|
"loss": 0.6057, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.7180925666199158, |
|
"grad_norm": 0.38677069544792175, |
|
"learning_rate": 0.00027937211830783126, |
|
"loss": 0.5943, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.7693848928070527, |
|
"grad_norm": 0.3125530481338501, |
|
"learning_rate": 0.000269069117598525, |
|
"loss": 0.5635, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.8206772189941896, |
|
"grad_norm": 0.3307824730873108, |
|
"learning_rate": 0.00025982792098465233, |
|
"loss": 0.5629, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.8719695451813264, |
|
"grad_norm": 0.31122493743896484, |
|
"learning_rate": 0.0002514778453847726, |
|
"loss": 0.5582, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.9232618713684633, |
|
"grad_norm": 0.3087589144706726, |
|
"learning_rate": 0.00024388430433987693, |
|
"loss": 0.5364, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.9745541975556001, |
|
"grad_norm": 0.3315638601779938, |
|
"learning_rate": 0.00023693955110363693, |
|
"loss": 0.5412, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.9969945902624725, |
|
"step": 311, |
|
"total_flos": 3.158660236722569e+18, |
|
"train_loss": 0.7963475859050199, |
|
"train_runtime": 2372.2052, |
|
"train_samples_per_second": 16.831, |
|
"train_steps_per_second": 0.131 |
|
} |
|
], |
|
"logging_steps": 16, |
|
"max_steps": 311, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 16, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.158660236722569e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|