{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9980636237897649,
  "eval_steps": 23,
  "global_step": 451,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05089903181189488,
      "grad_norm": 0.7468199729919434,
      "learning_rate": 0.001825741858350554,
      "loss": 1.4222,
      "step": 23
    },
    {
      "epoch": 0.10179806362378976,
      "grad_norm": 0.33098104596138,
      "learning_rate": 0.0008451542547285166,
      "loss": 1.2833,
      "step": 46
    },
    {
      "epoch": 0.15269709543568466,
      "grad_norm": 0.3552784323692322,
      "learning_rate": 0.0006262242910851496,
      "loss": 1.095,
      "step": 69
    },
    {
      "epoch": 0.20359612724757953,
      "grad_norm": 0.32930678129196167,
      "learning_rate": 0.0005198752449100363,
      "loss": 1.0648,
      "step": 92
    },
    {
      "epoch": 0.2544951590594744,
      "grad_norm": 0.32375234365463257,
      "learning_rate": 0.0004540766091864998,
      "loss": 1.0184,
      "step": 115
    },
    {
      "epoch": 0.3053941908713693,
      "grad_norm": 0.37444040179252625,
      "learning_rate": 0.0004082482904638631,
      "loss": 1.0037,
      "step": 138
    },
    {
      "epoch": 0.3562932226832642,
      "grad_norm": 0.398503839969635,
      "learning_rate": 0.00037397879600338285,
      "loss": 0.9613,
      "step": 161
    },
    {
      "epoch": 0.40719225449515906,
      "grad_norm": 0.36333534121513367,
      "learning_rate": 0.00034710506725031166,
      "loss": 0.9395,
      "step": 184
    },
    {
      "epoch": 0.4580912863070539,
      "grad_norm": 0.3362521231174469,
      "learning_rate": 0.0003253000243161777,
      "loss": 0.929,
      "step": 207
    },
    {
      "epoch": 0.5089903181189488,
      "grad_norm": 0.3286592960357666,
      "learning_rate": 0.0003071475584169756,
      "loss": 0.9067,
      "step": 230
    },
    {
      "epoch": 0.5598893499308437,
      "grad_norm": 0.37335479259490967,
      "learning_rate": 0.0002917299829957891,
      "loss": 0.8955,
      "step": 253
    },
    {
      "epoch": 0.6107883817427386,
      "grad_norm": 0.3964427411556244,
      "learning_rate": 0.0002784230231948523,
      "loss": 0.8665,
      "step": 276
    },
    {
      "epoch": 0.6616874135546335,
      "grad_norm": 0.39572906494140625,
      "learning_rate": 0.0002667852642561041,
      "loss": 0.8622,
      "step": 299
    },
    {
      "epoch": 0.7125864453665284,
      "grad_norm": 0.4889402389526367,
      "learning_rate": 0.0002564945880212886,
      "loss": 0.8716,
      "step": 322
    },
    {
      "epoch": 0.7634854771784232,
      "grad_norm": 0.3482915163040161,
      "learning_rate": 0.00024730968341474897,
      "loss": 0.8326,
      "step": 345
    },
    {
      "epoch": 0.8143845089903181,
      "grad_norm": 0.4124608337879181,
      "learning_rate": 0.00023904572186687873,
      "loss": 0.8199,
      "step": 368
    },
    {
      "epoch": 0.865283540802213,
      "grad_norm": 0.3616255819797516,
      "learning_rate": 0.00023155842232374464,
      "loss": 0.8103,
      "step": 391
    },
    {
      "epoch": 0.9161825726141078,
      "grad_norm": 0.42308276891708374,
      "learning_rate": 0.00022473328748774736,
      "loss": 0.8067,
      "step": 414
    },
    {
      "epoch": 0.9670816044260028,
      "grad_norm": 0.4363687038421631,
      "learning_rate": 0.00021847813825958586,
      "loss": 0.8163,
      "step": 437
    }
  ],
  "logging_steps": 23,
  "max_steps": 451,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 23,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.580565166436909e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}