|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 28178, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10646603733409042, |
|
"grad_norm": 0.10739253461360931, |
|
"learning_rate": 4.81935110918419e-05, |
|
"loss": 1.4731, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21293207466818084, |
|
"grad_norm": 0.12009046971797943, |
|
"learning_rate": 4.548377772960474e-05, |
|
"loss": 0.1519, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.31939811200227125, |
|
"grad_norm": 0.17752079665660858, |
|
"learning_rate": 4.277404436736759e-05, |
|
"loss": 0.1419, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4258641493363617, |
|
"grad_norm": 0.18882086873054504, |
|
"learning_rate": 4.006431100513043e-05, |
|
"loss": 0.1371, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.5323301866704522, |
|
"grad_norm": 0.13926886022090912, |
|
"learning_rate": 3.735457764289327e-05, |
|
"loss": 0.1348, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6387962240045425, |
|
"grad_norm": 0.13389110565185547, |
|
"learning_rate": 3.464484428065612e-05, |
|
"loss": 0.1324, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7452622613386329, |
|
"grad_norm": 0.11937592923641205, |
|
"learning_rate": 3.1935110918418966e-05, |
|
"loss": 0.1295, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8517282986727234, |
|
"grad_norm": 0.09599358588457108, |
|
"learning_rate": 2.9225377556181803e-05, |
|
"loss": 0.1282, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.9581943360068138, |
|
"grad_norm": 0.1334109604358673, |
|
"learning_rate": 2.651564419394465e-05, |
|
"loss": 0.1287, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.0646603733409044, |
|
"grad_norm": 0.14946720004081726, |
|
"learning_rate": 2.3805910831707495e-05, |
|
"loss": 0.1282, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.1711264106749946, |
|
"grad_norm": 0.12653611600399017, |
|
"learning_rate": 2.109617746947034e-05, |
|
"loss": 0.1256, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.2775924480090852, |
|
"grad_norm": 0.19287967681884766, |
|
"learning_rate": 1.8386444107233183e-05, |
|
"loss": 0.127, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.3840584853431754, |
|
"grad_norm": 0.16098152101039886, |
|
"learning_rate": 1.5676710744996027e-05, |
|
"loss": 0.1242, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.4905245226772659, |
|
"grad_norm": 0.16886812448501587, |
|
"learning_rate": 1.2966977382758871e-05, |
|
"loss": 0.1236, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.5969905600113563, |
|
"grad_norm": 0.14498655498027802, |
|
"learning_rate": 1.0257244020521714e-05, |
|
"loss": 0.124, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.703456597345447, |
|
"grad_norm": 0.19877927005290985, |
|
"learning_rate": 7.547510658284558e-06, |
|
"loss": 0.1251, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.8099226346795372, |
|
"grad_norm": 0.33909276127815247, |
|
"learning_rate": 4.837777296047403e-06, |
|
"loss": 0.1242, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.9163886720136276, |
|
"grad_norm": 0.14188048243522644, |
|
"learning_rate": 2.1280439338102465e-06, |
|
"loss": 0.1222, |
|
"step": 27000 |
|
} |
|
], |
|
"logging_steps": 1500, |
|
"max_steps": 28178, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0576304824909824e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|