|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9993815708101422, |
|
"eval_steps": 500, |
|
"global_step": 404, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.024737167594310452, |
|
"grad_norm": 3.8928613137780697, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8929, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.049474335188620905, |
|
"grad_norm": 4.590030022211483, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7739, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07421150278293136, |
|
"grad_norm": 2.0445731869289467, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7286, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09894867037724181, |
|
"grad_norm": 1.4578350201244652, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6964, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12368583797155226, |
|
"grad_norm": 1.2041164043429626, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6808, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.14842300556586271, |
|
"grad_norm": 0.9753852138770908, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6577, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17316017316017315, |
|
"grad_norm": 0.8981690468785772, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6476, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.19789734075448362, |
|
"grad_norm": 0.7458796589430092, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6395, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22263450834879406, |
|
"grad_norm": 0.7140456505991135, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6376, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.24737167594310452, |
|
"grad_norm": 0.9200299620458595, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6307, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 0.5761268046084219, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6175, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.29684601113172543, |
|
"grad_norm": 0.5836896660196662, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6177, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.32158317872603587, |
|
"grad_norm": 1.038827197557366, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6132, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3463203463203463, |
|
"grad_norm": 0.6673252980412175, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6091, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.37105751391465674, |
|
"grad_norm": 0.6307785927320235, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6046, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.39579468150896724, |
|
"grad_norm": 0.5244651264271686, |
|
"learning_rate": 5e-06, |
|
"loss": 0.601, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4205318491032777, |
|
"grad_norm": 0.5705407579445089, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6082, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4452690166975881, |
|
"grad_norm": 0.5554154594013059, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6003, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.47000618429189855, |
|
"grad_norm": 0.6019873008818303, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5994, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.49474335188620905, |
|
"grad_norm": 0.7034894074017951, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5194805194805194, |
|
"grad_norm": 0.5638344674323469, |
|
"learning_rate": 5e-06, |
|
"loss": 0.6023, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 0.5443793743216905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5927, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5689548546691404, |
|
"grad_norm": 0.5802674598015297, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5925, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5936920222634509, |
|
"grad_norm": 0.592738891502665, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5928, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6184291898577613, |
|
"grad_norm": 0.5388550762260421, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5858, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6431663574520717, |
|
"grad_norm": 0.5593031272628818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5879, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6679035250463822, |
|
"grad_norm": 0.6608335560611281, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5844, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6926406926406926, |
|
"grad_norm": 0.6327217733233739, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5755, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.717377860235003, |
|
"grad_norm": 0.5769636309953428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5843, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7421150278293135, |
|
"grad_norm": 0.5532053151787545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5864, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.766852195423624, |
|
"grad_norm": 0.6400898941077486, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5822, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7915893630179345, |
|
"grad_norm": 0.602629447160874, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5727, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.5999318227987905, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5794, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8410636982065554, |
|
"grad_norm": 0.5332757259893975, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5793, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8658008658008658, |
|
"grad_norm": 0.5492421058512896, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5744, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8905380333951762, |
|
"grad_norm": 0.6007108771595042, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5707, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9152752009894867, |
|
"grad_norm": 0.5866994201925174, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5765, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9400123685837971, |
|
"grad_norm": 0.4956333122054928, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5714, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9647495361781077, |
|
"grad_norm": 0.5508918734344029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5682, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9894867037724181, |
|
"grad_norm": 0.599971023810852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.5719, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9993815708101422, |
|
"eval_loss": 0.5685587525367737, |
|
"eval_runtime": 289.4782, |
|
"eval_samples_per_second": 37.626, |
|
"eval_steps_per_second": 0.591, |
|
"step": 404 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1212, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 676924426813440.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|