{
  "best_metric": 0.31598106026649475,
  "best_model_checkpoint": "./opt_trained/checkpoint-43",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 43,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.023391812865497075,
      "grad_norm": 1.4262398481369019,
      "learning_rate": 9.995238095238095e-05,
      "loss": 2.4722,
      "step": 1
    },
    {
      "epoch": 0.04678362573099415,
      "grad_norm": 12.206507682800293,
      "learning_rate": 9.990476190476191e-05,
      "loss": 3.176,
      "step": 2
    },
    {
      "epoch": 0.07017543859649122,
      "grad_norm": 11.578840255737305,
      "learning_rate": 9.985714285714287e-05,
      "loss": 2.8396,
      "step": 3
    },
    {
      "epoch": 0.0935672514619883,
      "grad_norm": 9.648161888122559,
      "learning_rate": 9.980952380952382e-05,
      "loss": 2.7336,
      "step": 4
    },
    {
      "epoch": 0.11695906432748537,
      "grad_norm": 3.8782691955566406,
      "learning_rate": 9.976190476190477e-05,
      "loss": 2.6955,
      "step": 5
    },
    {
      "epoch": 0.14035087719298245,
      "grad_norm": 12.644487380981445,
      "learning_rate": 9.971428571428571e-05,
      "loss": 2.6405,
      "step": 6
    },
    {
      "epoch": 0.16374269005847952,
      "grad_norm": 11.057122230529785,
      "learning_rate": 9.966666666666667e-05,
      "loss": 2.7113,
      "step": 7
    },
    {
      "epoch": 0.1871345029239766,
      "grad_norm": 4.860190391540527,
      "learning_rate": 9.961904761904762e-05,
      "loss": 2.7076,
      "step": 8
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 4.317215442657471,
      "learning_rate": 9.957142857142858e-05,
      "loss": 2.6377,
      "step": 9
    },
    {
      "epoch": 0.23391812865497075,
      "grad_norm": 3.3068416118621826,
      "learning_rate": 9.952380952380953e-05,
      "loss": 2.5995,
      "step": 10
    },
    {
      "epoch": 0.2573099415204678,
      "grad_norm": 1.2752724885940552,
      "learning_rate": 9.947619047619048e-05,
      "loss": 2.6285,
      "step": 11
    },
    {
      "epoch": 0.2807017543859649,
      "grad_norm": 8.849737167358398,
      "learning_rate": 9.942857142857144e-05,
      "loss": 2.6217,
      "step": 12
    },
    {
      "epoch": 0.30409356725146197,
      "grad_norm": 5.594025611877441,
      "learning_rate": 9.938095238095238e-05,
      "loss": 2.6265,
      "step": 13
    },
    {
      "epoch": 0.32748538011695905,
      "grad_norm": 3.581617593765259,
      "learning_rate": 9.933333333333334e-05,
      "loss": 2.5984,
      "step": 14
    },
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 5.33600378036499,
      "learning_rate": 9.92857142857143e-05,
      "loss": 2.6071,
      "step": 15
    },
    {
      "epoch": 0.3742690058479532,
      "grad_norm": 4.4274983406066895,
      "learning_rate": 9.923809523809524e-05,
      "loss": 2.5908,
      "step": 16
    },
    {
      "epoch": 0.39766081871345027,
      "grad_norm": 4.5507307052612305,
      "learning_rate": 9.91904761904762e-05,
      "loss": 2.5616,
      "step": 17
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 3.189161777496338,
      "learning_rate": 9.914285714285715e-05,
      "loss": 2.5784,
      "step": 18
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 2.706615924835205,
      "learning_rate": 9.909523809523809e-05,
      "loss": 2.5413,
      "step": 19
    },
    {
      "epoch": 0.4678362573099415,
      "grad_norm": 2.146662712097168,
      "learning_rate": 9.904761904761905e-05,
      "loss": 2.6019,
      "step": 20
    },
    {
      "epoch": 0.49122807017543857,
      "grad_norm": 3.2252964973449707,
      "learning_rate": 9.900000000000001e-05,
      "loss": 2.5914,
      "step": 21
    },
    {
      "epoch": 0.5146198830409356,
      "grad_norm": 2.8859879970550537,
      "learning_rate": 9.895238095238095e-05,
      "loss": 2.6133,
      "step": 22
    },
    {
      "epoch": 0.5380116959064327,
      "grad_norm": 2.5647897720336914,
      "learning_rate": 9.890476190476191e-05,
      "loss": 2.5425,
      "step": 23
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 3.0347073078155518,
      "learning_rate": 9.885714285714286e-05,
      "loss": 2.5732,
      "step": 24
    },
    {
      "epoch": 0.5847953216374269,
      "grad_norm": 1.8412858247756958,
      "learning_rate": 9.880952380952381e-05,
      "loss": 2.5776,
      "step": 25
    },
    {
      "epoch": 0.6081871345029239,
      "grad_norm": 3.1820366382598877,
      "learning_rate": 9.876190476190477e-05,
      "loss": 2.5566,
      "step": 26
    },
    {
      "epoch": 0.631578947368421,
      "grad_norm": 2.4613687992095947,
      "learning_rate": 9.871428571428572e-05,
      "loss": 2.5113,
      "step": 27
    },
    {
      "epoch": 0.6549707602339181,
      "grad_norm": 5.942374229431152,
      "learning_rate": 9.866666666666668e-05,
      "loss": 2.575,
      "step": 28
    },
    {
      "epoch": 0.6783625730994152,
      "grad_norm": 7.427689075469971,
      "learning_rate": 9.861904761904762e-05,
      "loss": 2.5692,
      "step": 29
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 3.163085699081421,
      "learning_rate": 9.857142857142858e-05,
      "loss": 2.5192,
      "step": 30
    },
    {
      "epoch": 0.7251461988304093,
      "grad_norm": 3.421778678894043,
      "learning_rate": 9.852380952380952e-05,
      "loss": 2.5533,
      "step": 31
    },
    {
      "epoch": 0.7485380116959064,
      "grad_norm": 2.486320734024048,
      "learning_rate": 9.847619047619048e-05,
      "loss": 2.515,
      "step": 32
    },
    {
      "epoch": 0.7719298245614035,
      "grad_norm": 3.2825722694396973,
      "learning_rate": 9.842857142857144e-05,
      "loss": 2.4994,
      "step": 33
    },
    {
      "epoch": 0.7953216374269005,
      "grad_norm": 3.5643672943115234,
      "learning_rate": 9.838095238095238e-05,
      "loss": 2.5327,
      "step": 34
    },
    {
      "epoch": 0.8187134502923976,
      "grad_norm": 3.3127200603485107,
      "learning_rate": 9.833333333333333e-05,
      "loss": 2.6251,
      "step": 35
    },
    {
      "epoch": 0.8421052631578947,
      "grad_norm": 3.187095880508423,
      "learning_rate": 9.828571428571429e-05,
      "loss": 2.5233,
      "step": 36
    },
    {
      "epoch": 0.8654970760233918,
      "grad_norm": 3.3743860721588135,
      "learning_rate": 9.823809523809525e-05,
      "loss": 2.4999,
      "step": 37
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 2.3684120178222656,
      "learning_rate": 9.81904761904762e-05,
      "loss": 2.5302,
      "step": 38
    },
    {
      "epoch": 0.9122807017543859,
      "grad_norm": 3.7091619968414307,
      "learning_rate": 9.814285714285715e-05,
      "loss": 2.5003,
      "step": 39
    },
    {
      "epoch": 0.935672514619883,
      "grad_norm": 4.230418682098389,
      "learning_rate": 9.80952380952381e-05,
      "loss": 2.5379,
      "step": 40
    },
    {
      "epoch": 0.9590643274853801,
      "grad_norm": 3.5879616737365723,
      "learning_rate": 9.804761904761905e-05,
      "loss": 2.5652,
      "step": 41
    },
    {
      "epoch": 0.9824561403508771,
      "grad_norm": 2.621013879776001,
      "learning_rate": 9.8e-05,
      "loss": 2.5704,
      "step": 42
    },
    {
      "epoch": 1.0,
      "grad_norm": 1.3536667823791504,
      "learning_rate": 9.795238095238097e-05,
      "loss": 1.9135,
      "step": 43
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.31598106026649475,
      "eval_runtime": 3.4639,
      "eval_samples_per_second": 62.358,
      "eval_steps_per_second": 15.59,
      "step": 43
    }
  ],
  "logging_steps": 1,
  "max_steps": 2100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2151327228493824.0,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}