|
{ |
|
"best_metric": 0.09626218676567078, |
|
"best_model_checkpoint": "checkpoint/checkpoint-300000", |
|
"epoch": 0.8551077008149176, |
|
"global_step": 300000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.942992819945672e-05, |
|
"loss": 0.0499, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8859856398913445e-05, |
|
"loss": 0.0274, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8289784598370164e-05, |
|
"loss": 0.0244, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7719712797826888e-05, |
|
"loss": 0.0218, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.714964099728361e-05, |
|
"loss": 0.0208, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.9703630265572595, |
|
"eval_f1": 0.9697362965196742, |
|
"eval_loss": 0.19255024194717407, |
|
"eval_precision": 0.9906862367555316, |
|
"eval_recall": 0.949654059527074, |
|
"eval_runtime": 7138.3308, |
|
"eval_samples_per_second": 53.13, |
|
"eval_steps_per_second": 6.641, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.657956919674033e-05, |
|
"loss": 0.018, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.600949739619705e-05, |
|
"loss": 0.0173, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5439425595653774e-05, |
|
"loss": 0.0182, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4869353795110495e-05, |
|
"loss": 0.0168, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4299281994567218e-05, |
|
"loss": 0.0152, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.9694269833568883, |
|
"eval_f1": 0.9686356296001752, |
|
"eval_loss": 0.26416775584220886, |
|
"eval_precision": 0.9943740662782754, |
|
"eval_recall": 0.9441960048094163, |
|
"eval_runtime": 7120.7469, |
|
"eval_samples_per_second": 53.261, |
|
"eval_steps_per_second": 6.658, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3729210194023938e-05, |
|
"loss": 0.0167, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.315913839348066e-05, |
|
"loss": 0.0143, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2589066592937383e-05, |
|
"loss": 0.0151, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2018994792394103e-05, |
|
"loss": 0.0127, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1448922991850824e-05, |
|
"loss": 0.0129, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.9814399772185542, |
|
"eval_f1": 0.9811897565811801, |
|
"eval_loss": 0.14654095470905304, |
|
"eval_precision": 0.9945986358440376, |
|
"eval_recall": 0.9681376168076444, |
|
"eval_runtime": 7098.6821, |
|
"eval_samples_per_second": 53.426, |
|
"eval_steps_per_second": 6.678, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0878851191307547e-05, |
|
"loss": 0.0166, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0308779390764267e-05, |
|
"loss": 0.0133, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.738707590220989e-06, |
|
"loss": 0.0117, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.168635789677712e-06, |
|
"loss": 0.0104, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.598563989134432e-06, |
|
"loss": 0.0111, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.9705159575590103, |
|
"eval_f1": 0.9696535478373199, |
|
"eval_loss": 0.31930962204933167, |
|
"eval_precision": 0.9988705619233995, |
|
"eval_recall": 0.9420971586474571, |
|
"eval_runtime": 7105.0574, |
|
"eval_samples_per_second": 53.378, |
|
"eval_steps_per_second": 6.672, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.028492188591153e-06, |
|
"loss": 0.0103, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.4584203880478754e-06, |
|
"loss": 0.0113, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.888348587504596e-06, |
|
"loss": 0.0083, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.3182767869613184e-06, |
|
"loss": 0.008, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.74820498641804e-06, |
|
"loss": 0.008, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.976308878435674, |
|
"eval_f1": 0.9758237027270649, |
|
"eval_loss": 0.20410259068012238, |
|
"eval_precision": 0.996225627276573, |
|
"eval_recall": 0.9562406395679963, |
|
"eval_runtime": 7119.7261, |
|
"eval_samples_per_second": 53.268, |
|
"eval_steps_per_second": 6.659, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.178133185874762e-06, |
|
"loss": 0.0086, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.608061385331483e-06, |
|
"loss": 0.0067, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0379895847882044e-06, |
|
"loss": 0.007, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.467917784244926e-06, |
|
"loss": 0.0059, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8978459837016474e-06, |
|
"loss": 0.0061, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.9880529246735714, |
|
"eval_f1": 0.9879724036876487, |
|
"eval_loss": 0.09626218676567078, |
|
"eval_precision": 0.9946763305164922, |
|
"eval_recall": 0.9813582382348598, |
|
"eval_runtime": 7136.9095, |
|
"eval_samples_per_second": 53.14, |
|
"eval_steps_per_second": 6.643, |
|
"step": 300000 |
|
} |
|
], |
|
"max_steps": 350833, |
|
"num_train_epochs": 1, |
|
"total_flos": 2.7489826686811325e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|