|
{ |
|
"best_metric": 0.08749764412641525, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-1600", |
|
"epoch": 4.0, |
|
"global_step": 3580, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9538808220802765, |
|
"eval_f1": 0.9490697836636672, |
|
"eval_loss": 0.17476704716682434, |
|
"eval_precision": 0.9467019204007793, |
|
"eval_recall": 0.9514495214880822, |
|
"eval_runtime": 1.5406, |
|
"eval_samples_per_second": 1073.605, |
|
"eval_steps_per_second": 67.506, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9654529743650565, |
|
"eval_f1": 0.9616131828280513, |
|
"eval_loss": 0.125724658370018, |
|
"eval_precision": 0.9590238861730456, |
|
"eval_recall": 0.9642164991708457, |
|
"eval_runtime": 1.5211, |
|
"eval_samples_per_second": 1087.347, |
|
"eval_steps_per_second": 68.37, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.303072625698324e-05, |
|
"loss": 0.3133, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9684688500703139, |
|
"eval_f1": 0.9654409345999343, |
|
"eval_loss": 0.1092953085899353, |
|
"eval_precision": 0.9633379749353491, |
|
"eval_recall": 0.9675530958422409, |
|
"eval_runtime": 1.5243, |
|
"eval_samples_per_second": 1085.096, |
|
"eval_steps_per_second": 68.229, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9705189678250115, |
|
"eval_f1": 0.9675733365251304, |
|
"eval_loss": 0.09988456219434738, |
|
"eval_precision": 0.9660797514241326, |
|
"eval_recall": 0.9690715470220376, |
|
"eval_runtime": 1.5122, |
|
"eval_samples_per_second": 1093.787, |
|
"eval_steps_per_second": 68.775, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.604748603351956e-05, |
|
"loss": 0.0983, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.9718066450924248, |
|
"eval_f1": 0.9694776714513557, |
|
"eval_loss": 0.09871890395879745, |
|
"eval_precision": 0.9673755197039925, |
|
"eval_recall": 0.971588979241174, |
|
"eval_runtime": 1.5084, |
|
"eval_samples_per_second": 1096.509, |
|
"eval_steps_per_second": 68.946, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.9728401755307433, |
|
"eval_f1": 0.9709789372905697, |
|
"eval_loss": 0.09597848355770111, |
|
"eval_precision": 0.9693355369267836, |
|
"eval_recall": 0.9726279195220875, |
|
"eval_runtime": 1.4942, |
|
"eval_samples_per_second": 1106.933, |
|
"eval_steps_per_second": 69.602, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.9736026160180274, |
|
"eval_f1": 0.9714610733031314, |
|
"eval_loss": 0.09189929813146591, |
|
"eval_precision": 0.9703765723740606, |
|
"eval_recall": 0.9725480010389402, |
|
"eval_runtime": 1.5177, |
|
"eval_samples_per_second": 1089.787, |
|
"eval_steps_per_second": 68.523, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.9064245810055868e-05, |
|
"loss": 0.0695, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.9747039189441047, |
|
"eval_f1": 0.9728139460666986, |
|
"eval_loss": 0.08749764412641525, |
|
"eval_precision": 0.9711674399155732, |
|
"eval_recall": 0.9744660446344728, |
|
"eval_runtime": 1.5082, |
|
"eval_samples_per_second": 1096.655, |
|
"eval_steps_per_second": 68.955, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.974975008895139, |
|
"eval_f1": 0.9729659656365613, |
|
"eval_loss": 0.08767995983362198, |
|
"eval_precision": 0.9712323312761298, |
|
"eval_recall": 0.9747058000839144, |
|
"eval_runtime": 1.4537, |
|
"eval_samples_per_second": 1137.75, |
|
"eval_steps_per_second": 71.539, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2094972067039108e-05, |
|
"loss": 0.0597, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.974636146456346, |
|
"eval_f1": 0.9728004468426774, |
|
"eval_loss": 0.08931880444288254, |
|
"eval_precision": 0.9712794773741237, |
|
"eval_recall": 0.9743261872889653, |
|
"eval_runtime": 1.4451, |
|
"eval_samples_per_second": 1144.553, |
|
"eval_steps_per_second": 71.967, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.975025838260958, |
|
"eval_f1": 0.9732900972811175, |
|
"eval_loss": 0.0905543640255928, |
|
"eval_precision": 0.9720971020847451, |
|
"eval_recall": 0.9744860242552597, |
|
"eval_runtime": 1.4435, |
|
"eval_samples_per_second": 1145.863, |
|
"eval_steps_per_second": 72.049, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.9755510750410871, |
|
"eval_f1": 0.9741129056392965, |
|
"eval_loss": 0.09149234741926193, |
|
"eval_precision": 0.9727446605036659, |
|
"eval_recall": 0.9754850052945995, |
|
"eval_runtime": 1.4454, |
|
"eval_samples_per_second": 1144.307, |
|
"eval_steps_per_second": 71.952, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.511173184357542e-05, |
|
"loss": 0.0458, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.9755171887972077, |
|
"eval_f1": 0.9740555211846648, |
|
"eval_loss": 0.08821560442447662, |
|
"eval_precision": 0.9729681239160337, |
|
"eval_recall": 0.975145351741224, |
|
"eval_runtime": 1.4692, |
|
"eval_samples_per_second": 1125.788, |
|
"eval_steps_per_second": 70.787, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_accuracy": 0.9757374493824232, |
|
"eval_f1": 0.9740832369019593, |
|
"eval_loss": 0.09264585375785828, |
|
"eval_precision": 0.9727053573207419, |
|
"eval_recall": 0.9754650256738127, |
|
"eval_runtime": 1.4438, |
|
"eval_samples_per_second": 1145.593, |
|
"eval_steps_per_second": 72.032, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.128491620111732e-06, |
|
"loss": 0.0383, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.9753985869436302, |
|
"eval_f1": 0.973785013167345, |
|
"eval_loss": 0.093608058989048, |
|
"eval_precision": 0.9723688666653386, |
|
"eval_recall": 0.9752052906035843, |
|
"eval_runtime": 1.444, |
|
"eval_samples_per_second": 1145.395, |
|
"eval_steps_per_second": 72.02, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_accuracy": 0.9756357906507853, |
|
"eval_f1": 0.974050980196538, |
|
"eval_loss": 0.09429396688938141, |
|
"eval_precision": 0.9727602423083055, |
|
"eval_recall": 0.9753451479490919, |
|
"eval_runtime": 1.4392, |
|
"eval_samples_per_second": 1149.213, |
|
"eval_steps_per_second": 72.26, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.9750936107487166, |
|
"eval_f1": 0.9735209019255712, |
|
"eval_loss": 0.09423112124204636, |
|
"eval_precision": 0.9722792403196556, |
|
"eval_recall": 0.9747657389462748, |
|
"eval_runtime": 1.4459, |
|
"eval_samples_per_second": 1143.936, |
|
"eval_steps_per_second": 71.928, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.1452513966480447e-06, |
|
"loss": 0.0344, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3580, |
|
"total_flos": 32358086511744.0, |
|
"train_loss": 0.09274842659188383, |
|
"train_runtime": 230.8308, |
|
"train_samples_per_second": 247.887, |
|
"train_steps_per_second": 15.509 |
|
} |
|
], |
|
"max_steps": 3580, |
|
"num_train_epochs": 4, |
|
"total_flos": 32358086511744.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|