|
{ |
|
"best_metric": 0.13781137764453888, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mldoc/distillbeto/epochs_3_bs_16_lr_2e-5/checkpoint-900", |
|
"epoch": 3.0, |
|
"global_step": 1776, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.7329999804496765, |
|
"eval_loss": 0.9082120656967163, |
|
"eval_runtime": 2.085, |
|
"eval_samples_per_second": 479.613, |
|
"eval_steps_per_second": 30.216, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.8560000061988831, |
|
"eval_loss": 0.47957685589790344, |
|
"eval_runtime": 2.0816, |
|
"eval_samples_per_second": 480.41, |
|
"eval_steps_per_second": 30.266, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.9480000138282776, |
|
"eval_loss": 0.23153278231620789, |
|
"eval_runtime": 2.0846, |
|
"eval_samples_per_second": 479.697, |
|
"eval_steps_per_second": 30.221, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.9309999942779541, |
|
"eval_loss": 0.25234749913215637, |
|
"eval_runtime": 2.0857, |
|
"eval_samples_per_second": 479.464, |
|
"eval_steps_per_second": 30.206, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4391891891891894e-05, |
|
"loss": 0.1569, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9539999961853027, |
|
"eval_loss": 0.18860605359077454, |
|
"eval_runtime": 2.0884, |
|
"eval_samples_per_second": 478.832, |
|
"eval_steps_per_second": 30.166, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.9020000100135803, |
|
"eval_loss": 0.3387507200241089, |
|
"eval_runtime": 2.0825, |
|
"eval_samples_per_second": 480.196, |
|
"eval_steps_per_second": 30.252, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9610000252723694, |
|
"eval_loss": 0.16218014061450958, |
|
"eval_runtime": 2.0844, |
|
"eval_samples_per_second": 479.763, |
|
"eval_steps_per_second": 30.225, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.9509999752044678, |
|
"eval_loss": 0.18978460133075714, |
|
"eval_runtime": 2.0789, |
|
"eval_samples_per_second": 481.026, |
|
"eval_steps_per_second": 30.305, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.9710000157356262, |
|
"eval_loss": 0.13781137764453888, |
|
"eval_runtime": 2.0825, |
|
"eval_samples_per_second": 480.201, |
|
"eval_steps_per_second": 30.253, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.772522522522522e-06, |
|
"loss": 0.0617, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.9610000252723694, |
|
"eval_loss": 0.1706898957490921, |
|
"eval_runtime": 2.0817, |
|
"eval_samples_per_second": 480.376, |
|
"eval_steps_per_second": 30.264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.9599999785423279, |
|
"eval_loss": 0.1884380429983139, |
|
"eval_runtime": 2.08, |
|
"eval_samples_per_second": 480.775, |
|
"eval_steps_per_second": 30.289, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.9570000171661377, |
|
"eval_loss": 0.20238688588142395, |
|
"eval_runtime": 2.0822, |
|
"eval_samples_per_second": 480.271, |
|
"eval_steps_per_second": 30.257, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9589999914169312, |
|
"eval_loss": 0.1709979772567749, |
|
"eval_runtime": 2.0789, |
|
"eval_samples_per_second": 481.035, |
|
"eval_steps_per_second": 30.305, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9610000252723694, |
|
"eval_loss": 0.1705155074596405, |
|
"eval_runtime": 2.0769, |
|
"eval_samples_per_second": 481.48, |
|
"eval_steps_per_second": 30.333, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.141891891891892e-06, |
|
"loss": 0.0353, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.9570000171661377, |
|
"eval_loss": 0.18021175265312195, |
|
"eval_runtime": 2.0858, |
|
"eval_samples_per_second": 479.427, |
|
"eval_steps_per_second": 30.204, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.9520000219345093, |
|
"eval_loss": 0.19292205572128296, |
|
"eval_runtime": 2.0769, |
|
"eval_samples_per_second": 481.487, |
|
"eval_steps_per_second": 30.334, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.9570000171661377, |
|
"eval_loss": 0.1787494271993637, |
|
"eval_runtime": 2.0981, |
|
"eval_samples_per_second": 476.617, |
|
"eval_steps_per_second": 30.027, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1776, |
|
"total_flos": 3142677580766976.0, |
|
"train_loss": 0.077526035609546, |
|
"train_runtime": 3451.0165, |
|
"train_samples_per_second": 8.222, |
|
"train_steps_per_second": 0.515 |
|
} |
|
], |
|
"max_steps": 1776, |
|
"num_train_epochs": 3, |
|
"total_flos": 3142677580766976.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|