|
{ |
|
"best_metric": 0.4863630663948402, |
|
"best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-4276", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 4276, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 3.7270963191986084, |
|
"learning_rate": 1.0277702544991746e-05, |
|
"loss": 0.5849, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 16.02177619934082, |
|
"learning_rate": 9.408772671387439e-06, |
|
"loss": 0.5575, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 15.67810344696045, |
|
"learning_rate": 8.539842797783132e-06, |
|
"loss": 0.5369, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 19.78717041015625, |
|
"learning_rate": 7.670912924178823e-06, |
|
"loss": 0.5402, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.5537068843841553, |
|
"eval_matthews_correlation": 0.472806497515492, |
|
"eval_runtime": 0.7082, |
|
"eval_samples_per_second": 1472.658, |
|
"eval_steps_per_second": 93.188, |
|
"step": 2138 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 10.657214164733887, |
|
"learning_rate": 6.801983050574517e-06, |
|
"loss": 0.4772, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 9.260273933410645, |
|
"learning_rate": 5.933053176970209e-06, |
|
"loss": 0.4307, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.3461722433567047, |
|
"learning_rate": 5.064123303365902e-06, |
|
"loss": 0.4898, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 9.188729286193848, |
|
"learning_rate": 4.195193429761595e-06, |
|
"loss": 0.4323, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.8039517402648926, |
|
"eval_matthews_correlation": 0.4863630663948402, |
|
"eval_runtime": 0.7133, |
|
"eval_samples_per_second": 1462.131, |
|
"eval_steps_per_second": 92.522, |
|
"step": 4276 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6414, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 65242522366572.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 1.1146632418596053e-05, |
|
"num_train_epochs": 3, |
|
"per_device_train_batch_size": 4, |
|
"seed": 14 |
|
} |
|
} |
|
|