|
{ |
|
"best_metric": 0.8181818181818182, |
|
"best_model_checkpoint": "beit-base-patch16-224-hasta-85-fold5/checkpoint-61", |
|
"epoch": 100.0, |
|
"eval_steps": 500, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5454545454545454, |
|
"eval_loss": 1.0103095769882202, |
|
"eval_runtime": 0.1623, |
|
"eval_samples_per_second": 67.785, |
|
"eval_steps_per_second": 6.162, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6363636363636364, |
|
"eval_loss": 0.8045657873153687, |
|
"eval_runtime": 0.1606, |
|
"eval_samples_per_second": 68.48, |
|
"eval_steps_per_second": 6.225, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 0.737779974937439, |
|
"eval_runtime": 0.1609, |
|
"eval_samples_per_second": 68.384, |
|
"eval_steps_per_second": 6.217, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.067632794380188, |
|
"eval_runtime": 0.1648, |
|
"eval_samples_per_second": 66.756, |
|
"eval_steps_per_second": 6.069, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3956468105316162, |
|
"eval_runtime": 0.1683, |
|
"eval_samples_per_second": 65.354, |
|
"eval_steps_per_second": 5.941, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5799392461776733, |
|
"eval_runtime": 0.1676, |
|
"eval_samples_per_second": 65.649, |
|
"eval_steps_per_second": 5.968, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.606094479560852, |
|
"eval_runtime": 0.1674, |
|
"eval_samples_per_second": 65.697, |
|
"eval_steps_per_second": 5.972, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4350547790527344, |
|
"eval_runtime": 0.1678, |
|
"eval_samples_per_second": 65.537, |
|
"eval_steps_per_second": 5.958, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3295589685440063, |
|
"eval_runtime": 0.1708, |
|
"eval_samples_per_second": 64.389, |
|
"eval_steps_per_second": 5.854, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.930849075317383, |
|
"learning_rate": 5e-05, |
|
"loss": 0.39, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2815629243850708, |
|
"eval_runtime": 0.161, |
|
"eval_samples_per_second": 68.322, |
|
"eval_steps_per_second": 6.211, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.265523910522461, |
|
"eval_runtime": 0.1668, |
|
"eval_samples_per_second": 65.937, |
|
"eval_steps_per_second": 5.994, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2039612531661987, |
|
"eval_runtime": 0.1757, |
|
"eval_samples_per_second": 62.617, |
|
"eval_steps_per_second": 5.692, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.0664290189743042, |
|
"eval_runtime": 0.1648, |
|
"eval_samples_per_second": 66.739, |
|
"eval_steps_per_second": 6.067, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.0846155881881714, |
|
"eval_runtime": 0.1608, |
|
"eval_samples_per_second": 68.402, |
|
"eval_steps_per_second": 6.218, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.214535117149353, |
|
"eval_runtime": 0.1642, |
|
"eval_samples_per_second": 66.984, |
|
"eval_steps_per_second": 6.089, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4681614637374878, |
|
"eval_runtime": 0.1636, |
|
"eval_samples_per_second": 67.239, |
|
"eval_steps_per_second": 6.113, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4473122358322144, |
|
"eval_runtime": 0.1607, |
|
"eval_samples_per_second": 68.471, |
|
"eval_steps_per_second": 6.225, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2699377536773682, |
|
"eval_runtime": 0.1669, |
|
"eval_samples_per_second": 65.907, |
|
"eval_steps_per_second": 5.992, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.246677041053772, |
|
"eval_runtime": 0.2573, |
|
"eval_samples_per_second": 42.747, |
|
"eval_steps_per_second": 3.886, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 2.567878484725952, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.1832, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2648237943649292, |
|
"eval_runtime": 0.1623, |
|
"eval_samples_per_second": 67.792, |
|
"eval_steps_per_second": 6.163, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2914077043533325, |
|
"eval_runtime": 0.1601, |
|
"eval_samples_per_second": 68.712, |
|
"eval_steps_per_second": 6.247, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3443835973739624, |
|
"eval_runtime": 0.1569, |
|
"eval_samples_per_second": 70.118, |
|
"eval_steps_per_second": 6.374, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5324546098709106, |
|
"eval_runtime": 0.1662, |
|
"eval_samples_per_second": 66.183, |
|
"eval_steps_per_second": 6.017, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6140273809432983, |
|
"eval_runtime": 0.1646, |
|
"eval_samples_per_second": 66.822, |
|
"eval_steps_per_second": 6.075, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6262385845184326, |
|
"eval_runtime": 0.1635, |
|
"eval_samples_per_second": 67.284, |
|
"eval_steps_per_second": 6.117, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6752601861953735, |
|
"eval_runtime": 0.1704, |
|
"eval_samples_per_second": 64.572, |
|
"eval_steps_per_second": 5.87, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.653100848197937, |
|
"eval_runtime": 0.1637, |
|
"eval_samples_per_second": 67.204, |
|
"eval_steps_per_second": 6.109, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7095893621444702, |
|
"eval_runtime": 0.1629, |
|
"eval_samples_per_second": 67.544, |
|
"eval_steps_per_second": 6.14, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.666225790977478, |
|
"eval_runtime": 0.1364, |
|
"eval_samples_per_second": 80.626, |
|
"eval_steps_per_second": 7.33, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 2.4013149738311768, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.1194, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5768946409225464, |
|
"eval_runtime": 0.1615, |
|
"eval_samples_per_second": 68.132, |
|
"eval_steps_per_second": 6.194, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4446830749511719, |
|
"eval_runtime": 0.1657, |
|
"eval_samples_per_second": 66.386, |
|
"eval_steps_per_second": 6.035, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.264360785484314, |
|
"eval_runtime": 0.1666, |
|
"eval_samples_per_second": 66.03, |
|
"eval_steps_per_second": 6.003, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2033320665359497, |
|
"eval_runtime": 0.1596, |
|
"eval_samples_per_second": 68.902, |
|
"eval_steps_per_second": 6.264, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.270344614982605, |
|
"eval_runtime": 0.1609, |
|
"eval_samples_per_second": 68.347, |
|
"eval_steps_per_second": 6.213, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4491862058639526, |
|
"eval_runtime": 0.1633, |
|
"eval_samples_per_second": 67.359, |
|
"eval_steps_per_second": 6.124, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5890190601348877, |
|
"eval_runtime": 0.1696, |
|
"eval_samples_per_second": 64.877, |
|
"eval_steps_per_second": 5.898, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.569095253944397, |
|
"eval_runtime": 0.1639, |
|
"eval_samples_per_second": 67.106, |
|
"eval_steps_per_second": 6.101, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.412721872329712, |
|
"eval_runtime": 0.1576, |
|
"eval_samples_per_second": 69.794, |
|
"eval_steps_per_second": 6.345, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3179062604904175, |
|
"eval_runtime": 0.1665, |
|
"eval_samples_per_second": 66.079, |
|
"eval_steps_per_second": 6.007, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 4.264218807220459, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0783, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.2985975742340088, |
|
"eval_runtime": 0.1572, |
|
"eval_samples_per_second": 69.994, |
|
"eval_steps_per_second": 6.363, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.3181326389312744, |
|
"eval_runtime": 0.1636, |
|
"eval_samples_per_second": 67.239, |
|
"eval_steps_per_second": 6.113, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.4252508878707886, |
|
"eval_runtime": 0.1627, |
|
"eval_samples_per_second": 67.613, |
|
"eval_steps_per_second": 6.147, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5179203748703003, |
|
"eval_runtime": 0.1669, |
|
"eval_samples_per_second": 65.914, |
|
"eval_steps_per_second": 5.992, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.568513035774231, |
|
"eval_runtime": 0.172, |
|
"eval_samples_per_second": 63.949, |
|
"eval_steps_per_second": 5.814, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.5695762634277344, |
|
"eval_runtime": 0.1674, |
|
"eval_samples_per_second": 65.707, |
|
"eval_steps_per_second": 5.973, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.757134199142456, |
|
"eval_runtime": 0.1629, |
|
"eval_samples_per_second": 67.512, |
|
"eval_steps_per_second": 6.137, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.912243366241455, |
|
"eval_runtime": 0.1651, |
|
"eval_samples_per_second": 66.609, |
|
"eval_steps_per_second": 6.055, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1062092781066895, |
|
"eval_runtime": 0.1666, |
|
"eval_samples_per_second": 66.025, |
|
"eval_steps_per_second": 6.002, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1660807132720947, |
|
"eval_runtime": 0.1649, |
|
"eval_samples_per_second": 66.721, |
|
"eval_steps_per_second": 6.066, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"grad_norm": 4.0951247215271, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.056, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.183253526687622, |
|
"eval_runtime": 0.1672, |
|
"eval_samples_per_second": 65.787, |
|
"eval_steps_per_second": 5.981, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.240224599838257, |
|
"eval_runtime": 0.1611, |
|
"eval_samples_per_second": 68.291, |
|
"eval_steps_per_second": 6.208, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.300673723220825, |
|
"eval_runtime": 0.1648, |
|
"eval_samples_per_second": 66.765, |
|
"eval_steps_per_second": 6.07, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.369180679321289, |
|
"eval_runtime": 0.1588, |
|
"eval_samples_per_second": 69.263, |
|
"eval_steps_per_second": 6.297, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.382084608078003, |
|
"eval_runtime": 0.1641, |
|
"eval_samples_per_second": 67.023, |
|
"eval_steps_per_second": 6.093, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2715866565704346, |
|
"eval_runtime": 0.1645, |
|
"eval_samples_per_second": 66.867, |
|
"eval_steps_per_second": 6.079, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.048184871673584, |
|
"eval_runtime": 0.1627, |
|
"eval_samples_per_second": 67.596, |
|
"eval_steps_per_second": 6.145, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.878261685371399, |
|
"eval_runtime": 0.1665, |
|
"eval_samples_per_second": 66.071, |
|
"eval_steps_per_second": 6.006, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7967017889022827, |
|
"eval_runtime": 0.1646, |
|
"eval_samples_per_second": 66.839, |
|
"eval_steps_per_second": 6.076, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.7035846710205078, |
|
"eval_runtime": 0.166, |
|
"eval_samples_per_second": 66.247, |
|
"eval_steps_per_second": 6.022, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 1.3352000713348389, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.052, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.6389153003692627, |
|
"eval_runtime": 0.1645, |
|
"eval_samples_per_second": 66.885, |
|
"eval_steps_per_second": 6.08, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.635382056236267, |
|
"eval_runtime": 0.1649, |
|
"eval_samples_per_second": 66.706, |
|
"eval_steps_per_second": 6.064, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.685198187828064, |
|
"eval_runtime": 0.45, |
|
"eval_samples_per_second": 24.444, |
|
"eval_steps_per_second": 2.222, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.818864107131958, |
|
"eval_runtime": 0.1598, |
|
"eval_samples_per_second": 68.848, |
|
"eval_steps_per_second": 6.259, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 1.9682955741882324, |
|
"eval_runtime": 0.1685, |
|
"eval_samples_per_second": 65.269, |
|
"eval_steps_per_second": 5.934, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.016566753387451, |
|
"eval_runtime": 0.1665, |
|
"eval_samples_per_second": 66.08, |
|
"eval_steps_per_second": 6.007, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0854508876800537, |
|
"eval_runtime": 0.1653, |
|
"eval_samples_per_second": 66.551, |
|
"eval_steps_per_second": 6.05, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1358840465545654, |
|
"eval_runtime": 0.1723, |
|
"eval_samples_per_second": 63.824, |
|
"eval_steps_per_second": 5.802, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.246534585952759, |
|
"eval_runtime": 0.1636, |
|
"eval_samples_per_second": 67.22, |
|
"eval_steps_per_second": 6.111, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2680304050445557, |
|
"eval_runtime": 0.1596, |
|
"eval_samples_per_second": 68.909, |
|
"eval_steps_per_second": 6.264, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"grad_norm": 1.2708288431167603, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0276, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2728347778320312, |
|
"eval_runtime": 0.163, |
|
"eval_samples_per_second": 67.468, |
|
"eval_steps_per_second": 6.133, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.282043218612671, |
|
"eval_runtime": 0.1579, |
|
"eval_samples_per_second": 69.686, |
|
"eval_steps_per_second": 6.335, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2427427768707275, |
|
"eval_runtime": 0.1656, |
|
"eval_samples_per_second": 66.429, |
|
"eval_steps_per_second": 6.039, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.20658540725708, |
|
"eval_runtime": 0.171, |
|
"eval_samples_per_second": 64.32, |
|
"eval_steps_per_second": 5.847, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2434442043304443, |
|
"eval_runtime": 0.1632, |
|
"eval_samples_per_second": 67.392, |
|
"eval_steps_per_second": 6.127, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.3205888271331787, |
|
"eval_runtime": 0.1636, |
|
"eval_samples_per_second": 67.241, |
|
"eval_steps_per_second": 6.113, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.4407994747161865, |
|
"eval_runtime": 0.1664, |
|
"eval_samples_per_second": 66.119, |
|
"eval_steps_per_second": 6.011, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.480969190597534, |
|
"eval_runtime": 0.1628, |
|
"eval_samples_per_second": 67.577, |
|
"eval_steps_per_second": 6.143, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.509140729904175, |
|
"eval_runtime": 0.1672, |
|
"eval_samples_per_second": 65.806, |
|
"eval_steps_per_second": 5.982, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.486177682876587, |
|
"eval_runtime": 0.1683, |
|
"eval_samples_per_second": 65.368, |
|
"eval_steps_per_second": 5.943, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 2.5931143760681152, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0411, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.4502298831939697, |
|
"eval_runtime": 0.1622, |
|
"eval_samples_per_second": 67.834, |
|
"eval_steps_per_second": 6.167, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.4204084873199463, |
|
"eval_runtime": 0.163, |
|
"eval_samples_per_second": 67.475, |
|
"eval_steps_per_second": 6.134, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.3837873935699463, |
|
"eval_runtime": 0.1661, |
|
"eval_samples_per_second": 66.226, |
|
"eval_steps_per_second": 6.021, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.343120813369751, |
|
"eval_runtime": 0.1614, |
|
"eval_samples_per_second": 68.159, |
|
"eval_steps_per_second": 6.196, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.292693614959717, |
|
"eval_runtime": 0.1662, |
|
"eval_samples_per_second": 66.18, |
|
"eval_steps_per_second": 6.016, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2180917263031006, |
|
"eval_runtime": 0.1665, |
|
"eval_samples_per_second": 66.081, |
|
"eval_steps_per_second": 6.007, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1632635593414307, |
|
"eval_runtime": 0.1684, |
|
"eval_samples_per_second": 65.315, |
|
"eval_steps_per_second": 5.938, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0965707302093506, |
|
"eval_runtime": 0.165, |
|
"eval_samples_per_second": 66.655, |
|
"eval_steps_per_second": 6.06, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.053642988204956, |
|
"eval_runtime": 0.1705, |
|
"eval_samples_per_second": 64.51, |
|
"eval_steps_per_second": 5.865, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0427496433258057, |
|
"eval_runtime": 0.1615, |
|
"eval_samples_per_second": 68.103, |
|
"eval_steps_per_second": 6.191, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"grad_norm": 1.728418231010437, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0317, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0523698329925537, |
|
"eval_runtime": 0.1681, |
|
"eval_samples_per_second": 65.445, |
|
"eval_steps_per_second": 5.95, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.0489087104797363, |
|
"eval_runtime": 0.1658, |
|
"eval_samples_per_second": 66.365, |
|
"eval_steps_per_second": 6.033, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.064781904220581, |
|
"eval_runtime": 0.1669, |
|
"eval_samples_per_second": 65.891, |
|
"eval_steps_per_second": 5.99, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.094611644744873, |
|
"eval_runtime": 0.1646, |
|
"eval_samples_per_second": 66.836, |
|
"eval_steps_per_second": 6.076, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.115490436553955, |
|
"eval_runtime": 0.1669, |
|
"eval_samples_per_second": 65.919, |
|
"eval_steps_per_second": 5.993, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1469366550445557, |
|
"eval_runtime": 0.1614, |
|
"eval_samples_per_second": 68.154, |
|
"eval_steps_per_second": 6.196, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.1768057346343994, |
|
"eval_runtime": 0.1696, |
|
"eval_samples_per_second": 64.857, |
|
"eval_steps_per_second": 5.896, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2026336193084717, |
|
"eval_runtime": 0.1585, |
|
"eval_samples_per_second": 69.396, |
|
"eval_steps_per_second": 6.309, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2204527854919434, |
|
"eval_runtime": 0.1606, |
|
"eval_samples_per_second": 68.492, |
|
"eval_steps_per_second": 6.227, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.2304117679595947, |
|
"eval_runtime": 0.1642, |
|
"eval_samples_per_second": 67.008, |
|
"eval_steps_per_second": 6.092, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"grad_norm": 4.493223190307617, |
|
"learning_rate": 0.0, |
|
"loss": 0.0394, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.7272727272727273, |
|
"eval_loss": 2.234973430633545, |
|
"eval_runtime": 0.1591, |
|
"eval_samples_per_second": 69.124, |
|
"eval_steps_per_second": 6.284, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 100, |
|
"total_flos": 4.570114227259392e+17, |
|
"train_loss": 0.10187353670597077, |
|
"train_runtime": 408.9093, |
|
"train_samples_per_second": 14.429, |
|
"train_steps_per_second": 0.245 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.8181818181818182, |
|
"eval_loss": 1.635382056236267, |
|
"eval_runtime": 0.2152, |
|
"eval_samples_per_second": 51.111, |
|
"eval_steps_per_second": 4.646, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.570114227259392e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|