|
{ |
|
"best_metric": 0.2235843539237976, |
|
"best_model_checkpoint": "nsfw-images-2/checkpoint-1868", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1868, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013383297644539615, |
|
"grad_norm": 18.170637130737305, |
|
"learning_rate": 1.2312633832976446e-05, |
|
"loss": 0.7179, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02676659528907923, |
|
"grad_norm": 3.0997555255889893, |
|
"learning_rate": 2.569593147751606e-05, |
|
"loss": 0.3338, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04014989293361884, |
|
"grad_norm": 31.291412353515625, |
|
"learning_rate": 3.9079229122055675e-05, |
|
"loss": 0.3848, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05353319057815846, |
|
"grad_norm": 0.03622590750455856, |
|
"learning_rate": 5.246252676659529e-05, |
|
"loss": 0.0596, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06691648822269808, |
|
"grad_norm": 26.821062088012695, |
|
"learning_rate": 6.58458244111349e-05, |
|
"loss": 0.3471, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08029978586723768, |
|
"grad_norm": 0.011622205376625061, |
|
"learning_rate": 7.922912205567452e-05, |
|
"loss": 0.1036, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0936830835117773, |
|
"grad_norm": 0.07030396908521652, |
|
"learning_rate": 9.261241970021414e-05, |
|
"loss": 0.0683, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.10706638115631692, |
|
"grad_norm": 0.1393502801656723, |
|
"learning_rate": 0.00010599571734475374, |
|
"loss": 0.3868, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12044967880085652, |
|
"grad_norm": 0.019323009997606277, |
|
"learning_rate": 0.00011937901498929336, |
|
"loss": 0.5266, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.13383297644539616, |
|
"grad_norm": 0.02416042983531952, |
|
"learning_rate": 0.000132762312633833, |
|
"loss": 0.2695, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.14721627408993576, |
|
"grad_norm": 2.2118706703186035, |
|
"learning_rate": 0.0001461456102783726, |
|
"loss": 0.4989, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16059957173447537, |
|
"grad_norm": 11.72065258026123, |
|
"learning_rate": 0.0001595289079229122, |
|
"loss": 0.3731, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.173982869379015, |
|
"grad_norm": 0.0027222177013754845, |
|
"learning_rate": 0.00017291220556745181, |
|
"loss": 0.1109, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1873661670235546, |
|
"grad_norm": 0.0008230574312619865, |
|
"learning_rate": 0.00018629550321199142, |
|
"loss": 0.2094, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2007494646680942, |
|
"grad_norm": 0.036911237984895706, |
|
"learning_rate": 0.00019967880085653106, |
|
"loss": 0.6709, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.21413276231263384, |
|
"grad_norm": 20.274892807006836, |
|
"learning_rate": 0.00021306209850107066, |
|
"loss": 0.5297, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22751605995717344, |
|
"grad_norm": 0.04864942654967308, |
|
"learning_rate": 0.00022644539614561027, |
|
"loss": 0.1467, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.24089935760171305, |
|
"grad_norm": 0.12074282765388489, |
|
"learning_rate": 0.0002398286937901499, |
|
"loss": 0.6298, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.25428265524625265, |
|
"grad_norm": 0.11845938861370087, |
|
"learning_rate": 0.00025321199143468954, |
|
"loss": 0.257, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2676659528907923, |
|
"grad_norm": 0.09819821268320084, |
|
"learning_rate": 0.00026659528907922915, |
|
"loss": 0.8714, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2810492505353319, |
|
"grad_norm": 0.09790199995040894, |
|
"learning_rate": 0.00027997858672376875, |
|
"loss": 0.2357, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2944325481798715, |
|
"grad_norm": 0.5837295055389404, |
|
"learning_rate": 0.00029336188436830836, |
|
"loss": 0.7943, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.3078158458244111, |
|
"grad_norm": 0.14801590144634247, |
|
"learning_rate": 0.00030674518201284797, |
|
"loss": 0.4615, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.32119914346895073, |
|
"grad_norm": 0.05935389921069145, |
|
"learning_rate": 0.0003201284796573876, |
|
"loss": 0.2401, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.33458244111349034, |
|
"grad_norm": 12.694086074829102, |
|
"learning_rate": 0.00033297644539614566, |
|
"loss": 0.6572, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.34796573875803, |
|
"grad_norm": 0.05311077833175659, |
|
"learning_rate": 0.00034635974304068526, |
|
"loss": 0.3569, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3613490364025696, |
|
"grad_norm": 0.03182295709848404, |
|
"learning_rate": 0.0003597430406852248, |
|
"loss": 0.1498, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.3747323340471092, |
|
"grad_norm": 0.33807849884033203, |
|
"learning_rate": 0.0003731263383297644, |
|
"loss": 0.3766, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3881156316916488, |
|
"grad_norm": 0.08421733975410461, |
|
"learning_rate": 0.00038650963597430403, |
|
"loss": 0.2979, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4014989293361884, |
|
"grad_norm": 0.01755744405090809, |
|
"learning_rate": 0.0003998929336188437, |
|
"loss": 0.0031, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4148822269807281, |
|
"grad_norm": 0.21543407440185547, |
|
"learning_rate": 0.0004132762312633833, |
|
"loss": 0.4799, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.4282655246252677, |
|
"grad_norm": 0.18580827116966248, |
|
"learning_rate": 0.0004266595289079229, |
|
"loss": 0.3885, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4416488222698073, |
|
"grad_norm": 11.58962631225586, |
|
"learning_rate": 0.0004400428265524625, |
|
"loss": 0.3268, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4550321199143469, |
|
"grad_norm": 0.0360148549079895, |
|
"learning_rate": 0.0004534261241970021, |
|
"loss": 0.3201, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4684154175588865, |
|
"grad_norm": 0.09959446638822556, |
|
"learning_rate": 0.0004668094218415418, |
|
"loss": 0.2712, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.4817987152034261, |
|
"grad_norm": 0.25127506256103516, |
|
"learning_rate": 0.0004801927194860814, |
|
"loss": 0.3106, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.49518201284796576, |
|
"grad_norm": 11.287735939025879, |
|
"learning_rate": 0.000493576017130621, |
|
"loss": 0.2296, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5085653104925053, |
|
"grad_norm": 10.859394073486328, |
|
"learning_rate": 0.0004992267428027599, |
|
"loss": 0.3712, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.521948608137045, |
|
"grad_norm": 0.18585732579231262, |
|
"learning_rate": 0.0004977397097311444, |
|
"loss": 0.3561, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5353319057815846, |
|
"grad_norm": 14.576325416564941, |
|
"learning_rate": 0.000496252676659529, |
|
"loss": 0.3283, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5487152034261242, |
|
"grad_norm": 0.00898987427353859, |
|
"learning_rate": 0.0004947656435879134, |
|
"loss": 0.0033, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5620985010706638, |
|
"grad_norm": 0.27176257967948914, |
|
"learning_rate": 0.0004932786105162979, |
|
"loss": 0.5933, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5754817987152034, |
|
"grad_norm": 0.012289056554436684, |
|
"learning_rate": 0.0004917915774446823, |
|
"loss": 0.1723, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.588865096359743, |
|
"grad_norm": 0.050152458250522614, |
|
"learning_rate": 0.0004903045443730669, |
|
"loss": 0.5719, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6022483940042827, |
|
"grad_norm": 0.6666317582130432, |
|
"learning_rate": 0.0004888175113014514, |
|
"loss": 0.7938, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6156316916488223, |
|
"grad_norm": 0.35938775539398193, |
|
"learning_rate": 0.00048733047822983584, |
|
"loss": 0.4964, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6290149892933619, |
|
"grad_norm": 0.08501864224672318, |
|
"learning_rate": 0.0004858434451582203, |
|
"loss": 0.2737, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6423982869379015, |
|
"grad_norm": 0.02361874282360077, |
|
"learning_rate": 0.0004843564120866048, |
|
"loss": 0.15, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6557815845824411, |
|
"grad_norm": 0.05745585262775421, |
|
"learning_rate": 0.0004828693790149893, |
|
"loss": 0.2676, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6691648822269807, |
|
"grad_norm": 0.15592187643051147, |
|
"learning_rate": 0.00048138234594337375, |
|
"loss": 0.3195, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6825481798715204, |
|
"grad_norm": 0.01977529190480709, |
|
"learning_rate": 0.0004798953128717583, |
|
"loss": 0.1439, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.69593147751606, |
|
"grad_norm": 11.418144226074219, |
|
"learning_rate": 0.0004784082798001428, |
|
"loss": 0.2578, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.7093147751605996, |
|
"grad_norm": 0.12220004945993423, |
|
"learning_rate": 0.00047692124672852725, |
|
"loss": 0.2181, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.7226980728051392, |
|
"grad_norm": 0.2515058219432831, |
|
"learning_rate": 0.00047543421365691177, |
|
"loss": 0.4207, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7360813704496788, |
|
"grad_norm": 10.930126190185547, |
|
"learning_rate": 0.00047394718058529624, |
|
"loss": 0.3217, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7494646680942184, |
|
"grad_norm": 0.1796221137046814, |
|
"learning_rate": 0.0004724601475136807, |
|
"loss": 0.4147, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.762847965738758, |
|
"grad_norm": 0.12057535350322723, |
|
"learning_rate": 0.0004709731144420652, |
|
"loss": 0.2845, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.7762312633832976, |
|
"grad_norm": 0.6278265714645386, |
|
"learning_rate": 0.0004694860813704497, |
|
"loss": 0.6357, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7896145610278372, |
|
"grad_norm": 0.08220059424638748, |
|
"learning_rate": 0.00046799904829883415, |
|
"loss": 0.4283, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.8029978586723768, |
|
"grad_norm": 10.042654991149902, |
|
"learning_rate": 0.00046651201522721866, |
|
"loss": 0.3957, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8163811563169164, |
|
"grad_norm": 0.20416533946990967, |
|
"learning_rate": 0.00046502498215560313, |
|
"loss": 0.4242, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.8297644539614561, |
|
"grad_norm": 0.12127134948968887, |
|
"learning_rate": 0.00046353794908398765, |
|
"loss": 0.1901, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8431477516059958, |
|
"grad_norm": 0.07789531350135803, |
|
"learning_rate": 0.00046205091601237216, |
|
"loss": 0.2266, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.8565310492505354, |
|
"grad_norm": 0.03442813456058502, |
|
"learning_rate": 0.00046056388294075663, |
|
"loss": 0.2767, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.869914346895075, |
|
"grad_norm": 0.2261192798614502, |
|
"learning_rate": 0.0004590768498691411, |
|
"loss": 0.3227, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.8832976445396146, |
|
"grad_norm": 0.05203871801495552, |
|
"learning_rate": 0.0004575898167975256, |
|
"loss": 0.1059, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8966809421841542, |
|
"grad_norm": 0.17587050795555115, |
|
"learning_rate": 0.0004561027837259101, |
|
"loss": 0.4627, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.9100642398286938, |
|
"grad_norm": 0.04208080843091011, |
|
"learning_rate": 0.00045461575065429454, |
|
"loss": 0.1095, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9234475374732334, |
|
"grad_norm": 0.07704867422580719, |
|
"learning_rate": 0.00045312871758267906, |
|
"loss": 0.2484, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.936830835117773, |
|
"grad_norm": 9.084434509277344, |
|
"learning_rate": 0.0004516416845110635, |
|
"loss": 0.4368, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9502141327623126, |
|
"grad_norm": 0.09450817853212357, |
|
"learning_rate": 0.000450154651439448, |
|
"loss": 0.2326, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.9635974304068522, |
|
"grad_norm": 0.27564939856529236, |
|
"learning_rate": 0.0004486676183678325, |
|
"loss": 0.5406, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9769807280513919, |
|
"grad_norm": 0.1096988320350647, |
|
"learning_rate": 0.000447180585296217, |
|
"loss": 0.2194, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.9903640256959315, |
|
"grad_norm": 0.06793688237667084, |
|
"learning_rate": 0.0004456935522246015, |
|
"loss": 0.3206, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9325481798715204, |
|
"eval_auc": 0.8326772729757805, |
|
"eval_f1": 0.0, |
|
"eval_loss": 0.2235843539237976, |
|
"eval_precision": 0.0, |
|
"eval_recall": 0.0, |
|
"eval_runtime": 12.9282, |
|
"eval_samples_per_second": 72.245, |
|
"eval_steps_per_second": 18.1, |
|
"step": 1868 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 9340, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.894325812105011e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|