{
"best_metric": 0.2235843539237976,
"best_model_checkpoint": "nsfw-images-2/checkpoint-1868",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1868,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013383297644539615,
"grad_norm": 18.170637130737305,
"learning_rate": 1.2312633832976446e-05,
"loss": 0.7179,
"step": 25
},
{
"epoch": 0.02676659528907923,
"grad_norm": 3.0997555255889893,
"learning_rate": 2.569593147751606e-05,
"loss": 0.3338,
"step": 50
},
{
"epoch": 0.04014989293361884,
"grad_norm": 31.291412353515625,
"learning_rate": 3.9079229122055675e-05,
"loss": 0.3848,
"step": 75
},
{
"epoch": 0.05353319057815846,
"grad_norm": 0.03622590750455856,
"learning_rate": 5.246252676659529e-05,
"loss": 0.0596,
"step": 100
},
{
"epoch": 0.06691648822269808,
"grad_norm": 26.821062088012695,
"learning_rate": 6.58458244111349e-05,
"loss": 0.3471,
"step": 125
},
{
"epoch": 0.08029978586723768,
"grad_norm": 0.011622205376625061,
"learning_rate": 7.922912205567452e-05,
"loss": 0.1036,
"step": 150
},
{
"epoch": 0.0936830835117773,
"grad_norm": 0.07030396908521652,
"learning_rate": 9.261241970021414e-05,
"loss": 0.0683,
"step": 175
},
{
"epoch": 0.10706638115631692,
"grad_norm": 0.1393502801656723,
"learning_rate": 0.00010599571734475374,
"loss": 0.3868,
"step": 200
},
{
"epoch": 0.12044967880085652,
"grad_norm": 0.019323009997606277,
"learning_rate": 0.00011937901498929336,
"loss": 0.5266,
"step": 225
},
{
"epoch": 0.13383297644539616,
"grad_norm": 0.02416042983531952,
"learning_rate": 0.000132762312633833,
"loss": 0.2695,
"step": 250
},
{
"epoch": 0.14721627408993576,
"grad_norm": 2.2118706703186035,
"learning_rate": 0.0001461456102783726,
"loss": 0.4989,
"step": 275
},
{
"epoch": 0.16059957173447537,
"grad_norm": 11.72065258026123,
"learning_rate": 0.0001595289079229122,
"loss": 0.3731,
"step": 300
},
{
"epoch": 0.173982869379015,
"grad_norm": 0.0027222177013754845,
"learning_rate": 0.00017291220556745181,
"loss": 0.1109,
"step": 325
},
{
"epoch": 0.1873661670235546,
"grad_norm": 0.0008230574312619865,
"learning_rate": 0.00018629550321199142,
"loss": 0.2094,
"step": 350
},
{
"epoch": 0.2007494646680942,
"grad_norm": 0.036911237984895706,
"learning_rate": 0.00019967880085653106,
"loss": 0.6709,
"step": 375
},
{
"epoch": 0.21413276231263384,
"grad_norm": 20.274892807006836,
"learning_rate": 0.00021306209850107066,
"loss": 0.5297,
"step": 400
},
{
"epoch": 0.22751605995717344,
"grad_norm": 0.04864942654967308,
"learning_rate": 0.00022644539614561027,
"loss": 0.1467,
"step": 425
},
{
"epoch": 0.24089935760171305,
"grad_norm": 0.12074282765388489,
"learning_rate": 0.0002398286937901499,
"loss": 0.6298,
"step": 450
},
{
"epoch": 0.25428265524625265,
"grad_norm": 0.11845938861370087,
"learning_rate": 0.00025321199143468954,
"loss": 0.257,
"step": 475
},
{
"epoch": 0.2676659528907923,
"grad_norm": 0.09819821268320084,
"learning_rate": 0.00026659528907922915,
"loss": 0.8714,
"step": 500
},
{
"epoch": 0.2810492505353319,
"grad_norm": 0.09790199995040894,
"learning_rate": 0.00027997858672376875,
"loss": 0.2357,
"step": 525
},
{
"epoch": 0.2944325481798715,
"grad_norm": 0.5837295055389404,
"learning_rate": 0.00029336188436830836,
"loss": 0.7943,
"step": 550
},
{
"epoch": 0.3078158458244111,
"grad_norm": 0.14801590144634247,
"learning_rate": 0.00030674518201284797,
"loss": 0.4615,
"step": 575
},
{
"epoch": 0.32119914346895073,
"grad_norm": 0.05935389921069145,
"learning_rate": 0.0003201284796573876,
"loss": 0.2401,
"step": 600
},
{
"epoch": 0.33458244111349034,
"grad_norm": 12.694086074829102,
"learning_rate": 0.00033297644539614566,
"loss": 0.6572,
"step": 625
},
{
"epoch": 0.34796573875803,
"grad_norm": 0.05311077833175659,
"learning_rate": 0.00034635974304068526,
"loss": 0.3569,
"step": 650
},
{
"epoch": 0.3613490364025696,
"grad_norm": 0.03182295709848404,
"learning_rate": 0.0003597430406852248,
"loss": 0.1498,
"step": 675
},
{
"epoch": 0.3747323340471092,
"grad_norm": 0.33807849884033203,
"learning_rate": 0.0003731263383297644,
"loss": 0.3766,
"step": 700
},
{
"epoch": 0.3881156316916488,
"grad_norm": 0.08421733975410461,
"learning_rate": 0.00038650963597430403,
"loss": 0.2979,
"step": 725
},
{
"epoch": 0.4014989293361884,
"grad_norm": 0.01755744405090809,
"learning_rate": 0.0003998929336188437,
"loss": 0.0031,
"step": 750
},
{
"epoch": 0.4148822269807281,
"grad_norm": 0.21543407440185547,
"learning_rate": 0.0004132762312633833,
"loss": 0.4799,
"step": 775
},
{
"epoch": 0.4282655246252677,
"grad_norm": 0.18580827116966248,
"learning_rate": 0.0004266595289079229,
"loss": 0.3885,
"step": 800
},
{
"epoch": 0.4416488222698073,
"grad_norm": 11.58962631225586,
"learning_rate": 0.0004400428265524625,
"loss": 0.3268,
"step": 825
},
{
"epoch": 0.4550321199143469,
"grad_norm": 0.0360148549079895,
"learning_rate": 0.0004534261241970021,
"loss": 0.3201,
"step": 850
},
{
"epoch": 0.4684154175588865,
"grad_norm": 0.09959446638822556,
"learning_rate": 0.0004668094218415418,
"loss": 0.2712,
"step": 875
},
{
"epoch": 0.4817987152034261,
"grad_norm": 0.25127506256103516,
"learning_rate": 0.0004801927194860814,
"loss": 0.3106,
"step": 900
},
{
"epoch": 0.49518201284796576,
"grad_norm": 11.287735939025879,
"learning_rate": 0.000493576017130621,
"loss": 0.2296,
"step": 925
},
{
"epoch": 0.5085653104925053,
"grad_norm": 10.859394073486328,
"learning_rate": 0.0004992267428027599,
"loss": 0.3712,
"step": 950
},
{
"epoch": 0.521948608137045,
"grad_norm": 0.18585732579231262,
"learning_rate": 0.0004977397097311444,
"loss": 0.3561,
"step": 975
},
{
"epoch": 0.5353319057815846,
"grad_norm": 14.576325416564941,
"learning_rate": 0.000496252676659529,
"loss": 0.3283,
"step": 1000
},
{
"epoch": 0.5487152034261242,
"grad_norm": 0.00898987427353859,
"learning_rate": 0.0004947656435879134,
"loss": 0.0033,
"step": 1025
},
{
"epoch": 0.5620985010706638,
"grad_norm": 0.27176257967948914,
"learning_rate": 0.0004932786105162979,
"loss": 0.5933,
"step": 1050
},
{
"epoch": 0.5754817987152034,
"grad_norm": 0.012289056554436684,
"learning_rate": 0.0004917915774446823,
"loss": 0.1723,
"step": 1075
},
{
"epoch": 0.588865096359743,
"grad_norm": 0.050152458250522614,
"learning_rate": 0.0004903045443730669,
"loss": 0.5719,
"step": 1100
},
{
"epoch": 0.6022483940042827,
"grad_norm": 0.6666317582130432,
"learning_rate": 0.0004888175113014514,
"loss": 0.7938,
"step": 1125
},
{
"epoch": 0.6156316916488223,
"grad_norm": 0.35938775539398193,
"learning_rate": 0.00048733047822983584,
"loss": 0.4964,
"step": 1150
},
{
"epoch": 0.6290149892933619,
"grad_norm": 0.08501864224672318,
"learning_rate": 0.0004858434451582203,
"loss": 0.2737,
"step": 1175
},
{
"epoch": 0.6423982869379015,
"grad_norm": 0.02361874282360077,
"learning_rate": 0.0004843564120866048,
"loss": 0.15,
"step": 1200
},
{
"epoch": 0.6557815845824411,
"grad_norm": 0.05745585262775421,
"learning_rate": 0.0004828693790149893,
"loss": 0.2676,
"step": 1225
},
{
"epoch": 0.6691648822269807,
"grad_norm": 0.15592187643051147,
"learning_rate": 0.00048138234594337375,
"loss": 0.3195,
"step": 1250
},
{
"epoch": 0.6825481798715204,
"grad_norm": 0.01977529190480709,
"learning_rate": 0.0004798953128717583,
"loss": 0.1439,
"step": 1275
},
{
"epoch": 0.69593147751606,
"grad_norm": 11.418144226074219,
"learning_rate": 0.0004784082798001428,
"loss": 0.2578,
"step": 1300
},
{
"epoch": 0.7093147751605996,
"grad_norm": 0.12220004945993423,
"learning_rate": 0.00047692124672852725,
"loss": 0.2181,
"step": 1325
},
{
"epoch": 0.7226980728051392,
"grad_norm": 0.2515058219432831,
"learning_rate": 0.00047543421365691177,
"loss": 0.4207,
"step": 1350
},
{
"epoch": 0.7360813704496788,
"grad_norm": 10.930126190185547,
"learning_rate": 0.00047394718058529624,
"loss": 0.3217,
"step": 1375
},
{
"epoch": 0.7494646680942184,
"grad_norm": 0.1796221137046814,
"learning_rate": 0.0004724601475136807,
"loss": 0.4147,
"step": 1400
},
{
"epoch": 0.762847965738758,
"grad_norm": 0.12057535350322723,
"learning_rate": 0.0004709731144420652,
"loss": 0.2845,
"step": 1425
},
{
"epoch": 0.7762312633832976,
"grad_norm": 0.6278265714645386,
"learning_rate": 0.0004694860813704497,
"loss": 0.6357,
"step": 1450
},
{
"epoch": 0.7896145610278372,
"grad_norm": 0.08220059424638748,
"learning_rate": 0.00046799904829883415,
"loss": 0.4283,
"step": 1475
},
{
"epoch": 0.8029978586723768,
"grad_norm": 10.042654991149902,
"learning_rate": 0.00046651201522721866,
"loss": 0.3957,
"step": 1500
},
{
"epoch": 0.8163811563169164,
"grad_norm": 0.20416533946990967,
"learning_rate": 0.00046502498215560313,
"loss": 0.4242,
"step": 1525
},
{
"epoch": 0.8297644539614561,
"grad_norm": 0.12127134948968887,
"learning_rate": 0.00046353794908398765,
"loss": 0.1901,
"step": 1550
},
{
"epoch": 0.8431477516059958,
"grad_norm": 0.07789531350135803,
"learning_rate": 0.00046205091601237216,
"loss": 0.2266,
"step": 1575
},
{
"epoch": 0.8565310492505354,
"grad_norm": 0.03442813456058502,
"learning_rate": 0.00046056388294075663,
"loss": 0.2767,
"step": 1600
},
{
"epoch": 0.869914346895075,
"grad_norm": 0.2261192798614502,
"learning_rate": 0.0004590768498691411,
"loss": 0.3227,
"step": 1625
},
{
"epoch": 0.8832976445396146,
"grad_norm": 0.05203871801495552,
"learning_rate": 0.0004575898167975256,
"loss": 0.1059,
"step": 1650
},
{
"epoch": 0.8966809421841542,
"grad_norm": 0.17587050795555115,
"learning_rate": 0.0004561027837259101,
"loss": 0.4627,
"step": 1675
},
{
"epoch": 0.9100642398286938,
"grad_norm": 0.04208080843091011,
"learning_rate": 0.00045461575065429454,
"loss": 0.1095,
"step": 1700
},
{
"epoch": 0.9234475374732334,
"grad_norm": 0.07704867422580719,
"learning_rate": 0.00045312871758267906,
"loss": 0.2484,
"step": 1725
},
{
"epoch": 0.936830835117773,
"grad_norm": 9.084434509277344,
"learning_rate": 0.0004516416845110635,
"loss": 0.4368,
"step": 1750
},
{
"epoch": 0.9502141327623126,
"grad_norm": 0.09450817853212357,
"learning_rate": 0.000450154651439448,
"loss": 0.2326,
"step": 1775
},
{
"epoch": 0.9635974304068522,
"grad_norm": 0.27564939856529236,
"learning_rate": 0.0004486676183678325,
"loss": 0.5406,
"step": 1800
},
{
"epoch": 0.9769807280513919,
"grad_norm": 0.1096988320350647,
"learning_rate": 0.000447180585296217,
"loss": 0.2194,
"step": 1825
},
{
"epoch": 0.9903640256959315,
"grad_norm": 0.06793688237667084,
"learning_rate": 0.0004456935522246015,
"loss": 0.3206,
"step": 1850
},
{
"epoch": 1.0,
"eval_accuracy": 0.9325481798715204,
"eval_auc": 0.8326772729757805,
"eval_f1": 0.0,
"eval_loss": 0.2235843539237976,
"eval_precision": 0.0,
"eval_recall": 0.0,
"eval_runtime": 12.9282,
"eval_samples_per_second": 72.245,
"eval_steps_per_second": 18.1,
"step": 1868
}
],
"logging_steps": 25,
"max_steps": 9340,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.894325812105011e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}