{ "best_metric": 0.2235843539237976, "best_model_checkpoint": "nsfw-images-2/checkpoint-1868", "epoch": 1.0, "eval_steps": 500, "global_step": 1868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013383297644539615, "grad_norm": 18.170637130737305, "learning_rate": 1.2312633832976446e-05, "loss": 0.7179, "step": 25 }, { "epoch": 0.02676659528907923, "grad_norm": 3.0997555255889893, "learning_rate": 2.569593147751606e-05, "loss": 0.3338, "step": 50 }, { "epoch": 0.04014989293361884, "grad_norm": 31.291412353515625, "learning_rate": 3.9079229122055675e-05, "loss": 0.3848, "step": 75 }, { "epoch": 0.05353319057815846, "grad_norm": 0.03622590750455856, "learning_rate": 5.246252676659529e-05, "loss": 0.0596, "step": 100 }, { "epoch": 0.06691648822269808, "grad_norm": 26.821062088012695, "learning_rate": 6.58458244111349e-05, "loss": 0.3471, "step": 125 }, { "epoch": 0.08029978586723768, "grad_norm": 0.011622205376625061, "learning_rate": 7.922912205567452e-05, "loss": 0.1036, "step": 150 }, { "epoch": 0.0936830835117773, "grad_norm": 0.07030396908521652, "learning_rate": 9.261241970021414e-05, "loss": 0.0683, "step": 175 }, { "epoch": 0.10706638115631692, "grad_norm": 0.1393502801656723, "learning_rate": 0.00010599571734475374, "loss": 0.3868, "step": 200 }, { "epoch": 0.12044967880085652, "grad_norm": 0.019323009997606277, "learning_rate": 0.00011937901498929336, "loss": 0.5266, "step": 225 }, { "epoch": 0.13383297644539616, "grad_norm": 0.02416042983531952, "learning_rate": 0.000132762312633833, "loss": 0.2695, "step": 250 }, { "epoch": 0.14721627408993576, "grad_norm": 2.2118706703186035, "learning_rate": 0.0001461456102783726, "loss": 0.4989, "step": 275 }, { "epoch": 0.16059957173447537, "grad_norm": 11.72065258026123, "learning_rate": 0.0001595289079229122, "loss": 0.3731, "step": 300 }, { "epoch": 0.173982869379015, "grad_norm": 0.0027222177013754845, "learning_rate": 0.00017291220556745181, "loss": 0.1109, "step": 325 }, { "epoch": 0.1873661670235546, "grad_norm": 0.0008230574312619865, "learning_rate": 0.00018629550321199142, "loss": 0.2094, "step": 350 }, { "epoch": 0.2007494646680942, "grad_norm": 0.036911237984895706, "learning_rate": 0.00019967880085653106, "loss": 0.6709, "step": 375 }, { "epoch": 0.21413276231263384, "grad_norm": 20.274892807006836, "learning_rate": 0.00021306209850107066, "loss": 0.5297, "step": 400 }, { "epoch": 0.22751605995717344, "grad_norm": 0.04864942654967308, "learning_rate": 0.00022644539614561027, "loss": 0.1467, "step": 425 }, { "epoch": 0.24089935760171305, "grad_norm": 0.12074282765388489, "learning_rate": 0.0002398286937901499, "loss": 0.6298, "step": 450 }, { "epoch": 0.25428265524625265, "grad_norm": 0.11845938861370087, "learning_rate": 0.00025321199143468954, "loss": 0.257, "step": 475 }, { "epoch": 0.2676659528907923, "grad_norm": 0.09819821268320084, "learning_rate": 0.00026659528907922915, "loss": 0.8714, "step": 500 }, { "epoch": 0.2810492505353319, "grad_norm": 0.09790199995040894, "learning_rate": 0.00027997858672376875, "loss": 0.2357, "step": 525 }, { "epoch": 0.2944325481798715, "grad_norm": 0.5837295055389404, "learning_rate": 0.00029336188436830836, "loss": 0.7943, "step": 550 }, { "epoch": 0.3078158458244111, "grad_norm": 0.14801590144634247, "learning_rate": 0.00030674518201284797, "loss": 0.4615, "step": 575 }, { "epoch": 0.32119914346895073, "grad_norm": 0.05935389921069145, "learning_rate": 0.0003201284796573876, "loss": 0.2401, "step": 600 }, { "epoch": 0.33458244111349034, "grad_norm": 12.694086074829102, "learning_rate": 0.00033297644539614566, "loss": 0.6572, "step": 625 }, { "epoch": 0.34796573875803, "grad_norm": 0.05311077833175659, "learning_rate": 0.00034635974304068526, "loss": 0.3569, "step": 650 }, { "epoch": 0.3613490364025696, "grad_norm": 0.03182295709848404, "learning_rate": 0.0003597430406852248, "loss": 0.1498, "step": 675 }, { "epoch": 0.3747323340471092, "grad_norm": 0.33807849884033203, "learning_rate": 0.0003731263383297644, "loss": 0.3766, "step": 700 }, { "epoch": 0.3881156316916488, "grad_norm": 0.08421733975410461, "learning_rate": 0.00038650963597430403, "loss": 0.2979, "step": 725 }, { "epoch": 0.4014989293361884, "grad_norm": 0.01755744405090809, "learning_rate": 0.0003998929336188437, "loss": 0.0031, "step": 750 }, { "epoch": 0.4148822269807281, "grad_norm": 0.21543407440185547, "learning_rate": 0.0004132762312633833, "loss": 0.4799, "step": 775 }, { "epoch": 0.4282655246252677, "grad_norm": 0.18580827116966248, "learning_rate": 0.0004266595289079229, "loss": 0.3885, "step": 800 }, { "epoch": 0.4416488222698073, "grad_norm": 11.58962631225586, "learning_rate": 0.0004400428265524625, "loss": 0.3268, "step": 825 }, { "epoch": 0.4550321199143469, "grad_norm": 0.0360148549079895, "learning_rate": 0.0004534261241970021, "loss": 0.3201, "step": 850 }, { "epoch": 0.4684154175588865, "grad_norm": 0.09959446638822556, "learning_rate": 0.0004668094218415418, "loss": 0.2712, "step": 875 }, { "epoch": 0.4817987152034261, "grad_norm": 0.25127506256103516, "learning_rate": 0.0004801927194860814, "loss": 0.3106, "step": 900 }, { "epoch": 0.49518201284796576, "grad_norm": 11.287735939025879, "learning_rate": 0.000493576017130621, "loss": 0.2296, "step": 925 }, { "epoch": 0.5085653104925053, "grad_norm": 10.859394073486328, "learning_rate": 0.0004992267428027599, "loss": 0.3712, "step": 950 }, { "epoch": 0.521948608137045, "grad_norm": 0.18585732579231262, "learning_rate": 0.0004977397097311444, "loss": 0.3561, "step": 975 }, { "epoch": 0.5353319057815846, "grad_norm": 14.576325416564941, "learning_rate": 0.000496252676659529, "loss": 0.3283, "step": 1000 }, { "epoch": 0.5487152034261242, "grad_norm": 0.00898987427353859, "learning_rate": 0.0004947656435879134, "loss": 0.0033, "step": 1025 }, { "epoch": 0.5620985010706638, "grad_norm": 0.27176257967948914, "learning_rate": 0.0004932786105162979, "loss": 0.5933, "step": 1050 }, { "epoch": 0.5754817987152034, "grad_norm": 0.012289056554436684, "learning_rate": 0.0004917915774446823, "loss": 0.1723, "step": 1075 }, { "epoch": 0.588865096359743, "grad_norm": 0.050152458250522614, "learning_rate": 0.0004903045443730669, "loss": 0.5719, "step": 1100 }, { "epoch": 0.6022483940042827, "grad_norm": 0.6666317582130432, "learning_rate": 0.0004888175113014514, "loss": 0.7938, "step": 1125 }, { "epoch": 0.6156316916488223, "grad_norm": 0.35938775539398193, "learning_rate": 0.00048733047822983584, "loss": 0.4964, "step": 1150 }, { "epoch": 0.6290149892933619, "grad_norm": 0.08501864224672318, "learning_rate": 0.0004858434451582203, "loss": 0.2737, "step": 1175 }, { "epoch": 0.6423982869379015, "grad_norm": 0.02361874282360077, "learning_rate": 0.0004843564120866048, "loss": 0.15, "step": 1200 }, { "epoch": 0.6557815845824411, "grad_norm": 0.05745585262775421, "learning_rate": 0.0004828693790149893, "loss": 0.2676, "step": 1225 }, { "epoch": 0.6691648822269807, "grad_norm": 0.15592187643051147, "learning_rate": 0.00048138234594337375, "loss": 0.3195, "step": 1250 }, { "epoch": 0.6825481798715204, "grad_norm": 0.01977529190480709, "learning_rate": 0.0004798953128717583, "loss": 0.1439, "step": 1275 }, { "epoch": 0.69593147751606, "grad_norm": 11.418144226074219, "learning_rate": 0.0004784082798001428, "loss": 0.2578, "step": 1300 }, { "epoch": 0.7093147751605996, "grad_norm": 0.12220004945993423, "learning_rate": 0.00047692124672852725, "loss": 0.2181, "step": 1325 }, { "epoch": 0.7226980728051392, "grad_norm": 0.2515058219432831, "learning_rate": 0.00047543421365691177, "loss": 0.4207, "step": 1350 }, { "epoch": 0.7360813704496788, "grad_norm": 10.930126190185547, "learning_rate": 0.00047394718058529624, "loss": 0.3217, "step": 1375 }, { "epoch": 0.7494646680942184, "grad_norm": 0.1796221137046814, "learning_rate": 0.0004724601475136807, "loss": 0.4147, "step": 1400 }, { "epoch": 0.762847965738758, "grad_norm": 0.12057535350322723, "learning_rate": 0.0004709731144420652, "loss": 0.2845, "step": 1425 }, { "epoch": 0.7762312633832976, "grad_norm": 0.6278265714645386, "learning_rate": 0.0004694860813704497, "loss": 0.6357, "step": 1450 }, { "epoch": 0.7896145610278372, "grad_norm": 0.08220059424638748, "learning_rate": 0.00046799904829883415, "loss": 0.4283, "step": 1475 }, { "epoch": 0.8029978586723768, "grad_norm": 10.042654991149902, "learning_rate": 0.00046651201522721866, "loss": 0.3957, "step": 1500 }, { "epoch": 0.8163811563169164, "grad_norm": 0.20416533946990967, "learning_rate": 0.00046502498215560313, "loss": 0.4242, "step": 1525 }, { "epoch": 0.8297644539614561, "grad_norm": 0.12127134948968887, "learning_rate": 0.00046353794908398765, "loss": 0.1901, "step": 1550 }, { "epoch": 0.8431477516059958, "grad_norm": 0.07789531350135803, "learning_rate": 0.00046205091601237216, "loss": 0.2266, "step": 1575 }, { "epoch": 0.8565310492505354, "grad_norm": 0.03442813456058502, "learning_rate": 0.00046056388294075663, "loss": 0.2767, "step": 1600 }, { "epoch": 0.869914346895075, "grad_norm": 0.2261192798614502, "learning_rate": 0.0004590768498691411, "loss": 0.3227, "step": 1625 }, { "epoch": 0.8832976445396146, "grad_norm": 0.05203871801495552, "learning_rate": 0.0004575898167975256, "loss": 0.1059, "step": 1650 }, { "epoch": 0.8966809421841542, "grad_norm": 0.17587050795555115, "learning_rate": 0.0004561027837259101, "loss": 0.4627, "step": 1675 }, { "epoch": 0.9100642398286938, "grad_norm": 0.04208080843091011, "learning_rate": 0.00045461575065429454, "loss": 0.1095, "step": 1700 }, { "epoch": 0.9234475374732334, "grad_norm": 0.07704867422580719, "learning_rate": 0.00045312871758267906, "loss": 0.2484, "step": 1725 }, { "epoch": 0.936830835117773, "grad_norm": 9.084434509277344, "learning_rate": 0.0004516416845110635, "loss": 0.4368, "step": 1750 }, { "epoch": 0.9502141327623126, "grad_norm": 0.09450817853212357, "learning_rate": 0.000450154651439448, "loss": 0.2326, "step": 1775 }, { "epoch": 0.9635974304068522, "grad_norm": 0.27564939856529236, "learning_rate": 0.0004486676183678325, "loss": 0.5406, "step": 1800 }, { "epoch": 0.9769807280513919, "grad_norm": 0.1096988320350647, "learning_rate": 0.000447180585296217, "loss": 0.2194, "step": 1825 }, { "epoch": 0.9903640256959315, "grad_norm": 0.06793688237667084, "learning_rate": 0.0004456935522246015, "loss": 0.3206, "step": 1850 }, { "epoch": 1.0, "eval_accuracy": 0.9325481798715204, "eval_auc": 0.8326772729757805, "eval_f1": 0.0, "eval_loss": 0.2235843539237976, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 12.9282, "eval_samples_per_second": 72.245, "eval_steps_per_second": 18.1, "step": 1868 } ], "logging_steps": 25, "max_steps": 9340, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.894325812105011e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }