{ "best_metric": 0.03607647866010666, "best_model_checkpoint": "/content/drive/MyDrive/HateSpeech-BETO-cased/checkpoint-1124", "epoch": 4.0, "eval_steps": 500, "global_step": 1124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03558718861209965, "grad_norm": 7.08419132232666, "learning_rate": 2.2271714922049e-07, "loss": 0.4085, "step": 10 }, { "epoch": 0.0711743772241993, "grad_norm": 23.322711944580078, "learning_rate": 4.4543429844098e-07, "loss": 0.3565, "step": 20 }, { "epoch": 0.10676156583629894, "grad_norm": 8.696600914001465, "learning_rate": 6.6815144766147e-07, "loss": 0.3482, "step": 30 }, { "epoch": 0.1423487544483986, "grad_norm": 14.429851531982422, "learning_rate": 8.9086859688196e-07, "loss": 0.3711, "step": 40 }, { "epoch": 0.17793594306049823, "grad_norm": 9.41112995147705, "learning_rate": 1.11358574610245e-06, "loss": 0.337, "step": 50 }, { "epoch": 0.21352313167259787, "grad_norm": 12.487126350402832, "learning_rate": 1.33630289532294e-06, "loss": 0.3413, "step": 60 }, { "epoch": 0.2491103202846975, "grad_norm": 17.9010009765625, "learning_rate": 1.55902004454343e-06, "loss": 0.3154, "step": 70 }, { "epoch": 0.2846975088967972, "grad_norm": 17.09844207763672, "learning_rate": 1.78173719376392e-06, "loss": 0.2304, "step": 80 }, { "epoch": 0.3202846975088968, "grad_norm": 19.929523468017578, "learning_rate": 2.00445434298441e-06, "loss": 0.308, "step": 90 }, { "epoch": 0.35587188612099646, "grad_norm": 5.709442615509033, "learning_rate": 2.2271714922049e-06, "loss": 0.2692, "step": 100 }, { "epoch": 0.3914590747330961, "grad_norm": 19.828474044799805, "learning_rate": 2.44988864142539e-06, "loss": 0.2255, "step": 110 }, { "epoch": 0.42704626334519574, "grad_norm": 17.53233528137207, "learning_rate": 2.67260579064588e-06, "loss": 0.1917, "step": 120 }, { "epoch": 0.4626334519572954, "grad_norm": 14.940444946289062, "learning_rate": 2.8953229398663702e-06, "loss": 0.3019, "step": 130 }, { "epoch": 0.498220640569395, "grad_norm": 1.711176872253418, "learning_rate": 3.11804008908686e-06, "loss": 0.2032, "step": 140 }, { "epoch": 0.5338078291814946, "grad_norm": 7.716080665588379, "learning_rate": 3.34075723830735e-06, "loss": 0.2764, "step": 150 }, { "epoch": 0.5693950177935944, "grad_norm": 20.839887619018555, "learning_rate": 3.56347438752784e-06, "loss": 0.2067, "step": 160 }, { "epoch": 0.604982206405694, "grad_norm": 2.853170156478882, "learning_rate": 3.78619153674833e-06, "loss": 0.243, "step": 170 }, { "epoch": 0.6405693950177936, "grad_norm": 6.61458683013916, "learning_rate": 4.00890868596882e-06, "loss": 0.1886, "step": 180 }, { "epoch": 0.6761565836298933, "grad_norm": 3.499279022216797, "learning_rate": 4.231625835189309e-06, "loss": 0.1987, "step": 190 }, { "epoch": 0.7117437722419929, "grad_norm": 32.111114501953125, "learning_rate": 4.4543429844098e-06, "loss": 0.2239, "step": 200 }, { "epoch": 0.7473309608540926, "grad_norm": 3.83589506149292, "learning_rate": 4.67706013363029e-06, "loss": 0.1891, "step": 210 }, { "epoch": 0.7829181494661922, "grad_norm": 3.3233108520507812, "learning_rate": 4.89977728285078e-06, "loss": 0.1335, "step": 220 }, { "epoch": 0.8185053380782918, "grad_norm": 19.36582374572754, "learning_rate": 5.12249443207127e-06, "loss": 0.0513, "step": 230 }, { "epoch": 0.8540925266903915, "grad_norm": 0.7740408182144165, "learning_rate": 5.34521158129176e-06, "loss": 0.1499, "step": 240 }, { "epoch": 0.8896797153024911, "grad_norm": 0.18774119019508362, "learning_rate": 5.5679287305122494e-06, "loss": 0.1303, "step": 250 }, { "epoch": 0.9252669039145908, "grad_norm": 0.2991533875465393, "learning_rate": 5.7906458797327404e-06, "loss": 0.0695, "step": 260 }, { "epoch": 0.9608540925266904, "grad_norm": 0.08448722958564758, "learning_rate": 6.01336302895323e-06, "loss": 0.1109, "step": 270 }, { "epoch": 0.99644128113879, "grad_norm": 30.85651397705078, "learning_rate": 6.23608017817372e-06, "loss": 0.2155, "step": 280 }, { "epoch": 1.0, "eval_accuracy": 0.9559412550066756, "eval_f1_per_label": [ 0.9255079006772009, 0.9687203791469194 ], "eval_f1_weighted": 0.9556239404117711, "eval_loss": 0.17148031294345856, "eval_precision_per_label": [ 0.9490740740740741, 0.9587242026266416 ], "eval_precision_weighted": 0.9557995308223255, "eval_recall_per_label": [ 0.9030837004405287, 0.9789272030651341 ], "eval_recall_weighted": 0.9559412550066756, "eval_runtime": 1.3508, "eval_samples_per_second": 554.494, "eval_steps_per_second": 69.589, "step": 281 }, { "epoch": 1.0320284697508897, "grad_norm": 37.36682891845703, "learning_rate": 6.458797327394209e-06, "loss": 0.2318, "step": 290 }, { "epoch": 1.0676156583629894, "grad_norm": 3.181389093399048, "learning_rate": 6.6815144766147e-06, "loss": 0.29, "step": 300 }, { "epoch": 1.103202846975089, "grad_norm": 0.6792677640914917, "learning_rate": 6.9042316258351895e-06, "loss": 0.0621, "step": 310 }, { "epoch": 1.1387900355871885, "grad_norm": 31.2163143157959, "learning_rate": 7.12694877505568e-06, "loss": 0.3279, "step": 320 }, { "epoch": 1.1743772241992882, "grad_norm": 32.702980041503906, "learning_rate": 7.349665924276171e-06, "loss": 0.0851, "step": 330 }, { "epoch": 1.209964412811388, "grad_norm": 76.18331146240234, "learning_rate": 7.57238307349666e-06, "loss": 0.3339, "step": 340 }, { "epoch": 1.2455516014234875, "grad_norm": 76.69121551513672, "learning_rate": 7.79510022271715e-06, "loss": 0.0757, "step": 350 }, { "epoch": 1.281138790035587, "grad_norm": 0.06500601023435593, "learning_rate": 8.01781737193764e-06, "loss": 0.188, "step": 360 }, { "epoch": 1.3167259786476868, "grad_norm": 95.82825469970703, "learning_rate": 8.24053452115813e-06, "loss": 0.2278, "step": 370 }, { "epoch": 1.3523131672597866, "grad_norm": 49.43801498413086, "learning_rate": 8.463251670378619e-06, "loss": 0.2501, "step": 380 }, { "epoch": 1.387900355871886, "grad_norm": 40.980369567871094, "learning_rate": 8.68596881959911e-06, "loss": 0.1424, "step": 390 }, { "epoch": 1.4234875444839858, "grad_norm": 1.2029680013656616, "learning_rate": 8.9086859688196e-06, "loss": 0.1464, "step": 400 }, { "epoch": 1.4590747330960854, "grad_norm": 26.718364715576172, "learning_rate": 9.13140311804009e-06, "loss": 0.2213, "step": 410 }, { "epoch": 1.4946619217081851, "grad_norm": 29.254457473754883, "learning_rate": 9.35412026726058e-06, "loss": 0.1796, "step": 420 }, { "epoch": 1.5302491103202847, "grad_norm": 0.17349238693714142, "learning_rate": 9.57683741648107e-06, "loss": 0.1021, "step": 430 }, { "epoch": 1.5658362989323842, "grad_norm": 0.27866968512535095, "learning_rate": 9.79955456570156e-06, "loss": 0.2656, "step": 440 }, { "epoch": 1.601423487544484, "grad_norm": 0.02428502030670643, "learning_rate": 9.985185185185185e-06, "loss": 0.0504, "step": 450 }, { "epoch": 1.6370106761565837, "grad_norm": 0.0974503755569458, "learning_rate": 9.837037037037038e-06, "loss": 0.062, "step": 460 }, { "epoch": 1.6725978647686834, "grad_norm": 1.3685508966445923, "learning_rate": 9.688888888888889e-06, "loss": 0.0459, "step": 470 }, { "epoch": 1.708185053380783, "grad_norm": 1.2251461744308472, "learning_rate": 9.540740740740742e-06, "loss": 0.1173, "step": 480 }, { "epoch": 1.7437722419928825, "grad_norm": 62.15313720703125, "learning_rate": 9.392592592592593e-06, "loss": 0.1621, "step": 490 }, { "epoch": 1.7793594306049823, "grad_norm": 0.03026748262345791, "learning_rate": 9.244444444444445e-06, "loss": 0.1064, "step": 500 }, { "epoch": 1.814946619217082, "grad_norm": 0.4413605332374573, "learning_rate": 9.096296296296298e-06, "loss": 0.1794, "step": 510 }, { "epoch": 1.8505338078291815, "grad_norm": 0.08637289702892303, "learning_rate": 8.948148148148149e-06, "loss": 0.0915, "step": 520 }, { "epoch": 1.886120996441281, "grad_norm": 0.09529650211334229, "learning_rate": 8.8e-06, "loss": 0.052, "step": 530 }, { "epoch": 1.9217081850533808, "grad_norm": 107.46017456054688, "learning_rate": 8.651851851851852e-06, "loss": 0.1408, "step": 540 }, { "epoch": 1.9572953736654806, "grad_norm": 26.60268211364746, "learning_rate": 8.503703703703705e-06, "loss": 0.1605, "step": 550 }, { "epoch": 1.99288256227758, "grad_norm": 0.018591415137052536, "learning_rate": 8.355555555555556e-06, "loss": 0.0914, "step": 560 }, { "epoch": 2.0, "eval_accuracy": 0.9732977303070761, "eval_f1_per_label": [ 0.9541284403669725, 0.9811676082862524 ], "eval_f1_weighted": 0.9729728270877522, "eval_loss": 0.1353287547826767, "eval_precision_per_label": [ 0.9952153110047847, 0.9648148148148148 ], "eval_precision_weighted": 0.9740283163303329, "eval_recall_per_label": [ 0.9162995594713657, 0.9980842911877394 ], "eval_recall_weighted": 0.9732977303070761, "eval_runtime": 1.309, "eval_samples_per_second": 572.195, "eval_steps_per_second": 71.811, "step": 562 }, { "epoch": 2.0284697508896796, "grad_norm": 0.015944845974445343, "learning_rate": 8.207407407407409e-06, "loss": 0.0011, "step": 570 }, { "epoch": 2.0640569395017794, "grad_norm": 0.03662079572677612, "learning_rate": 8.05925925925926e-06, "loss": 0.0475, "step": 580 }, { "epoch": 2.099644128113879, "grad_norm": 0.03478744253516197, "learning_rate": 7.911111111111112e-06, "loss": 0.0017, "step": 590 }, { "epoch": 2.135231316725979, "grad_norm": 0.03390470892190933, "learning_rate": 7.762962962962963e-06, "loss": 0.0054, "step": 600 }, { "epoch": 2.170818505338078, "grad_norm": 0.007953139953315258, "learning_rate": 7.614814814814816e-06, "loss": 0.0096, "step": 610 }, { "epoch": 2.206405693950178, "grad_norm": 45.853023529052734, "learning_rate": 7.4666666666666675e-06, "loss": 0.0893, "step": 620 }, { "epoch": 2.2419928825622777, "grad_norm": 0.03887489438056946, "learning_rate": 7.31851851851852e-06, "loss": 0.0004, "step": 630 }, { "epoch": 2.277580071174377, "grad_norm": 0.04745912551879883, "learning_rate": 7.170370370370371e-06, "loss": 0.1963, "step": 640 }, { "epoch": 2.3131672597864767, "grad_norm": 0.05072787404060364, "learning_rate": 7.022222222222222e-06, "loss": 0.0231, "step": 650 }, { "epoch": 2.3487544483985765, "grad_norm": 0.026582635939121246, "learning_rate": 6.8740740740740745e-06, "loss": 0.0149, "step": 660 }, { "epoch": 2.3843416370106763, "grad_norm": 0.014482633210718632, "learning_rate": 6.725925925925927e-06, "loss": 0.0994, "step": 670 }, { "epoch": 2.419928825622776, "grad_norm": 0.025208059698343277, "learning_rate": 6.577777777777779e-06, "loss": 0.0006, "step": 680 }, { "epoch": 2.4555160142348753, "grad_norm": 0.01709064655005932, "learning_rate": 6.42962962962963e-06, "loss": 0.1219, "step": 690 }, { "epoch": 2.491103202846975, "grad_norm": 0.1339588463306427, "learning_rate": 6.2814814814814814e-06, "loss": 0.0006, "step": 700 }, { "epoch": 2.526690391459075, "grad_norm": 24.798721313476562, "learning_rate": 6.133333333333334e-06, "loss": 0.0029, "step": 710 }, { "epoch": 2.562277580071174, "grad_norm": 1.3540862798690796, "learning_rate": 5.985185185185186e-06, "loss": 0.081, "step": 720 }, { "epoch": 2.597864768683274, "grad_norm": 0.04109741002321243, "learning_rate": 5.837037037037038e-06, "loss": 0.0006, "step": 730 }, { "epoch": 2.6334519572953736, "grad_norm": 0.010597619228065014, "learning_rate": 5.688888888888889e-06, "loss": 0.013, "step": 740 }, { "epoch": 2.6690391459074734, "grad_norm": 0.02383551187813282, "learning_rate": 5.540740740740741e-06, "loss": 0.0008, "step": 750 }, { "epoch": 2.704626334519573, "grad_norm": 1.2794164419174194, "learning_rate": 5.392592592592593e-06, "loss": 0.1707, "step": 760 }, { "epoch": 2.7402135231316724, "grad_norm": 0.014402506873011589, "learning_rate": 5.244444444444445e-06, "loss": 0.0864, "step": 770 }, { "epoch": 2.775800711743772, "grad_norm": 0.013125807978212833, "learning_rate": 5.096296296296297e-06, "loss": 0.0205, "step": 780 }, { "epoch": 2.811387900355872, "grad_norm": 0.2091672271490097, "learning_rate": 4.9481481481481485e-06, "loss": 0.0027, "step": 790 }, { "epoch": 2.8469750889679717, "grad_norm": 0.13218224048614502, "learning_rate": 4.800000000000001e-06, "loss": 0.0004, "step": 800 }, { "epoch": 2.882562277580071, "grad_norm": 2.1445350646972656, "learning_rate": 4.651851851851853e-06, "loss": 0.1045, "step": 810 }, { "epoch": 2.9181494661921707, "grad_norm": 0.016628708690404892, "learning_rate": 4.503703703703704e-06, "loss": 0.0005, "step": 820 }, { "epoch": 2.9537366548042705, "grad_norm": 0.04098201170563698, "learning_rate": 4.3555555555555555e-06, "loss": 0.0569, "step": 830 }, { "epoch": 2.9893238434163703, "grad_norm": 0.7140023708343506, "learning_rate": 4.207407407407408e-06, "loss": 0.0014, "step": 840 }, { "epoch": 3.0, "eval_accuracy": 0.9893190921228304, "eval_f1_per_label": [ 0.9821428571428571, 0.9923809523809524 ], "eval_f1_weighted": 0.989278085065802, "eval_loss": 0.050094157457351685, "eval_precision_per_label": [ 0.995475113122172, 0.9867424242424242 ], "eval_precision_weighted": 0.9893890469069138, "eval_recall_per_label": [ 0.9691629955947136, 0.9980842911877394 ], "eval_recall_weighted": 0.9893190921228304, "eval_runtime": 1.3422, "eval_samples_per_second": 558.027, "eval_steps_per_second": 70.033, "step": 843 }, { "epoch": 3.0249110320284696, "grad_norm": 0.009055254980921745, "learning_rate": 4.05925925925926e-06, "loss": 0.124, "step": 850 }, { "epoch": 3.0604982206405693, "grad_norm": 0.005354477558284998, "learning_rate": 3.911111111111112e-06, "loss": 0.0008, "step": 860 }, { "epoch": 3.096085409252669, "grad_norm": 0.006784161552786827, "learning_rate": 3.7629629629629633e-06, "loss": 0.0736, "step": 870 }, { "epoch": 3.131672597864769, "grad_norm": 0.007448482792824507, "learning_rate": 3.614814814814815e-06, "loss": 0.0047, "step": 880 }, { "epoch": 3.167259786476868, "grad_norm": 0.0065169306471943855, "learning_rate": 3.4666666666666672e-06, "loss": 0.0003, "step": 890 }, { "epoch": 3.202846975088968, "grad_norm": 0.00644827401265502, "learning_rate": 3.3185185185185185e-06, "loss": 0.0007, "step": 900 }, { "epoch": 3.2384341637010676, "grad_norm": 12.251897811889648, "learning_rate": 3.1703703703703707e-06, "loss": 0.0016, "step": 910 }, { "epoch": 3.2740213523131674, "grad_norm": 0.010300640016794205, "learning_rate": 3.0222222222222225e-06, "loss": 0.0002, "step": 920 }, { "epoch": 3.309608540925267, "grad_norm": 0.006541989278048277, "learning_rate": 2.874074074074074e-06, "loss": 0.0054, "step": 930 }, { "epoch": 3.3451957295373664, "grad_norm": 0.08283556252717972, "learning_rate": 2.7259259259259264e-06, "loss": 0.0006, "step": 940 }, { "epoch": 3.380782918149466, "grad_norm": 0.005921730771660805, "learning_rate": 2.577777777777778e-06, "loss": 0.0003, "step": 950 }, { "epoch": 3.416370106761566, "grad_norm": 0.004639245569705963, "learning_rate": 2.42962962962963e-06, "loss": 0.0003, "step": 960 }, { "epoch": 3.4519572953736652, "grad_norm": 0.16998454928398132, "learning_rate": 2.2814814814814816e-06, "loss": 0.0007, "step": 970 }, { "epoch": 3.487544483985765, "grad_norm": 0.005660816095769405, "learning_rate": 2.133333333333334e-06, "loss": 0.0241, "step": 980 }, { "epoch": 3.5231316725978647, "grad_norm": 0.01572972722351551, "learning_rate": 1.985185185185185e-06, "loss": 0.0022, "step": 990 }, { "epoch": 3.5587188612099645, "grad_norm": 0.01506053563207388, "learning_rate": 1.837037037037037e-06, "loss": 0.0017, "step": 1000 }, { "epoch": 3.5943060498220643, "grad_norm": 0.06420188397169113, "learning_rate": 1.688888888888889e-06, "loss": 0.0981, "step": 1010 }, { "epoch": 3.6298932384341636, "grad_norm": 0.050418056547641754, "learning_rate": 1.540740740740741e-06, "loss": 0.0002, "step": 1020 }, { "epoch": 3.6654804270462633, "grad_norm": 0.1331329494714737, "learning_rate": 1.3925925925925925e-06, "loss": 0.0002, "step": 1030 }, { "epoch": 3.701067615658363, "grad_norm": 0.0047861747443675995, "learning_rate": 1.2444444444444445e-06, "loss": 0.0007, "step": 1040 }, { "epoch": 3.7366548042704624, "grad_norm": 0.004302954766899347, "learning_rate": 1.0962962962962965e-06, "loss": 0.0002, "step": 1050 }, { "epoch": 3.772241992882562, "grad_norm": 0.0052270120941102505, "learning_rate": 9.481481481481482e-07, "loss": 0.0003, "step": 1060 }, { "epoch": 3.807829181494662, "grad_norm": 0.006781002506613731, "learning_rate": 8.000000000000001e-07, "loss": 0.0003, "step": 1070 }, { "epoch": 3.8434163701067616, "grad_norm": 0.006440193857997656, "learning_rate": 6.518518518518518e-07, "loss": 0.0003, "step": 1080 }, { "epoch": 3.8790035587188614, "grad_norm": 0.009248029440641403, "learning_rate": 5.037037037037038e-07, "loss": 0.0061, "step": 1090 }, { "epoch": 3.914590747330961, "grad_norm": 0.011829700320959091, "learning_rate": 3.555555555555556e-07, "loss": 0.0003, "step": 1100 }, { "epoch": 3.9501779359430604, "grad_norm": 0.024425974115729332, "learning_rate": 2.074074074074074e-07, "loss": 0.0676, "step": 1110 }, { "epoch": 3.98576512455516, "grad_norm": 0.00512115890160203, "learning_rate": 5.9259259259259263e-08, "loss": 0.0002, "step": 1120 }, { "epoch": 4.0, "eval_accuracy": 0.9933244325767691, "eval_f1_per_label": [ 0.9888641425389755, 0.9952335557673975 ], "eval_f1_weighted": 0.9933031728530427, "eval_loss": 0.03607647866010666, "eval_precision_per_label": [ 1.0, 0.9905123339658444 ], "eval_precision_weighted": 0.9933877681310691, "eval_recall_per_label": [ 0.9779735682819384, 1.0 ], "eval_recall_weighted": 0.9933244325767691, "eval_runtime": 5.013, "eval_samples_per_second": 149.411, "eval_steps_per_second": 18.751, "step": 1124 } ], "logging_steps": 10, "max_steps": 1124, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 87830786944920.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }