{ "best_metric": NaN, "best_model_checkpoint": "autotrain-q48dd-y2s6a/checkpoint-4469", "epoch": 1.0, "eval_steps": 500, "global_step": 4469, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005594092638174088, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 25 }, { "epoch": 0.011188185276348177, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 50 }, { "epoch": 0.016782277914522265, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 75 }, { "epoch": 0.022376370552696354, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 100 }, { "epoch": 0.027970463190870442, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 125 }, { "epoch": 0.03356455582904453, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 150 }, { "epoch": 0.039158648467218615, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 175 }, { "epoch": 0.04475274110539271, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 200 }, { "epoch": 0.05034683374356679, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 225 }, { "epoch": 0.055940926381740884, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 250 }, { "epoch": 0.06153501901991497, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 275 }, { "epoch": 0.06712911165808906, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 300 }, { "epoch": 0.07272320429626315, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 325 }, { "epoch": 0.07831729693443723, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 350 }, { "epoch": 0.08391138957261132, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 375 }, { "epoch": 0.08950548221078541, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 400 }, { "epoch": 0.09509957484895949, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 425 }, { "epoch": 0.10069366748713358, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 450 }, { "epoch": 0.10628776012530768, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 475 }, { "epoch": 0.11188185276348177, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 500 }, { "epoch": 0.11747594540165585, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 525 }, { "epoch": 0.12307003803982994, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 550 }, { "epoch": 0.12866413067800403, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 575 }, { "epoch": 0.13425822331617812, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 600 }, { "epoch": 0.1398523159543522, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 625 }, { "epoch": 0.1454464085925263, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 650 }, { "epoch": 0.15104050123070037, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 675 }, { "epoch": 0.15663459386887446, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 700 }, { "epoch": 0.16222868650704855, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 725 }, { "epoch": 0.16782277914522264, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 750 }, { "epoch": 0.17341687178339674, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 775 }, { "epoch": 0.17901096442157083, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 800 }, { "epoch": 0.18460505705974492, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 825 }, { "epoch": 0.19019914969791898, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 850 }, { "epoch": 0.19579324233609308, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 875 }, { "epoch": 0.20138733497426717, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 900 }, { "epoch": 0.20698142761244126, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 925 }, { "epoch": 0.21257552025061535, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 950 }, { "epoch": 0.21816961288878944, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 975 }, { "epoch": 0.22376370552696354, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1000 }, { "epoch": 0.22935779816513763, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1025 }, { "epoch": 0.2349518908033117, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1050 }, { "epoch": 0.24054598344148578, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1075 }, { "epoch": 0.24614007607965988, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1100 }, { "epoch": 0.251734168717834, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1125 }, { "epoch": 0.25732826135600806, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1150 }, { "epoch": 0.2629223539941821, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1175 }, { "epoch": 0.26851644663235624, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1200 }, { "epoch": 0.2741105392705303, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1225 }, { "epoch": 0.2797046319087044, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1250 }, { "epoch": 0.2852987245468785, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1275 }, { "epoch": 0.2908928171850526, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1300 }, { "epoch": 0.2964869098232267, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1325 }, { "epoch": 0.30208100246140074, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1350 }, { "epoch": 0.30767509509957486, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1375 }, { "epoch": 0.3132691877377489, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1400 }, { "epoch": 0.31886328037592304, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1425 }, { "epoch": 0.3244573730140971, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1450 }, { "epoch": 0.3300514656522712, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1475 }, { "epoch": 0.3356455582904453, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1500 }, { "epoch": 0.34123965092861935, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1525 }, { "epoch": 0.3468337435667935, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1550 }, { "epoch": 0.35242783620496754, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1575 }, { "epoch": 0.35802192884314166, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1600 }, { "epoch": 0.3636160214813157, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1625 }, { "epoch": 0.36921011411948984, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1650 }, { "epoch": 0.3748042067576639, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1675 }, { "epoch": 0.38039829939583797, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1700 }, { "epoch": 0.3859923920340121, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1725 }, { "epoch": 0.39158648467218615, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1750 }, { "epoch": 0.39718057731036027, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1775 }, { "epoch": 0.40277466994853434, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1800 }, { "epoch": 0.40836876258670846, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1825 }, { "epoch": 0.4139628552248825, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1850 }, { "epoch": 0.41955694786305664, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1875 }, { "epoch": 0.4251510405012307, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1900 }, { "epoch": 0.43074513313940477, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1925 }, { "epoch": 0.4363392257775789, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1950 }, { "epoch": 0.44193331841575295, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 1975 }, { "epoch": 0.44752741105392707, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2000 }, { "epoch": 0.45312150369210114, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2025 }, { "epoch": 0.45871559633027525, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2050 }, { "epoch": 0.4643096889684493, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2075 }, { "epoch": 0.4699037816066234, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2100 }, { "epoch": 0.4754978742447975, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2125 }, { "epoch": 0.48109196688297157, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2150 }, { "epoch": 0.4866860595211457, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2175 }, { "epoch": 0.49228015215931975, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2200 }, { "epoch": 0.49787424479749387, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2225 }, { "epoch": 0.503468337435668, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2250 }, { "epoch": 0.509062430073842, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2275 }, { "epoch": 0.5146565227120161, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2300 }, { "epoch": 0.5202506153501902, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2325 }, { "epoch": 0.5258447079883642, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2350 }, { "epoch": 0.5314388006265384, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2375 }, { "epoch": 0.5370328932647125, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2400 }, { "epoch": 0.5426269859028866, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2425 }, { "epoch": 0.5482210785410606, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2450 }, { "epoch": 0.5538151711792347, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2475 }, { "epoch": 0.5594092638174089, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2500 }, { "epoch": 0.5650033564555829, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2525 }, { "epoch": 0.570597449093757, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2550 }, { "epoch": 0.5761915417319311, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2575 }, { "epoch": 0.5817856343701052, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2600 }, { "epoch": 0.5873797270082792, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2625 }, { "epoch": 0.5929738196464533, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2650 }, { "epoch": 0.5985679122846275, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2675 }, { "epoch": 0.6041620049228015, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2700 }, { "epoch": 0.6097560975609756, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2725 }, { "epoch": 0.6153501901991497, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2750 }, { "epoch": 0.6209442828373238, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2775 }, { "epoch": 0.6265383754754978, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2800 }, { "epoch": 0.632132468113672, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2825 }, { "epoch": 0.6377265607518461, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2850 }, { "epoch": 0.6433206533900201, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2875 }, { "epoch": 0.6489147460281942, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2900 }, { "epoch": 0.6545088386663683, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2925 }, { "epoch": 0.6601029313045425, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2950 }, { "epoch": 0.6656970239427165, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 2975 }, { "epoch": 0.6712911165808906, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3000 }, { "epoch": 0.6768852092190647, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3025 }, { "epoch": 0.6824793018572387, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3050 }, { "epoch": 0.6880733944954128, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3075 }, { "epoch": 0.693667487133587, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3100 }, { "epoch": 0.6992615797717611, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3125 }, { "epoch": 0.7048556724099351, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3150 }, { "epoch": 0.7104497650481092, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3175 }, { "epoch": 0.7160438576862833, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3200 }, { "epoch": 0.7216379503244573, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3225 }, { "epoch": 0.7272320429626314, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3250 }, { "epoch": 0.7328261356008056, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3275 }, { "epoch": 0.7384202282389797, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3300 }, { "epoch": 0.7440143208771537, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3325 }, { "epoch": 0.7496084135153278, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3350 }, { "epoch": 0.7552025061535019, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3375 }, { "epoch": 0.7607965987916759, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3400 }, { "epoch": 0.7663906914298501, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3425 }, { "epoch": 0.7719847840680242, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3450 }, { "epoch": 0.7775788767061983, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3475 }, { "epoch": 0.7831729693443723, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3500 }, { "epoch": 0.7887670619825464, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3525 }, { "epoch": 0.7943611546207205, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3550 }, { "epoch": 0.7999552472588946, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3575 }, { "epoch": 0.8055493398970687, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3600 }, { "epoch": 0.8111434325352428, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3625 }, { "epoch": 0.8167375251734169, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3650 }, { "epoch": 0.8223316178115909, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3675 }, { "epoch": 0.827925710449765, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3700 }, { "epoch": 0.8335198030879392, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3725 }, { "epoch": 0.8391138957261133, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3750 }, { "epoch": 0.8447079883642873, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3775 }, { "epoch": 0.8503020810024614, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3800 }, { "epoch": 0.8558961736406355, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3825 }, { "epoch": 0.8614902662788095, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3850 }, { "epoch": 0.8670843589169837, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3875 }, { "epoch": 0.8726784515551578, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3900 }, { "epoch": 0.8782725441933319, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3925 }, { "epoch": 0.8838666368315059, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3950 }, { "epoch": 0.88946072946968, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 3975 }, { "epoch": 0.8950548221078541, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4000 }, { "epoch": 0.9006489147460282, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4025 }, { "epoch": 0.9062430073842023, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4050 }, { "epoch": 0.9118371000223764, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4075 }, { "epoch": 0.9174311926605505, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4100 }, { "epoch": 0.9230252852987245, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4125 }, { "epoch": 0.9286193779368986, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4150 }, { "epoch": 0.9342134705750728, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4175 }, { "epoch": 0.9398075632132468, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4200 }, { "epoch": 0.9454016558514209, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4225 }, { "epoch": 0.950995748489595, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4250 }, { "epoch": 0.9565898411277691, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4275 }, { "epoch": 0.9621839337659431, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4300 }, { "epoch": 0.9677780264041173, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4325 }, { "epoch": 0.9733721190422914, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4350 }, { "epoch": 0.9789662116804654, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4375 }, { "epoch": 0.9845603043186395, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4400 }, { "epoch": 0.9901543969568136, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4425 }, { "epoch": 0.9957484895949877, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 4450 }, { "epoch": 1.0, "eval_gen_len": 12.915, "eval_loss": NaN, "eval_rouge1": 13.2945, "eval_rouge2": 3.5623, "eval_rougeL": 12.4291, "eval_rougeLsum": 12.5471, "eval_runtime": 125.0743, "eval_samples_per_second": 8.931, "eval_steps_per_second": 2.239, "step": 4469 } ], "logging_steps": 25, "max_steps": 13407, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 434735324135424.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }