flan-t5-base-80 / checkpoint-4469 /trainer_state.json
riken01's picture
Upload folder using huggingface_hub
b121009 verified
raw
history blame contribute delete
No virus
25.9 kB
{
"best_metric": NaN,
"best_model_checkpoint": "autotrain-q48dd-y2s6a/checkpoint-4469",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4469,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005594092638174088,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 25
},
{
"epoch": 0.011188185276348177,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.016782277914522265,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 75
},
{
"epoch": 0.022376370552696354,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 100
},
{
"epoch": 0.027970463190870442,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 125
},
{
"epoch": 0.03356455582904453,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 150
},
{
"epoch": 0.039158648467218615,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 175
},
{
"epoch": 0.04475274110539271,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.05034683374356679,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 225
},
{
"epoch": 0.055940926381740884,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 250
},
{
"epoch": 0.06153501901991497,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 275
},
{
"epoch": 0.06712911165808906,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 300
},
{
"epoch": 0.07272320429626315,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 325
},
{
"epoch": 0.07831729693443723,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 350
},
{
"epoch": 0.08391138957261132,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 375
},
{
"epoch": 0.08950548221078541,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 400
},
{
"epoch": 0.09509957484895949,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 425
},
{
"epoch": 0.10069366748713358,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 450
},
{
"epoch": 0.10628776012530768,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 475
},
{
"epoch": 0.11188185276348177,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 500
},
{
"epoch": 0.11747594540165585,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 525
},
{
"epoch": 0.12307003803982994,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 550
},
{
"epoch": 0.12866413067800403,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 575
},
{
"epoch": 0.13425822331617812,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 600
},
{
"epoch": 0.1398523159543522,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 625
},
{
"epoch": 0.1454464085925263,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 650
},
{
"epoch": 0.15104050123070037,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 675
},
{
"epoch": 0.15663459386887446,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 700
},
{
"epoch": 0.16222868650704855,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 725
},
{
"epoch": 0.16782277914522264,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 750
},
{
"epoch": 0.17341687178339674,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 775
},
{
"epoch": 0.17901096442157083,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.18460505705974492,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 825
},
{
"epoch": 0.19019914969791898,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 850
},
{
"epoch": 0.19579324233609308,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 875
},
{
"epoch": 0.20138733497426717,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 900
},
{
"epoch": 0.20698142761244126,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 925
},
{
"epoch": 0.21257552025061535,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 950
},
{
"epoch": 0.21816961288878944,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 975
},
{
"epoch": 0.22376370552696354,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1000
},
{
"epoch": 0.22935779816513763,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1025
},
{
"epoch": 0.2349518908033117,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1050
},
{
"epoch": 0.24054598344148578,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1075
},
{
"epoch": 0.24614007607965988,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1100
},
{
"epoch": 0.251734168717834,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1125
},
{
"epoch": 0.25732826135600806,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1150
},
{
"epoch": 0.2629223539941821,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1175
},
{
"epoch": 0.26851644663235624,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1200
},
{
"epoch": 0.2741105392705303,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1225
},
{
"epoch": 0.2797046319087044,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1250
},
{
"epoch": 0.2852987245468785,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1275
},
{
"epoch": 0.2908928171850526,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1300
},
{
"epoch": 0.2964869098232267,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1325
},
{
"epoch": 0.30208100246140074,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1350
},
{
"epoch": 0.30767509509957486,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1375
},
{
"epoch": 0.3132691877377489,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1400
},
{
"epoch": 0.31886328037592304,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1425
},
{
"epoch": 0.3244573730140971,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1450
},
{
"epoch": 0.3300514656522712,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1475
},
{
"epoch": 0.3356455582904453,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1500
},
{
"epoch": 0.34123965092861935,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1525
},
{
"epoch": 0.3468337435667935,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1550
},
{
"epoch": 0.35242783620496754,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1575
},
{
"epoch": 0.35802192884314166,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1600
},
{
"epoch": 0.3636160214813157,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1625
},
{
"epoch": 0.36921011411948984,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1650
},
{
"epoch": 0.3748042067576639,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1675
},
{
"epoch": 0.38039829939583797,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1700
},
{
"epoch": 0.3859923920340121,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1725
},
{
"epoch": 0.39158648467218615,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1750
},
{
"epoch": 0.39718057731036027,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1775
},
{
"epoch": 0.40277466994853434,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1800
},
{
"epoch": 0.40836876258670846,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1825
},
{
"epoch": 0.4139628552248825,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1850
},
{
"epoch": 0.41955694786305664,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1875
},
{
"epoch": 0.4251510405012307,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1900
},
{
"epoch": 0.43074513313940477,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1925
},
{
"epoch": 0.4363392257775789,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1950
},
{
"epoch": 0.44193331841575295,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 1975
},
{
"epoch": 0.44752741105392707,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2000
},
{
"epoch": 0.45312150369210114,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2025
},
{
"epoch": 0.45871559633027525,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2050
},
{
"epoch": 0.4643096889684493,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2075
},
{
"epoch": 0.4699037816066234,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2100
},
{
"epoch": 0.4754978742447975,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2125
},
{
"epoch": 0.48109196688297157,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2150
},
{
"epoch": 0.4866860595211457,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2175
},
{
"epoch": 0.49228015215931975,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2200
},
{
"epoch": 0.49787424479749387,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2225
},
{
"epoch": 0.503468337435668,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2250
},
{
"epoch": 0.509062430073842,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2275
},
{
"epoch": 0.5146565227120161,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2300
},
{
"epoch": 0.5202506153501902,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2325
},
{
"epoch": 0.5258447079883642,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2350
},
{
"epoch": 0.5314388006265384,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2375
},
{
"epoch": 0.5370328932647125,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2400
},
{
"epoch": 0.5426269859028866,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2425
},
{
"epoch": 0.5482210785410606,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2450
},
{
"epoch": 0.5538151711792347,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2475
},
{
"epoch": 0.5594092638174089,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2500
},
{
"epoch": 0.5650033564555829,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2525
},
{
"epoch": 0.570597449093757,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2550
},
{
"epoch": 0.5761915417319311,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2575
},
{
"epoch": 0.5817856343701052,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2600
},
{
"epoch": 0.5873797270082792,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2625
},
{
"epoch": 0.5929738196464533,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2650
},
{
"epoch": 0.5985679122846275,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2675
},
{
"epoch": 0.6041620049228015,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2700
},
{
"epoch": 0.6097560975609756,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2725
},
{
"epoch": 0.6153501901991497,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2750
},
{
"epoch": 0.6209442828373238,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2775
},
{
"epoch": 0.6265383754754978,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2800
},
{
"epoch": 0.632132468113672,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2825
},
{
"epoch": 0.6377265607518461,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2850
},
{
"epoch": 0.6433206533900201,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2875
},
{
"epoch": 0.6489147460281942,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2900
},
{
"epoch": 0.6545088386663683,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2925
},
{
"epoch": 0.6601029313045425,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2950
},
{
"epoch": 0.6656970239427165,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 2975
},
{
"epoch": 0.6712911165808906,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3000
},
{
"epoch": 0.6768852092190647,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3025
},
{
"epoch": 0.6824793018572387,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3050
},
{
"epoch": 0.6880733944954128,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3075
},
{
"epoch": 0.693667487133587,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3100
},
{
"epoch": 0.6992615797717611,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3125
},
{
"epoch": 0.7048556724099351,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3150
},
{
"epoch": 0.7104497650481092,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3175
},
{
"epoch": 0.7160438576862833,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3200
},
{
"epoch": 0.7216379503244573,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3225
},
{
"epoch": 0.7272320429626314,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3250
},
{
"epoch": 0.7328261356008056,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3275
},
{
"epoch": 0.7384202282389797,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3300
},
{
"epoch": 0.7440143208771537,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3325
},
{
"epoch": 0.7496084135153278,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3350
},
{
"epoch": 0.7552025061535019,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3375
},
{
"epoch": 0.7607965987916759,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3400
},
{
"epoch": 0.7663906914298501,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3425
},
{
"epoch": 0.7719847840680242,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3450
},
{
"epoch": 0.7775788767061983,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3475
},
{
"epoch": 0.7831729693443723,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3500
},
{
"epoch": 0.7887670619825464,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3525
},
{
"epoch": 0.7943611546207205,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3550
},
{
"epoch": 0.7999552472588946,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3575
},
{
"epoch": 0.8055493398970687,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3600
},
{
"epoch": 0.8111434325352428,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3625
},
{
"epoch": 0.8167375251734169,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3650
},
{
"epoch": 0.8223316178115909,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3675
},
{
"epoch": 0.827925710449765,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3700
},
{
"epoch": 0.8335198030879392,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3725
},
{
"epoch": 0.8391138957261133,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3750
},
{
"epoch": 0.8447079883642873,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3775
},
{
"epoch": 0.8503020810024614,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3800
},
{
"epoch": 0.8558961736406355,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3825
},
{
"epoch": 0.8614902662788095,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3850
},
{
"epoch": 0.8670843589169837,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3875
},
{
"epoch": 0.8726784515551578,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3900
},
{
"epoch": 0.8782725441933319,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3925
},
{
"epoch": 0.8838666368315059,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3950
},
{
"epoch": 0.88946072946968,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 3975
},
{
"epoch": 0.8950548221078541,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4000
},
{
"epoch": 0.9006489147460282,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4025
},
{
"epoch": 0.9062430073842023,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4050
},
{
"epoch": 0.9118371000223764,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4075
},
{
"epoch": 0.9174311926605505,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4100
},
{
"epoch": 0.9230252852987245,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4125
},
{
"epoch": 0.9286193779368986,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4150
},
{
"epoch": 0.9342134705750728,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4175
},
{
"epoch": 0.9398075632132468,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4200
},
{
"epoch": 0.9454016558514209,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4225
},
{
"epoch": 0.950995748489595,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4250
},
{
"epoch": 0.9565898411277691,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4275
},
{
"epoch": 0.9621839337659431,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4300
},
{
"epoch": 0.9677780264041173,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4325
},
{
"epoch": 0.9733721190422914,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4350
},
{
"epoch": 0.9789662116804654,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4375
},
{
"epoch": 0.9845603043186395,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4400
},
{
"epoch": 0.9901543969568136,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4425
},
{
"epoch": 0.9957484895949877,
"grad_norm": NaN,
"learning_rate": 0.0,
"loss": 0.0,
"step": 4450
},
{
"epoch": 1.0,
"eval_gen_len": 12.915,
"eval_loss": NaN,
"eval_rouge1": 13.2945,
"eval_rouge2": 3.5623,
"eval_rougeL": 12.4291,
"eval_rougeLsum": 12.5471,
"eval_runtime": 125.0743,
"eval_samples_per_second": 8.931,
"eval_steps_per_second": 2.239,
"step": 4469
}
],
"logging_steps": 25,
"max_steps": 13407,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 434735324135424.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}