EntityCS-39-PEP_MS_MLM-xlmr-base / trainer_state.json
Xmm's picture
Upload 11 files
154a15f verified
{
"best_metric": 0.260405570268631,
"best_model_checkpoint": "./checkpoint-huawei-noah/checkpoint-45000",
"epoch": 1.9922082521692934,
"eval_steps": 1000,
"global_step": 45000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"grad_norm": 13.771051406860352,
"learning_rate": 1.9873510587163855e-05,
"loss": 0.7093,
"step": 1000
},
{
"epoch": 0.04,
"eval_LOC_f1": 0.6130136331478614,
"eval_ORG_f1": 0.5179822369964004,
"eval_PER_f1": 0.6815983074481896,
"eval_loss": 0.5055311322212219,
"eval_overall_accuracy": 0.8334903358124973,
"eval_overall_f1": 0.6058789768206305,
"eval_overall_precision": 0.5856612385653673,
"eval_overall_recall": 0.6275425074138874,
"eval_runtime": 974.5486,
"eval_samples_per_second": 67.416,
"eval_steps_per_second": 0.264,
"step": 1000
},
{
"epoch": 0.09,
"grad_norm": 23.08890151977539,
"learning_rate": 1.974702117432771e-05,
"loss": 0.501,
"step": 2000
},
{
"epoch": 0.09,
"eval_LOC_f1": 0.6875965698393078,
"eval_ORG_f1": 0.5727174388363369,
"eval_PER_f1": 0.7225324778200253,
"eval_loss": 0.4574837386608124,
"eval_overall_accuracy": 0.8565286056409621,
"eval_overall_f1": 0.6654497292771804,
"eval_overall_precision": 0.6761779246769729,
"eval_overall_recall": 0.6550566427755974,
"eval_runtime": 902.1725,
"eval_samples_per_second": 72.824,
"eval_steps_per_second": 0.285,
"step": 2000
},
{
"epoch": 0.13,
"grad_norm": 20.49671745300293,
"learning_rate": 1.9620531761491565e-05,
"loss": 0.4505,
"step": 3000
},
{
"epoch": 0.13,
"eval_LOC_f1": 0.7204635210044149,
"eval_ORG_f1": 0.5725871273468899,
"eval_PER_f1": 0.7433987684658955,
"eval_loss": 0.4243098795413971,
"eval_overall_accuracy": 0.8621471869792049,
"eval_overall_f1": 0.6874798198516072,
"eval_overall_precision": 0.6776606774900581,
"eval_overall_recall": 0.6975877000398559,
"eval_runtime": 876.2182,
"eval_samples_per_second": 74.981,
"eval_steps_per_second": 0.293,
"step": 3000
},
{
"epoch": 0.18,
"grad_norm": 14.499987602233887,
"learning_rate": 1.949404234865542e-05,
"loss": 0.4268,
"step": 4000
},
{
"epoch": 0.18,
"eval_LOC_f1": 0.7364175740245708,
"eval_ORG_f1": 0.5861517860999524,
"eval_PER_f1": 0.7518103620724146,
"eval_loss": 0.4260491728782654,
"eval_overall_accuracy": 0.8618999871026152,
"eval_overall_f1": 0.6951561442521513,
"eval_overall_precision": 0.6862764458838554,
"eval_overall_recall": 0.7042686428026184,
"eval_runtime": 900.7744,
"eval_samples_per_second": 72.937,
"eval_steps_per_second": 0.285,
"step": 4000
},
{
"epoch": 0.22,
"grad_norm": 2.379225969314575,
"learning_rate": 1.9367552935819272e-05,
"loss": 0.4023,
"step": 5000
},
{
"epoch": 0.22,
"eval_LOC_f1": 0.7258738398309293,
"eval_ORG_f1": 0.6257472293742304,
"eval_PER_f1": 0.7652099137004392,
"eval_loss": 0.3998052477836609,
"eval_overall_accuracy": 0.8690542423545061,
"eval_overall_f1": 0.70826256852105,
"eval_overall_precision": 0.7241968388877515,
"eval_overall_recall": 0.6930143954388549,
"eval_runtime": 879.0998,
"eval_samples_per_second": 74.736,
"eval_steps_per_second": 0.292,
"step": 5000
},
{
"epoch": 0.27,
"grad_norm": 17.12338638305664,
"learning_rate": 1.924106352298313e-05,
"loss": 0.3733,
"step": 6000
},
{
"epoch": 0.27,
"eval_LOC_f1": 0.7708459775529246,
"eval_ORG_f1": 0.6382364519091983,
"eval_PER_f1": 0.774621616895607,
"eval_loss": 0.3535228669643402,
"eval_overall_accuracy": 0.8847707173727775,
"eval_overall_f1": 0.7347440116838627,
"eval_overall_precision": 0.7320440692286544,
"eval_overall_recall": 0.7374639438503577,
"eval_runtime": 911.8464,
"eval_samples_per_second": 72.052,
"eval_steps_per_second": 0.282,
"step": 6000
},
{
"epoch": 0.31,
"grad_norm": 45.46567153930664,
"learning_rate": 1.9114574110146982e-05,
"loss": 0.3636,
"step": 7000
},
{
"epoch": 0.31,
"eval_LOC_f1": 0.7551567306422088,
"eval_ORG_f1": 0.6445153971205689,
"eval_PER_f1": 0.7953595339091327,
"eval_loss": 0.36239349842071533,
"eval_overall_accuracy": 0.8839519863237142,
"eval_overall_f1": 0.7360533119495638,
"eval_overall_precision": 0.7536363185945938,
"eval_overall_recall": 0.7192720542041302,
"eval_runtime": 934.2532,
"eval_samples_per_second": 70.324,
"eval_steps_per_second": 0.275,
"step": 7000
},
{
"epoch": 0.35,
"grad_norm": 23.77419090270996,
"learning_rate": 1.8988084697310836e-05,
"loss": 0.3438,
"step": 8000
},
{
"epoch": 0.35,
"eval_LOC_f1": 0.7777527075812274,
"eval_ORG_f1": 0.6523009578392771,
"eval_PER_f1": 0.7983586509404405,
"eval_loss": 0.35361218452453613,
"eval_overall_accuracy": 0.8877984416924404,
"eval_overall_f1": 0.7468599888415638,
"eval_overall_precision": 0.7522804372408594,
"eval_overall_recall": 0.741517094161437,
"eval_runtime": 964.9468,
"eval_samples_per_second": 68.087,
"eval_steps_per_second": 0.266,
"step": 8000
},
{
"epoch": 0.4,
"grad_norm": 12.217660903930664,
"learning_rate": 1.8861595284474693e-05,
"loss": 0.3388,
"step": 9000
},
{
"epoch": 0.4,
"eval_LOC_f1": 0.7769920027192643,
"eval_ORG_f1": 0.6685048661658352,
"eval_PER_f1": 0.7955169761564663,
"eval_loss": 0.36109644174575806,
"eval_overall_accuracy": 0.8892544552878013,
"eval_overall_f1": 0.7515097063573983,
"eval_overall_precision": 0.763163393230255,
"eval_overall_recall": 0.7402065755608547,
"eval_runtime": 935.4215,
"eval_samples_per_second": 70.236,
"eval_steps_per_second": 0.275,
"step": 9000
},
{
"epoch": 0.44,
"grad_norm": 6.038209438323975,
"learning_rate": 1.8735105871638546e-05,
"loss": 0.34,
"step": 10000
},
{
"epoch": 0.44,
"eval_LOC_f1": 0.7909373544148547,
"eval_ORG_f1": 0.6797622124579994,
"eval_PER_f1": 0.8097494515182176,
"eval_loss": 0.31574323773384094,
"eval_overall_accuracy": 0.8950133905201693,
"eval_overall_f1": 0.7636732236459436,
"eval_overall_precision": 0.7691121860312153,
"eval_overall_recall": 0.7583106469503421,
"eval_runtime": 875.8957,
"eval_samples_per_second": 75.009,
"eval_steps_per_second": 0.293,
"step": 10000
},
{
"epoch": 0.49,
"grad_norm": 4.172688961029053,
"learning_rate": 1.86086164588024e-05,
"loss": 0.3234,
"step": 11000
},
{
"epoch": 0.49,
"eval_LOC_f1": 0.7809025636945843,
"eval_ORG_f1": 0.6583455896967046,
"eval_PER_f1": 0.8088906849868661,
"eval_loss": 0.3503168821334839,
"eval_overall_accuracy": 0.8873938178535211,
"eval_overall_f1": 0.7490455281410953,
"eval_overall_precision": 0.7389521011220446,
"eval_overall_recall": 0.7594185080353705,
"eval_runtime": 876.8783,
"eval_samples_per_second": 74.925,
"eval_steps_per_second": 0.293,
"step": 11000
},
{
"epoch": 0.53,
"grad_norm": 0.31278446316719055,
"learning_rate": 1.8482127045966253e-05,
"loss": 0.3116,
"step": 12000
},
{
"epoch": 0.53,
"eval_LOC_f1": 0.8015539728054758,
"eval_ORG_f1": 0.6780179794520548,
"eval_PER_f1": 0.807255456750202,
"eval_loss": 0.3222993314266205,
"eval_overall_accuracy": 0.8972015203740747,
"eval_overall_f1": 0.7650194669062593,
"eval_overall_precision": 0.7538710756376371,
"eval_overall_recall": 0.7765025365965696,
"eval_runtime": 876.0279,
"eval_samples_per_second": 74.998,
"eval_steps_per_second": 0.293,
"step": 12000
},
{
"epoch": 0.58,
"grad_norm": 17.869319915771484,
"learning_rate": 1.8355637633130106e-05,
"loss": 0.3195,
"step": 13000
},
{
"epoch": 0.58,
"eval_LOC_f1": 0.8077397954482631,
"eval_ORG_f1": 0.6910689787013923,
"eval_PER_f1": 0.8216717076180746,
"eval_loss": 0.3207678496837616,
"eval_overall_accuracy": 0.8985247667722903,
"eval_overall_f1": 0.7773739255449474,
"eval_overall_precision": 0.7767264634475317,
"eval_overall_recall": 0.7780224679632244,
"eval_runtime": 885.6262,
"eval_samples_per_second": 74.185,
"eval_steps_per_second": 0.29,
"step": 13000
},
{
"epoch": 0.62,
"grad_norm": 31.75470733642578,
"learning_rate": 1.8229148220293963e-05,
"loss": 0.3038,
"step": 14000
},
{
"epoch": 0.62,
"eval_LOC_f1": 0.7926177313116938,
"eval_ORG_f1": 0.6917118970756915,
"eval_PER_f1": 0.8275611161079727,
"eval_loss": 0.3353281319141388,
"eval_overall_accuracy": 0.8932463223486391,
"eval_overall_f1": 0.775302594053191,
"eval_overall_precision": 0.7831411656868759,
"eval_overall_recall": 0.7676193821647875,
"eval_runtime": 929.7993,
"eval_samples_per_second": 70.66,
"eval_steps_per_second": 0.276,
"step": 14000
},
{
"epoch": 0.66,
"grad_norm": 4.61831521987915,
"learning_rate": 1.8102658807457817e-05,
"loss": 0.3108,
"step": 15000
},
{
"epoch": 0.66,
"eval_LOC_f1": 0.8077224388355109,
"eval_ORG_f1": 0.6909212129572154,
"eval_PER_f1": 0.8192221560207918,
"eval_loss": 0.3174687623977661,
"eval_overall_accuracy": 0.8962873233880165,
"eval_overall_f1": 0.7771138787987528,
"eval_overall_precision": 0.7788884364820847,
"eval_overall_recall": 0.7753473887579121,
"eval_runtime": 989.2234,
"eval_samples_per_second": 66.416,
"eval_steps_per_second": 0.26,
"step": 15000
},
{
"epoch": 0.71,
"grad_norm": 12.568930625915527,
"learning_rate": 1.7976169394621674e-05,
"loss": 0.3071,
"step": 16000
},
{
"epoch": 0.71,
"eval_LOC_f1": 0.794363707776905,
"eval_ORG_f1": 0.6988674651653153,
"eval_PER_f1": 0.830271581375005,
"eval_loss": 0.32856282591819763,
"eval_overall_accuracy": 0.8977907538395009,
"eval_overall_f1": 0.7767913544942637,
"eval_overall_precision": 0.7810995225703338,
"eval_overall_recall": 0.772530449291712,
"eval_runtime": 943.0369,
"eval_samples_per_second": 69.669,
"eval_steps_per_second": 0.273,
"step": 16000
},
{
"epoch": 0.75,
"grad_norm": 5.375293731689453,
"learning_rate": 1.7849679981785527e-05,
"loss": 0.294,
"step": 17000
},
{
"epoch": 0.75,
"eval_LOC_f1": 0.8179866156103497,
"eval_ORG_f1": 0.7139503216415458,
"eval_PER_f1": 0.8319541693189051,
"eval_loss": 0.3004244267940521,
"eval_overall_accuracy": 0.9056726997766982,
"eval_overall_f1": 0.7925003906754269,
"eval_overall_precision": 0.7971147209370665,
"eval_overall_recall": 0.7879391757243317,
"eval_runtime": 922.5388,
"eval_samples_per_second": 71.217,
"eval_steps_per_second": 0.279,
"step": 17000
},
{
"epoch": 0.8,
"grad_norm": 15.245515823364258,
"learning_rate": 1.772319056894938e-05,
"loss": 0.3022,
"step": 18000
},
{
"epoch": 0.8,
"eval_LOC_f1": 0.8157123906017681,
"eval_ORG_f1": 0.7098208686331968,
"eval_PER_f1": 0.8456405034257072,
"eval_loss": 0.30179163813591003,
"eval_overall_accuracy": 0.9067740352882565,
"eval_overall_f1": 0.7949277311259202,
"eval_overall_precision": 0.7816879334439119,
"eval_overall_recall": 0.808623752811873,
"eval_runtime": 877.0088,
"eval_samples_per_second": 74.914,
"eval_steps_per_second": 0.293,
"step": 18000
},
{
"epoch": 0.84,
"grad_norm": 27.571258544921875,
"learning_rate": 1.7596701156113234e-05,
"loss": 0.2932,
"step": 19000
},
{
"epoch": 0.84,
"eval_LOC_f1": 0.812069319938867,
"eval_ORG_f1": 0.711144363521586,
"eval_PER_f1": 0.8502128757579667,
"eval_loss": 0.2992981970310211,
"eval_overall_accuracy": 0.9061335916182172,
"eval_overall_f1": 0.7960065863825535,
"eval_overall_precision": 0.7903741392400006,
"eval_overall_recall": 0.8017198867820013,
"eval_runtime": 877.5146,
"eval_samples_per_second": 74.871,
"eval_steps_per_second": 0.293,
"step": 19000
},
{
"epoch": 0.89,
"grad_norm": 6.182469844818115,
"learning_rate": 1.747021174327709e-05,
"loss": 0.2943,
"step": 20000
},
{
"epoch": 0.89,
"eval_LOC_f1": 0.8233705131530693,
"eval_ORG_f1": 0.7053565992474702,
"eval_PER_f1": 0.8430738573647348,
"eval_loss": 0.28671061992645264,
"eval_overall_accuracy": 0.9084127618358795,
"eval_overall_f1": 0.7974687389257328,
"eval_overall_precision": 0.7968528685705035,
"eval_overall_recall": 0.7980855620030669,
"eval_runtime": 876.9521,
"eval_samples_per_second": 74.919,
"eval_steps_per_second": 0.293,
"step": 20000
},
{
"epoch": 0.93,
"grad_norm": 37.97357177734375,
"learning_rate": 1.7343722330440944e-05,
"loss": 0.2782,
"step": 21000
},
{
"epoch": 0.93,
"eval_LOC_f1": 0.8330484744899815,
"eval_ORG_f1": 0.7210407632263661,
"eval_PER_f1": 0.8489616418275103,
"eval_loss": 0.30233901739120483,
"eval_overall_accuracy": 0.906348548032643,
"eval_overall_f1": 0.8060817413887736,
"eval_overall_precision": 0.7987835211557213,
"eval_overall_recall": 0.8135145541872421,
"eval_runtime": 903.2924,
"eval_samples_per_second": 72.734,
"eval_steps_per_second": 0.285,
"step": 21000
},
{
"epoch": 0.97,
"grad_norm": 3.23641037940979,
"learning_rate": 1.7217232917604798e-05,
"loss": 0.2913,
"step": 22000
},
{
"epoch": 0.97,
"eval_LOC_f1": 0.8284559448131656,
"eval_ORG_f1": 0.7247506440319704,
"eval_PER_f1": 0.8467821489168608,
"eval_loss": 0.2910088300704956,
"eval_overall_accuracy": 0.908466500939486,
"eval_overall_f1": 0.8031924311719724,
"eval_overall_precision": 0.7945208195637806,
"eval_overall_recall": 0.8120554200752534,
"eval_runtime": 930.8988,
"eval_samples_per_second": 70.577,
"eval_steps_per_second": 0.276,
"step": 22000
},
{
"epoch": 1.02,
"grad_norm": 51.232643127441406,
"learning_rate": 1.709074350476865e-05,
"loss": 0.254,
"step": 23000
},
{
"epoch": 1.02,
"eval_LOC_f1": 0.8326820729529502,
"eval_ORG_f1": 0.7281753443144438,
"eval_PER_f1": 0.8538723624698639,
"eval_loss": 0.3031412661075592,
"eval_overall_accuracy": 0.9094907050317503,
"eval_overall_f1": 0.8106029688042823,
"eval_overall_precision": 0.8094644661502189,
"eval_overall_recall": 0.8117446785514041,
"eval_runtime": 982.2729,
"eval_samples_per_second": 66.886,
"eval_steps_per_second": 0.262,
"step": 23000
},
{
"epoch": 1.06,
"grad_norm": 3.0182816982269287,
"learning_rate": 1.6964254091932504e-05,
"loss": 0.2412,
"step": 24000
},
{
"epoch": 1.06,
"eval_LOC_f1": 0.8337292382788848,
"eval_ORG_f1": 0.7265721539463927,
"eval_PER_f1": 0.8438998504510589,
"eval_loss": 0.2959749400615692,
"eval_overall_accuracy": 0.9087023207706061,
"eval_overall_f1": 0.8054477820887165,
"eval_overall_precision": 0.7949914132873621,
"eval_overall_recall": 0.8161828781420359,
"eval_runtime": 937.946,
"eval_samples_per_second": 70.047,
"eval_steps_per_second": 0.274,
"step": 24000
},
{
"epoch": 1.11,
"grad_norm": 4.651257038116455,
"learning_rate": 1.683776467909636e-05,
"loss": 0.2248,
"step": 25000
},
{
"epoch": 1.11,
"eval_LOC_f1": 0.8204892221350009,
"eval_ORG_f1": 0.7164497497985908,
"eval_PER_f1": 0.8366153573083787,
"eval_loss": 0.2870059013366699,
"eval_overall_accuracy": 0.9095747909232758,
"eval_overall_f1": 0.7954083144399056,
"eval_overall_precision": 0.7841744564646571,
"eval_overall_recall": 0.806968716434849,
"eval_runtime": 880.5612,
"eval_samples_per_second": 74.612,
"eval_steps_per_second": 0.292,
"step": 25000
},
{
"epoch": 1.15,
"grad_norm": 0.5376187562942505,
"learning_rate": 1.6711275266260215e-05,
"loss": 0.2367,
"step": 26000
},
{
"epoch": 1.15,
"eval_LOC_f1": 0.8274943290245922,
"eval_ORG_f1": 0.7181801646127961,
"eval_PER_f1": 0.8393770384236834,
"eval_loss": 0.3035840094089508,
"eval_overall_accuracy": 0.9099459068505344,
"eval_overall_f1": 0.7997951699758828,
"eval_overall_precision": 0.7826674511984585,
"eval_overall_recall": 0.8176892990076537,
"eval_runtime": 877.1859,
"eval_samples_per_second": 74.899,
"eval_steps_per_second": 0.293,
"step": 26000
},
{
"epoch": 1.2,
"grad_norm": 29.850025177001953,
"learning_rate": 1.658478585342407e-05,
"loss": 0.2259,
"step": 27000
},
{
"epoch": 1.2,
"eval_LOC_f1": 0.8368737846096861,
"eval_ORG_f1": 0.730566338210411,
"eval_PER_f1": 0.8548625950405009,
"eval_loss": 0.2981078624725342,
"eval_overall_accuracy": 0.912430550111398,
"eval_overall_f1": 0.8107802613802052,
"eval_overall_precision": 0.8028279082088811,
"eval_overall_recall": 0.8188917335999406,
"eval_runtime": 900.9915,
"eval_samples_per_second": 72.92,
"eval_steps_per_second": 0.285,
"step": 27000
},
{
"epoch": 1.24,
"grad_norm": 6.522253036499023,
"learning_rate": 1.6458296440587925e-05,
"loss": 0.2353,
"step": 28000
},
{
"epoch": 1.24,
"eval_LOC_f1": 0.8361146769362431,
"eval_ORG_f1": 0.7397288818401768,
"eval_PER_f1": 0.851691836373991,
"eval_loss": 0.2890784442424774,
"eval_overall_accuracy": 0.9118988490980682,
"eval_overall_f1": 0.813503140265178,
"eval_overall_precision": 0.8080863288253761,
"eval_overall_recall": 0.8189930623577175,
"eval_runtime": 898.5924,
"eval_samples_per_second": 73.114,
"eval_steps_per_second": 0.286,
"step": 28000
},
{
"epoch": 1.28,
"grad_norm": 18.48634910583496,
"learning_rate": 1.633180702775178e-05,
"loss": 0.231,
"step": 29000
},
{
"epoch": 1.28,
"eval_LOC_f1": 0.8399307496708805,
"eval_ORG_f1": 0.7353511607405231,
"eval_PER_f1": 0.8429051875514639,
"eval_loss": 0.29812344908714294,
"eval_overall_accuracy": 0.9100799384971765,
"eval_overall_f1": 0.8109618028412001,
"eval_overall_precision": 0.8090263071203351,
"eval_overall_recall": 0.8129065816405802,
"eval_runtime": 911.3101,
"eval_samples_per_second": 72.094,
"eval_steps_per_second": 0.282,
"step": 29000
},
{
"epoch": 1.33,
"grad_norm": 6.476167678833008,
"learning_rate": 1.6205317614915632e-05,
"loss": 0.2298,
"step": 30000
},
{
"epoch": 1.33,
"eval_LOC_f1": 0.8437642148074813,
"eval_ORG_f1": 0.741920341727885,
"eval_PER_f1": 0.845791168353266,
"eval_loss": 0.2789755165576935,
"eval_overall_accuracy": 0.9128231616800994,
"eval_overall_f1": 0.8156984934617233,
"eval_overall_precision": 0.8007835888891781,
"eval_overall_recall": 0.8311795342930293,
"eval_runtime": 950.013,
"eval_samples_per_second": 69.157,
"eval_steps_per_second": 0.271,
"step": 30000
},
{
"epoch": 1.37,
"grad_norm": 0.6922666430473328,
"learning_rate": 1.607882820207949e-05,
"loss": 0.2236,
"step": 31000
},
{
"epoch": 1.37,
"eval_LOC_f1": 0.8401312445122232,
"eval_ORG_f1": 0.7412946847115139,
"eval_PER_f1": 0.8599101069965396,
"eval_loss": 0.2861514985561371,
"eval_overall_accuracy": 0.9133093425115507,
"eval_overall_f1": 0.8168471254617229,
"eval_overall_precision": 0.8077931685921699,
"eval_overall_recall": 0.8261063411536617,
"eval_runtime": 964.6377,
"eval_samples_per_second": 68.108,
"eval_steps_per_second": 0.266,
"step": 31000
},
{
"epoch": 1.42,
"grad_norm": 10.913984298706055,
"learning_rate": 1.5952338789243342e-05,
"loss": 0.2164,
"step": 32000
},
{
"epoch": 1.42,
"eval_LOC_f1": 0.8450018789928598,
"eval_ORG_f1": 0.7453389102160086,
"eval_PER_f1": 0.8475419561015748,
"eval_loss": 0.29202836751937866,
"eval_overall_accuracy": 0.9133548626934291,
"eval_overall_f1": 0.8167286457267982,
"eval_overall_precision": 0.8108745156006552,
"eval_overall_recall": 0.8226679186397627,
"eval_runtime": 934.7918,
"eval_samples_per_second": 70.283,
"eval_steps_per_second": 0.275,
"step": 32000
},
{
"epoch": 1.46,
"grad_norm": 8.604541778564453,
"learning_rate": 1.5825849376407196e-05,
"loss": 0.2343,
"step": 33000
},
{
"epoch": 1.46,
"eval_LOC_f1": 0.8327052539148251,
"eval_ORG_f1": 0.7464142820374833,
"eval_PER_f1": 0.8571568569804591,
"eval_loss": 0.26980945467948914,
"eval_overall_accuracy": 0.915141529832157,
"eval_overall_f1": 0.8162628685387808,
"eval_overall_precision": 0.8141182004502234,
"eval_overall_recall": 0.818418866063648,
"eval_runtime": 878.6904,
"eval_samples_per_second": 74.77,
"eval_steps_per_second": 0.292,
"step": 33000
},
{
"epoch": 1.51,
"grad_norm": 20.011140823364258,
"learning_rate": 1.569935996357105e-05,
"loss": 0.2305,
"step": 34000
},
{
"epoch": 1.51,
"eval_LOC_f1": 0.8434370154154885,
"eval_ORG_f1": 0.7450794786844748,
"eval_PER_f1": 0.8598302131901996,
"eval_loss": 0.2736206650733948,
"eval_overall_accuracy": 0.9164160949247526,
"eval_overall_f1": 0.820116525352046,
"eval_overall_precision": 0.8215285544822911,
"eval_overall_recall": 0.818709341835942,
"eval_runtime": 878.0774,
"eval_samples_per_second": 74.823,
"eval_steps_per_second": 0.293,
"step": 34000
},
{
"epoch": 1.55,
"grad_norm": 0.8893330693244934,
"learning_rate": 1.5572870550734906e-05,
"loss": 0.218,
"step": 35000
},
{
"epoch": 1.55,
"eval_LOC_f1": 0.8372996858861737,
"eval_ORG_f1": 0.7351363688234623,
"eval_PER_f1": 0.8452893909397927,
"eval_loss": 0.277670681476593,
"eval_overall_accuracy": 0.9132897435443531,
"eval_overall_f1": 0.8105032765054125,
"eval_overall_precision": 0.7914432306117588,
"eval_overall_recall": 0.8305040092411827,
"eval_runtime": 878.1965,
"eval_samples_per_second": 74.812,
"eval_steps_per_second": 0.293,
"step": 35000
},
{
"epoch": 1.59,
"grad_norm": 2.807310104370117,
"learning_rate": 1.544638113789876e-05,
"loss": 0.2209,
"step": 36000
},
{
"epoch": 1.59,
"eval_LOC_f1": 0.8436019819082686,
"eval_ORG_f1": 0.7541017701160051,
"eval_PER_f1": 0.8559255699664113,
"eval_loss": 0.2975883483886719,
"eval_overall_accuracy": 0.9155777649084917,
"eval_overall_f1": 0.8217433690792348,
"eval_overall_precision": 0.8178831213153369,
"eval_overall_recall": 0.8256402288678876,
"eval_runtime": 887.7886,
"eval_samples_per_second": 74.004,
"eval_steps_per_second": 0.289,
"step": 36000
},
{
"epoch": 1.64,
"grad_norm": 0.7378529906272888,
"learning_rate": 1.5319891725062616e-05,
"loss": 0.2068,
"step": 37000
},
{
"epoch": 1.64,
"eval_LOC_f1": 0.8404596277816221,
"eval_ORG_f1": 0.7519756060658962,
"eval_PER_f1": 0.8650594959056045,
"eval_loss": 0.2906901240348816,
"eval_overall_accuracy": 0.9159109473508519,
"eval_overall_f1": 0.8231073274551537,
"eval_overall_precision": 0.8232630746670091,
"eval_overall_recall": 0.8229516391615384,
"eval_runtime": 914.9939,
"eval_samples_per_second": 71.804,
"eval_steps_per_second": 0.281,
"step": 37000
},
{
"epoch": 1.68,
"grad_norm": 4.775814056396484,
"learning_rate": 1.5193402312226468e-05,
"loss": 0.2222,
"step": 38000
},
{
"epoch": 1.68,
"eval_LOC_f1": 0.8486954241510423,
"eval_ORG_f1": 0.7403596163509645,
"eval_PER_f1": 0.8627163820626227,
"eval_loss": 0.2920599579811096,
"eval_overall_accuracy": 0.9144530370812459,
"eval_overall_f1": 0.8204945751023299,
"eval_overall_precision": 0.8079300635190885,
"eval_overall_recall": 0.8334560537177521,
"eval_runtime": 974.0136,
"eval_samples_per_second": 67.453,
"eval_steps_per_second": 0.264,
"step": 38000
},
{
"epoch": 1.73,
"grad_norm": 2.2256317138671875,
"learning_rate": 1.5066912899390323e-05,
"loss": 0.2328,
"step": 39000
},
{
"epoch": 1.73,
"eval_LOC_f1": 0.849334397801749,
"eval_ORG_f1": 0.753236617390506,
"eval_PER_f1": 0.8693573280340553,
"eval_loss": 0.29489845037460327,
"eval_overall_accuracy": 0.917294255100157,
"eval_overall_f1": 0.8276056778793333,
"eval_overall_precision": 0.8335114765330592,
"eval_overall_recall": 0.8217829808218438,
"eval_runtime": 944.9062,
"eval_samples_per_second": 69.531,
"eval_steps_per_second": 0.272,
"step": 39000
},
{
"epoch": 1.77,
"grad_norm": 16.9512996673584,
"learning_rate": 1.4940423486554176e-05,
"loss": 0.2229,
"step": 40000
},
{
"epoch": 1.77,
"eval_LOC_f1": 0.8445889009269291,
"eval_ORG_f1": 0.7452847675981278,
"eval_PER_f1": 0.8508496270046708,
"eval_loss": 0.27226653695106506,
"eval_overall_accuracy": 0.916396495957555,
"eval_overall_f1": 0.818113712374582,
"eval_overall_precision": 0.8101638106341121,
"eval_overall_recall": 0.8262211804124756,
"eval_runtime": 919.492,
"eval_samples_per_second": 71.452,
"eval_steps_per_second": 0.28,
"step": 40000
},
{
"epoch": 1.82,
"grad_norm": 0.6652330160140991,
"learning_rate": 1.481393407371803e-05,
"loss": 0.2219,
"step": 41000
},
{
"epoch": 1.82,
"eval_LOC_f1": 0.8483445744353834,
"eval_ORG_f1": 0.7490041659533185,
"eval_PER_f1": 0.865145374272123,
"eval_loss": 0.2795349359512329,
"eval_overall_accuracy": 0.9183601860258099,
"eval_overall_f1": 0.8247159453593771,
"eval_overall_precision": 0.8203393908609086,
"eval_overall_recall": 0.8291394486364527,
"eval_runtime": 878.0472,
"eval_samples_per_second": 74.825,
"eval_steps_per_second": 0.293,
"step": 41000
},
{
"epoch": 1.86,
"grad_norm": 8.537057876586914,
"learning_rate": 1.4687444660881885e-05,
"loss": 0.2265,
"step": 42000
},
{
"epoch": 1.86,
"eval_LOC_f1": 0.8431806420528559,
"eval_ORG_f1": 0.7481651106805237,
"eval_PER_f1": 0.8624314888139032,
"eval_loss": 0.2695271372795105,
"eval_overall_accuracy": 0.9176305986662586,
"eval_overall_f1": 0.8227747180347664,
"eval_overall_precision": 0.8127298369558054,
"eval_overall_recall": 0.8330710044381996,
"eval_runtime": 878.4759,
"eval_samples_per_second": 74.789,
"eval_steps_per_second": 0.293,
"step": 42000
},
{
"epoch": 1.9,
"grad_norm": 2.5510284900665283,
"learning_rate": 1.456095524804574e-05,
"loss": 0.208,
"step": 43000
},
{
"epoch": 1.9,
"eval_LOC_f1": 0.8455445895423215,
"eval_ORG_f1": 0.7595170903640558,
"eval_PER_f1": 0.8683249226114047,
"eval_loss": 0.291418194770813,
"eval_overall_accuracy": 0.9158768072144431,
"eval_overall_f1": 0.828613510075651,
"eval_overall_precision": 0.8212624496539643,
"eval_overall_recall": 0.8360973566704721,
"eval_runtime": 879.88,
"eval_samples_per_second": 74.669,
"eval_steps_per_second": 0.292,
"step": 43000
},
{
"epoch": 1.95,
"grad_norm": 40.04784393310547,
"learning_rate": 1.4434465835209595e-05,
"loss": 0.2201,
"step": 44000
},
{
"epoch": 1.95,
"eval_LOC_f1": 0.8562697361719467,
"eval_ORG_f1": 0.7598152424942263,
"eval_PER_f1": 0.8635555196248839,
"eval_loss": 0.26673147082328796,
"eval_overall_accuracy": 0.9210597857010994,
"eval_overall_f1": 0.8302701818813599,
"eval_overall_precision": 0.8374682152429387,
"eval_overall_recall": 0.8231948281802031,
"eval_runtime": 908.7842,
"eval_samples_per_second": 72.294,
"eval_steps_per_second": 0.283,
"step": 44000
},
{
"epoch": 1.99,
"grad_norm": 4.13014030456543,
"learning_rate": 1.4307976422373449e-05,
"loss": 0.2065,
"step": 45000
},
{
"epoch": 1.99,
"eval_LOC_f1": 0.850374667633551,
"eval_ORG_f1": 0.7572422253856926,
"eval_PER_f1": 0.8662861965717501,
"eval_loss": 0.260405570268631,
"eval_overall_accuracy": 0.9184101317809266,
"eval_overall_f1": 0.8283524878105291,
"eval_overall_precision": 0.835684527492472,
"eval_overall_recall": 0.821147987273108,
"eval_runtime": 930.7338,
"eval_samples_per_second": 70.589,
"eval_steps_per_second": 0.276,
"step": 45000
}
],
"logging_steps": 1000,
"max_steps": 158116,
"num_input_tokens_seen": 0,
"num_train_epochs": 7,
"save_steps": 500,
"total_flos": 1.4980271080173432e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}