|
{ |
|
"best_metric": 0.260405570268631, |
|
"best_model_checkpoint": "./checkpoint-huawei-noah/checkpoint-45000", |
|
"epoch": 1.9922082521692934, |
|
"eval_steps": 1000, |
|
"global_step": 45000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.771051406860352, |
|
"learning_rate": 1.9873510587163855e-05, |
|
"loss": 0.7093, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_LOC_f1": 0.6130136331478614, |
|
"eval_ORG_f1": 0.5179822369964004, |
|
"eval_PER_f1": 0.6815983074481896, |
|
"eval_loss": 0.5055311322212219, |
|
"eval_overall_accuracy": 0.8334903358124973, |
|
"eval_overall_f1": 0.6058789768206305, |
|
"eval_overall_precision": 0.5856612385653673, |
|
"eval_overall_recall": 0.6275425074138874, |
|
"eval_runtime": 974.5486, |
|
"eval_samples_per_second": 67.416, |
|
"eval_steps_per_second": 0.264, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 23.08890151977539, |
|
"learning_rate": 1.974702117432771e-05, |
|
"loss": 0.501, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_LOC_f1": 0.6875965698393078, |
|
"eval_ORG_f1": 0.5727174388363369, |
|
"eval_PER_f1": 0.7225324778200253, |
|
"eval_loss": 0.4574837386608124, |
|
"eval_overall_accuracy": 0.8565286056409621, |
|
"eval_overall_f1": 0.6654497292771804, |
|
"eval_overall_precision": 0.6761779246769729, |
|
"eval_overall_recall": 0.6550566427755974, |
|
"eval_runtime": 902.1725, |
|
"eval_samples_per_second": 72.824, |
|
"eval_steps_per_second": 0.285, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 20.49671745300293, |
|
"learning_rate": 1.9620531761491565e-05, |
|
"loss": 0.4505, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_LOC_f1": 0.7204635210044149, |
|
"eval_ORG_f1": 0.5725871273468899, |
|
"eval_PER_f1": 0.7433987684658955, |
|
"eval_loss": 0.4243098795413971, |
|
"eval_overall_accuracy": 0.8621471869792049, |
|
"eval_overall_f1": 0.6874798198516072, |
|
"eval_overall_precision": 0.6776606774900581, |
|
"eval_overall_recall": 0.6975877000398559, |
|
"eval_runtime": 876.2182, |
|
"eval_samples_per_second": 74.981, |
|
"eval_steps_per_second": 0.293, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 14.499987602233887, |
|
"learning_rate": 1.949404234865542e-05, |
|
"loss": 0.4268, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_LOC_f1": 0.7364175740245708, |
|
"eval_ORG_f1": 0.5861517860999524, |
|
"eval_PER_f1": 0.7518103620724146, |
|
"eval_loss": 0.4260491728782654, |
|
"eval_overall_accuracy": 0.8618999871026152, |
|
"eval_overall_f1": 0.6951561442521513, |
|
"eval_overall_precision": 0.6862764458838554, |
|
"eval_overall_recall": 0.7042686428026184, |
|
"eval_runtime": 900.7744, |
|
"eval_samples_per_second": 72.937, |
|
"eval_steps_per_second": 0.285, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.379225969314575, |
|
"learning_rate": 1.9367552935819272e-05, |
|
"loss": 0.4023, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_LOC_f1": 0.7258738398309293, |
|
"eval_ORG_f1": 0.6257472293742304, |
|
"eval_PER_f1": 0.7652099137004392, |
|
"eval_loss": 0.3998052477836609, |
|
"eval_overall_accuracy": 0.8690542423545061, |
|
"eval_overall_f1": 0.70826256852105, |
|
"eval_overall_precision": 0.7241968388877515, |
|
"eval_overall_recall": 0.6930143954388549, |
|
"eval_runtime": 879.0998, |
|
"eval_samples_per_second": 74.736, |
|
"eval_steps_per_second": 0.292, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 17.12338638305664, |
|
"learning_rate": 1.924106352298313e-05, |
|
"loss": 0.3733, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_LOC_f1": 0.7708459775529246, |
|
"eval_ORG_f1": 0.6382364519091983, |
|
"eval_PER_f1": 0.774621616895607, |
|
"eval_loss": 0.3535228669643402, |
|
"eval_overall_accuracy": 0.8847707173727775, |
|
"eval_overall_f1": 0.7347440116838627, |
|
"eval_overall_precision": 0.7320440692286544, |
|
"eval_overall_recall": 0.7374639438503577, |
|
"eval_runtime": 911.8464, |
|
"eval_samples_per_second": 72.052, |
|
"eval_steps_per_second": 0.282, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 45.46567153930664, |
|
"learning_rate": 1.9114574110146982e-05, |
|
"loss": 0.3636, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_LOC_f1": 0.7551567306422088, |
|
"eval_ORG_f1": 0.6445153971205689, |
|
"eval_PER_f1": 0.7953595339091327, |
|
"eval_loss": 0.36239349842071533, |
|
"eval_overall_accuracy": 0.8839519863237142, |
|
"eval_overall_f1": 0.7360533119495638, |
|
"eval_overall_precision": 0.7536363185945938, |
|
"eval_overall_recall": 0.7192720542041302, |
|
"eval_runtime": 934.2532, |
|
"eval_samples_per_second": 70.324, |
|
"eval_steps_per_second": 0.275, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 23.77419090270996, |
|
"learning_rate": 1.8988084697310836e-05, |
|
"loss": 0.3438, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_LOC_f1": 0.7777527075812274, |
|
"eval_ORG_f1": 0.6523009578392771, |
|
"eval_PER_f1": 0.7983586509404405, |
|
"eval_loss": 0.35361218452453613, |
|
"eval_overall_accuracy": 0.8877984416924404, |
|
"eval_overall_f1": 0.7468599888415638, |
|
"eval_overall_precision": 0.7522804372408594, |
|
"eval_overall_recall": 0.741517094161437, |
|
"eval_runtime": 964.9468, |
|
"eval_samples_per_second": 68.087, |
|
"eval_steps_per_second": 0.266, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 12.217660903930664, |
|
"learning_rate": 1.8861595284474693e-05, |
|
"loss": 0.3388, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_LOC_f1": 0.7769920027192643, |
|
"eval_ORG_f1": 0.6685048661658352, |
|
"eval_PER_f1": 0.7955169761564663, |
|
"eval_loss": 0.36109644174575806, |
|
"eval_overall_accuracy": 0.8892544552878013, |
|
"eval_overall_f1": 0.7515097063573983, |
|
"eval_overall_precision": 0.763163393230255, |
|
"eval_overall_recall": 0.7402065755608547, |
|
"eval_runtime": 935.4215, |
|
"eval_samples_per_second": 70.236, |
|
"eval_steps_per_second": 0.275, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 6.038209438323975, |
|
"learning_rate": 1.8735105871638546e-05, |
|
"loss": 0.34, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_LOC_f1": 0.7909373544148547, |
|
"eval_ORG_f1": 0.6797622124579994, |
|
"eval_PER_f1": 0.8097494515182176, |
|
"eval_loss": 0.31574323773384094, |
|
"eval_overall_accuracy": 0.8950133905201693, |
|
"eval_overall_f1": 0.7636732236459436, |
|
"eval_overall_precision": 0.7691121860312153, |
|
"eval_overall_recall": 0.7583106469503421, |
|
"eval_runtime": 875.8957, |
|
"eval_samples_per_second": 75.009, |
|
"eval_steps_per_second": 0.293, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.172688961029053, |
|
"learning_rate": 1.86086164588024e-05, |
|
"loss": 0.3234, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_LOC_f1": 0.7809025636945843, |
|
"eval_ORG_f1": 0.6583455896967046, |
|
"eval_PER_f1": 0.8088906849868661, |
|
"eval_loss": 0.3503168821334839, |
|
"eval_overall_accuracy": 0.8873938178535211, |
|
"eval_overall_f1": 0.7490455281410953, |
|
"eval_overall_precision": 0.7389521011220446, |
|
"eval_overall_recall": 0.7594185080353705, |
|
"eval_runtime": 876.8783, |
|
"eval_samples_per_second": 74.925, |
|
"eval_steps_per_second": 0.293, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.31278446316719055, |
|
"learning_rate": 1.8482127045966253e-05, |
|
"loss": 0.3116, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_LOC_f1": 0.8015539728054758, |
|
"eval_ORG_f1": 0.6780179794520548, |
|
"eval_PER_f1": 0.807255456750202, |
|
"eval_loss": 0.3222993314266205, |
|
"eval_overall_accuracy": 0.8972015203740747, |
|
"eval_overall_f1": 0.7650194669062593, |
|
"eval_overall_precision": 0.7538710756376371, |
|
"eval_overall_recall": 0.7765025365965696, |
|
"eval_runtime": 876.0279, |
|
"eval_samples_per_second": 74.998, |
|
"eval_steps_per_second": 0.293, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 17.869319915771484, |
|
"learning_rate": 1.8355637633130106e-05, |
|
"loss": 0.3195, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_LOC_f1": 0.8077397954482631, |
|
"eval_ORG_f1": 0.6910689787013923, |
|
"eval_PER_f1": 0.8216717076180746, |
|
"eval_loss": 0.3207678496837616, |
|
"eval_overall_accuracy": 0.8985247667722903, |
|
"eval_overall_f1": 0.7773739255449474, |
|
"eval_overall_precision": 0.7767264634475317, |
|
"eval_overall_recall": 0.7780224679632244, |
|
"eval_runtime": 885.6262, |
|
"eval_samples_per_second": 74.185, |
|
"eval_steps_per_second": 0.29, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 31.75470733642578, |
|
"learning_rate": 1.8229148220293963e-05, |
|
"loss": 0.3038, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_LOC_f1": 0.7926177313116938, |
|
"eval_ORG_f1": 0.6917118970756915, |
|
"eval_PER_f1": 0.8275611161079727, |
|
"eval_loss": 0.3353281319141388, |
|
"eval_overall_accuracy": 0.8932463223486391, |
|
"eval_overall_f1": 0.775302594053191, |
|
"eval_overall_precision": 0.7831411656868759, |
|
"eval_overall_recall": 0.7676193821647875, |
|
"eval_runtime": 929.7993, |
|
"eval_samples_per_second": 70.66, |
|
"eval_steps_per_second": 0.276, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.61831521987915, |
|
"learning_rate": 1.8102658807457817e-05, |
|
"loss": 0.3108, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_LOC_f1": 0.8077224388355109, |
|
"eval_ORG_f1": 0.6909212129572154, |
|
"eval_PER_f1": 0.8192221560207918, |
|
"eval_loss": 0.3174687623977661, |
|
"eval_overall_accuracy": 0.8962873233880165, |
|
"eval_overall_f1": 0.7771138787987528, |
|
"eval_overall_precision": 0.7788884364820847, |
|
"eval_overall_recall": 0.7753473887579121, |
|
"eval_runtime": 989.2234, |
|
"eval_samples_per_second": 66.416, |
|
"eval_steps_per_second": 0.26, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 12.568930625915527, |
|
"learning_rate": 1.7976169394621674e-05, |
|
"loss": 0.3071, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_LOC_f1": 0.794363707776905, |
|
"eval_ORG_f1": 0.6988674651653153, |
|
"eval_PER_f1": 0.830271581375005, |
|
"eval_loss": 0.32856282591819763, |
|
"eval_overall_accuracy": 0.8977907538395009, |
|
"eval_overall_f1": 0.7767913544942637, |
|
"eval_overall_precision": 0.7810995225703338, |
|
"eval_overall_recall": 0.772530449291712, |
|
"eval_runtime": 943.0369, |
|
"eval_samples_per_second": 69.669, |
|
"eval_steps_per_second": 0.273, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 5.375293731689453, |
|
"learning_rate": 1.7849679981785527e-05, |
|
"loss": 0.294, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_LOC_f1": 0.8179866156103497, |
|
"eval_ORG_f1": 0.7139503216415458, |
|
"eval_PER_f1": 0.8319541693189051, |
|
"eval_loss": 0.3004244267940521, |
|
"eval_overall_accuracy": 0.9056726997766982, |
|
"eval_overall_f1": 0.7925003906754269, |
|
"eval_overall_precision": 0.7971147209370665, |
|
"eval_overall_recall": 0.7879391757243317, |
|
"eval_runtime": 922.5388, |
|
"eval_samples_per_second": 71.217, |
|
"eval_steps_per_second": 0.279, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 15.245515823364258, |
|
"learning_rate": 1.772319056894938e-05, |
|
"loss": 0.3022, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_LOC_f1": 0.8157123906017681, |
|
"eval_ORG_f1": 0.7098208686331968, |
|
"eval_PER_f1": 0.8456405034257072, |
|
"eval_loss": 0.30179163813591003, |
|
"eval_overall_accuracy": 0.9067740352882565, |
|
"eval_overall_f1": 0.7949277311259202, |
|
"eval_overall_precision": 0.7816879334439119, |
|
"eval_overall_recall": 0.808623752811873, |
|
"eval_runtime": 877.0088, |
|
"eval_samples_per_second": 74.914, |
|
"eval_steps_per_second": 0.293, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 27.571258544921875, |
|
"learning_rate": 1.7596701156113234e-05, |
|
"loss": 0.2932, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_LOC_f1": 0.812069319938867, |
|
"eval_ORG_f1": 0.711144363521586, |
|
"eval_PER_f1": 0.8502128757579667, |
|
"eval_loss": 0.2992981970310211, |
|
"eval_overall_accuracy": 0.9061335916182172, |
|
"eval_overall_f1": 0.7960065863825535, |
|
"eval_overall_precision": 0.7903741392400006, |
|
"eval_overall_recall": 0.8017198867820013, |
|
"eval_runtime": 877.5146, |
|
"eval_samples_per_second": 74.871, |
|
"eval_steps_per_second": 0.293, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.182469844818115, |
|
"learning_rate": 1.747021174327709e-05, |
|
"loss": 0.2943, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_LOC_f1": 0.8233705131530693, |
|
"eval_ORG_f1": 0.7053565992474702, |
|
"eval_PER_f1": 0.8430738573647348, |
|
"eval_loss": 0.28671061992645264, |
|
"eval_overall_accuracy": 0.9084127618358795, |
|
"eval_overall_f1": 0.7974687389257328, |
|
"eval_overall_precision": 0.7968528685705035, |
|
"eval_overall_recall": 0.7980855620030669, |
|
"eval_runtime": 876.9521, |
|
"eval_samples_per_second": 74.919, |
|
"eval_steps_per_second": 0.293, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 37.97357177734375, |
|
"learning_rate": 1.7343722330440944e-05, |
|
"loss": 0.2782, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_LOC_f1": 0.8330484744899815, |
|
"eval_ORG_f1": 0.7210407632263661, |
|
"eval_PER_f1": 0.8489616418275103, |
|
"eval_loss": 0.30233901739120483, |
|
"eval_overall_accuracy": 0.906348548032643, |
|
"eval_overall_f1": 0.8060817413887736, |
|
"eval_overall_precision": 0.7987835211557213, |
|
"eval_overall_recall": 0.8135145541872421, |
|
"eval_runtime": 903.2924, |
|
"eval_samples_per_second": 72.734, |
|
"eval_steps_per_second": 0.285, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 3.23641037940979, |
|
"learning_rate": 1.7217232917604798e-05, |
|
"loss": 0.2913, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_LOC_f1": 0.8284559448131656, |
|
"eval_ORG_f1": 0.7247506440319704, |
|
"eval_PER_f1": 0.8467821489168608, |
|
"eval_loss": 0.2910088300704956, |
|
"eval_overall_accuracy": 0.908466500939486, |
|
"eval_overall_f1": 0.8031924311719724, |
|
"eval_overall_precision": 0.7945208195637806, |
|
"eval_overall_recall": 0.8120554200752534, |
|
"eval_runtime": 930.8988, |
|
"eval_samples_per_second": 70.577, |
|
"eval_steps_per_second": 0.276, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 51.232643127441406, |
|
"learning_rate": 1.709074350476865e-05, |
|
"loss": 0.254, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_LOC_f1": 0.8326820729529502, |
|
"eval_ORG_f1": 0.7281753443144438, |
|
"eval_PER_f1": 0.8538723624698639, |
|
"eval_loss": 0.3031412661075592, |
|
"eval_overall_accuracy": 0.9094907050317503, |
|
"eval_overall_f1": 0.8106029688042823, |
|
"eval_overall_precision": 0.8094644661502189, |
|
"eval_overall_recall": 0.8117446785514041, |
|
"eval_runtime": 982.2729, |
|
"eval_samples_per_second": 66.886, |
|
"eval_steps_per_second": 0.262, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 3.0182816982269287, |
|
"learning_rate": 1.6964254091932504e-05, |
|
"loss": 0.2412, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_LOC_f1": 0.8337292382788848, |
|
"eval_ORG_f1": 0.7265721539463927, |
|
"eval_PER_f1": 0.8438998504510589, |
|
"eval_loss": 0.2959749400615692, |
|
"eval_overall_accuracy": 0.9087023207706061, |
|
"eval_overall_f1": 0.8054477820887165, |
|
"eval_overall_precision": 0.7949914132873621, |
|
"eval_overall_recall": 0.8161828781420359, |
|
"eval_runtime": 937.946, |
|
"eval_samples_per_second": 70.047, |
|
"eval_steps_per_second": 0.274, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 4.651257038116455, |
|
"learning_rate": 1.683776467909636e-05, |
|
"loss": 0.2248, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_LOC_f1": 0.8204892221350009, |
|
"eval_ORG_f1": 0.7164497497985908, |
|
"eval_PER_f1": 0.8366153573083787, |
|
"eval_loss": 0.2870059013366699, |
|
"eval_overall_accuracy": 0.9095747909232758, |
|
"eval_overall_f1": 0.7954083144399056, |
|
"eval_overall_precision": 0.7841744564646571, |
|
"eval_overall_recall": 0.806968716434849, |
|
"eval_runtime": 880.5612, |
|
"eval_samples_per_second": 74.612, |
|
"eval_steps_per_second": 0.292, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.5376187562942505, |
|
"learning_rate": 1.6711275266260215e-05, |
|
"loss": 0.2367, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_LOC_f1": 0.8274943290245922, |
|
"eval_ORG_f1": 0.7181801646127961, |
|
"eval_PER_f1": 0.8393770384236834, |
|
"eval_loss": 0.3035840094089508, |
|
"eval_overall_accuracy": 0.9099459068505344, |
|
"eval_overall_f1": 0.7997951699758828, |
|
"eval_overall_precision": 0.7826674511984585, |
|
"eval_overall_recall": 0.8176892990076537, |
|
"eval_runtime": 877.1859, |
|
"eval_samples_per_second": 74.899, |
|
"eval_steps_per_second": 0.293, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 29.850025177001953, |
|
"learning_rate": 1.658478585342407e-05, |
|
"loss": 0.2259, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_LOC_f1": 0.8368737846096861, |
|
"eval_ORG_f1": 0.730566338210411, |
|
"eval_PER_f1": 0.8548625950405009, |
|
"eval_loss": 0.2981078624725342, |
|
"eval_overall_accuracy": 0.912430550111398, |
|
"eval_overall_f1": 0.8107802613802052, |
|
"eval_overall_precision": 0.8028279082088811, |
|
"eval_overall_recall": 0.8188917335999406, |
|
"eval_runtime": 900.9915, |
|
"eval_samples_per_second": 72.92, |
|
"eval_steps_per_second": 0.285, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 6.522253036499023, |
|
"learning_rate": 1.6458296440587925e-05, |
|
"loss": 0.2353, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_LOC_f1": 0.8361146769362431, |
|
"eval_ORG_f1": 0.7397288818401768, |
|
"eval_PER_f1": 0.851691836373991, |
|
"eval_loss": 0.2890784442424774, |
|
"eval_overall_accuracy": 0.9118988490980682, |
|
"eval_overall_f1": 0.813503140265178, |
|
"eval_overall_precision": 0.8080863288253761, |
|
"eval_overall_recall": 0.8189930623577175, |
|
"eval_runtime": 898.5924, |
|
"eval_samples_per_second": 73.114, |
|
"eval_steps_per_second": 0.286, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 18.48634910583496, |
|
"learning_rate": 1.633180702775178e-05, |
|
"loss": 0.231, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_LOC_f1": 0.8399307496708805, |
|
"eval_ORG_f1": 0.7353511607405231, |
|
"eval_PER_f1": 0.8429051875514639, |
|
"eval_loss": 0.29812344908714294, |
|
"eval_overall_accuracy": 0.9100799384971765, |
|
"eval_overall_f1": 0.8109618028412001, |
|
"eval_overall_precision": 0.8090263071203351, |
|
"eval_overall_recall": 0.8129065816405802, |
|
"eval_runtime": 911.3101, |
|
"eval_samples_per_second": 72.094, |
|
"eval_steps_per_second": 0.282, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 6.476167678833008, |
|
"learning_rate": 1.6205317614915632e-05, |
|
"loss": 0.2298, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_LOC_f1": 0.8437642148074813, |
|
"eval_ORG_f1": 0.741920341727885, |
|
"eval_PER_f1": 0.845791168353266, |
|
"eval_loss": 0.2789755165576935, |
|
"eval_overall_accuracy": 0.9128231616800994, |
|
"eval_overall_f1": 0.8156984934617233, |
|
"eval_overall_precision": 0.8007835888891781, |
|
"eval_overall_recall": 0.8311795342930293, |
|
"eval_runtime": 950.013, |
|
"eval_samples_per_second": 69.157, |
|
"eval_steps_per_second": 0.271, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.6922666430473328, |
|
"learning_rate": 1.607882820207949e-05, |
|
"loss": 0.2236, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_LOC_f1": 0.8401312445122232, |
|
"eval_ORG_f1": 0.7412946847115139, |
|
"eval_PER_f1": 0.8599101069965396, |
|
"eval_loss": 0.2861514985561371, |
|
"eval_overall_accuracy": 0.9133093425115507, |
|
"eval_overall_f1": 0.8168471254617229, |
|
"eval_overall_precision": 0.8077931685921699, |
|
"eval_overall_recall": 0.8261063411536617, |
|
"eval_runtime": 964.6377, |
|
"eval_samples_per_second": 68.108, |
|
"eval_steps_per_second": 0.266, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 10.913984298706055, |
|
"learning_rate": 1.5952338789243342e-05, |
|
"loss": 0.2164, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_LOC_f1": 0.8450018789928598, |
|
"eval_ORG_f1": 0.7453389102160086, |
|
"eval_PER_f1": 0.8475419561015748, |
|
"eval_loss": 0.29202836751937866, |
|
"eval_overall_accuracy": 0.9133548626934291, |
|
"eval_overall_f1": 0.8167286457267982, |
|
"eval_overall_precision": 0.8108745156006552, |
|
"eval_overall_recall": 0.8226679186397627, |
|
"eval_runtime": 934.7918, |
|
"eval_samples_per_second": 70.283, |
|
"eval_steps_per_second": 0.275, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 8.604541778564453, |
|
"learning_rate": 1.5825849376407196e-05, |
|
"loss": 0.2343, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_LOC_f1": 0.8327052539148251, |
|
"eval_ORG_f1": 0.7464142820374833, |
|
"eval_PER_f1": 0.8571568569804591, |
|
"eval_loss": 0.26980945467948914, |
|
"eval_overall_accuracy": 0.915141529832157, |
|
"eval_overall_f1": 0.8162628685387808, |
|
"eval_overall_precision": 0.8141182004502234, |
|
"eval_overall_recall": 0.818418866063648, |
|
"eval_runtime": 878.6904, |
|
"eval_samples_per_second": 74.77, |
|
"eval_steps_per_second": 0.292, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 20.011140823364258, |
|
"learning_rate": 1.569935996357105e-05, |
|
"loss": 0.2305, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_LOC_f1": 0.8434370154154885, |
|
"eval_ORG_f1": 0.7450794786844748, |
|
"eval_PER_f1": 0.8598302131901996, |
|
"eval_loss": 0.2736206650733948, |
|
"eval_overall_accuracy": 0.9164160949247526, |
|
"eval_overall_f1": 0.820116525352046, |
|
"eval_overall_precision": 0.8215285544822911, |
|
"eval_overall_recall": 0.818709341835942, |
|
"eval_runtime": 878.0774, |
|
"eval_samples_per_second": 74.823, |
|
"eval_steps_per_second": 0.293, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.8893330693244934, |
|
"learning_rate": 1.5572870550734906e-05, |
|
"loss": 0.218, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_LOC_f1": 0.8372996858861737, |
|
"eval_ORG_f1": 0.7351363688234623, |
|
"eval_PER_f1": 0.8452893909397927, |
|
"eval_loss": 0.277670681476593, |
|
"eval_overall_accuracy": 0.9132897435443531, |
|
"eval_overall_f1": 0.8105032765054125, |
|
"eval_overall_precision": 0.7914432306117588, |
|
"eval_overall_recall": 0.8305040092411827, |
|
"eval_runtime": 878.1965, |
|
"eval_samples_per_second": 74.812, |
|
"eval_steps_per_second": 0.293, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 2.807310104370117, |
|
"learning_rate": 1.544638113789876e-05, |
|
"loss": 0.2209, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_LOC_f1": 0.8436019819082686, |
|
"eval_ORG_f1": 0.7541017701160051, |
|
"eval_PER_f1": 0.8559255699664113, |
|
"eval_loss": 0.2975883483886719, |
|
"eval_overall_accuracy": 0.9155777649084917, |
|
"eval_overall_f1": 0.8217433690792348, |
|
"eval_overall_precision": 0.8178831213153369, |
|
"eval_overall_recall": 0.8256402288678876, |
|
"eval_runtime": 887.7886, |
|
"eval_samples_per_second": 74.004, |
|
"eval_steps_per_second": 0.289, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.7378529906272888, |
|
"learning_rate": 1.5319891725062616e-05, |
|
"loss": 0.2068, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_LOC_f1": 0.8404596277816221, |
|
"eval_ORG_f1": 0.7519756060658962, |
|
"eval_PER_f1": 0.8650594959056045, |
|
"eval_loss": 0.2906901240348816, |
|
"eval_overall_accuracy": 0.9159109473508519, |
|
"eval_overall_f1": 0.8231073274551537, |
|
"eval_overall_precision": 0.8232630746670091, |
|
"eval_overall_recall": 0.8229516391615384, |
|
"eval_runtime": 914.9939, |
|
"eval_samples_per_second": 71.804, |
|
"eval_steps_per_second": 0.281, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 4.775814056396484, |
|
"learning_rate": 1.5193402312226468e-05, |
|
"loss": 0.2222, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_LOC_f1": 0.8486954241510423, |
|
"eval_ORG_f1": 0.7403596163509645, |
|
"eval_PER_f1": 0.8627163820626227, |
|
"eval_loss": 0.2920599579811096, |
|
"eval_overall_accuracy": 0.9144530370812459, |
|
"eval_overall_f1": 0.8204945751023299, |
|
"eval_overall_precision": 0.8079300635190885, |
|
"eval_overall_recall": 0.8334560537177521, |
|
"eval_runtime": 974.0136, |
|
"eval_samples_per_second": 67.453, |
|
"eval_steps_per_second": 0.264, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 2.2256317138671875, |
|
"learning_rate": 1.5066912899390323e-05, |
|
"loss": 0.2328, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_LOC_f1": 0.849334397801749, |
|
"eval_ORG_f1": 0.753236617390506, |
|
"eval_PER_f1": 0.8693573280340553, |
|
"eval_loss": 0.29489845037460327, |
|
"eval_overall_accuracy": 0.917294255100157, |
|
"eval_overall_f1": 0.8276056778793333, |
|
"eval_overall_precision": 0.8335114765330592, |
|
"eval_overall_recall": 0.8217829808218438, |
|
"eval_runtime": 944.9062, |
|
"eval_samples_per_second": 69.531, |
|
"eval_steps_per_second": 0.272, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 16.9512996673584, |
|
"learning_rate": 1.4940423486554176e-05, |
|
"loss": 0.2229, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_LOC_f1": 0.8445889009269291, |
|
"eval_ORG_f1": 0.7452847675981278, |
|
"eval_PER_f1": 0.8508496270046708, |
|
"eval_loss": 0.27226653695106506, |
|
"eval_overall_accuracy": 0.916396495957555, |
|
"eval_overall_f1": 0.818113712374582, |
|
"eval_overall_precision": 0.8101638106341121, |
|
"eval_overall_recall": 0.8262211804124756, |
|
"eval_runtime": 919.492, |
|
"eval_samples_per_second": 71.452, |
|
"eval_steps_per_second": 0.28, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 0.6652330160140991, |
|
"learning_rate": 1.481393407371803e-05, |
|
"loss": 0.2219, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_LOC_f1": 0.8483445744353834, |
|
"eval_ORG_f1": 0.7490041659533185, |
|
"eval_PER_f1": 0.865145374272123, |
|
"eval_loss": 0.2795349359512329, |
|
"eval_overall_accuracy": 0.9183601860258099, |
|
"eval_overall_f1": 0.8247159453593771, |
|
"eval_overall_precision": 0.8203393908609086, |
|
"eval_overall_recall": 0.8291394486364527, |
|
"eval_runtime": 878.0472, |
|
"eval_samples_per_second": 74.825, |
|
"eval_steps_per_second": 0.293, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 8.537057876586914, |
|
"learning_rate": 1.4687444660881885e-05, |
|
"loss": 0.2265, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_LOC_f1": 0.8431806420528559, |
|
"eval_ORG_f1": 0.7481651106805237, |
|
"eval_PER_f1": 0.8624314888139032, |
|
"eval_loss": 0.2695271372795105, |
|
"eval_overall_accuracy": 0.9176305986662586, |
|
"eval_overall_f1": 0.8227747180347664, |
|
"eval_overall_precision": 0.8127298369558054, |
|
"eval_overall_recall": 0.8330710044381996, |
|
"eval_runtime": 878.4759, |
|
"eval_samples_per_second": 74.789, |
|
"eval_steps_per_second": 0.293, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 2.5510284900665283, |
|
"learning_rate": 1.456095524804574e-05, |
|
"loss": 0.208, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_LOC_f1": 0.8455445895423215, |
|
"eval_ORG_f1": 0.7595170903640558, |
|
"eval_PER_f1": 0.8683249226114047, |
|
"eval_loss": 0.291418194770813, |
|
"eval_overall_accuracy": 0.9158768072144431, |
|
"eval_overall_f1": 0.828613510075651, |
|
"eval_overall_precision": 0.8212624496539643, |
|
"eval_overall_recall": 0.8360973566704721, |
|
"eval_runtime": 879.88, |
|
"eval_samples_per_second": 74.669, |
|
"eval_steps_per_second": 0.292, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 40.04784393310547, |
|
"learning_rate": 1.4434465835209595e-05, |
|
"loss": 0.2201, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_LOC_f1": 0.8562697361719467, |
|
"eval_ORG_f1": 0.7598152424942263, |
|
"eval_PER_f1": 0.8635555196248839, |
|
"eval_loss": 0.26673147082328796, |
|
"eval_overall_accuracy": 0.9210597857010994, |
|
"eval_overall_f1": 0.8302701818813599, |
|
"eval_overall_precision": 0.8374682152429387, |
|
"eval_overall_recall": 0.8231948281802031, |
|
"eval_runtime": 908.7842, |
|
"eval_samples_per_second": 72.294, |
|
"eval_steps_per_second": 0.283, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 4.13014030456543, |
|
"learning_rate": 1.4307976422373449e-05, |
|
"loss": 0.2065, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_LOC_f1": 0.850374667633551, |
|
"eval_ORG_f1": 0.7572422253856926, |
|
"eval_PER_f1": 0.8662861965717501, |
|
"eval_loss": 0.260405570268631, |
|
"eval_overall_accuracy": 0.9184101317809266, |
|
"eval_overall_f1": 0.8283524878105291, |
|
"eval_overall_precision": 0.835684527492472, |
|
"eval_overall_recall": 0.821147987273108, |
|
"eval_runtime": 930.7338, |
|
"eval_samples_per_second": 70.589, |
|
"eval_steps_per_second": 0.276, |
|
"step": 45000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 158116, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 7, |
|
"save_steps": 500, |
|
"total_flos": 1.4980271080173432e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|