|
{ |
|
"best_metric": 0.1913878321647644, |
|
"best_model_checkpoint": "./ryan03312024_lr_2e-5_wd_001_v2/checkpoint-2800", |
|
"epoch": 2.0, |
|
"eval_steps": 100, |
|
"global_step": 5546, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6066028475761414, |
|
"learning_rate": 1.9909844933285253e-05, |
|
"loss": 0.571, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.938056468963623, |
|
"learning_rate": 1.9819689866570503e-05, |
|
"loss": 0.4613, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.0178314447402954, |
|
"learning_rate": 1.9729534799855754e-05, |
|
"loss": 0.4095, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.4264367818832397, |
|
"learning_rate": 1.9639379733141005e-05, |
|
"loss": 0.4426, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.37066397070884705, |
|
"eval_na_accuracy": 0.80756014585495, |
|
"eval_ordinal_accuracy": 0.3408682346343994, |
|
"eval_ordinal_mae": 0.8707404732704163, |
|
"eval_runtime": 343.5137, |
|
"eval_samples_per_second": 13.027, |
|
"eval_steps_per_second": 1.63, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.495741069316864, |
|
"learning_rate": 1.9549224666426253e-05, |
|
"loss": 0.3942, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6811705827713013, |
|
"learning_rate": 1.9459069599711507e-05, |
|
"loss": 0.3667, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.048833966255188, |
|
"learning_rate": 1.9368914532996758e-05, |
|
"loss": 0.3792, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.517120897769928, |
|
"learning_rate": 1.9278759466282005e-05, |
|
"loss": 0.3133, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.3202913999557495, |
|
"eval_na_accuracy": 0.7749140858650208, |
|
"eval_ordinal_accuracy": 0.4300025701522827, |
|
"eval_ordinal_mae": 0.8544500470161438, |
|
"eval_runtime": 201.5044, |
|
"eval_samples_per_second": 22.208, |
|
"eval_steps_per_second": 2.779, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.3575235605239868, |
|
"learning_rate": 1.9188604399567256e-05, |
|
"loss": 0.3491, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.0482808351516724, |
|
"learning_rate": 1.909844933285251e-05, |
|
"loss": 0.3655, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.861924409866333, |
|
"learning_rate": 1.9008294266137758e-05, |
|
"loss": 0.3141, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.2723792791366577, |
|
"learning_rate": 1.891813919942301e-05, |
|
"loss": 0.3349, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.2997085452079773, |
|
"eval_na_accuracy": 0.8419243693351746, |
|
"eval_ordinal_accuracy": 0.4592858850955963, |
|
"eval_ordinal_mae": 0.8338660001754761, |
|
"eval_runtime": 195.4794, |
|
"eval_samples_per_second": 22.892, |
|
"eval_steps_per_second": 2.865, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.6588447690010071, |
|
"learning_rate": 1.882798413270826e-05, |
|
"loss": 0.2944, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5759406089782715, |
|
"learning_rate": 1.873782906599351e-05, |
|
"loss": 0.2992, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.21856403350830078, |
|
"learning_rate": 1.864767399927876e-05, |
|
"loss": 0.305, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 1.3187426328659058, |
|
"learning_rate": 1.8557518932564012e-05, |
|
"loss": 0.3173, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.28696730732917786, |
|
"eval_na_accuracy": 0.8659793734550476, |
|
"eval_ordinal_accuracy": 0.48189055919647217, |
|
"eval_ordinal_mae": 0.7992547750473022, |
|
"eval_runtime": 196.5851, |
|
"eval_samples_per_second": 22.764, |
|
"eval_steps_per_second": 2.849, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.22658109664917, |
|
"learning_rate": 1.846736386584926e-05, |
|
"loss": 0.2952, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 2.2546815872192383, |
|
"learning_rate": 1.8377208799134514e-05, |
|
"loss": 0.3037, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.8508079051971436, |
|
"learning_rate": 1.8287053732419765e-05, |
|
"loss": 0.3114, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2727103233337402, |
|
"learning_rate": 1.8196898665705013e-05, |
|
"loss": 0.2946, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 0.2856302559375763, |
|
"eval_na_accuracy": 0.8934707641601562, |
|
"eval_ordinal_accuracy": 0.5111739039421082, |
|
"eval_ordinal_mae": 0.7690269351005554, |
|
"eval_runtime": 195.0911, |
|
"eval_samples_per_second": 22.938, |
|
"eval_steps_per_second": 2.87, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 1.6682345867156982, |
|
"learning_rate": 1.8106743598990263e-05, |
|
"loss": 0.301, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.0263928174972534, |
|
"learning_rate": 1.8016588532275518e-05, |
|
"loss": 0.2746, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3150116503238678, |
|
"learning_rate": 1.7926433465560765e-05, |
|
"loss": 0.2676, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.7826778888702393, |
|
"learning_rate": 1.7836278398846016e-05, |
|
"loss": 0.3002, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.2724498510360718, |
|
"eval_na_accuracy": 0.9209622144699097, |
|
"eval_ordinal_accuracy": 0.5345491766929626, |
|
"eval_ordinal_mae": 0.7232748866081238, |
|
"eval_runtime": 200.1722, |
|
"eval_samples_per_second": 22.356, |
|
"eval_steps_per_second": 2.798, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.7353246808052063, |
|
"learning_rate": 1.7746123332131267e-05, |
|
"loss": 0.303, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.5406638383865356, |
|
"learning_rate": 1.7655968265416518e-05, |
|
"loss": 0.2909, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.443584680557251, |
|
"learning_rate": 1.756581319870177e-05, |
|
"loss": 0.2512, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.028228521347046, |
|
"learning_rate": 1.747565813198702e-05, |
|
"loss": 0.2817, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 0.26569074392318726, |
|
"eval_na_accuracy": 0.8625429272651672, |
|
"eval_ordinal_accuracy": 0.5566401481628418, |
|
"eval_ordinal_mae": 0.6927691102027893, |
|
"eval_runtime": 201.4281, |
|
"eval_samples_per_second": 22.216, |
|
"eval_steps_per_second": 2.78, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.40620824694633484, |
|
"learning_rate": 1.738550306527227e-05, |
|
"loss": 0.2732, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 1.9016238451004028, |
|
"learning_rate": 1.729534799855752e-05, |
|
"loss": 0.2552, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.003732919692993, |
|
"learning_rate": 1.7205192931842772e-05, |
|
"loss": 0.2538, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.751253604888916, |
|
"learning_rate": 1.711503786512802e-05, |
|
"loss": 0.2939, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.25960293412208557, |
|
"eval_na_accuracy": 0.7920961976051331, |
|
"eval_ordinal_accuracy": 0.5861803293228149, |
|
"eval_ordinal_mae": 0.6425440907478333, |
|
"eval_runtime": 200.1318, |
|
"eval_samples_per_second": 22.36, |
|
"eval_steps_per_second": 2.798, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.241727113723755, |
|
"learning_rate": 1.702488279841327e-05, |
|
"loss": 0.2659, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.8341929912567139, |
|
"learning_rate": 1.6938333934367113e-05, |
|
"loss": 0.2654, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.8499948978424072, |
|
"learning_rate": 1.6848178867652364e-05, |
|
"loss": 0.2774, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 1.8184664249420166, |
|
"learning_rate": 1.6758023800937615e-05, |
|
"loss": 0.2525, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.24585017561912537, |
|
"eval_na_accuracy": 0.8264604806900024, |
|
"eval_ordinal_accuracy": 0.6046750545501709, |
|
"eval_ordinal_mae": 0.6053154468536377, |
|
"eval_runtime": 203.517, |
|
"eval_samples_per_second": 21.988, |
|
"eval_steps_per_second": 2.752, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 1.131605625152588, |
|
"learning_rate": 1.6667868734222866e-05, |
|
"loss": 0.2701, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.05479097366333, |
|
"learning_rate": 1.6577713667508117e-05, |
|
"loss": 0.2956, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 1.3499815464019775, |
|
"learning_rate": 1.6487558600793364e-05, |
|
"loss": 0.2446, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.7456974983215332, |
|
"learning_rate": 1.639740353407862e-05, |
|
"loss": 0.2163, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.2399808019399643, |
|
"eval_na_accuracy": 0.8109965920448303, |
|
"eval_ordinal_accuracy": 0.624454140663147, |
|
"eval_ordinal_mae": 0.5777344703674316, |
|
"eval_runtime": 217.2069, |
|
"eval_samples_per_second": 20.602, |
|
"eval_steps_per_second": 2.578, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 1.0125823020935059, |
|
"learning_rate": 1.6307248467363866e-05, |
|
"loss": 0.2168, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 1.9579060077667236, |
|
"learning_rate": 1.6217093400649117e-05, |
|
"loss": 0.2364, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.975767135620117, |
|
"learning_rate": 1.6126938333934368e-05, |
|
"loss": 0.2716, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.9766530990600586, |
|
"learning_rate": 1.603678326721962e-05, |
|
"loss": 0.2181, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.23391272127628326, |
|
"eval_na_accuracy": 0.876288652420044, |
|
"eval_ordinal_accuracy": 0.6023632287979126, |
|
"eval_ordinal_mae": 0.5429691672325134, |
|
"eval_runtime": 209.4151, |
|
"eval_samples_per_second": 21.369, |
|
"eval_steps_per_second": 2.674, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.6088442802429199, |
|
"learning_rate": 1.594662820050487e-05, |
|
"loss": 0.2128, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3461017906665802, |
|
"learning_rate": 1.585647313379012e-05, |
|
"loss": 0.2145, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.341361165046692, |
|
"learning_rate": 1.576631806707537e-05, |
|
"loss": 0.2554, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.5675370693206787, |
|
"learning_rate": 1.5676163000360622e-05, |
|
"loss": 0.1949, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.23308563232421875, |
|
"eval_na_accuracy": 0.7955326437950134, |
|
"eval_ordinal_accuracy": 0.6285640597343445, |
|
"eval_ordinal_mae": 0.5328579545021057, |
|
"eval_runtime": 209.3844, |
|
"eval_samples_per_second": 21.372, |
|
"eval_steps_per_second": 2.675, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.334373712539673, |
|
"learning_rate": 1.5586007933645873e-05, |
|
"loss": 0.2739, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.5421320199966431, |
|
"learning_rate": 1.5495852866931124e-05, |
|
"loss": 0.2294, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.599592924118042, |
|
"learning_rate": 1.540569780021637e-05, |
|
"loss": 0.255, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.6286499500274658, |
|
"learning_rate": 1.5315542733501626e-05, |
|
"loss": 0.214, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.24238631129264832, |
|
"eval_na_accuracy": 0.7628865838050842, |
|
"eval_ordinal_accuracy": 0.6182892322540283, |
|
"eval_ordinal_mae": 0.5243560671806335, |
|
"eval_runtime": 212.87, |
|
"eval_samples_per_second": 21.022, |
|
"eval_steps_per_second": 2.631, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.9406815767288208, |
|
"learning_rate": 1.5225387666786875e-05, |
|
"loss": 0.2225, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 3.733520269393921, |
|
"learning_rate": 1.5135232600072126e-05, |
|
"loss": 0.1918, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.8792308568954468, |
|
"learning_rate": 1.5045077533357375e-05, |
|
"loss": 0.2179, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1207715272903442, |
|
"learning_rate": 1.4954922466642628e-05, |
|
"loss": 0.27, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.22981737554073334, |
|
"eval_na_accuracy": 0.7869415879249573, |
|
"eval_ordinal_accuracy": 0.6367839574813843, |
|
"eval_ordinal_mae": 0.4994910955429077, |
|
"eval_runtime": 212.2299, |
|
"eval_samples_per_second": 21.086, |
|
"eval_steps_per_second": 2.639, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.47230863571167, |
|
"learning_rate": 1.4864767399927877e-05, |
|
"loss": 0.2348, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.015068769454956, |
|
"learning_rate": 1.4774612333213128e-05, |
|
"loss": 0.2593, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 1.7957515716552734, |
|
"learning_rate": 1.4684457266498377e-05, |
|
"loss": 0.2049, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 1.9824727773666382, |
|
"learning_rate": 1.459430219978363e-05, |
|
"loss": 0.2117, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.2300846129655838, |
|
"eval_na_accuracy": 0.7783505320549011, |
|
"eval_ordinal_accuracy": 0.6473156809806824, |
|
"eval_ordinal_mae": 0.4949534237384796, |
|
"eval_runtime": 210.619, |
|
"eval_samples_per_second": 21.247, |
|
"eval_steps_per_second": 2.659, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.1805070638656616, |
|
"learning_rate": 1.450414713306888e-05, |
|
"loss": 0.189, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 1.2057225704193115, |
|
"learning_rate": 1.441399206635413e-05, |
|
"loss": 0.2254, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.1214776039123535, |
|
"learning_rate": 1.432383699963938e-05, |
|
"loss": 0.2286, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.340480089187622, |
|
"learning_rate": 1.4233681932924633e-05, |
|
"loss": 0.2038, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.21561825275421143, |
|
"eval_na_accuracy": 0.8367697596549988, |
|
"eval_ordinal_accuracy": 0.6550218462944031, |
|
"eval_ordinal_mae": 0.48989665508270264, |
|
"eval_runtime": 207.7331, |
|
"eval_samples_per_second": 21.542, |
|
"eval_steps_per_second": 2.696, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.1810505390167236, |
|
"learning_rate": 1.4143526866209882e-05, |
|
"loss": 0.2223, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.7431559562683105, |
|
"learning_rate": 1.4053371799495133e-05, |
|
"loss": 0.2644, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.8831206560134888, |
|
"learning_rate": 1.3963216732780382e-05, |
|
"loss": 0.2589, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.4183125495910645, |
|
"learning_rate": 1.3873061666065635e-05, |
|
"loss": 0.1974, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 0.22116971015930176, |
|
"eval_na_accuracy": 0.8281787037849426, |
|
"eval_ordinal_accuracy": 0.6347290277481079, |
|
"eval_ordinal_mae": 0.4638911485671997, |
|
"eval_runtime": 207.3318, |
|
"eval_samples_per_second": 21.584, |
|
"eval_steps_per_second": 2.701, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.318037986755371, |
|
"learning_rate": 1.3782906599350886e-05, |
|
"loss": 0.2456, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.9031999111175537, |
|
"learning_rate": 1.3692751532636135e-05, |
|
"loss": 0.2208, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.9724792838096619, |
|
"learning_rate": 1.3602596465921386e-05, |
|
"loss": 0.1694, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.8028815984725952, |
|
"learning_rate": 1.3512441399206637e-05, |
|
"loss": 0.1916, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.2150656282901764, |
|
"eval_na_accuracy": 0.8797250986099243, |
|
"eval_ordinal_accuracy": 0.6439763903617859, |
|
"eval_ordinal_mae": 0.4789562523365021, |
|
"eval_runtime": 211.6606, |
|
"eval_samples_per_second": 21.142, |
|
"eval_steps_per_second": 2.646, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.7413434982299805, |
|
"learning_rate": 1.3422286332491887e-05, |
|
"loss": 0.2097, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.586044192314148, |
|
"learning_rate": 1.3332131265777137e-05, |
|
"loss": 0.1926, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.0166969299316406, |
|
"learning_rate": 1.3241976199062388e-05, |
|
"loss": 0.1901, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.5749481916427612, |
|
"learning_rate": 1.315182113234764e-05, |
|
"loss": 0.1921, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.20500315725803375, |
|
"eval_na_accuracy": 0.8728522062301636, |
|
"eval_ordinal_accuracy": 0.6609298586845398, |
|
"eval_ordinal_mae": 0.4614226222038269, |
|
"eval_runtime": 179.4835, |
|
"eval_samples_per_second": 24.933, |
|
"eval_steps_per_second": 3.12, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.0241420269012451, |
|
"learning_rate": 1.306166606563289e-05, |
|
"loss": 0.2312, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.6365127563476562, |
|
"learning_rate": 1.2975117201586732e-05, |
|
"loss": 0.2358, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.4451234340667725, |
|
"learning_rate": 1.2884962134871981e-05, |
|
"loss": 0.2349, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.9995049238204956, |
|
"learning_rate": 1.2794807068157232e-05, |
|
"loss": 0.1936, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.20607997477054596, |
|
"eval_na_accuracy": 0.8573883175849915, |
|
"eval_ordinal_accuracy": 0.6496275663375854, |
|
"eval_ordinal_mae": 0.4565940797328949, |
|
"eval_runtime": 175.1471, |
|
"eval_samples_per_second": 25.55, |
|
"eval_steps_per_second": 3.197, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.394123554229736, |
|
"learning_rate": 1.2704652001442481e-05, |
|
"loss": 0.2187, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.7290520668029785, |
|
"learning_rate": 1.2614496934727734e-05, |
|
"loss": 0.2144, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.8023579120635986, |
|
"learning_rate": 1.2524341868012983e-05, |
|
"loss": 0.1749, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 2.040255308151245, |
|
"learning_rate": 1.2434186801298234e-05, |
|
"loss": 0.1939, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.22944368422031403, |
|
"eval_na_accuracy": 0.9089347124099731, |
|
"eval_ordinal_accuracy": 0.6362702250480652, |
|
"eval_ordinal_mae": 0.4656565487384796, |
|
"eval_runtime": 176.4408, |
|
"eval_samples_per_second": 25.363, |
|
"eval_steps_per_second": 3.174, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 1.4762495756149292, |
|
"learning_rate": 1.2344031734583483e-05, |
|
"loss": 0.2211, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 3.6306161880493164, |
|
"learning_rate": 1.2253876667868735e-05, |
|
"loss": 0.2548, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.6580469608306885, |
|
"learning_rate": 1.2163721601153986e-05, |
|
"loss": 0.211, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.7241005897521973, |
|
"learning_rate": 1.2073566534439236e-05, |
|
"loss": 0.257, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.20536109805107117, |
|
"eval_na_accuracy": 0.8608247637748718, |
|
"eval_ordinal_accuracy": 0.6527100205421448, |
|
"eval_ordinal_mae": 0.45671576261520386, |
|
"eval_runtime": 176.5474, |
|
"eval_samples_per_second": 25.347, |
|
"eval_steps_per_second": 3.172, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.5323662757873535, |
|
"learning_rate": 1.1983411467724486e-05, |
|
"loss": 0.176, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.30660343170166, |
|
"learning_rate": 1.1893256401009739e-05, |
|
"loss": 0.195, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.7983808517456055, |
|
"learning_rate": 1.1803101334294988e-05, |
|
"loss": 0.1746, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.6054279804229736, |
|
"learning_rate": 1.1712946267580239e-05, |
|
"loss": 0.2236, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.2043941468000412, |
|
"eval_na_accuracy": 0.876288652420044, |
|
"eval_ordinal_accuracy": 0.6640123128890991, |
|
"eval_ordinal_mae": 0.45420947670936584, |
|
"eval_runtime": 176.816, |
|
"eval_samples_per_second": 25.309, |
|
"eval_steps_per_second": 3.167, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 1.1567206382751465, |
|
"learning_rate": 1.1622791200865488e-05, |
|
"loss": 0.2479, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 5.52665376663208, |
|
"learning_rate": 1.1532636134150741e-05, |
|
"loss": 0.2271, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.9272255897521973, |
|
"learning_rate": 1.1442481067435992e-05, |
|
"loss": 0.2303, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.266409397125244, |
|
"learning_rate": 1.1352326000721241e-05, |
|
"loss": 0.1925, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.20849426090717316, |
|
"eval_na_accuracy": 0.80756014585495, |
|
"eval_ordinal_accuracy": 0.6886719465255737, |
|
"eval_ordinal_mae": 0.44630834460258484, |
|
"eval_runtime": 175.9296, |
|
"eval_samples_per_second": 25.436, |
|
"eval_steps_per_second": 3.183, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.3057689666748047, |
|
"learning_rate": 1.1262170934006492e-05, |
|
"loss": 0.2049, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.8032755851745605, |
|
"learning_rate": 1.1172015867291743e-05, |
|
"loss": 0.1909, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.4314826428890228, |
|
"learning_rate": 1.1081860800576994e-05, |
|
"loss": 0.1825, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.1127185821533203, |
|
"learning_rate": 1.0991705733862243e-05, |
|
"loss": 0.1657, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.2033829689025879, |
|
"eval_na_accuracy": 0.8522336483001709, |
|
"eval_ordinal_accuracy": 0.6768559217453003, |
|
"eval_ordinal_mae": 0.4391731023788452, |
|
"eval_runtime": 174.624, |
|
"eval_samples_per_second": 25.626, |
|
"eval_steps_per_second": 3.207, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 4.6838459968566895, |
|
"learning_rate": 1.0901550667147494e-05, |
|
"loss": 0.2397, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.440084218978882, |
|
"learning_rate": 1.0811395600432746e-05, |
|
"loss": 0.1921, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.5833593606948853, |
|
"learning_rate": 1.0721240533717995e-05, |
|
"loss": 0.188, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.1624417304992676, |
|
"learning_rate": 1.0631085467003246e-05, |
|
"loss": 0.1723, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 0.1956964135169983, |
|
"eval_na_accuracy": 0.838487982749939, |
|
"eval_ordinal_accuracy": 0.6755715608596802, |
|
"eval_ordinal_mae": 0.42565402388572693, |
|
"eval_runtime": 175.3137, |
|
"eval_samples_per_second": 25.526, |
|
"eval_steps_per_second": 3.194, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.9376819133758545, |
|
"learning_rate": 1.0540930400288496e-05, |
|
"loss": 0.1889, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.2914035320281982, |
|
"learning_rate": 1.0450775333573748e-05, |
|
"loss": 0.2078, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2271968126296997, |
|
"learning_rate": 1.0360620266858999e-05, |
|
"loss": 0.2247, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 2.135350227355957, |
|
"learning_rate": 1.0270465200144248e-05, |
|
"loss": 0.2279, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.19462928175926208, |
|
"eval_na_accuracy": 0.8642611503601074, |
|
"eval_ordinal_accuracy": 0.6740303039550781, |
|
"eval_ordinal_mae": 0.4287233054637909, |
|
"eval_runtime": 175.4222, |
|
"eval_samples_per_second": 25.51, |
|
"eval_steps_per_second": 3.192, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.897443413734436, |
|
"learning_rate": 1.0180310133429499e-05, |
|
"loss": 0.1672, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.9568222761154175, |
|
"learning_rate": 1.009015506671475e-05, |
|
"loss": 0.1597, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.222680926322937, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2093, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 1.0666455030441284, |
|
"learning_rate": 9.909844933285252e-06, |
|
"loss": 0.1421, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.1913878321647644, |
|
"eval_na_accuracy": 0.8505154848098755, |
|
"eval_ordinal_accuracy": 0.6843051910400391, |
|
"eval_ordinal_mae": 0.41984713077545166, |
|
"eval_runtime": 177.3377, |
|
"eval_samples_per_second": 25.234, |
|
"eval_steps_per_second": 3.158, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.9463332295417786, |
|
"learning_rate": 9.819689866570503e-06, |
|
"loss": 0.1563, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.9075057506561279, |
|
"learning_rate": 9.729534799855753e-06, |
|
"loss": 0.1411, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 3.121269702911377, |
|
"learning_rate": 9.639379733141003e-06, |
|
"loss": 0.1368, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5694425702095032, |
|
"learning_rate": 9.549224666426255e-06, |
|
"loss": 0.1116, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.20190554857254028, |
|
"eval_na_accuracy": 0.8230240345001221, |
|
"eval_ordinal_accuracy": 0.6704341173171997, |
|
"eval_ordinal_mae": 0.4214249551296234, |
|
"eval_runtime": 175.5908, |
|
"eval_samples_per_second": 25.485, |
|
"eval_steps_per_second": 3.189, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.727107048034668, |
|
"learning_rate": 9.459069599711504e-06, |
|
"loss": 0.1485, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.6293134689331055, |
|
"learning_rate": 9.368914532996755e-06, |
|
"loss": 0.116, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.7508574724197388, |
|
"learning_rate": 9.278759466282006e-06, |
|
"loss": 0.1656, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.7475106120109558, |
|
"learning_rate": 9.188604399567257e-06, |
|
"loss": 0.1194, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.19536536931991577, |
|
"eval_na_accuracy": 0.8367697596549988, |
|
"eval_ordinal_accuracy": 0.6807089447975159, |
|
"eval_ordinal_mae": 0.41778308153152466, |
|
"eval_runtime": 175.6051, |
|
"eval_samples_per_second": 25.483, |
|
"eval_steps_per_second": 3.189, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.6796592473983765, |
|
"learning_rate": 9.098449332852506e-06, |
|
"loss": 0.1483, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 1.7480669021606445, |
|
"learning_rate": 9.008294266137759e-06, |
|
"loss": 0.1083, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.7591541409492493, |
|
"learning_rate": 8.918139199423008e-06, |
|
"loss": 0.1463, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.8966888189315796, |
|
"learning_rate": 8.827984132708259e-06, |
|
"loss": 0.1312, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 0.19300740957260132, |
|
"eval_na_accuracy": 0.8591065406799316, |
|
"eval_ordinal_accuracy": 0.6873876452445984, |
|
"eval_ordinal_mae": 0.4165719449520111, |
|
"eval_runtime": 179.7716, |
|
"eval_samples_per_second": 24.893, |
|
"eval_steps_per_second": 3.115, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 1.1078789234161377, |
|
"learning_rate": 8.73782906599351e-06, |
|
"loss": 0.154, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 4.303710460662842, |
|
"learning_rate": 8.64767399927876e-06, |
|
"loss": 0.1328, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.9769502282142639, |
|
"learning_rate": 8.55751893256401e-06, |
|
"loss": 0.1253, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 2.734457492828369, |
|
"learning_rate": 8.467363865849262e-06, |
|
"loss": 0.1836, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.19890539348125458, |
|
"eval_na_accuracy": 0.8642611503601074, |
|
"eval_ordinal_accuracy": 0.6794245839118958, |
|
"eval_ordinal_mae": 0.4106903374195099, |
|
"eval_runtime": 173.0922, |
|
"eval_samples_per_second": 25.853, |
|
"eval_steps_per_second": 3.235, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 1.8836966753005981, |
|
"learning_rate": 8.377208799134512e-06, |
|
"loss": 0.1168, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 1.8701483011245728, |
|
"learning_rate": 8.287053732419763e-06, |
|
"loss": 0.1184, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 1.9784027338027954, |
|
"learning_rate": 8.196898665705013e-06, |
|
"loss": 0.1085, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 3.046462059020996, |
|
"learning_rate": 8.106743598990264e-06, |
|
"loss": 0.1282, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.195108100771904, |
|
"eval_na_accuracy": 0.8539518713951111, |
|
"eval_ordinal_accuracy": 0.6971487402915955, |
|
"eval_ordinal_mae": 0.412724107503891, |
|
"eval_runtime": 175.5444, |
|
"eval_samples_per_second": 25.492, |
|
"eval_steps_per_second": 3.19, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 1.4622838497161865, |
|
"learning_rate": 8.016588532275515e-06, |
|
"loss": 0.1428, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.1039246320724487, |
|
"learning_rate": 7.926433465560766e-06, |
|
"loss": 0.1027, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 1.006919503211975, |
|
"learning_rate": 7.836278398846015e-06, |
|
"loss": 0.1545, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.9952992796897888, |
|
"learning_rate": 7.746123332131266e-06, |
|
"loss": 0.1406, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.19589702785015106, |
|
"eval_na_accuracy": 0.8505154848098755, |
|
"eval_ordinal_accuracy": 0.6974055767059326, |
|
"eval_ordinal_mae": 0.4036160707473755, |
|
"eval_runtime": 176.1463, |
|
"eval_samples_per_second": 25.405, |
|
"eval_steps_per_second": 3.179, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.206535577774048, |
|
"learning_rate": 7.655968265416517e-06, |
|
"loss": 0.1523, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 2.8195924758911133, |
|
"learning_rate": 7.565813198701768e-06, |
|
"loss": 0.1357, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.383612871170044, |
|
"learning_rate": 7.475658131987018e-06, |
|
"loss": 0.1262, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 1.5167522430419922, |
|
"learning_rate": 7.385503065272269e-06, |
|
"loss": 0.0929, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.19685061275959015, |
|
"eval_na_accuracy": 0.8453608155250549, |
|
"eval_ordinal_accuracy": 0.6976624727249146, |
|
"eval_ordinal_mae": 0.4020155370235443, |
|
"eval_runtime": 170.2776, |
|
"eval_samples_per_second": 26.281, |
|
"eval_steps_per_second": 3.289, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.9208484888076782, |
|
"learning_rate": 7.295347998557519e-06, |
|
"loss": 0.1193, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.8087962865829468, |
|
"learning_rate": 7.205192931842771e-06, |
|
"loss": 0.1109, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 1.2812789678573608, |
|
"learning_rate": 7.115037865128021e-06, |
|
"loss": 0.1778, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.8966251015663147, |
|
"learning_rate": 7.0248827984132715e-06, |
|
"loss": 0.1135, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 0.19572028517723083, |
|
"eval_na_accuracy": 0.831615149974823, |
|
"eval_ordinal_accuracy": 0.6981762051582336, |
|
"eval_ordinal_mae": 0.4026087522506714, |
|
"eval_runtime": 171.6945, |
|
"eval_samples_per_second": 26.064, |
|
"eval_steps_per_second": 3.262, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.8011260628700256, |
|
"learning_rate": 6.9347277316985216e-06, |
|
"loss": 0.1078, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.5675652623176575, |
|
"learning_rate": 6.844572664983773e-06, |
|
"loss": 0.1114, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.0519170761108398, |
|
"learning_rate": 6.754417598269023e-06, |
|
"loss": 0.1435, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 1.6201542615890503, |
|
"learning_rate": 6.664262531554274e-06, |
|
"loss": 0.1345, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.1987382471561432, |
|
"eval_na_accuracy": 0.8814433217048645, |
|
"eval_ordinal_accuracy": 0.6832776665687561, |
|
"eval_ordinal_mae": 0.4107266366481781, |
|
"eval_runtime": 173.6501, |
|
"eval_samples_per_second": 25.77, |
|
"eval_steps_per_second": 3.225, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 8.633369445800781, |
|
"learning_rate": 6.574107464839524e-06, |
|
"loss": 0.174, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 1.3483631610870361, |
|
"learning_rate": 6.483952398124775e-06, |
|
"loss": 0.1104, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 2.3553450107574463, |
|
"learning_rate": 6.393797331410025e-06, |
|
"loss": 0.1401, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 6.273257732391357, |
|
"learning_rate": 6.303642264695277e-06, |
|
"loss": 0.1198, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.19685617089271545, |
|
"eval_na_accuracy": 0.8522336483001709, |
|
"eval_ordinal_accuracy": 0.6992036700248718, |
|
"eval_ordinal_mae": 0.3987964391708374, |
|
"eval_runtime": 176.5744, |
|
"eval_samples_per_second": 25.343, |
|
"eval_steps_per_second": 3.171, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.8371938467025757, |
|
"learning_rate": 6.213487197980527e-06, |
|
"loss": 0.1131, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 4.081541061401367, |
|
"learning_rate": 6.123332131265778e-06, |
|
"loss": 0.1345, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 2.648669958114624, |
|
"learning_rate": 6.033177064551028e-06, |
|
"loss": 0.1376, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.8278887867927551, |
|
"learning_rate": 5.943021997836279e-06, |
|
"loss": 0.1281, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.19765153527259827, |
|
"eval_na_accuracy": 0.8402062058448792, |
|
"eval_ordinal_accuracy": 0.6966350078582764, |
|
"eval_ordinal_mae": 0.4065835475921631, |
|
"eval_runtime": 174.9801, |
|
"eval_samples_per_second": 25.574, |
|
"eval_steps_per_second": 3.2, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.9070908427238464, |
|
"learning_rate": 5.852866931121529e-06, |
|
"loss": 0.1278, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.7849236130714417, |
|
"learning_rate": 5.7627118644067805e-06, |
|
"loss": 0.1575, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 2.2364354133605957, |
|
"learning_rate": 5.6761630003606205e-06, |
|
"loss": 0.116, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.8048083782196045, |
|
"learning_rate": 5.586007933645871e-06, |
|
"loss": 0.1153, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.20141063630580902, |
|
"eval_na_accuracy": 0.8436425924301147, |
|
"eval_ordinal_accuracy": 0.693552553653717, |
|
"eval_ordinal_mae": 0.4091172218322754, |
|
"eval_runtime": 175.0141, |
|
"eval_samples_per_second": 25.569, |
|
"eval_steps_per_second": 3.2, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 1.209535002708435, |
|
"learning_rate": 5.495852866931121e-06, |
|
"loss": 0.106, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 3.71276593208313, |
|
"learning_rate": 5.405697800216373e-06, |
|
"loss": 0.1316, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 18.610342025756836, |
|
"learning_rate": 5.315542733501623e-06, |
|
"loss": 0.132, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 2.119901180267334, |
|
"learning_rate": 5.225387666786874e-06, |
|
"loss": 0.1485, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.19648495316505432, |
|
"eval_na_accuracy": 0.838487982749939, |
|
"eval_ordinal_accuracy": 0.7038273811340332, |
|
"eval_ordinal_mae": 0.39893215894699097, |
|
"eval_runtime": 174.9887, |
|
"eval_samples_per_second": 25.573, |
|
"eval_steps_per_second": 3.2, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 1.489436388015747, |
|
"learning_rate": 5.135232600072124e-06, |
|
"loss": 0.1158, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.2724928855895996, |
|
"learning_rate": 5.045077533357375e-06, |
|
"loss": 0.1098, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 1.0074132680892944, |
|
"learning_rate": 4.954922466642626e-06, |
|
"loss": 0.1094, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 0.7578648924827576, |
|
"learning_rate": 4.864767399927877e-06, |
|
"loss": 0.1292, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.19693247973918915, |
|
"eval_na_accuracy": 0.8470790386199951, |
|
"eval_ordinal_accuracy": 0.7030567526817322, |
|
"eval_ordinal_mae": 0.39781150221824646, |
|
"eval_runtime": 173.0859, |
|
"eval_samples_per_second": 25.854, |
|
"eval_steps_per_second": 3.235, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 3.7381861209869385, |
|
"learning_rate": 4.774612333213128e-06, |
|
"loss": 0.1309, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 0.8151502013206482, |
|
"learning_rate": 4.684457266498378e-06, |
|
"loss": 0.1428, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 0.9021445512771606, |
|
"learning_rate": 4.5943021997836285e-06, |
|
"loss": 0.1104, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 3.8359127044677734, |
|
"learning_rate": 4.5041471330688794e-06, |
|
"loss": 0.1233, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.1989138424396515, |
|
"eval_na_accuracy": 0.8659793734550476, |
|
"eval_ordinal_accuracy": 0.6950937509536743, |
|
"eval_ordinal_mae": 0.39926186203956604, |
|
"eval_runtime": 173.9, |
|
"eval_samples_per_second": 25.733, |
|
"eval_steps_per_second": 3.22, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 2.0737252235412598, |
|
"learning_rate": 4.4139920663541295e-06, |
|
"loss": 0.1343, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"grad_norm": 0.8500555753707886, |
|
"learning_rate": 4.32383699963938e-06, |
|
"loss": 0.1274, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 4.984882354736328, |
|
"learning_rate": 4.233681932924631e-06, |
|
"loss": 0.1131, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 0.8260248899459839, |
|
"learning_rate": 4.143526866209881e-06, |
|
"loss": 0.1128, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.1998443454504013, |
|
"eval_na_accuracy": 0.8522336483001709, |
|
"eval_ordinal_accuracy": 0.6971487402915955, |
|
"eval_ordinal_mae": 0.391990065574646, |
|
"eval_runtime": 175.0388, |
|
"eval_samples_per_second": 25.566, |
|
"eval_steps_per_second": 3.199, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.7347471117973328, |
|
"learning_rate": 4.053371799495132e-06, |
|
"loss": 0.1095, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 0.6527581810951233, |
|
"learning_rate": 3.963216732780383e-06, |
|
"loss": 0.1627, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 1.0017273426055908, |
|
"learning_rate": 3.873061666065633e-06, |
|
"loss": 0.1047, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"grad_norm": 1.7798997163772583, |
|
"learning_rate": 3.782906599350884e-06, |
|
"loss": 0.0964, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.20046481490135193, |
|
"eval_na_accuracy": 0.8625429272651672, |
|
"eval_ordinal_accuracy": 0.6981762051582336, |
|
"eval_ordinal_mae": 0.39257487654685974, |
|
"eval_runtime": 174.4806, |
|
"eval_samples_per_second": 25.648, |
|
"eval_steps_per_second": 3.21, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 0.8608025312423706, |
|
"learning_rate": 3.6927515326361344e-06, |
|
"loss": 0.0974, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 0.5464763641357422, |
|
"learning_rate": 3.6025964659213853e-06, |
|
"loss": 0.1224, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 1.151129961013794, |
|
"learning_rate": 3.5124413992066358e-06, |
|
"loss": 0.1514, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 0.5751986503601074, |
|
"learning_rate": 3.4222863324918867e-06, |
|
"loss": 0.1184, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.20076720416545868, |
|
"eval_na_accuracy": 0.8711340427398682, |
|
"eval_ordinal_accuracy": 0.6968918442726135, |
|
"eval_ordinal_mae": 0.3860225975513458, |
|
"eval_runtime": 175.6222, |
|
"eval_samples_per_second": 25.481, |
|
"eval_steps_per_second": 3.189, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 0.8153491020202637, |
|
"learning_rate": 3.332131265777137e-06, |
|
"loss": 0.1344, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"grad_norm": 1.3030270338058472, |
|
"learning_rate": 3.2419761990623876e-06, |
|
"loss": 0.136, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 6.30877685546875, |
|
"learning_rate": 3.1518211323476385e-06, |
|
"loss": 0.1224, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 4.972422122955322, |
|
"learning_rate": 3.061666065632889e-06, |
|
"loss": 0.108, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.19936376810073853, |
|
"eval_na_accuracy": 0.8573883175849915, |
|
"eval_ordinal_accuracy": 0.702029287815094, |
|
"eval_ordinal_mae": 0.39070162177085876, |
|
"eval_runtime": 174.219, |
|
"eval_samples_per_second": 25.686, |
|
"eval_steps_per_second": 3.214, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 1.4122976064682007, |
|
"learning_rate": 2.9715109989181394e-06, |
|
"loss": 0.1496, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 2.4173316955566406, |
|
"learning_rate": 2.8813559322033903e-06, |
|
"loss": 0.1276, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 1.0301971435546875, |
|
"learning_rate": 2.7912008654886407e-06, |
|
"loss": 0.1315, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"grad_norm": 0.8369082808494568, |
|
"learning_rate": 2.701045798773891e-06, |
|
"loss": 0.129, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.19854028522968292, |
|
"eval_na_accuracy": 0.8591065406799316, |
|
"eval_ordinal_accuracy": 0.7033136487007141, |
|
"eval_ordinal_mae": 0.3896414041519165, |
|
"eval_runtime": 176.1539, |
|
"eval_samples_per_second": 25.404, |
|
"eval_steps_per_second": 3.179, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 3.947204113006592, |
|
"learning_rate": 2.610890732059142e-06, |
|
"loss": 0.1106, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 2.09318470954895, |
|
"learning_rate": 2.5207356653443925e-06, |
|
"loss": 0.164, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 0.7661357522010803, |
|
"learning_rate": 2.4305805986296434e-06, |
|
"loss": 0.1214, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 0.4803534746170044, |
|
"learning_rate": 2.340425531914894e-06, |
|
"loss": 0.1396, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.19975444674491882, |
|
"eval_na_accuracy": 0.8573883175849915, |
|
"eval_ordinal_accuracy": 0.6984331011772156, |
|
"eval_ordinal_mae": 0.3833589553833008, |
|
"eval_runtime": 176.3204, |
|
"eval_samples_per_second": 25.38, |
|
"eval_steps_per_second": 3.176, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"grad_norm": 0.7548712491989136, |
|
"learning_rate": 2.2502704652001443e-06, |
|
"loss": 0.0977, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 1.906633734703064, |
|
"learning_rate": 2.1601153984853952e-06, |
|
"loss": 0.1385, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 0.6955749988555908, |
|
"learning_rate": 2.0699603317706457e-06, |
|
"loss": 0.1017, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 6.211479663848877, |
|
"learning_rate": 1.979805265055896e-06, |
|
"loss": 0.1323, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.19856412708759308, |
|
"eval_na_accuracy": 0.8453608155250549, |
|
"eval_ordinal_accuracy": 0.7051117420196533, |
|
"eval_ordinal_mae": 0.3843817710876465, |
|
"eval_runtime": 173.9748, |
|
"eval_samples_per_second": 25.722, |
|
"eval_steps_per_second": 3.219, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"grad_norm": 0.5422704219818115, |
|
"learning_rate": 1.889650198341147e-06, |
|
"loss": 0.1367, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 2.782160997390747, |
|
"learning_rate": 1.7994951316263975e-06, |
|
"loss": 0.1002, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 4.336767673492432, |
|
"learning_rate": 1.7093400649116482e-06, |
|
"loss": 0.1407, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 3.1643247604370117, |
|
"learning_rate": 1.6191849981968988e-06, |
|
"loss": 0.1079, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.19739079475402832, |
|
"eval_na_accuracy": 0.8402062058448792, |
|
"eval_ordinal_accuracy": 0.7053686380386353, |
|
"eval_ordinal_mae": 0.3832775950431824, |
|
"eval_runtime": 174.8648, |
|
"eval_samples_per_second": 25.591, |
|
"eval_steps_per_second": 3.202, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 5.217372894287109, |
|
"learning_rate": 1.5290299314821493e-06, |
|
"loss": 0.122, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 4.001213073730469, |
|
"learning_rate": 1.4388748647674e-06, |
|
"loss": 0.1102, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 2.418731451034546, |
|
"learning_rate": 1.3487197980526506e-06, |
|
"loss": 0.1117, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.181628704071045, |
|
"learning_rate": 1.2585647313379013e-06, |
|
"loss": 0.0802, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.19647686183452606, |
|
"eval_na_accuracy": 0.8487972617149353, |
|
"eval_ordinal_accuracy": 0.7074235677719116, |
|
"eval_ordinal_mae": 0.38217073678970337, |
|
"eval_runtime": 175.4662, |
|
"eval_samples_per_second": 25.503, |
|
"eval_steps_per_second": 3.191, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.654973030090332, |
|
"learning_rate": 1.1684096646231518e-06, |
|
"loss": 0.1139, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 0.66736900806427, |
|
"learning_rate": 1.0782545979084025e-06, |
|
"loss": 0.1099, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.9291324615478516, |
|
"learning_rate": 9.880995311936533e-07, |
|
"loss": 0.1233, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 0.864493191242218, |
|
"learning_rate": 8.979444644789038e-07, |
|
"loss": 0.1391, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.19748477637767792, |
|
"eval_na_accuracy": 0.8453608155250549, |
|
"eval_ordinal_accuracy": 0.7051117420196533, |
|
"eval_ordinal_mae": 0.3809069097042084, |
|
"eval_runtime": 174.5451, |
|
"eval_samples_per_second": 25.638, |
|
"eval_steps_per_second": 3.208, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 0.844887912273407, |
|
"learning_rate": 8.077893977641545e-07, |
|
"loss": 0.1364, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 0.7487301230430603, |
|
"learning_rate": 7.17634331049405e-07, |
|
"loss": 0.1137, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 3.6033549308776855, |
|
"learning_rate": 6.274792643346557e-07, |
|
"loss": 0.1508, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.7312902212142944, |
|
"learning_rate": 5.373241976199063e-07, |
|
"loss": 0.1183, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.19734890758991241, |
|
"eval_na_accuracy": 0.8350515365600586, |
|
"eval_ordinal_accuracy": 0.7087079286575317, |
|
"eval_ordinal_mae": 0.38272929191589355, |
|
"eval_runtime": 176.207, |
|
"eval_samples_per_second": 25.396, |
|
"eval_steps_per_second": 3.178, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"grad_norm": 0.859944760799408, |
|
"learning_rate": 4.4716913090515695e-07, |
|
"loss": 0.1125, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 0.7725034952163696, |
|
"learning_rate": 3.5701406419040757e-07, |
|
"loss": 0.1009, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 8.944540977478027, |
|
"learning_rate": 2.6685899747565814e-07, |
|
"loss": 0.1115, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 0.6212058663368225, |
|
"learning_rate": 1.7670393076090878e-07, |
|
"loss": 0.1368, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.19747541844844818, |
|
"eval_na_accuracy": 0.8333333134651184, |
|
"eval_ordinal_accuracy": 0.7081941962242126, |
|
"eval_ordinal_mae": 0.38125094771385193, |
|
"eval_runtime": 170.5638, |
|
"eval_samples_per_second": 26.237, |
|
"eval_steps_per_second": 3.283, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"grad_norm": 0.4366958737373352, |
|
"learning_rate": 8.65488640461594e-08, |
|
"loss": 0.108, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 5546, |
|
"total_flos": 6.876575711894569e+18, |
|
"train_loss": 0.18857605492557122, |
|
"train_runtime": 17933.6413, |
|
"train_samples_per_second": 4.948, |
|
"train_steps_per_second": 0.309 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5546, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 100, |
|
"total_flos": 6.876575711894569e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|