{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7558488302339532,
  "eval_steps": 500,
  "global_step": 252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002999400119976005,
      "grad_norm": 12.213529318427376,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.6991,
      "step": 1
    },
    {
      "epoch": 0.00599880023995201,
      "grad_norm": 11.149836011452198,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.5997,
      "step": 2
    },
    {
      "epoch": 0.008998200359928014,
      "grad_norm": 5.296165512046405,
      "learning_rate": 3e-06,
      "loss": 1.3037,
      "step": 3
    },
    {
      "epoch": 0.01199760047990402,
      "grad_norm": 3.7277911972297235,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.3472,
      "step": 4
    },
    {
      "epoch": 0.014997000599880024,
      "grad_norm": 6.970291148182751,
      "learning_rate": 5e-06,
      "loss": 1.5234,
      "step": 5
    },
    {
      "epoch": 0.017996400719856028,
      "grad_norm": 5.825870871741097,
      "learning_rate": 6e-06,
      "loss": 1.2331,
      "step": 6
    },
    {
      "epoch": 0.020995800839832032,
      "grad_norm": 4.789226733983147,
      "learning_rate": 7e-06,
      "loss": 1.2847,
      "step": 7
    },
    {
      "epoch": 0.02399520095980804,
      "grad_norm": 5.735809923380043,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3148,
      "step": 8
    },
    {
      "epoch": 0.026994601079784044,
      "grad_norm": 4.146594143559111,
      "learning_rate": 9e-06,
      "loss": 1.3241,
      "step": 9
    },
    {
      "epoch": 0.029994001199760048,
      "grad_norm": 4.3548572301609365,
      "learning_rate": 1e-05,
      "loss": 1.2048,
      "step": 10
    },
    {
      "epoch": 0.032993401319736056,
      "grad_norm": 3.186860649490824,
      "learning_rate": 9.999942663491213e-06,
      "loss": 1.271,
      "step": 11
    },
    {
      "epoch": 0.035992801439712056,
      "grad_norm": 5.683449148461711,
      "learning_rate": 9.999770655279843e-06,
      "loss": 1.4619,
      "step": 12
    },
    {
      "epoch": 0.038992201559688064,
      "grad_norm": 2.918639590322084,
      "learning_rate": 9.99948397931083e-06,
      "loss": 1.2698,
      "step": 13
    },
    {
      "epoch": 0.041991601679664065,
      "grad_norm": 3.1049642688975228,
      "learning_rate": 9.999082642158972e-06,
      "loss": 1.4018,
      "step": 14
    },
    {
      "epoch": 0.04499100179964007,
      "grad_norm": 2.396816425806027,
      "learning_rate": 9.99856665302878e-06,
      "loss": 1.1897,
      "step": 15
    },
    {
      "epoch": 0.04799040191961608,
      "grad_norm": 2.6334601975355283,
      "learning_rate": 9.997936023754258e-06,
      "loss": 1.3976,
      "step": 16
    },
    {
      "epoch": 0.05098980203959208,
      "grad_norm": 2.619689679258694,
      "learning_rate": 9.997190768798639e-06,
      "loss": 1.286,
      "step": 17
    },
    {
      "epoch": 0.05398920215956809,
      "grad_norm": 2.7084119507983377,
      "learning_rate": 9.99633090525405e-06,
      "loss": 1.3328,
      "step": 18
    },
    {
      "epoch": 0.05698860227954409,
      "grad_norm": 2.3481903809919404,
      "learning_rate": 9.995356452841122e-06,
      "loss": 1.1662,
      "step": 19
    },
    {
      "epoch": 0.059988002399520096,
      "grad_norm": 4.651416427067284,
      "learning_rate": 9.994267433908533e-06,
      "loss": 1.5033,
      "step": 20
    },
    {
      "epoch": 0.0629874025194961,
      "grad_norm": 2.625598226472764,
      "learning_rate": 9.9930638734325e-06,
      "loss": 1.2022,
      "step": 21
    },
    {
      "epoch": 0.06598680263947211,
      "grad_norm": 2.4104347021265973,
      "learning_rate": 9.991745799016206e-06,
      "loss": 1.1979,
      "step": 22
    },
    {
      "epoch": 0.06898620275944811,
      "grad_norm": 3.1305838351835478,
      "learning_rate": 9.990313240889167e-06,
      "loss": 1.3105,
      "step": 23
    },
    {
      "epoch": 0.07198560287942411,
      "grad_norm": 3.18309366674778,
      "learning_rate": 9.988766231906532e-06,
      "loss": 1.3079,
      "step": 24
    },
    {
      "epoch": 0.07498500299940011,
      "grad_norm": 3.5781057246328563,
      "learning_rate": 9.987104807548341e-06,
      "loss": 1.3255,
      "step": 25
    },
    {
      "epoch": 0.07798440311937613,
      "grad_norm": 2.259604288692876,
      "learning_rate": 9.985329005918702e-06,
      "loss": 1.242,
      "step": 26
    },
    {
      "epoch": 0.08098380323935213,
      "grad_norm": 5.16511455231151,
      "learning_rate": 9.983438867744923e-06,
      "loss": 1.5295,
      "step": 27
    },
    {
      "epoch": 0.08398320335932813,
      "grad_norm": 2.5507458276752955,
      "learning_rate": 9.981434436376572e-06,
      "loss": 1.1835,
      "step": 28
    },
    {
      "epoch": 0.08698260347930414,
      "grad_norm": 2.374301337319529,
      "learning_rate": 9.97931575778449e-06,
      "loss": 1.2602,
      "step": 29
    },
    {
      "epoch": 0.08998200359928014,
      "grad_norm": 2.869182508074545,
      "learning_rate": 9.977082880559725e-06,
      "loss": 1.2714,
      "step": 30
    },
    {
      "epoch": 0.09298140371925614,
      "grad_norm": 3.352349678924823,
      "learning_rate": 9.974735855912436e-06,
      "loss": 1.2895,
      "step": 31
    },
    {
      "epoch": 0.09598080383923216,
      "grad_norm": 2.402498887438619,
      "learning_rate": 9.972274737670702e-06,
      "loss": 1.2169,
      "step": 32
    },
    {
      "epoch": 0.09898020395920816,
      "grad_norm": 2.3026043458108356,
      "learning_rate": 9.969699582279294e-06,
      "loss": 1.2486,
      "step": 33
    },
    {
      "epoch": 0.10197960407918416,
      "grad_norm": 2.7234482697971796,
      "learning_rate": 9.967010448798376e-06,
      "loss": 1.3611,
      "step": 34
    },
    {
      "epoch": 0.10497900419916016,
      "grad_norm": 2.3328312481588096,
      "learning_rate": 9.964207398902163e-06,
      "loss": 1.2112,
      "step": 35
    },
    {
      "epoch": 0.10797840431913618,
      "grad_norm": 2.3738667556158304,
      "learning_rate": 9.961290496877492e-06,
      "loss": 1.1489,
      "step": 36
    },
    {
      "epoch": 0.11097780443911218,
      "grad_norm": 2.381064080119671,
      "learning_rate": 9.958259809622353e-06,
      "loss": 1.3426,
      "step": 37
    },
    {
      "epoch": 0.11397720455908818,
      "grad_norm": 2.789399877084244,
      "learning_rate": 9.955115406644357e-06,
      "loss": 1.2355,
      "step": 38
    },
    {
      "epoch": 0.11697660467906419,
      "grad_norm": 2.2360091447066885,
      "learning_rate": 9.951857360059141e-06,
      "loss": 1.298,
      "step": 39
    },
    {
      "epoch": 0.11997600479904019,
      "grad_norm": 2.3420608415672644,
      "learning_rate": 9.948485744588709e-06,
      "loss": 1.2597,
      "step": 40
    },
    {
      "epoch": 0.12297540491901619,
      "grad_norm": 2.0514216630297786,
      "learning_rate": 9.945000637559728e-06,
      "loss": 1.2464,
      "step": 41
    },
    {
      "epoch": 0.1259748050389922,
      "grad_norm": 2.1083906964286765,
      "learning_rate": 9.941402118901743e-06,
      "loss": 1.2223,
      "step": 42
    },
    {
      "epoch": 0.1289742051589682,
      "grad_norm": 2.584585224378793,
      "learning_rate": 9.937690271145356e-06,
      "loss": 1.2575,
      "step": 43
    },
    {
      "epoch": 0.13197360527894422,
      "grad_norm": 2.3779090684986652,
      "learning_rate": 9.93386517942032e-06,
      "loss": 1.1451,
      "step": 44
    },
    {
      "epoch": 0.1349730053989202,
      "grad_norm": 2.108339055225494,
      "learning_rate": 9.9299269314536e-06,
      "loss": 1.2248,
      "step": 45
    },
    {
      "epoch": 0.13797240551889622,
      "grad_norm": 2.121719885374642,
      "learning_rate": 9.92587561756735e-06,
      "loss": 1.207,
      "step": 46
    },
    {
      "epoch": 0.14097180563887224,
      "grad_norm": 2.205771486341789,
      "learning_rate": 9.921711330676848e-06,
      "loss": 1.2321,
      "step": 47
    },
    {
      "epoch": 0.14397120575884823,
      "grad_norm": 2.288650968193418,
      "learning_rate": 9.917434166288364e-06,
      "loss": 1.244,
      "step": 48
    },
    {
      "epoch": 0.14697060587882424,
      "grad_norm": 2.3703349873199917,
      "learning_rate": 9.913044222496966e-06,
      "loss": 1.2758,
      "step": 49
    },
    {
      "epoch": 0.14997000599880023,
      "grad_norm": 2.342816783402187,
      "learning_rate": 9.908541599984276e-06,
      "loss": 1.2799,
      "step": 50
    },
    {
      "epoch": 0.15296940611877624,
      "grad_norm": 2.290011867632185,
      "learning_rate": 9.903926402016153e-06,
      "loss": 1.137,
      "step": 51
    },
    {
      "epoch": 0.15596880623875226,
      "grad_norm": 2.2455094517388825,
      "learning_rate": 9.899198734440335e-06,
      "loss": 1.2133,
      "step": 52
    },
    {
      "epoch": 0.15896820635872824,
      "grad_norm": 2.263487895355519,
      "learning_rate": 9.894358705684002e-06,
      "loss": 1.1898,
      "step": 53
    },
    {
      "epoch": 0.16196760647870426,
      "grad_norm": 2.094119128560065,
      "learning_rate": 9.889406426751296e-06,
      "loss": 1.1847,
      "step": 54
    },
    {
      "epoch": 0.16496700659868027,
      "grad_norm": 2.45640150031141,
      "learning_rate": 9.88434201122077e-06,
      "loss": 1.2662,
      "step": 55
    },
    {
      "epoch": 0.16796640671865626,
      "grad_norm": 3.7462847341409606,
      "learning_rate": 9.879165575242788e-06,
      "loss": 1.1912,
      "step": 56
    },
    {
      "epoch": 0.17096580683863227,
      "grad_norm": 2.382097038596032,
      "learning_rate": 9.873877237536854e-06,
      "loss": 1.2652,
      "step": 57
    },
    {
      "epoch": 0.1739652069586083,
      "grad_norm": 2.320080713494513,
      "learning_rate": 9.868477119388897e-06,
      "loss": 1.2682,
      "step": 58
    },
    {
      "epoch": 0.17696460707858427,
      "grad_norm": 2.1974546622057938,
      "learning_rate": 9.862965344648485e-06,
      "loss": 1.1956,
      "step": 59
    },
    {
      "epoch": 0.1799640071985603,
      "grad_norm": 2.40241845961337,
      "learning_rate": 9.85734203972599e-06,
      "loss": 1.2948,
      "step": 60
    },
    {
      "epoch": 0.1829634073185363,
      "grad_norm": 2.1855343450625053,
      "learning_rate": 9.851607333589677e-06,
      "loss": 1.1967,
      "step": 61
    },
    {
      "epoch": 0.1859628074385123,
      "grad_norm": 2.772912562685191,
      "learning_rate": 9.84576135776276e-06,
      "loss": 1.1864,
      "step": 62
    },
    {
      "epoch": 0.1889622075584883,
      "grad_norm": 2.420057020746961,
      "learning_rate": 9.839804246320374e-06,
      "loss": 1.2534,
      "step": 63
    },
    {
      "epoch": 0.19196160767846432,
      "grad_norm": 2.265555099135192,
      "learning_rate": 9.833736135886513e-06,
      "loss": 1.2563,
      "step": 64
    },
    {
      "epoch": 0.1949610077984403,
      "grad_norm": 2.292076182842636,
      "learning_rate": 9.827557165630879e-06,
      "loss": 1.193,
      "step": 65
    },
    {
      "epoch": 0.19796040791841632,
      "grad_norm": 2.032874490713336,
      "learning_rate": 9.821267477265705e-06,
      "loss": 1.231,
      "step": 66
    },
    {
      "epoch": 0.20095980803839233,
      "grad_norm": 2.1341683347971543,
      "learning_rate": 9.814867215042503e-06,
      "loss": 1.2317,
      "step": 67
    },
    {
      "epoch": 0.20395920815836832,
      "grad_norm": 2.2463443930345077,
      "learning_rate": 9.808356525748748e-06,
      "loss": 1.2911,
      "step": 68
    },
    {
      "epoch": 0.20695860827834434,
      "grad_norm": 2.0325804061530848,
      "learning_rate": 9.801735558704516e-06,
      "loss": 1.254,
      "step": 69
    },
    {
      "epoch": 0.20995800839832032,
      "grad_norm": 2.0568500164008086,
      "learning_rate": 9.795004465759067e-06,
      "loss": 1.206,
      "step": 70
    },
    {
      "epoch": 0.21295740851829634,
      "grad_norm": 2.082577302356669,
      "learning_rate": 9.78816340128734e-06,
      "loss": 1.2027,
      "step": 71
    },
    {
      "epoch": 0.21595680863827235,
      "grad_norm": 2.048998263808703,
      "learning_rate": 9.781212522186442e-06,
      "loss": 1.2288,
      "step": 72
    },
    {
      "epoch": 0.21895620875824834,
      "grad_norm": 2.49581568276726,
      "learning_rate": 9.774151987872029e-06,
      "loss": 1.2493,
      "step": 73
    },
    {
      "epoch": 0.22195560887822435,
      "grad_norm": 1.8323511888260988,
      "learning_rate": 9.766981960274653e-06,
      "loss": 1.2133,
      "step": 74
    },
    {
      "epoch": 0.22495500899820037,
      "grad_norm": 1.9244897973012691,
      "learning_rate": 9.759702603836059e-06,
      "loss": 1.1556,
      "step": 75
    },
    {
      "epoch": 0.22795440911817635,
      "grad_norm": 2.2391757657702476,
      "learning_rate": 9.752314085505396e-06,
      "loss": 1.139,
      "step": 76
    },
    {
      "epoch": 0.23095380923815237,
      "grad_norm": 2.1062513435343955,
      "learning_rate": 9.744816574735405e-06,
      "loss": 1.1568,
      "step": 77
    },
    {
      "epoch": 0.23395320935812838,
      "grad_norm": 2.244216493001031,
      "learning_rate": 9.737210243478522e-06,
      "loss": 1.1672,
      "step": 78
    },
    {
      "epoch": 0.23695260947810437,
      "grad_norm": 2.2301876958310585,
      "learning_rate": 9.72949526618294e-06,
      "loss": 1.1805,
      "step": 79
    },
    {
      "epoch": 0.23995200959808038,
      "grad_norm": 2.087760281351449,
      "learning_rate": 9.721671819788603e-06,
      "loss": 1.2595,
      "step": 80
    },
    {
      "epoch": 0.2429514097180564,
      "grad_norm": 2.040923837775886,
      "learning_rate": 9.713740083723152e-06,
      "loss": 1.1853,
      "step": 81
    },
    {
      "epoch": 0.24595080983803239,
      "grad_norm": 1.9835044903865013,
      "learning_rate": 9.705700239897809e-06,
      "loss": 1.1165,
      "step": 82
    },
    {
      "epoch": 0.2489502099580084,
      "grad_norm": 2.4048211037733433,
      "learning_rate": 9.697552472703205e-06,
      "loss": 1.2442,
      "step": 83
    },
    {
      "epoch": 0.2519496100779844,
      "grad_norm": 1.9274434259225983,
      "learning_rate": 9.689296969005151e-06,
      "loss": 1.2233,
      "step": 84
    },
    {
      "epoch": 0.2549490101979604,
      "grad_norm": 2.0930221605237813,
      "learning_rate": 9.680933918140348e-06,
      "loss": 1.2042,
      "step": 85
    },
    {
      "epoch": 0.2579484103179364,
      "grad_norm": 2.050947291358715,
      "learning_rate": 9.672463511912056e-06,
      "loss": 1.1526,
      "step": 86
    },
    {
      "epoch": 0.26094781043791243,
      "grad_norm": 2.1905662044973218,
      "learning_rate": 9.66388594458568e-06,
      "loss": 1.2428,
      "step": 87
    },
    {
      "epoch": 0.26394721055788845,
      "grad_norm": 2.194939852549527,
      "learning_rate": 9.655201412884328e-06,
      "loss": 1.1842,
      "step": 88
    },
    {
      "epoch": 0.2669466106778644,
      "grad_norm": 1.9191574847419615,
      "learning_rate": 9.64641011598429e-06,
      "loss": 1.2005,
      "step": 89
    },
    {
      "epoch": 0.2699460107978404,
      "grad_norm": 2.0406272846772597,
      "learning_rate": 9.637512255510475e-06,
      "loss": 1.1275,
      "step": 90
    },
    {
      "epoch": 0.27294541091781643,
      "grad_norm": 1.9546291627174466,
      "learning_rate": 9.628508035531785e-06,
      "loss": 1.2274,
      "step": 91
    },
    {
      "epoch": 0.27594481103779245,
      "grad_norm": 1.9691367121837948,
      "learning_rate": 9.619397662556434e-06,
      "loss": 1.207,
      "step": 92
    },
    {
      "epoch": 0.27894421115776846,
      "grad_norm": 2.3613406410056323,
      "learning_rate": 9.610181345527217e-06,
      "loss": 1.2273,
      "step": 93
    },
    {
      "epoch": 0.2819436112777445,
      "grad_norm": 1.869890124546451,
      "learning_rate": 9.600859295816708e-06,
      "loss": 1.2272,
      "step": 94
    },
    {
      "epoch": 0.28494301139772044,
      "grad_norm": 2.1493924596390834,
      "learning_rate": 9.591431727222425e-06,
      "loss": 1.1787,
      "step": 95
    },
    {
      "epoch": 0.28794241151769645,
      "grad_norm": 2.055643425325773,
      "learning_rate": 9.581898855961911e-06,
      "loss": 1.1993,
      "step": 96
    },
    {
      "epoch": 0.29094181163767247,
      "grad_norm": 1.84642386683885,
      "learning_rate": 9.572260900667794e-06,
      "loss": 1.1839,
      "step": 97
    },
    {
      "epoch": 0.2939412117576485,
      "grad_norm": 1.890951805519698,
      "learning_rate": 9.562518082382751e-06,
      "loss": 1.207,
      "step": 98
    },
    {
      "epoch": 0.2969406118776245,
      "grad_norm": 2.37835708136971,
      "learning_rate": 9.55267062455446e-06,
      "loss": 1.1894,
      "step": 99
    },
    {
      "epoch": 0.29994001199760045,
      "grad_norm": 1.9298763701114428,
      "learning_rate": 9.542718753030463e-06,
      "loss": 1.17,
      "step": 100
    },
    {
      "epoch": 0.30293941211757647,
      "grad_norm": 1.9738559838328023,
      "learning_rate": 9.532662696052986e-06,
      "loss": 1.1911,
      "step": 101
    },
    {
      "epoch": 0.3059388122375525,
      "grad_norm": 2.097481526316711,
      "learning_rate": 9.522502684253709e-06,
      "loss": 1.2047,
      "step": 102
    },
    {
      "epoch": 0.3089382123575285,
      "grad_norm": 2.2773066391553405,
      "learning_rate": 9.512238950648474e-06,
      "loss": 1.1683,
      "step": 103
    },
    {
      "epoch": 0.3119376124775045,
      "grad_norm": 2.0545409984803435,
      "learning_rate": 9.501871730631944e-06,
      "loss": 1.1777,
      "step": 104
    },
    {
      "epoch": 0.3149370125974805,
      "grad_norm": 2.0765643204952435,
      "learning_rate": 9.491401261972194e-06,
      "loss": 1.2306,
      "step": 105
    },
    {
      "epoch": 0.3179364127174565,
      "grad_norm": 1.9891174989072355,
      "learning_rate": 9.480827784805278e-06,
      "loss": 1.2351,
      "step": 106
    },
    {
      "epoch": 0.3209358128374325,
      "grad_norm": 1.889574475483626,
      "learning_rate": 9.4701515416297e-06,
      "loss": 1.1742,
      "step": 107
    },
    {
      "epoch": 0.3239352129574085,
      "grad_norm": 2.0131114525216955,
      "learning_rate": 9.459372777300863e-06,
      "loss": 1.127,
      "step": 108
    },
    {
      "epoch": 0.32693461307738453,
      "grad_norm": 2.0094971545889506,
      "learning_rate": 9.448491739025455e-06,
      "loss": 1.1203,
      "step": 109
    },
    {
      "epoch": 0.32993401319736054,
      "grad_norm": 2.007024349964355,
      "learning_rate": 9.437508676355774e-06,
      "loss": 1.2005,
      "step": 110
    },
    {
      "epoch": 0.33293341331733656,
      "grad_norm": 1.834949376512209,
      "learning_rate": 9.426423841184007e-06,
      "loss": 1.1451,
      "step": 111
    },
    {
      "epoch": 0.3359328134373125,
      "grad_norm": 2.0659739662077863,
      "learning_rate": 9.415237487736452e-06,
      "loss": 1.1978,
      "step": 112
    },
    {
      "epoch": 0.33893221355728853,
      "grad_norm": 2.0021385422334714,
      "learning_rate": 9.403949872567696e-06,
      "loss": 1.1969,
      "step": 113
    },
    {
      "epoch": 0.34193161367726455,
      "grad_norm": 2.131713368907865,
      "learning_rate": 9.392561254554712e-06,
      "loss": 1.0958,
      "step": 114
    },
    {
      "epoch": 0.34493101379724056,
      "grad_norm": 2.009265355301516,
      "learning_rate": 9.381071894890942e-06,
      "loss": 1.2154,
      "step": 115
    },
    {
      "epoch": 0.3479304139172166,
      "grad_norm": 1.9134985847126271,
      "learning_rate": 9.369482057080293e-06,
      "loss": 1.2242,
      "step": 116
    },
    {
      "epoch": 0.3509298140371926,
      "grad_norm": 1.9567206999616176,
      "learning_rate": 9.3577920069311e-06,
      "loss": 1.172,
      "step": 117
    },
    {
      "epoch": 0.35392921415716855,
      "grad_norm": 1.9433526958741427,
      "learning_rate": 9.346002012550027e-06,
      "loss": 1.1785,
      "step": 118
    },
    {
      "epoch": 0.35692861427714456,
      "grad_norm": 3.7758127026886776,
      "learning_rate": 9.334112344335924e-06,
      "loss": 1.2509,
      "step": 119
    },
    {
      "epoch": 0.3599280143971206,
      "grad_norm": 2.2288097190307754,
      "learning_rate": 9.322123274973613e-06,
      "loss": 1.2034,
      "step": 120
    },
    {
      "epoch": 0.3629274145170966,
      "grad_norm": 2.210018938006893,
      "learning_rate": 9.310035079427651e-06,
      "loss": 1.2089,
      "step": 121
    },
    {
      "epoch": 0.3659268146370726,
      "grad_norm": 1.8381206653711355,
      "learning_rate": 9.297848034936007e-06,
      "loss": 1.186,
      "step": 122
    },
    {
      "epoch": 0.36892621475704857,
      "grad_norm": 2.1078020542808913,
      "learning_rate": 9.285562421003716e-06,
      "loss": 1.2042,
      "step": 123
    },
    {
      "epoch": 0.3719256148770246,
      "grad_norm": 2.2181514311801496,
      "learning_rate": 9.273178519396459e-06,
      "loss": 1.1561,
      "step": 124
    },
    {
      "epoch": 0.3749250149970006,
      "grad_norm": 2.127427295141141,
      "learning_rate": 9.260696614134115e-06,
      "loss": 1.1935,
      "step": 125
    },
    {
      "epoch": 0.3779244151169766,
      "grad_norm": 2.179542019285163,
      "learning_rate": 9.24811699148423e-06,
      "loss": 1.2001,
      "step": 126
    },
    {
      "epoch": 0.3809238152369526,
      "grad_norm": 2.95737675688332,
      "learning_rate": 9.235439939955458e-06,
      "loss": 1.1333,
      "step": 127
    },
    {
      "epoch": 0.38392321535692864,
      "grad_norm": 2.408346714267433,
      "learning_rate": 9.222665750290953e-06,
      "loss": 1.2035,
      "step": 128
    },
    {
      "epoch": 0.3869226154769046,
      "grad_norm": 1.9803537052769442,
      "learning_rate": 9.209794715461691e-06,
      "loss": 1.2133,
      "step": 129
    },
    {
      "epoch": 0.3899220155968806,
      "grad_norm": 2.0753391828539844,
      "learning_rate": 9.196827130659752e-06,
      "loss": 1.2389,
      "step": 130
    },
    {
      "epoch": 0.3929214157168566,
      "grad_norm": 2.057624573882372,
      "learning_rate": 9.18376329329155e-06,
      "loss": 1.1612,
      "step": 131
    },
    {
      "epoch": 0.39592081583683264,
      "grad_norm": 2.095421490379302,
      "learning_rate": 9.170603502971017e-06,
      "loss": 1.2319,
      "step": 132
    },
    {
      "epoch": 0.39892021595680865,
      "grad_norm": 1.9629007266889844,
      "learning_rate": 9.157348061512728e-06,
      "loss": 1.2061,
      "step": 133
    },
    {
      "epoch": 0.40191961607678467,
      "grad_norm": 1.9702164374790287,
      "learning_rate": 9.143997272924974e-06,
      "loss": 1.197,
      "step": 134
    },
    {
      "epoch": 0.40491901619676063,
      "grad_norm": 2.094524351597277,
      "learning_rate": 9.1305514434028e-06,
      "loss": 1.1508,
      "step": 135
    },
    {
      "epoch": 0.40791841631673664,
      "grad_norm": 2.078336809149187,
      "learning_rate": 9.117010881320973e-06,
      "loss": 1.2006,
      "step": 136
    },
    {
      "epoch": 0.41091781643671266,
      "grad_norm": 2.1047922013296385,
      "learning_rate": 9.103375897226919e-06,
      "loss": 1.2225,
      "step": 137
    },
    {
      "epoch": 0.41391721655668867,
      "grad_norm": 1.9800892010976672,
      "learning_rate": 9.089646803833589e-06,
      "loss": 1.2117,
      "step": 138
    },
    {
      "epoch": 0.4169166166766647,
      "grad_norm": 1.948925596250933,
      "learning_rate": 9.075823916012298e-06,
      "loss": 1.1917,
      "step": 139
    },
    {
      "epoch": 0.41991601679664065,
      "grad_norm": 1.8720535653109143,
      "learning_rate": 9.061907550785498e-06,
      "loss": 1.2048,
      "step": 140
    },
    {
      "epoch": 0.42291541691661666,
      "grad_norm": 2.1914812005829316,
      "learning_rate": 9.047898027319508e-06,
      "loss": 1.2393,
      "step": 141
    },
    {
      "epoch": 0.4259148170365927,
      "grad_norm": 3.541065061637063,
      "learning_rate": 9.033795666917191e-06,
      "loss": 1.1728,
      "step": 142
    },
    {
      "epoch": 0.4289142171565687,
      "grad_norm": 1.958943800312053,
      "learning_rate": 9.019600793010596e-06,
      "loss": 1.1204,
      "step": 143
    },
    {
      "epoch": 0.4319136172765447,
      "grad_norm": 1.8982949525316917,
      "learning_rate": 9.005313731153525e-06,
      "loss": 1.136,
      "step": 144
    },
    {
      "epoch": 0.4349130173965207,
      "grad_norm": 2.1962092793560775,
      "learning_rate": 8.990934809014079e-06,
      "loss": 1.2025,
      "step": 145
    },
    {
      "epoch": 0.4379124175164967,
      "grad_norm": 2.148888648427292,
      "learning_rate": 8.976464356367133e-06,
      "loss": 1.1971,
      "step": 146
    },
    {
      "epoch": 0.4409118176364727,
      "grad_norm": 2.1656007482087243,
      "learning_rate": 8.961902705086785e-06,
      "loss": 1.1531,
      "step": 147
    },
    {
      "epoch": 0.4439112177564487,
      "grad_norm": 2.245423572348511,
      "learning_rate": 8.947250189138732e-06,
      "loss": 1.205,
      "step": 148
    },
    {
      "epoch": 0.4469106178764247,
      "grad_norm": 2.2015971794373037,
      "learning_rate": 8.932507144572616e-06,
      "loss": 1.1663,
      "step": 149
    },
    {
      "epoch": 0.44991001799640074,
      "grad_norm": 2.040881991969036,
      "learning_rate": 8.917673909514321e-06,
      "loss": 1.2318,
      "step": 150
    },
    {
      "epoch": 0.45290941811637675,
      "grad_norm": 2.0365534777961884,
      "learning_rate": 8.902750824158213e-06,
      "loss": 1.1634,
      "step": 151
    },
    {
      "epoch": 0.4559088182363527,
      "grad_norm": 2.0931596144968903,
      "learning_rate": 8.887738230759334e-06,
      "loss": 1.1547,
      "step": 152
    },
    {
      "epoch": 0.4589082183563287,
      "grad_norm": 2.040316251155918,
      "learning_rate": 8.872636473625564e-06,
      "loss": 1.1948,
      "step": 153
    },
    {
      "epoch": 0.46190761847630474,
      "grad_norm": 2.0411032824079514,
      "learning_rate": 8.857445899109716e-06,
      "loss": 1.1292,
      "step": 154
    },
    {
      "epoch": 0.46490701859628075,
      "grad_norm": 2.9762584994335923,
      "learning_rate": 8.84216685560159e-06,
      "loss": 1.1308,
      "step": 155
    },
    {
      "epoch": 0.46790641871625677,
      "grad_norm": 2.097668580242586,
      "learning_rate": 8.826799693519996e-06,
      "loss": 1.1631,
      "step": 156
    },
    {
      "epoch": 0.4709058188362327,
      "grad_norm": 2.0737253370280384,
      "learning_rate": 8.811344765304698e-06,
      "loss": 1.1484,
      "step": 157
    },
    {
      "epoch": 0.47390521895620874,
      "grad_norm": 2.1733316818144033,
      "learning_rate": 8.795802425408352e-06,
      "loss": 1.1992,
      "step": 158
    },
    {
      "epoch": 0.47690461907618475,
      "grad_norm": 3.0100700234706927,
      "learning_rate": 8.780173030288359e-06,
      "loss": 1.2115,
      "step": 159
    },
    {
      "epoch": 0.47990401919616077,
      "grad_norm": 2.0471682767814374,
      "learning_rate": 8.7644569383987e-06,
      "loss": 1.2007,
      "step": 160
    },
    {
      "epoch": 0.4829034193161368,
      "grad_norm": 2.069677080263759,
      "learning_rate": 8.748654510181709e-06,
      "loss": 1.1719,
      "step": 161
    },
    {
      "epoch": 0.4859028194361128,
      "grad_norm": 2.136133148960194,
      "learning_rate": 8.732766108059814e-06,
      "loss": 1.1097,
      "step": 162
    },
    {
      "epoch": 0.48890221955608876,
      "grad_norm": 2.2078781415683704,
      "learning_rate": 8.716792096427217e-06,
      "loss": 1.2822,
      "step": 163
    },
    {
      "epoch": 0.49190161967606477,
      "grad_norm": 1.8108893344256605,
      "learning_rate": 8.700732841641542e-06,
      "loss": 1.1984,
      "step": 164
    },
    {
      "epoch": 0.4949010197960408,
      "grad_norm": 1.9202600268802827,
      "learning_rate": 8.68458871201543e-06,
      "loss": 1.1822,
      "step": 165
    },
    {
      "epoch": 0.4979004199160168,
      "grad_norm": 2.043987291062948,
      "learning_rate": 8.668360077808093e-06,
      "loss": 1.1588,
      "step": 166
    },
    {
      "epoch": 0.5008998200359928,
      "grad_norm": 1.8608294617929213,
      "learning_rate": 8.652047311216823e-06,
      "loss": 1.1739,
      "step": 167
    },
    {
      "epoch": 0.5038992201559688,
      "grad_norm": 2.0614264560161812,
      "learning_rate": 8.635650786368452e-06,
      "loss": 1.2053,
      "step": 168
    },
    {
      "epoch": 0.5068986202759448,
      "grad_norm": 1.8415134777925706,
      "learning_rate": 8.61917087931078e-06,
      "loss": 1.1629,
      "step": 169
    },
    {
      "epoch": 0.5098980203959208,
      "grad_norm": 2.067056790142203,
      "learning_rate": 8.602607968003935e-06,
      "loss": 1.1656,
      "step": 170
    },
    {
      "epoch": 0.5128974205158968,
      "grad_norm": 1.9885496712381465,
      "learning_rate": 8.585962432311728e-06,
      "loss": 1.1061,
      "step": 171
    },
    {
      "epoch": 0.5158968206358728,
      "grad_norm": 1.8542146492076237,
      "learning_rate": 8.569234653992916e-06,
      "loss": 1.2416,
      "step": 172
    },
    {
      "epoch": 0.5188962207558488,
      "grad_norm": 2.2130247278463453,
      "learning_rate": 8.552425016692464e-06,
      "loss": 1.2651,
      "step": 173
    },
    {
      "epoch": 0.5218956208758249,
      "grad_norm": 1.9214372548459986,
      "learning_rate": 8.535533905932739e-06,
      "loss": 1.2038,
      "step": 174
    },
    {
      "epoch": 0.5248950209958009,
      "grad_norm": 1.8202998106553783,
      "learning_rate": 8.518561709104667e-06,
      "loss": 1.1806,
      "step": 175
    },
    {
      "epoch": 0.5278944211157769,
      "grad_norm": 2.033480383542316,
      "learning_rate": 8.501508815458856e-06,
      "loss": 1.1138,
      "step": 176
    },
    {
      "epoch": 0.5308938212357528,
      "grad_norm": 1.8173739641856819,
      "learning_rate": 8.484375616096658e-06,
      "loss": 1.0463,
      "step": 177
    },
    {
      "epoch": 0.5338932213557288,
      "grad_norm": 1.9159018093250713,
      "learning_rate": 8.467162503961209e-06,
      "loss": 1.1438,
      "step": 178
    },
    {
      "epoch": 0.5368926214757048,
      "grad_norm": 2.0146669190076416,
      "learning_rate": 8.449869873828411e-06,
      "loss": 1.2241,
      "step": 179
    },
    {
      "epoch": 0.5398920215956808,
      "grad_norm": 2.07880204207024,
      "learning_rate": 8.432498122297879e-06,
      "loss": 1.2059,
      "step": 180
    },
    {
      "epoch": 0.5428914217156569,
      "grad_norm": 2.1530579237214593,
      "learning_rate": 8.415047647783847e-06,
      "loss": 1.1945,
      "step": 181
    },
    {
      "epoch": 0.5458908218356329,
      "grad_norm": 1.9176382821131852,
      "learning_rate": 8.39751885050603e-06,
      "loss": 1.2025,
      "step": 182
    },
    {
      "epoch": 0.5488902219556089,
      "grad_norm": 2.032474526366641,
      "learning_rate": 8.379912132480441e-06,
      "loss": 1.1962,
      "step": 183
    },
    {
      "epoch": 0.5518896220755849,
      "grad_norm": 2.454514968342902,
      "learning_rate": 8.36222789751018e-06,
      "loss": 1.2075,
      "step": 184
    },
    {
      "epoch": 0.5548890221955609,
      "grad_norm": 2.033627242661126,
      "learning_rate": 8.344466551176163e-06,
      "loss": 1.2134,
      "step": 185
    },
    {
      "epoch": 0.5578884223155369,
      "grad_norm": 2.0441091460883585,
      "learning_rate": 8.326628500827826e-06,
      "loss": 1.2038,
      "step": 186
    },
    {
      "epoch": 0.5608878224355129,
      "grad_norm": 1.9807070431779146,
      "learning_rate": 8.308714155573785e-06,
      "loss": 1.12,
      "step": 187
    },
    {
      "epoch": 0.563887222555489,
      "grad_norm": 1.9349184602121337,
      "learning_rate": 8.290723926272439e-06,
      "loss": 1.1802,
      "step": 188
    },
    {
      "epoch": 0.5668866226754649,
      "grad_norm": 1.8384106015909107,
      "learning_rate": 8.27265822552257e-06,
      "loss": 1.1826,
      "step": 189
    },
    {
      "epoch": 0.5698860227954409,
      "grad_norm": 1.9156655051619718,
      "learning_rate": 8.254517467653858e-06,
      "loss": 1.1257,
      "step": 190
    },
    {
      "epoch": 0.5728854229154169,
      "grad_norm": 2.1724786856863973,
      "learning_rate": 8.236302068717393e-06,
      "loss": 1.1839,
      "step": 191
    },
    {
      "epoch": 0.5758848230353929,
      "grad_norm": 1.8396024645135292,
      "learning_rate": 8.218012446476128e-06,
      "loss": 1.16,
      "step": 192
    },
    {
      "epoch": 0.5788842231553689,
      "grad_norm": 1.9408487099445306,
      "learning_rate": 8.199649020395299e-06,
      "loss": 1.2241,
      "step": 193
    },
    {
      "epoch": 0.5818836232753449,
      "grad_norm": 1.9108055995475,
      "learning_rate": 8.1812122116328e-06,
      "loss": 1.164,
      "step": 194
    },
    {
      "epoch": 0.584883023395321,
      "grad_norm": 2.098400399811293,
      "learning_rate": 8.16270244302953e-06,
      "loss": 1.1549,
      "step": 195
    },
    {
      "epoch": 0.587882423515297,
      "grad_norm": 2.1600698972560015,
      "learning_rate": 8.144120139099697e-06,
      "loss": 1.2257,
      "step": 196
    },
    {
      "epoch": 0.590881823635273,
      "grad_norm": 2.4157151117755826,
      "learning_rate": 8.125465726021068e-06,
      "loss": 1.2377,
      "step": 197
    },
    {
      "epoch": 0.593881223755249,
      "grad_norm": 2.0852805021246246,
      "learning_rate": 8.106739631625216e-06,
      "loss": 1.2469,
      "step": 198
    },
    {
      "epoch": 0.596880623875225,
      "grad_norm": 1.9748280355919685,
      "learning_rate": 8.08794228538769e-06,
      "loss": 1.1286,
      "step": 199
    },
    {
      "epoch": 0.5998800239952009,
      "grad_norm": 1.9281839006734618,
      "learning_rate": 8.06907411841817e-06,
      "loss": 1.1319,
      "step": 200
    },
    {
      "epoch": 0.6028794241151769,
      "grad_norm": 1.8690571721258256,
      "learning_rate": 8.050135563450587e-06,
      "loss": 1.1856,
      "step": 201
    },
    {
      "epoch": 0.6058788242351529,
      "grad_norm": 1.9748712035222622,
      "learning_rate": 8.031127054833192e-06,
      "loss": 1.1948,
      "step": 202
    },
    {
      "epoch": 0.608878224355129,
      "grad_norm": 1.991151818988732,
      "learning_rate": 8.01204902851859e-06,
      "loss": 1.1659,
      "step": 203
    },
    {
      "epoch": 0.611877624475105,
      "grad_norm": 2.958946009978542,
      "learning_rate": 7.992901922053751e-06,
      "loss": 1.1856,
      "step": 204
    },
    {
      "epoch": 0.614877024595081,
      "grad_norm": 2.065918241043226,
      "learning_rate": 7.973686174569971e-06,
      "loss": 1.0692,
      "step": 205
    },
    {
      "epoch": 0.617876424715057,
      "grad_norm": 2.123952469982477,
      "learning_rate": 7.954402226772804e-06,
      "loss": 1.1462,
      "step": 206
    },
    {
      "epoch": 0.620875824835033,
      "grad_norm": 2.384554268129999,
      "learning_rate": 7.93505052093194e-06,
      "loss": 1.2331,
      "step": 207
    },
    {
      "epoch": 0.623875224955009,
      "grad_norm": 2.213816272262928,
      "learning_rate": 7.915631500871084e-06,
      "loss": 1.2179,
      "step": 208
    },
    {
      "epoch": 0.626874625074985,
      "grad_norm": 2.041537464539173,
      "learning_rate": 7.896145611957759e-06,
      "loss": 1.1869,
      "step": 209
    },
    {
      "epoch": 0.629874025194961,
      "grad_norm": 1.9032099687526678,
      "learning_rate": 7.876593301093104e-06,
      "loss": 1.1041,
      "step": 210
    },
    {
      "epoch": 0.6328734253149371,
      "grad_norm": 2.0485786710751452,
      "learning_rate": 7.856975016701616e-06,
      "loss": 1.2018,
      "step": 211
    },
    {
      "epoch": 0.635872825434913,
      "grad_norm": 1.985579363766555,
      "learning_rate": 7.837291208720867e-06,
      "loss": 1.2284,
      "step": 212
    },
    {
      "epoch": 0.638872225554889,
      "grad_norm": 1.8372693078207498,
      "learning_rate": 7.81754232859119e-06,
      "loss": 1.2194,
      "step": 213
    },
    {
      "epoch": 0.641871625674865,
      "grad_norm": 2.034191799766837,
      "learning_rate": 7.797728829245321e-06,
      "loss": 1.2119,
      "step": 214
    },
    {
      "epoch": 0.644871025794841,
      "grad_norm": 2.172768388345461,
      "learning_rate": 7.777851165098012e-06,
      "loss": 1.2264,
      "step": 215
    },
    {
      "epoch": 0.647870425914817,
      "grad_norm": 2.036813209788241,
      "learning_rate": 7.757909792035608e-06,
      "loss": 1.2233,
      "step": 216
    },
    {
      "epoch": 0.650869826034793,
      "grad_norm": 1.7687192297667345,
      "learning_rate": 7.737905167405596e-06,
      "loss": 1.2008,
      "step": 217
    },
    {
      "epoch": 0.6538692261547691,
      "grad_norm": 1.9034513819160714,
      "learning_rate": 7.717837750006106e-06,
      "loss": 1.2116,
      "step": 218
    },
    {
      "epoch": 0.6568686262747451,
      "grad_norm": 1.896055349909721,
      "learning_rate": 7.697708000075404e-06,
      "loss": 1.0943,
      "step": 219
    },
    {
      "epoch": 0.6598680263947211,
      "grad_norm": 2.01228606334524,
      "learning_rate": 7.67751637928132e-06,
      "loss": 1.1236,
      "step": 220
    },
    {
      "epoch": 0.6628674265146971,
      "grad_norm": 2.156992216970816,
      "learning_rate": 7.657263350710676e-06,
      "loss": 1.1291,
      "step": 221
    },
    {
      "epoch": 0.6658668266346731,
      "grad_norm": 1.9699773673672667,
      "learning_rate": 7.636949378858647e-06,
      "loss": 1.1458,
      "step": 222
    },
    {
      "epoch": 0.668866226754649,
      "grad_norm": 2.1989186993052754,
      "learning_rate": 7.616574929618126e-06,
      "loss": 1.1121,
      "step": 223
    },
    {
      "epoch": 0.671865626874625,
      "grad_norm": 1.993731506914194,
      "learning_rate": 7.596140470269029e-06,
      "loss": 1.0735,
      "step": 224
    },
    {
      "epoch": 0.674865026994601,
      "grad_norm": 1.8252968956264437,
      "learning_rate": 7.575646469467576e-06,
      "loss": 1.149,
      "step": 225
    },
    {
      "epoch": 0.6778644271145771,
      "grad_norm": 2.055158428727496,
      "learning_rate": 7.555093397235553e-06,
      "loss": 1.154,
      "step": 226
    },
    {
      "epoch": 0.6808638272345531,
      "grad_norm": 1.9915560854905294,
      "learning_rate": 7.5344817249495195e-06,
      "loss": 1.0942,
      "step": 227
    },
    {
      "epoch": 0.6838632273545291,
      "grad_norm": 2.06764585971135,
      "learning_rate": 7.51381192533001e-06,
      "loss": 1.1884,
      "step": 228
    },
    {
      "epoch": 0.6868626274745051,
      "grad_norm": 1.9341247237628625,
      "learning_rate": 7.493084472430683e-06,
      "loss": 1.1791,
      "step": 229
    },
    {
      "epoch": 0.6898620275944811,
      "grad_norm": 1.9931838011874805,
      "learning_rate": 7.472299841627452e-06,
      "loss": 1.1299,
      "step": 230
    },
    {
      "epoch": 0.6928614277144571,
      "grad_norm": 1.9296797551564173,
      "learning_rate": 7.451458509607583e-06,
      "loss": 1.1513,
      "step": 231
    },
    {
      "epoch": 0.6958608278344331,
      "grad_norm": 2.0306426071035544,
      "learning_rate": 7.430560954358764e-06,
      "loss": 1.1837,
      "step": 232
    },
    {
      "epoch": 0.6988602279544092,
      "grad_norm": 1.8564265209276956,
      "learning_rate": 7.409607655158139e-06,
      "loss": 1.2164,
      "step": 233
    },
    {
      "epoch": 0.7018596280743852,
      "grad_norm": 2.2341388452646727,
      "learning_rate": 7.388599092561315e-06,
      "loss": 1.1252,
      "step": 234
    },
    {
      "epoch": 0.7048590281943611,
      "grad_norm": 2.1161776966895496,
      "learning_rate": 7.367535748391349e-06,
      "loss": 1.213,
      "step": 235
    },
    {
      "epoch": 0.7078584283143371,
      "grad_norm": 1.9243533852197758,
      "learning_rate": 7.3464181057276864e-06,
      "loss": 1.1046,
      "step": 236
    },
    {
      "epoch": 0.7108578284343131,
      "grad_norm": 1.9237683937108236,
      "learning_rate": 7.325246648895089e-06,
      "loss": 1.1577,
      "step": 237
    },
    {
      "epoch": 0.7138572285542891,
      "grad_norm": 1.7984916602185241,
      "learning_rate": 7.304021863452525e-06,
      "loss": 1.2052,
      "step": 238
    },
    {
      "epoch": 0.7168566286742651,
      "grad_norm": 1.8721994512639362,
      "learning_rate": 7.282744236182033e-06,
      "loss": 1.1676,
      "step": 239
    },
    {
      "epoch": 0.7198560287942412,
      "grad_norm": 1.8334072031704922,
      "learning_rate": 7.261414255077561e-06,
      "loss": 1.1118,
      "step": 240
    },
    {
      "epoch": 0.7228554289142172,
      "grad_norm": 2.2489377670615474,
      "learning_rate": 7.240032409333765e-06,
      "loss": 1.1682,
      "step": 241
    },
    {
      "epoch": 0.7258548290341932,
      "grad_norm": 1.8400363638367707,
      "learning_rate": 7.218599189334799e-06,
      "loss": 1.1638,
      "step": 242
    },
    {
      "epoch": 0.7288542291541692,
      "grad_norm": 1.8851878635905157,
      "learning_rate": 7.197115086643069e-06,
      "loss": 1.1176,
      "step": 243
    },
    {
      "epoch": 0.7318536292741452,
      "grad_norm": 1.8023550249649072,
      "learning_rate": 7.175580593987952e-06,
      "loss": 1.1164,
      "step": 244
    },
    {
      "epoch": 0.7348530293941212,
      "grad_norm": 2.694191659057711,
      "learning_rate": 7.153996205254495e-06,
      "loss": 1.1573,
      "step": 245
    },
    {
      "epoch": 0.7378524295140971,
      "grad_norm": 1.964255695607597,
      "learning_rate": 7.132362415472099e-06,
      "loss": 1.0549,
      "step": 246
    },
    {
      "epoch": 0.7408518296340731,
      "grad_norm": 1.8017763616156734,
      "learning_rate": 7.1106797208031554e-06,
      "loss": 1.1446,
      "step": 247
    },
    {
      "epoch": 0.7438512297540492,
      "grad_norm": 1.9428097126313124,
      "learning_rate": 7.088948618531668e-06,
      "loss": 1.1081,
      "step": 248
    },
    {
      "epoch": 0.7468506298740252,
      "grad_norm": 2.002394147288212,
      "learning_rate": 7.067169607051851e-06,
      "loss": 1.1359,
      "step": 249
    },
    {
      "epoch": 0.7498500299940012,
      "grad_norm": 1.8198961995762413,
      "learning_rate": 7.045343185856701e-06,
      "loss": 1.1201,
      "step": 250
    },
    {
      "epoch": 0.7528494301139772,
      "grad_norm": 1.9151331196894974,
      "learning_rate": 7.02346985552653e-06,
      "loss": 1.0402,
      "step": 251
    },
    {
      "epoch": 0.7558488302339532,
      "grad_norm": 1.9384378002461773,
      "learning_rate": 7.001550117717499e-06,
      "loss": 1.175,
      "step": 252
    }
  ],
  "logging_steps": 1,
  "max_steps": 666,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 84,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 139507934232576.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}