{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.05055779729409,
  "eval_steps": 500,
  "global_step": 4266,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009494422027059103,
      "grad_norm": 5.364119052886963,
      "learning_rate": 6.329113924050633e-07,
      "loss": 2.9616,
      "step": 1
    },
    {
      "epoch": 0.0018988844054118206,
      "grad_norm": 5.426063060760498,
      "learning_rate": 1.2658227848101265e-06,
      "loss": 2.9664,
      "step": 2
    },
    {
      "epoch": 0.0028483266081177306,
      "grad_norm": 5.507386684417725,
      "learning_rate": 1.8987341772151901e-06,
      "loss": 2.96,
      "step": 3
    },
    {
      "epoch": 0.0037977688108236413,
      "grad_norm": 5.467552185058594,
      "learning_rate": 2.531645569620253e-06,
      "loss": 2.975,
      "step": 4
    },
    {
      "epoch": 0.004747211013529551,
      "grad_norm": 5.386384963989258,
      "learning_rate": 3.1645569620253167e-06,
      "loss": 2.9473,
      "step": 5
    },
    {
      "epoch": 0.005696653216235461,
      "grad_norm": 4.9166951179504395,
      "learning_rate": 3.7974683544303802e-06,
      "loss": 2.8597,
      "step": 6
    },
    {
      "epoch": 0.006646095418941372,
      "grad_norm": 5.469020843505859,
      "learning_rate": 4.430379746835443e-06,
      "loss": 2.9633,
      "step": 7
    },
    {
      "epoch": 0.0075955376216472826,
      "grad_norm": 5.380453586578369,
      "learning_rate": 5.063291139240506e-06,
      "loss": 2.8931,
      "step": 8
    },
    {
      "epoch": 0.008544979824353193,
      "grad_norm": 4.922253131866455,
      "learning_rate": 5.69620253164557e-06,
      "loss": 2.7452,
      "step": 9
    },
    {
      "epoch": 0.009494422027059102,
      "grad_norm": 5.517508029937744,
      "learning_rate": 6.329113924050633e-06,
      "loss": 2.8326,
      "step": 10
    },
    {
      "epoch": 0.010443864229765013,
      "grad_norm": 5.810976982116699,
      "learning_rate": 6.9620253164556965e-06,
      "loss": 2.7854,
      "step": 11
    },
    {
      "epoch": 0.011393306432470923,
      "grad_norm": 5.690661430358887,
      "learning_rate": 7.5949367088607605e-06,
      "loss": 2.7069,
      "step": 12
    },
    {
      "epoch": 0.012342748635176834,
      "grad_norm": 5.994122505187988,
      "learning_rate": 8.227848101265822e-06,
      "loss": 2.5705,
      "step": 13
    },
    {
      "epoch": 0.013292190837882745,
      "grad_norm": 5.86803674697876,
      "learning_rate": 8.860759493670886e-06,
      "loss": 2.4461,
      "step": 14
    },
    {
      "epoch": 0.014241633040588654,
      "grad_norm": 5.448781490325928,
      "learning_rate": 9.49367088607595e-06,
      "loss": 2.2408,
      "step": 15
    },
    {
      "epoch": 0.015191075243294565,
      "grad_norm": 6.4004902839660645,
      "learning_rate": 1.0126582278481012e-05,
      "loss": 2.1205,
      "step": 16
    },
    {
      "epoch": 0.016140517446000476,
      "grad_norm": 6.970590591430664,
      "learning_rate": 1.0759493670886076e-05,
      "loss": 1.9474,
      "step": 17
    },
    {
      "epoch": 0.017089959648706386,
      "grad_norm": 7.423785209655762,
      "learning_rate": 1.139240506329114e-05,
      "loss": 1.7348,
      "step": 18
    },
    {
      "epoch": 0.018039401851412295,
      "grad_norm": 7.429481029510498,
      "learning_rate": 1.2025316455696203e-05,
      "loss": 1.4835,
      "step": 19
    },
    {
      "epoch": 0.018988844054118204,
      "grad_norm": 6.7193284034729,
      "learning_rate": 1.2658227848101267e-05,
      "loss": 1.2373,
      "step": 20
    },
    {
      "epoch": 0.019938286256824117,
      "grad_norm": 4.46099853515625,
      "learning_rate": 1.3291139240506329e-05,
      "loss": 1.1095,
      "step": 21
    },
    {
      "epoch": 0.020887728459530026,
      "grad_norm": 3.001573085784912,
      "learning_rate": 1.3924050632911393e-05,
      "loss": 0.8642,
      "step": 22
    },
    {
      "epoch": 0.021837170662235936,
      "grad_norm": 2.197000026702881,
      "learning_rate": 1.4556962025316457e-05,
      "loss": 0.7734,
      "step": 23
    },
    {
      "epoch": 0.022786612864941845,
      "grad_norm": 1.8113943338394165,
      "learning_rate": 1.5189873417721521e-05,
      "loss": 0.7341,
      "step": 24
    },
    {
      "epoch": 0.023736055067647758,
      "grad_norm": 1.7461305856704712,
      "learning_rate": 1.5822784810126583e-05,
      "loss": 0.6743,
      "step": 25
    },
    {
      "epoch": 0.024685497270353667,
      "grad_norm": 1.3315849304199219,
      "learning_rate": 1.6455696202531644e-05,
      "loss": 0.5975,
      "step": 26
    },
    {
      "epoch": 0.025634939473059577,
      "grad_norm": 0.726314127445221,
      "learning_rate": 1.7088607594936708e-05,
      "loss": 0.5659,
      "step": 27
    },
    {
      "epoch": 0.02658438167576549,
      "grad_norm": 0.6269010901451111,
      "learning_rate": 1.7721518987341772e-05,
      "loss": 0.5226,
      "step": 28
    },
    {
      "epoch": 0.0275338238784714,
      "grad_norm": 0.5819966197013855,
      "learning_rate": 1.8354430379746836e-05,
      "loss": 0.6132,
      "step": 29
    },
    {
      "epoch": 0.028483266081177308,
      "grad_norm": 0.6247850060462952,
      "learning_rate": 1.89873417721519e-05,
      "loss": 0.5,
      "step": 30
    },
    {
      "epoch": 0.029432708283883217,
      "grad_norm": 0.702621579170227,
      "learning_rate": 1.962025316455696e-05,
      "loss": 0.4958,
      "step": 31
    },
    {
      "epoch": 0.03038215048658913,
      "grad_norm": 0.6045309901237488,
      "learning_rate": 2.0253164556962025e-05,
      "loss": 0.4405,
      "step": 32
    },
    {
      "epoch": 0.031331592689295036,
      "grad_norm": 0.5436626076698303,
      "learning_rate": 2.088607594936709e-05,
      "loss": 0.5607,
      "step": 33
    },
    {
      "epoch": 0.03228103489200095,
      "grad_norm": 0.43146297335624695,
      "learning_rate": 2.1518987341772153e-05,
      "loss": 0.3987,
      "step": 34
    },
    {
      "epoch": 0.03323047709470686,
      "grad_norm": 0.5124548673629761,
      "learning_rate": 2.2151898734177217e-05,
      "loss": 0.5084,
      "step": 35
    },
    {
      "epoch": 0.03417991929741277,
      "grad_norm": 0.4466649293899536,
      "learning_rate": 2.278481012658228e-05,
      "loss": 0.3761,
      "step": 36
    },
    {
      "epoch": 0.03512936150011868,
      "grad_norm": 0.41221529245376587,
      "learning_rate": 2.341772151898734e-05,
      "loss": 0.3859,
      "step": 37
    },
    {
      "epoch": 0.03607880370282459,
      "grad_norm": 0.3802257180213928,
      "learning_rate": 2.4050632911392405e-05,
      "loss": 0.3447,
      "step": 38
    },
    {
      "epoch": 0.0370282459055305,
      "grad_norm": 0.47727710008621216,
      "learning_rate": 2.468354430379747e-05,
      "loss": 0.3914,
      "step": 39
    },
    {
      "epoch": 0.03797768810823641,
      "grad_norm": 0.41048529744148254,
      "learning_rate": 2.5316455696202533e-05,
      "loss": 0.2988,
      "step": 40
    },
    {
      "epoch": 0.038927130310942325,
      "grad_norm": 0.5019667744636536,
      "learning_rate": 2.5949367088607597e-05,
      "loss": 0.2938,
      "step": 41
    },
    {
      "epoch": 0.039876572513648234,
      "grad_norm": 0.42121732234954834,
      "learning_rate": 2.6582278481012658e-05,
      "loss": 0.2579,
      "step": 42
    },
    {
      "epoch": 0.04082601471635414,
      "grad_norm": 0.4193897247314453,
      "learning_rate": 2.7215189873417722e-05,
      "loss": 0.3262,
      "step": 43
    },
    {
      "epoch": 0.04177545691906005,
      "grad_norm": 0.2978931665420532,
      "learning_rate": 2.7848101265822786e-05,
      "loss": 0.2365,
      "step": 44
    },
    {
      "epoch": 0.04272489912176596,
      "grad_norm": 0.34771448373794556,
      "learning_rate": 2.848101265822785e-05,
      "loss": 0.2364,
      "step": 45
    },
    {
      "epoch": 0.04367434132447187,
      "grad_norm": 0.3881576955318451,
      "learning_rate": 2.9113924050632914e-05,
      "loss": 0.286,
      "step": 46
    },
    {
      "epoch": 0.04462378352717778,
      "grad_norm": 0.33863797783851624,
      "learning_rate": 2.9746835443037974e-05,
      "loss": 0.2739,
      "step": 47
    },
    {
      "epoch": 0.04557322572988369,
      "grad_norm": 0.2894616723060608,
      "learning_rate": 3.0379746835443042e-05,
      "loss": 0.2587,
      "step": 48
    },
    {
      "epoch": 0.046522667932589606,
      "grad_norm": 0.22292694449424744,
      "learning_rate": 3.10126582278481e-05,
      "loss": 0.1861,
      "step": 49
    },
    {
      "epoch": 0.047472110135295516,
      "grad_norm": 0.21907460689544678,
      "learning_rate": 3.1645569620253167e-05,
      "loss": 0.1755,
      "step": 50
    },
    {
      "epoch": 0.048421552338001425,
      "grad_norm": 0.29593944549560547,
      "learning_rate": 3.227848101265823e-05,
      "loss": 0.1856,
      "step": 51
    },
    {
      "epoch": 0.049370994540707334,
      "grad_norm": 0.23055657744407654,
      "learning_rate": 3.291139240506329e-05,
      "loss": 0.2102,
      "step": 52
    },
    {
      "epoch": 0.050320436743413244,
      "grad_norm": 0.18929323554039001,
      "learning_rate": 3.354430379746836e-05,
      "loss": 0.1909,
      "step": 53
    },
    {
      "epoch": 0.05126987894611915,
      "grad_norm": 0.15004883706569672,
      "learning_rate": 3.4177215189873416e-05,
      "loss": 0.1619,
      "step": 54
    },
    {
      "epoch": 0.05221932114882506,
      "grad_norm": 0.15621644258499146,
      "learning_rate": 3.4810126582278487e-05,
      "loss": 0.1759,
      "step": 55
    },
    {
      "epoch": 0.05316876335153098,
      "grad_norm": 0.16266578435897827,
      "learning_rate": 3.5443037974683544e-05,
      "loss": 0.1657,
      "step": 56
    },
    {
      "epoch": 0.05411820555423689,
      "grad_norm": 0.14417718350887299,
      "learning_rate": 3.607594936708861e-05,
      "loss": 0.1698,
      "step": 57
    },
    {
      "epoch": 0.0550676477569428,
      "grad_norm": 0.21402889490127563,
      "learning_rate": 3.670886075949367e-05,
      "loss": 0.2185,
      "step": 58
    },
    {
      "epoch": 0.05601708995964871,
      "grad_norm": 0.1997889280319214,
      "learning_rate": 3.7341772151898736e-05,
      "loss": 0.2143,
      "step": 59
    },
    {
      "epoch": 0.056966532162354616,
      "grad_norm": 0.13755086064338684,
      "learning_rate": 3.79746835443038e-05,
      "loss": 0.1677,
      "step": 60
    },
    {
      "epoch": 0.057915974365060525,
      "grad_norm": 0.19304363429546356,
      "learning_rate": 3.8607594936708864e-05,
      "loss": 0.2113,
      "step": 61
    },
    {
      "epoch": 0.058865416567766435,
      "grad_norm": 0.14066031575202942,
      "learning_rate": 3.924050632911392e-05,
      "loss": 0.1612,
      "step": 62
    },
    {
      "epoch": 0.059814858770472344,
      "grad_norm": 0.13375213742256165,
      "learning_rate": 3.987341772151899e-05,
      "loss": 0.164,
      "step": 63
    },
    {
      "epoch": 0.06076430097317826,
      "grad_norm": 0.15216922760009766,
      "learning_rate": 4.050632911392405e-05,
      "loss": 0.16,
      "step": 64
    },
    {
      "epoch": 0.06171374317588417,
      "grad_norm": 0.16130389273166656,
      "learning_rate": 4.113924050632912e-05,
      "loss": 0.1957,
      "step": 65
    },
    {
      "epoch": 0.06266318537859007,
      "grad_norm": 0.1791229248046875,
      "learning_rate": 4.177215189873418e-05,
      "loss": 0.1993,
      "step": 66
    },
    {
      "epoch": 0.06361262758129599,
      "grad_norm": 0.11038907617330551,
      "learning_rate": 4.240506329113924e-05,
      "loss": 0.1517,
      "step": 67
    },
    {
      "epoch": 0.0645620697840019,
      "grad_norm": 0.13327902555465698,
      "learning_rate": 4.3037974683544305e-05,
      "loss": 0.1501,
      "step": 68
    },
    {
      "epoch": 0.06551151198670781,
      "grad_norm": 0.13731731474399567,
      "learning_rate": 4.367088607594937e-05,
      "loss": 0.1596,
      "step": 69
    },
    {
      "epoch": 0.06646095418941372,
      "grad_norm": 0.13924308121204376,
      "learning_rate": 4.430379746835443e-05,
      "loss": 0.152,
      "step": 70
    },
    {
      "epoch": 0.06741039639211963,
      "grad_norm": 0.1482289433479309,
      "learning_rate": 4.49367088607595e-05,
      "loss": 0.1536,
      "step": 71
    },
    {
      "epoch": 0.06835983859482554,
      "grad_norm": 0.10759364813566208,
      "learning_rate": 4.556962025316456e-05,
      "loss": 0.1543,
      "step": 72
    },
    {
      "epoch": 0.06930928079753144,
      "grad_norm": 0.12899678945541382,
      "learning_rate": 4.6202531645569625e-05,
      "loss": 0.165,
      "step": 73
    },
    {
      "epoch": 0.07025872300023736,
      "grad_norm": 0.11689919233322144,
      "learning_rate": 4.683544303797468e-05,
      "loss": 0.1564,
      "step": 74
    },
    {
      "epoch": 0.07120816520294328,
      "grad_norm": 0.12697139382362366,
      "learning_rate": 4.7468354430379746e-05,
      "loss": 0.162,
      "step": 75
    },
    {
      "epoch": 0.07215760740564918,
      "grad_norm": 0.12069376558065414,
      "learning_rate": 4.810126582278481e-05,
      "loss": 0.1467,
      "step": 76
    },
    {
      "epoch": 0.0731070496083551,
      "grad_norm": 0.10199815034866333,
      "learning_rate": 4.8734177215189874e-05,
      "loss": 0.1528,
      "step": 77
    },
    {
      "epoch": 0.074056491811061,
      "grad_norm": 0.1142750009894371,
      "learning_rate": 4.936708860759494e-05,
      "loss": 0.1574,
      "step": 78
    },
    {
      "epoch": 0.07500593401376691,
      "grad_norm": 0.11019093543291092,
      "learning_rate": 5e-05,
      "loss": 0.1512,
      "step": 79
    },
    {
      "epoch": 0.07595537621647282,
      "grad_norm": 0.09426973015069962,
      "learning_rate": 5.0632911392405066e-05,
      "loss": 0.1481,
      "step": 80
    },
    {
      "epoch": 0.07690481841917873,
      "grad_norm": 0.09757663309574127,
      "learning_rate": 5.1265822784810124e-05,
      "loss": 0.1484,
      "step": 81
    },
    {
      "epoch": 0.07785426062188465,
      "grad_norm": 0.10646392405033112,
      "learning_rate": 5.1898734177215194e-05,
      "loss": 0.1549,
      "step": 82
    },
    {
      "epoch": 0.07880370282459055,
      "grad_norm": 0.12109784036874771,
      "learning_rate": 5.253164556962026e-05,
      "loss": 0.1448,
      "step": 83
    },
    {
      "epoch": 0.07975314502729647,
      "grad_norm": 0.12039211392402649,
      "learning_rate": 5.3164556962025316e-05,
      "loss": 0.1538,
      "step": 84
    },
    {
      "epoch": 0.08070258723000237,
      "grad_norm": 0.16873961687088013,
      "learning_rate": 5.379746835443038e-05,
      "loss": 0.1971,
      "step": 85
    },
    {
      "epoch": 0.08165202943270829,
      "grad_norm": 0.12140022218227386,
      "learning_rate": 5.4430379746835444e-05,
      "loss": 0.1497,
      "step": 86
    },
    {
      "epoch": 0.08260147163541419,
      "grad_norm": 0.14637599885463715,
      "learning_rate": 5.5063291139240514e-05,
      "loss": 0.1958,
      "step": 87
    },
    {
      "epoch": 0.0835509138381201,
      "grad_norm": 0.1141396313905716,
      "learning_rate": 5.569620253164557e-05,
      "loss": 0.1457,
      "step": 88
    },
    {
      "epoch": 0.08450035604082601,
      "grad_norm": 0.2128390371799469,
      "learning_rate": 5.6329113924050636e-05,
      "loss": 0.2339,
      "step": 89
    },
    {
      "epoch": 0.08544979824353192,
      "grad_norm": 0.18838858604431152,
      "learning_rate": 5.69620253164557e-05,
      "loss": 0.2029,
      "step": 90
    },
    {
      "epoch": 0.08639924044623784,
      "grad_norm": 0.19592566788196564,
      "learning_rate": 5.759493670886076e-05,
      "loss": 0.2276,
      "step": 91
    },
    {
      "epoch": 0.08734868264894374,
      "grad_norm": 0.14753012359142303,
      "learning_rate": 5.822784810126583e-05,
      "loss": 0.1916,
      "step": 92
    },
    {
      "epoch": 0.08829812485164966,
      "grad_norm": 0.1494351178407669,
      "learning_rate": 5.886075949367089e-05,
      "loss": 0.1913,
      "step": 93
    },
    {
      "epoch": 0.08924756705435556,
      "grad_norm": 0.1173478439450264,
      "learning_rate": 5.949367088607595e-05,
      "loss": 0.1438,
      "step": 94
    },
    {
      "epoch": 0.09019700925706148,
      "grad_norm": 0.12023188918828964,
      "learning_rate": 6.012658227848101e-05,
      "loss": 0.1516,
      "step": 95
    },
    {
      "epoch": 0.09114645145976738,
      "grad_norm": 0.1275833547115326,
      "learning_rate": 6.0759493670886084e-05,
      "loss": 0.1492,
      "step": 96
    },
    {
      "epoch": 0.0920958936624733,
      "grad_norm": 0.1360282599925995,
      "learning_rate": 6.139240506329115e-05,
      "loss": 0.1507,
      "step": 97
    },
    {
      "epoch": 0.09304533586517921,
      "grad_norm": 0.1586841195821762,
      "learning_rate": 6.20253164556962e-05,
      "loss": 0.1956,
      "step": 98
    },
    {
      "epoch": 0.09399477806788512,
      "grad_norm": 0.14281995594501495,
      "learning_rate": 6.265822784810128e-05,
      "loss": 0.1774,
      "step": 99
    },
    {
      "epoch": 0.09494422027059103,
      "grad_norm": 0.12553077936172485,
      "learning_rate": 6.329113924050633e-05,
      "loss": 0.148,
      "step": 100
    },
    {
      "epoch": 0.09589366247329693,
      "grad_norm": 0.1117570698261261,
      "learning_rate": 6.392405063291139e-05,
      "loss": 0.16,
      "step": 101
    },
    {
      "epoch": 0.09684310467600285,
      "grad_norm": 0.13955281674861908,
      "learning_rate": 6.455696202531646e-05,
      "loss": 0.1464,
      "step": 102
    },
    {
      "epoch": 0.09779254687870875,
      "grad_norm": 0.10990285873413086,
      "learning_rate": 6.518987341772153e-05,
      "loss": 0.147,
      "step": 103
    },
    {
      "epoch": 0.09874198908141467,
      "grad_norm": 0.10545991361141205,
      "learning_rate": 6.582278481012658e-05,
      "loss": 0.1436,
      "step": 104
    },
    {
      "epoch": 0.09969143128412059,
      "grad_norm": 0.1717437207698822,
      "learning_rate": 6.645569620253165e-05,
      "loss": 0.2278,
      "step": 105
    },
    {
      "epoch": 0.10064087348682649,
      "grad_norm": 0.10950994491577148,
      "learning_rate": 6.708860759493672e-05,
      "loss": 0.1493,
      "step": 106
    },
    {
      "epoch": 0.1015903156895324,
      "grad_norm": 0.11200258880853653,
      "learning_rate": 6.772151898734177e-05,
      "loss": 0.1536,
      "step": 107
    },
    {
      "epoch": 0.1025397578922383,
      "grad_norm": 0.10955105721950531,
      "learning_rate": 6.835443037974683e-05,
      "loss": 0.1483,
      "step": 108
    },
    {
      "epoch": 0.10348920009494422,
      "grad_norm": 0.11920775473117828,
      "learning_rate": 6.89873417721519e-05,
      "loss": 0.1492,
      "step": 109
    },
    {
      "epoch": 0.10443864229765012,
      "grad_norm": 0.1390092819929123,
      "learning_rate": 6.962025316455697e-05,
      "loss": 0.1849,
      "step": 110
    },
    {
      "epoch": 0.10538808450035604,
      "grad_norm": 0.1363140493631363,
      "learning_rate": 7.025316455696203e-05,
      "loss": 0.1849,
      "step": 111
    },
    {
      "epoch": 0.10633752670306196,
      "grad_norm": 0.09190025180578232,
      "learning_rate": 7.088607594936709e-05,
      "loss": 0.1587,
      "step": 112
    },
    {
      "epoch": 0.10728696890576786,
      "grad_norm": 0.09020426124334335,
      "learning_rate": 7.151898734177216e-05,
      "loss": 0.1377,
      "step": 113
    },
    {
      "epoch": 0.10823641110847378,
      "grad_norm": 0.10544883459806442,
      "learning_rate": 7.215189873417722e-05,
      "loss": 0.1516,
      "step": 114
    },
    {
      "epoch": 0.10918585331117968,
      "grad_norm": 0.12401281297206879,
      "learning_rate": 7.278481012658229e-05,
      "loss": 0.154,
      "step": 115
    },
    {
      "epoch": 0.1101352955138856,
      "grad_norm": 0.1008707657456398,
      "learning_rate": 7.341772151898734e-05,
      "loss": 0.1448,
      "step": 116
    },
    {
      "epoch": 0.1110847377165915,
      "grad_norm": 0.10302747040987015,
      "learning_rate": 7.40506329113924e-05,
      "loss": 0.1451,
      "step": 117
    },
    {
      "epoch": 0.11203417991929741,
      "grad_norm": 0.12748293578624725,
      "learning_rate": 7.468354430379747e-05,
      "loss": 0.1829,
      "step": 118
    },
    {
      "epoch": 0.11298362212200333,
      "grad_norm": 0.10413361340761185,
      "learning_rate": 7.531645569620254e-05,
      "loss": 0.1371,
      "step": 119
    },
    {
      "epoch": 0.11393306432470923,
      "grad_norm": 0.1243433803319931,
      "learning_rate": 7.59493670886076e-05,
      "loss": 0.1409,
      "step": 120
    },
    {
      "epoch": 0.11488250652741515,
      "grad_norm": 0.11630933731794357,
      "learning_rate": 7.658227848101266e-05,
      "loss": 0.1372,
      "step": 121
    },
    {
      "epoch": 0.11583194873012105,
      "grad_norm": 0.17981529235839844,
      "learning_rate": 7.721518987341773e-05,
      "loss": 0.2257,
      "step": 122
    },
    {
      "epoch": 0.11678139093282697,
      "grad_norm": 0.14063452184200287,
      "learning_rate": 7.78481012658228e-05,
      "loss": 0.1841,
      "step": 123
    },
    {
      "epoch": 0.11773083313553287,
      "grad_norm": 0.1264188438653946,
      "learning_rate": 7.848101265822784e-05,
      "loss": 0.1471,
      "step": 124
    },
    {
      "epoch": 0.11868027533823879,
      "grad_norm": 0.12827955186367035,
      "learning_rate": 7.911392405063291e-05,
      "loss": 0.1493,
      "step": 125
    },
    {
      "epoch": 0.11962971754094469,
      "grad_norm": 0.09800329059362411,
      "learning_rate": 7.974683544303798e-05,
      "loss": 0.1414,
      "step": 126
    },
    {
      "epoch": 0.1205791597436506,
      "grad_norm": 0.09902197122573853,
      "learning_rate": 8.037974683544304e-05,
      "loss": 0.1462,
      "step": 127
    },
    {
      "epoch": 0.12152860194635652,
      "grad_norm": 0.09450504928827286,
      "learning_rate": 8.10126582278481e-05,
      "loss": 0.1484,
      "step": 128
    },
    {
      "epoch": 0.12247804414906242,
      "grad_norm": 0.11012883484363556,
      "learning_rate": 8.164556962025317e-05,
      "loss": 0.1437,
      "step": 129
    },
    {
      "epoch": 0.12342748635176834,
      "grad_norm": 0.11717642843723297,
      "learning_rate": 8.227848101265824e-05,
      "loss": 0.1478,
      "step": 130
    },
    {
      "epoch": 0.12437692855447424,
      "grad_norm": 0.08754123747348785,
      "learning_rate": 8.29113924050633e-05,
      "loss": 0.1408,
      "step": 131
    },
    {
      "epoch": 0.12532637075718014,
      "grad_norm": 0.10017862170934677,
      "learning_rate": 8.354430379746835e-05,
      "loss": 0.1476,
      "step": 132
    },
    {
      "epoch": 0.12627581295988607,
      "grad_norm": 0.08994068205356598,
      "learning_rate": 8.417721518987342e-05,
      "loss": 0.1478,
      "step": 133
    },
    {
      "epoch": 0.12722525516259198,
      "grad_norm": 0.09894968569278717,
      "learning_rate": 8.481012658227848e-05,
      "loss": 0.1309,
      "step": 134
    },
    {
      "epoch": 0.12817469736529788,
      "grad_norm": 0.10028701275587082,
      "learning_rate": 8.544303797468355e-05,
      "loss": 0.1433,
      "step": 135
    },
    {
      "epoch": 0.1291241395680038,
      "grad_norm": 0.0897536426782608,
      "learning_rate": 8.607594936708861e-05,
      "loss": 0.1459,
      "step": 136
    },
    {
      "epoch": 0.1300735817707097,
      "grad_norm": 0.10435349494218826,
      "learning_rate": 8.670886075949367e-05,
      "loss": 0.1434,
      "step": 137
    },
    {
      "epoch": 0.13102302397341561,
      "grad_norm": 0.11718117445707321,
      "learning_rate": 8.734177215189874e-05,
      "loss": 0.1509,
      "step": 138
    },
    {
      "epoch": 0.13197246617612152,
      "grad_norm": 0.14426474273204803,
      "learning_rate": 8.797468354430381e-05,
      "loss": 0.1373,
      "step": 139
    },
    {
      "epoch": 0.13292190837882745,
      "grad_norm": 0.13101965188980103,
      "learning_rate": 8.860759493670887e-05,
      "loss": 0.1358,
      "step": 140
    },
    {
      "epoch": 0.13387135058153335,
      "grad_norm": 0.11235956102609634,
      "learning_rate": 8.924050632911392e-05,
      "loss": 0.1394,
      "step": 141
    },
    {
      "epoch": 0.13482079278423925,
      "grad_norm": 0.11327100545167923,
      "learning_rate": 8.9873417721519e-05,
      "loss": 0.1443,
      "step": 142
    },
    {
      "epoch": 0.13577023498694518,
      "grad_norm": 0.10912016034126282,
      "learning_rate": 9.050632911392407e-05,
      "loss": 0.1698,
      "step": 143
    },
    {
      "epoch": 0.13671967718965108,
      "grad_norm": 0.16535617411136627,
      "learning_rate": 9.113924050632912e-05,
      "loss": 0.2255,
      "step": 144
    },
    {
      "epoch": 0.137669119392357,
      "grad_norm": 0.10184327512979507,
      "learning_rate": 9.177215189873418e-05,
      "loss": 0.1371,
      "step": 145
    },
    {
      "epoch": 0.1386185615950629,
      "grad_norm": 0.10998040437698364,
      "learning_rate": 9.240506329113925e-05,
      "loss": 0.1794,
      "step": 146
    },
    {
      "epoch": 0.13956800379776882,
      "grad_norm": 0.08974044770002365,
      "learning_rate": 9.303797468354431e-05,
      "loss": 0.144,
      "step": 147
    },
    {
      "epoch": 0.14051744600047472,
      "grad_norm": 0.12724193930625916,
      "learning_rate": 9.367088607594936e-05,
      "loss": 0.1794,
      "step": 148
    },
    {
      "epoch": 0.14146688820318062,
      "grad_norm": 0.1079091802239418,
      "learning_rate": 9.430379746835444e-05,
      "loss": 0.1399,
      "step": 149
    },
    {
      "epoch": 0.14241633040588655,
      "grad_norm": 0.09480807185173035,
      "learning_rate": 9.493670886075949e-05,
      "loss": 0.1395,
      "step": 150
    },
    {
      "epoch": 0.14336577260859246,
      "grad_norm": 0.08620745688676834,
      "learning_rate": 9.556962025316456e-05,
      "loss": 0.1415,
      "step": 151
    },
    {
      "epoch": 0.14431521481129836,
      "grad_norm": 0.10517002642154694,
      "learning_rate": 9.620253164556962e-05,
      "loss": 0.1723,
      "step": 152
    },
    {
      "epoch": 0.14526465701400426,
      "grad_norm": 0.0956311896443367,
      "learning_rate": 9.683544303797469e-05,
      "loss": 0.1515,
      "step": 153
    },
    {
      "epoch": 0.1462140992167102,
      "grad_norm": 0.08050324022769928,
      "learning_rate": 9.746835443037975e-05,
      "loss": 0.1322,
      "step": 154
    },
    {
      "epoch": 0.1471635414194161,
      "grad_norm": 0.0853201299905777,
      "learning_rate": 9.810126582278482e-05,
      "loss": 0.142,
      "step": 155
    },
    {
      "epoch": 0.148112983622122,
      "grad_norm": 0.09991180151700974,
      "learning_rate": 9.873417721518988e-05,
      "loss": 0.1348,
      "step": 156
    },
    {
      "epoch": 0.14906242582482793,
      "grad_norm": 0.08640603721141815,
      "learning_rate": 9.936708860759493e-05,
      "loss": 0.1397,
      "step": 157
    },
    {
      "epoch": 0.15001186802753383,
      "grad_norm": 0.09057717025279999,
      "learning_rate": 0.0001,
      "loss": 0.1381,
      "step": 158
    },
    {
      "epoch": 0.15096131023023973,
      "grad_norm": 0.09916041046380997,
      "learning_rate": 0.00010063291139240508,
      "loss": 0.1509,
      "step": 159
    },
    {
      "epoch": 0.15191075243294563,
      "grad_norm": 0.09434045851230621,
      "learning_rate": 0.00010126582278481013,
      "loss": 0.1388,
      "step": 160
    },
    {
      "epoch": 0.15286019463565156,
      "grad_norm": 0.1273377537727356,
      "learning_rate": 0.0001018987341772152,
      "loss": 0.1401,
      "step": 161
    },
    {
      "epoch": 0.15380963683835747,
      "grad_norm": 0.1297912299633026,
      "learning_rate": 0.00010253164556962025,
      "loss": 0.1852,
      "step": 162
    },
    {
      "epoch": 0.15475907904106337,
      "grad_norm": 0.1151595488190651,
      "learning_rate": 0.00010316455696202532,
      "loss": 0.1848,
      "step": 163
    },
    {
      "epoch": 0.1557085212437693,
      "grad_norm": 0.13381290435791016,
      "learning_rate": 0.00010379746835443039,
      "loss": 0.1438,
      "step": 164
    },
    {
      "epoch": 0.1566579634464752,
      "grad_norm": 0.07880119979381561,
      "learning_rate": 0.00010443037974683545,
      "loss": 0.1327,
      "step": 165
    },
    {
      "epoch": 0.1576074056491811,
      "grad_norm": 0.0843740776181221,
      "learning_rate": 0.00010506329113924052,
      "loss": 0.1398,
      "step": 166
    },
    {
      "epoch": 0.158556847851887,
      "grad_norm": 0.0981813594698906,
      "learning_rate": 0.00010569620253164559,
      "loss": 0.1409,
      "step": 167
    },
    {
      "epoch": 0.15950629005459294,
      "grad_norm": 0.10005304962396622,
      "learning_rate": 0.00010632911392405063,
      "loss": 0.1783,
      "step": 168
    },
    {
      "epoch": 0.16045573225729884,
      "grad_norm": 0.08365727961063385,
      "learning_rate": 0.00010696202531645569,
      "loss": 0.1275,
      "step": 169
    },
    {
      "epoch": 0.16140517446000474,
      "grad_norm": 0.1017635315656662,
      "learning_rate": 0.00010759493670886076,
      "loss": 0.1792,
      "step": 170
    },
    {
      "epoch": 0.16235461666271067,
      "grad_norm": 0.07007888704538345,
      "learning_rate": 0.00010822784810126583,
      "loss": 0.1473,
      "step": 171
    },
    {
      "epoch": 0.16330405886541657,
      "grad_norm": 0.07718679308891296,
      "learning_rate": 0.00010886075949367089,
      "loss": 0.1396,
      "step": 172
    },
    {
      "epoch": 0.16425350106812248,
      "grad_norm": 0.07228100299835205,
      "learning_rate": 0.00010949367088607596,
      "loss": 0.1398,
      "step": 173
    },
    {
      "epoch": 0.16520294327082838,
      "grad_norm": 0.07955378293991089,
      "learning_rate": 0.00011012658227848103,
      "loss": 0.1402,
      "step": 174
    },
    {
      "epoch": 0.1661523854735343,
      "grad_norm": 0.0816427692770958,
      "learning_rate": 0.00011075949367088607,
      "loss": 0.1345,
      "step": 175
    },
    {
      "epoch": 0.1671018276762402,
      "grad_norm": 0.07641757279634476,
      "learning_rate": 0.00011139240506329114,
      "loss": 0.1373,
      "step": 176
    },
    {
      "epoch": 0.1680512698789461,
      "grad_norm": 0.07354450225830078,
      "learning_rate": 0.0001120253164556962,
      "loss": 0.1394,
      "step": 177
    },
    {
      "epoch": 0.16900071208165202,
      "grad_norm": 0.08322398364543915,
      "learning_rate": 0.00011265822784810127,
      "loss": 0.138,
      "step": 178
    },
    {
      "epoch": 0.16995015428435795,
      "grad_norm": 0.13528607785701752,
      "learning_rate": 0.00011329113924050634,
      "loss": 0.2188,
      "step": 179
    },
    {
      "epoch": 0.17089959648706385,
      "grad_norm": 0.10803692042827606,
      "learning_rate": 0.0001139240506329114,
      "loss": 0.1782,
      "step": 180
    },
    {
      "epoch": 0.17184903868976975,
      "grad_norm": 0.08404573053121567,
      "learning_rate": 0.00011455696202531647,
      "loss": 0.1394,
      "step": 181
    },
    {
      "epoch": 0.17279848089247568,
      "grad_norm": 0.12790893018245697,
      "learning_rate": 0.00011518987341772151,
      "loss": 0.2157,
      "step": 182
    },
    {
      "epoch": 0.17374792309518158,
      "grad_norm": 0.09879907220602036,
      "learning_rate": 0.00011582278481012658,
      "loss": 0.1693,
      "step": 183
    },
    {
      "epoch": 0.17469736529788749,
      "grad_norm": 0.08092228323221207,
      "learning_rate": 0.00011645569620253166,
      "loss": 0.136,
      "step": 184
    },
    {
      "epoch": 0.1756468075005934,
      "grad_norm": 0.07660632580518723,
      "learning_rate": 0.00011708860759493671,
      "loss": 0.1332,
      "step": 185
    },
    {
      "epoch": 0.17659624970329932,
      "grad_norm": 0.07474201172590256,
      "learning_rate": 0.00011772151898734178,
      "loss": 0.1301,
      "step": 186
    },
    {
      "epoch": 0.17754569190600522,
      "grad_norm": 0.09162931889295578,
      "learning_rate": 0.00011835443037974685,
      "loss": 0.1407,
      "step": 187
    },
    {
      "epoch": 0.17849513410871112,
      "grad_norm": 0.08646775782108307,
      "learning_rate": 0.0001189873417721519,
      "loss": 0.139,
      "step": 188
    },
    {
      "epoch": 0.17944457631141705,
      "grad_norm": 0.0759253203868866,
      "learning_rate": 0.00011962025316455696,
      "loss": 0.1342,
      "step": 189
    },
    {
      "epoch": 0.18039401851412296,
      "grad_norm": 0.08292865008115768,
      "learning_rate": 0.00012025316455696203,
      "loss": 0.1389,
      "step": 190
    },
    {
      "epoch": 0.18134346071682886,
      "grad_norm": 0.12379574030637741,
      "learning_rate": 0.0001208860759493671,
      "loss": 0.1795,
      "step": 191
    },
    {
      "epoch": 0.18229290291953476,
      "grad_norm": 0.10240278393030167,
      "learning_rate": 0.00012151898734177217,
      "loss": 0.1721,
      "step": 192
    },
    {
      "epoch": 0.1832423451222407,
      "grad_norm": 0.09666036069393158,
      "learning_rate": 0.00012215189873417722,
      "loss": 0.1783,
      "step": 193
    },
    {
      "epoch": 0.1841917873249466,
      "grad_norm": 0.08314768224954605,
      "learning_rate": 0.0001227848101265823,
      "loss": 0.1429,
      "step": 194
    },
    {
      "epoch": 0.1851412295276525,
      "grad_norm": 0.07590368390083313,
      "learning_rate": 0.00012341772151898734,
      "loss": 0.1393,
      "step": 195
    },
    {
      "epoch": 0.18609067173035843,
      "grad_norm": 0.10585250705480576,
      "learning_rate": 0.0001240506329113924,
      "loss": 0.2155,
      "step": 196
    },
    {
      "epoch": 0.18704011393306433,
      "grad_norm": 0.06995555013418198,
      "learning_rate": 0.00012468354430379748,
      "loss": 0.1374,
      "step": 197
    },
    {
      "epoch": 0.18798955613577023,
      "grad_norm": 0.07370735704898834,
      "learning_rate": 0.00012531645569620255,
      "loss": 0.1367,
      "step": 198
    },
    {
      "epoch": 0.18893899833847613,
      "grad_norm": 0.07194443792104721,
      "learning_rate": 0.0001259493670886076,
      "loss": 0.1437,
      "step": 199
    },
    {
      "epoch": 0.18988844054118206,
      "grad_norm": 0.06982647627592087,
      "learning_rate": 0.00012658227848101267,
      "loss": 0.1358,
      "step": 200
    },
    {
      "epoch": 0.19083788274388797,
      "grad_norm": 0.06538347154855728,
      "learning_rate": 0.0001272151898734177,
      "loss": 0.1354,
      "step": 201
    },
    {
      "epoch": 0.19178732494659387,
      "grad_norm": 0.07789324969053268,
      "learning_rate": 0.00012784810126582278,
      "loss": 0.178,
      "step": 202
    },
    {
      "epoch": 0.1927367671492998,
      "grad_norm": 0.07376820594072342,
      "learning_rate": 0.00012848101265822785,
      "loss": 0.1621,
      "step": 203
    },
    {
      "epoch": 0.1936862093520057,
      "grad_norm": 0.0720745250582695,
      "learning_rate": 0.00012911392405063292,
      "loss": 0.132,
      "step": 204
    },
    {
      "epoch": 0.1946356515547116,
      "grad_norm": 0.06211116537451744,
      "learning_rate": 0.000129746835443038,
      "loss": 0.1387,
      "step": 205
    },
    {
      "epoch": 0.1955850937574175,
      "grad_norm": 0.06701771914958954,
      "learning_rate": 0.00013037974683544306,
      "loss": 0.14,
      "step": 206
    },
    {
      "epoch": 0.19653453596012344,
      "grad_norm": 0.07692532986402512,
      "learning_rate": 0.0001310126582278481,
      "loss": 0.1322,
      "step": 207
    },
    {
      "epoch": 0.19748397816282934,
      "grad_norm": 0.07763269543647766,
      "learning_rate": 0.00013164556962025315,
      "loss": 0.1393,
      "step": 208
    },
    {
      "epoch": 0.19843342036553524,
      "grad_norm": 0.08769022673368454,
      "learning_rate": 0.00013227848101265822,
      "loss": 0.1489,
      "step": 209
    },
    {
      "epoch": 0.19938286256824117,
      "grad_norm": 0.08881859481334686,
      "learning_rate": 0.0001329113924050633,
      "loss": 0.1765,
      "step": 210
    },
    {
      "epoch": 0.20033230477094707,
      "grad_norm": 0.06811822950839996,
      "learning_rate": 0.00013354430379746836,
      "loss": 0.1332,
      "step": 211
    },
    {
      "epoch": 0.20128174697365298,
      "grad_norm": 0.06390922516584396,
      "learning_rate": 0.00013417721518987343,
      "loss": 0.1343,
      "step": 212
    },
    {
      "epoch": 0.20223118917635888,
      "grad_norm": 0.06630406528711319,
      "learning_rate": 0.0001348101265822785,
      "loss": 0.1329,
      "step": 213
    },
    {
      "epoch": 0.2031806313790648,
      "grad_norm": 0.0730772465467453,
      "learning_rate": 0.00013544303797468355,
      "loss": 0.1354,
      "step": 214
    },
    {
      "epoch": 0.2041300735817707,
      "grad_norm": 0.06487323343753815,
      "learning_rate": 0.00013607594936708862,
      "loss": 0.1297,
      "step": 215
    },
    {
      "epoch": 0.2050795157844766,
      "grad_norm": 0.06967955082654953,
      "learning_rate": 0.00013670886075949366,
      "loss": 0.1398,
      "step": 216
    },
    {
      "epoch": 0.20602895798718254,
      "grad_norm": 0.08531820774078369,
      "learning_rate": 0.00013734177215189873,
      "loss": 0.1336,
      "step": 217
    },
    {
      "epoch": 0.20697840018988845,
      "grad_norm": 0.0757659375667572,
      "learning_rate": 0.0001379746835443038,
      "loss": 0.1606,
      "step": 218
    },
    {
      "epoch": 0.20792784239259435,
      "grad_norm": 0.060206469148397446,
      "learning_rate": 0.00013860759493670888,
      "loss": 0.1337,
      "step": 219
    },
    {
      "epoch": 0.20887728459530025,
      "grad_norm": 0.07996556162834167,
      "learning_rate": 0.00013924050632911395,
      "loss": 0.1308,
      "step": 220
    },
    {
      "epoch": 0.20982672679800618,
      "grad_norm": 0.06206861138343811,
      "learning_rate": 0.000139873417721519,
      "loss": 0.1347,
      "step": 221
    },
    {
      "epoch": 0.21077616900071208,
      "grad_norm": 0.08736416697502136,
      "learning_rate": 0.00014050632911392406,
      "loss": 0.1768,
      "step": 222
    },
    {
      "epoch": 0.21172561120341798,
      "grad_norm": 0.06427916139364243,
      "learning_rate": 0.00014113924050632913,
      "loss": 0.1374,
      "step": 223
    },
    {
      "epoch": 0.21267505340612392,
      "grad_norm": 0.10996536910533905,
      "learning_rate": 0.00014177215189873418,
      "loss": 0.222,
      "step": 224
    },
    {
      "epoch": 0.21362449560882982,
      "grad_norm": 0.08439125120639801,
      "learning_rate": 0.00014240506329113925,
      "loss": 0.1854,
      "step": 225
    },
    {
      "epoch": 0.21457393781153572,
      "grad_norm": 0.06892693787813187,
      "learning_rate": 0.00014303797468354432,
      "loss": 0.139,
      "step": 226
    },
    {
      "epoch": 0.21552338001424162,
      "grad_norm": 0.08241122961044312,
      "learning_rate": 0.0001436708860759494,
      "loss": 0.173,
      "step": 227
    },
    {
      "epoch": 0.21647282221694755,
      "grad_norm": 0.07911046594381332,
      "learning_rate": 0.00014430379746835443,
      "loss": 0.1418,
      "step": 228
    },
    {
      "epoch": 0.21742226441965345,
      "grad_norm": 0.06346064805984497,
      "learning_rate": 0.0001449367088607595,
      "loss": 0.1406,
      "step": 229
    },
    {
      "epoch": 0.21837170662235936,
      "grad_norm": 0.060393668711185455,
      "learning_rate": 0.00014556962025316457,
      "loss": 0.1417,
      "step": 230
    },
    {
      "epoch": 0.2193211488250653,
      "grad_norm": 0.05912507325410843,
      "learning_rate": 0.00014620253164556962,
      "loss": 0.1298,
      "step": 231
    },
    {
      "epoch": 0.2202705910277712,
      "grad_norm": 0.07730337232351303,
      "learning_rate": 0.0001468354430379747,
      "loss": 0.1769,
      "step": 232
    },
    {
      "epoch": 0.2212200332304771,
      "grad_norm": 0.07612381875514984,
      "learning_rate": 0.00014746835443037976,
      "loss": 0.1338,
      "step": 233
    },
    {
      "epoch": 0.222169475433183,
      "grad_norm": 0.055311791598796844,
      "learning_rate": 0.0001481012658227848,
      "loss": 0.1313,
      "step": 234
    },
    {
      "epoch": 0.22311891763588892,
      "grad_norm": 0.08492033183574677,
      "learning_rate": 0.00014873417721518987,
      "loss": 0.1367,
      "step": 235
    },
    {
      "epoch": 0.22406835983859483,
      "grad_norm": 0.07133237272500992,
      "learning_rate": 0.00014936708860759494,
      "loss": 0.1308,
      "step": 236
    },
    {
      "epoch": 0.22501780204130073,
      "grad_norm": 0.07148605585098267,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.133,
      "step": 237
    },
    {
      "epoch": 0.22596724424400666,
      "grad_norm": 0.06900472939014435,
      "learning_rate": 0.00015063291139240508,
      "loss": 0.138,
      "step": 238
    },
    {
      "epoch": 0.22691668644671256,
      "grad_norm": 0.062325432896614075,
      "learning_rate": 0.00015126582278481013,
      "loss": 0.1338,
      "step": 239
    },
    {
      "epoch": 0.22786612864941846,
      "grad_norm": 0.06719667464494705,
      "learning_rate": 0.0001518987341772152,
      "loss": 0.1316,
      "step": 240
    },
    {
      "epoch": 0.22881557085212437,
      "grad_norm": 0.07456009089946747,
      "learning_rate": 0.00015253164556962024,
      "loss": 0.1412,
      "step": 241
    },
    {
      "epoch": 0.2297650130548303,
      "grad_norm": 0.05619575083255768,
      "learning_rate": 0.00015316455696202531,
      "loss": 0.1342,
      "step": 242
    },
    {
      "epoch": 0.2307144552575362,
      "grad_norm": 0.06157098710536957,
      "learning_rate": 0.00015379746835443038,
      "loss": 0.1329,
      "step": 243
    },
    {
      "epoch": 0.2316638974602421,
      "grad_norm": 0.06759827584028244,
      "learning_rate": 0.00015443037974683546,
      "loss": 0.1411,
      "step": 244
    },
    {
      "epoch": 0.232613339662948,
      "grad_norm": 0.06892479956150055,
      "learning_rate": 0.00015506329113924053,
      "loss": 0.1484,
      "step": 245
    },
    {
      "epoch": 0.23356278186565393,
      "grad_norm": 0.08536699414253235,
      "learning_rate": 0.0001556962025316456,
      "loss": 0.1855,
      "step": 246
    },
    {
      "epoch": 0.23451222406835984,
      "grad_norm": 0.06800314784049988,
      "learning_rate": 0.00015632911392405064,
      "loss": 0.1379,
      "step": 247
    },
    {
      "epoch": 0.23546166627106574,
      "grad_norm": 0.0625622496008873,
      "learning_rate": 0.00015696202531645568,
      "loss": 0.1344,
      "step": 248
    },
    {
      "epoch": 0.23641110847377167,
      "grad_norm": 0.06030593812465668,
      "learning_rate": 0.00015759493670886075,
      "loss": 0.1254,
      "step": 249
    },
    {
      "epoch": 0.23736055067647757,
      "grad_norm": 0.06694353371858597,
      "learning_rate": 0.00015822784810126583,
      "loss": 0.1413,
      "step": 250
    },
    {
      "epoch": 0.23830999287918347,
      "grad_norm": 0.06594134122133255,
      "learning_rate": 0.0001588607594936709,
      "loss": 0.1394,
      "step": 251
    },
    {
      "epoch": 0.23925943508188938,
      "grad_norm": 0.09062930941581726,
      "learning_rate": 0.00015949367088607597,
      "loss": 0.1883,
      "step": 252
    },
    {
      "epoch": 0.2402088772845953,
      "grad_norm": 0.06029089167714119,
      "learning_rate": 0.00016012658227848104,
      "loss": 0.1271,
      "step": 253
    },
    {
      "epoch": 0.2411583194873012,
      "grad_norm": 0.08471622318029404,
      "learning_rate": 0.00016075949367088608,
      "loss": 0.172,
      "step": 254
    },
    {
      "epoch": 0.2421077616900071,
      "grad_norm": 0.061710160225629807,
      "learning_rate": 0.00016139240506329115,
      "loss": 0.1348,
      "step": 255
    },
    {
      "epoch": 0.24305720389271304,
      "grad_norm": 0.0812671035528183,
      "learning_rate": 0.0001620253164556962,
      "loss": 0.1312,
      "step": 256
    },
    {
      "epoch": 0.24400664609541894,
      "grad_norm": 0.06917005032300949,
      "learning_rate": 0.00016265822784810127,
      "loss": 0.1464,
      "step": 257
    },
    {
      "epoch": 0.24495608829812485,
      "grad_norm": 0.0905887708067894,
      "learning_rate": 0.00016329113924050634,
      "loss": 0.1759,
      "step": 258
    },
    {
      "epoch": 0.24590553050083075,
      "grad_norm": 0.05976787209510803,
      "learning_rate": 0.0001639240506329114,
      "loss": 0.1404,
      "step": 259
    },
    {
      "epoch": 0.24685497270353668,
      "grad_norm": 0.07545675337314606,
      "learning_rate": 0.00016455696202531648,
      "loss": 0.1322,
      "step": 260
    },
    {
      "epoch": 0.24780441490624258,
      "grad_norm": 0.07035024464130402,
      "learning_rate": 0.00016518987341772152,
      "loss": 0.1378,
      "step": 261
    },
    {
      "epoch": 0.24875385710894848,
      "grad_norm": 0.07665737718343735,
      "learning_rate": 0.0001658227848101266,
      "loss": 0.1827,
      "step": 262
    },
    {
      "epoch": 0.24970329931165441,
      "grad_norm": 0.06619013845920563,
      "learning_rate": 0.00016645569620253166,
      "loss": 0.1284,
      "step": 263
    },
    {
      "epoch": 0.2506527415143603,
      "grad_norm": 0.0647001713514328,
      "learning_rate": 0.0001670886075949367,
      "loss": 0.133,
      "step": 264
    },
    {
      "epoch": 0.2516021837170662,
      "grad_norm": 0.060702718794345856,
      "learning_rate": 0.00016772151898734178,
      "loss": 0.1335,
      "step": 265
    },
    {
      "epoch": 0.25255162591977215,
      "grad_norm": 0.0508468896150589,
      "learning_rate": 0.00016835443037974685,
      "loss": 0.1333,
      "step": 266
    },
    {
      "epoch": 0.253501068122478,
      "grad_norm": 0.09877864271402359,
      "learning_rate": 0.0001689873417721519,
      "loss": 0.2031,
      "step": 267
    },
    {
      "epoch": 0.25445051032518395,
      "grad_norm": 0.06673337519168854,
      "learning_rate": 0.00016962025316455696,
      "loss": 0.1356,
      "step": 268
    },
    {
      "epoch": 0.2553999525278899,
      "grad_norm": 0.10604165494441986,
      "learning_rate": 0.00017025316455696204,
      "loss": 0.2517,
      "step": 269
    },
    {
      "epoch": 0.25634939473059576,
      "grad_norm": 0.07689858227968216,
      "learning_rate": 0.0001708860759493671,
      "loss": 0.1761,
      "step": 270
    },
    {
      "epoch": 0.2572988369333017,
      "grad_norm": 0.05482449755072594,
      "learning_rate": 0.00017151898734177218,
      "loss": 0.131,
      "step": 271
    },
    {
      "epoch": 0.2582482791360076,
      "grad_norm": 0.08622145652770996,
      "learning_rate": 0.00017215189873417722,
      "loss": 0.1335,
      "step": 272
    },
    {
      "epoch": 0.2591977213387135,
      "grad_norm": 0.0748213455080986,
      "learning_rate": 0.0001727848101265823,
      "loss": 0.176,
      "step": 273
    },
    {
      "epoch": 0.2601471635414194,
      "grad_norm": 0.06163305416703224,
      "learning_rate": 0.00017341772151898733,
      "loss": 0.1381,
      "step": 274
    },
    {
      "epoch": 0.26109660574412535,
      "grad_norm": 0.06141841039061546,
      "learning_rate": 0.0001740506329113924,
      "loss": 0.1353,
      "step": 275
    },
    {
      "epoch": 0.26204604794683123,
      "grad_norm": 0.07326913625001907,
      "learning_rate": 0.00017468354430379748,
      "loss": 0.1441,
      "step": 276
    },
    {
      "epoch": 0.26299549014953716,
      "grad_norm": 0.05951124057173729,
      "learning_rate": 0.00017531645569620255,
      "loss": 0.1321,
      "step": 277
    },
    {
      "epoch": 0.26394493235224303,
      "grad_norm": 0.08364073932170868,
      "learning_rate": 0.00017594936708860762,
      "loss": 0.187,
      "step": 278
    },
    {
      "epoch": 0.26489437455494896,
      "grad_norm": 0.05849132314324379,
      "learning_rate": 0.00017658227848101266,
      "loss": 0.1393,
      "step": 279
    },
    {
      "epoch": 0.2658438167576549,
      "grad_norm": 0.05452360957860947,
      "learning_rate": 0.00017721518987341773,
      "loss": 0.1342,
      "step": 280
    },
    {
      "epoch": 0.26679325896036077,
      "grad_norm": 0.04878188297152519,
      "learning_rate": 0.00017784810126582278,
      "loss": 0.1445,
      "step": 281
    },
    {
      "epoch": 0.2677427011630667,
      "grad_norm": 0.06066753342747688,
      "learning_rate": 0.00017848101265822785,
      "loss": 0.1423,
      "step": 282
    },
    {
      "epoch": 0.26869214336577263,
      "grad_norm": 0.04918207973241806,
      "learning_rate": 0.00017911392405063292,
      "loss": 0.1316,
      "step": 283
    },
    {
      "epoch": 0.2696415855684785,
      "grad_norm": 0.05103525519371033,
      "learning_rate": 0.000179746835443038,
      "loss": 0.1313,
      "step": 284
    },
    {
      "epoch": 0.27059102777118443,
      "grad_norm": 0.05667628347873688,
      "learning_rate": 0.00018037974683544306,
      "loss": 0.1434,
      "step": 285
    },
    {
      "epoch": 0.27154046997389036,
      "grad_norm": 0.06226016581058502,
      "learning_rate": 0.00018101265822784813,
      "loss": 0.1357,
      "step": 286
    },
    {
      "epoch": 0.27248991217659624,
      "grad_norm": 0.04695293679833412,
      "learning_rate": 0.00018164556962025317,
      "loss": 0.1314,
      "step": 287
    },
    {
      "epoch": 0.27343935437930217,
      "grad_norm": 0.05762844532728195,
      "learning_rate": 0.00018227848101265824,
      "loss": 0.1349,
      "step": 288
    },
    {
      "epoch": 0.27438879658200804,
      "grad_norm": 0.05454534292221069,
      "learning_rate": 0.0001829113924050633,
      "loss": 0.1432,
      "step": 289
    },
    {
      "epoch": 0.275338238784714,
      "grad_norm": 0.050270579755306244,
      "learning_rate": 0.00018354430379746836,
      "loss": 0.1272,
      "step": 290
    },
    {
      "epoch": 0.2762876809874199,
      "grad_norm": 0.0688452497124672,
      "learning_rate": 0.00018417721518987343,
      "loss": 0.1708,
      "step": 291
    },
    {
      "epoch": 0.2772371231901258,
      "grad_norm": 0.06213200092315674,
      "learning_rate": 0.0001848101265822785,
      "loss": 0.1674,
      "step": 292
    },
    {
      "epoch": 0.2781865653928317,
      "grad_norm": 0.059717319905757904,
      "learning_rate": 0.00018544303797468354,
      "loss": 0.169,
      "step": 293
    },
    {
      "epoch": 0.27913600759553764,
      "grad_norm": 0.06223325803875923,
      "learning_rate": 0.00018607594936708861,
      "loss": 0.1369,
      "step": 294
    },
    {
      "epoch": 0.2800854497982435,
      "grad_norm": 0.053163208067417145,
      "learning_rate": 0.00018670886075949369,
      "loss": 0.133,
      "step": 295
    },
    {
      "epoch": 0.28103489200094944,
      "grad_norm": 0.06647945195436478,
      "learning_rate": 0.00018734177215189873,
      "loss": 0.1438,
      "step": 296
    },
    {
      "epoch": 0.2819843342036554,
      "grad_norm": 0.0588272288441658,
      "learning_rate": 0.0001879746835443038,
      "loss": 0.1338,
      "step": 297
    },
    {
      "epoch": 0.28293377640636125,
      "grad_norm": 0.05841274932026863,
      "learning_rate": 0.00018860759493670887,
      "loss": 0.1329,
      "step": 298
    },
    {
      "epoch": 0.2838832186090672,
      "grad_norm": 0.09033369272947311,
      "learning_rate": 0.00018924050632911394,
      "loss": 0.1747,
      "step": 299
    },
    {
      "epoch": 0.2848326608117731,
      "grad_norm": 0.052215326577425,
      "learning_rate": 0.00018987341772151899,
      "loss": 0.1296,
      "step": 300
    },
    {
      "epoch": 0.285782103014479,
      "grad_norm": 0.05880101025104523,
      "learning_rate": 0.00019050632911392406,
      "loss": 0.1287,
      "step": 301
    },
    {
      "epoch": 0.2867315452171849,
      "grad_norm": 0.0691700354218483,
      "learning_rate": 0.00019113924050632913,
      "loss": 0.1676,
      "step": 302
    },
    {
      "epoch": 0.2876809874198908,
      "grad_norm": 0.057025909423828125,
      "learning_rate": 0.0001917721518987342,
      "loss": 0.1346,
      "step": 303
    },
    {
      "epoch": 0.2886304296225967,
      "grad_norm": 0.04936329275369644,
      "learning_rate": 0.00019240506329113924,
      "loss": 0.1354,
      "step": 304
    },
    {
      "epoch": 0.28957987182530265,
      "grad_norm": 0.0680055245757103,
      "learning_rate": 0.0001930379746835443,
      "loss": 0.1344,
      "step": 305
    },
    {
      "epoch": 0.2905293140280085,
      "grad_norm": 0.07374466210603714,
      "learning_rate": 0.00019367088607594938,
      "loss": 0.1428,
      "step": 306
    },
    {
      "epoch": 0.29147875623071445,
      "grad_norm": 0.061204761266708374,
      "learning_rate": 0.00019430379746835443,
      "loss": 0.1246,
      "step": 307
    },
    {
      "epoch": 0.2924281984334204,
      "grad_norm": 0.053467705845832825,
      "learning_rate": 0.0001949367088607595,
      "loss": 0.1342,
      "step": 308
    },
    {
      "epoch": 0.29337764063612626,
      "grad_norm": 0.057525087147951126,
      "learning_rate": 0.00019556962025316457,
      "loss": 0.1377,
      "step": 309
    },
    {
      "epoch": 0.2943270828388322,
      "grad_norm": 0.07857844978570938,
      "learning_rate": 0.00019620253164556964,
      "loss": 0.2076,
      "step": 310
    },
    {
      "epoch": 0.2952765250415381,
      "grad_norm": 0.05250545218586922,
      "learning_rate": 0.0001968354430379747,
      "loss": 0.1432,
      "step": 311
    },
    {
      "epoch": 0.296225967244244,
      "grad_norm": 0.07495012134313583,
      "learning_rate": 0.00019746835443037975,
      "loss": 0.1766,
      "step": 312
    },
    {
      "epoch": 0.2971754094469499,
      "grad_norm": 0.04692578688263893,
      "learning_rate": 0.0001981012658227848,
      "loss": 0.1408,
      "step": 313
    },
    {
      "epoch": 0.29812485164965585,
      "grad_norm": 0.055666085332632065,
      "learning_rate": 0.00019873417721518987,
      "loss": 0.1391,
      "step": 314
    },
    {
      "epoch": 0.29907429385236173,
      "grad_norm": 0.050465911626815796,
      "learning_rate": 0.00019936708860759494,
      "loss": 0.1415,
      "step": 315
    },
    {
      "epoch": 0.30002373605506766,
      "grad_norm": 0.051260240375995636,
      "learning_rate": 0.0002,
      "loss": 0.1423,
      "step": 316
    },
    {
      "epoch": 0.30097317825777353,
      "grad_norm": 0.0503215529024601,
      "learning_rate": 0.000199999938945738,
      "loss": 0.1348,
      "step": 317
    },
    {
      "epoch": 0.30192262046047946,
      "grad_norm": 0.04917483776807785,
      "learning_rate": 0.0001999997557830265,
      "loss": 0.1342,
      "step": 318
    },
    {
      "epoch": 0.3028720626631854,
      "grad_norm": 0.06354209035634995,
      "learning_rate": 0.00019999945051208916,
      "loss": 0.1365,
      "step": 319
    },
    {
      "epoch": 0.30382150486589127,
      "grad_norm": 0.04878314957022667,
      "learning_rate": 0.0001999990231332988,
      "loss": 0.13,
      "step": 320
    },
    {
      "epoch": 0.3047709470685972,
      "grad_norm": 0.07046223431825638,
      "learning_rate": 0.0001999984736471772,
      "loss": 0.1394,
      "step": 321
    },
    {
      "epoch": 0.30572038927130313,
      "grad_norm": 0.04456232488155365,
      "learning_rate": 0.00019999780205439538,
      "loss": 0.1278,
      "step": 322
    },
    {
      "epoch": 0.306669831474009,
      "grad_norm": 0.06280628591775894,
      "learning_rate": 0.00019999700835577342,
      "loss": 0.1715,
      "step": 323
    },
    {
      "epoch": 0.30761927367671493,
      "grad_norm": 0.07462131977081299,
      "learning_rate": 0.00019999609255228046,
      "loss": 0.1772,
      "step": 324
    },
    {
      "epoch": 0.30856871587942086,
      "grad_norm": 0.059642352163791656,
      "learning_rate": 0.00019999505464503482,
      "loss": 0.1294,
      "step": 325
    },
    {
      "epoch": 0.30951815808212674,
      "grad_norm": 0.06458820402622223,
      "learning_rate": 0.00019999389463530383,
      "loss": 0.173,
      "step": 326
    },
    {
      "epoch": 0.31046760028483267,
      "grad_norm": 0.05901939421892166,
      "learning_rate": 0.00019999261252450396,
      "loss": 0.1419,
      "step": 327
    },
    {
      "epoch": 0.3114170424875386,
      "grad_norm": 0.055540215224027634,
      "learning_rate": 0.00019999120831420083,
      "loss": 0.1314,
      "step": 328
    },
    {
      "epoch": 0.3123664846902445,
      "grad_norm": 0.0546739287674427,
      "learning_rate": 0.00019998968200610903,
      "loss": 0.1354,
      "step": 329
    },
    {
      "epoch": 0.3133159268929504,
      "grad_norm": 0.0689477026462555,
      "learning_rate": 0.00019998803360209234,
      "loss": 0.132,
      "step": 330
    },
    {
      "epoch": 0.3142653690956563,
      "grad_norm": 0.05279696360230446,
      "learning_rate": 0.00019998626310416365,
      "loss": 0.1424,
      "step": 331
    },
    {
      "epoch": 0.3152148112983622,
      "grad_norm": 0.055384278297424316,
      "learning_rate": 0.00019998437051448482,
      "loss": 0.141,
      "step": 332
    },
    {
      "epoch": 0.31616425350106814,
      "grad_norm": 0.04636182263493538,
      "learning_rate": 0.0001999823558353669,
      "loss": 0.1414,
      "step": 333
    },
    {
      "epoch": 0.317113695703774,
      "grad_norm": 0.04795726016163826,
      "learning_rate": 0.00019998021906926993,
      "loss": 0.1255,
      "step": 334
    },
    {
      "epoch": 0.31806313790647994,
      "grad_norm": 0.05326540395617485,
      "learning_rate": 0.00019997796021880318,
      "loss": 0.1309,
      "step": 335
    },
    {
      "epoch": 0.3190125801091859,
      "grad_norm": 0.0684736892580986,
      "learning_rate": 0.00019997557928672484,
      "loss": 0.1825,
      "step": 336
    },
    {
      "epoch": 0.31996202231189175,
      "grad_norm": 0.042282164096832275,
      "learning_rate": 0.0001999730762759422,
      "loss": 0.12,
      "step": 337
    },
    {
      "epoch": 0.3209114645145977,
      "grad_norm": 0.05297423154115677,
      "learning_rate": 0.00019997045118951175,
      "loss": 0.1309,
      "step": 338
    },
    {
      "epoch": 0.3218609067173036,
      "grad_norm": 0.080621138215065,
      "learning_rate": 0.00019996770403063883,
      "loss": 0.2134,
      "step": 339
    },
    {
      "epoch": 0.3228103489200095,
      "grad_norm": 0.05552308261394501,
      "learning_rate": 0.00019996483480267803,
      "loss": 0.1361,
      "step": 340
    },
    {
      "epoch": 0.3237597911227154,
      "grad_norm": 0.05070111155509949,
      "learning_rate": 0.00019996184350913287,
      "loss": 0.1314,
      "step": 341
    },
    {
      "epoch": 0.32470923332542134,
      "grad_norm": 0.04412266984581947,
      "learning_rate": 0.00019995873015365601,
      "loss": 0.1299,
      "step": 342
    },
    {
      "epoch": 0.3256586755281272,
      "grad_norm": 0.0445338599383831,
      "learning_rate": 0.00019995549474004917,
      "loss": 0.1313,
      "step": 343
    },
    {
      "epoch": 0.32660811773083315,
      "grad_norm": 0.08224980533123016,
      "learning_rate": 0.000199952137272263,
      "loss": 0.1844,
      "step": 344
    },
    {
      "epoch": 0.327557559933539,
      "grad_norm": 0.04331446811556816,
      "learning_rate": 0.0001999486577543972,
      "loss": 0.133,
      "step": 345
    },
    {
      "epoch": 0.32850700213624495,
      "grad_norm": 0.049314577132463455,
      "learning_rate": 0.00019994505619070068,
      "loss": 0.1351,
      "step": 346
    },
    {
      "epoch": 0.3294564443389509,
      "grad_norm": 0.0697011798620224,
      "learning_rate": 0.00019994133258557117,
      "loss": 0.1709,
      "step": 347
    },
    {
      "epoch": 0.33040588654165676,
      "grad_norm": 0.0510990135371685,
      "learning_rate": 0.00019993748694355557,
      "loss": 0.1365,
      "step": 348
    },
    {
      "epoch": 0.3313553287443627,
      "grad_norm": 0.05100785568356514,
      "learning_rate": 0.00019993351926934967,
      "loss": 0.1302,
      "step": 349
    },
    {
      "epoch": 0.3323047709470686,
      "grad_norm": 0.08001980185508728,
      "learning_rate": 0.00019992942956779838,
      "loss": 0.1736,
      "step": 350
    },
    {
      "epoch": 0.3332542131497745,
      "grad_norm": 0.05298507958650589,
      "learning_rate": 0.00019992521784389559,
      "loss": 0.159,
      "step": 351
    },
    {
      "epoch": 0.3342036553524804,
      "grad_norm": 0.04655485600233078,
      "learning_rate": 0.00019992088410278414,
      "loss": 0.1401,
      "step": 352
    },
    {
      "epoch": 0.33515309755518635,
      "grad_norm": 0.047509439289569855,
      "learning_rate": 0.00019991642834975594,
      "loss": 0.1369,
      "step": 353
    },
    {
      "epoch": 0.3361025397578922,
      "grad_norm": 0.046006906777620316,
      "learning_rate": 0.0001999118505902518,
      "loss": 0.1384,
      "step": 354
    },
    {
      "epoch": 0.33705198196059816,
      "grad_norm": 0.07522892951965332,
      "learning_rate": 0.00019990715082986155,
      "loss": 0.2254,
      "step": 355
    },
    {
      "epoch": 0.33800142416330403,
      "grad_norm": 0.048646144568920135,
      "learning_rate": 0.00019990232907432404,
      "loss": 0.1355,
      "step": 356
    },
    {
      "epoch": 0.33895086636600996,
      "grad_norm": 0.03941798582673073,
      "learning_rate": 0.000199897385329527,
      "loss": 0.1242,
      "step": 357
    },
    {
      "epoch": 0.3399003085687159,
      "grad_norm": 0.04582727700471878,
      "learning_rate": 0.0001998923196015072,
      "loss": 0.1347,
      "step": 358
    },
    {
      "epoch": 0.34084975077142177,
      "grad_norm": 0.05890033766627312,
      "learning_rate": 0.00019988713189645027,
      "loss": 0.1356,
      "step": 359
    },
    {
      "epoch": 0.3417991929741277,
      "grad_norm": 0.050398606806993484,
      "learning_rate": 0.00019988182222069093,
      "loss": 0.1379,
      "step": 360
    },
    {
      "epoch": 0.3427486351768336,
      "grad_norm": 0.053657352924346924,
      "learning_rate": 0.00019987639058071267,
      "loss": 0.1417,
      "step": 361
    },
    {
      "epoch": 0.3436980773795395,
      "grad_norm": 0.04928993433713913,
      "learning_rate": 0.00019987083698314804,
      "loss": 0.1269,
      "step": 362
    },
    {
      "epoch": 0.34464751958224543,
      "grad_norm": 0.04932550713419914,
      "learning_rate": 0.0001998651614347784,
      "loss": 0.1429,
      "step": 363
    },
    {
      "epoch": 0.34559696178495136,
      "grad_norm": 0.0531768873333931,
      "learning_rate": 0.00019985936394253413,
      "loss": 0.1367,
      "step": 364
    },
    {
      "epoch": 0.34654640398765724,
      "grad_norm": 0.05342009291052818,
      "learning_rate": 0.00019985344451349443,
      "loss": 0.1365,
      "step": 365
    },
    {
      "epoch": 0.34749584619036317,
      "grad_norm": 0.04960772022604942,
      "learning_rate": 0.00019984740315488742,
      "loss": 0.133,
      "step": 366
    },
    {
      "epoch": 0.3484452883930691,
      "grad_norm": 0.04490765556693077,
      "learning_rate": 0.00019984123987409013,
      "loss": 0.1347,
      "step": 367
    },
    {
      "epoch": 0.34939473059577497,
      "grad_norm": 0.05546121671795845,
      "learning_rate": 0.0001998349546786285,
      "loss": 0.169,
      "step": 368
    },
    {
      "epoch": 0.3503441727984809,
      "grad_norm": 0.04962169751524925,
      "learning_rate": 0.0001998285475761772,
      "loss": 0.1325,
      "step": 369
    },
    {
      "epoch": 0.3512936150011868,
      "grad_norm": 0.0451858825981617,
      "learning_rate": 0.00019982201857455988,
      "loss": 0.1291,
      "step": 370
    },
    {
      "epoch": 0.3522430572038927,
      "grad_norm": 0.07738906145095825,
      "learning_rate": 0.00019981536768174903,
      "loss": 0.1841,
      "step": 371
    },
    {
      "epoch": 0.35319249940659864,
      "grad_norm": 0.05104148015379906,
      "learning_rate": 0.000199808594905866,
      "loss": 0.1375,
      "step": 372
    },
    {
      "epoch": 0.3541419416093045,
      "grad_norm": 0.04850155860185623,
      "learning_rate": 0.00019980170025518082,
      "loss": 0.1335,
      "step": 373
    },
    {
      "epoch": 0.35509138381201044,
      "grad_norm": 0.050271324813365936,
      "learning_rate": 0.00019979468373811248,
      "loss": 0.1394,
      "step": 374
    },
    {
      "epoch": 0.35604082601471637,
      "grad_norm": 0.050799645483493805,
      "learning_rate": 0.0001997875453632288,
      "loss": 0.135,
      "step": 375
    },
    {
      "epoch": 0.35699026821742225,
      "grad_norm": 0.05703526735305786,
      "learning_rate": 0.00019978028513924627,
      "loss": 0.1371,
      "step": 376
    },
    {
      "epoch": 0.3579397104201282,
      "grad_norm": 0.06665853410959244,
      "learning_rate": 0.00019977290307503028,
      "loss": 0.1837,
      "step": 377
    },
    {
      "epoch": 0.3588891526228341,
      "grad_norm": 0.04639972746372223,
      "learning_rate": 0.000199765399179595,
      "loss": 0.1315,
      "step": 378
    },
    {
      "epoch": 0.35983859482554,
      "grad_norm": 0.07625308632850647,
      "learning_rate": 0.00019975777346210326,
      "loss": 0.2064,
      "step": 379
    },
    {
      "epoch": 0.3607880370282459,
      "grad_norm": 0.048770248889923096,
      "learning_rate": 0.00019975002593186674,
      "loss": 0.1363,
      "step": 380
    },
    {
      "epoch": 0.36173747923095184,
      "grad_norm": 0.04932136833667755,
      "learning_rate": 0.00019974215659834582,
      "loss": 0.1374,
      "step": 381
    },
    {
      "epoch": 0.3626869214336577,
      "grad_norm": 0.03848756104707718,
      "learning_rate": 0.00019973416547114964,
      "loss": 0.1333,
      "step": 382
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 0.04468891769647598,
      "learning_rate": 0.00019972605256003605,
      "loss": 0.129,
      "step": 383
    },
    {
      "epoch": 0.3645858058390695,
      "grad_norm": 0.048413511365652084,
      "learning_rate": 0.0001997178178749116,
      "loss": 0.1314,
      "step": 384
    },
    {
      "epoch": 0.36553524804177545,
      "grad_norm": 0.045054856687784195,
      "learning_rate": 0.00019970946142583155,
      "loss": 0.1323,
      "step": 385
    },
    {
      "epoch": 0.3664846902444814,
      "grad_norm": 0.05541200935840607,
      "learning_rate": 0.00019970098322299982,
      "loss": 0.1342,
      "step": 386
    },
    {
      "epoch": 0.36743413244718726,
      "grad_norm": 0.06861472874879837,
      "learning_rate": 0.00019969238327676906,
      "loss": 0.1347,
      "step": 387
    },
    {
      "epoch": 0.3683835746498932,
      "grad_norm": 0.043996453285217285,
      "learning_rate": 0.00019968366159764047,
      "loss": 0.132,
      "step": 388
    },
    {
      "epoch": 0.3693330168525991,
      "grad_norm": 0.06562239676713943,
      "learning_rate": 0.000199674818196264,
      "loss": 0.1759,
      "step": 389
    },
    {
      "epoch": 0.370282459055305,
      "grad_norm": 0.04714899882674217,
      "learning_rate": 0.00019966585308343822,
      "loss": 0.1274,
      "step": 390
    },
    {
      "epoch": 0.3712319012580109,
      "grad_norm": 0.04736959934234619,
      "learning_rate": 0.00019965676627011026,
      "loss": 0.1265,
      "step": 391
    },
    {
      "epoch": 0.37218134346071685,
      "grad_norm": 0.056829433888196945,
      "learning_rate": 0.0001996475577673759,
      "loss": 0.1402,
      "step": 392
    },
    {
      "epoch": 0.3731307856634227,
      "grad_norm": 0.0426231250166893,
      "learning_rate": 0.00019963822758647953,
      "loss": 0.1364,
      "step": 393
    },
    {
      "epoch": 0.37408022786612866,
      "grad_norm": 0.07376877963542938,
      "learning_rate": 0.00019962877573881404,
      "loss": 0.2042,
      "step": 394
    },
    {
      "epoch": 0.3750296700688346,
      "grad_norm": 0.043273668736219406,
      "learning_rate": 0.00019961920223592104,
      "loss": 0.132,
      "step": 395
    },
    {
      "epoch": 0.37597911227154046,
      "grad_norm": 0.044406965374946594,
      "learning_rate": 0.00019960950708949052,
      "loss": 0.1344,
      "step": 396
    },
    {
      "epoch": 0.3769285544742464,
      "grad_norm": 0.040342606604099274,
      "learning_rate": 0.00019959969031136106,
      "loss": 0.1214,
      "step": 397
    },
    {
      "epoch": 0.37787799667695227,
      "grad_norm": 0.05118388682603836,
      "learning_rate": 0.00019958975191351983,
      "loss": 0.14,
      "step": 398
    },
    {
      "epoch": 0.3788274388796582,
      "grad_norm": 0.045876793563365936,
      "learning_rate": 0.00019957969190810245,
      "loss": 0.1335,
      "step": 399
    },
    {
      "epoch": 0.3797768810823641,
      "grad_norm": 0.0645332932472229,
      "learning_rate": 0.00019956951030739308,
      "loss": 0.1702,
      "step": 400
    },
    {
      "epoch": 0.38072632328507,
      "grad_norm": 0.05039132386445999,
      "learning_rate": 0.00019955920712382423,
      "loss": 0.136,
      "step": 401
    },
    {
      "epoch": 0.38167576548777593,
      "grad_norm": 0.052004653960466385,
      "learning_rate": 0.00019954878236997704,
      "loss": 0.1386,
      "step": 402
    },
    {
      "epoch": 0.38262520769048186,
      "grad_norm": 0.05021458491683006,
      "learning_rate": 0.00019953823605858105,
      "loss": 0.1378,
      "step": 403
    },
    {
      "epoch": 0.38357464989318774,
      "grad_norm": 0.058653559535741806,
      "learning_rate": 0.0001995275682025141,
      "loss": 0.1437,
      "step": 404
    },
    {
      "epoch": 0.38452409209589367,
      "grad_norm": 0.04466673359274864,
      "learning_rate": 0.00019951677881480264,
      "loss": 0.1334,
      "step": 405
    },
    {
      "epoch": 0.3854735342985996,
      "grad_norm": 0.06119415909051895,
      "learning_rate": 0.00019950586790862138,
      "loss": 0.1296,
      "step": 406
    },
    {
      "epoch": 0.38642297650130547,
      "grad_norm": 0.04749077931046486,
      "learning_rate": 0.0001994948354972935,
      "loss": 0.1341,
      "step": 407
    },
    {
      "epoch": 0.3873724187040114,
      "grad_norm": 0.037752799689769745,
      "learning_rate": 0.00019948368159429053,
      "loss": 0.134,
      "step": 408
    },
    {
      "epoch": 0.38832186090671733,
      "grad_norm": 0.08903038501739502,
      "learning_rate": 0.00019947240621323226,
      "loss": 0.2155,
      "step": 409
    },
    {
      "epoch": 0.3892713031094232,
      "grad_norm": 0.03878140076994896,
      "learning_rate": 0.00019946100936788698,
      "loss": 0.1176,
      "step": 410
    },
    {
      "epoch": 0.39022074531212914,
      "grad_norm": 0.04927309602499008,
      "learning_rate": 0.00019944949107217113,
      "loss": 0.1344,
      "step": 411
    },
    {
      "epoch": 0.391170187514835,
      "grad_norm": 0.04933890327811241,
      "learning_rate": 0.00019943785134014962,
      "loss": 0.1315,
      "step": 412
    },
    {
      "epoch": 0.39211962971754094,
      "grad_norm": 0.06702516227960587,
      "learning_rate": 0.0001994260901860355,
      "loss": 0.1826,
      "step": 413
    },
    {
      "epoch": 0.39306907192024687,
      "grad_norm": 0.048132237046957016,
      "learning_rate": 0.00019941420762419014,
      "loss": 0.1436,
      "step": 414
    },
    {
      "epoch": 0.39401851412295275,
      "grad_norm": 0.07756894826889038,
      "learning_rate": 0.00019940220366912318,
      "loss": 0.2162,
      "step": 415
    },
    {
      "epoch": 0.3949679563256587,
      "grad_norm": 0.04789011925458908,
      "learning_rate": 0.00019939007833549242,
      "loss": 0.1295,
      "step": 416
    },
    {
      "epoch": 0.3959173985283646,
      "grad_norm": 0.04369444027543068,
      "learning_rate": 0.000199377831638104,
      "loss": 0.1322,
      "step": 417
    },
    {
      "epoch": 0.3968668407310705,
      "grad_norm": 0.05376122146844864,
      "learning_rate": 0.00019936546359191216,
      "loss": 0.1743,
      "step": 418
    },
    {
      "epoch": 0.3978162829337764,
      "grad_norm": 0.045930229127407074,
      "learning_rate": 0.0001993529742120193,
      "loss": 0.1336,
      "step": 419
    },
    {
      "epoch": 0.39876572513648234,
      "grad_norm": 0.039980966597795486,
      "learning_rate": 0.00019934036351367606,
      "loss": 0.1349,
      "step": 420
    },
    {
      "epoch": 0.3997151673391882,
      "grad_norm": 0.03797341510653496,
      "learning_rate": 0.00019932763151228115,
      "loss": 0.1256,
      "step": 421
    },
    {
      "epoch": 0.40066460954189415,
      "grad_norm": 0.04779914394021034,
      "learning_rate": 0.00019931477822338146,
      "loss": 0.1411,
      "step": 422
    },
    {
      "epoch": 0.4016140517446,
      "grad_norm": 0.040458668023347855,
      "learning_rate": 0.00019930180366267193,
      "loss": 0.126,
      "step": 423
    },
    {
      "epoch": 0.40256349394730595,
      "grad_norm": 0.04114462807774544,
      "learning_rate": 0.0001992887078459956,
      "loss": 0.127,
      "step": 424
    },
    {
      "epoch": 0.4035129361500119,
      "grad_norm": 0.048119012266397476,
      "learning_rate": 0.00019927549078934358,
      "loss": 0.1346,
      "step": 425
    },
    {
      "epoch": 0.40446237835271776,
      "grad_norm": 0.0545562319457531,
      "learning_rate": 0.00019926215250885504,
      "loss": 0.1387,
      "step": 426
    },
    {
      "epoch": 0.4054118205554237,
      "grad_norm": 0.052092909812927246,
      "learning_rate": 0.00019924869302081715,
      "loss": 0.1389,
      "step": 427
    },
    {
      "epoch": 0.4063612627581296,
      "grad_norm": 0.03847799077630043,
      "learning_rate": 0.0001992351123416651,
      "loss": 0.1234,
      "step": 428
    },
    {
      "epoch": 0.4073107049608355,
      "grad_norm": 0.0436912477016449,
      "learning_rate": 0.000199221410487982,
      "loss": 0.1362,
      "step": 429
    },
    {
      "epoch": 0.4082601471635414,
      "grad_norm": 0.04420888423919678,
      "learning_rate": 0.00019920758747649908,
      "loss": 0.1243,
      "step": 430
    },
    {
      "epoch": 0.40920958936624735,
      "grad_norm": 0.037297070026397705,
      "learning_rate": 0.00019919364332409535,
      "loss": 0.1331,
      "step": 431
    },
    {
      "epoch": 0.4101590315689532,
      "grad_norm": 0.03854360058903694,
      "learning_rate": 0.00019917957804779782,
      "loss": 0.1266,
      "step": 432
    },
    {
      "epoch": 0.41110847377165916,
      "grad_norm": 0.04071418195962906,
      "learning_rate": 0.00019916539166478137,
      "loss": 0.1292,
      "step": 433
    },
    {
      "epoch": 0.4120579159743651,
      "grad_norm": 0.04560808837413788,
      "learning_rate": 0.00019915108419236882,
      "loss": 0.1381,
      "step": 434
    },
    {
      "epoch": 0.41300735817707096,
      "grad_norm": 0.06313233822584152,
      "learning_rate": 0.00019913665564803078,
      "loss": 0.2031,
      "step": 435
    },
    {
      "epoch": 0.4139568003797769,
      "grad_norm": 0.04507524147629738,
      "learning_rate": 0.00019912210604938578,
      "loss": 0.1277,
      "step": 436
    },
    {
      "epoch": 0.41490624258248276,
      "grad_norm": 0.05048058554530144,
      "learning_rate": 0.00019910743541420007,
      "loss": 0.1315,
      "step": 437
    },
    {
      "epoch": 0.4158556847851887,
      "grad_norm": 0.04872648045420647,
      "learning_rate": 0.0001990926437603878,
      "loss": 0.1292,
      "step": 438
    },
    {
      "epoch": 0.4168051269878946,
      "grad_norm": 0.04400710016489029,
      "learning_rate": 0.00019907773110601075,
      "loss": 0.1236,
      "step": 439
    },
    {
      "epoch": 0.4177545691906005,
      "grad_norm": 0.051591627299785614,
      "learning_rate": 0.00019906269746927863,
      "loss": 0.1358,
      "step": 440
    },
    {
      "epoch": 0.41870401139330643,
      "grad_norm": 0.04288725182414055,
      "learning_rate": 0.00019904754286854877,
      "loss": 0.126,
      "step": 441
    },
    {
      "epoch": 0.41965345359601236,
      "grad_norm": 0.04984726384282112,
      "learning_rate": 0.00019903226732232622,
      "loss": 0.1326,
      "step": 442
    },
    {
      "epoch": 0.42060289579871823,
      "grad_norm": 0.041585132479667664,
      "learning_rate": 0.00019901687084926373,
      "loss": 0.136,
      "step": 443
    },
    {
      "epoch": 0.42155233800142417,
      "grad_norm": 0.05849035084247589,
      "learning_rate": 0.0001990013534681617,
      "loss": 0.1727,
      "step": 444
    },
    {
      "epoch": 0.4225017802041301,
      "grad_norm": 0.043387994170188904,
      "learning_rate": 0.00019898571519796817,
      "loss": 0.1393,
      "step": 445
    },
    {
      "epoch": 0.42345122240683597,
      "grad_norm": 0.05867496132850647,
      "learning_rate": 0.0001989699560577788,
      "loss": 0.1664,
      "step": 446
    },
    {
      "epoch": 0.4244006646095419,
      "grad_norm": 0.07019232958555222,
      "learning_rate": 0.00019895407606683685,
      "loss": 0.1653,
      "step": 447
    },
    {
      "epoch": 0.42535010681224783,
      "grad_norm": 0.04676515609025955,
      "learning_rate": 0.00019893807524453314,
      "loss": 0.1368,
      "step": 448
    },
    {
      "epoch": 0.4262995490149537,
      "grad_norm": 0.06640240550041199,
      "learning_rate": 0.00019892195361040607,
      "loss": 0.2089,
      "step": 449
    },
    {
      "epoch": 0.42724899121765963,
      "grad_norm": 0.044658735394477844,
      "learning_rate": 0.00019890571118414148,
      "loss": 0.1298,
      "step": 450
    },
    {
      "epoch": 0.4281984334203655,
      "grad_norm": 0.04810122773051262,
      "learning_rate": 0.00019888934798557278,
      "loss": 0.1288,
      "step": 451
    },
    {
      "epoch": 0.42914787562307144,
      "grad_norm": 0.0425436794757843,
      "learning_rate": 0.0001988728640346808,
      "loss": 0.1354,
      "step": 452
    },
    {
      "epoch": 0.43009731782577737,
      "grad_norm": 0.04513363912701607,
      "learning_rate": 0.0001988562593515939,
      "loss": 0.1346,
      "step": 453
    },
    {
      "epoch": 0.43104676002848324,
      "grad_norm": 0.052022870630025864,
      "learning_rate": 0.0001988395339565878,
      "loss": 0.1302,
      "step": 454
    },
    {
      "epoch": 0.4319962022311892,
      "grad_norm": 0.04852641373872757,
      "learning_rate": 0.0001988226878700856,
      "loss": 0.1388,
      "step": 455
    },
    {
      "epoch": 0.4329456444338951,
      "grad_norm": 0.04990584775805473,
      "learning_rate": 0.00019880572111265785,
      "loss": 0.1552,
      "step": 456
    },
    {
      "epoch": 0.433895086636601,
      "grad_norm": 0.052271679043769836,
      "learning_rate": 0.00019878863370502238,
      "loss": 0.1404,
      "step": 457
    },
    {
      "epoch": 0.4348445288393069,
      "grad_norm": 0.04795520752668381,
      "learning_rate": 0.00019877142566804436,
      "loss": 0.1341,
      "step": 458
    },
    {
      "epoch": 0.43579397104201284,
      "grad_norm": 0.048165664076805115,
      "learning_rate": 0.00019875409702273632,
      "loss": 0.1343,
      "step": 459
    },
    {
      "epoch": 0.4367434132447187,
      "grad_norm": 0.04213611036539078,
      "learning_rate": 0.000198736647790258,
      "loss": 0.1369,
      "step": 460
    },
    {
      "epoch": 0.43769285544742464,
      "grad_norm": 0.05819966271519661,
      "learning_rate": 0.00019871907799191632,
      "loss": 0.1615,
      "step": 461
    },
    {
      "epoch": 0.4386422976501306,
      "grad_norm": 0.057378821074962616,
      "learning_rate": 0.00019870138764916558,
      "loss": 0.175,
      "step": 462
    },
    {
      "epoch": 0.43959173985283645,
      "grad_norm": 0.0432853177189827,
      "learning_rate": 0.00019868357678360724,
      "loss": 0.1371,
      "step": 463
    },
    {
      "epoch": 0.4405411820555424,
      "grad_norm": 0.03890872746706009,
      "learning_rate": 0.0001986656454169898,
      "loss": 0.1332,
      "step": 464
    },
    {
      "epoch": 0.44149062425824825,
      "grad_norm": 0.04006613418459892,
      "learning_rate": 0.00019864759357120896,
      "loss": 0.1342,
      "step": 465
    },
    {
      "epoch": 0.4424400664609542,
      "grad_norm": 0.049053166061639786,
      "learning_rate": 0.00019862942126830767,
      "loss": 0.1756,
      "step": 466
    },
    {
      "epoch": 0.4433895086636601,
      "grad_norm": 0.03966079652309418,
      "learning_rate": 0.00019861112853047577,
      "loss": 0.1303,
      "step": 467
    },
    {
      "epoch": 0.444338950866366,
      "grad_norm": 0.04506433755159378,
      "learning_rate": 0.0001985927153800503,
      "loss": 0.136,
      "step": 468
    },
    {
      "epoch": 0.4452883930690719,
      "grad_norm": 0.04392915591597557,
      "learning_rate": 0.00019857418183951526,
      "loss": 0.1397,
      "step": 469
    },
    {
      "epoch": 0.44623783527177785,
      "grad_norm": 0.038007620722055435,
      "learning_rate": 0.0001985555279315017,
      "loss": 0.1246,
      "step": 470
    },
    {
      "epoch": 0.4471872774744837,
      "grad_norm": 0.048948097974061966,
      "learning_rate": 0.00019853675367878764,
      "loss": 0.1329,
      "step": 471
    },
    {
      "epoch": 0.44813671967718965,
      "grad_norm": 0.04174380376935005,
      "learning_rate": 0.00019851785910429806,
      "loss": 0.13,
      "step": 472
    },
    {
      "epoch": 0.4490861618798956,
      "grad_norm": 0.048575468361377716,
      "learning_rate": 0.00019849884423110478,
      "loss": 0.1385,
      "step": 473
    },
    {
      "epoch": 0.45003560408260146,
      "grad_norm": 0.05167670175433159,
      "learning_rate": 0.00019847970908242664,
      "loss": 0.1684,
      "step": 474
    },
    {
      "epoch": 0.4509850462853074,
      "grad_norm": 0.06849198788404465,
      "learning_rate": 0.00019846045368162923,
      "loss": 0.1795,
      "step": 475
    },
    {
      "epoch": 0.4519344884880133,
      "grad_norm": 0.044273603707551956,
      "learning_rate": 0.0001984410780522251,
      "loss": 0.1246,
      "step": 476
    },
    {
      "epoch": 0.4528839306907192,
      "grad_norm": 0.048194363713264465,
      "learning_rate": 0.00019842158221787353,
      "loss": 0.1366,
      "step": 477
    },
    {
      "epoch": 0.4538333728934251,
      "grad_norm": 0.033906418830156326,
      "learning_rate": 0.00019840196620238057,
      "loss": 0.1235,
      "step": 478
    },
    {
      "epoch": 0.454782815096131,
      "grad_norm": 0.043933141976594925,
      "learning_rate": 0.00019838223002969905,
      "loss": 0.1195,
      "step": 479
    },
    {
      "epoch": 0.45573225729883693,
      "grad_norm": 0.056823644787073135,
      "learning_rate": 0.00019836237372392854,
      "loss": 0.1757,
      "step": 480
    },
    {
      "epoch": 0.45668169950154286,
      "grad_norm": 0.07587820291519165,
      "learning_rate": 0.00019834239730931526,
      "loss": 0.1784,
      "step": 481
    },
    {
      "epoch": 0.45763114170424873,
      "grad_norm": 0.04008018970489502,
      "learning_rate": 0.0001983223008102521,
      "loss": 0.1306,
      "step": 482
    },
    {
      "epoch": 0.45858058390695466,
      "grad_norm": 0.05180038511753082,
      "learning_rate": 0.00019830208425127867,
      "loss": 0.1485,
      "step": 483
    },
    {
      "epoch": 0.4595300261096606,
      "grad_norm": 0.0691617876291275,
      "learning_rate": 0.00019828174765708104,
      "loss": 0.1249,
      "step": 484
    },
    {
      "epoch": 0.46047946831236647,
      "grad_norm": 0.0565367266535759,
      "learning_rate": 0.00019826129105249195,
      "loss": 0.1744,
      "step": 485
    },
    {
      "epoch": 0.4614289105150724,
      "grad_norm": 0.044927019625902176,
      "learning_rate": 0.00019824071446249072,
      "loss": 0.1341,
      "step": 486
    },
    {
      "epoch": 0.46237835271777833,
      "grad_norm": 0.04481721669435501,
      "learning_rate": 0.00019822001791220298,
      "loss": 0.1354,
      "step": 487
    },
    {
      "epoch": 0.4633277949204842,
      "grad_norm": 0.05233500525355339,
      "learning_rate": 0.0001981992014269011,
      "loss": 0.1501,
      "step": 488
    },
    {
      "epoch": 0.46427723712319013,
      "grad_norm": 0.044350553303956985,
      "learning_rate": 0.00019817826503200372,
      "loss": 0.1335,
      "step": 489
    },
    {
      "epoch": 0.465226679325896,
      "grad_norm": 0.03551819548010826,
      "learning_rate": 0.000198157208753076,
      "loss": 0.1322,
      "step": 490
    },
    {
      "epoch": 0.46617612152860194,
      "grad_norm": 0.04409592226147652,
      "learning_rate": 0.00019813603261582943,
      "loss": 0.1561,
      "step": 491
    },
    {
      "epoch": 0.46712556373130787,
      "grad_norm": 0.04842127487063408,
      "learning_rate": 0.0001981147366461219,
      "loss": 0.1296,
      "step": 492
    },
    {
      "epoch": 0.46807500593401374,
      "grad_norm": 0.04349881038069725,
      "learning_rate": 0.00019809332086995757,
      "loss": 0.1319,
      "step": 493
    },
    {
      "epoch": 0.4690244481367197,
      "grad_norm": 0.04413028433918953,
      "learning_rate": 0.00019807178531348698,
      "loss": 0.1321,
      "step": 494
    },
    {
      "epoch": 0.4699738903394256,
      "grad_norm": 0.03972313553094864,
      "learning_rate": 0.00019805013000300683,
      "loss": 0.1358,
      "step": 495
    },
    {
      "epoch": 0.4709233325421315,
      "grad_norm": 0.052269116044044495,
      "learning_rate": 0.00019802835496496012,
      "loss": 0.1389,
      "step": 496
    },
    {
      "epoch": 0.4718727747448374,
      "grad_norm": 0.0379653237760067,
      "learning_rate": 0.00019800646022593603,
      "loss": 0.1283,
      "step": 497
    },
    {
      "epoch": 0.47282221694754334,
      "grad_norm": 0.04370688647031784,
      "learning_rate": 0.0001979844458126699,
      "loss": 0.1278,
      "step": 498
    },
    {
      "epoch": 0.4737716591502492,
      "grad_norm": 0.03912369906902313,
      "learning_rate": 0.0001979623117520432,
      "loss": 0.1257,
      "step": 499
    },
    {
      "epoch": 0.47472110135295514,
      "grad_norm": 0.039594005793333054,
      "learning_rate": 0.00019794005807108352,
      "loss": 0.1375,
      "step": 500
    },
    {
      "epoch": 0.4756705435556611,
      "grad_norm": 0.03889892250299454,
      "learning_rate": 0.00019791768479696448,
      "loss": 0.13,
      "step": 501
    },
    {
      "epoch": 0.47661998575836695,
      "grad_norm": 0.03966660797595978,
      "learning_rate": 0.00019789519195700578,
      "loss": 0.1268,
      "step": 502
    },
    {
      "epoch": 0.4775694279610729,
      "grad_norm": 0.04501716047525406,
      "learning_rate": 0.00019787257957867306,
      "loss": 0.1423,
      "step": 503
    },
    {
      "epoch": 0.47851887016377875,
      "grad_norm": 0.06255436688661575,
      "learning_rate": 0.000197849847689578,
      "loss": 0.1799,
      "step": 504
    },
    {
      "epoch": 0.4794683123664847,
      "grad_norm": 0.050308458507061005,
      "learning_rate": 0.00019782699631747813,
      "loss": 0.1733,
      "step": 505
    },
    {
      "epoch": 0.4804177545691906,
      "grad_norm": 0.0357963964343071,
      "learning_rate": 0.00019780402549027698,
      "loss": 0.1268,
      "step": 506
    },
    {
      "epoch": 0.4813671967718965,
      "grad_norm": 0.03651968017220497,
      "learning_rate": 0.00019778093523602384,
      "loss": 0.1267,
      "step": 507
    },
    {
      "epoch": 0.4823166389746024,
      "grad_norm": 0.043042074888944626,
      "learning_rate": 0.0001977577255829139,
      "loss": 0.1256,
      "step": 508
    },
    {
      "epoch": 0.48326608117730835,
      "grad_norm": 0.07031014561653137,
      "learning_rate": 0.00019773439655928815,
      "loss": 0.1796,
      "step": 509
    },
    {
      "epoch": 0.4842155233800142,
      "grad_norm": 0.04429268836975098,
      "learning_rate": 0.00019771094819363326,
      "loss": 0.1298,
      "step": 510
    },
    {
      "epoch": 0.48516496558272015,
      "grad_norm": 0.0373898483812809,
      "learning_rate": 0.00019768738051458172,
      "loss": 0.1232,
      "step": 511
    },
    {
      "epoch": 0.4861144077854261,
      "grad_norm": 0.05853155627846718,
      "learning_rate": 0.00019766369355091166,
      "loss": 0.1694,
      "step": 512
    },
    {
      "epoch": 0.48706384998813196,
      "grad_norm": 0.05050895735621452,
      "learning_rate": 0.00019763988733154686,
      "loss": 0.1665,
      "step": 513
    },
    {
      "epoch": 0.4880132921908379,
      "grad_norm": 0.04074448347091675,
      "learning_rate": 0.0001976159618855568,
      "loss": 0.1336,
      "step": 514
    },
    {
      "epoch": 0.4889627343935438,
      "grad_norm": 0.03826110064983368,
      "learning_rate": 0.00019759191724215644,
      "loss": 0.132,
      "step": 515
    },
    {
      "epoch": 0.4899121765962497,
      "grad_norm": 0.04392875358462334,
      "learning_rate": 0.0001975677534307064,
      "loss": 0.1204,
      "step": 516
    },
    {
      "epoch": 0.4908616187989556,
      "grad_norm": 0.04615531116724014,
      "learning_rate": 0.0001975434704807127,
      "loss": 0.1358,
      "step": 517
    },
    {
      "epoch": 0.4918110610016615,
      "grad_norm": 0.053060565143823624,
      "learning_rate": 0.00019751906842182688,
      "loss": 0.1299,
      "step": 518
    },
    {
      "epoch": 0.49276050320436743,
      "grad_norm": 0.04905511438846588,
      "learning_rate": 0.00019749454728384594,
      "loss": 0.1284,
      "step": 519
    },
    {
      "epoch": 0.49370994540707336,
      "grad_norm": 0.04257996007800102,
      "learning_rate": 0.00019746990709671234,
      "loss": 0.1353,
      "step": 520
    },
    {
      "epoch": 0.49465938760977923,
      "grad_norm": 0.05581909418106079,
      "learning_rate": 0.0001974451478905138,
      "loss": 0.1594,
      "step": 521
    },
    {
      "epoch": 0.49560882981248516,
      "grad_norm": 0.04603990167379379,
      "learning_rate": 0.00019742026969548338,
      "loss": 0.1383,
      "step": 522
    },
    {
      "epoch": 0.4965582720151911,
      "grad_norm": 0.058511972427368164,
      "learning_rate": 0.00019739527254199958,
      "loss": 0.1725,
      "step": 523
    },
    {
      "epoch": 0.49750771421789697,
      "grad_norm": 0.03875808045268059,
      "learning_rate": 0.000197370156460586,
      "loss": 0.1405,
      "step": 524
    },
    {
      "epoch": 0.4984571564206029,
      "grad_norm": 0.040860000997781754,
      "learning_rate": 0.00019734492148191151,
      "loss": 0.139,
      "step": 525
    },
    {
      "epoch": 0.49940659862330883,
      "grad_norm": 0.06110459193587303,
      "learning_rate": 0.00019731956763679014,
      "loss": 0.223,
      "step": 526
    },
    {
      "epoch": 0.5003560408260147,
      "grad_norm": 0.05238598585128784,
      "learning_rate": 0.00019729409495618117,
      "loss": 0.1681,
      "step": 527
    },
    {
      "epoch": 0.5013054830287206,
      "grad_norm": 0.05180145800113678,
      "learning_rate": 0.00019726850347118885,
      "loss": 0.1743,
      "step": 528
    },
    {
      "epoch": 0.5022549252314266,
      "grad_norm": 0.05066410079598427,
      "learning_rate": 0.00019724279321306262,
      "loss": 0.1634,
      "step": 529
    },
    {
      "epoch": 0.5032043674341324,
      "grad_norm": 0.06856084614992142,
      "learning_rate": 0.00019721696421319684,
      "loss": 0.1685,
      "step": 530
    },
    {
      "epoch": 0.5041538096368383,
      "grad_norm": 0.045972324907779694,
      "learning_rate": 0.00019719101650313096,
      "loss": 0.1245,
      "step": 531
    },
    {
      "epoch": 0.5051032518395443,
      "grad_norm": 0.04522623121738434,
      "learning_rate": 0.00019716495011454934,
      "loss": 0.1367,
      "step": 532
    },
    {
      "epoch": 0.5060526940422502,
      "grad_norm": 0.0780516117811203,
      "learning_rate": 0.00019713876507928126,
      "loss": 0.1351,
      "step": 533
    },
    {
      "epoch": 0.507002136244956,
      "grad_norm": 0.04264210909605026,
      "learning_rate": 0.00019711246142930088,
      "loss": 0.1312,
      "step": 534
    },
    {
      "epoch": 0.507951578447662,
      "grad_norm": 0.059501100331544876,
      "learning_rate": 0.00019708603919672718,
      "loss": 0.1698,
      "step": 535
    },
    {
      "epoch": 0.5089010206503679,
      "grad_norm": 0.060105033218860626,
      "learning_rate": 0.00019705949841382396,
      "loss": 0.1303,
      "step": 536
    },
    {
      "epoch": 0.5098504628530738,
      "grad_norm": 0.04733967408537865,
      "learning_rate": 0.00019703283911299982,
      "loss": 0.1245,
      "step": 537
    },
    {
      "epoch": 0.5107999050557798,
      "grad_norm": 0.04254663735628128,
      "learning_rate": 0.00019700606132680798,
      "loss": 0.1343,
      "step": 538
    },
    {
      "epoch": 0.5117493472584856,
      "grad_norm": 0.06302463263273239,
      "learning_rate": 0.00019697916508794645,
      "loss": 0.1831,
      "step": 539
    },
    {
      "epoch": 0.5126987894611915,
      "grad_norm": 0.05301344394683838,
      "learning_rate": 0.0001969521504292578,
      "loss": 0.1316,
      "step": 540
    },
    {
      "epoch": 0.5136482316638975,
      "grad_norm": 0.04151083528995514,
      "learning_rate": 0.00019692501738372922,
      "loss": 0.1335,
      "step": 541
    },
    {
      "epoch": 0.5145976738666034,
      "grad_norm": 0.05647062510251999,
      "learning_rate": 0.00019689776598449257,
      "loss": 0.1688,
      "step": 542
    },
    {
      "epoch": 0.5155471160693093,
      "grad_norm": 0.037060294300317764,
      "learning_rate": 0.000196870396264824,
      "loss": 0.1339,
      "step": 543
    },
    {
      "epoch": 0.5164965582720152,
      "grad_norm": 0.04036247730255127,
      "learning_rate": 0.0001968429082581443,
      "loss": 0.1361,
      "step": 544
    },
    {
      "epoch": 0.5174460004747211,
      "grad_norm": 0.040889665484428406,
      "learning_rate": 0.00019681530199801875,
      "loss": 0.1356,
      "step": 545
    },
    {
      "epoch": 0.518395442677427,
      "grad_norm": 0.0538480207324028,
      "learning_rate": 0.00019678757751815686,
      "loss": 0.1689,
      "step": 546
    },
    {
      "epoch": 0.519344884880133,
      "grad_norm": 0.04074794426560402,
      "learning_rate": 0.0001967597348524126,
      "loss": 0.1329,
      "step": 547
    },
    {
      "epoch": 0.5202943270828388,
      "grad_norm": 0.03896891698241234,
      "learning_rate": 0.00019673177403478428,
      "loss": 0.1356,
      "step": 548
    },
    {
      "epoch": 0.5212437692855447,
      "grad_norm": 0.04619259387254715,
      "learning_rate": 0.00019670369509941442,
      "loss": 0.163,
      "step": 549
    },
    {
      "epoch": 0.5221932114882507,
      "grad_norm": 0.035968657582998276,
      "learning_rate": 0.00019667549808058976,
      "loss": 0.1242,
      "step": 550
    },
    {
      "epoch": 0.5231426536909566,
      "grad_norm": 0.04564007744193077,
      "learning_rate": 0.0001966471830127413,
      "loss": 0.1364,
      "step": 551
    },
    {
      "epoch": 0.5240920958936625,
      "grad_norm": 0.03991610184311867,
      "learning_rate": 0.00019661874993044415,
      "loss": 0.1312,
      "step": 552
    },
    {
      "epoch": 0.5250415380963683,
      "grad_norm": 0.037240512669086456,
      "learning_rate": 0.00019659019886841752,
      "loss": 0.1279,
      "step": 553
    },
    {
      "epoch": 0.5259909802990743,
      "grad_norm": 0.06598762422800064,
      "learning_rate": 0.00019656152986152468,
      "loss": 0.2165,
      "step": 554
    },
    {
      "epoch": 0.5269404225017802,
      "grad_norm": 0.03867746889591217,
      "learning_rate": 0.00019653274294477292,
      "loss": 0.1233,
      "step": 555
    },
    {
      "epoch": 0.5278898647044861,
      "grad_norm": 0.051915477961301804,
      "learning_rate": 0.00019650383815331357,
      "loss": 0.168,
      "step": 556
    },
    {
      "epoch": 0.528839306907192,
      "grad_norm": 0.054896485060453415,
      "learning_rate": 0.00019647481552244182,
      "loss": 0.1678,
      "step": 557
    },
    {
      "epoch": 0.5297887491098979,
      "grad_norm": 0.05439051240682602,
      "learning_rate": 0.00019644567508759675,
      "loss": 0.1607,
      "step": 558
    },
    {
      "epoch": 0.5307381913126038,
      "grad_norm": 0.03601578250527382,
      "learning_rate": 0.00019641641688436135,
      "loss": 0.1271,
      "step": 559
    },
    {
      "epoch": 0.5316876335153098,
      "grad_norm": 0.06025104597210884,
      "learning_rate": 0.00019638704094846236,
      "loss": 0.176,
      "step": 560
    },
    {
      "epoch": 0.5326370757180157,
      "grad_norm": 0.04126368835568428,
      "learning_rate": 0.00019635754731577032,
      "loss": 0.1319,
      "step": 561
    },
    {
      "epoch": 0.5335865179207215,
      "grad_norm": 0.05305393040180206,
      "learning_rate": 0.00019632793602229943,
      "loss": 0.1699,
      "step": 562
    },
    {
      "epoch": 0.5345359601234275,
      "grad_norm": 0.03538331016898155,
      "learning_rate": 0.00019629820710420764,
      "loss": 0.124,
      "step": 563
    },
    {
      "epoch": 0.5354854023261334,
      "grad_norm": 0.05861300975084305,
      "learning_rate": 0.0001962683605977965,
      "loss": 0.1688,
      "step": 564
    },
    {
      "epoch": 0.5364348445288393,
      "grad_norm": 0.040226079523563385,
      "learning_rate": 0.0001962383965395111,
      "loss": 0.1334,
      "step": 565
    },
    {
      "epoch": 0.5373842867315453,
      "grad_norm": 0.035788875073194504,
      "learning_rate": 0.00019620831496594017,
      "loss": 0.1281,
      "step": 566
    },
    {
      "epoch": 0.5383337289342511,
      "grad_norm": 0.0334162712097168,
      "learning_rate": 0.0001961781159138158,
      "loss": 0.1317,
      "step": 567
    },
    {
      "epoch": 0.539283171136957,
      "grad_norm": 0.03352081775665283,
      "learning_rate": 0.00019614779942001364,
      "loss": 0.1334,
      "step": 568
    },
    {
      "epoch": 0.540232613339663,
      "grad_norm": 0.03684060648083687,
      "learning_rate": 0.00019611736552155274,
      "loss": 0.1349,
      "step": 569
    },
    {
      "epoch": 0.5411820555423689,
      "grad_norm": 0.03640671446919441,
      "learning_rate": 0.00019608681425559542,
      "loss": 0.1278,
      "step": 570
    },
    {
      "epoch": 0.5421314977450747,
      "grad_norm": 0.04167250171303749,
      "learning_rate": 0.00019605614565944748,
      "loss": 0.1384,
      "step": 571
    },
    {
      "epoch": 0.5430809399477807,
      "grad_norm": 0.0416824147105217,
      "learning_rate": 0.00019602535977055778,
      "loss": 0.1319,
      "step": 572
    },
    {
      "epoch": 0.5440303821504866,
      "grad_norm": 0.03897137567400932,
      "learning_rate": 0.00019599445662651861,
      "loss": 0.1389,
      "step": 573
    },
    {
      "epoch": 0.5449798243531925,
      "grad_norm": 0.03894896060228348,
      "learning_rate": 0.00019596343626506526,
      "loss": 0.1341,
      "step": 574
    },
    {
      "epoch": 0.5459292665558985,
      "grad_norm": 0.04211690276861191,
      "learning_rate": 0.00019593229872407627,
      "loss": 0.1377,
      "step": 575
    },
    {
      "epoch": 0.5468787087586043,
      "grad_norm": 0.04308454692363739,
      "learning_rate": 0.00019590104404157327,
      "loss": 0.1268,
      "step": 576
    },
    {
      "epoch": 0.5478281509613102,
      "grad_norm": 0.0525001622736454,
      "learning_rate": 0.00019586967225572086,
      "loss": 0.1775,
      "step": 577
    },
    {
      "epoch": 0.5487775931640161,
      "grad_norm": 0.056315965950489044,
      "learning_rate": 0.00019583818340482664,
      "loss": 0.1688,
      "step": 578
    },
    {
      "epoch": 0.5497270353667221,
      "grad_norm": 0.03801283985376358,
      "learning_rate": 0.0001958065775273412,
      "loss": 0.1309,
      "step": 579
    },
    {
      "epoch": 0.550676477569428,
      "grad_norm": 0.03738854080438614,
      "learning_rate": 0.00019577485466185804,
      "loss": 0.137,
      "step": 580
    },
    {
      "epoch": 0.5516259197721338,
      "grad_norm": 0.03772661089897156,
      "learning_rate": 0.0001957430148471134,
      "loss": 0.1276,
      "step": 581
    },
    {
      "epoch": 0.5525753619748398,
      "grad_norm": 0.039842378348112106,
      "learning_rate": 0.00019571105812198652,
      "loss": 0.1329,
      "step": 582
    },
    {
      "epoch": 0.5535248041775457,
      "grad_norm": 0.033689334988594055,
      "learning_rate": 0.0001956789845254992,
      "loss": 0.1265,
      "step": 583
    },
    {
      "epoch": 0.5544742463802516,
      "grad_norm": 0.046588387340307236,
      "learning_rate": 0.00019564679409681608,
      "loss": 0.1645,
      "step": 584
    },
    {
      "epoch": 0.5554236885829575,
      "grad_norm": 0.03861064463853836,
      "learning_rate": 0.0001956144868752444,
      "loss": 0.1267,
      "step": 585
    },
    {
      "epoch": 0.5563731307856634,
      "grad_norm": 0.03467525169253349,
      "learning_rate": 0.000195582062900234,
      "loss": 0.1299,
      "step": 586
    },
    {
      "epoch": 0.5573225729883693,
      "grad_norm": 0.03659389913082123,
      "learning_rate": 0.0001955495222113774,
      "loss": 0.1286,
      "step": 587
    },
    {
      "epoch": 0.5582720151910753,
      "grad_norm": 0.03826770931482315,
      "learning_rate": 0.0001955168648484095,
      "loss": 0.1313,
      "step": 588
    },
    {
      "epoch": 0.5592214573937812,
      "grad_norm": 0.038110729306936264,
      "learning_rate": 0.00019548409085120772,
      "loss": 0.137,
      "step": 589
    },
    {
      "epoch": 0.560170899596487,
      "grad_norm": 0.03989555314183235,
      "learning_rate": 0.0001954512002597919,
      "loss": 0.132,
      "step": 590
    },
    {
      "epoch": 0.561120341799193,
      "grad_norm": 0.05395180359482765,
      "learning_rate": 0.00019541819311432427,
      "loss": 0.1401,
      "step": 591
    },
    {
      "epoch": 0.5620697840018989,
      "grad_norm": 0.05007918179035187,
      "learning_rate": 0.00019538506945510938,
      "loss": 0.1584,
      "step": 592
    },
    {
      "epoch": 0.5630192262046048,
      "grad_norm": 0.047849785536527634,
      "learning_rate": 0.00019535182932259404,
      "loss": 0.1265,
      "step": 593
    },
    {
      "epoch": 0.5639686684073107,
      "grad_norm": 0.04303041473031044,
      "learning_rate": 0.00019531847275736726,
      "loss": 0.1245,
      "step": 594
    },
    {
      "epoch": 0.5649181106100166,
      "grad_norm": 0.04128289222717285,
      "learning_rate": 0.00019528499980016025,
      "loss": 0.1317,
      "step": 595
    },
    {
      "epoch": 0.5658675528127225,
      "grad_norm": 0.04311414808034897,
      "learning_rate": 0.00019525141049184637,
      "loss": 0.1364,
      "step": 596
    },
    {
      "epoch": 0.5668169950154285,
      "grad_norm": 0.03765838220715523,
      "learning_rate": 0.00019521770487344103,
      "loss": 0.1268,
      "step": 597
    },
    {
      "epoch": 0.5677664372181344,
      "grad_norm": 0.03674585744738579,
      "learning_rate": 0.00019518388298610164,
      "loss": 0.1297,
      "step": 598
    },
    {
      "epoch": 0.5687158794208402,
      "grad_norm": 0.036937762051820755,
      "learning_rate": 0.0001951499448711276,
      "loss": 0.1303,
      "step": 599
    },
    {
      "epoch": 0.5696653216235462,
      "grad_norm": 0.03748161345720291,
      "learning_rate": 0.0001951158905699603,
      "loss": 0.1328,
      "step": 600
    },
    {
      "epoch": 0.5706147638262521,
      "grad_norm": 0.04011257737874985,
      "learning_rate": 0.00019508172012418283,
      "loss": 0.1346,
      "step": 601
    },
    {
      "epoch": 0.571564206028958,
      "grad_norm": 0.03853931650519371,
      "learning_rate": 0.00019504743357552035,
      "loss": 0.1279,
      "step": 602
    },
    {
      "epoch": 0.572513648231664,
      "grad_norm": 0.03750459849834442,
      "learning_rate": 0.0001950130309658396,
      "loss": 0.1227,
      "step": 603
    },
    {
      "epoch": 0.5734630904343698,
      "grad_norm": 0.05542079731822014,
      "learning_rate": 0.00019497851233714908,
      "loss": 0.1647,
      "step": 604
    },
    {
      "epoch": 0.5744125326370757,
      "grad_norm": 0.04472218081355095,
      "learning_rate": 0.00019494387773159898,
      "loss": 0.1416,
      "step": 605
    },
    {
      "epoch": 0.5753619748397816,
      "grad_norm": 0.052323974668979645,
      "learning_rate": 0.00019490912719148114,
      "loss": 0.1367,
      "step": 606
    },
    {
      "epoch": 0.5763114170424876,
      "grad_norm": 0.037580832839012146,
      "learning_rate": 0.00019487426075922893,
      "loss": 0.131,
      "step": 607
    },
    {
      "epoch": 0.5772608592451934,
      "grad_norm": 0.03929577395319939,
      "learning_rate": 0.0001948392784774172,
      "loss": 0.128,
      "step": 608
    },
    {
      "epoch": 0.5782103014478993,
      "grad_norm": 0.03706606104969978,
      "learning_rate": 0.0001948041803887623,
      "loss": 0.1316,
      "step": 609
    },
    {
      "epoch": 0.5791597436506053,
      "grad_norm": 0.038938358426094055,
      "learning_rate": 0.00019476896653612203,
      "loss": 0.1275,
      "step": 610
    },
    {
      "epoch": 0.5801091858533112,
      "grad_norm": 0.04818068817257881,
      "learning_rate": 0.00019473363696249546,
      "loss": 0.1662,
      "step": 611
    },
    {
      "epoch": 0.581058628056017,
      "grad_norm": 0.03735940158367157,
      "learning_rate": 0.00019469819171102304,
      "loss": 0.1361,
      "step": 612
    },
    {
      "epoch": 0.582008070258723,
      "grad_norm": 0.03568827733397484,
      "learning_rate": 0.00019466263082498645,
      "loss": 0.1216,
      "step": 613
    },
    {
      "epoch": 0.5829575124614289,
      "grad_norm": 0.03913251310586929,
      "learning_rate": 0.0001946269543478085,
      "loss": 0.1321,
      "step": 614
    },
    {
      "epoch": 0.5839069546641348,
      "grad_norm": 0.062009479850530624,
      "learning_rate": 0.0001945911623230533,
      "loss": 0.1778,
      "step": 615
    },
    {
      "epoch": 0.5848563968668408,
      "grad_norm": 0.039088111370801926,
      "learning_rate": 0.0001945552547944259,
      "loss": 0.1352,
      "step": 616
    },
    {
      "epoch": 0.5858058390695466,
      "grad_norm": 0.041976600885391235,
      "learning_rate": 0.0001945192318057725,
      "loss": 0.1394,
      "step": 617
    },
    {
      "epoch": 0.5867552812722525,
      "grad_norm": 0.03723563253879547,
      "learning_rate": 0.00019448309340108018,
      "loss": 0.1246,
      "step": 618
    },
    {
      "epoch": 0.5877047234749585,
      "grad_norm": 0.0382399819791317,
      "learning_rate": 0.00019444683962447707,
      "loss": 0.1232,
      "step": 619
    },
    {
      "epoch": 0.5886541656776644,
      "grad_norm": 0.03758077695965767,
      "learning_rate": 0.0001944104705202321,
      "loss": 0.1417,
      "step": 620
    },
    {
      "epoch": 0.5896036078803703,
      "grad_norm": 0.034823786467313766,
      "learning_rate": 0.000194373986132755,
      "loss": 0.1304,
      "step": 621
    },
    {
      "epoch": 0.5905530500830762,
      "grad_norm": 0.03755120187997818,
      "learning_rate": 0.00019433738650659641,
      "loss": 0.133,
      "step": 622
    },
    {
      "epoch": 0.5915024922857821,
      "grad_norm": 0.03759913146495819,
      "learning_rate": 0.00019430067168644754,
      "loss": 0.1222,
      "step": 623
    },
    {
      "epoch": 0.592451934488488,
      "grad_norm": 0.06232694163918495,
      "learning_rate": 0.0001942638417171403,
      "loss": 0.1778,
      "step": 624
    },
    {
      "epoch": 0.593401376691194,
      "grad_norm": 0.05642306059598923,
      "learning_rate": 0.00019422689664364725,
      "loss": 0.1706,
      "step": 625
    },
    {
      "epoch": 0.5943508188938998,
      "grad_norm": 0.0827709287405014,
      "learning_rate": 0.00019418983651108148,
      "loss": 0.2371,
      "step": 626
    },
    {
      "epoch": 0.5953002610966057,
      "grad_norm": 0.03614366054534912,
      "learning_rate": 0.00019415266136469652,
      "loss": 0.1225,
      "step": 627
    },
    {
      "epoch": 0.5962497032993117,
      "grad_norm": 0.042416494339704514,
      "learning_rate": 0.00019411537124988643,
      "loss": 0.1239,
      "step": 628
    },
    {
      "epoch": 0.5971991455020176,
      "grad_norm": 0.037246908992528915,
      "learning_rate": 0.00019407796621218566,
      "loss": 0.1292,
      "step": 629
    },
    {
      "epoch": 0.5981485877047235,
      "grad_norm": 0.05374092981219292,
      "learning_rate": 0.00019404044629726887,
      "loss": 0.1782,
      "step": 630
    },
    {
      "epoch": 0.5990980299074293,
      "grad_norm": 0.052854426205158234,
      "learning_rate": 0.00019400281155095112,
      "loss": 0.1711,
      "step": 631
    },
    {
      "epoch": 0.6000474721101353,
      "grad_norm": 0.038800131529569626,
      "learning_rate": 0.00019396506201918765,
      "loss": 0.1285,
      "step": 632
    },
    {
      "epoch": 0.6009969143128412,
      "grad_norm": 0.040118250995874405,
      "learning_rate": 0.0001939271977480738,
      "loss": 0.1335,
      "step": 633
    },
    {
      "epoch": 0.6019463565155471,
      "grad_norm": 0.07007341086864471,
      "learning_rate": 0.00019388921878384517,
      "loss": 0.2115,
      "step": 634
    },
    {
      "epoch": 0.602895798718253,
      "grad_norm": 0.03245210647583008,
      "learning_rate": 0.0001938511251728772,
      "loss": 0.1304,
      "step": 635
    },
    {
      "epoch": 0.6038452409209589,
      "grad_norm": 0.03384733200073242,
      "learning_rate": 0.00019381291696168553,
      "loss": 0.1297,
      "step": 636
    },
    {
      "epoch": 0.6047946831236648,
      "grad_norm": 0.04325825348496437,
      "learning_rate": 0.0001937745941969256,
      "loss": 0.1337,
      "step": 637
    },
    {
      "epoch": 0.6057441253263708,
      "grad_norm": 0.046986173838377,
      "learning_rate": 0.00019373615692539275,
      "loss": 0.1385,
      "step": 638
    },
    {
      "epoch": 0.6066935675290767,
      "grad_norm": 0.03726234659552574,
      "learning_rate": 0.0001936976051940222,
      "loss": 0.1393,
      "step": 639
    },
    {
      "epoch": 0.6076430097317825,
      "grad_norm": 0.05574486404657364,
      "learning_rate": 0.0001936589390498889,
      "loss": 0.1698,
      "step": 640
    },
    {
      "epoch": 0.6085924519344885,
      "grad_norm": 0.052818477153778076,
      "learning_rate": 0.0001936201585402075,
      "loss": 0.1722,
      "step": 641
    },
    {
      "epoch": 0.6095418941371944,
      "grad_norm": 0.03535636141896248,
      "learning_rate": 0.00019358126371233231,
      "loss": 0.129,
      "step": 642
    },
    {
      "epoch": 0.6104913363399003,
      "grad_norm": 0.03453061729669571,
      "learning_rate": 0.00019354225461375724,
      "loss": 0.1313,
      "step": 643
    },
    {
      "epoch": 0.6114407785426063,
      "grad_norm": 0.030467770993709564,
      "learning_rate": 0.0001935031312921157,
      "loss": 0.1241,
      "step": 644
    },
    {
      "epoch": 0.6123902207453121,
      "grad_norm": 0.03996508568525314,
      "learning_rate": 0.0001934638937951806,
      "loss": 0.1265,
      "step": 645
    },
    {
      "epoch": 0.613339662948018,
      "grad_norm": 0.034416794776916504,
      "learning_rate": 0.00019342454217086429,
      "loss": 0.1341,
      "step": 646
    },
    {
      "epoch": 0.614289105150724,
      "grad_norm": 0.03674698621034622,
      "learning_rate": 0.00019338507646721845,
      "loss": 0.1399,
      "step": 647
    },
    {
      "epoch": 0.6152385473534299,
      "grad_norm": 0.037850040942430496,
      "learning_rate": 0.0001933454967324341,
      "loss": 0.1295,
      "step": 648
    },
    {
      "epoch": 0.6161879895561357,
      "grad_norm": 0.037829235196113586,
      "learning_rate": 0.0001933058030148414,
      "loss": 0.1302,
      "step": 649
    },
    {
      "epoch": 0.6171374317588417,
      "grad_norm": 0.03579702973365784,
      "learning_rate": 0.00019326599536290983,
      "loss": 0.1352,
      "step": 650
    },
    {
      "epoch": 0.6180868739615476,
      "grad_norm": 0.052539851516485214,
      "learning_rate": 0.00019322607382524785,
      "loss": 0.1744,
      "step": 651
    },
    {
      "epoch": 0.6190363161642535,
      "grad_norm": 0.03814668953418732,
      "learning_rate": 0.0001931860384506031,
      "loss": 0.1303,
      "step": 652
    },
    {
      "epoch": 0.6199857583669595,
      "grad_norm": 0.03730069473385811,
      "learning_rate": 0.00019314588928786224,
      "loss": 0.1236,
      "step": 653
    },
    {
      "epoch": 0.6209352005696653,
      "grad_norm": 0.04081875458359718,
      "learning_rate": 0.00019310562638605078,
      "loss": 0.1328,
      "step": 654
    },
    {
      "epoch": 0.6218846427723712,
      "grad_norm": 0.03532617911696434,
      "learning_rate": 0.00019306524979433308,
      "loss": 0.1238,
      "step": 655
    },
    {
      "epoch": 0.6228340849750772,
      "grad_norm": 0.035857025533914566,
      "learning_rate": 0.00019302475956201254,
      "loss": 0.1244,
      "step": 656
    },
    {
      "epoch": 0.6237835271777831,
      "grad_norm": 0.036031339317560196,
      "learning_rate": 0.0001929841557385311,
      "loss": 0.1251,
      "step": 657
    },
    {
      "epoch": 0.624732969380489,
      "grad_norm": 0.037832874804735184,
      "learning_rate": 0.00019294343837346944,
      "loss": 0.1262,
      "step": 658
    },
    {
      "epoch": 0.6256824115831948,
      "grad_norm": 0.03651989623904228,
      "learning_rate": 0.00019290260751654706,
      "loss": 0.1239,
      "step": 659
    },
    {
      "epoch": 0.6266318537859008,
      "grad_norm": 0.03595907241106033,
      "learning_rate": 0.00019286166321762184,
      "loss": 0.1342,
      "step": 660
    },
    {
      "epoch": 0.6275812959886067,
      "grad_norm": 0.04714696854352951,
      "learning_rate": 0.00019282060552669025,
      "loss": 0.1712,
      "step": 661
    },
    {
      "epoch": 0.6285307381913126,
      "grad_norm": 0.0448799654841423,
      "learning_rate": 0.00019277943449388726,
      "loss": 0.1601,
      "step": 662
    },
    {
      "epoch": 0.6294801803940185,
      "grad_norm": 0.03133920207619667,
      "learning_rate": 0.0001927381501694862,
      "loss": 0.127,
      "step": 663
    },
    {
      "epoch": 0.6304296225967244,
      "grad_norm": 0.051593225449323654,
      "learning_rate": 0.00019269675260389876,
      "loss": 0.1659,
      "step": 664
    },
    {
      "epoch": 0.6313790647994303,
      "grad_norm": 0.03713349625468254,
      "learning_rate": 0.0001926552418476749,
      "loss": 0.1294,
      "step": 665
    },
    {
      "epoch": 0.6323285070021363,
      "grad_norm": 0.03420734405517578,
      "learning_rate": 0.00019261361795150275,
      "loss": 0.1376,
      "step": 666
    },
    {
      "epoch": 0.6332779492048421,
      "grad_norm": 0.04476429522037506,
      "learning_rate": 0.00019257188096620867,
      "loss": 0.1595,
      "step": 667
    },
    {
      "epoch": 0.634227391407548,
      "grad_norm": 0.05289504677057266,
      "learning_rate": 0.00019253003094275707,
      "loss": 0.1589,
      "step": 668
    },
    {
      "epoch": 0.635176833610254,
      "grad_norm": 0.042022526264190674,
      "learning_rate": 0.0001924880679322504,
      "loss": 0.1316,
      "step": 669
    },
    {
      "epoch": 0.6361262758129599,
      "grad_norm": 0.0408223457634449,
      "learning_rate": 0.00019244599198592907,
      "loss": 0.1386,
      "step": 670
    },
    {
      "epoch": 0.6370757180156658,
      "grad_norm": 0.03941584751009941,
      "learning_rate": 0.00019240380315517142,
      "loss": 0.1325,
      "step": 671
    },
    {
      "epoch": 0.6380251602183717,
      "grad_norm": 0.03860325738787651,
      "learning_rate": 0.00019236150149149357,
      "loss": 0.1215,
      "step": 672
    },
    {
      "epoch": 0.6389746024210776,
      "grad_norm": 0.0342581607401371,
      "learning_rate": 0.00019231908704654948,
      "loss": 0.1247,
      "step": 673
    },
    {
      "epoch": 0.6399240446237835,
      "grad_norm": 0.04099750518798828,
      "learning_rate": 0.00019227655987213077,
      "loss": 0.1335,
      "step": 674
    },
    {
      "epoch": 0.6408734868264895,
      "grad_norm": 0.031005796045064926,
      "learning_rate": 0.00019223392002016678,
      "loss": 0.1297,
      "step": 675
    },
    {
      "epoch": 0.6418229290291954,
      "grad_norm": 0.05248212069272995,
      "learning_rate": 0.0001921911675427244,
      "loss": 0.1737,
      "step": 676
    },
    {
      "epoch": 0.6427723712319012,
      "grad_norm": 0.04168983921408653,
      "learning_rate": 0.00019214830249200806,
      "loss": 0.1373,
      "step": 677
    },
    {
      "epoch": 0.6437218134346072,
      "grad_norm": 0.03659060224890709,
      "learning_rate": 0.0001921053249203596,
      "loss": 0.1263,
      "step": 678
    },
    {
      "epoch": 0.6446712556373131,
      "grad_norm": 0.042256928980350494,
      "learning_rate": 0.00019206223488025834,
      "loss": 0.1646,
      "step": 679
    },
    {
      "epoch": 0.645620697840019,
      "grad_norm": 0.04420709237456322,
      "learning_rate": 0.00019201903242432086,
      "loss": 0.1577,
      "step": 680
    },
    {
      "epoch": 0.646570140042725,
      "grad_norm": 0.03781798109412193,
      "learning_rate": 0.00019197571760530107,
      "loss": 0.1253,
      "step": 681
    },
    {
      "epoch": 0.6475195822454308,
      "grad_norm": 0.03728644549846649,
      "learning_rate": 0.00019193229047609003,
      "loss": 0.1423,
      "step": 682
    },
    {
      "epoch": 0.6484690244481367,
      "grad_norm": 0.05171523615717888,
      "learning_rate": 0.00019188875108971598,
      "loss": 0.177,
      "step": 683
    },
    {
      "epoch": 0.6494184666508427,
      "grad_norm": 0.05022161453962326,
      "learning_rate": 0.0001918450994993442,
      "loss": 0.1616,
      "step": 684
    },
    {
      "epoch": 0.6503679088535486,
      "grad_norm": 0.037774864584207535,
      "learning_rate": 0.00019180133575827707,
      "loss": 0.1257,
      "step": 685
    },
    {
      "epoch": 0.6513173510562544,
      "grad_norm": 0.056198425590991974,
      "learning_rate": 0.00019175745991995377,
      "loss": 0.1751,
      "step": 686
    },
    {
      "epoch": 0.6522667932589603,
      "grad_norm": 0.05259314179420471,
      "learning_rate": 0.0001917134720379505,
      "loss": 0.1655,
      "step": 687
    },
    {
      "epoch": 0.6532162354616663,
      "grad_norm": 0.04018954187631607,
      "learning_rate": 0.00019166937216598013,
      "loss": 0.1178,
      "step": 688
    },
    {
      "epoch": 0.6541656776643722,
      "grad_norm": 0.057170454412698746,
      "learning_rate": 0.00019162516035789247,
      "loss": 0.1744,
      "step": 689
    },
    {
      "epoch": 0.655115119867078,
      "grad_norm": 0.04647281393408775,
      "learning_rate": 0.00019158083666767381,
      "loss": 0.1343,
      "step": 690
    },
    {
      "epoch": 0.656064562069784,
      "grad_norm": 0.056390274316072464,
      "learning_rate": 0.00019153640114944723,
      "loss": 0.2029,
      "step": 691
    },
    {
      "epoch": 0.6570140042724899,
      "grad_norm": 0.03656432405114174,
      "learning_rate": 0.00019149185385747224,
      "loss": 0.1249,
      "step": 692
    },
    {
      "epoch": 0.6579634464751958,
      "grad_norm": 0.031422629952430725,
      "learning_rate": 0.0001914471948461449,
      "loss": 0.1232,
      "step": 693
    },
    {
      "epoch": 0.6589128886779018,
      "grad_norm": 0.0463186614215374,
      "learning_rate": 0.00019140242416999765,
      "loss": 0.1675,
      "step": 694
    },
    {
      "epoch": 0.6598623308806076,
      "grad_norm": 0.03907819464802742,
      "learning_rate": 0.0001913575418836993,
      "loss": 0.1307,
      "step": 695
    },
    {
      "epoch": 0.6608117730833135,
      "grad_norm": 0.04354274645447731,
      "learning_rate": 0.00019131254804205498,
      "loss": 0.1381,
      "step": 696
    },
    {
      "epoch": 0.6617612152860195,
      "grad_norm": 0.0355788990855217,
      "learning_rate": 0.00019126744270000598,
      "loss": 0.1273,
      "step": 697
    },
    {
      "epoch": 0.6627106574887254,
      "grad_norm": 0.0382835678756237,
      "learning_rate": 0.0001912222259126298,
      "loss": 0.1184,
      "step": 698
    },
    {
      "epoch": 0.6636600996914312,
      "grad_norm": 0.05007009580731392,
      "learning_rate": 0.00019117689773513993,
      "loss": 0.1751,
      "step": 699
    },
    {
      "epoch": 0.6646095418941372,
      "grad_norm": 0.05426732823252678,
      "learning_rate": 0.000191131458222886,
      "loss": 0.175,
      "step": 700
    },
    {
      "epoch": 0.6655589840968431,
      "grad_norm": 0.033966466784477234,
      "learning_rate": 0.00019108590743135352,
      "loss": 0.123,
      "step": 701
    },
    {
      "epoch": 0.666508426299549,
      "grad_norm": 0.04007060080766678,
      "learning_rate": 0.00019104024541616386,
      "loss": 0.1386,
      "step": 702
    },
    {
      "epoch": 0.667457868502255,
      "grad_norm": 0.05075724050402641,
      "learning_rate": 0.00019099447223307423,
      "loss": 0.1698,
      "step": 703
    },
    {
      "epoch": 0.6684073107049608,
      "grad_norm": 0.04677930474281311,
      "learning_rate": 0.00019094858793797757,
      "loss": 0.1633,
      "step": 704
    },
    {
      "epoch": 0.6693567529076667,
      "grad_norm": 0.04063379392027855,
      "learning_rate": 0.00019090259258690263,
      "loss": 0.1414,
      "step": 705
    },
    {
      "epoch": 0.6703061951103727,
      "grad_norm": 0.039291396737098694,
      "learning_rate": 0.00019085648623601352,
      "loss": 0.1273,
      "step": 706
    },
    {
      "epoch": 0.6712556373130786,
      "grad_norm": 0.04960642755031586,
      "learning_rate": 0.00019081026894161008,
      "loss": 0.1512,
      "step": 707
    },
    {
      "epoch": 0.6722050795157845,
      "grad_norm": 0.04266348108649254,
      "learning_rate": 0.00019076394076012756,
      "loss": 0.1352,
      "step": 708
    },
    {
      "epoch": 0.6731545217184904,
      "grad_norm": 0.03943296894431114,
      "learning_rate": 0.00019071750174813663,
      "loss": 0.1332,
      "step": 709
    },
    {
      "epoch": 0.6741039639211963,
      "grad_norm": 0.04927997291088104,
      "learning_rate": 0.0001906709519623433,
      "loss": 0.1645,
      "step": 710
    },
    {
      "epoch": 0.6750534061239022,
      "grad_norm": 0.0418451763689518,
      "learning_rate": 0.00019062429145958877,
      "loss": 0.1279,
      "step": 711
    },
    {
      "epoch": 0.6760028483266081,
      "grad_norm": 0.04283139482140541,
      "learning_rate": 0.0001905775202968495,
      "loss": 0.1388,
      "step": 712
    },
    {
      "epoch": 0.676952290529314,
      "grad_norm": 0.05674710497260094,
      "learning_rate": 0.00019053063853123714,
      "loss": 0.171,
      "step": 713
    },
    {
      "epoch": 0.6779017327320199,
      "grad_norm": 0.03568726405501366,
      "learning_rate": 0.00019048364621999825,
      "loss": 0.1329,
      "step": 714
    },
    {
      "epoch": 0.6788511749347258,
      "grad_norm": 0.03796301409602165,
      "learning_rate": 0.00019043654342051447,
      "loss": 0.1352,
      "step": 715
    },
    {
      "epoch": 0.6798006171374318,
      "grad_norm": 0.03538963943719864,
      "learning_rate": 0.00019038933019030233,
      "loss": 0.1328,
      "step": 716
    },
    {
      "epoch": 0.6807500593401377,
      "grad_norm": 0.05234035104513168,
      "learning_rate": 0.00019034200658701322,
      "loss": 0.1649,
      "step": 717
    },
    {
      "epoch": 0.6816995015428435,
      "grad_norm": 0.03719701990485191,
      "learning_rate": 0.00019029457266843327,
      "loss": 0.1295,
      "step": 718
    },
    {
      "epoch": 0.6826489437455495,
      "grad_norm": 0.03594352304935455,
      "learning_rate": 0.00019024702849248335,
      "loss": 0.128,
      "step": 719
    },
    {
      "epoch": 0.6835983859482554,
      "grad_norm": 0.04097168892621994,
      "learning_rate": 0.00019019937411721895,
      "loss": 0.1331,
      "step": 720
    },
    {
      "epoch": 0.6845478281509613,
      "grad_norm": 0.03943239524960518,
      "learning_rate": 0.00019015160960083013,
      "loss": 0.1337,
      "step": 721
    },
    {
      "epoch": 0.6854972703536673,
      "grad_norm": 0.0411958172917366,
      "learning_rate": 0.00019010373500164145,
      "loss": 0.1603,
      "step": 722
    },
    {
      "epoch": 0.6864467125563731,
      "grad_norm": 0.05295250564813614,
      "learning_rate": 0.00019005575037811184,
      "loss": 0.1644,
      "step": 723
    },
    {
      "epoch": 0.687396154759079,
      "grad_norm": 0.03916552662849426,
      "learning_rate": 0.00019000765578883465,
      "loss": 0.135,
      "step": 724
    },
    {
      "epoch": 0.688345596961785,
      "grad_norm": 0.03871094062924385,
      "learning_rate": 0.00018995945129253745,
      "loss": 0.1276,
      "step": 725
    },
    {
      "epoch": 0.6892950391644909,
      "grad_norm": 0.03405594825744629,
      "learning_rate": 0.00018991113694808204,
      "loss": 0.1327,
      "step": 726
    },
    {
      "epoch": 0.6902444813671967,
      "grad_norm": 0.03824371099472046,
      "learning_rate": 0.00018986271281446436,
      "loss": 0.1357,
      "step": 727
    },
    {
      "epoch": 0.6911939235699027,
      "grad_norm": 0.03813684731721878,
      "learning_rate": 0.0001898141789508144,
      "loss": 0.1341,
      "step": 728
    },
    {
      "epoch": 0.6921433657726086,
      "grad_norm": 0.03283112868666649,
      "learning_rate": 0.0001897655354163962,
      "loss": 0.1299,
      "step": 729
    },
    {
      "epoch": 0.6930928079753145,
      "grad_norm": 0.03226768597960472,
      "learning_rate": 0.00018971678227060757,
      "loss": 0.1272,
      "step": 730
    },
    {
      "epoch": 0.6940422501780205,
      "grad_norm": 0.037317484617233276,
      "learning_rate": 0.0001896679195729803,
      "loss": 0.1339,
      "step": 731
    },
    {
      "epoch": 0.6949916923807263,
      "grad_norm": 0.05428892746567726,
      "learning_rate": 0.0001896189473831799,
      "loss": 0.1667,
      "step": 732
    },
    {
      "epoch": 0.6959411345834322,
      "grad_norm": 0.04177982360124588,
      "learning_rate": 0.0001895698657610056,
      "loss": 0.1337,
      "step": 733
    },
    {
      "epoch": 0.6968905767861382,
      "grad_norm": 0.041572730988264084,
      "learning_rate": 0.00018952067476639024,
      "loss": 0.1332,
      "step": 734
    },
    {
      "epoch": 0.6978400189888441,
      "grad_norm": 0.03430505469441414,
      "learning_rate": 0.00018947137445940023,
      "loss": 0.1265,
      "step": 735
    },
    {
      "epoch": 0.6987894611915499,
      "grad_norm": 0.03863980621099472,
      "learning_rate": 0.00018942196490023542,
      "loss": 0.1337,
      "step": 736
    },
    {
      "epoch": 0.6997389033942559,
      "grad_norm": 0.06445252895355225,
      "learning_rate": 0.00018937244614922912,
      "loss": 0.2032,
      "step": 737
    },
    {
      "epoch": 0.7006883455969618,
      "grad_norm": 0.03358490392565727,
      "learning_rate": 0.00018932281826684793,
      "loss": 0.127,
      "step": 738
    },
    {
      "epoch": 0.7016377877996677,
      "grad_norm": 0.034341324120759964,
      "learning_rate": 0.00018927308131369173,
      "loss": 0.1303,
      "step": 739
    },
    {
      "epoch": 0.7025872300023736,
      "grad_norm": 0.035848621279001236,
      "learning_rate": 0.00018922323535049354,
      "loss": 0.1272,
      "step": 740
    },
    {
      "epoch": 0.7035366722050795,
      "grad_norm": 0.03865866735577583,
      "learning_rate": 0.0001891732804381196,
      "loss": 0.136,
      "step": 741
    },
    {
      "epoch": 0.7044861144077854,
      "grad_norm": 0.045944251120090485,
      "learning_rate": 0.0001891232166375691,
      "loss": 0.1741,
      "step": 742
    },
    {
      "epoch": 0.7054355566104913,
      "grad_norm": 0.04418769106268883,
      "learning_rate": 0.00018907304400997418,
      "loss": 0.1504,
      "step": 743
    },
    {
      "epoch": 0.7063849988131973,
      "grad_norm": 0.062257930636405945,
      "learning_rate": 0.0001890227626165999,
      "loss": 0.1786,
      "step": 744
    },
    {
      "epoch": 0.7073344410159031,
      "grad_norm": 0.037457846105098724,
      "learning_rate": 0.00018897237251884415,
      "loss": 0.1389,
      "step": 745
    },
    {
      "epoch": 0.708283883218609,
      "grad_norm": 0.039091672748327255,
      "learning_rate": 0.0001889218737782375,
      "loss": 0.1264,
      "step": 746
    },
    {
      "epoch": 0.709233325421315,
      "grad_norm": 0.035011596977710724,
      "learning_rate": 0.00018887126645644324,
      "loss": 0.1363,
      "step": 747
    },
    {
      "epoch": 0.7101827676240209,
      "grad_norm": 0.104104183614254,
      "learning_rate": 0.00018882055061525722,
      "loss": 0.1588,
      "step": 748
    },
    {
      "epoch": 0.7111322098267268,
      "grad_norm": 0.03222833201289177,
      "learning_rate": 0.0001887697263166078,
      "loss": 0.1259,
      "step": 749
    },
    {
      "epoch": 0.7120816520294327,
      "grad_norm": 0.049904145300388336,
      "learning_rate": 0.0001887187936225558,
      "loss": 0.1676,
      "step": 750
    },
    {
      "epoch": 0.7130310942321386,
      "grad_norm": 0.15150390565395355,
      "learning_rate": 0.00018866775259529435,
      "loss": 0.1369,
      "step": 751
    },
    {
      "epoch": 0.7139805364348445,
      "grad_norm": 0.03994397446513176,
      "learning_rate": 0.0001886166032971489,
      "loss": 0.1294,
      "step": 752
    },
    {
      "epoch": 0.7149299786375505,
      "grad_norm": 0.06274881213903427,
      "learning_rate": 0.00018856534579057713,
      "loss": 0.1659,
      "step": 753
    },
    {
      "epoch": 0.7158794208402564,
      "grad_norm": 0.04001612216234207,
      "learning_rate": 0.00018851398013816883,
      "loss": 0.1305,
      "step": 754
    },
    {
      "epoch": 0.7168288630429622,
      "grad_norm": 0.03961142525076866,
      "learning_rate": 0.0001884625064026458,
      "loss": 0.1265,
      "step": 755
    },
    {
      "epoch": 0.7177783052456682,
      "grad_norm": 0.033916253596544266,
      "learning_rate": 0.00018841092464686186,
      "loss": 0.1336,
      "step": 756
    },
    {
      "epoch": 0.7187277474483741,
      "grad_norm": 0.040992431342601776,
      "learning_rate": 0.00018835923493380278,
      "loss": 0.1403,
      "step": 757
    },
    {
      "epoch": 0.71967718965108,
      "grad_norm": 0.03410341590642929,
      "learning_rate": 0.00018830743732658608,
      "loss": 0.1233,
      "step": 758
    },
    {
      "epoch": 0.720626631853786,
      "grad_norm": 0.05984083190560341,
      "learning_rate": 0.000188255531888461,
      "loss": 0.1417,
      "step": 759
    },
    {
      "epoch": 0.7215760740564918,
      "grad_norm": 0.03874243050813675,
      "learning_rate": 0.00018820351868280858,
      "loss": 0.1366,
      "step": 760
    },
    {
      "epoch": 0.7225255162591977,
      "grad_norm": 0.05256400629878044,
      "learning_rate": 0.00018815139777314136,
      "loss": 0.172,
      "step": 761
    },
    {
      "epoch": 0.7234749584619037,
      "grad_norm": 0.039005253463983536,
      "learning_rate": 0.0001880991692231034,
      "loss": 0.1312,
      "step": 762
    },
    {
      "epoch": 0.7244244006646096,
      "grad_norm": 0.04029637575149536,
      "learning_rate": 0.0001880468330964702,
      "loss": 0.1327,
      "step": 763
    },
    {
      "epoch": 0.7253738428673154,
      "grad_norm": 0.04493672773241997,
      "learning_rate": 0.00018799438945714866,
      "loss": 0.1555,
      "step": 764
    },
    {
      "epoch": 0.7263232850700213,
      "grad_norm": 0.03862634301185608,
      "learning_rate": 0.0001879418383691769,
      "loss": 0.133,
      "step": 765
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 0.03904002159833908,
      "learning_rate": 0.00018788917989672434,
      "loss": 0.1259,
      "step": 766
    },
    {
      "epoch": 0.7282221694754332,
      "grad_norm": 0.037936531007289886,
      "learning_rate": 0.0001878364141040914,
      "loss": 0.1263,
      "step": 767
    },
    {
      "epoch": 0.729171611678139,
      "grad_norm": 0.03802201896905899,
      "learning_rate": 0.0001877835410557096,
      "loss": 0.134,
      "step": 768
    },
    {
      "epoch": 0.730121053880845,
      "grad_norm": 0.03759211301803589,
      "learning_rate": 0.00018773056081614154,
      "loss": 0.1383,
      "step": 769
    },
    {
      "epoch": 0.7310704960835509,
      "grad_norm": 0.0498163104057312,
      "learning_rate": 0.0001876774734500805,
      "loss": 0.1573,
      "step": 770
    },
    {
      "epoch": 0.7320199382862568,
      "grad_norm": 0.036126043647527695,
      "learning_rate": 0.00018762427902235072,
      "loss": 0.1274,
      "step": 771
    },
    {
      "epoch": 0.7329693804889628,
      "grad_norm": 0.044809550046920776,
      "learning_rate": 0.0001875709775979071,
      "loss": 0.1703,
      "step": 772
    },
    {
      "epoch": 0.7339188226916686,
      "grad_norm": 0.050454337149858475,
      "learning_rate": 0.0001875175692418353,
      "loss": 0.1699,
      "step": 773
    },
    {
      "epoch": 0.7348682648943745,
      "grad_norm": 0.06160600110888481,
      "learning_rate": 0.00018746405401935142,
      "loss": 0.1806,
      "step": 774
    },
    {
      "epoch": 0.7358177070970805,
      "grad_norm": 0.05408332124352455,
      "learning_rate": 0.0001874104319958021,
      "loss": 0.1681,
      "step": 775
    },
    {
      "epoch": 0.7367671492997864,
      "grad_norm": 0.03859655559062958,
      "learning_rate": 0.00018735670323666442,
      "loss": 0.1297,
      "step": 776
    },
    {
      "epoch": 0.7377165915024922,
      "grad_norm": 0.05268474668264389,
      "learning_rate": 0.00018730286780754577,
      "loss": 0.1658,
      "step": 777
    },
    {
      "epoch": 0.7386660337051982,
      "grad_norm": 0.06406822055578232,
      "learning_rate": 0.00018724892577418381,
      "loss": 0.199,
      "step": 778
    },
    {
      "epoch": 0.7396154759079041,
      "grad_norm": 0.05488892272114754,
      "learning_rate": 0.00018719487720244638,
      "loss": 0.1669,
      "step": 779
    },
    {
      "epoch": 0.74056491811061,
      "grad_norm": 0.03732241317629814,
      "learning_rate": 0.00018714072215833132,
      "loss": 0.1337,
      "step": 780
    },
    {
      "epoch": 0.741514360313316,
      "grad_norm": 0.05548230558633804,
      "learning_rate": 0.00018708646070796664,
      "loss": 0.1652,
      "step": 781
    },
    {
      "epoch": 0.7424638025160218,
      "grad_norm": 0.06930623203516006,
      "learning_rate": 0.0001870320929176101,
      "loss": 0.1647,
      "step": 782
    },
    {
      "epoch": 0.7434132447187277,
      "grad_norm": 0.05485931411385536,
      "learning_rate": 0.0001869776188536495,
      "loss": 0.2149,
      "step": 783
    },
    {
      "epoch": 0.7443626869214337,
      "grad_norm": 0.03739183023571968,
      "learning_rate": 0.00018692303858260228,
      "loss": 0.1257,
      "step": 784
    },
    {
      "epoch": 0.7453121291241396,
      "grad_norm": 0.03913332521915436,
      "learning_rate": 0.00018686835217111557,
      "loss": 0.1293,
      "step": 785
    },
    {
      "epoch": 0.7462615713268455,
      "grad_norm": 0.03580600768327713,
      "learning_rate": 0.0001868135596859662,
      "loss": 0.1278,
      "step": 786
    },
    {
      "epoch": 0.7472110135295514,
      "grad_norm": 0.03586685657501221,
      "learning_rate": 0.00018675866119406042,
      "loss": 0.128,
      "step": 787
    },
    {
      "epoch": 0.7481604557322573,
      "grad_norm": 0.04061829298734665,
      "learning_rate": 0.00018670365676243397,
      "loss": 0.1256,
      "step": 788
    },
    {
      "epoch": 0.7491098979349632,
      "grad_norm": 0.03580275923013687,
      "learning_rate": 0.000186648546458252,
      "loss": 0.1265,
      "step": 789
    },
    {
      "epoch": 0.7500593401376692,
      "grad_norm": 0.04277309030294418,
      "learning_rate": 0.00018659333034880884,
      "loss": 0.1678,
      "step": 790
    },
    {
      "epoch": 0.751008782340375,
      "grad_norm": 0.03997024893760681,
      "learning_rate": 0.00018653800850152808,
      "loss": 0.1251,
      "step": 791
    },
    {
      "epoch": 0.7519582245430809,
      "grad_norm": 0.03809446841478348,
      "learning_rate": 0.0001864825809839624,
      "loss": 0.1354,
      "step": 792
    },
    {
      "epoch": 0.7529076667457868,
      "grad_norm": 0.05002079904079437,
      "learning_rate": 0.00018642704786379354,
      "loss": 0.1492,
      "step": 793
    },
    {
      "epoch": 0.7538571089484928,
      "grad_norm": 0.03734049201011658,
      "learning_rate": 0.00018637140920883217,
      "loss": 0.1328,
      "step": 794
    },
    {
      "epoch": 0.7548065511511987,
      "grad_norm": 0.034287337213754654,
      "learning_rate": 0.00018631566508701784,
      "loss": 0.1261,
      "step": 795
    },
    {
      "epoch": 0.7557559933539045,
      "grad_norm": 0.0322953499853611,
      "learning_rate": 0.00018625981556641882,
      "loss": 0.1251,
      "step": 796
    },
    {
      "epoch": 0.7567054355566105,
      "grad_norm": 0.03397887200117111,
      "learning_rate": 0.00018620386071523218,
      "loss": 0.1226,
      "step": 797
    },
    {
      "epoch": 0.7576548777593164,
      "grad_norm": 0.048685140907764435,
      "learning_rate": 0.0001861478006017836,
      "loss": 0.1677,
      "step": 798
    },
    {
      "epoch": 0.7586043199620223,
      "grad_norm": 0.06330600380897522,
      "learning_rate": 0.00018609163529452723,
      "loss": 0.2012,
      "step": 799
    },
    {
      "epoch": 0.7595537621647283,
      "grad_norm": 0.04262509569525719,
      "learning_rate": 0.00018603536486204564,
      "loss": 0.1271,
      "step": 800
    },
    {
      "epoch": 0.7605032043674341,
      "grad_norm": 0.04021213576197624,
      "learning_rate": 0.00018597898937304988,
      "loss": 0.1426,
      "step": 801
    },
    {
      "epoch": 0.76145264657014,
      "grad_norm": 0.05070256441831589,
      "learning_rate": 0.0001859225088963792,
      "loss": 0.209,
      "step": 802
    },
    {
      "epoch": 0.762402088772846,
      "grad_norm": 0.05344654247164726,
      "learning_rate": 0.00018586592350100113,
      "loss": 0.2093,
      "step": 803
    },
    {
      "epoch": 0.7633515309755519,
      "grad_norm": 0.03695262596011162,
      "learning_rate": 0.0001858092332560112,
      "loss": 0.1264,
      "step": 804
    },
    {
      "epoch": 0.7643009731782577,
      "grad_norm": 0.041282836347818375,
      "learning_rate": 0.00018575243823063306,
      "loss": 0.1275,
      "step": 805
    },
    {
      "epoch": 0.7652504153809637,
      "grad_norm": 0.038663093000650406,
      "learning_rate": 0.00018569553849421828,
      "loss": 0.1285,
      "step": 806
    },
    {
      "epoch": 0.7661998575836696,
      "grad_norm": 0.05324345454573631,
      "learning_rate": 0.00018563853411624628,
      "loss": 0.1691,
      "step": 807
    },
    {
      "epoch": 0.7671492997863755,
      "grad_norm": 0.0382021889090538,
      "learning_rate": 0.00018558142516632425,
      "loss": 0.1299,
      "step": 808
    },
    {
      "epoch": 0.7680987419890815,
      "grad_norm": 0.05059641972184181,
      "learning_rate": 0.00018552421171418712,
      "loss": 0.1685,
      "step": 809
    },
    {
      "epoch": 0.7690481841917873,
      "grad_norm": 0.041547179222106934,
      "learning_rate": 0.00018546689382969737,
      "loss": 0.1322,
      "step": 810
    },
    {
      "epoch": 0.7699976263944932,
      "grad_norm": 0.047367729246616364,
      "learning_rate": 0.00018540947158284503,
      "loss": 0.1662,
      "step": 811
    },
    {
      "epoch": 0.7709470685971992,
      "grad_norm": 0.07076044380664825,
      "learning_rate": 0.00018535194504374754,
      "loss": 0.1749,
      "step": 812
    },
    {
      "epoch": 0.7718965107999051,
      "grad_norm": 0.05194571986794472,
      "learning_rate": 0.00018529431428264973,
      "loss": 0.1595,
      "step": 813
    },
    {
      "epoch": 0.7728459530026109,
      "grad_norm": 0.034832440316677094,
      "learning_rate": 0.00018523657936992367,
      "loss": 0.1279,
      "step": 814
    },
    {
      "epoch": 0.7737953952053169,
      "grad_norm": 0.03709466755390167,
      "learning_rate": 0.00018517874037606862,
      "loss": 0.1161,
      "step": 815
    },
    {
      "epoch": 0.7747448374080228,
      "grad_norm": 0.03341936320066452,
      "learning_rate": 0.00018512079737171086,
      "loss": 0.1277,
      "step": 816
    },
    {
      "epoch": 0.7756942796107287,
      "grad_norm": 0.0411679781973362,
      "learning_rate": 0.00018506275042760382,
      "loss": 0.1284,
      "step": 817
    },
    {
      "epoch": 0.7766437218134347,
      "grad_norm": 0.04416754096746445,
      "learning_rate": 0.00018500459961462773,
      "loss": 0.1647,
      "step": 818
    },
    {
      "epoch": 0.7775931640161405,
      "grad_norm": 0.03680622950196266,
      "learning_rate": 0.00018494634500378966,
      "loss": 0.1371,
      "step": 819
    },
    {
      "epoch": 0.7785426062188464,
      "grad_norm": 0.037342917174100876,
      "learning_rate": 0.0001848879866662235,
      "loss": 0.1308,
      "step": 820
    },
    {
      "epoch": 0.7794920484215523,
      "grad_norm": 0.04237838089466095,
      "learning_rate": 0.00018482952467318976,
      "loss": 0.1623,
      "step": 821
    },
    {
      "epoch": 0.7804414906242583,
      "grad_norm": 0.04467133805155754,
      "learning_rate": 0.00018477095909607546,
      "loss": 0.1651,
      "step": 822
    },
    {
      "epoch": 0.7813909328269641,
      "grad_norm": 0.04672664403915405,
      "learning_rate": 0.00018471229000639424,
      "loss": 0.1735,
      "step": 823
    },
    {
      "epoch": 0.78234037502967,
      "grad_norm": 0.03545104339718819,
      "learning_rate": 0.00018465351747578597,
      "loss": 0.1342,
      "step": 824
    },
    {
      "epoch": 0.783289817232376,
      "grad_norm": 0.04771837964653969,
      "learning_rate": 0.000184594641576017,
      "loss": 0.175,
      "step": 825
    },
    {
      "epoch": 0.7842392594350819,
      "grad_norm": 0.03531822934746742,
      "learning_rate": 0.00018453566237897976,
      "loss": 0.1321,
      "step": 826
    },
    {
      "epoch": 0.7851887016377878,
      "grad_norm": 0.04098953306674957,
      "learning_rate": 0.00018447657995669295,
      "loss": 0.1372,
      "step": 827
    },
    {
      "epoch": 0.7861381438404937,
      "grad_norm": 0.053972020745277405,
      "learning_rate": 0.00018441739438130114,
      "loss": 0.1673,
      "step": 828
    },
    {
      "epoch": 0.7870875860431996,
      "grad_norm": 0.03818265721201897,
      "learning_rate": 0.00018435810572507507,
      "loss": 0.1322,
      "step": 829
    },
    {
      "epoch": 0.7880370282459055,
      "grad_norm": 0.033827316015958786,
      "learning_rate": 0.0001842987140604112,
      "loss": 0.1445,
      "step": 830
    },
    {
      "epoch": 0.7889864704486115,
      "grad_norm": 0.041385356336832047,
      "learning_rate": 0.00018423921945983179,
      "loss": 0.1332,
      "step": 831
    },
    {
      "epoch": 0.7899359126513174,
      "grad_norm": 0.03948013484477997,
      "learning_rate": 0.00018417962199598483,
      "loss": 0.1412,
      "step": 832
    },
    {
      "epoch": 0.7908853548540232,
      "grad_norm": 0.044912584125995636,
      "learning_rate": 0.00018411992174164393,
      "loss": 0.1684,
      "step": 833
    },
    {
      "epoch": 0.7918347970567292,
      "grad_norm": 0.03675195202231407,
      "learning_rate": 0.0001840601187697082,
      "loss": 0.1334,
      "step": 834
    },
    {
      "epoch": 0.7927842392594351,
      "grad_norm": 0.0349728949368,
      "learning_rate": 0.0001840002131532021,
      "loss": 0.1323,
      "step": 835
    },
    {
      "epoch": 0.793733681462141,
      "grad_norm": 0.03763123229146004,
      "learning_rate": 0.0001839402049652755,
      "loss": 0.1283,
      "step": 836
    },
    {
      "epoch": 0.794683123664847,
      "grad_norm": 0.036798711866140366,
      "learning_rate": 0.00018388009427920362,
      "loss": 0.1272,
      "step": 837
    },
    {
      "epoch": 0.7956325658675528,
      "grad_norm": 0.036771487444639206,
      "learning_rate": 0.00018381988116838663,
      "loss": 0.126,
      "step": 838
    },
    {
      "epoch": 0.7965820080702587,
      "grad_norm": 0.060571007430553436,
      "learning_rate": 0.00018375956570634987,
      "loss": 0.1736,
      "step": 839
    },
    {
      "epoch": 0.7975314502729647,
      "grad_norm": 0.0332857109606266,
      "learning_rate": 0.00018369914796674373,
      "loss": 0.1301,
      "step": 840
    },
    {
      "epoch": 0.7984808924756706,
      "grad_norm": 0.045279379934072495,
      "learning_rate": 0.00018363862802334334,
      "loss": 0.1602,
      "step": 841
    },
    {
      "epoch": 0.7994303346783764,
      "grad_norm": 0.03676297515630722,
      "learning_rate": 0.00018357800595004877,
      "loss": 0.1299,
      "step": 842
    },
    {
      "epoch": 0.8003797768810824,
      "grad_norm": 0.05098710209131241,
      "learning_rate": 0.0001835172818208847,
      "loss": 0.1289,
      "step": 843
    },
    {
      "epoch": 0.8013292190837883,
      "grad_norm": 0.047296855598688126,
      "learning_rate": 0.00018345645571000052,
      "loss": 0.1716,
      "step": 844
    },
    {
      "epoch": 0.8022786612864942,
      "grad_norm": 0.03570317476987839,
      "learning_rate": 0.00018339552769167003,
      "loss": 0.1337,
      "step": 845
    },
    {
      "epoch": 0.8032281034892,
      "grad_norm": 0.03380590304732323,
      "learning_rate": 0.00018333449784029156,
      "loss": 0.1218,
      "step": 846
    },
    {
      "epoch": 0.804177545691906,
      "grad_norm": 0.0340820774435997,
      "learning_rate": 0.00018327336623038778,
      "loss": 0.1324,
      "step": 847
    },
    {
      "epoch": 0.8051269878946119,
      "grad_norm": 0.03311248868703842,
      "learning_rate": 0.00018321213293660558,
      "loss": 0.1308,
      "step": 848
    },
    {
      "epoch": 0.8060764300973178,
      "grad_norm": 0.035102471709251404,
      "learning_rate": 0.00018315079803371605,
      "loss": 0.1345,
      "step": 849
    },
    {
      "epoch": 0.8070258723000238,
      "grad_norm": 0.03358345478773117,
      "learning_rate": 0.0001830893615966143,
      "loss": 0.1341,
      "step": 850
    },
    {
      "epoch": 0.8079753145027296,
      "grad_norm": 0.06460444629192352,
      "learning_rate": 0.00018302782370031948,
      "loss": 0.2051,
      "step": 851
    },
    {
      "epoch": 0.8089247567054355,
      "grad_norm": 0.033203575760126114,
      "learning_rate": 0.0001829661844199746,
      "loss": 0.1357,
      "step": 852
    },
    {
      "epoch": 0.8098741989081415,
      "grad_norm": 0.03588509559631348,
      "learning_rate": 0.0001829044438308465,
      "loss": 0.1335,
      "step": 853
    },
    {
      "epoch": 0.8108236411108474,
      "grad_norm": 0.04263895004987717,
      "learning_rate": 0.00018284260200832563,
      "loss": 0.1739,
      "step": 854
    },
    {
      "epoch": 0.8117730833135532,
      "grad_norm": 0.04004021733999252,
      "learning_rate": 0.00018278065902792618,
      "loss": 0.131,
      "step": 855
    },
    {
      "epoch": 0.8127225255162592,
      "grad_norm": 0.035174135118722916,
      "learning_rate": 0.00018271861496528584,
      "loss": 0.1248,
      "step": 856
    },
    {
      "epoch": 0.8136719677189651,
      "grad_norm": 0.03610173240303993,
      "learning_rate": 0.00018265646989616566,
      "loss": 0.1287,
      "step": 857
    },
    {
      "epoch": 0.814621409921671,
      "grad_norm": 0.035818714648485184,
      "learning_rate": 0.00018259422389645008,
      "loss": 0.1335,
      "step": 858
    },
    {
      "epoch": 0.815570852124377,
      "grad_norm": 0.03248162940144539,
      "learning_rate": 0.00018253187704214672,
      "loss": 0.1308,
      "step": 859
    },
    {
      "epoch": 0.8165202943270828,
      "grad_norm": 0.031658854335546494,
      "learning_rate": 0.00018246942940938646,
      "loss": 0.1339,
      "step": 860
    },
    {
      "epoch": 0.8174697365297887,
      "grad_norm": 0.035879503935575485,
      "learning_rate": 0.0001824068810744232,
      "loss": 0.1222,
      "step": 861
    },
    {
      "epoch": 0.8184191787324947,
      "grad_norm": 0.05258049815893173,
      "learning_rate": 0.0001823442321136337,
      "loss": 0.1937,
      "step": 862
    },
    {
      "epoch": 0.8193686209352006,
      "grad_norm": 0.03625549003481865,
      "learning_rate": 0.0001822814826035178,
      "loss": 0.1268,
      "step": 863
    },
    {
      "epoch": 0.8203180631379065,
      "grad_norm": 0.04990942031145096,
      "learning_rate": 0.00018221863262069793,
      "loss": 0.1661,
      "step": 864
    },
    {
      "epoch": 0.8212675053406124,
      "grad_norm": 0.0631263256072998,
      "learning_rate": 0.00018215568224191927,
      "loss": 0.2126,
      "step": 865
    },
    {
      "epoch": 0.8222169475433183,
      "grad_norm": 0.03726550564169884,
      "learning_rate": 0.00018209263154404958,
      "loss": 0.1334,
      "step": 866
    },
    {
      "epoch": 0.8231663897460242,
      "grad_norm": 0.040383536368608475,
      "learning_rate": 0.0001820294806040792,
      "loss": 0.1619,
      "step": 867
    },
    {
      "epoch": 0.8241158319487302,
      "grad_norm": 0.03525468334555626,
      "learning_rate": 0.00018196622949912078,
      "loss": 0.1263,
      "step": 868
    },
    {
      "epoch": 0.825065274151436,
      "grad_norm": 0.03585941344499588,
      "learning_rate": 0.00018190287830640933,
      "loss": 0.1245,
      "step": 869
    },
    {
      "epoch": 0.8260147163541419,
      "grad_norm": 0.03207286074757576,
      "learning_rate": 0.00018183942710330202,
      "loss": 0.1262,
      "step": 870
    },
    {
      "epoch": 0.8269641585568479,
      "grad_norm": 0.04638965427875519,
      "learning_rate": 0.00018177587596727822,
      "loss": 0.1653,
      "step": 871
    },
    {
      "epoch": 0.8279136007595538,
      "grad_norm": 0.030705489218235016,
      "learning_rate": 0.00018171222497593922,
      "loss": 0.1276,
      "step": 872
    },
    {
      "epoch": 0.8288630429622597,
      "grad_norm": 0.03139735013246536,
      "learning_rate": 0.00018164847420700837,
      "loss": 0.1344,
      "step": 873
    },
    {
      "epoch": 0.8298124851649655,
      "grad_norm": 0.039802953600883484,
      "learning_rate": 0.00018158462373833078,
      "loss": 0.1373,
      "step": 874
    },
    {
      "epoch": 0.8307619273676715,
      "grad_norm": 0.03284341096878052,
      "learning_rate": 0.00018152067364787325,
      "loss": 0.1236,
      "step": 875
    },
    {
      "epoch": 0.8317113695703774,
      "grad_norm": 0.056572429835796356,
      "learning_rate": 0.0001814566240137244,
      "loss": 0.1665,
      "step": 876
    },
    {
      "epoch": 0.8326608117730833,
      "grad_norm": 0.03471997380256653,
      "learning_rate": 0.00018139247491409424,
      "loss": 0.13,
      "step": 877
    },
    {
      "epoch": 0.8336102539757893,
      "grad_norm": 0.03601829707622528,
      "learning_rate": 0.00018132822642731426,
      "loss": 0.127,
      "step": 878
    },
    {
      "epoch": 0.8345596961784951,
      "grad_norm": 0.032708846032619476,
      "learning_rate": 0.00018126387863183737,
      "loss": 0.1264,
      "step": 879
    },
    {
      "epoch": 0.835509138381201,
      "grad_norm": 0.035340629518032074,
      "learning_rate": 0.00018119943160623773,
      "loss": 0.1334,
      "step": 880
    },
    {
      "epoch": 0.836458580583907,
      "grad_norm": 0.030397990718483925,
      "learning_rate": 0.00018113488542921061,
      "loss": 0.1254,
      "step": 881
    },
    {
      "epoch": 0.8374080227866129,
      "grad_norm": 0.03871999308466911,
      "learning_rate": 0.00018107024017957244,
      "loss": 0.132,
      "step": 882
    },
    {
      "epoch": 0.8383574649893187,
      "grad_norm": 0.04331507533788681,
      "learning_rate": 0.00018100549593626052,
      "loss": 0.1354,
      "step": 883
    },
    {
      "epoch": 0.8393069071920247,
      "grad_norm": 0.03445984423160553,
      "learning_rate": 0.00018094065277833314,
      "loss": 0.129,
      "step": 884
    },
    {
      "epoch": 0.8402563493947306,
      "grad_norm": 0.03362146392464638,
      "learning_rate": 0.0001808757107849693,
      "loss": 0.125,
      "step": 885
    },
    {
      "epoch": 0.8412057915974365,
      "grad_norm": 0.041491370648145676,
      "learning_rate": 0.00018081067003546876,
      "loss": 0.1314,
      "step": 886
    },
    {
      "epoch": 0.8421552338001425,
      "grad_norm": 0.034560974687337875,
      "learning_rate": 0.00018074553060925175,
      "loss": 0.126,
      "step": 887
    },
    {
      "epoch": 0.8431046760028483,
      "grad_norm": 0.049931105226278305,
      "learning_rate": 0.0001806802925858591,
      "loss": 0.1709,
      "step": 888
    },
    {
      "epoch": 0.8440541182055542,
      "grad_norm": 0.035841234028339386,
      "learning_rate": 0.00018061495604495195,
      "loss": 0.1396,
      "step": 889
    },
    {
      "epoch": 0.8450035604082602,
      "grad_norm": 0.03359563648700714,
      "learning_rate": 0.00018054952106631188,
      "loss": 0.1323,
      "step": 890
    },
    {
      "epoch": 0.8459530026109661,
      "grad_norm": 0.03390706703066826,
      "learning_rate": 0.00018048398772984046,
      "loss": 0.1287,
      "step": 891
    },
    {
      "epoch": 0.8469024448136719,
      "grad_norm": 0.0474267303943634,
      "learning_rate": 0.00018041835611555957,
      "loss": 0.1693,
      "step": 892
    },
    {
      "epoch": 0.8478518870163779,
      "grad_norm": 0.0334562286734581,
      "learning_rate": 0.00018035262630361097,
      "loss": 0.1295,
      "step": 893
    },
    {
      "epoch": 0.8488013292190838,
      "grad_norm": 0.03383705019950867,
      "learning_rate": 0.00018028679837425634,
      "loss": 0.1259,
      "step": 894
    },
    {
      "epoch": 0.8497507714217897,
      "grad_norm": 0.03384934738278389,
      "learning_rate": 0.00018022087240787728,
      "loss": 0.1218,
      "step": 895
    },
    {
      "epoch": 0.8507002136244957,
      "grad_norm": 0.04088185727596283,
      "learning_rate": 0.0001801548484849749,
      "loss": 0.1343,
      "step": 896
    },
    {
      "epoch": 0.8516496558272015,
      "grad_norm": 0.05273745581507683,
      "learning_rate": 0.00018008872668617013,
      "loss": 0.1688,
      "step": 897
    },
    {
      "epoch": 0.8525990980299074,
      "grad_norm": 0.03253067284822464,
      "learning_rate": 0.00018002250709220325,
      "loss": 0.1333,
      "step": 898
    },
    {
      "epoch": 0.8535485402326133,
      "grad_norm": 0.03033488616347313,
      "learning_rate": 0.0001799561897839341,
      "loss": 0.1292,
      "step": 899
    },
    {
      "epoch": 0.8544979824353193,
      "grad_norm": 0.033945854753255844,
      "learning_rate": 0.00017988977484234174,
      "loss": 0.1415,
      "step": 900
    },
    {
      "epoch": 0.8554474246380251,
      "grad_norm": 0.04456301033496857,
      "learning_rate": 0.0001798232623485244,
      "loss": 0.1762,
      "step": 901
    },
    {
      "epoch": 0.856396866840731,
      "grad_norm": 0.03912430256605148,
      "learning_rate": 0.00017975665238369962,
      "loss": 0.142,
      "step": 902
    },
    {
      "epoch": 0.857346309043437,
      "grad_norm": 0.032741378992795944,
      "learning_rate": 0.0001796899450292038,
      "loss": 0.1212,
      "step": 903
    },
    {
      "epoch": 0.8582957512461429,
      "grad_norm": 0.047262486070394516,
      "learning_rate": 0.0001796231403664923,
      "loss": 0.1762,
      "step": 904
    },
    {
      "epoch": 0.8592451934488488,
      "grad_norm": 0.03242664784193039,
      "learning_rate": 0.00017955623847713928,
      "loss": 0.1323,
      "step": 905
    },
    {
      "epoch": 0.8601946356515547,
      "grad_norm": 0.030855266377329826,
      "learning_rate": 0.0001794892394428377,
      "loss": 0.1258,
      "step": 906
    },
    {
      "epoch": 0.8611440778542606,
      "grad_norm": 0.03360726311802864,
      "learning_rate": 0.00017942214334539907,
      "loss": 0.1325,
      "step": 907
    },
    {
      "epoch": 0.8620935200569665,
      "grad_norm": 0.032459285110235214,
      "learning_rate": 0.00017935495026675345,
      "loss": 0.1267,
      "step": 908
    },
    {
      "epoch": 0.8630429622596725,
      "grad_norm": 0.04160567373037338,
      "learning_rate": 0.00017928766028894928,
      "loss": 0.1255,
      "step": 909
    },
    {
      "epoch": 0.8639924044623783,
      "grad_norm": 0.03851740434765816,
      "learning_rate": 0.0001792202734941534,
      "loss": 0.1212,
      "step": 910
    },
    {
      "epoch": 0.8649418466650842,
      "grad_norm": 0.03414515405893326,
      "learning_rate": 0.00017915278996465084,
      "loss": 0.1239,
      "step": 911
    },
    {
      "epoch": 0.8658912888677902,
      "grad_norm": 0.17817381024360657,
      "learning_rate": 0.0001790852097828447,
      "loss": 0.1336,
      "step": 912
    },
    {
      "epoch": 0.8668407310704961,
      "grad_norm": 0.03545542433857918,
      "learning_rate": 0.0001790175330312562,
      "loss": 0.1353,
      "step": 913
    },
    {
      "epoch": 0.867790173273202,
      "grad_norm": 0.03207210451364517,
      "learning_rate": 0.00017894975979252436,
      "loss": 0.1243,
      "step": 914
    },
    {
      "epoch": 0.868739615475908,
      "grad_norm": 0.046145763248205185,
      "learning_rate": 0.0001788818901494061,
      "loss": 0.1668,
      "step": 915
    },
    {
      "epoch": 0.8696890576786138,
      "grad_norm": 0.03051767125725746,
      "learning_rate": 0.00017881392418477607,
      "loss": 0.1311,
      "step": 916
    },
    {
      "epoch": 0.8706384998813197,
      "grad_norm": 0.03918071463704109,
      "learning_rate": 0.00017874586198162647,
      "loss": 0.1692,
      "step": 917
    },
    {
      "epoch": 0.8715879420840257,
      "grad_norm": 0.03229302540421486,
      "learning_rate": 0.0001786777036230671,
      "loss": 0.1276,
      "step": 918
    },
    {
      "epoch": 0.8725373842867316,
      "grad_norm": 0.032113853842020035,
      "learning_rate": 0.00017860944919232503,
      "loss": 0.1256,
      "step": 919
    },
    {
      "epoch": 0.8734868264894374,
      "grad_norm": 0.03725959360599518,
      "learning_rate": 0.00017854109877274484,
      "loss": 0.1363,
      "step": 920
    },
    {
      "epoch": 0.8744362686921434,
      "grad_norm": 0.02805374562740326,
      "learning_rate": 0.00017847265244778817,
      "loss": 0.1259,
      "step": 921
    },
    {
      "epoch": 0.8753857108948493,
      "grad_norm": 0.03541216999292374,
      "learning_rate": 0.00017840411030103383,
      "loss": 0.1288,
      "step": 922
    },
    {
      "epoch": 0.8763351530975552,
      "grad_norm": 0.04267534613609314,
      "learning_rate": 0.0001783354724161776,
      "loss": 0.1601,
      "step": 923
    },
    {
      "epoch": 0.8772845953002611,
      "grad_norm": 0.04881501942873001,
      "learning_rate": 0.00017826673887703223,
      "loss": 0.1686,
      "step": 924
    },
    {
      "epoch": 0.878234037502967,
      "grad_norm": 0.0337185375392437,
      "learning_rate": 0.00017819790976752718,
      "loss": 0.131,
      "step": 925
    },
    {
      "epoch": 0.8791834797056729,
      "grad_norm": 0.033597834408283234,
      "learning_rate": 0.00017812898517170872,
      "loss": 0.1365,
      "step": 926
    },
    {
      "epoch": 0.8801329219083788,
      "grad_norm": 0.047949645668268204,
      "learning_rate": 0.00017805996517373962,
      "loss": 0.178,
      "step": 927
    },
    {
      "epoch": 0.8810823641110848,
      "grad_norm": 0.03533579409122467,
      "learning_rate": 0.00017799084985789916,
      "loss": 0.1281,
      "step": 928
    },
    {
      "epoch": 0.8820318063137906,
      "grad_norm": 0.03638564050197601,
      "learning_rate": 0.0001779216393085831,
      "loss": 0.136,
      "step": 929
    },
    {
      "epoch": 0.8829812485164965,
      "grad_norm": 0.034585777670145035,
      "learning_rate": 0.00017785233361030333,
      "loss": 0.1221,
      "step": 930
    },
    {
      "epoch": 0.8839306907192025,
      "grad_norm": 0.03344082459807396,
      "learning_rate": 0.00017778293284768807,
      "loss": 0.1335,
      "step": 931
    },
    {
      "epoch": 0.8848801329219084,
      "grad_norm": 0.029832901433110237,
      "learning_rate": 0.00017771343710548155,
      "loss": 0.131,
      "step": 932
    },
    {
      "epoch": 0.8858295751246142,
      "grad_norm": 0.030377686023712158,
      "learning_rate": 0.00017764384646854405,
      "loss": 0.1216,
      "step": 933
    },
    {
      "epoch": 0.8867790173273202,
      "grad_norm": 0.036345310509204865,
      "learning_rate": 0.0001775741610218516,
      "loss": 0.1289,
      "step": 934
    },
    {
      "epoch": 0.8877284595300261,
      "grad_norm": 0.04609441012144089,
      "learning_rate": 0.00017750438085049606,
      "loss": 0.1598,
      "step": 935
    },
    {
      "epoch": 0.888677901732732,
      "grad_norm": 0.03439109027385712,
      "learning_rate": 0.00017743450603968506,
      "loss": 0.1316,
      "step": 936
    },
    {
      "epoch": 0.889627343935438,
      "grad_norm": 0.07119124382734299,
      "learning_rate": 0.0001773645366747416,
      "loss": 0.1664,
      "step": 937
    },
    {
      "epoch": 0.8905767861381438,
      "grad_norm": 0.03385334461927414,
      "learning_rate": 0.0001772944728411043,
      "loss": 0.1294,
      "step": 938
    },
    {
      "epoch": 0.8915262283408497,
      "grad_norm": 0.033481206744909286,
      "learning_rate": 0.00017722431462432705,
      "loss": 0.1218,
      "step": 939
    },
    {
      "epoch": 0.8924756705435557,
      "grad_norm": 0.03365306556224823,
      "learning_rate": 0.00017715406211007902,
      "loss": 0.1295,
      "step": 940
    },
    {
      "epoch": 0.8934251127462616,
      "grad_norm": 0.03675035014748573,
      "learning_rate": 0.0001770837153841445,
      "loss": 0.1237,
      "step": 941
    },
    {
      "epoch": 0.8943745549489674,
      "grad_norm": 0.03245026618242264,
      "learning_rate": 0.00017701327453242284,
      "loss": 0.1304,
      "step": 942
    },
    {
      "epoch": 0.8953239971516734,
      "grad_norm": 0.03346354141831398,
      "learning_rate": 0.00017694273964092837,
      "loss": 0.1274,
      "step": 943
    },
    {
      "epoch": 0.8962734393543793,
      "grad_norm": 0.048563096672296524,
      "learning_rate": 0.00017687211079579017,
      "loss": 0.1719,
      "step": 944
    },
    {
      "epoch": 0.8972228815570852,
      "grad_norm": 0.04709222912788391,
      "learning_rate": 0.0001768013880832521,
      "loss": 0.1281,
      "step": 945
    },
    {
      "epoch": 0.8981723237597912,
      "grad_norm": 0.030402177944779396,
      "learning_rate": 0.00017673057158967254,
      "loss": 0.1229,
      "step": 946
    },
    {
      "epoch": 0.899121765962497,
      "grad_norm": 0.03577994927763939,
      "learning_rate": 0.00017665966140152458,
      "loss": 0.1255,
      "step": 947
    },
    {
      "epoch": 0.9000712081652029,
      "grad_norm": 0.04566454887390137,
      "learning_rate": 0.00017658865760539552,
      "loss": 0.1617,
      "step": 948
    },
    {
      "epoch": 0.9010206503679089,
      "grad_norm": 0.04077988117933273,
      "learning_rate": 0.00017651756028798713,
      "loss": 0.1619,
      "step": 949
    },
    {
      "epoch": 0.9019700925706148,
      "grad_norm": 0.045764826238155365,
      "learning_rate": 0.00017644636953611522,
      "loss": 0.1608,
      "step": 950
    },
    {
      "epoch": 0.9029195347733207,
      "grad_norm": 0.035656195133924484,
      "learning_rate": 0.0001763750854367098,
      "loss": 0.1288,
      "step": 951
    },
    {
      "epoch": 0.9038689769760266,
      "grad_norm": 0.04220154508948326,
      "learning_rate": 0.0001763037080768148,
      "loss": 0.1688,
      "step": 952
    },
    {
      "epoch": 0.9048184191787325,
      "grad_norm": 0.03406943380832672,
      "learning_rate": 0.0001762322375435881,
      "loss": 0.1314,
      "step": 953
    },
    {
      "epoch": 0.9057678613814384,
      "grad_norm": 0.037942539900541306,
      "learning_rate": 0.00017616067392430126,
      "loss": 0.1342,
      "step": 954
    },
    {
      "epoch": 0.9067173035841443,
      "grad_norm": 0.06412187963724136,
      "learning_rate": 0.00017608901730633964,
      "loss": 0.2207,
      "step": 955
    },
    {
      "epoch": 0.9076667457868502,
      "grad_norm": 0.0313476026058197,
      "learning_rate": 0.00017601726777720202,
      "loss": 0.1249,
      "step": 956
    },
    {
      "epoch": 0.9086161879895561,
      "grad_norm": 0.0276046060025692,
      "learning_rate": 0.00017594542542450072,
      "loss": 0.1212,
      "step": 957
    },
    {
      "epoch": 0.909565630192262,
      "grad_norm": 0.032439909875392914,
      "learning_rate": 0.00017587349033596134,
      "loss": 0.1277,
      "step": 958
    },
    {
      "epoch": 0.910515072394968,
      "grad_norm": 0.039732351899147034,
      "learning_rate": 0.00017580146259942278,
      "loss": 0.1222,
      "step": 959
    },
    {
      "epoch": 0.9114645145976739,
      "grad_norm": 0.033820103853940964,
      "learning_rate": 0.00017572934230283707,
      "loss": 0.1246,
      "step": 960
    },
    {
      "epoch": 0.9124139568003797,
      "grad_norm": 0.03361973166465759,
      "learning_rate": 0.00017565712953426918,
      "loss": 0.1328,
      "step": 961
    },
    {
      "epoch": 0.9133633990030857,
      "grad_norm": 0.0338444709777832,
      "learning_rate": 0.00017558482438189712,
      "loss": 0.1306,
      "step": 962
    },
    {
      "epoch": 0.9143128412057916,
      "grad_norm": 0.04851710423827171,
      "learning_rate": 0.0001755124269340116,
      "loss": 0.1765,
      "step": 963
    },
    {
      "epoch": 0.9152622834084975,
      "grad_norm": 0.03290700539946556,
      "learning_rate": 0.0001754399372790161,
      "loss": 0.1386,
      "step": 964
    },
    {
      "epoch": 0.9162117256112035,
      "grad_norm": 0.034565720707178116,
      "learning_rate": 0.00017536735550542661,
      "loss": 0.1212,
      "step": 965
    },
    {
      "epoch": 0.9171611678139093,
      "grad_norm": 0.04606771841645241,
      "learning_rate": 0.00017529468170187176,
      "loss": 0.1567,
      "step": 966
    },
    {
      "epoch": 0.9181106100166152,
      "grad_norm": 0.03279464691877365,
      "learning_rate": 0.00017522191595709238,
      "loss": 0.1214,
      "step": 967
    },
    {
      "epoch": 0.9190600522193212,
      "grad_norm": 0.036700885742902756,
      "learning_rate": 0.00017514905835994168,
      "loss": 0.1314,
      "step": 968
    },
    {
      "epoch": 0.9200094944220271,
      "grad_norm": 0.04098424315452576,
      "learning_rate": 0.00017507610899938501,
      "loss": 0.164,
      "step": 969
    },
    {
      "epoch": 0.9209589366247329,
      "grad_norm": 0.033782679587602615,
      "learning_rate": 0.0001750030679644997,
      "loss": 0.1376,
      "step": 970
    },
    {
      "epoch": 0.9219083788274389,
      "grad_norm": 0.03304159641265869,
      "learning_rate": 0.00017492993534447515,
      "loss": 0.1244,
      "step": 971
    },
    {
      "epoch": 0.9228578210301448,
      "grad_norm": 0.03158386051654816,
      "learning_rate": 0.0001748567112286125,
      "loss": 0.1345,
      "step": 972
    },
    {
      "epoch": 0.9238072632328507,
      "grad_norm": 0.03615015745162964,
      "learning_rate": 0.00017478339570632458,
      "loss": 0.1434,
      "step": 973
    },
    {
      "epoch": 0.9247567054355567,
      "grad_norm": 0.033553823828697205,
      "learning_rate": 0.00017470998886713596,
      "loss": 0.1292,
      "step": 974
    },
    {
      "epoch": 0.9257061476382625,
      "grad_norm": 0.03953874111175537,
      "learning_rate": 0.00017463649080068266,
      "loss": 0.1621,
      "step": 975
    },
    {
      "epoch": 0.9266555898409684,
      "grad_norm": 0.03288433700799942,
      "learning_rate": 0.00017456290159671202,
      "loss": 0.1357,
      "step": 976
    },
    {
      "epoch": 0.9276050320436744,
      "grad_norm": 0.03154657408595085,
      "learning_rate": 0.00017448922134508275,
      "loss": 0.1322,
      "step": 977
    },
    {
      "epoch": 0.9285544742463803,
      "grad_norm": 0.05669796094298363,
      "learning_rate": 0.00017441545013576477,
      "loss": 0.1761,
      "step": 978
    },
    {
      "epoch": 0.9295039164490861,
      "grad_norm": 0.026679178699851036,
      "learning_rate": 0.00017434158805883896,
      "loss": 0.1295,
      "step": 979
    },
    {
      "epoch": 0.930453358651792,
      "grad_norm": 0.03597673401236534,
      "learning_rate": 0.00017426763520449721,
      "loss": 0.1265,
      "step": 980
    },
    {
      "epoch": 0.931402800854498,
      "grad_norm": 0.03097674809396267,
      "learning_rate": 0.0001741935916630423,
      "loss": 0.1339,
      "step": 981
    },
    {
      "epoch": 0.9323522430572039,
      "grad_norm": 0.030252935364842415,
      "learning_rate": 0.00017411945752488766,
      "loss": 0.1247,
      "step": 982
    },
    {
      "epoch": 0.9333016852599098,
      "grad_norm": 0.03460918739438057,
      "learning_rate": 0.00017404523288055743,
      "loss": 0.1323,
      "step": 983
    },
    {
      "epoch": 0.9342511274626157,
      "grad_norm": 0.035575591027736664,
      "learning_rate": 0.00017397091782068622,
      "loss": 0.1258,
      "step": 984
    },
    {
      "epoch": 0.9352005696653216,
      "grad_norm": 0.05128021538257599,
      "learning_rate": 0.00017389651243601904,
      "loss": 0.1364,
      "step": 985
    },
    {
      "epoch": 0.9361500118680275,
      "grad_norm": 0.04355672374367714,
      "learning_rate": 0.00017382201681741122,
      "loss": 0.1656,
      "step": 986
    },
    {
      "epoch": 0.9370994540707335,
      "grad_norm": 0.03357682749629021,
      "learning_rate": 0.0001737474310558282,
      "loss": 0.1285,
      "step": 987
    },
    {
      "epoch": 0.9380488962734393,
      "grad_norm": 0.10623644292354584,
      "learning_rate": 0.00017367275524234565,
      "loss": 0.1726,
      "step": 988
    },
    {
      "epoch": 0.9389983384761452,
      "grad_norm": 0.03605256229639053,
      "learning_rate": 0.00017359798946814907,
      "loss": 0.1358,
      "step": 989
    },
    {
      "epoch": 0.9399477806788512,
      "grad_norm": 0.039663393050432205,
      "learning_rate": 0.00017352313382453378,
      "loss": 0.1299,
      "step": 990
    },
    {
      "epoch": 0.9408972228815571,
      "grad_norm": 0.12416961044073105,
      "learning_rate": 0.000173448188402905,
      "loss": 0.1666,
      "step": 991
    },
    {
      "epoch": 0.941846665084263,
      "grad_norm": 0.045010216534137726,
      "learning_rate": 0.00017337315329477742,
      "loss": 0.1733,
      "step": 992
    },
    {
      "epoch": 0.9427961072869689,
      "grad_norm": 0.03456486761569977,
      "learning_rate": 0.0001732980285917753,
      "loss": 0.1312,
      "step": 993
    },
    {
      "epoch": 0.9437455494896748,
      "grad_norm": 0.039561979472637177,
      "learning_rate": 0.00017322281438563234,
      "loss": 0.1354,
      "step": 994
    },
    {
      "epoch": 0.9446949916923807,
      "grad_norm": 0.043275121599435806,
      "learning_rate": 0.00017314751076819146,
      "loss": 0.1651,
      "step": 995
    },
    {
      "epoch": 0.9456444338950867,
      "grad_norm": 0.0392397940158844,
      "learning_rate": 0.00017307211783140482,
      "loss": 0.1647,
      "step": 996
    },
    {
      "epoch": 0.9465938760977926,
      "grad_norm": 0.03428703919053078,
      "learning_rate": 0.0001729966356673336,
      "loss": 0.128,
      "step": 997
    },
    {
      "epoch": 0.9475433183004984,
      "grad_norm": 0.03511650487780571,
      "learning_rate": 0.000172921064368148,
      "loss": 0.1297,
      "step": 998
    },
    {
      "epoch": 0.9484927605032044,
      "grad_norm": 0.030319994315505028,
      "learning_rate": 0.00017284540402612696,
      "loss": 0.1269,
      "step": 999
    },
    {
      "epoch": 0.9494422027059103,
      "grad_norm": 0.03071141429245472,
      "learning_rate": 0.00017276965473365827,
      "loss": 0.1224,
      "step": 1000
    },
    {
      "epoch": 0.9503916449086162,
      "grad_norm": 0.04097789525985718,
      "learning_rate": 0.00017269381658323822,
      "loss": 0.1597,
      "step": 1001
    },
    {
      "epoch": 0.9513410871113221,
      "grad_norm": 0.03407077491283417,
      "learning_rate": 0.00017261788966747168,
      "loss": 0.1268,
      "step": 1002
    },
    {
      "epoch": 0.952290529314028,
      "grad_norm": 0.035802800208330154,
      "learning_rate": 0.00017254187407907189,
      "loss": 0.1338,
      "step": 1003
    },
    {
      "epoch": 0.9532399715167339,
      "grad_norm": 0.030097633600234985,
      "learning_rate": 0.00017246576991086034,
      "loss": 0.1222,
      "step": 1004
    },
    {
      "epoch": 0.9541894137194399,
      "grad_norm": 0.047994308173656464,
      "learning_rate": 0.0001723895772557667,
      "loss": 0.1632,
      "step": 1005
    },
    {
      "epoch": 0.9551388559221458,
      "grad_norm": 0.03451845049858093,
      "learning_rate": 0.00017231329620682876,
      "loss": 0.1278,
      "step": 1006
    },
    {
      "epoch": 0.9560882981248516,
      "grad_norm": 0.036820750683546066,
      "learning_rate": 0.00017223692685719213,
      "loss": 0.1355,
      "step": 1007
    },
    {
      "epoch": 0.9570377403275575,
      "grad_norm": 0.03521284461021423,
      "learning_rate": 0.0001721604693001103,
      "loss": 0.1383,
      "step": 1008
    },
    {
      "epoch": 0.9579871825302635,
      "grad_norm": 0.036953702569007874,
      "learning_rate": 0.00017208392362894447,
      "loss": 0.1352,
      "step": 1009
    },
    {
      "epoch": 0.9589366247329694,
      "grad_norm": 0.031185979023575783,
      "learning_rate": 0.00017200728993716345,
      "loss": 0.1262,
      "step": 1010
    },
    {
      "epoch": 0.9598860669356752,
      "grad_norm": 0.030822455883026123,
      "learning_rate": 0.00017193056831834346,
      "loss": 0.1211,
      "step": 1011
    },
    {
      "epoch": 0.9608355091383812,
      "grad_norm": 0.031467005610466,
      "learning_rate": 0.0001718537588661682,
      "loss": 0.1271,
      "step": 1012
    },
    {
      "epoch": 0.9617849513410871,
      "grad_norm": 0.03788928687572479,
      "learning_rate": 0.0001717768616744285,
      "loss": 0.1413,
      "step": 1013
    },
    {
      "epoch": 0.962734393543793,
      "grad_norm": 0.03359632566571236,
      "learning_rate": 0.00017169987683702243,
      "loss": 0.1276,
      "step": 1014
    },
    {
      "epoch": 0.963683835746499,
      "grad_norm": 0.03274601325392723,
      "learning_rate": 0.000171622804447955,
      "loss": 0.1308,
      "step": 1015
    },
    {
      "epoch": 0.9646332779492048,
      "grad_norm": 0.03634633496403694,
      "learning_rate": 0.0001715456446013382,
      "loss": 0.1384,
      "step": 1016
    },
    {
      "epoch": 0.9655827201519107,
      "grad_norm": 0.02978476695716381,
      "learning_rate": 0.00017146839739139077,
      "loss": 0.1301,
      "step": 1017
    },
    {
      "epoch": 0.9665321623546167,
      "grad_norm": 0.03389682248234749,
      "learning_rate": 0.0001713910629124381,
      "loss": 0.1264,
      "step": 1018
    },
    {
      "epoch": 0.9674816045573226,
      "grad_norm": 0.03452256694436073,
      "learning_rate": 0.00017131364125891224,
      "loss": 0.1317,
      "step": 1019
    },
    {
      "epoch": 0.9684310467600284,
      "grad_norm": 0.03967840224504471,
      "learning_rate": 0.00017123613252535163,
      "loss": 0.1308,
      "step": 1020
    },
    {
      "epoch": 0.9693804889627344,
      "grad_norm": 0.04021480306982994,
      "learning_rate": 0.00017115853680640098,
      "loss": 0.1637,
      "step": 1021
    },
    {
      "epoch": 0.9703299311654403,
      "grad_norm": 0.02766057476401329,
      "learning_rate": 0.00017108085419681132,
      "loss": 0.1239,
      "step": 1022
    },
    {
      "epoch": 0.9712793733681462,
      "grad_norm": 0.029945319518446922,
      "learning_rate": 0.00017100308479143974,
      "loss": 0.1236,
      "step": 1023
    },
    {
      "epoch": 0.9722288155708522,
      "grad_norm": 0.03135136887431145,
      "learning_rate": 0.00017092522868524928,
      "loss": 0.1203,
      "step": 1024
    },
    {
      "epoch": 0.973178257773558,
      "grad_norm": 0.04876153543591499,
      "learning_rate": 0.00017084728597330893,
      "loss": 0.1802,
      "step": 1025
    },
    {
      "epoch": 0.9741276999762639,
      "grad_norm": 0.042958084493875504,
      "learning_rate": 0.00017076925675079335,
      "loss": 0.1656,
      "step": 1026
    },
    {
      "epoch": 0.9750771421789699,
      "grad_norm": 0.04739035665988922,
      "learning_rate": 0.00017069114111298287,
      "loss": 0.167,
      "step": 1027
    },
    {
      "epoch": 0.9760265843816758,
      "grad_norm": 0.042968571186065674,
      "learning_rate": 0.00017061293915526335,
      "loss": 0.173,
      "step": 1028
    },
    {
      "epoch": 0.9769760265843817,
      "grad_norm": 0.07628528028726578,
      "learning_rate": 0.00017053465097312606,
      "loss": 0.1351,
      "step": 1029
    },
    {
      "epoch": 0.9779254687870876,
      "grad_norm": 0.032479528337717056,
      "learning_rate": 0.00017045627666216755,
      "loss": 0.1294,
      "step": 1030
    },
    {
      "epoch": 0.9788749109897935,
      "grad_norm": 0.029842333868145943,
      "learning_rate": 0.0001703778163180895,
      "loss": 0.1264,
      "step": 1031
    },
    {
      "epoch": 0.9798243531924994,
      "grad_norm": 0.03622937202453613,
      "learning_rate": 0.00017029927003669868,
      "loss": 0.1287,
      "step": 1032
    },
    {
      "epoch": 0.9807737953952054,
      "grad_norm": 0.05245399475097656,
      "learning_rate": 0.00017022063791390684,
      "loss": 0.1923,
      "step": 1033
    },
    {
      "epoch": 0.9817232375979112,
      "grad_norm": 0.03335704281926155,
      "learning_rate": 0.00017014192004573047,
      "loss": 0.1241,
      "step": 1034
    },
    {
      "epoch": 0.9826726798006171,
      "grad_norm": 0.03645642474293709,
      "learning_rate": 0.0001700631165282908,
      "loss": 0.1321,
      "step": 1035
    },
    {
      "epoch": 0.983622122003323,
      "grad_norm": 0.05331774801015854,
      "learning_rate": 0.00016998422745781363,
      "loss": 0.169,
      "step": 1036
    },
    {
      "epoch": 0.984571564206029,
      "grad_norm": 0.04615236446261406,
      "learning_rate": 0.00016990525293062927,
      "loss": 0.1623,
      "step": 1037
    },
    {
      "epoch": 0.9855210064087349,
      "grad_norm": 0.047434594482183456,
      "learning_rate": 0.00016982619304317233,
      "loss": 0.1303,
      "step": 1038
    },
    {
      "epoch": 0.9864704486114407,
      "grad_norm": 0.03144746273756027,
      "learning_rate": 0.00016974704789198168,
      "loss": 0.1203,
      "step": 1039
    },
    {
      "epoch": 0.9874198908141467,
      "grad_norm": 0.04035501554608345,
      "learning_rate": 0.00016966781757370028,
      "loss": 0.1246,
      "step": 1040
    },
    {
      "epoch": 0.9883693330168526,
      "grad_norm": 0.03864790499210358,
      "learning_rate": 0.0001695885021850751,
      "loss": 0.1305,
      "step": 1041
    },
    {
      "epoch": 0.9893187752195585,
      "grad_norm": 0.03547806292772293,
      "learning_rate": 0.00016950910182295705,
      "loss": 0.1319,
      "step": 1042
    },
    {
      "epoch": 0.9902682174222645,
      "grad_norm": 0.03442002460360527,
      "learning_rate": 0.0001694296165843007,
      "loss": 0.1344,
      "step": 1043
    },
    {
      "epoch": 0.9912176596249703,
      "grad_norm": 0.0333750881254673,
      "learning_rate": 0.00016935004656616425,
      "loss": 0.1278,
      "step": 1044
    },
    {
      "epoch": 0.9921671018276762,
      "grad_norm": 0.03143637254834175,
      "learning_rate": 0.00016927039186570954,
      "loss": 0.1237,
      "step": 1045
    },
    {
      "epoch": 0.9931165440303822,
      "grad_norm": 0.03841651603579521,
      "learning_rate": 0.0001691906525802017,
      "loss": 0.1395,
      "step": 1046
    },
    {
      "epoch": 0.9940659862330881,
      "grad_norm": 0.03443494066596031,
      "learning_rate": 0.00016911082880700926,
      "loss": 0.1422,
      "step": 1047
    },
    {
      "epoch": 0.9950154284357939,
      "grad_norm": 0.027661804109811783,
      "learning_rate": 0.0001690309206436038,
      "loss": 0.1251,
      "step": 1048
    },
    {
      "epoch": 0.9959648706384999,
      "grad_norm": 0.036862559616565704,
      "learning_rate": 0.00016895092818756006,
      "loss": 0.1337,
      "step": 1049
    },
    {
      "epoch": 0.9969143128412058,
      "grad_norm": 0.035234466195106506,
      "learning_rate": 0.00016887085153655554,
      "loss": 0.1267,
      "step": 1050
    },
    {
      "epoch": 0.9978637550439117,
      "grad_norm": 0.032372791320085526,
      "learning_rate": 0.00016879069078837075,
      "loss": 0.1254,
      "step": 1051
    },
    {
      "epoch": 0.9988131972466177,
      "grad_norm": 0.037299785763025284,
      "learning_rate": 0.00016871044604088877,
      "loss": 0.1324,
      "step": 1052
    },
    {
      "epoch": 0.9997626394493235,
      "grad_norm": 0.03843718767166138,
      "learning_rate": 0.00016863011739209527,
      "loss": 0.1328,
      "step": 1053
    },
    {
      "epoch": 1.0007120816520294,
      "grad_norm": 0.03160862624645233,
      "learning_rate": 0.00016854970494007836,
      "loss": 0.1284,
      "step": 1054
    },
    {
      "epoch": 1.0016615238547353,
      "grad_norm": 0.05188068002462387,
      "learning_rate": 0.00016846920878302852,
      "loss": 0.1775,
      "step": 1055
    },
    {
      "epoch": 1.0026109660574412,
      "grad_norm": 0.04362662881612778,
      "learning_rate": 0.00016838862901923842,
      "loss": 0.1577,
      "step": 1056
    },
    {
      "epoch": 1.0035604082601473,
      "grad_norm": 0.033426132053136826,
      "learning_rate": 0.00016830796574710284,
      "loss": 0.1252,
      "step": 1057
    },
    {
      "epoch": 1.0045098504628531,
      "grad_norm": 0.06085265800356865,
      "learning_rate": 0.00016822721906511844,
      "loss": 0.1769,
      "step": 1058
    },
    {
      "epoch": 1.005459292665559,
      "grad_norm": 0.03222273662686348,
      "learning_rate": 0.00016814638907188388,
      "loss": 0.1239,
      "step": 1059
    },
    {
      "epoch": 1.0064087348682649,
      "grad_norm": 0.032014038413763046,
      "learning_rate": 0.00016806547586609947,
      "loss": 0.1191,
      "step": 1060
    },
    {
      "epoch": 1.0073581770709708,
      "grad_norm": 0.03323471546173096,
      "learning_rate": 0.00016798447954656707,
      "loss": 0.1334,
      "step": 1061
    },
    {
      "epoch": 1.0083076192736766,
      "grad_norm": 0.04325219243764877,
      "learning_rate": 0.0001679034002121901,
      "loss": 0.1623,
      "step": 1062
    },
    {
      "epoch": 1.0092570614763827,
      "grad_norm": 0.029746338725090027,
      "learning_rate": 0.0001678222379619734,
      "loss": 0.1292,
      "step": 1063
    },
    {
      "epoch": 1.0102065036790886,
      "grad_norm": 0.03265037387609482,
      "learning_rate": 0.00016774099289502297,
      "loss": 0.1271,
      "step": 1064
    },
    {
      "epoch": 1.0111559458817945,
      "grad_norm": 0.04023383557796478,
      "learning_rate": 0.0001676596651105459,
      "loss": 0.1537,
      "step": 1065
    },
    {
      "epoch": 1.0121053880845003,
      "grad_norm": 0.036106862127780914,
      "learning_rate": 0.00016757825470785042,
      "loss": 0.1237,
      "step": 1066
    },
    {
      "epoch": 1.0130548302872062,
      "grad_norm": 0.04061293974518776,
      "learning_rate": 0.00016749676178634556,
      "loss": 0.1324,
      "step": 1067
    },
    {
      "epoch": 1.014004272489912,
      "grad_norm": 0.050820399075746536,
      "learning_rate": 0.0001674151864455411,
      "loss": 0.1705,
      "step": 1068
    },
    {
      "epoch": 1.0149537146926182,
      "grad_norm": 0.037347592413425446,
      "learning_rate": 0.00016733352878504752,
      "loss": 0.1248,
      "step": 1069
    },
    {
      "epoch": 1.015903156895324,
      "grad_norm": 0.04108656942844391,
      "learning_rate": 0.00016725178890457571,
      "loss": 0.1201,
      "step": 1070
    },
    {
      "epoch": 1.01685259909803,
      "grad_norm": 0.051215577870607376,
      "learning_rate": 0.00016716996690393715,
      "loss": 0.1705,
      "step": 1071
    },
    {
      "epoch": 1.0178020413007358,
      "grad_norm": 0.05008477717638016,
      "learning_rate": 0.00016708806288304336,
      "loss": 0.1606,
      "step": 1072
    },
    {
      "epoch": 1.0187514835034417,
      "grad_norm": 0.05916628614068031,
      "learning_rate": 0.00016700607694190617,
      "loss": 0.1824,
      "step": 1073
    },
    {
      "epoch": 1.0197009257061476,
      "grad_norm": 0.03331366181373596,
      "learning_rate": 0.00016692400918063744,
      "loss": 0.1256,
      "step": 1074
    },
    {
      "epoch": 1.0206503679088534,
      "grad_norm": 0.03364944830536842,
      "learning_rate": 0.00016684185969944885,
      "loss": 0.1273,
      "step": 1075
    },
    {
      "epoch": 1.0215998101115595,
      "grad_norm": 0.02990981563925743,
      "learning_rate": 0.000166759628598652,
      "loss": 0.1284,
      "step": 1076
    },
    {
      "epoch": 1.0225492523142654,
      "grad_norm": 0.03323819115757942,
      "learning_rate": 0.00016667731597865796,
      "loss": 0.1258,
      "step": 1077
    },
    {
      "epoch": 1.0234986945169713,
      "grad_norm": 0.03008713200688362,
      "learning_rate": 0.0001665949219399775,
      "loss": 0.1244,
      "step": 1078
    },
    {
      "epoch": 1.0244481367196772,
      "grad_norm": 0.04623178020119667,
      "learning_rate": 0.00016651244658322085,
      "loss": 0.1537,
      "step": 1079
    },
    {
      "epoch": 1.025397578922383,
      "grad_norm": 0.034869614988565445,
      "learning_rate": 0.00016642989000909732,
      "loss": 0.1272,
      "step": 1080
    },
    {
      "epoch": 1.026347021125089,
      "grad_norm": 0.03236447647213936,
      "learning_rate": 0.0001663472523184156,
      "loss": 0.1299,
      "step": 1081
    },
    {
      "epoch": 1.027296463327795,
      "grad_norm": 0.02806561440229416,
      "learning_rate": 0.00016626453361208335,
      "loss": 0.1198,
      "step": 1082
    },
    {
      "epoch": 1.0282459055305009,
      "grad_norm": 0.04762514680624008,
      "learning_rate": 0.0001661817339911071,
      "loss": 0.1695,
      "step": 1083
    },
    {
      "epoch": 1.0291953477332068,
      "grad_norm": 0.039319079369306564,
      "learning_rate": 0.00016609885355659234,
      "loss": 0.1612,
      "step": 1084
    },
    {
      "epoch": 1.0301447899359126,
      "grad_norm": 0.030540715903043747,
      "learning_rate": 0.0001660158924097431,
      "loss": 0.1251,
      "step": 1085
    },
    {
      "epoch": 1.0310942321386185,
      "grad_norm": 0.029828663915395737,
      "learning_rate": 0.000165932850651862,
      "loss": 0.1287,
      "step": 1086
    },
    {
      "epoch": 1.0320436743413244,
      "grad_norm": 0.030012918636202812,
      "learning_rate": 0.0001658497283843501,
      "loss": 0.132,
      "step": 1087
    },
    {
      "epoch": 1.0329931165440305,
      "grad_norm": 0.03255194425582886,
      "learning_rate": 0.0001657665257087068,
      "loss": 0.1257,
      "step": 1088
    },
    {
      "epoch": 1.0339425587467364,
      "grad_norm": 0.040951523929834366,
      "learning_rate": 0.00016568324272652965,
      "loss": 0.1507,
      "step": 1089
    },
    {
      "epoch": 1.0348920009494422,
      "grad_norm": 0.027678990736603737,
      "learning_rate": 0.00016559987953951427,
      "loss": 0.1217,
      "step": 1090
    },
    {
      "epoch": 1.035841443152148,
      "grad_norm": 0.03241724148392677,
      "learning_rate": 0.0001655164362494542,
      "loss": 0.1298,
      "step": 1091
    },
    {
      "epoch": 1.036790885354854,
      "grad_norm": 0.038156237453222275,
      "learning_rate": 0.00016543291295824085,
      "loss": 0.1252,
      "step": 1092
    },
    {
      "epoch": 1.0377403275575598,
      "grad_norm": 0.029806343838572502,
      "learning_rate": 0.00016534930976786323,
      "loss": 0.1265,
      "step": 1093
    },
    {
      "epoch": 1.038689769760266,
      "grad_norm": 0.035036977380514145,
      "learning_rate": 0.00016526562678040804,
      "loss": 0.1247,
      "step": 1094
    },
    {
      "epoch": 1.0396392119629718,
      "grad_norm": 0.032223109155893326,
      "learning_rate": 0.00016518186409805922,
      "loss": 0.1326,
      "step": 1095
    },
    {
      "epoch": 1.0405886541656777,
      "grad_norm": 0.03192323073744774,
      "learning_rate": 0.0001650980218230982,
      "loss": 0.1186,
      "step": 1096
    },
    {
      "epoch": 1.0415380963683836,
      "grad_norm": 0.031004801392555237,
      "learning_rate": 0.00016501410005790362,
      "loss": 0.1292,
      "step": 1097
    },
    {
      "epoch": 1.0424875385710894,
      "grad_norm": 0.03421878442168236,
      "learning_rate": 0.00016493009890495102,
      "loss": 0.1362,
      "step": 1098
    },
    {
      "epoch": 1.0434369807737953,
      "grad_norm": 0.03153158724308014,
      "learning_rate": 0.00016484601846681297,
      "loss": 0.1182,
      "step": 1099
    },
    {
      "epoch": 1.0443864229765012,
      "grad_norm": 0.03977439925074577,
      "learning_rate": 0.0001647618588461589,
      "loss": 0.1327,
      "step": 1100
    },
    {
      "epoch": 1.0453358651792073,
      "grad_norm": 0.03982316702604294,
      "learning_rate": 0.00016467762014575485,
      "loss": 0.1582,
      "step": 1101
    },
    {
      "epoch": 1.0462853073819132,
      "grad_norm": 0.034796085208654404,
      "learning_rate": 0.00016459330246846348,
      "loss": 0.1258,
      "step": 1102
    },
    {
      "epoch": 1.047234749584619,
      "grad_norm": 0.039261046797037125,
      "learning_rate": 0.0001645089059172438,
      "loss": 0.1321,
      "step": 1103
    },
    {
      "epoch": 1.048184191787325,
      "grad_norm": 0.04305882379412651,
      "learning_rate": 0.00016442443059515126,
      "loss": 0.1406,
      "step": 1104
    },
    {
      "epoch": 1.0491336339900308,
      "grad_norm": 0.03491320461034775,
      "learning_rate": 0.00016433987660533742,
      "loss": 0.1312,
      "step": 1105
    },
    {
      "epoch": 1.0500830761927367,
      "grad_norm": 0.04404641315340996,
      "learning_rate": 0.00016425524405104986,
      "loss": 0.1267,
      "step": 1106
    },
    {
      "epoch": 1.0510325183954428,
      "grad_norm": 0.034407854080200195,
      "learning_rate": 0.0001641705330356322,
      "loss": 0.1268,
      "step": 1107
    },
    {
      "epoch": 1.0519819605981486,
      "grad_norm": 0.04843935742974281,
      "learning_rate": 0.00016408574366252374,
      "loss": 0.1601,
      "step": 1108
    },
    {
      "epoch": 1.0529314028008545,
      "grad_norm": 0.03394000977277756,
      "learning_rate": 0.0001640008760352596,
      "loss": 0.13,
      "step": 1109
    },
    {
      "epoch": 1.0538808450035604,
      "grad_norm": 0.027672087773680687,
      "learning_rate": 0.00016391593025747038,
      "loss": 0.1202,
      "step": 1110
    },
    {
      "epoch": 1.0548302872062663,
      "grad_norm": 0.03761329874396324,
      "learning_rate": 0.0001638309064328821,
      "loss": 0.1562,
      "step": 1111
    },
    {
      "epoch": 1.0557797294089721,
      "grad_norm": 0.048850156366825104,
      "learning_rate": 0.0001637458046653161,
      "loss": 0.1707,
      "step": 1112
    },
    {
      "epoch": 1.0567291716116782,
      "grad_norm": 0.027066387236118317,
      "learning_rate": 0.00016366062505868888,
      "loss": 0.1204,
      "step": 1113
    },
    {
      "epoch": 1.057678613814384,
      "grad_norm": 0.034062668681144714,
      "learning_rate": 0.00016357536771701198,
      "loss": 0.1378,
      "step": 1114
    },
    {
      "epoch": 1.05862805601709,
      "grad_norm": 0.0422850139439106,
      "learning_rate": 0.00016349003274439194,
      "loss": 0.1583,
      "step": 1115
    },
    {
      "epoch": 1.0595774982197959,
      "grad_norm": 0.0334283784031868,
      "learning_rate": 0.00016340462024503,
      "loss": 0.1276,
      "step": 1116
    },
    {
      "epoch": 1.0605269404225017,
      "grad_norm": 0.03338415175676346,
      "learning_rate": 0.00016331913032322212,
      "loss": 0.1229,
      "step": 1117
    },
    {
      "epoch": 1.0614763826252076,
      "grad_norm": 0.03128555044531822,
      "learning_rate": 0.00016323356308335876,
      "loss": 0.1167,
      "step": 1118
    },
    {
      "epoch": 1.0624258248279137,
      "grad_norm": 0.033790841698646545,
      "learning_rate": 0.00016314791862992486,
      "loss": 0.1236,
      "step": 1119
    },
    {
      "epoch": 1.0633752670306196,
      "grad_norm": 0.03544427454471588,
      "learning_rate": 0.00016306219706749953,
      "loss": 0.1319,
      "step": 1120
    },
    {
      "epoch": 1.0643247092333254,
      "grad_norm": 0.03969413787126541,
      "learning_rate": 0.0001629763985007561,
      "loss": 0.1612,
      "step": 1121
    },
    {
      "epoch": 1.0652741514360313,
      "grad_norm": 0.042924992740154266,
      "learning_rate": 0.00016289052303446202,
      "loss": 0.1659,
      "step": 1122
    },
    {
      "epoch": 1.0662235936387372,
      "grad_norm": 0.04624541476368904,
      "learning_rate": 0.00016280457077347848,
      "loss": 0.1617,
      "step": 1123
    },
    {
      "epoch": 1.067173035841443,
      "grad_norm": 0.034341566264629364,
      "learning_rate": 0.00016271854182276058,
      "loss": 0.1188,
      "step": 1124
    },
    {
      "epoch": 1.068122478044149,
      "grad_norm": 0.03228682279586792,
      "learning_rate": 0.00016263243628735695,
      "loss": 0.129,
      "step": 1125
    },
    {
      "epoch": 1.069071920246855,
      "grad_norm": 0.036037541925907135,
      "learning_rate": 0.00016254625427240978,
      "loss": 0.1309,
      "step": 1126
    },
    {
      "epoch": 1.070021362449561,
      "grad_norm": 0.027421532198786736,
      "learning_rate": 0.0001624599958831547,
      "loss": 0.1176,
      "step": 1127
    },
    {
      "epoch": 1.0709708046522668,
      "grad_norm": 0.030262261629104614,
      "learning_rate": 0.00016237366122492052,
      "loss": 0.1195,
      "step": 1128
    },
    {
      "epoch": 1.0719202468549727,
      "grad_norm": 0.041230421513319016,
      "learning_rate": 0.00016228725040312925,
      "loss": 0.1562,
      "step": 1129
    },
    {
      "epoch": 1.0728696890576785,
      "grad_norm": 0.03141395375132561,
      "learning_rate": 0.00016220076352329582,
      "loss": 0.13,
      "step": 1130
    },
    {
      "epoch": 1.0738191312603846,
      "grad_norm": 0.0343187153339386,
      "learning_rate": 0.00016211420069102815,
      "loss": 0.134,
      "step": 1131
    },
    {
      "epoch": 1.0747685734630905,
      "grad_norm": 0.04862483590841293,
      "learning_rate": 0.0001620275620120268,
      "loss": 0.1574,
      "step": 1132
    },
    {
      "epoch": 1.0757180156657964,
      "grad_norm": 0.04204042628407478,
      "learning_rate": 0.00016194084759208494,
      "loss": 0.162,
      "step": 1133
    },
    {
      "epoch": 1.0766674578685023,
      "grad_norm": 0.03309663385152817,
      "learning_rate": 0.00016185405753708833,
      "loss": 0.1251,
      "step": 1134
    },
    {
      "epoch": 1.0776169000712081,
      "grad_norm": 0.03191671893000603,
      "learning_rate": 0.00016176719195301503,
      "loss": 0.125,
      "step": 1135
    },
    {
      "epoch": 1.078566342273914,
      "grad_norm": 0.036822427064180374,
      "learning_rate": 0.0001616802509459353,
      "loss": 0.1484,
      "step": 1136
    },
    {
      "epoch": 1.0795157844766199,
      "grad_norm": 0.029125772416591644,
      "learning_rate": 0.00016159323462201149,
      "loss": 0.1192,
      "step": 1137
    },
    {
      "epoch": 1.080465226679326,
      "grad_norm": 0.034059688448905945,
      "learning_rate": 0.000161506143087498,
      "loss": 0.1309,
      "step": 1138
    },
    {
      "epoch": 1.0814146688820319,
      "grad_norm": 0.03434915095567703,
      "learning_rate": 0.00016141897644874096,
      "loss": 0.1336,
      "step": 1139
    },
    {
      "epoch": 1.0823641110847377,
      "grad_norm": 0.0348944216966629,
      "learning_rate": 0.00016133173481217833,
      "loss": 0.1317,
      "step": 1140
    },
    {
      "epoch": 1.0833135532874436,
      "grad_norm": 0.033639729022979736,
      "learning_rate": 0.00016124441828433957,
      "loss": 0.1187,
      "step": 1141
    },
    {
      "epoch": 1.0842629954901495,
      "grad_norm": 0.03063533827662468,
      "learning_rate": 0.00016115702697184556,
      "loss": 0.1332,
      "step": 1142
    },
    {
      "epoch": 1.0852124376928554,
      "grad_norm": 0.03273540362715721,
      "learning_rate": 0.00016106956098140858,
      "loss": 0.1284,
      "step": 1143
    },
    {
      "epoch": 1.0861618798955615,
      "grad_norm": 0.05293993651866913,
      "learning_rate": 0.00016098202041983206,
      "loss": 0.1687,
      "step": 1144
    },
    {
      "epoch": 1.0871113220982673,
      "grad_norm": 0.03251373767852783,
      "learning_rate": 0.00016089440539401046,
      "loss": 0.1252,
      "step": 1145
    },
    {
      "epoch": 1.0880607643009732,
      "grad_norm": 0.0367170013487339,
      "learning_rate": 0.00016080671601092922,
      "loss": 0.1419,
      "step": 1146
    },
    {
      "epoch": 1.089010206503679,
      "grad_norm": 0.030752060934901237,
      "learning_rate": 0.00016071895237766457,
      "loss": 0.1257,
      "step": 1147
    },
    {
      "epoch": 1.089959648706385,
      "grad_norm": 0.035168685019016266,
      "learning_rate": 0.00016063111460138334,
      "loss": 0.1385,
      "step": 1148
    },
    {
      "epoch": 1.0909090909090908,
      "grad_norm": 0.03252134099602699,
      "learning_rate": 0.00016054320278934296,
      "loss": 0.1232,
      "step": 1149
    },
    {
      "epoch": 1.0918585331117967,
      "grad_norm": 0.028666459023952484,
      "learning_rate": 0.00016045521704889128,
      "loss": 0.1242,
      "step": 1150
    },
    {
      "epoch": 1.0928079753145028,
      "grad_norm": 0.047707218676805496,
      "learning_rate": 0.00016036715748746634,
      "loss": 0.1643,
      "step": 1151
    },
    {
      "epoch": 1.0937574175172087,
      "grad_norm": 0.035980336368083954,
      "learning_rate": 0.00016027902421259638,
      "loss": 0.1329,
      "step": 1152
    },
    {
      "epoch": 1.0947068597199145,
      "grad_norm": 0.04506576433777809,
      "learning_rate": 0.00016019081733189967,
      "loss": 0.1631,
      "step": 1153
    },
    {
      "epoch": 1.0956563019226204,
      "grad_norm": 0.030268298462033272,
      "learning_rate": 0.0001601025369530843,
      "loss": 0.1319,
      "step": 1154
    },
    {
      "epoch": 1.0966057441253263,
      "grad_norm": 0.056095585227012634,
      "learning_rate": 0.00016001418318394817,
      "loss": 0.1529,
      "step": 1155
    },
    {
      "epoch": 1.0975551863280324,
      "grad_norm": 0.029666630551218987,
      "learning_rate": 0.0001599257561323787,
      "loss": 0.126,
      "step": 1156
    },
    {
      "epoch": 1.0985046285307383,
      "grad_norm": 0.03648681938648224,
      "learning_rate": 0.00015983725590635293,
      "loss": 0.1378,
      "step": 1157
    },
    {
      "epoch": 1.0994540707334441,
      "grad_norm": 0.03170529007911682,
      "learning_rate": 0.00015974868261393714,
      "loss": 0.1238,
      "step": 1158
    },
    {
      "epoch": 1.10040351293615,
      "grad_norm": 0.032316990196704865,
      "learning_rate": 0.0001596600363632869,
      "loss": 0.1305,
      "step": 1159
    },
    {
      "epoch": 1.101352955138856,
      "grad_norm": 0.03148328512907028,
      "learning_rate": 0.00015957131726264677,
      "loss": 0.1303,
      "step": 1160
    },
    {
      "epoch": 1.1023023973415618,
      "grad_norm": 0.03739064186811447,
      "learning_rate": 0.00015948252542035042,
      "loss": 0.16,
      "step": 1161
    },
    {
      "epoch": 1.1032518395442676,
      "grad_norm": 0.034856993705034256,
      "learning_rate": 0.00015939366094482025,
      "loss": 0.1273,
      "step": 1162
    },
    {
      "epoch": 1.1042012817469737,
      "grad_norm": 0.03102080523967743,
      "learning_rate": 0.0001593047239445673,
      "loss": 0.1331,
      "step": 1163
    },
    {
      "epoch": 1.1051507239496796,
      "grad_norm": 0.026448125019669533,
      "learning_rate": 0.00015921571452819127,
      "loss": 0.1241,
      "step": 1164
    },
    {
      "epoch": 1.1061001661523855,
      "grad_norm": 0.034301795065402985,
      "learning_rate": 0.0001591266328043802,
      "loss": 0.1283,
      "step": 1165
    },
    {
      "epoch": 1.1070496083550914,
      "grad_norm": 0.03346949443221092,
      "learning_rate": 0.00015903747888191053,
      "loss": 0.1355,
      "step": 1166
    },
    {
      "epoch": 1.1079990505577972,
      "grad_norm": 0.0324571467936039,
      "learning_rate": 0.00015894825286964675,
      "loss": 0.1354,
      "step": 1167
    },
    {
      "epoch": 1.1089484927605031,
      "grad_norm": 0.05366596579551697,
      "learning_rate": 0.00015885895487654147,
      "loss": 0.2099,
      "step": 1168
    },
    {
      "epoch": 1.1098979349632092,
      "grad_norm": 0.04298697039484978,
      "learning_rate": 0.00015876958501163512,
      "loss": 0.1709,
      "step": 1169
    },
    {
      "epoch": 1.110847377165915,
      "grad_norm": 0.02922794409096241,
      "learning_rate": 0.00015868014338405592,
      "loss": 0.1201,
      "step": 1170
    },
    {
      "epoch": 1.111796819368621,
      "grad_norm": 0.02963380515575409,
      "learning_rate": 0.00015859063010301974,
      "loss": 0.119,
      "step": 1171
    },
    {
      "epoch": 1.1127462615713268,
      "grad_norm": 0.05272309482097626,
      "learning_rate": 0.0001585010452778299,
      "loss": 0.1785,
      "step": 1172
    },
    {
      "epoch": 1.1136957037740327,
      "grad_norm": 0.031892240047454834,
      "learning_rate": 0.00015841138901787714,
      "loss": 0.1292,
      "step": 1173
    },
    {
      "epoch": 1.1146451459767386,
      "grad_norm": 0.02971399575471878,
      "learning_rate": 0.0001583216614326394,
      "loss": 0.124,
      "step": 1174
    },
    {
      "epoch": 1.1155945881794447,
      "grad_norm": 0.03037869744002819,
      "learning_rate": 0.00015823186263168169,
      "loss": 0.1303,
      "step": 1175
    },
    {
      "epoch": 1.1165440303821506,
      "grad_norm": 0.02748207375407219,
      "learning_rate": 0.000158141992724656,
      "loss": 0.1243,
      "step": 1176
    },
    {
      "epoch": 1.1174934725848564,
      "grad_norm": 0.033940836787223816,
      "learning_rate": 0.00015805205182130113,
      "loss": 0.1254,
      "step": 1177
    },
    {
      "epoch": 1.1184429147875623,
      "grad_norm": 0.03295721858739853,
      "learning_rate": 0.00015796204003144264,
      "loss": 0.1235,
      "step": 1178
    },
    {
      "epoch": 1.1193923569902682,
      "grad_norm": 0.031388405710458755,
      "learning_rate": 0.00015787195746499254,
      "loss": 0.1221,
      "step": 1179
    },
    {
      "epoch": 1.120341799192974,
      "grad_norm": 0.033072203397750854,
      "learning_rate": 0.00015778180423194936,
      "loss": 0.1284,
      "step": 1180
    },
    {
      "epoch": 1.1212912413956801,
      "grad_norm": 0.03310628607869148,
      "learning_rate": 0.00015769158044239787,
      "loss": 0.136,
      "step": 1181
    },
    {
      "epoch": 1.122240683598386,
      "grad_norm": 0.030244866386055946,
      "learning_rate": 0.000157601286206509,
      "loss": 0.1255,
      "step": 1182
    },
    {
      "epoch": 1.123190125801092,
      "grad_norm": 0.04400714859366417,
      "learning_rate": 0.0001575109216345397,
      "loss": 0.1706,
      "step": 1183
    },
    {
      "epoch": 1.1241395680037978,
      "grad_norm": 0.03094104304909706,
      "learning_rate": 0.00015742048683683288,
      "loss": 0.1261,
      "step": 1184
    },
    {
      "epoch": 1.1250890102065036,
      "grad_norm": 0.03327153995633125,
      "learning_rate": 0.00015732998192381707,
      "loss": 0.1334,
      "step": 1185
    },
    {
      "epoch": 1.1260384524092095,
      "grad_norm": 0.03229563683271408,
      "learning_rate": 0.0001572394070060065,
      "loss": 0.1168,
      "step": 1186
    },
    {
      "epoch": 1.1269878946119154,
      "grad_norm": 0.03267960995435715,
      "learning_rate": 0.0001571487621940009,
      "loss": 0.1331,
      "step": 1187
    },
    {
      "epoch": 1.1279373368146215,
      "grad_norm": 0.02902175299823284,
      "learning_rate": 0.00015705804759848523,
      "loss": 0.1302,
      "step": 1188
    },
    {
      "epoch": 1.1288867790173274,
      "grad_norm": 0.030303161591291428,
      "learning_rate": 0.00015696726333022984,
      "loss": 0.1267,
      "step": 1189
    },
    {
      "epoch": 1.1298362212200332,
      "grad_norm": 0.031242702156305313,
      "learning_rate": 0.00015687640950009,
      "loss": 0.125,
      "step": 1190
    },
    {
      "epoch": 1.1307856634227391,
      "grad_norm": 0.02697862684726715,
      "learning_rate": 0.00015678548621900597,
      "loss": 0.1207,
      "step": 1191
    },
    {
      "epoch": 1.131735105625445,
      "grad_norm": 0.03510352969169617,
      "learning_rate": 0.0001566944935980029,
      "loss": 0.132,
      "step": 1192
    },
    {
      "epoch": 1.1326845478281509,
      "grad_norm": 0.03274201229214668,
      "learning_rate": 0.00015660343174819045,
      "loss": 0.1262,
      "step": 1193
    },
    {
      "epoch": 1.133633990030857,
      "grad_norm": 0.03378736600279808,
      "learning_rate": 0.00015651230078076296,
      "loss": 0.1318,
      "step": 1194
    },
    {
      "epoch": 1.1345834322335628,
      "grad_norm": 0.032314665615558624,
      "learning_rate": 0.00015642110080699907,
      "loss": 0.117,
      "step": 1195
    },
    {
      "epoch": 1.1355328744362687,
      "grad_norm": 0.04177004471421242,
      "learning_rate": 0.00015632983193826174,
      "loss": 0.1527,
      "step": 1196
    },
    {
      "epoch": 1.1364823166389746,
      "grad_norm": 0.05647768825292587,
      "learning_rate": 0.00015623849428599804,
      "loss": 0.2019,
      "step": 1197
    },
    {
      "epoch": 1.1374317588416805,
      "grad_norm": 0.03124004229903221,
      "learning_rate": 0.00015614708796173906,
      "loss": 0.1228,
      "step": 1198
    },
    {
      "epoch": 1.1383812010443863,
      "grad_norm": 0.029155779629945755,
      "learning_rate": 0.00015605561307709964,
      "loss": 0.126,
      "step": 1199
    },
    {
      "epoch": 1.1393306432470922,
      "grad_norm": 0.030367044731974602,
      "learning_rate": 0.0001559640697437785,
      "loss": 0.1296,
      "step": 1200
    },
    {
      "epoch": 1.1402800854497983,
      "grad_norm": 0.06225927174091339,
      "learning_rate": 0.00015587245807355778,
      "loss": 0.2258,
      "step": 1201
    },
    {
      "epoch": 1.1412295276525042,
      "grad_norm": 0.03878935053944588,
      "learning_rate": 0.00015578077817830313,
      "loss": 0.1322,
      "step": 1202
    },
    {
      "epoch": 1.14217896985521,
      "grad_norm": 0.030811108648777008,
      "learning_rate": 0.0001556890301699636,
      "loss": 0.127,
      "step": 1203
    },
    {
      "epoch": 1.143128412057916,
      "grad_norm": 0.0561293326318264,
      "learning_rate": 0.00015559721416057127,
      "loss": 0.1689,
      "step": 1204
    },
    {
      "epoch": 1.1440778542606218,
      "grad_norm": 0.05256973206996918,
      "learning_rate": 0.0001555053302622413,
      "loss": 0.1735,
      "step": 1205
    },
    {
      "epoch": 1.145027296463328,
      "grad_norm": 0.037241023033857346,
      "learning_rate": 0.0001554133785871718,
      "loss": 0.1304,
      "step": 1206
    },
    {
      "epoch": 1.1459767386660338,
      "grad_norm": 0.04502008110284805,
      "learning_rate": 0.00015532135924764358,
      "loss": 0.1594,
      "step": 1207
    },
    {
      "epoch": 1.1469261808687397,
      "grad_norm": 0.053607337176799774,
      "learning_rate": 0.00015522927235602014,
      "loss": 0.1683,
      "step": 1208
    },
    {
      "epoch": 1.1478756230714455,
      "grad_norm": 0.029219908639788628,
      "learning_rate": 0.00015513711802474735,
      "loss": 0.1267,
      "step": 1209
    },
    {
      "epoch": 1.1488250652741514,
      "grad_norm": 0.03088328242301941,
      "learning_rate": 0.0001550448963663536,
      "loss": 0.1318,
      "step": 1210
    },
    {
      "epoch": 1.1497745074768573,
      "grad_norm": 0.03560802713036537,
      "learning_rate": 0.00015495260749344932,
      "loss": 0.1433,
      "step": 1211
    },
    {
      "epoch": 1.1507239496795632,
      "grad_norm": 0.033212240785360336,
      "learning_rate": 0.00015486025151872706,
      "loss": 0.1222,
      "step": 1212
    },
    {
      "epoch": 1.1516733918822692,
      "grad_norm": 0.032883357256650925,
      "learning_rate": 0.00015476782855496145,
      "loss": 0.1317,
      "step": 1213
    },
    {
      "epoch": 1.1526228340849751,
      "grad_norm": 0.028118513524532318,
      "learning_rate": 0.0001546753387150087,
      "loss": 0.1306,
      "step": 1214
    },
    {
      "epoch": 1.153572276287681,
      "grad_norm": 0.040216926485300064,
      "learning_rate": 0.00015458278211180688,
      "loss": 0.1482,
      "step": 1215
    },
    {
      "epoch": 1.1545217184903869,
      "grad_norm": 0.029117384925484657,
      "learning_rate": 0.00015449015885837542,
      "loss": 0.1287,
      "step": 1216
    },
    {
      "epoch": 1.1554711606930927,
      "grad_norm": 0.08133453875780106,
      "learning_rate": 0.00015439746906781524,
      "loss": 0.2109,
      "step": 1217
    },
    {
      "epoch": 1.1564206028957986,
      "grad_norm": 0.03554106503725052,
      "learning_rate": 0.00015430471285330846,
      "loss": 0.1297,
      "step": 1218
    },
    {
      "epoch": 1.1573700450985047,
      "grad_norm": 0.037721745669841766,
      "learning_rate": 0.00015421189032811835,
      "loss": 0.1364,
      "step": 1219
    },
    {
      "epoch": 1.1583194873012106,
      "grad_norm": 0.025993864983320236,
      "learning_rate": 0.00015411900160558912,
      "loss": 0.1198,
      "step": 1220
    },
    {
      "epoch": 1.1592689295039165,
      "grad_norm": 0.03330320492386818,
      "learning_rate": 0.00015402604679914575,
      "loss": 0.1253,
      "step": 1221
    },
    {
      "epoch": 1.1602183717066223,
      "grad_norm": 0.03463476151227951,
      "learning_rate": 0.00015393302602229408,
      "loss": 0.1235,
      "step": 1222
    },
    {
      "epoch": 1.1611678139093282,
      "grad_norm": 0.03458210453391075,
      "learning_rate": 0.00015383993938862037,
      "loss": 0.1238,
      "step": 1223
    },
    {
      "epoch": 1.162117256112034,
      "grad_norm": 0.03344335779547691,
      "learning_rate": 0.00015374678701179134,
      "loss": 0.1267,
      "step": 1224
    },
    {
      "epoch": 1.16306669831474,
      "grad_norm": 0.027585169300436974,
      "learning_rate": 0.00015365356900555395,
      "loss": 0.1192,
      "step": 1225
    },
    {
      "epoch": 1.164016140517446,
      "grad_norm": 0.03154926374554634,
      "learning_rate": 0.00015356028548373538,
      "loss": 0.1288,
      "step": 1226
    },
    {
      "epoch": 1.164965582720152,
      "grad_norm": 0.030677665024995804,
      "learning_rate": 0.00015346693656024271,
      "loss": 0.1292,
      "step": 1227
    },
    {
      "epoch": 1.1659150249228578,
      "grad_norm": 0.030145341530442238,
      "learning_rate": 0.00015337352234906298,
      "loss": 0.1331,
      "step": 1228
    },
    {
      "epoch": 1.1668644671255637,
      "grad_norm": 0.03683342784643173,
      "learning_rate": 0.00015328004296426287,
      "loss": 0.125,
      "step": 1229
    },
    {
      "epoch": 1.1678139093282696,
      "grad_norm": 0.029177436605095863,
      "learning_rate": 0.0001531864985199887,
      "loss": 0.1313,
      "step": 1230
    },
    {
      "epoch": 1.1687633515309757,
      "grad_norm": 0.0415952131152153,
      "learning_rate": 0.0001530928891304662,
      "loss": 0.1642,
      "step": 1231
    },
    {
      "epoch": 1.1697127937336815,
      "grad_norm": 0.027380244806408882,
      "learning_rate": 0.00015299921491000043,
      "loss": 0.1254,
      "step": 1232
    },
    {
      "epoch": 1.1706622359363874,
      "grad_norm": 0.03130066767334938,
      "learning_rate": 0.00015290547597297555,
      "loss": 0.1291,
      "step": 1233
    },
    {
      "epoch": 1.1716116781390933,
      "grad_norm": 0.04524728283286095,
      "learning_rate": 0.00015281167243385484,
      "loss": 0.1627,
      "step": 1234
    },
    {
      "epoch": 1.1725611203417992,
      "grad_norm": 0.04698526859283447,
      "learning_rate": 0.0001527178044071804,
      "loss": 0.1892,
      "step": 1235
    },
    {
      "epoch": 1.173510562544505,
      "grad_norm": 0.041941914707422256,
      "learning_rate": 0.00015262387200757314,
      "loss": 0.1603,
      "step": 1236
    },
    {
      "epoch": 1.174460004747211,
      "grad_norm": 0.03519544377923012,
      "learning_rate": 0.0001525298753497324,
      "loss": 0.1306,
      "step": 1237
    },
    {
      "epoch": 1.175409446949917,
      "grad_norm": 0.036025673151016235,
      "learning_rate": 0.00015243581454843624,
      "loss": 0.1315,
      "step": 1238
    },
    {
      "epoch": 1.1763588891526229,
      "grad_norm": 0.05133717134594917,
      "learning_rate": 0.0001523416897185409,
      "loss": 0.1661,
      "step": 1239
    },
    {
      "epoch": 1.1773083313553288,
      "grad_norm": 0.03367958217859268,
      "learning_rate": 0.00015224750097498073,
      "loss": 0.1311,
      "step": 1240
    },
    {
      "epoch": 1.1782577735580346,
      "grad_norm": 0.035372741520404816,
      "learning_rate": 0.0001521532484327683,
      "loss": 0.1324,
      "step": 1241
    },
    {
      "epoch": 1.1792072157607405,
      "grad_norm": 0.048746585845947266,
      "learning_rate": 0.000152058932206994,
      "loss": 0.1756,
      "step": 1242
    },
    {
      "epoch": 1.1801566579634466,
      "grad_norm": 0.03578799590468407,
      "learning_rate": 0.00015196455241282592,
      "loss": 0.1344,
      "step": 1243
    },
    {
      "epoch": 1.1811061001661525,
      "grad_norm": 0.030654437839984894,
      "learning_rate": 0.00015187010916550988,
      "loss": 0.1268,
      "step": 1244
    },
    {
      "epoch": 1.1820555423688583,
      "grad_norm": 0.02881826087832451,
      "learning_rate": 0.0001517756025803691,
      "loss": 0.1149,
      "step": 1245
    },
    {
      "epoch": 1.1830049845715642,
      "grad_norm": 0.031242484226822853,
      "learning_rate": 0.00015168103277280422,
      "loss": 0.1338,
      "step": 1246
    },
    {
      "epoch": 1.18395442677427,
      "grad_norm": 0.028528152033686638,
      "learning_rate": 0.000151586399858293,
      "loss": 0.1203,
      "step": 1247
    },
    {
      "epoch": 1.184903868976976,
      "grad_norm": 0.028410421684384346,
      "learning_rate": 0.00015149170395239035,
      "loss": 0.1296,
      "step": 1248
    },
    {
      "epoch": 1.1858533111796818,
      "grad_norm": 0.029383866116404533,
      "learning_rate": 0.00015139694517072796,
      "loss": 0.1284,
      "step": 1249
    },
    {
      "epoch": 1.186802753382388,
      "grad_norm": 0.031084850430488586,
      "learning_rate": 0.00015130212362901447,
      "loss": 0.1272,
      "step": 1250
    },
    {
      "epoch": 1.1877521955850938,
      "grad_norm": 0.035449109971523285,
      "learning_rate": 0.00015120723944303497,
      "loss": 0.1293,
      "step": 1251
    },
    {
      "epoch": 1.1887016377877997,
      "grad_norm": 0.030890950933098793,
      "learning_rate": 0.0001511122927286512,
      "loss": 0.1221,
      "step": 1252
    },
    {
      "epoch": 1.1896510799905056,
      "grad_norm": 0.06967780739068985,
      "learning_rate": 0.0001510172836018012,
      "loss": 0.1277,
      "step": 1253
    },
    {
      "epoch": 1.1906005221932114,
      "grad_norm": 0.03075585328042507,
      "learning_rate": 0.00015092221217849917,
      "loss": 0.1278,
      "step": 1254
    },
    {
      "epoch": 1.1915499643959173,
      "grad_norm": 0.040119290351867676,
      "learning_rate": 0.00015082707857483544,
      "loss": 0.1546,
      "step": 1255
    },
    {
      "epoch": 1.1924994065986234,
      "grad_norm": 0.03376394882798195,
      "learning_rate": 0.0001507318829069763,
      "loss": 0.1325,
      "step": 1256
    },
    {
      "epoch": 1.1934488488013293,
      "grad_norm": 0.034935321658849716,
      "learning_rate": 0.00015063662529116368,
      "loss": 0.1361,
      "step": 1257
    },
    {
      "epoch": 1.1943982910040352,
      "grad_norm": 0.043972812592983246,
      "learning_rate": 0.00015054130584371528,
      "loss": 0.1292,
      "step": 1258
    },
    {
      "epoch": 1.195347733206741,
      "grad_norm": 0.06039128825068474,
      "learning_rate": 0.0001504459246810243,
      "loss": 0.1958,
      "step": 1259
    },
    {
      "epoch": 1.196297175409447,
      "grad_norm": 0.030998772010207176,
      "learning_rate": 0.00015035048191955927,
      "loss": 0.1166,
      "step": 1260
    },
    {
      "epoch": 1.1972466176121528,
      "grad_norm": 0.0286384467035532,
      "learning_rate": 0.00015025497767586393,
      "loss": 0.1225,
      "step": 1261
    },
    {
      "epoch": 1.1981960598148587,
      "grad_norm": 0.03200898319482803,
      "learning_rate": 0.0001501594120665571,
      "loss": 0.1244,
      "step": 1262
    },
    {
      "epoch": 1.1991455020175648,
      "grad_norm": 0.032870370894670486,
      "learning_rate": 0.00015006378520833252,
      "loss": 0.126,
      "step": 1263
    },
    {
      "epoch": 1.2000949442202706,
      "grad_norm": 0.034849826246500015,
      "learning_rate": 0.00014996809721795872,
      "loss": 0.1263,
      "step": 1264
    },
    {
      "epoch": 1.2010443864229765,
      "grad_norm": 0.045324552804231644,
      "learning_rate": 0.00014987234821227898,
      "loss": 0.1668,
      "step": 1265
    },
    {
      "epoch": 1.2019938286256824,
      "grad_norm": 0.036612797528505325,
      "learning_rate": 0.0001497765383082109,
      "loss": 0.1595,
      "step": 1266
    },
    {
      "epoch": 1.2029432708283883,
      "grad_norm": 0.03746375814080238,
      "learning_rate": 0.00014968066762274657,
      "loss": 0.1644,
      "step": 1267
    },
    {
      "epoch": 1.2038927130310944,
      "grad_norm": 0.03137432038784027,
      "learning_rate": 0.0001495847362729523,
      "loss": 0.1239,
      "step": 1268
    },
    {
      "epoch": 1.2048421552338002,
      "grad_norm": 0.0314825214445591,
      "learning_rate": 0.0001494887443759684,
      "loss": 0.1258,
      "step": 1269
    },
    {
      "epoch": 1.205791597436506,
      "grad_norm": 0.032157186418771744,
      "learning_rate": 0.00014939269204900917,
      "loss": 0.1233,
      "step": 1270
    },
    {
      "epoch": 1.206741039639212,
      "grad_norm": 0.0410330593585968,
      "learning_rate": 0.0001492965794093627,
      "loss": 0.153,
      "step": 1271
    },
    {
      "epoch": 1.2076904818419179,
      "grad_norm": 0.0325077585875988,
      "learning_rate": 0.0001492004065743907,
      "loss": 0.1241,
      "step": 1272
    },
    {
      "epoch": 1.2086399240446237,
      "grad_norm": 0.033166393637657166,
      "learning_rate": 0.00014910417366152844,
      "loss": 0.1292,
      "step": 1273
    },
    {
      "epoch": 1.2095893662473296,
      "grad_norm": 0.02926860749721527,
      "learning_rate": 0.0001490078807882845,
      "loss": 0.1242,
      "step": 1274
    },
    {
      "epoch": 1.2105388084500357,
      "grad_norm": 0.04637501388788223,
      "learning_rate": 0.00014891152807224066,
      "loss": 0.1404,
      "step": 1275
    },
    {
      "epoch": 1.2114882506527416,
      "grad_norm": 0.035617321729660034,
      "learning_rate": 0.0001488151156310518,
      "loss": 0.1292,
      "step": 1276
    },
    {
      "epoch": 1.2124376928554474,
      "grad_norm": 0.036330446600914,
      "learning_rate": 0.00014871864358244574,
      "loss": 0.1326,
      "step": 1277
    },
    {
      "epoch": 1.2133871350581533,
      "grad_norm": 0.034302353858947754,
      "learning_rate": 0.00014862211204422305,
      "loss": 0.1296,
      "step": 1278
    },
    {
      "epoch": 1.2143365772608592,
      "grad_norm": 0.027070587500929832,
      "learning_rate": 0.00014852552113425702,
      "loss": 0.1227,
      "step": 1279
    },
    {
      "epoch": 1.215286019463565,
      "grad_norm": 0.029872050508856773,
      "learning_rate": 0.00014842887097049333,
      "loss": 0.1333,
      "step": 1280
    },
    {
      "epoch": 1.2162354616662712,
      "grad_norm": 0.0336853489279747,
      "learning_rate": 0.0001483321616709501,
      "loss": 0.1264,
      "step": 1281
    },
    {
      "epoch": 1.217184903868977,
      "grad_norm": 0.03892628103494644,
      "learning_rate": 0.00014823539335371763,
      "loss": 0.1516,
      "step": 1282
    },
    {
      "epoch": 1.218134346071683,
      "grad_norm": 0.03041486255824566,
      "learning_rate": 0.00014813856613695825,
      "loss": 0.1303,
      "step": 1283
    },
    {
      "epoch": 1.2190837882743888,
      "grad_norm": 0.042988792061805725,
      "learning_rate": 0.00014804168013890628,
      "loss": 0.1697,
      "step": 1284
    },
    {
      "epoch": 1.2200332304770947,
      "grad_norm": 0.03176325559616089,
      "learning_rate": 0.00014794473547786777,
      "loss": 0.1309,
      "step": 1285
    },
    {
      "epoch": 1.2209826726798005,
      "grad_norm": 0.032539233565330505,
      "learning_rate": 0.00014784773227222042,
      "loss": 0.1336,
      "step": 1286
    },
    {
      "epoch": 1.2219321148825064,
      "grad_norm": 0.03153330832719803,
      "learning_rate": 0.00014775067064041341,
      "loss": 0.1244,
      "step": 1287
    },
    {
      "epoch": 1.2228815570852125,
      "grad_norm": 0.03229093924164772,
      "learning_rate": 0.00014765355070096728,
      "loss": 0.1331,
      "step": 1288
    },
    {
      "epoch": 1.2238309992879184,
      "grad_norm": 0.03116600587964058,
      "learning_rate": 0.0001475563725724737,
      "loss": 0.1263,
      "step": 1289
    },
    {
      "epoch": 1.2247804414906243,
      "grad_norm": 0.027543194591999054,
      "learning_rate": 0.0001474591363735955,
      "loss": 0.1319,
      "step": 1290
    },
    {
      "epoch": 1.2257298836933301,
      "grad_norm": 0.031299810856580734,
      "learning_rate": 0.00014736184222306637,
      "loss": 0.1235,
      "step": 1291
    },
    {
      "epoch": 1.226679325896036,
      "grad_norm": 0.030574094504117966,
      "learning_rate": 0.00014726449023969073,
      "loss": 0.1337,
      "step": 1292
    },
    {
      "epoch": 1.227628768098742,
      "grad_norm": 0.04210914671421051,
      "learning_rate": 0.0001471670805423437,
      "loss": 0.1439,
      "step": 1293
    },
    {
      "epoch": 1.228578210301448,
      "grad_norm": 0.03234979510307312,
      "learning_rate": 0.00014706961324997077,
      "loss": 0.1339,
      "step": 1294
    },
    {
      "epoch": 1.2295276525041539,
      "grad_norm": 0.028707873076200485,
      "learning_rate": 0.00014697208848158782,
      "loss": 0.1271,
      "step": 1295
    },
    {
      "epoch": 1.2304770947068597,
      "grad_norm": 0.03162172809243202,
      "learning_rate": 0.0001468745063562809,
      "loss": 0.1291,
      "step": 1296
    },
    {
      "epoch": 1.2314265369095656,
      "grad_norm": 0.032575272023677826,
      "learning_rate": 0.00014677686699320614,
      "loss": 0.1345,
      "step": 1297
    },
    {
      "epoch": 1.2323759791122715,
      "grad_norm": 0.02751932106912136,
      "learning_rate": 0.0001466791705115895,
      "loss": 0.12,
      "step": 1298
    },
    {
      "epoch": 1.2333254213149774,
      "grad_norm": 0.026365652680397034,
      "learning_rate": 0.00014658141703072675,
      "loss": 0.1147,
      "step": 1299
    },
    {
      "epoch": 1.2342748635176835,
      "grad_norm": 0.03227808326482773,
      "learning_rate": 0.00014648360666998314,
      "loss": 0.1332,
      "step": 1300
    },
    {
      "epoch": 1.2352243057203893,
      "grad_norm": 0.057433344423770905,
      "learning_rate": 0.00014638573954879356,
      "loss": 0.2349,
      "step": 1301
    },
    {
      "epoch": 1.2361737479230952,
      "grad_norm": 0.0376310870051384,
      "learning_rate": 0.000146287815786662,
      "loss": 0.1588,
      "step": 1302
    },
    {
      "epoch": 1.237123190125801,
      "grad_norm": 0.04079489782452583,
      "learning_rate": 0.00014618983550316182,
      "loss": 0.1625,
      "step": 1303
    },
    {
      "epoch": 1.238072632328507,
      "grad_norm": 0.03367112949490547,
      "learning_rate": 0.00014609179881793524,
      "loss": 0.1266,
      "step": 1304
    },
    {
      "epoch": 1.2390220745312128,
      "grad_norm": 0.030257422477006912,
      "learning_rate": 0.0001459937058506934,
      "loss": 0.132,
      "step": 1305
    },
    {
      "epoch": 1.239971516733919,
      "grad_norm": 0.03830450400710106,
      "learning_rate": 0.00014589555672121622,
      "loss": 0.1349,
      "step": 1306
    },
    {
      "epoch": 1.2409209589366248,
      "grad_norm": 0.030318403616547585,
      "learning_rate": 0.00014579735154935213,
      "loss": 0.1346,
      "step": 1307
    },
    {
      "epoch": 1.2418704011393307,
      "grad_norm": 0.033780913800001144,
      "learning_rate": 0.000145699090455018,
      "loss": 0.1373,
      "step": 1308
    },
    {
      "epoch": 1.2428198433420365,
      "grad_norm": 0.03642027825117111,
      "learning_rate": 0.00014560077355819904,
      "loss": 0.1279,
      "step": 1309
    },
    {
      "epoch": 1.2437692855447424,
      "grad_norm": 0.027665693312883377,
      "learning_rate": 0.00014550240097894852,
      "loss": 0.1177,
      "step": 1310
    },
    {
      "epoch": 1.2447187277474483,
      "grad_norm": 0.030852187424898148,
      "learning_rate": 0.00014540397283738777,
      "loss": 0.1373,
      "step": 1311
    },
    {
      "epoch": 1.2456681699501542,
      "grad_norm": 0.03137564659118652,
      "learning_rate": 0.00014530548925370594,
      "loss": 0.129,
      "step": 1312
    },
    {
      "epoch": 1.2466176121528603,
      "grad_norm": 0.032402969896793365,
      "learning_rate": 0.0001452069503481599,
      "loss": 0.1394,
      "step": 1313
    },
    {
      "epoch": 1.2475670543555661,
      "grad_norm": 0.049039825797080994,
      "learning_rate": 0.00014510835624107396,
      "loss": 0.1593,
      "step": 1314
    },
    {
      "epoch": 1.248516496558272,
      "grad_norm": 0.0339655838906765,
      "learning_rate": 0.00014500970705284006,
      "loss": 0.1367,
      "step": 1315
    },
    {
      "epoch": 1.249465938760978,
      "grad_norm": 0.040319304913282394,
      "learning_rate": 0.00014491100290391716,
      "loss": 0.1679,
      "step": 1316
    },
    {
      "epoch": 1.2504153809636838,
      "grad_norm": 0.024075858294963837,
      "learning_rate": 0.00014481224391483152,
      "loss": 0.1273,
      "step": 1317
    },
    {
      "epoch": 1.2513648231663899,
      "grad_norm": 0.02570619434118271,
      "learning_rate": 0.00014471343020617625,
      "loss": 0.1277,
      "step": 1318
    },
    {
      "epoch": 1.2523142653690957,
      "grad_norm": 0.028086170554161072,
      "learning_rate": 0.00014461456189861132,
      "loss": 0.1246,
      "step": 1319
    },
    {
      "epoch": 1.2532637075718016,
      "grad_norm": 0.02902062050998211,
      "learning_rate": 0.0001445156391128633,
      "loss": 0.1305,
      "step": 1320
    },
    {
      "epoch": 1.2542131497745075,
      "grad_norm": 0.030352482572197914,
      "learning_rate": 0.00014441666196972542,
      "loss": 0.1329,
      "step": 1321
    },
    {
      "epoch": 1.2551625919772134,
      "grad_norm": 0.029998816549777985,
      "learning_rate": 0.00014431763059005718,
      "loss": 0.131,
      "step": 1322
    },
    {
      "epoch": 1.2561120341799192,
      "grad_norm": 0.028111204504966736,
      "learning_rate": 0.00014421854509478435,
      "loss": 0.124,
      "step": 1323
    },
    {
      "epoch": 1.257061476382625,
      "grad_norm": 0.02926759235560894,
      "learning_rate": 0.00014411940560489877,
      "loss": 0.1215,
      "step": 1324
    },
    {
      "epoch": 1.258010918585331,
      "grad_norm": 0.03321680426597595,
      "learning_rate": 0.00014402021224145815,
      "loss": 0.1216,
      "step": 1325
    },
    {
      "epoch": 1.258960360788037,
      "grad_norm": 0.03386010602116585,
      "learning_rate": 0.00014392096512558613,
      "loss": 0.1335,
      "step": 1326
    },
    {
      "epoch": 1.259909802990743,
      "grad_norm": 0.03989921137690544,
      "learning_rate": 0.0001438216643784718,
      "loss": 0.1481,
      "step": 1327
    },
    {
      "epoch": 1.2608592451934488,
      "grad_norm": 0.030915161594748497,
      "learning_rate": 0.00014372231012136995,
      "loss": 0.1254,
      "step": 1328
    },
    {
      "epoch": 1.2618086873961547,
      "grad_norm": 0.04395739734172821,
      "learning_rate": 0.00014362290247560053,
      "loss": 0.1537,
      "step": 1329
    },
    {
      "epoch": 1.2627581295988608,
      "grad_norm": 0.02942941151559353,
      "learning_rate": 0.00014352344156254873,
      "loss": 0.1248,
      "step": 1330
    },
    {
      "epoch": 1.2637075718015667,
      "grad_norm": 0.02858722023665905,
      "learning_rate": 0.00014342392750366485,
      "loss": 0.1236,
      "step": 1331
    },
    {
      "epoch": 1.2646570140042726,
      "grad_norm": 0.029218707233667374,
      "learning_rate": 0.000143324360420464,
      "loss": 0.1227,
      "step": 1332
    },
    {
      "epoch": 1.2656064562069784,
      "grad_norm": 0.03079938143491745,
      "learning_rate": 0.0001432247404345261,
      "loss": 0.1251,
      "step": 1333
    },
    {
      "epoch": 1.2665558984096843,
      "grad_norm": 0.03626713901758194,
      "learning_rate": 0.00014312506766749563,
      "loss": 0.1407,
      "step": 1334
    },
    {
      "epoch": 1.2675053406123902,
      "grad_norm": 0.026556458324193954,
      "learning_rate": 0.00014302534224108152,
      "loss": 0.1235,
      "step": 1335
    },
    {
      "epoch": 1.268454782815096,
      "grad_norm": 0.033421531319618225,
      "learning_rate": 0.00014292556427705706,
      "loss": 0.1324,
      "step": 1336
    },
    {
      "epoch": 1.269404225017802,
      "grad_norm": 0.0425841398537159,
      "learning_rate": 0.00014282573389725966,
      "loss": 0.1674,
      "step": 1337
    },
    {
      "epoch": 1.270353667220508,
      "grad_norm": 0.03258546441793442,
      "learning_rate": 0.00014272585122359068,
      "loss": 0.131,
      "step": 1338
    },
    {
      "epoch": 1.271303109423214,
      "grad_norm": 0.03566194325685501,
      "learning_rate": 0.00014262591637801536,
      "loss": 0.128,
      "step": 1339
    },
    {
      "epoch": 1.2722525516259198,
      "grad_norm": 0.03155380114912987,
      "learning_rate": 0.0001425259294825627,
      "loss": 0.1277,
      "step": 1340
    },
    {
      "epoch": 1.2732019938286256,
      "grad_norm": 0.04435742273926735,
      "learning_rate": 0.00014242589065932524,
      "loss": 0.1594,
      "step": 1341
    },
    {
      "epoch": 1.2741514360313315,
      "grad_norm": 0.03839895501732826,
      "learning_rate": 0.0001423258000304589,
      "loss": 0.1598,
      "step": 1342
    },
    {
      "epoch": 1.2751008782340376,
      "grad_norm": 0.037625472992658615,
      "learning_rate": 0.00014222565771818282,
      "loss": 0.1276,
      "step": 1343
    },
    {
      "epoch": 1.2760503204367435,
      "grad_norm": 0.029852135106921196,
      "learning_rate": 0.00014212546384477934,
      "loss": 0.1272,
      "step": 1344
    },
    {
      "epoch": 1.2769997626394494,
      "grad_norm": 0.04018719121813774,
      "learning_rate": 0.00014202521853259368,
      "loss": 0.153,
      "step": 1345
    },
    {
      "epoch": 1.2779492048421552,
      "grad_norm": 0.028893720358610153,
      "learning_rate": 0.00014192492190403402,
      "loss": 0.1245,
      "step": 1346
    },
    {
      "epoch": 1.2788986470448611,
      "grad_norm": 0.029052307829260826,
      "learning_rate": 0.000141824574081571,
      "loss": 0.1272,
      "step": 1347
    },
    {
      "epoch": 1.279848089247567,
      "grad_norm": 0.03023959882557392,
      "learning_rate": 0.00014172417518773788,
      "loss": 0.1259,
      "step": 1348
    },
    {
      "epoch": 1.2807975314502729,
      "grad_norm": 0.06486006826162338,
      "learning_rate": 0.00014162372534513027,
      "loss": 0.2279,
      "step": 1349
    },
    {
      "epoch": 1.2817469736529787,
      "grad_norm": 0.034129489213228226,
      "learning_rate": 0.00014152322467640599,
      "loss": 0.138,
      "step": 1350
    },
    {
      "epoch": 1.2826964158556848,
      "grad_norm": 0.034559451043605804,
      "learning_rate": 0.0001414226733042849,
      "loss": 0.1321,
      "step": 1351
    },
    {
      "epoch": 1.2836458580583907,
      "grad_norm": 0.03269064798951149,
      "learning_rate": 0.0001413220713515489,
      "loss": 0.1297,
      "step": 1352
    },
    {
      "epoch": 1.2845953002610966,
      "grad_norm": 0.030022764578461647,
      "learning_rate": 0.0001412214189410414,
      "loss": 0.1278,
      "step": 1353
    },
    {
      "epoch": 1.2855447424638025,
      "grad_norm": 0.05017710104584694,
      "learning_rate": 0.00014112071619566766,
      "loss": 0.1572,
      "step": 1354
    },
    {
      "epoch": 1.2864941846665086,
      "grad_norm": 0.035493746399879456,
      "learning_rate": 0.00014101996323839433,
      "loss": 0.1277,
      "step": 1355
    },
    {
      "epoch": 1.2874436268692144,
      "grad_norm": 0.03152285888791084,
      "learning_rate": 0.0001409191601922493,
      "loss": 0.1321,
      "step": 1356
    },
    {
      "epoch": 1.2883930690719203,
      "grad_norm": 0.029683001339435577,
      "learning_rate": 0.00014081830718032175,
      "loss": 0.1224,
      "step": 1357
    },
    {
      "epoch": 1.2893425112746262,
      "grad_norm": 0.03202647715806961,
      "learning_rate": 0.0001407174043257617,
      "loss": 0.13,
      "step": 1358
    },
    {
      "epoch": 1.290291953477332,
      "grad_norm": 0.026956327259540558,
      "learning_rate": 0.00014061645175178025,
      "loss": 0.1225,
      "step": 1359
    },
    {
      "epoch": 1.291241395680038,
      "grad_norm": 0.03426060825586319,
      "learning_rate": 0.00014051544958164903,
      "loss": 0.1345,
      "step": 1360
    },
    {
      "epoch": 1.2921908378827438,
      "grad_norm": 0.031120451167225838,
      "learning_rate": 0.00014041439793870036,
      "loss": 0.1246,
      "step": 1361
    },
    {
      "epoch": 1.2931402800854497,
      "grad_norm": 0.02656574547290802,
      "learning_rate": 0.00014031329694632683,
      "loss": 0.1297,
      "step": 1362
    },
    {
      "epoch": 1.2940897222881558,
      "grad_norm": 0.02752675488591194,
      "learning_rate": 0.00014021214672798143,
      "loss": 0.1294,
      "step": 1363
    },
    {
      "epoch": 1.2950391644908616,
      "grad_norm": 0.02884535677731037,
      "learning_rate": 0.00014011094740717714,
      "loss": 0.126,
      "step": 1364
    },
    {
      "epoch": 1.2959886066935675,
      "grad_norm": 0.03029620461165905,
      "learning_rate": 0.00014000969910748704,
      "loss": 0.1338,
      "step": 1365
    },
    {
      "epoch": 1.2969380488962734,
      "grad_norm": 0.04302069917321205,
      "learning_rate": 0.00013990840195254384,
      "loss": 0.1653,
      "step": 1366
    },
    {
      "epoch": 1.2978874910989793,
      "grad_norm": 0.048259928822517395,
      "learning_rate": 0.00013980705606604011,
      "loss": 0.1269,
      "step": 1367
    },
    {
      "epoch": 1.2988369333016854,
      "grad_norm": 0.029876641929149628,
      "learning_rate": 0.00013970566157172774,
      "loss": 0.1282,
      "step": 1368
    },
    {
      "epoch": 1.2997863755043912,
      "grad_norm": 0.35130763053894043,
      "learning_rate": 0.00013960421859341804,
      "loss": 0.1434,
      "step": 1369
    },
    {
      "epoch": 1.3007358177070971,
      "grad_norm": 0.23888662457466125,
      "learning_rate": 0.00013950272725498156,
      "loss": 0.186,
      "step": 1370
    },
    {
      "epoch": 1.301685259909803,
      "grad_norm": 0.24867364764213562,
      "learning_rate": 0.00013940118768034792,
      "loss": 0.1585,
      "step": 1371
    },
    {
      "epoch": 1.3026347021125089,
      "grad_norm": 0.10958488285541534,
      "learning_rate": 0.0001392995999935055,
      "loss": 0.1448,
      "step": 1372
    },
    {
      "epoch": 1.3035841443152147,
      "grad_norm": 0.05493846908211708,
      "learning_rate": 0.0001391979643185016,
      "loss": 0.1292,
      "step": 1373
    },
    {
      "epoch": 1.3045335865179206,
      "grad_norm": 0.0429663360118866,
      "learning_rate": 0.000139096280779442,
      "loss": 0.1243,
      "step": 1374
    },
    {
      "epoch": 1.3054830287206267,
      "grad_norm": 0.02995472215116024,
      "learning_rate": 0.000138994549500491,
      "loss": 0.1216,
      "step": 1375
    },
    {
      "epoch": 1.3064324709233326,
      "grad_norm": 0.04113904386758804,
      "learning_rate": 0.00013889277060587119,
      "loss": 0.1586,
      "step": 1376
    },
    {
      "epoch": 1.3073819131260385,
      "grad_norm": 0.030523164197802544,
      "learning_rate": 0.0001387909442198632,
      "loss": 0.1212,
      "step": 1377
    },
    {
      "epoch": 1.3083313553287443,
      "grad_norm": 0.04093822091817856,
      "learning_rate": 0.00013868907046680576,
      "loss": 0.1254,
      "step": 1378
    },
    {
      "epoch": 1.3092807975314502,
      "grad_norm": 0.04895343258976936,
      "learning_rate": 0.0001385871494710954,
      "loss": 0.1636,
      "step": 1379
    },
    {
      "epoch": 1.3102302397341563,
      "grad_norm": 0.09062381833791733,
      "learning_rate": 0.0001384851813571864,
      "loss": 0.167,
      "step": 1380
    },
    {
      "epoch": 1.3111796819368622,
      "grad_norm": 0.034514930099248886,
      "learning_rate": 0.00013838316624959044,
      "loss": 0.1186,
      "step": 1381
    },
    {
      "epoch": 1.312129124139568,
      "grad_norm": 0.052746132016181946,
      "learning_rate": 0.0001382811042728767,
      "loss": 0.1289,
      "step": 1382
    },
    {
      "epoch": 1.313078566342274,
      "grad_norm": 0.06090299040079117,
      "learning_rate": 0.00013817899555167154,
      "loss": 0.1599,
      "step": 1383
    },
    {
      "epoch": 1.3140280085449798,
      "grad_norm": 0.036167677491903305,
      "learning_rate": 0.00013807684021065842,
      "loss": 0.1294,
      "step": 1384
    },
    {
      "epoch": 1.3149774507476857,
      "grad_norm": 0.05916972458362579,
      "learning_rate": 0.00013797463837457775,
      "loss": 0.1263,
      "step": 1385
    },
    {
      "epoch": 1.3159268929503916,
      "grad_norm": 0.03488500416278839,
      "learning_rate": 0.00013787239016822662,
      "loss": 0.1347,
      "step": 1386
    },
    {
      "epoch": 1.3168763351530974,
      "grad_norm": 0.038088779896497726,
      "learning_rate": 0.00013777009571645885,
      "loss": 0.1302,
      "step": 1387
    },
    {
      "epoch": 1.3178257773558035,
      "grad_norm": 0.05069038271903992,
      "learning_rate": 0.00013766775514418469,
      "loss": 0.1553,
      "step": 1388
    },
    {
      "epoch": 1.3187752195585094,
      "grad_norm": 0.06475794315338135,
      "learning_rate": 0.00013756536857637065,
      "loss": 0.2018,
      "step": 1389
    },
    {
      "epoch": 1.3197246617612153,
      "grad_norm": 0.03393110632896423,
      "learning_rate": 0.00013746293613803952,
      "loss": 0.1238,
      "step": 1390
    },
    {
      "epoch": 1.3206741039639212,
      "grad_norm": 0.04623769596219063,
      "learning_rate": 0.00013736045795427002,
      "loss": 0.1603,
      "step": 1391
    },
    {
      "epoch": 1.321623546166627,
      "grad_norm": 0.03696979209780693,
      "learning_rate": 0.0001372579341501967,
      "loss": 0.1291,
      "step": 1392
    },
    {
      "epoch": 1.3225729883693331,
      "grad_norm": 0.041281502693891525,
      "learning_rate": 0.00013715536485100994,
      "loss": 0.1344,
      "step": 1393
    },
    {
      "epoch": 1.323522430572039,
      "grad_norm": 0.03636416420340538,
      "learning_rate": 0.00013705275018195557,
      "loss": 0.1334,
      "step": 1394
    },
    {
      "epoch": 1.3244718727747449,
      "grad_norm": 0.037941355258226395,
      "learning_rate": 0.0001369500902683348,
      "loss": 0.1256,
      "step": 1395
    },
    {
      "epoch": 1.3254213149774507,
      "grad_norm": 0.04853476956486702,
      "learning_rate": 0.0001368473852355042,
      "loss": 0.1604,
      "step": 1396
    },
    {
      "epoch": 1.3263707571801566,
      "grad_norm": 0.034059878438711166,
      "learning_rate": 0.00013674463520887533,
      "loss": 0.1308,
      "step": 1397
    },
    {
      "epoch": 1.3273201993828625,
      "grad_norm": 0.03482759743928909,
      "learning_rate": 0.00013664184031391473,
      "loss": 0.128,
      "step": 1398
    },
    {
      "epoch": 1.3282696415855684,
      "grad_norm": 0.032961416989564896,
      "learning_rate": 0.00013653900067614377,
      "loss": 0.1276,
      "step": 1399
    },
    {
      "epoch": 1.3292190837882745,
      "grad_norm": 0.03538922220468521,
      "learning_rate": 0.00013643611642113842,
      "loss": 0.1215,
      "step": 1400
    },
    {
      "epoch": 1.3301685259909803,
      "grad_norm": 0.032544538378715515,
      "learning_rate": 0.00013633318767452903,
      "loss": 0.1235,
      "step": 1401
    },
    {
      "epoch": 1.3311179681936862,
      "grad_norm": 0.04236935079097748,
      "learning_rate": 0.00013623021456200048,
      "loss": 0.1663,
      "step": 1402
    },
    {
      "epoch": 1.332067410396392,
      "grad_norm": 0.04283679649233818,
      "learning_rate": 0.00013612719720929164,
      "loss": 0.1622,
      "step": 1403
    },
    {
      "epoch": 1.333016852599098,
      "grad_norm": 0.03691123425960541,
      "learning_rate": 0.00013602413574219553,
      "loss": 0.1247,
      "step": 1404
    },
    {
      "epoch": 1.333966294801804,
      "grad_norm": 0.028608130291104317,
      "learning_rate": 0.000135921030286559,
      "loss": 0.1153,
      "step": 1405
    },
    {
      "epoch": 1.33491573700451,
      "grad_norm": 0.03310587257146835,
      "learning_rate": 0.00013581788096828253,
      "loss": 0.1305,
      "step": 1406
    },
    {
      "epoch": 1.3358651792072158,
      "grad_norm": 0.03368659317493439,
      "learning_rate": 0.00013571468791332024,
      "loss": 0.128,
      "step": 1407
    },
    {
      "epoch": 1.3368146214099217,
      "grad_norm": 0.04785076901316643,
      "learning_rate": 0.00013561145124767968,
      "loss": 0.1715,
      "step": 1408
    },
    {
      "epoch": 1.3377640636126276,
      "grad_norm": 0.03625485301017761,
      "learning_rate": 0.0001355081710974217,
      "loss": 0.1305,
      "step": 1409
    },
    {
      "epoch": 1.3387135058153334,
      "grad_norm": 0.03318242356181145,
      "learning_rate": 0.00013540484758866,
      "loss": 0.1244,
      "step": 1410
    },
    {
      "epoch": 1.3396629480180393,
      "grad_norm": 0.03148429095745087,
      "learning_rate": 0.0001353014808475615,
      "loss": 0.1311,
      "step": 1411
    },
    {
      "epoch": 1.3406123902207452,
      "grad_norm": 0.03518190607428551,
      "learning_rate": 0.00013519807100034577,
      "loss": 0.1276,
      "step": 1412
    },
    {
      "epoch": 1.3415618324234513,
      "grad_norm": 0.031286850571632385,
      "learning_rate": 0.00013509461817328507,
      "loss": 0.1252,
      "step": 1413
    },
    {
      "epoch": 1.3425112746261572,
      "grad_norm": 0.04668812453746796,
      "learning_rate": 0.00013499112249270407,
      "loss": 0.1639,
      "step": 1414
    },
    {
      "epoch": 1.343460716828863,
      "grad_norm": 0.03220203518867493,
      "learning_rate": 0.00013488758408497988,
      "loss": 0.1254,
      "step": 1415
    },
    {
      "epoch": 1.344410159031569,
      "grad_norm": 0.03599967062473297,
      "learning_rate": 0.0001347840030765417,
      "loss": 0.1307,
      "step": 1416
    },
    {
      "epoch": 1.3453596012342748,
      "grad_norm": 0.03225992992520332,
      "learning_rate": 0.00013468037959387075,
      "loss": 0.12,
      "step": 1417
    },
    {
      "epoch": 1.3463090434369809,
      "grad_norm": 0.0338221937417984,
      "learning_rate": 0.00013457671376350012,
      "loss": 0.1199,
      "step": 1418
    },
    {
      "epoch": 1.3472584856396868,
      "grad_norm": 0.046623844653367996,
      "learning_rate": 0.00013447300571201468,
      "loss": 0.1695,
      "step": 1419
    },
    {
      "epoch": 1.3482079278423926,
      "grad_norm": 0.029645482078194618,
      "learning_rate": 0.00013436925556605078,
      "loss": 0.127,
      "step": 1420
    },
    {
      "epoch": 1.3491573700450985,
      "grad_norm": 0.03167693316936493,
      "learning_rate": 0.00013426546345229618,
      "loss": 0.1268,
      "step": 1421
    },
    {
      "epoch": 1.3501068122478044,
      "grad_norm": 0.04903126880526543,
      "learning_rate": 0.0001341616294974899,
      "loss": 0.1616,
      "step": 1422
    },
    {
      "epoch": 1.3510562544505103,
      "grad_norm": 0.03323996067047119,
      "learning_rate": 0.00013405775382842206,
      "loss": 0.1345,
      "step": 1423
    },
    {
      "epoch": 1.3520056966532161,
      "grad_norm": 0.03047449141740799,
      "learning_rate": 0.0001339538365719337,
      "loss": 0.1286,
      "step": 1424
    },
    {
      "epoch": 1.3529551388559222,
      "grad_norm": 0.03170877322554588,
      "learning_rate": 0.00013384987785491665,
      "loss": 0.1264,
      "step": 1425
    },
    {
      "epoch": 1.353904581058628,
      "grad_norm": 0.03209366276860237,
      "learning_rate": 0.00013374587780431337,
      "loss": 0.1256,
      "step": 1426
    },
    {
      "epoch": 1.354854023261334,
      "grad_norm": 0.04508209601044655,
      "learning_rate": 0.00013364183654711678,
      "loss": 0.1675,
      "step": 1427
    },
    {
      "epoch": 1.3558034654640398,
      "grad_norm": 0.0423690564930439,
      "learning_rate": 0.00013353775421037008,
      "loss": 0.1578,
      "step": 1428
    },
    {
      "epoch": 1.3567529076667457,
      "grad_norm": 0.03641896694898605,
      "learning_rate": 0.0001334336309211668,
      "loss": 0.1321,
      "step": 1429
    },
    {
      "epoch": 1.3577023498694518,
      "grad_norm": 0.03118027374148369,
      "learning_rate": 0.00013332946680665023,
      "loss": 0.1284,
      "step": 1430
    },
    {
      "epoch": 1.3586517920721577,
      "grad_norm": 0.029232513159513474,
      "learning_rate": 0.00013322526199401367,
      "loss": 0.1251,
      "step": 1431
    },
    {
      "epoch": 1.3596012342748636,
      "grad_norm": 0.028796685859560966,
      "learning_rate": 0.00013312101661050007,
      "loss": 0.1266,
      "step": 1432
    },
    {
      "epoch": 1.3605506764775694,
      "grad_norm": 0.04318710416555405,
      "learning_rate": 0.00013301673078340196,
      "loss": 0.162,
      "step": 1433
    },
    {
      "epoch": 1.3615001186802753,
      "grad_norm": 0.0319884791970253,
      "learning_rate": 0.00013291240464006118,
      "loss": 0.1248,
      "step": 1434
    },
    {
      "epoch": 1.3624495608829812,
      "grad_norm": 0.034977950155735016,
      "learning_rate": 0.0001328080383078689,
      "loss": 0.1325,
      "step": 1435
    },
    {
      "epoch": 1.363399003085687,
      "grad_norm": 0.026079673320055008,
      "learning_rate": 0.00013270363191426524,
      "loss": 0.1266,
      "step": 1436
    },
    {
      "epoch": 1.364348445288393,
      "grad_norm": 0.03158127889037132,
      "learning_rate": 0.0001325991855867394,
      "loss": 0.1326,
      "step": 1437
    },
    {
      "epoch": 1.365297887491099,
      "grad_norm": 0.030608203262090683,
      "learning_rate": 0.00013249469945282916,
      "loss": 0.1358,
      "step": 1438
    },
    {
      "epoch": 1.366247329693805,
      "grad_norm": 0.028404321521520615,
      "learning_rate": 0.00013239017364012105,
      "loss": 0.1273,
      "step": 1439
    },
    {
      "epoch": 1.3671967718965108,
      "grad_norm": 0.03177287429571152,
      "learning_rate": 0.00013228560827624995,
      "loss": 0.1241,
      "step": 1440
    },
    {
      "epoch": 1.3681462140992167,
      "grad_norm": 0.030803440138697624,
      "learning_rate": 0.00013218100348889912,
      "loss": 0.1271,
      "step": 1441
    },
    {
      "epoch": 1.3690956563019228,
      "grad_norm": 0.038757532835006714,
      "learning_rate": 0.0001320763594057999,
      "loss": 0.1302,
      "step": 1442
    },
    {
      "epoch": 1.3700450985046286,
      "grad_norm": 0.030192391946911812,
      "learning_rate": 0.00013197167615473164,
      "loss": 0.1246,
      "step": 1443
    },
    {
      "epoch": 1.3709945407073345,
      "grad_norm": 0.056465089321136475,
      "learning_rate": 0.00013186695386352158,
      "loss": 0.186,
      "step": 1444
    },
    {
      "epoch": 1.3719439829100404,
      "grad_norm": 0.029975950717926025,
      "learning_rate": 0.00013176219266004442,
      "loss": 0.1238,
      "step": 1445
    },
    {
      "epoch": 1.3728934251127463,
      "grad_norm": 0.03526061028242111,
      "learning_rate": 0.00013165739267222262,
      "loss": 0.1198,
      "step": 1446
    },
    {
      "epoch": 1.3738428673154521,
      "grad_norm": 0.031407009810209274,
      "learning_rate": 0.0001315525540280259,
      "loss": 0.1283,
      "step": 1447
    },
    {
      "epoch": 1.374792309518158,
      "grad_norm": 0.05062803998589516,
      "learning_rate": 0.0001314476768554712,
      "loss": 0.1598,
      "step": 1448
    },
    {
      "epoch": 1.3757417517208639,
      "grad_norm": 0.05837153270840645,
      "learning_rate": 0.0001313427612826224,
      "loss": 0.1706,
      "step": 1449
    },
    {
      "epoch": 1.37669119392357,
      "grad_norm": 0.032810915261507034,
      "learning_rate": 0.0001312378074375904,
      "loss": 0.1228,
      "step": 1450
    },
    {
      "epoch": 1.3776406361262759,
      "grad_norm": 0.052471403032541275,
      "learning_rate": 0.0001311328154485328,
      "loss": 0.2028,
      "step": 1451
    },
    {
      "epoch": 1.3785900783289817,
      "grad_norm": 0.062009330838918686,
      "learning_rate": 0.00013102778544365378,
      "loss": 0.1659,
      "step": 1452
    },
    {
      "epoch": 1.3795395205316876,
      "grad_norm": 0.03157106414437294,
      "learning_rate": 0.00013092271755120392,
      "loss": 0.1103,
      "step": 1453
    },
    {
      "epoch": 1.3804889627343935,
      "grad_norm": 0.03269607201218605,
      "learning_rate": 0.00013081761189948006,
      "loss": 0.1274,
      "step": 1454
    },
    {
      "epoch": 1.3814384049370996,
      "grad_norm": 0.032491281628608704,
      "learning_rate": 0.00013071246861682515,
      "loss": 0.1299,
      "step": 1455
    },
    {
      "epoch": 1.3823878471398054,
      "grad_norm": 0.04476369544863701,
      "learning_rate": 0.00013060728783162814,
      "loss": 0.1543,
      "step": 1456
    },
    {
      "epoch": 1.3833372893425113,
      "grad_norm": 0.03646747022867203,
      "learning_rate": 0.0001305020696723237,
      "loss": 0.1401,
      "step": 1457
    },
    {
      "epoch": 1.3842867315452172,
      "grad_norm": 0.03056688979268074,
      "learning_rate": 0.0001303968142673922,
      "loss": 0.1318,
      "step": 1458
    },
    {
      "epoch": 1.385236173747923,
      "grad_norm": 0.0505138523876667,
      "learning_rate": 0.00013029152174535942,
      "loss": 0.1702,
      "step": 1459
    },
    {
      "epoch": 1.386185615950629,
      "grad_norm": 0.03478003665804863,
      "learning_rate": 0.00013018619223479654,
      "loss": 0.136,
      "step": 1460
    },
    {
      "epoch": 1.3871350581533348,
      "grad_norm": 0.03196396306157112,
      "learning_rate": 0.00013008082586431983,
      "loss": 0.1235,
      "step": 1461
    },
    {
      "epoch": 1.3880845003560407,
      "grad_norm": 0.030580265447497368,
      "learning_rate": 0.0001299754227625907,
      "loss": 0.1274,
      "step": 1462
    },
    {
      "epoch": 1.3890339425587468,
      "grad_norm": 0.043844275176525116,
      "learning_rate": 0.00012986998305831524,
      "loss": 0.172,
      "step": 1463
    },
    {
      "epoch": 1.3899833847614527,
      "grad_norm": 0.031638097018003464,
      "learning_rate": 0.00012976450688024433,
      "loss": 0.1221,
      "step": 1464
    },
    {
      "epoch": 1.3909328269641585,
      "grad_norm": 0.030004551634192467,
      "learning_rate": 0.00012965899435717337,
      "loss": 0.1321,
      "step": 1465
    },
    {
      "epoch": 1.3918822691668644,
      "grad_norm": 0.031170252710580826,
      "learning_rate": 0.00012955344561794218,
      "loss": 0.1327,
      "step": 1466
    },
    {
      "epoch": 1.3928317113695705,
      "grad_norm": 0.02902391366660595,
      "learning_rate": 0.00012944786079143472,
      "loss": 0.1266,
      "step": 1467
    },
    {
      "epoch": 1.3937811535722764,
      "grad_norm": 0.031269557774066925,
      "learning_rate": 0.00012934224000657913,
      "loss": 0.1237,
      "step": 1468
    },
    {
      "epoch": 1.3947305957749823,
      "grad_norm": 0.04232865571975708,
      "learning_rate": 0.0001292365833923473,
      "loss": 0.1529,
      "step": 1469
    },
    {
      "epoch": 1.3956800379776881,
      "grad_norm": 0.03645455837249756,
      "learning_rate": 0.00012913089107775502,
      "loss": 0.1594,
      "step": 1470
    },
    {
      "epoch": 1.396629480180394,
      "grad_norm": 0.029367268085479736,
      "learning_rate": 0.00012902516319186161,
      "loss": 0.121,
      "step": 1471
    },
    {
      "epoch": 1.3975789223830999,
      "grad_norm": 0.03407928720116615,
      "learning_rate": 0.00012891939986376985,
      "loss": 0.1289,
      "step": 1472
    },
    {
      "epoch": 1.3985283645858058,
      "grad_norm": 0.034839022904634476,
      "learning_rate": 0.00012881360122262575,
      "loss": 0.1321,
      "step": 1473
    },
    {
      "epoch": 1.3994778067885116,
      "grad_norm": 0.04005248472094536,
      "learning_rate": 0.00012870776739761847,
      "loss": 0.17,
      "step": 1474
    },
    {
      "epoch": 1.4004272489912177,
      "grad_norm": 0.045347243547439575,
      "learning_rate": 0.00012860189851798012,
      "loss": 0.16,
      "step": 1475
    },
    {
      "epoch": 1.4013766911939236,
      "grad_norm": 0.031093263998627663,
      "learning_rate": 0.00012849599471298565,
      "loss": 0.1268,
      "step": 1476
    },
    {
      "epoch": 1.4023261333966295,
      "grad_norm": 0.025807669386267662,
      "learning_rate": 0.00012839005611195269,
      "loss": 0.1155,
      "step": 1477
    },
    {
      "epoch": 1.4032755755993354,
      "grad_norm": 0.04496198520064354,
      "learning_rate": 0.00012828408284424117,
      "loss": 0.1696,
      "step": 1478
    },
    {
      "epoch": 1.4042250178020412,
      "grad_norm": 0.028986521065235138,
      "learning_rate": 0.00012817807503925357,
      "loss": 0.1244,
      "step": 1479
    },
    {
      "epoch": 1.4051744600047473,
      "grad_norm": 0.043852079659700394,
      "learning_rate": 0.00012807203282643443,
      "loss": 0.1562,
      "step": 1480
    },
    {
      "epoch": 1.4061239022074532,
      "grad_norm": 0.028449110686779022,
      "learning_rate": 0.00012796595633527032,
      "loss": 0.1276,
      "step": 1481
    },
    {
      "epoch": 1.407073344410159,
      "grad_norm": 0.03772464022040367,
      "learning_rate": 0.00012785984569528975,
      "loss": 0.163,
      "step": 1482
    },
    {
      "epoch": 1.408022786612865,
      "grad_norm": 0.03227540850639343,
      "learning_rate": 0.00012775370103606276,
      "loss": 0.1272,
      "step": 1483
    },
    {
      "epoch": 1.4089722288155708,
      "grad_norm": 0.03001963160932064,
      "learning_rate": 0.0001276475224872011,
      "loss": 0.1256,
      "step": 1484
    },
    {
      "epoch": 1.4099216710182767,
      "grad_norm": 0.0357728935778141,
      "learning_rate": 0.00012754131017835777,
      "loss": 0.1355,
      "step": 1485
    },
    {
      "epoch": 1.4108711132209826,
      "grad_norm": 0.03219794109463692,
      "learning_rate": 0.0001274350642392271,
      "loss": 0.1304,
      "step": 1486
    },
    {
      "epoch": 1.4118205554236885,
      "grad_norm": 0.04606242850422859,
      "learning_rate": 0.00012732878479954445,
      "loss": 0.1629,
      "step": 1487
    },
    {
      "epoch": 1.4127699976263945,
      "grad_norm": 0.04288827255368233,
      "learning_rate": 0.000127222471989086,
      "loss": 0.1667,
      "step": 1488
    },
    {
      "epoch": 1.4137194398291004,
      "grad_norm": 0.031533095985651016,
      "learning_rate": 0.0001271161259376688,
      "loss": 0.1266,
      "step": 1489
    },
    {
      "epoch": 1.4146688820318063,
      "grad_norm": 0.03418329730629921,
      "learning_rate": 0.00012700974677515046,
      "loss": 0.1441,
      "step": 1490
    },
    {
      "epoch": 1.4156183242345122,
      "grad_norm": 0.028918685391545296,
      "learning_rate": 0.00012690333463142897,
      "loss": 0.117,
      "step": 1491
    },
    {
      "epoch": 1.4165677664372183,
      "grad_norm": 0.04630662500858307,
      "learning_rate": 0.00012679688963644265,
      "loss": 0.1694,
      "step": 1492
    },
    {
      "epoch": 1.4175172086399241,
      "grad_norm": 0.028670761734247208,
      "learning_rate": 0.00012669041192016993,
      "loss": 0.1218,
      "step": 1493
    },
    {
      "epoch": 1.41846665084263,
      "grad_norm": 0.03250902146100998,
      "learning_rate": 0.0001265839016126291,
      "loss": 0.1353,
      "step": 1494
    },
    {
      "epoch": 1.419416093045336,
      "grad_norm": 0.03904202580451965,
      "learning_rate": 0.00012647735884387842,
      "loss": 0.1566,
      "step": 1495
    },
    {
      "epoch": 1.4203655352480418,
      "grad_norm": 0.030610278248786926,
      "learning_rate": 0.00012637078374401568,
      "loss": 0.1248,
      "step": 1496
    },
    {
      "epoch": 1.4213149774507476,
      "grad_norm": 0.0320439413189888,
      "learning_rate": 0.00012626417644317808,
      "loss": 0.1341,
      "step": 1497
    },
    {
      "epoch": 1.4222644196534535,
      "grad_norm": 0.03740748390555382,
      "learning_rate": 0.0001261575370715423,
      "loss": 0.1374,
      "step": 1498
    },
    {
      "epoch": 1.4232138618561594,
      "grad_norm": 0.041164278984069824,
      "learning_rate": 0.00012605086575932407,
      "loss": 0.1242,
      "step": 1499
    },
    {
      "epoch": 1.4241633040588655,
      "grad_norm": 0.02908271551132202,
      "learning_rate": 0.00012594416263677816,
      "loss": 0.1224,
      "step": 1500
    },
    {
      "epoch": 1.4251127462615714,
      "grad_norm": 0.030539128929376602,
      "learning_rate": 0.0001258374278341982,
      "loss": 0.1236,
      "step": 1501
    },
    {
      "epoch": 1.4260621884642772,
      "grad_norm": 0.027197500690817833,
      "learning_rate": 0.00012573066148191647,
      "loss": 0.1254,
      "step": 1502
    },
    {
      "epoch": 1.427011630666983,
      "grad_norm": 0.031813718378543854,
      "learning_rate": 0.00012562386371030377,
      "loss": 0.1294,
      "step": 1503
    },
    {
      "epoch": 1.427961072869689,
      "grad_norm": 0.04184641316533089,
      "learning_rate": 0.00012551703464976928,
      "loss": 0.1615,
      "step": 1504
    },
    {
      "epoch": 1.428910515072395,
      "grad_norm": 0.03790717199444771,
      "learning_rate": 0.00012541017443076042,
      "loss": 0.1638,
      "step": 1505
    },
    {
      "epoch": 1.429859957275101,
      "grad_norm": 0.03084125556051731,
      "learning_rate": 0.00012530328318376258,
      "loss": 0.1292,
      "step": 1506
    },
    {
      "epoch": 1.4308093994778068,
      "grad_norm": 0.042278289794921875,
      "learning_rate": 0.00012519636103929912,
      "loss": 0.1691,
      "step": 1507
    },
    {
      "epoch": 1.4317588416805127,
      "grad_norm": 0.02734595723450184,
      "learning_rate": 0.0001250894081279311,
      "loss": 0.1248,
      "step": 1508
    },
    {
      "epoch": 1.4327082838832186,
      "grad_norm": 0.02997264452278614,
      "learning_rate": 0.00012498242458025712,
      "loss": 0.124,
      "step": 1509
    },
    {
      "epoch": 1.4336577260859245,
      "grad_norm": 0.031008126214146614,
      "learning_rate": 0.00012487541052691323,
      "loss": 0.1335,
      "step": 1510
    },
    {
      "epoch": 1.4346071682886303,
      "grad_norm": 0.042471520602703094,
      "learning_rate": 0.0001247683660985727,
      "loss": 0.1589,
      "step": 1511
    },
    {
      "epoch": 1.4355566104913362,
      "grad_norm": 0.027912134304642677,
      "learning_rate": 0.00012466129142594588,
      "loss": 0.1208,
      "step": 1512
    },
    {
      "epoch": 1.4365060526940423,
      "grad_norm": 0.03753120079636574,
      "learning_rate": 0.0001245541866397801,
      "loss": 0.1626,
      "step": 1513
    },
    {
      "epoch": 1.4374554948967482,
      "grad_norm": 0.02756452187895775,
      "learning_rate": 0.0001244470518708594,
      "loss": 0.1215,
      "step": 1514
    },
    {
      "epoch": 1.438404937099454,
      "grad_norm": 0.03357706964015961,
      "learning_rate": 0.0001243398872500045,
      "loss": 0.1376,
      "step": 1515
    },
    {
      "epoch": 1.43935437930216,
      "grad_norm": 0.032955266535282135,
      "learning_rate": 0.00012423269290807258,
      "loss": 0.1364,
      "step": 1516
    },
    {
      "epoch": 1.440303821504866,
      "grad_norm": 0.03405732661485672,
      "learning_rate": 0.000124125468975957,
      "loss": 0.126,
      "step": 1517
    },
    {
      "epoch": 1.441253263707572,
      "grad_norm": 0.03659766912460327,
      "learning_rate": 0.00012401821558458728,
      "loss": 0.1561,
      "step": 1518
    },
    {
      "epoch": 1.4422027059102778,
      "grad_norm": 0.030413653701543808,
      "learning_rate": 0.00012391093286492905,
      "loss": 0.1253,
      "step": 1519
    },
    {
      "epoch": 1.4431521481129836,
      "grad_norm": 0.027322586625814438,
      "learning_rate": 0.00012380362094798362,
      "loss": 0.1217,
      "step": 1520
    },
    {
      "epoch": 1.4441015903156895,
      "grad_norm": 0.037558842450380325,
      "learning_rate": 0.00012369627996478797,
      "loss": 0.1348,
      "step": 1521
    },
    {
      "epoch": 1.4450510325183954,
      "grad_norm": 0.028927726671099663,
      "learning_rate": 0.0001235889100464146,
      "loss": 0.1184,
      "step": 1522
    },
    {
      "epoch": 1.4460004747211013,
      "grad_norm": 0.028258686885237694,
      "learning_rate": 0.00012348151132397133,
      "loss": 0.1276,
      "step": 1523
    },
    {
      "epoch": 1.4469499169238071,
      "grad_norm": 0.027749182656407356,
      "learning_rate": 0.00012337408392860117,
      "loss": 0.1235,
      "step": 1524
    },
    {
      "epoch": 1.4478993591265132,
      "grad_norm": 0.03684193268418312,
      "learning_rate": 0.0001232666279914821,
      "loss": 0.1421,
      "step": 1525
    },
    {
      "epoch": 1.4488488013292191,
      "grad_norm": 0.033882539719343185,
      "learning_rate": 0.00012315914364382705,
      "loss": 0.1236,
      "step": 1526
    },
    {
      "epoch": 1.449798243531925,
      "grad_norm": 0.03675851225852966,
      "learning_rate": 0.00012305163101688352,
      "loss": 0.1618,
      "step": 1527
    },
    {
      "epoch": 1.4507476857346309,
      "grad_norm": 0.030739063397049904,
      "learning_rate": 0.00012294409024193355,
      "loss": 0.127,
      "step": 1528
    },
    {
      "epoch": 1.4516971279373367,
      "grad_norm": 0.026678606867790222,
      "learning_rate": 0.00012283652145029362,
      "loss": 0.1191,
      "step": 1529
    },
    {
      "epoch": 1.4526465701400428,
      "grad_norm": 0.028862981125712395,
      "learning_rate": 0.0001227289247733144,
      "loss": 0.1255,
      "step": 1530
    },
    {
      "epoch": 1.4535960123427487,
      "grad_norm": 0.02993926964700222,
      "learning_rate": 0.0001226213003423807,
      "loss": 0.1235,
      "step": 1531
    },
    {
      "epoch": 1.4545454545454546,
      "grad_norm": 0.02765621617436409,
      "learning_rate": 0.00012251364828891093,
      "loss": 0.127,
      "step": 1532
    },
    {
      "epoch": 1.4554948967481605,
      "grad_norm": 0.03139231353998184,
      "learning_rate": 0.00012240596874435756,
      "loss": 0.1225,
      "step": 1533
    },
    {
      "epoch": 1.4564443389508663,
      "grad_norm": 0.030977755784988403,
      "learning_rate": 0.00012229826184020649,
      "loss": 0.1249,
      "step": 1534
    },
    {
      "epoch": 1.4573937811535722,
      "grad_norm": 0.03158799931406975,
      "learning_rate": 0.000122190527707977,
      "loss": 0.1254,
      "step": 1535
    },
    {
      "epoch": 1.458343223356278,
      "grad_norm": 0.03206062689423561,
      "learning_rate": 0.00012208276647922162,
      "loss": 0.1332,
      "step": 1536
    },
    {
      "epoch": 1.459292665558984,
      "grad_norm": 0.028509238734841347,
      "learning_rate": 0.00012197497828552601,
      "loss": 0.1196,
      "step": 1537
    },
    {
      "epoch": 1.46024210776169,
      "grad_norm": 0.03207945078611374,
      "learning_rate": 0.0001218671632585088,
      "loss": 0.1241,
      "step": 1538
    },
    {
      "epoch": 1.461191549964396,
      "grad_norm": 0.026839956641197205,
      "learning_rate": 0.00012175932152982125,
      "loss": 0.1246,
      "step": 1539
    },
    {
      "epoch": 1.4621409921671018,
      "grad_norm": 0.03129103407263756,
      "learning_rate": 0.0001216514532311474,
      "loss": 0.1192,
      "step": 1540
    },
    {
      "epoch": 1.4630904343698077,
      "grad_norm": 0.027821926400065422,
      "learning_rate": 0.00012154355849420353,
      "loss": 0.1221,
      "step": 1541
    },
    {
      "epoch": 1.4640398765725138,
      "grad_norm": 0.028374498710036278,
      "learning_rate": 0.00012143563745073836,
      "loss": 0.1253,
      "step": 1542
    },
    {
      "epoch": 1.4649893187752197,
      "grad_norm": 0.05225376784801483,
      "learning_rate": 0.0001213276902325327,
      "loss": 0.1194,
      "step": 1543
    },
    {
      "epoch": 1.4659387609779255,
      "grad_norm": 0.04315062612295151,
      "learning_rate": 0.00012121971697139926,
      "loss": 0.1613,
      "step": 1544
    },
    {
      "epoch": 1.4668882031806314,
      "grad_norm": 0.027962563559412956,
      "learning_rate": 0.00012111171779918264,
      "loss": 0.1284,
      "step": 1545
    },
    {
      "epoch": 1.4678376453833373,
      "grad_norm": 0.0343998521566391,
      "learning_rate": 0.000121003692847759,
      "loss": 0.1208,
      "step": 1546
    },
    {
      "epoch": 1.4687870875860431,
      "grad_norm": 0.031139155849814415,
      "learning_rate": 0.00012089564224903607,
      "loss": 0.1323,
      "step": 1547
    },
    {
      "epoch": 1.469736529788749,
      "grad_norm": 0.028015103191137314,
      "learning_rate": 0.00012078756613495277,
      "loss": 0.1266,
      "step": 1548
    },
    {
      "epoch": 1.470685971991455,
      "grad_norm": 0.04571033641695976,
      "learning_rate": 0.00012067946463747928,
      "loss": 0.1561,
      "step": 1549
    },
    {
      "epoch": 1.471635414194161,
      "grad_norm": 0.027564501389861107,
      "learning_rate": 0.00012057133788861677,
      "loss": 0.1214,
      "step": 1550
    },
    {
      "epoch": 1.4725848563968669,
      "grad_norm": 0.027498599141836166,
      "learning_rate": 0.00012046318602039717,
      "loss": 0.1233,
      "step": 1551
    },
    {
      "epoch": 1.4735342985995727,
      "grad_norm": 0.040304671972990036,
      "learning_rate": 0.00012035500916488316,
      "loss": 0.1636,
      "step": 1552
    },
    {
      "epoch": 1.4744837408022786,
      "grad_norm": 0.045689020305871964,
      "learning_rate": 0.00012024680745416787,
      "loss": 0.1696,
      "step": 1553
    },
    {
      "epoch": 1.4754331830049845,
      "grad_norm": 0.02754673734307289,
      "learning_rate": 0.00012013858102037485,
      "loss": 0.1265,
      "step": 1554
    },
    {
      "epoch": 1.4763826252076906,
      "grad_norm": 0.04088296741247177,
      "learning_rate": 0.0001200303299956578,
      "loss": 0.1675,
      "step": 1555
    },
    {
      "epoch": 1.4773320674103965,
      "grad_norm": 0.03440406173467636,
      "learning_rate": 0.0001199220545122004,
      "loss": 0.1245,
      "step": 1556
    },
    {
      "epoch": 1.4782815096131023,
      "grad_norm": 0.04637245833873749,
      "learning_rate": 0.00011981375470221628,
      "loss": 0.1668,
      "step": 1557
    },
    {
      "epoch": 1.4792309518158082,
      "grad_norm": 0.03825761005282402,
      "learning_rate": 0.00011970543069794875,
      "loss": 0.1599,
      "step": 1558
    },
    {
      "epoch": 1.480180394018514,
      "grad_norm": 0.031823255121707916,
      "learning_rate": 0.00011959708263167067,
      "loss": 0.1232,
      "step": 1559
    },
    {
      "epoch": 1.48112983622122,
      "grad_norm": 0.032104745507240295,
      "learning_rate": 0.00011948871063568419,
      "loss": 0.1237,
      "step": 1560
    },
    {
      "epoch": 1.4820792784239258,
      "grad_norm": 0.05664534121751785,
      "learning_rate": 0.00011938031484232079,
      "loss": 0.1617,
      "step": 1561
    },
    {
      "epoch": 1.483028720626632,
      "grad_norm": 0.02819441631436348,
      "learning_rate": 0.00011927189538394101,
      "loss": 0.1198,
      "step": 1562
    },
    {
      "epoch": 1.4839781628293378,
      "grad_norm": 0.030618941411376,
      "learning_rate": 0.00011916345239293423,
      "loss": 0.1291,
      "step": 1563
    },
    {
      "epoch": 1.4849276050320437,
      "grad_norm": 0.027746165171265602,
      "learning_rate": 0.00011905498600171859,
      "loss": 0.1287,
      "step": 1564
    },
    {
      "epoch": 1.4858770472347496,
      "grad_norm": 0.05675269663333893,
      "learning_rate": 0.00011894649634274075,
      "loss": 0.1988,
      "step": 1565
    },
    {
      "epoch": 1.4868264894374554,
      "grad_norm": 0.044594231992959976,
      "learning_rate": 0.00011883798354847589,
      "loss": 0.1611,
      "step": 1566
    },
    {
      "epoch": 1.4877759316401615,
      "grad_norm": 0.050554268062114716,
      "learning_rate": 0.00011872944775142736,
      "loss": 0.1762,
      "step": 1567
    },
    {
      "epoch": 1.4887253738428674,
      "grad_norm": 0.04185627028346062,
      "learning_rate": 0.0001186208890841266,
      "loss": 0.1593,
      "step": 1568
    },
    {
      "epoch": 1.4896748160455733,
      "grad_norm": 0.030089320614933968,
      "learning_rate": 0.00011851230767913303,
      "loss": 0.1217,
      "step": 1569
    },
    {
      "epoch": 1.4906242582482792,
      "grad_norm": 0.033343639224767685,
      "learning_rate": 0.00011840370366903382,
      "loss": 0.1284,
      "step": 1570
    },
    {
      "epoch": 1.491573700450985,
      "grad_norm": 0.030400337651371956,
      "learning_rate": 0.00011829507718644366,
      "loss": 0.1315,
      "step": 1571
    },
    {
      "epoch": 1.492523142653691,
      "grad_norm": 0.026966162025928497,
      "learning_rate": 0.00011818642836400477,
      "loss": 0.1206,
      "step": 1572
    },
    {
      "epoch": 1.4934725848563968,
      "grad_norm": 0.030626913532614708,
      "learning_rate": 0.00011807775733438664,
      "loss": 0.1228,
      "step": 1573
    },
    {
      "epoch": 1.4944220270591027,
      "grad_norm": 0.03993997722864151,
      "learning_rate": 0.00011796906423028588,
      "loss": 0.1488,
      "step": 1574
    },
    {
      "epoch": 1.4953714692618088,
      "grad_norm": 0.030942171812057495,
      "learning_rate": 0.00011786034918442596,
      "loss": 0.1284,
      "step": 1575
    },
    {
      "epoch": 1.4963209114645146,
      "grad_norm": 0.027237005531787872,
      "learning_rate": 0.00011775161232955729,
      "loss": 0.1278,
      "step": 1576
    },
    {
      "epoch": 1.4972703536672205,
      "grad_norm": 0.030200913548469543,
      "learning_rate": 0.0001176428537984568,
      "loss": 0.1264,
      "step": 1577
    },
    {
      "epoch": 1.4982197958699264,
      "grad_norm": 0.03261629864573479,
      "learning_rate": 0.00011753407372392795,
      "loss": 0.128,
      "step": 1578
    },
    {
      "epoch": 1.4991692380726322,
      "grad_norm": 0.033451877534389496,
      "learning_rate": 0.0001174252722388005,
      "loss": 0.1345,
      "step": 1579
    },
    {
      "epoch": 1.5001186802753383,
      "grad_norm": 0.045393262058496475,
      "learning_rate": 0.00011731644947593026,
      "loss": 0.1627,
      "step": 1580
    },
    {
      "epoch": 1.5010681224780442,
      "grad_norm": 0.05379907041788101,
      "learning_rate": 0.00011720760556819916,
      "loss": 0.1867,
      "step": 1581
    },
    {
      "epoch": 1.50201756468075,
      "grad_norm": 0.025756366550922394,
      "learning_rate": 0.00011709874064851487,
      "loss": 0.1229,
      "step": 1582
    },
    {
      "epoch": 1.502967006883456,
      "grad_norm": 0.03149167448282242,
      "learning_rate": 0.00011698985484981077,
      "loss": 0.124,
      "step": 1583
    },
    {
      "epoch": 1.5039164490861618,
      "grad_norm": 0.044556062668561935,
      "learning_rate": 0.00011688094830504566,
      "loss": 0.1522,
      "step": 1584
    },
    {
      "epoch": 1.5048658912888677,
      "grad_norm": 0.029473567381501198,
      "learning_rate": 0.00011677202114720374,
      "loss": 0.1231,
      "step": 1585
    },
    {
      "epoch": 1.5058153334915736,
      "grad_norm": 0.02735304646193981,
      "learning_rate": 0.00011666307350929435,
      "loss": 0.1242,
      "step": 1586
    },
    {
      "epoch": 1.5067647756942795,
      "grad_norm": 0.04760657250881195,
      "learning_rate": 0.00011655410552435184,
      "loss": 0.1574,
      "step": 1587
    },
    {
      "epoch": 1.5077142178969856,
      "grad_norm": 0.029798056930303574,
      "learning_rate": 0.00011644511732543542,
      "loss": 0.1289,
      "step": 1588
    },
    {
      "epoch": 1.5086636600996914,
      "grad_norm": 0.0329173281788826,
      "learning_rate": 0.00011633610904562892,
      "loss": 0.1251,
      "step": 1589
    },
    {
      "epoch": 1.5096131023023973,
      "grad_norm": 0.030921783298254013,
      "learning_rate": 0.00011622708081804081,
      "loss": 0.1257,
      "step": 1590
    },
    {
      "epoch": 1.5105625445051034,
      "grad_norm": 0.031381431967020035,
      "learning_rate": 0.0001161180327758038,
      "loss": 0.1258,
      "step": 1591
    },
    {
      "epoch": 1.5115119867078093,
      "grad_norm": 0.050078343600034714,
      "learning_rate": 0.00011600896505207488,
      "loss": 0.1764,
      "step": 1592
    },
    {
      "epoch": 1.5124614289105152,
      "grad_norm": 0.04285876452922821,
      "learning_rate": 0.00011589987778003501,
      "loss": 0.1607,
      "step": 1593
    },
    {
      "epoch": 1.513410871113221,
      "grad_norm": 0.02959441766142845,
      "learning_rate": 0.00011579077109288907,
      "loss": 0.1266,
      "step": 1594
    },
    {
      "epoch": 1.514360313315927,
      "grad_norm": 0.03024190105497837,
      "learning_rate": 0.00011568164512386559,
      "loss": 0.1306,
      "step": 1595
    },
    {
      "epoch": 1.5153097555186328,
      "grad_norm": 0.031076421961188316,
      "learning_rate": 0.00011557250000621674,
      "loss": 0.1326,
      "step": 1596
    },
    {
      "epoch": 1.5162591977213387,
      "grad_norm": 0.10080650448799133,
      "learning_rate": 0.00011546333587321795,
      "loss": 0.1596,
      "step": 1597
    },
    {
      "epoch": 1.5172086399240445,
      "grad_norm": 0.029658254235982895,
      "learning_rate": 0.000115354152858168,
      "loss": 0.1306,
      "step": 1598
    },
    {
      "epoch": 1.5181580821267504,
      "grad_norm": 0.025450505316257477,
      "learning_rate": 0.00011524495109438857,
      "loss": 0.1199,
      "step": 1599
    },
    {
      "epoch": 1.5191075243294563,
      "grad_norm": 0.0450344942510128,
      "learning_rate": 0.00011513573071522439,
      "loss": 0.1671,
      "step": 1600
    },
    {
      "epoch": 1.5200569665321624,
      "grad_norm": 0.02928159199655056,
      "learning_rate": 0.00011502649185404281,
      "loss": 0.1247,
      "step": 1601
    },
    {
      "epoch": 1.5210064087348683,
      "grad_norm": 0.04587600752711296,
      "learning_rate": 0.00011491723464423385,
      "loss": 0.1746,
      "step": 1602
    },
    {
      "epoch": 1.5219558509375741,
      "grad_norm": 0.03615221753716469,
      "learning_rate": 0.00011480795921920984,
      "loss": 0.153,
      "step": 1603
    },
    {
      "epoch": 1.5229052931402802,
      "grad_norm": 0.026470355689525604,
      "learning_rate": 0.00011469866571240535,
      "loss": 0.1204,
      "step": 1604
    },
    {
      "epoch": 1.523854735342986,
      "grad_norm": 0.0324716791510582,
      "learning_rate": 0.00011458935425727713,
      "loss": 0.127,
      "step": 1605
    },
    {
      "epoch": 1.524804177545692,
      "grad_norm": 0.028570136055350304,
      "learning_rate": 0.00011448002498730375,
      "loss": 0.1244,
      "step": 1606
    },
    {
      "epoch": 1.5257536197483978,
      "grad_norm": 0.03165844827890396,
      "learning_rate": 0.00011437067803598558,
      "loss": 0.1286,
      "step": 1607
    },
    {
      "epoch": 1.5267030619511037,
      "grad_norm": 0.029820239171385765,
      "learning_rate": 0.00011426131353684457,
      "loss": 0.1232,
      "step": 1608
    },
    {
      "epoch": 1.5276525041538096,
      "grad_norm": 0.03704296052455902,
      "learning_rate": 0.00011415193162342407,
      "loss": 0.1262,
      "step": 1609
    },
    {
      "epoch": 1.5286019463565155,
      "grad_norm": 0.029848331585526466,
      "learning_rate": 0.00011404253242928877,
      "loss": 0.1345,
      "step": 1610
    },
    {
      "epoch": 1.5295513885592213,
      "grad_norm": 0.02494947612285614,
      "learning_rate": 0.00011393311608802437,
      "loss": 0.1247,
      "step": 1611
    },
    {
      "epoch": 1.5305008307619272,
      "grad_norm": 0.024150602519512177,
      "learning_rate": 0.0001138236827332376,
      "loss": 0.1195,
      "step": 1612
    },
    {
      "epoch": 1.5314502729646333,
      "grad_norm": 0.028073903173208237,
      "learning_rate": 0.00011371423249855584,
      "loss": 0.1298,
      "step": 1613
    },
    {
      "epoch": 1.5323997151673392,
      "grad_norm": 0.03696022182703018,
      "learning_rate": 0.0001136047655176272,
      "loss": 0.1658,
      "step": 1614
    },
    {
      "epoch": 1.533349157370045,
      "grad_norm": 0.026254741474986076,
      "learning_rate": 0.00011349528192412018,
      "loss": 0.1209,
      "step": 1615
    },
    {
      "epoch": 1.5342985995727512,
      "grad_norm": 0.04131542891263962,
      "learning_rate": 0.0001133857818517236,
      "loss": 0.1566,
      "step": 1616
    },
    {
      "epoch": 1.535248041775457,
      "grad_norm": 0.028996463865041733,
      "learning_rate": 0.00011327626543414636,
      "loss": 0.1287,
      "step": 1617
    },
    {
      "epoch": 1.536197483978163,
      "grad_norm": 0.029789695516228676,
      "learning_rate": 0.00011316673280511738,
      "loss": 0.1303,
      "step": 1618
    },
    {
      "epoch": 1.5371469261808688,
      "grad_norm": 0.026316309347748756,
      "learning_rate": 0.00011305718409838528,
      "loss": 0.1224,
      "step": 1619
    },
    {
      "epoch": 1.5380963683835747,
      "grad_norm": 0.029919691383838654,
      "learning_rate": 0.0001129476194477184,
      "loss": 0.1297,
      "step": 1620
    },
    {
      "epoch": 1.5390458105862805,
      "grad_norm": 0.029496189206838608,
      "learning_rate": 0.0001128380389869045,
      "loss": 0.1259,
      "step": 1621
    },
    {
      "epoch": 1.5399952527889864,
      "grad_norm": 0.0289089847356081,
      "learning_rate": 0.00011272844284975072,
      "loss": 0.1138,
      "step": 1622
    },
    {
      "epoch": 1.5409446949916923,
      "grad_norm": 0.03120460920035839,
      "learning_rate": 0.00011261883117008321,
      "loss": 0.1255,
      "step": 1623
    },
    {
      "epoch": 1.5418941371943982,
      "grad_norm": 0.030355585739016533,
      "learning_rate": 0.0001125092040817472,
      "loss": 0.1234,
      "step": 1624
    },
    {
      "epoch": 1.542843579397104,
      "grad_norm": 0.026462506502866745,
      "learning_rate": 0.00011239956171860675,
      "loss": 0.1194,
      "step": 1625
    },
    {
      "epoch": 1.5437930215998101,
      "grad_norm": 0.04788212105631828,
      "learning_rate": 0.00011228990421454449,
      "loss": 0.1721,
      "step": 1626
    },
    {
      "epoch": 1.544742463802516,
      "grad_norm": 0.04839539900422096,
      "learning_rate": 0.00011218023170346159,
      "loss": 0.1616,
      "step": 1627
    },
    {
      "epoch": 1.5456919060052219,
      "grad_norm": 0.029102666303515434,
      "learning_rate": 0.00011207054431927752,
      "loss": 0.1244,
      "step": 1628
    },
    {
      "epoch": 1.546641348207928,
      "grad_norm": 0.030667860060930252,
      "learning_rate": 0.00011196084219592994,
      "loss": 0.1211,
      "step": 1629
    },
    {
      "epoch": 1.5475907904106339,
      "grad_norm": 0.029911190271377563,
      "learning_rate": 0.00011185112546737451,
      "loss": 0.1209,
      "step": 1630
    },
    {
      "epoch": 1.5485402326133397,
      "grad_norm": 0.026976125314831734,
      "learning_rate": 0.00011174139426758466,
      "loss": 0.1201,
      "step": 1631
    },
    {
      "epoch": 1.5494896748160456,
      "grad_norm": 0.04256618767976761,
      "learning_rate": 0.00011163164873055158,
      "loss": 0.1716,
      "step": 1632
    },
    {
      "epoch": 1.5504391170187515,
      "grad_norm": 0.04431037977337837,
      "learning_rate": 0.00011152188899028393,
      "loss": 0.1535,
      "step": 1633
    },
    {
      "epoch": 1.5513885592214574,
      "grad_norm": 0.040324628353118896,
      "learning_rate": 0.00011141211518080768,
      "loss": 0.1559,
      "step": 1634
    },
    {
      "epoch": 1.5523380014241632,
      "grad_norm": 0.02631363458931446,
      "learning_rate": 0.00011130232743616602,
      "loss": 0.1254,
      "step": 1635
    },
    {
      "epoch": 1.553287443626869,
      "grad_norm": 0.031095106154680252,
      "learning_rate": 0.00011119252589041917,
      "loss": 0.1296,
      "step": 1636
    },
    {
      "epoch": 1.554236885829575,
      "grad_norm": 0.029540112242102623,
      "learning_rate": 0.00011108271067764413,
      "loss": 0.1237,
      "step": 1637
    },
    {
      "epoch": 1.555186328032281,
      "grad_norm": 0.032384589314460754,
      "learning_rate": 0.00011097288193193465,
      "loss": 0.122,
      "step": 1638
    },
    {
      "epoch": 1.556135770234987,
      "grad_norm": 0.041656699031591415,
      "learning_rate": 0.00011086303978740102,
      "loss": 0.1627,
      "step": 1639
    },
    {
      "epoch": 1.5570852124376928,
      "grad_norm": 0.02969949133694172,
      "learning_rate": 0.00011075318437816981,
      "loss": 0.1278,
      "step": 1640
    },
    {
      "epoch": 1.558034654640399,
      "grad_norm": 0.029392141848802567,
      "learning_rate": 0.00011064331583838389,
      "loss": 0.1222,
      "step": 1641
    },
    {
      "epoch": 1.5589840968431048,
      "grad_norm": 0.03221196308732033,
      "learning_rate": 0.0001105334343022021,
      "loss": 0.1301,
      "step": 1642
    },
    {
      "epoch": 1.5599335390458107,
      "grad_norm": 0.04113316535949707,
      "learning_rate": 0.00011042353990379917,
      "loss": 0.1545,
      "step": 1643
    },
    {
      "epoch": 1.5608829812485165,
      "grad_norm": 0.04606552794575691,
      "learning_rate": 0.00011031363277736546,
      "loss": 0.1582,
      "step": 1644
    },
    {
      "epoch": 1.5618324234512224,
      "grad_norm": 0.05224507302045822,
      "learning_rate": 0.00011020371305710701,
      "loss": 0.1702,
      "step": 1645
    },
    {
      "epoch": 1.5627818656539283,
      "grad_norm": 0.03016183339059353,
      "learning_rate": 0.00011009378087724518,
      "loss": 0.1299,
      "step": 1646
    },
    {
      "epoch": 1.5637313078566342,
      "grad_norm": 0.02981068380177021,
      "learning_rate": 0.00010998383637201648,
      "loss": 0.1258,
      "step": 1647
    },
    {
      "epoch": 1.56468075005934,
      "grad_norm": 0.027657071128487587,
      "learning_rate": 0.00010987387967567252,
      "loss": 0.1338,
      "step": 1648
    },
    {
      "epoch": 1.565630192262046,
      "grad_norm": 0.030992772430181503,
      "learning_rate": 0.00010976391092247986,
      "loss": 0.1249,
      "step": 1649
    },
    {
      "epoch": 1.566579634464752,
      "grad_norm": 0.039394572377204895,
      "learning_rate": 0.00010965393024671966,
      "loss": 0.1598,
      "step": 1650
    },
    {
      "epoch": 1.5675290766674579,
      "grad_norm": 0.042735736817121506,
      "learning_rate": 0.00010954393778268777,
      "loss": 0.1566,
      "step": 1651
    },
    {
      "epoch": 1.5684785188701638,
      "grad_norm": 0.03833623602986336,
      "learning_rate": 0.00010943393366469427,
      "loss": 0.1557,
      "step": 1652
    },
    {
      "epoch": 1.5694279610728696,
      "grad_norm": 0.028264719992876053,
      "learning_rate": 0.00010932391802706363,
      "loss": 0.1295,
      "step": 1653
    },
    {
      "epoch": 1.5703774032755757,
      "grad_norm": 0.030619991943240166,
      "learning_rate": 0.00010921389100413428,
      "loss": 0.1294,
      "step": 1654
    },
    {
      "epoch": 1.5713268454782816,
      "grad_norm": 0.0320441797375679,
      "learning_rate": 0.00010910385273025865,
      "loss": 0.1266,
      "step": 1655
    },
    {
      "epoch": 1.5722762876809875,
      "grad_norm": 0.028138084337115288,
      "learning_rate": 0.00010899380333980282,
      "loss": 0.1177,
      "step": 1656
    },
    {
      "epoch": 1.5732257298836934,
      "grad_norm": 0.030276020988821983,
      "learning_rate": 0.00010888374296714644,
      "loss": 0.1258,
      "step": 1657
    },
    {
      "epoch": 1.5741751720863992,
      "grad_norm": 0.04478145390748978,
      "learning_rate": 0.00010877367174668269,
      "loss": 0.1555,
      "step": 1658
    },
    {
      "epoch": 1.575124614289105,
      "grad_norm": 0.025850724428892136,
      "learning_rate": 0.00010866358981281783,
      "loss": 0.1186,
      "step": 1659
    },
    {
      "epoch": 1.576074056491811,
      "grad_norm": 0.03069223277270794,
      "learning_rate": 0.00010855349729997135,
      "loss": 0.1314,
      "step": 1660
    },
    {
      "epoch": 1.5770234986945169,
      "grad_norm": 0.050107646733522415,
      "learning_rate": 0.00010844339434257558,
      "loss": 0.1632,
      "step": 1661
    },
    {
      "epoch": 1.5779729408972227,
      "grad_norm": 0.04577158764004707,
      "learning_rate": 0.00010833328107507556,
      "loss": 0.1777,
      "step": 1662
    },
    {
      "epoch": 1.5789223830999288,
      "grad_norm": 0.030337292701005936,
      "learning_rate": 0.00010822315763192903,
      "loss": 0.1277,
      "step": 1663
    },
    {
      "epoch": 1.5798718253026347,
      "grad_norm": 0.05210689827799797,
      "learning_rate": 0.00010811302414760609,
      "loss": 0.1583,
      "step": 1664
    },
    {
      "epoch": 1.5808212675053406,
      "grad_norm": 0.056502800434827805,
      "learning_rate": 0.00010800288075658911,
      "loss": 0.1533,
      "step": 1665
    },
    {
      "epoch": 1.5817707097080467,
      "grad_norm": 0.03072887845337391,
      "learning_rate": 0.00010789272759337257,
      "loss": 0.1349,
      "step": 1666
    },
    {
      "epoch": 1.5827201519107525,
      "grad_norm": 0.030606022104620934,
      "learning_rate": 0.00010778256479246283,
      "loss": 0.1288,
      "step": 1667
    },
    {
      "epoch": 1.5836695941134584,
      "grad_norm": 0.027298327535390854,
      "learning_rate": 0.00010767239248837811,
      "loss": 0.1256,
      "step": 1668
    },
    {
      "epoch": 1.5846190363161643,
      "grad_norm": 0.044606730341911316,
      "learning_rate": 0.00010756221081564813,
      "loss": 0.1673,
      "step": 1669
    },
    {
      "epoch": 1.5855684785188702,
      "grad_norm": 0.04783207178115845,
      "learning_rate": 0.00010745201990881417,
      "loss": 0.1539,
      "step": 1670
    },
    {
      "epoch": 1.586517920721576,
      "grad_norm": 0.03072645701467991,
      "learning_rate": 0.00010734181990242868,
      "loss": 0.1255,
      "step": 1671
    },
    {
      "epoch": 1.587467362924282,
      "grad_norm": 0.028861412778496742,
      "learning_rate": 0.00010723161093105527,
      "loss": 0.1252,
      "step": 1672
    },
    {
      "epoch": 1.5884168051269878,
      "grad_norm": 0.06253904849290848,
      "learning_rate": 0.0001071213931292685,
      "loss": 0.208,
      "step": 1673
    },
    {
      "epoch": 1.5893662473296937,
      "grad_norm": 0.02638799510896206,
      "learning_rate": 0.00010701116663165368,
      "loss": 0.1307,
      "step": 1674
    },
    {
      "epoch": 1.5903156895323998,
      "grad_norm": 0.02984490990638733,
      "learning_rate": 0.00010690093157280678,
      "loss": 0.1305,
      "step": 1675
    },
    {
      "epoch": 1.5912651317351056,
      "grad_norm": 0.029151547700166702,
      "learning_rate": 0.0001067906880873342,
      "loss": 0.1278,
      "step": 1676
    },
    {
      "epoch": 1.5922145739378115,
      "grad_norm": 0.03524734824895859,
      "learning_rate": 0.00010668043630985259,
      "loss": 0.1426,
      "step": 1677
    },
    {
      "epoch": 1.5931640161405174,
      "grad_norm": 0.03428010269999504,
      "learning_rate": 0.00010657017637498881,
      "loss": 0.125,
      "step": 1678
    },
    {
      "epoch": 1.5941134583432235,
      "grad_norm": 0.05122271180152893,
      "learning_rate": 0.00010645990841737965,
      "loss": 0.1569,
      "step": 1679
    },
    {
      "epoch": 1.5950629005459294,
      "grad_norm": 0.035184647887945175,
      "learning_rate": 0.00010634963257167167,
      "loss": 0.1358,
      "step": 1680
    },
    {
      "epoch": 1.5960123427486352,
      "grad_norm": 0.03079393319785595,
      "learning_rate": 0.00010623934897252106,
      "loss": 0.1268,
      "step": 1681
    },
    {
      "epoch": 1.5969617849513411,
      "grad_norm": 0.03128993511199951,
      "learning_rate": 0.00010612905775459349,
      "loss": 0.1223,
      "step": 1682
    },
    {
      "epoch": 1.597911227154047,
      "grad_norm": 0.02892274223268032,
      "learning_rate": 0.00010601875905256398,
      "loss": 0.1293,
      "step": 1683
    },
    {
      "epoch": 1.5988606693567529,
      "grad_norm": 0.030298085883259773,
      "learning_rate": 0.00010590845300111663,
      "loss": 0.1293,
      "step": 1684
    },
    {
      "epoch": 1.5998101115594587,
      "grad_norm": 0.028357641771435738,
      "learning_rate": 0.00010579813973494454,
      "loss": 0.1269,
      "step": 1685
    },
    {
      "epoch": 1.6007595537621646,
      "grad_norm": 0.027196258306503296,
      "learning_rate": 0.00010568781938874959,
      "loss": 0.1296,
      "step": 1686
    },
    {
      "epoch": 1.6017089959648705,
      "grad_norm": 0.05107175186276436,
      "learning_rate": 0.00010557749209724233,
      "loss": 0.1604,
      "step": 1687
    },
    {
      "epoch": 1.6026584381675766,
      "grad_norm": 0.050482697784900665,
      "learning_rate": 0.00010546715799514178,
      "loss": 0.1902,
      "step": 1688
    },
    {
      "epoch": 1.6036078803702825,
      "grad_norm": 0.026253553107380867,
      "learning_rate": 0.00010535681721717529,
      "loss": 0.1226,
      "step": 1689
    },
    {
      "epoch": 1.6045573225729883,
      "grad_norm": 0.03308340907096863,
      "learning_rate": 0.0001052464698980784,
      "loss": 0.1384,
      "step": 1690
    },
    {
      "epoch": 1.6055067647756944,
      "grad_norm": 0.03511514514684677,
      "learning_rate": 0.00010513611617259454,
      "loss": 0.1577,
      "step": 1691
    },
    {
      "epoch": 1.6064562069784003,
      "grad_norm": 0.0325862281024456,
      "learning_rate": 0.00010502575617547501,
      "loss": 0.1343,
      "step": 1692
    },
    {
      "epoch": 1.6074056491811062,
      "grad_norm": 0.029174668714404106,
      "learning_rate": 0.00010491539004147879,
      "loss": 0.1222,
      "step": 1693
    },
    {
      "epoch": 1.608355091383812,
      "grad_norm": 0.028774891048669815,
      "learning_rate": 0.00010480501790537236,
      "loss": 0.1237,
      "step": 1694
    },
    {
      "epoch": 1.609304533586518,
      "grad_norm": 0.030504655092954636,
      "learning_rate": 0.00010469463990192947,
      "loss": 0.1228,
      "step": 1695
    },
    {
      "epoch": 1.6102539757892238,
      "grad_norm": 0.02889893390238285,
      "learning_rate": 0.0001045842561659311,
      "loss": 0.1208,
      "step": 1696
    },
    {
      "epoch": 1.6112034179919297,
      "grad_norm": 0.028366010636091232,
      "learning_rate": 0.00010447386683216518,
      "loss": 0.1193,
      "step": 1697
    },
    {
      "epoch": 1.6121528601946356,
      "grad_norm": 0.028841307386755943,
      "learning_rate": 0.0001043634720354265,
      "loss": 0.1287,
      "step": 1698
    },
    {
      "epoch": 1.6131023023973414,
      "grad_norm": 0.03739466145634651,
      "learning_rate": 0.00010425307191051654,
      "loss": 0.1369,
      "step": 1699
    },
    {
      "epoch": 1.6140517446000475,
      "grad_norm": 0.031514909118413925,
      "learning_rate": 0.00010414266659224323,
      "loss": 0.1301,
      "step": 1700
    },
    {
      "epoch": 1.6150011868027534,
      "grad_norm": 0.04335467517375946,
      "learning_rate": 0.00010403225621542089,
      "loss": 0.1543,
      "step": 1701
    },
    {
      "epoch": 1.6159506290054593,
      "grad_norm": 0.026403702795505524,
      "learning_rate": 0.00010392184091487,
      "loss": 0.1229,
      "step": 1702
    },
    {
      "epoch": 1.6169000712081654,
      "grad_norm": 0.06492079049348831,
      "learning_rate": 0.00010381142082541706,
      "loss": 0.1526,
      "step": 1703
    },
    {
      "epoch": 1.6178495134108712,
      "grad_norm": 0.031183555722236633,
      "learning_rate": 0.00010370099608189439,
      "loss": 0.1255,
      "step": 1704
    },
    {
      "epoch": 1.6187989556135771,
      "grad_norm": 0.02694527618587017,
      "learning_rate": 0.00010359056681914006,
      "loss": 0.1178,
      "step": 1705
    },
    {
      "epoch": 1.619748397816283,
      "grad_norm": 0.031590498983860016,
      "learning_rate": 0.00010348013317199756,
      "loss": 0.1311,
      "step": 1706
    },
    {
      "epoch": 1.6206978400189889,
      "grad_norm": 0.034355372190475464,
      "learning_rate": 0.00010336969527531577,
      "loss": 0.1363,
      "step": 1707
    },
    {
      "epoch": 1.6216472822216947,
      "grad_norm": 0.04307783022522926,
      "learning_rate": 0.00010325925326394886,
      "loss": 0.1659,
      "step": 1708
    },
    {
      "epoch": 1.6225967244244006,
      "grad_norm": 0.05346130579710007,
      "learning_rate": 0.00010314880727275591,
      "loss": 0.2022,
      "step": 1709
    },
    {
      "epoch": 1.6235461666271065,
      "grad_norm": 0.028053171932697296,
      "learning_rate": 0.00010303835743660086,
      "loss": 0.1251,
      "step": 1710
    },
    {
      "epoch": 1.6244956088298124,
      "grad_norm": 0.03092275932431221,
      "learning_rate": 0.00010292790389035239,
      "loss": 0.1299,
      "step": 1711
    },
    {
      "epoch": 1.6254450510325182,
      "grad_norm": 0.03486338630318642,
      "learning_rate": 0.00010281744676888368,
      "loss": 0.1374,
      "step": 1712
    },
    {
      "epoch": 1.6263944932352243,
      "grad_norm": 0.030160879716277122,
      "learning_rate": 0.00010270698620707231,
      "loss": 0.1381,
      "step": 1713
    },
    {
      "epoch": 1.6273439354379302,
      "grad_norm": 0.03339090943336487,
      "learning_rate": 0.00010259652233980007,
      "loss": 0.1316,
      "step": 1714
    },
    {
      "epoch": 1.628293377640636,
      "grad_norm": 0.027464497834444046,
      "learning_rate": 0.00010248605530195268,
      "loss": 0.1204,
      "step": 1715
    },
    {
      "epoch": 1.6292428198433422,
      "grad_norm": 0.02733561024069786,
      "learning_rate": 0.00010237558522841985,
      "loss": 0.1259,
      "step": 1716
    },
    {
      "epoch": 1.630192262046048,
      "grad_norm": 0.029772773385047913,
      "learning_rate": 0.00010226511225409499,
      "loss": 0.1252,
      "step": 1717
    },
    {
      "epoch": 1.631141704248754,
      "grad_norm": 0.04465902969241142,
      "learning_rate": 0.00010215463651387499,
      "loss": 0.151,
      "step": 1718
    },
    {
      "epoch": 1.6320911464514598,
      "grad_norm": 0.028140738606452942,
      "learning_rate": 0.0001020441581426601,
      "loss": 0.1221,
      "step": 1719
    },
    {
      "epoch": 1.6330405886541657,
      "grad_norm": 0.026517389342188835,
      "learning_rate": 0.00010193367727535392,
      "loss": 0.1253,
      "step": 1720
    },
    {
      "epoch": 1.6339900308568716,
      "grad_norm": 0.06271334737539291,
      "learning_rate": 0.00010182319404686293,
      "loss": 0.2072,
      "step": 1721
    },
    {
      "epoch": 1.6349394730595774,
      "grad_norm": 0.051109135150909424,
      "learning_rate": 0.00010171270859209662,
      "loss": 0.1923,
      "step": 1722
    },
    {
      "epoch": 1.6358889152622833,
      "grad_norm": 0.02858104184269905,
      "learning_rate": 0.00010160222104596716,
      "loss": 0.1317,
      "step": 1723
    },
    {
      "epoch": 1.6368383574649892,
      "grad_norm": 0.031905338168144226,
      "learning_rate": 0.00010149173154338917,
      "loss": 0.1334,
      "step": 1724
    },
    {
      "epoch": 1.6377877996676953,
      "grad_norm": 0.035984478890895844,
      "learning_rate": 0.00010138124021927984,
      "loss": 0.1308,
      "step": 1725
    },
    {
      "epoch": 1.6387372418704012,
      "grad_norm": 0.029515955597162247,
      "learning_rate": 0.00010127074720855845,
      "loss": 0.1232,
      "step": 1726
    },
    {
      "epoch": 1.639686684073107,
      "grad_norm": 0.03353870287537575,
      "learning_rate": 0.0001011602526461464,
      "loss": 0.1342,
      "step": 1727
    },
    {
      "epoch": 1.6406361262758131,
      "grad_norm": 0.02787208929657936,
      "learning_rate": 0.00010104975666696697,
      "loss": 0.1216,
      "step": 1728
    },
    {
      "epoch": 1.641585568478519,
      "grad_norm": 0.061213839799165726,
      "learning_rate": 0.0001009392594059452,
      "loss": 0.2093,
      "step": 1729
    },
    {
      "epoch": 1.6425350106812249,
      "grad_norm": 0.034235142171382904,
      "learning_rate": 0.0001008287609980076,
      "loss": 0.1329,
      "step": 1730
    },
    {
      "epoch": 1.6434844528839307,
      "grad_norm": 0.026360424235463142,
      "learning_rate": 0.00010071826157808217,
      "loss": 0.1239,
      "step": 1731
    },
    {
      "epoch": 1.6444338950866366,
      "grad_norm": 0.026264041662216187,
      "learning_rate": 0.00010060776128109812,
      "loss": 0.12,
      "step": 1732
    },
    {
      "epoch": 1.6453833372893425,
      "grad_norm": 0.02740940824151039,
      "learning_rate": 0.00010049726024198578,
      "loss": 0.1314,
      "step": 1733
    },
    {
      "epoch": 1.6463327794920484,
      "grad_norm": 0.04096614569425583,
      "learning_rate": 0.00010038675859567628,
      "loss": 0.1681,
      "step": 1734
    },
    {
      "epoch": 1.6472822216947542,
      "grad_norm": 0.04552573338150978,
      "learning_rate": 0.00010027625647710155,
      "loss": 0.16,
      "step": 1735
    },
    {
      "epoch": 1.6482316638974601,
      "grad_norm": 0.034032173454761505,
      "learning_rate": 0.00010016575402119413,
      "loss": 0.1326,
      "step": 1736
    },
    {
      "epoch": 1.649181106100166,
      "grad_norm": 0.03644052520394325,
      "learning_rate": 0.00010005525136288692,
      "loss": 0.146,
      "step": 1737
    },
    {
      "epoch": 1.650130548302872,
      "grad_norm": 0.04277161881327629,
      "learning_rate": 9.994474863711311e-05,
      "loss": 0.1719,
      "step": 1738
    },
    {
      "epoch": 1.651079990505578,
      "grad_norm": 0.027901561930775642,
      "learning_rate": 9.98342459788059e-05,
      "loss": 0.1238,
      "step": 1739
    },
    {
      "epoch": 1.6520294327082838,
      "grad_norm": 0.030957000330090523,
      "learning_rate": 9.972374352289848e-05,
      "loss": 0.1315,
      "step": 1740
    },
    {
      "epoch": 1.65297887491099,
      "grad_norm": 0.029299341142177582,
      "learning_rate": 9.961324140432376e-05,
      "loss": 0.1247,
      "step": 1741
    },
    {
      "epoch": 1.6539283171136958,
      "grad_norm": 0.0292718093842268,
      "learning_rate": 9.950273975801424e-05,
      "loss": 0.1296,
      "step": 1742
    },
    {
      "epoch": 1.6548777593164017,
      "grad_norm": 0.03113977424800396,
      "learning_rate": 9.93922387189019e-05,
      "loss": 0.1294,
      "step": 1743
    },
    {
      "epoch": 1.6558272015191076,
      "grad_norm": 0.05127384141087532,
      "learning_rate": 9.928173842191786e-05,
      "loss": 0.1623,
      "step": 1744
    },
    {
      "epoch": 1.6567766437218134,
      "grad_norm": 0.03058856725692749,
      "learning_rate": 9.917123900199245e-05,
      "loss": 0.1251,
      "step": 1745
    },
    {
      "epoch": 1.6577260859245193,
      "grad_norm": 0.043525367975234985,
      "learning_rate": 9.906074059405486e-05,
      "loss": 0.1584,
      "step": 1746
    },
    {
      "epoch": 1.6586755281272252,
      "grad_norm": 0.02724611759185791,
      "learning_rate": 9.895024333303305e-05,
      "loss": 0.1273,
      "step": 1747
    },
    {
      "epoch": 1.659624970329931,
      "grad_norm": 0.026182804256677628,
      "learning_rate": 9.883974735385361e-05,
      "loss": 0.1165,
      "step": 1748
    },
    {
      "epoch": 1.660574412532637,
      "grad_norm": 0.030495688319206238,
      "learning_rate": 9.87292527914416e-05,
      "loss": 0.1251,
      "step": 1749
    },
    {
      "epoch": 1.661523854735343,
      "grad_norm": 0.03013971447944641,
      "learning_rate": 9.861875978072017e-05,
      "loss": 0.1278,
      "step": 1750
    },
    {
      "epoch": 1.662473296938049,
      "grad_norm": 0.03999912738800049,
      "learning_rate": 9.850826845661082e-05,
      "loss": 0.1519,
      "step": 1751
    },
    {
      "epoch": 1.6634227391407548,
      "grad_norm": 0.029559755697846413,
      "learning_rate": 9.839777895403287e-05,
      "loss": 0.1293,
      "step": 1752
    },
    {
      "epoch": 1.6643721813434609,
      "grad_norm": 0.04213762283325195,
      "learning_rate": 9.828729140790337e-05,
      "loss": 0.1696,
      "step": 1753
    },
    {
      "epoch": 1.6653216235461668,
      "grad_norm": 0.029974251985549927,
      "learning_rate": 9.817680595313705e-05,
      "loss": 0.1182,
      "step": 1754
    },
    {
      "epoch": 1.6662710657488726,
      "grad_norm": 0.03835977986454964,
      "learning_rate": 9.806632272464607e-05,
      "loss": 0.1467,
      "step": 1755
    },
    {
      "epoch": 1.6672205079515785,
      "grad_norm": 0.031473349779844284,
      "learning_rate": 9.795584185733988e-05,
      "loss": 0.1305,
      "step": 1756
    },
    {
      "epoch": 1.6681699501542844,
      "grad_norm": 0.02675897814333439,
      "learning_rate": 9.784536348612504e-05,
      "loss": 0.1196,
      "step": 1757
    },
    {
      "epoch": 1.6691193923569903,
      "grad_norm": 0.0419435016810894,
      "learning_rate": 9.773488774590504e-05,
      "loss": 0.1558,
      "step": 1758
    },
    {
      "epoch": 1.6700688345596961,
      "grad_norm": 0.027311773970723152,
      "learning_rate": 9.762441477158016e-05,
      "loss": 0.1236,
      "step": 1759
    },
    {
      "epoch": 1.671018276762402,
      "grad_norm": 0.05605548992753029,
      "learning_rate": 9.751394469804734e-05,
      "loss": 0.1787,
      "step": 1760
    },
    {
      "epoch": 1.6719677189651079,
      "grad_norm": 0.025175364688038826,
      "learning_rate": 9.740347766019997e-05,
      "loss": 0.1275,
      "step": 1761
    },
    {
      "epoch": 1.6729171611678137,
      "grad_norm": 0.04951293021440506,
      "learning_rate": 9.729301379292773e-05,
      "loss": 0.2023,
      "step": 1762
    },
    {
      "epoch": 1.6738666033705198,
      "grad_norm": 0.02842806465923786,
      "learning_rate": 9.718255323111635e-05,
      "loss": 0.1238,
      "step": 1763
    },
    {
      "epoch": 1.6748160455732257,
      "grad_norm": 0.029241712763905525,
      "learning_rate": 9.707209610964765e-05,
      "loss": 0.121,
      "step": 1764
    },
    {
      "epoch": 1.6757654877759316,
      "grad_norm": 0.03337705507874489,
      "learning_rate": 9.696164256339917e-05,
      "loss": 0.1354,
      "step": 1765
    },
    {
      "epoch": 1.6767149299786377,
      "grad_norm": 0.030520187690854073,
      "learning_rate": 9.685119272724411e-05,
      "loss": 0.1256,
      "step": 1766
    },
    {
      "epoch": 1.6776643721813436,
      "grad_norm": 0.0318867489695549,
      "learning_rate": 9.674074673605115e-05,
      "loss": 0.1286,
      "step": 1767
    },
    {
      "epoch": 1.6786138143840494,
      "grad_norm": 0.02671106904745102,
      "learning_rate": 9.663030472468424e-05,
      "loss": 0.1297,
      "step": 1768
    },
    {
      "epoch": 1.6795632565867553,
      "grad_norm": 0.041651055216789246,
      "learning_rate": 9.651986682800249e-05,
      "loss": 0.1618,
      "step": 1769
    },
    {
      "epoch": 1.6805126987894612,
      "grad_norm": 0.03195889666676521,
      "learning_rate": 9.640943318085999e-05,
      "loss": 0.1279,
      "step": 1770
    },
    {
      "epoch": 1.681462140992167,
      "grad_norm": 0.04086165875196457,
      "learning_rate": 9.629900391810563e-05,
      "loss": 0.1678,
      "step": 1771
    },
    {
      "epoch": 1.682411583194873,
      "grad_norm": 0.025256391614675522,
      "learning_rate": 9.618857917458298e-05,
      "loss": 0.1197,
      "step": 1772
    },
    {
      "epoch": 1.6833610253975788,
      "grad_norm": 0.03352576494216919,
      "learning_rate": 9.607815908513005e-05,
      "loss": 0.1345,
      "step": 1773
    },
    {
      "epoch": 1.6843104676002847,
      "grad_norm": 0.06082432344555855,
      "learning_rate": 9.596774378457916e-05,
      "loss": 0.1639,
      "step": 1774
    },
    {
      "epoch": 1.6852599098029908,
      "grad_norm": 0.029191186651587486,
      "learning_rate": 9.585733340775677e-05,
      "loss": 0.1305,
      "step": 1775
    },
    {
      "epoch": 1.6862093520056967,
      "grad_norm": 0.029343895614147186,
      "learning_rate": 9.574692808948348e-05,
      "loss": 0.1265,
      "step": 1776
    },
    {
      "epoch": 1.6871587942084025,
      "grad_norm": 0.02953837811946869,
      "learning_rate": 9.56365279645735e-05,
      "loss": 0.1281,
      "step": 1777
    },
    {
      "epoch": 1.6881082364111086,
      "grad_norm": 0.028798846527934074,
      "learning_rate": 9.552613316783483e-05,
      "loss": 0.1257,
      "step": 1778
    },
    {
      "epoch": 1.6890576786138145,
      "grad_norm": 0.02905990555882454,
      "learning_rate": 9.54157438340689e-05,
      "loss": 0.1308,
      "step": 1779
    },
    {
      "epoch": 1.6900071208165204,
      "grad_norm": 0.02965502068400383,
      "learning_rate": 9.530536009807053e-05,
      "loss": 0.1296,
      "step": 1780
    },
    {
      "epoch": 1.6909565630192263,
      "grad_norm": 0.029197504743933678,
      "learning_rate": 9.519498209462766e-05,
      "loss": 0.1204,
      "step": 1781
    },
    {
      "epoch": 1.6919060052219321,
      "grad_norm": 0.04930657148361206,
      "learning_rate": 9.508460995852122e-05,
      "loss": 0.1522,
      "step": 1782
    },
    {
      "epoch": 1.692855447424638,
      "grad_norm": 0.027312377467751503,
      "learning_rate": 9.497424382452501e-05,
      "loss": 0.1203,
      "step": 1783
    },
    {
      "epoch": 1.6938048896273439,
      "grad_norm": 0.03260885551571846,
      "learning_rate": 9.486388382740548e-05,
      "loss": 0.1334,
      "step": 1784
    },
    {
      "epoch": 1.6947543318300498,
      "grad_norm": 0.052055153995752335,
      "learning_rate": 9.475353010192162e-05,
      "loss": 0.2113,
      "step": 1785
    },
    {
      "epoch": 1.6957037740327556,
      "grad_norm": 0.038476429879665375,
      "learning_rate": 9.464318278282472e-05,
      "loss": 0.1669,
      "step": 1786
    },
    {
      "epoch": 1.6966532162354615,
      "grad_norm": 0.03111964277923107,
      "learning_rate": 9.453284200485825e-05,
      "loss": 0.1255,
      "step": 1787
    },
    {
      "epoch": 1.6976026584381676,
      "grad_norm": 0.02803085930645466,
      "learning_rate": 9.44225079027577e-05,
      "loss": 0.1297,
      "step": 1788
    },
    {
      "epoch": 1.6985521006408735,
      "grad_norm": 0.029160544276237488,
      "learning_rate": 9.431218061125044e-05,
      "loss": 0.1263,
      "step": 1789
    },
    {
      "epoch": 1.6995015428435793,
      "grad_norm": 0.03121958300471306,
      "learning_rate": 9.420186026505548e-05,
      "loss": 0.1277,
      "step": 1790
    },
    {
      "epoch": 1.7004509850462854,
      "grad_norm": 0.027002684772014618,
      "learning_rate": 9.40915469988834e-05,
      "loss": 0.1236,
      "step": 1791
    },
    {
      "epoch": 1.7014004272489913,
      "grad_norm": 0.027172109112143517,
      "learning_rate": 9.398124094743604e-05,
      "loss": 0.1241,
      "step": 1792
    },
    {
      "epoch": 1.7023498694516972,
      "grad_norm": 0.02658463642001152,
      "learning_rate": 9.387094224540653e-05,
      "loss": 0.1268,
      "step": 1793
    },
    {
      "epoch": 1.703299311654403,
      "grad_norm": 0.028862452134490013,
      "learning_rate": 9.376065102747898e-05,
      "loss": 0.1317,
      "step": 1794
    },
    {
      "epoch": 1.704248753857109,
      "grad_norm": 0.025097506120800972,
      "learning_rate": 9.365036742832838e-05,
      "loss": 0.1164,
      "step": 1795
    },
    {
      "epoch": 1.7051981960598148,
      "grad_norm": 0.04557095095515251,
      "learning_rate": 9.354009158262038e-05,
      "loss": 0.1522,
      "step": 1796
    },
    {
      "epoch": 1.7061476382625207,
      "grad_norm": 0.03487172722816467,
      "learning_rate": 9.342982362501123e-05,
      "loss": 0.1398,
      "step": 1797
    },
    {
      "epoch": 1.7070970804652266,
      "grad_norm": 0.02509194053709507,
      "learning_rate": 9.331956369014746e-05,
      "loss": 0.1166,
      "step": 1798
    },
    {
      "epoch": 1.7080465226679324,
      "grad_norm": 0.030363403260707855,
      "learning_rate": 9.320931191266587e-05,
      "loss": 0.1191,
      "step": 1799
    },
    {
      "epoch": 1.7089959648706385,
      "grad_norm": 0.08008571714162827,
      "learning_rate": 9.309906842719323e-05,
      "loss": 0.1494,
      "step": 1800
    },
    {
      "epoch": 1.7099454070733444,
      "grad_norm": 0.027857676148414612,
      "learning_rate": 9.298883336834633e-05,
      "loss": 0.1307,
      "step": 1801
    },
    {
      "epoch": 1.7108948492760503,
      "grad_norm": 0.02744341269135475,
      "learning_rate": 9.28786068707315e-05,
      "loss": 0.1217,
      "step": 1802
    },
    {
      "epoch": 1.7118442914787564,
      "grad_norm": 0.0324774868786335,
      "learning_rate": 9.276838906894472e-05,
      "loss": 0.1311,
      "step": 1803
    },
    {
      "epoch": 1.7127937336814623,
      "grad_norm": 0.028761887922883034,
      "learning_rate": 9.265818009757132e-05,
      "loss": 0.1275,
      "step": 1804
    },
    {
      "epoch": 1.7137431758841681,
      "grad_norm": 0.02950756810605526,
      "learning_rate": 9.254798009118584e-05,
      "loss": 0.1262,
      "step": 1805
    },
    {
      "epoch": 1.714692618086874,
      "grad_norm": 0.027881214395165443,
      "learning_rate": 9.243778918435187e-05,
      "loss": 0.1266,
      "step": 1806
    },
    {
      "epoch": 1.7156420602895799,
      "grad_norm": 0.05155957117676735,
      "learning_rate": 9.232760751162193e-05,
      "loss": 0.1936,
      "step": 1807
    },
    {
      "epoch": 1.7165915024922858,
      "grad_norm": 0.029041916131973267,
      "learning_rate": 9.221743520753719e-05,
      "loss": 0.1204,
      "step": 1808
    },
    {
      "epoch": 1.7175409446949916,
      "grad_norm": 0.030144108459353447,
      "learning_rate": 9.210727240662747e-05,
      "loss": 0.1285,
      "step": 1809
    },
    {
      "epoch": 1.7184903868976975,
      "grad_norm": 0.028103960677981377,
      "learning_rate": 9.199711924341093e-05,
      "loss": 0.125,
      "step": 1810
    },
    {
      "epoch": 1.7194398291004034,
      "grad_norm": 0.03844073414802551,
      "learning_rate": 9.188697585239394e-05,
      "loss": 0.1525,
      "step": 1811
    },
    {
      "epoch": 1.7203892713031093,
      "grad_norm": 0.04454744979739189,
      "learning_rate": 9.177684236807099e-05,
      "loss": 0.1616,
      "step": 1812
    },
    {
      "epoch": 1.7213387135058154,
      "grad_norm": 0.027989163994789124,
      "learning_rate": 9.166671892492446e-05,
      "loss": 0.1265,
      "step": 1813
    },
    {
      "epoch": 1.7222881557085212,
      "grad_norm": 0.04422546178102493,
      "learning_rate": 9.155660565742444e-05,
      "loss": 0.159,
      "step": 1814
    },
    {
      "epoch": 1.723237597911227,
      "grad_norm": 0.027917252853512764,
      "learning_rate": 9.144650270002866e-05,
      "loss": 0.1229,
      "step": 1815
    },
    {
      "epoch": 1.7241870401139332,
      "grad_norm": 0.05252804979681969,
      "learning_rate": 9.133641018718217e-05,
      "loss": 0.1955,
      "step": 1816
    },
    {
      "epoch": 1.725136482316639,
      "grad_norm": 0.029228439554572105,
      "learning_rate": 9.122632825331733e-05,
      "loss": 0.1197,
      "step": 1817
    },
    {
      "epoch": 1.726085924519345,
      "grad_norm": 0.02810599096119404,
      "learning_rate": 9.111625703285356e-05,
      "loss": 0.1284,
      "step": 1818
    },
    {
      "epoch": 1.7270353667220508,
      "grad_norm": 0.02618074230849743,
      "learning_rate": 9.10061966601972e-05,
      "loss": 0.1239,
      "step": 1819
    },
    {
      "epoch": 1.7279848089247567,
      "grad_norm": 0.026649268344044685,
      "learning_rate": 9.089614726974137e-05,
      "loss": 0.1218,
      "step": 1820
    },
    {
      "epoch": 1.7289342511274626,
      "grad_norm": 0.02857782505452633,
      "learning_rate": 9.078610899586575e-05,
      "loss": 0.133,
      "step": 1821
    },
    {
      "epoch": 1.7298836933301684,
      "grad_norm": 0.026309454813599586,
      "learning_rate": 9.067608197293642e-05,
      "loss": 0.1175,
      "step": 1822
    },
    {
      "epoch": 1.7308331355328743,
      "grad_norm": 0.02791914902627468,
      "learning_rate": 9.056606633530578e-05,
      "loss": 0.12,
      "step": 1823
    },
    {
      "epoch": 1.7317825777355802,
      "grad_norm": 0.030874181538820267,
      "learning_rate": 9.045606221731229e-05,
      "loss": 0.1307,
      "step": 1824
    },
    {
      "epoch": 1.7327320199382863,
      "grad_norm": 0.030806539580225945,
      "learning_rate": 9.034606975328033e-05,
      "loss": 0.1188,
      "step": 1825
    },
    {
      "epoch": 1.7336814621409922,
      "grad_norm": 0.028665419667959213,
      "learning_rate": 9.023608907752015e-05,
      "loss": 0.1311,
      "step": 1826
    },
    {
      "epoch": 1.734630904343698,
      "grad_norm": 0.029301505535840988,
      "learning_rate": 9.012612032432747e-05,
      "loss": 0.1325,
      "step": 1827
    },
    {
      "epoch": 1.7355803465464041,
      "grad_norm": 0.0394410640001297,
      "learning_rate": 9.001616362798353e-05,
      "loss": 0.164,
      "step": 1828
    },
    {
      "epoch": 1.73652978874911,
      "grad_norm": 0.02754109725356102,
      "learning_rate": 8.990621912275484e-05,
      "loss": 0.12,
      "step": 1829
    },
    {
      "epoch": 1.737479230951816,
      "grad_norm": 0.02719545178115368,
      "learning_rate": 8.9796286942893e-05,
      "loss": 0.1203,
      "step": 1830
    },
    {
      "epoch": 1.7384286731545218,
      "grad_norm": 0.02480783686041832,
      "learning_rate": 8.968636722263455e-05,
      "loss": 0.1225,
      "step": 1831
    },
    {
      "epoch": 1.7393781153572276,
      "grad_norm": 0.025418803095817566,
      "learning_rate": 8.957646009620085e-05,
      "loss": 0.125,
      "step": 1832
    },
    {
      "epoch": 1.7403275575599335,
      "grad_norm": 0.024165470153093338,
      "learning_rate": 8.94665656977979e-05,
      "loss": 0.125,
      "step": 1833
    },
    {
      "epoch": 1.7412769997626394,
      "grad_norm": 0.03011813573539257,
      "learning_rate": 8.935668416161612e-05,
      "loss": 0.1337,
      "step": 1834
    },
    {
      "epoch": 1.7422264419653453,
      "grad_norm": 0.038413502275943756,
      "learning_rate": 8.92468156218302e-05,
      "loss": 0.1715,
      "step": 1835
    },
    {
      "epoch": 1.7431758841680511,
      "grad_norm": 0.023849591612815857,
      "learning_rate": 8.9136960212599e-05,
      "loss": 0.1148,
      "step": 1836
    },
    {
      "epoch": 1.7441253263707572,
      "grad_norm": 0.027159664779901505,
      "learning_rate": 8.902711806806536e-05,
      "loss": 0.1255,
      "step": 1837
    },
    {
      "epoch": 1.745074768573463,
      "grad_norm": 0.030395383015275,
      "learning_rate": 8.89172893223559e-05,
      "loss": 0.1267,
      "step": 1838
    },
    {
      "epoch": 1.746024210776169,
      "grad_norm": 0.025772254914045334,
      "learning_rate": 8.880747410958085e-05,
      "loss": 0.1212,
      "step": 1839
    },
    {
      "epoch": 1.7469736529788749,
      "grad_norm": 0.04073212668299675,
      "learning_rate": 8.8697672563834e-05,
      "loss": 0.1229,
      "step": 1840
    },
    {
      "epoch": 1.747923095181581,
      "grad_norm": 0.048602957278490067,
      "learning_rate": 8.858788481919235e-05,
      "loss": 0.1587,
      "step": 1841
    },
    {
      "epoch": 1.7488725373842868,
      "grad_norm": 0.030672159045934677,
      "learning_rate": 8.84781110097161e-05,
      "loss": 0.1276,
      "step": 1842
    },
    {
      "epoch": 1.7498219795869927,
      "grad_norm": 0.029867272824048996,
      "learning_rate": 8.836835126944843e-05,
      "loss": 0.1316,
      "step": 1843
    },
    {
      "epoch": 1.7507714217896986,
      "grad_norm": 0.03122364915907383,
      "learning_rate": 8.825860573241535e-05,
      "loss": 0.1276,
      "step": 1844
    },
    {
      "epoch": 1.7517208639924045,
      "grad_norm": 0.03530842810869217,
      "learning_rate": 8.814887453262555e-05,
      "loss": 0.1272,
      "step": 1845
    },
    {
      "epoch": 1.7526703061951103,
      "grad_norm": 0.028104711323976517,
      "learning_rate": 8.803915780407009e-05,
      "loss": 0.1277,
      "step": 1846
    },
    {
      "epoch": 1.7536197483978162,
      "grad_norm": 0.02434263750910759,
      "learning_rate": 8.792945568072252e-05,
      "loss": 0.1136,
      "step": 1847
    },
    {
      "epoch": 1.754569190600522,
      "grad_norm": 0.027843188494443893,
      "learning_rate": 8.781976829653846e-05,
      "loss": 0.1199,
      "step": 1848
    },
    {
      "epoch": 1.755518632803228,
      "grad_norm": 0.03688850998878479,
      "learning_rate": 8.771009578545553e-05,
      "loss": 0.1345,
      "step": 1849
    },
    {
      "epoch": 1.756468075005934,
      "grad_norm": 0.027186516672372818,
      "learning_rate": 8.760043828139325e-05,
      "loss": 0.1149,
      "step": 1850
    },
    {
      "epoch": 1.75741751720864,
      "grad_norm": 0.04049715772271156,
      "learning_rate": 8.749079591825278e-05,
      "loss": 0.1585,
      "step": 1851
    },
    {
      "epoch": 1.7583669594113458,
      "grad_norm": 0.02956775203347206,
      "learning_rate": 8.738116882991679e-05,
      "loss": 0.1303,
      "step": 1852
    },
    {
      "epoch": 1.759316401614052,
      "grad_norm": 0.026160147041082382,
      "learning_rate": 8.72715571502493e-05,
      "loss": 0.1237,
      "step": 1853
    },
    {
      "epoch": 1.7602658438167578,
      "grad_norm": 0.023719090968370438,
      "learning_rate": 8.71619610130955e-05,
      "loss": 0.1172,
      "step": 1854
    },
    {
      "epoch": 1.7612152860194636,
      "grad_norm": 0.024884294718503952,
      "learning_rate": 8.705238055228161e-05,
      "loss": 0.123,
      "step": 1855
    },
    {
      "epoch": 1.7621647282221695,
      "grad_norm": 0.028241394087672234,
      "learning_rate": 8.694281590161474e-05,
      "loss": 0.129,
      "step": 1856
    },
    {
      "epoch": 1.7631141704248754,
      "grad_norm": 0.028791090473532677,
      "learning_rate": 8.683326719488263e-05,
      "loss": 0.121,
      "step": 1857
    },
    {
      "epoch": 1.7640636126275813,
      "grad_norm": 0.046369921416044235,
      "learning_rate": 8.672373456585365e-05,
      "loss": 0.1666,
      "step": 1858
    },
    {
      "epoch": 1.7650130548302871,
      "grad_norm": 0.025271739810705185,
      "learning_rate": 8.661421814827641e-05,
      "loss": 0.1196,
      "step": 1859
    },
    {
      "epoch": 1.765962497032993,
      "grad_norm": 0.02569795772433281,
      "learning_rate": 8.650471807587983e-05,
      "loss": 0.1235,
      "step": 1860
    },
    {
      "epoch": 1.766911939235699,
      "grad_norm": 0.03638843819499016,
      "learning_rate": 8.639523448237282e-05,
      "loss": 0.1523,
      "step": 1861
    },
    {
      "epoch": 1.767861381438405,
      "grad_norm": 0.03260574862360954,
      "learning_rate": 8.628576750144419e-05,
      "loss": 0.1328,
      "step": 1862
    },
    {
      "epoch": 1.7688108236411109,
      "grad_norm": 0.02770201303064823,
      "learning_rate": 8.617631726676243e-05,
      "loss": 0.1256,
      "step": 1863
    },
    {
      "epoch": 1.7697602658438167,
      "grad_norm": 0.02869422361254692,
      "learning_rate": 8.606688391197564e-05,
      "loss": 0.1261,
      "step": 1864
    },
    {
      "epoch": 1.7707097080465226,
      "grad_norm": 0.02792002633213997,
      "learning_rate": 8.595746757071125e-05,
      "loss": 0.1277,
      "step": 1865
    },
    {
      "epoch": 1.7716591502492287,
      "grad_norm": 0.025662843137979507,
      "learning_rate": 8.584806837657594e-05,
      "loss": 0.1163,
      "step": 1866
    },
    {
      "epoch": 1.7726085924519346,
      "grad_norm": 0.027771448716521263,
      "learning_rate": 8.573868646315546e-05,
      "loss": 0.1273,
      "step": 1867
    },
    {
      "epoch": 1.7735580346546405,
      "grad_norm": 0.026355689391493797,
      "learning_rate": 8.562932196401444e-05,
      "loss": 0.1241,
      "step": 1868
    },
    {
      "epoch": 1.7745074768573463,
      "grad_norm": 0.028244782239198685,
      "learning_rate": 8.551997501269629e-05,
      "loss": 0.1319,
      "step": 1869
    },
    {
      "epoch": 1.7754569190600522,
      "grad_norm": 0.027661755681037903,
      "learning_rate": 8.541064574272292e-05,
      "loss": 0.134,
      "step": 1870
    },
    {
      "epoch": 1.776406361262758,
      "grad_norm": 0.026287924498319626,
      "learning_rate": 8.530133428759468e-05,
      "loss": 0.1215,
      "step": 1871
    },
    {
      "epoch": 1.777355803465464,
      "grad_norm": 0.040449049323797226,
      "learning_rate": 8.519204078079021e-05,
      "loss": 0.179,
      "step": 1872
    },
    {
      "epoch": 1.7783052456681698,
      "grad_norm": 0.022792836651206017,
      "learning_rate": 8.508276535576619e-05,
      "loss": 0.1208,
      "step": 1873
    },
    {
      "epoch": 1.7792546878708757,
      "grad_norm": 0.042618922889232635,
      "learning_rate": 8.497350814595721e-05,
      "loss": 0.1666,
      "step": 1874
    },
    {
      "epoch": 1.7802041300735818,
      "grad_norm": 0.0521213673055172,
      "learning_rate": 8.486426928477561e-05,
      "loss": 0.1858,
      "step": 1875
    },
    {
      "epoch": 1.7811535722762877,
      "grad_norm": 0.04682205617427826,
      "learning_rate": 8.475504890561142e-05,
      "loss": 0.2037,
      "step": 1876
    },
    {
      "epoch": 1.7821030144789936,
      "grad_norm": 0.041265182197093964,
      "learning_rate": 8.464584714183204e-05,
      "loss": 0.1775,
      "step": 1877
    },
    {
      "epoch": 1.7830524566816996,
      "grad_norm": 0.027602121233940125,
      "learning_rate": 8.453666412678206e-05,
      "loss": 0.1186,
      "step": 1878
    },
    {
      "epoch": 1.7840018988844055,
      "grad_norm": 0.025885846465826035,
      "learning_rate": 8.442749999378327e-05,
      "loss": 0.1275,
      "step": 1879
    },
    {
      "epoch": 1.7849513410871114,
      "grad_norm": 0.05111463740468025,
      "learning_rate": 8.43183548761344e-05,
      "loss": 0.1535,
      "step": 1880
    },
    {
      "epoch": 1.7859007832898173,
      "grad_norm": 0.026447484269738197,
      "learning_rate": 8.420922890711094e-05,
      "loss": 0.1244,
      "step": 1881
    },
    {
      "epoch": 1.7868502254925231,
      "grad_norm": 0.046114444732666016,
      "learning_rate": 8.410012221996502e-05,
      "loss": 0.1549,
      "step": 1882
    },
    {
      "epoch": 1.787799667695229,
      "grad_norm": 0.027883663773536682,
      "learning_rate": 8.399103494792514e-05,
      "loss": 0.1186,
      "step": 1883
    },
    {
      "epoch": 1.788749109897935,
      "grad_norm": 0.02667239122092724,
      "learning_rate": 8.388196722419621e-05,
      "loss": 0.1367,
      "step": 1884
    },
    {
      "epoch": 1.7896985521006408,
      "grad_norm": 0.028317047283053398,
      "learning_rate": 8.377291918195922e-05,
      "loss": 0.1293,
      "step": 1885
    },
    {
      "epoch": 1.7906479943033466,
      "grad_norm": 0.044224657118320465,
      "learning_rate": 8.36638909543711e-05,
      "loss": 0.1722,
      "step": 1886
    },
    {
      "epoch": 1.7915974365060527,
      "grad_norm": 0.029220551252365112,
      "learning_rate": 8.35548826745646e-05,
      "loss": 0.1315,
      "step": 1887
    },
    {
      "epoch": 1.7925468787087586,
      "grad_norm": 0.028357302770018578,
      "learning_rate": 8.344589447564818e-05,
      "loss": 0.1271,
      "step": 1888
    },
    {
      "epoch": 1.7934963209114645,
      "grad_norm": 0.027882913127541542,
      "learning_rate": 8.333692649070568e-05,
      "loss": 0.1311,
      "step": 1889
    },
    {
      "epoch": 1.7944457631141706,
      "grad_norm": 0.029344897717237473,
      "learning_rate": 8.322797885279627e-05,
      "loss": 0.1231,
      "step": 1890
    },
    {
      "epoch": 1.7953952053168765,
      "grad_norm": 0.039409589022397995,
      "learning_rate": 8.311905169495435e-05,
      "loss": 0.1651,
      "step": 1891
    },
    {
      "epoch": 1.7963446475195823,
      "grad_norm": 0.025551313534379005,
      "learning_rate": 8.301014515018925e-05,
      "loss": 0.1162,
      "step": 1892
    },
    {
      "epoch": 1.7972940897222882,
      "grad_norm": 0.027775781229138374,
      "learning_rate": 8.290125935148516e-05,
      "loss": 0.1254,
      "step": 1893
    },
    {
      "epoch": 1.798243531924994,
      "grad_norm": 0.025555282831192017,
      "learning_rate": 8.279239443180088e-05,
      "loss": 0.1173,
      "step": 1894
    },
    {
      "epoch": 1.7991929741277,
      "grad_norm": 0.027120131999254227,
      "learning_rate": 8.268355052406978e-05,
      "loss": 0.123,
      "step": 1895
    },
    {
      "epoch": 1.8001424163304058,
      "grad_norm": 0.027624819427728653,
      "learning_rate": 8.257472776119957e-05,
      "loss": 0.1313,
      "step": 1896
    },
    {
      "epoch": 1.8010918585331117,
      "grad_norm": 0.04341182857751846,
      "learning_rate": 8.246592627607208e-05,
      "loss": 0.1695,
      "step": 1897
    },
    {
      "epoch": 1.8020413007358176,
      "grad_norm": 0.049969110637903214,
      "learning_rate": 8.235714620154323e-05,
      "loss": 0.1609,
      "step": 1898
    },
    {
      "epoch": 1.8029907429385235,
      "grad_norm": 0.033960528671741486,
      "learning_rate": 8.224838767044275e-05,
      "loss": 0.1299,
      "step": 1899
    },
    {
      "epoch": 1.8039401851412296,
      "grad_norm": 0.045656926929950714,
      "learning_rate": 8.213965081557402e-05,
      "loss": 0.1613,
      "step": 1900
    },
    {
      "epoch": 1.8048896273439354,
      "grad_norm": 0.028559250757098198,
      "learning_rate": 8.203093576971414e-05,
      "loss": 0.1222,
      "step": 1901
    },
    {
      "epoch": 1.8058390695466413,
      "grad_norm": 0.026110464707016945,
      "learning_rate": 8.192224266561336e-05,
      "loss": 0.1204,
      "step": 1902
    },
    {
      "epoch": 1.8067885117493474,
      "grad_norm": 0.023995952680706978,
      "learning_rate": 8.181357163599522e-05,
      "loss": 0.1164,
      "step": 1903
    },
    {
      "epoch": 1.8077379539520533,
      "grad_norm": 0.054921507835388184,
      "learning_rate": 8.170492281355635e-05,
      "loss": 0.1535,
      "step": 1904
    },
    {
      "epoch": 1.8086873961547592,
      "grad_norm": 0.03065885417163372,
      "learning_rate": 8.159629633096619e-05,
      "loss": 0.1325,
      "step": 1905
    },
    {
      "epoch": 1.809636838357465,
      "grad_norm": 0.02722746506333351,
      "learning_rate": 8.148769232086698e-05,
      "loss": 0.1201,
      "step": 1906
    },
    {
      "epoch": 1.810586280560171,
      "grad_norm": 0.026619885116815567,
      "learning_rate": 8.13791109158734e-05,
      "loss": 0.125,
      "step": 1907
    },
    {
      "epoch": 1.8115357227628768,
      "grad_norm": 0.02867223508656025,
      "learning_rate": 8.127055224857266e-05,
      "loss": 0.1279,
      "step": 1908
    },
    {
      "epoch": 1.8124851649655827,
      "grad_norm": 0.029073316603899002,
      "learning_rate": 8.116201645152412e-05,
      "loss": 0.1195,
      "step": 1909
    },
    {
      "epoch": 1.8134346071682885,
      "grad_norm": 0.04955434426665306,
      "learning_rate": 8.105350365725926e-05,
      "loss": 0.1657,
      "step": 1910
    },
    {
      "epoch": 1.8143840493709944,
      "grad_norm": 0.03038748912513256,
      "learning_rate": 8.094501399828143e-05,
      "loss": 0.1244,
      "step": 1911
    },
    {
      "epoch": 1.8153334915737005,
      "grad_norm": 0.08984460681676865,
      "learning_rate": 8.08365476070658e-05,
      "loss": 0.1602,
      "step": 1912
    },
    {
      "epoch": 1.8162829337764064,
      "grad_norm": 0.028402511030435562,
      "learning_rate": 8.0728104616059e-05,
      "loss": 0.1162,
      "step": 1913
    },
    {
      "epoch": 1.8172323759791122,
      "grad_norm": 0.03234838321805,
      "learning_rate": 8.061968515767922e-05,
      "loss": 0.1271,
      "step": 1914
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.031242968514561653,
      "learning_rate": 8.051128936431584e-05,
      "loss": 0.1266,
      "step": 1915
    },
    {
      "epoch": 1.8191312603845242,
      "grad_norm": 0.030700111761689186,
      "learning_rate": 8.040291736832937e-05,
      "loss": 0.1294,
      "step": 1916
    },
    {
      "epoch": 1.82008070258723,
      "grad_norm": 0.031359221786260605,
      "learning_rate": 8.029456930205128e-05,
      "loss": 0.1316,
      "step": 1917
    },
    {
      "epoch": 1.821030144789936,
      "grad_norm": 0.03048795275390148,
      "learning_rate": 8.018624529778375e-05,
      "loss": 0.127,
      "step": 1918
    },
    {
      "epoch": 1.8219795869926418,
      "grad_norm": 0.03642559424042702,
      "learning_rate": 8.007794548779964e-05,
      "loss": 0.1577,
      "step": 1919
    },
    {
      "epoch": 1.8229290291953477,
      "grad_norm": 0.029665078967809677,
      "learning_rate": 7.996967000434224e-05,
      "loss": 0.1266,
      "step": 1920
    },
    {
      "epoch": 1.8238784713980536,
      "grad_norm": 0.027418775483965874,
      "learning_rate": 7.986141897962518e-05,
      "loss": 0.1319,
      "step": 1921
    },
    {
      "epoch": 1.8248279136007595,
      "grad_norm": 0.03040868602693081,
      "learning_rate": 7.975319254583216e-05,
      "loss": 0.1263,
      "step": 1922
    },
    {
      "epoch": 1.8257773558034653,
      "grad_norm": 0.030304348096251488,
      "learning_rate": 7.96449908351169e-05,
      "loss": 0.1357,
      "step": 1923
    },
    {
      "epoch": 1.8267267980061712,
      "grad_norm": 0.029055261984467506,
      "learning_rate": 7.953681397960287e-05,
      "loss": 0.1293,
      "step": 1924
    },
    {
      "epoch": 1.8276762402088773,
      "grad_norm": 0.030226033180952072,
      "learning_rate": 7.942866211138324e-05,
      "loss": 0.1335,
      "step": 1925
    },
    {
      "epoch": 1.8286256824115832,
      "grad_norm": 0.02857894077897072,
      "learning_rate": 7.93205353625207e-05,
      "loss": 0.1307,
      "step": 1926
    },
    {
      "epoch": 1.829575124614289,
      "grad_norm": 0.031932733952999115,
      "learning_rate": 7.921243386504723e-05,
      "loss": 0.1345,
      "step": 1927
    },
    {
      "epoch": 1.8305245668169952,
      "grad_norm": 0.030729882419109344,
      "learning_rate": 7.910435775096394e-05,
      "loss": 0.1256,
      "step": 1928
    },
    {
      "epoch": 1.831474009019701,
      "grad_norm": 0.056101903319358826,
      "learning_rate": 7.899630715224098e-05,
      "loss": 0.1858,
      "step": 1929
    },
    {
      "epoch": 1.832423451222407,
      "grad_norm": 0.02785342186689377,
      "learning_rate": 7.888828220081738e-05,
      "loss": 0.1244,
      "step": 1930
    },
    {
      "epoch": 1.8333728934251128,
      "grad_norm": 0.031731851398944855,
      "learning_rate": 7.878028302860076e-05,
      "loss": 0.1339,
      "step": 1931
    },
    {
      "epoch": 1.8343223356278187,
      "grad_norm": 0.02918057143688202,
      "learning_rate": 7.867230976746733e-05,
      "loss": 0.1309,
      "step": 1932
    },
    {
      "epoch": 1.8352717778305245,
      "grad_norm": 0.029841450974345207,
      "learning_rate": 7.856436254926165e-05,
      "loss": 0.1244,
      "step": 1933
    },
    {
      "epoch": 1.8362212200332304,
      "grad_norm": 0.02823001891374588,
      "learning_rate": 7.845644150579649e-05,
      "loss": 0.1253,
      "step": 1934
    },
    {
      "epoch": 1.8371706622359363,
      "grad_norm": 0.027422424405813217,
      "learning_rate": 7.834854676885262e-05,
      "loss": 0.1141,
      "step": 1935
    },
    {
      "epoch": 1.8381201044386422,
      "grad_norm": 0.02671034075319767,
      "learning_rate": 7.824067847017876e-05,
      "loss": 0.1204,
      "step": 1936
    },
    {
      "epoch": 1.8390695466413483,
      "grad_norm": 0.02999771386384964,
      "learning_rate": 7.813283674149123e-05,
      "loss": 0.1258,
      "step": 1937
    },
    {
      "epoch": 1.8400189888440541,
      "grad_norm": 0.026322634890675545,
      "learning_rate": 7.8025021714474e-05,
      "loss": 0.117,
      "step": 1938
    },
    {
      "epoch": 1.84096843104676,
      "grad_norm": 0.02665727399289608,
      "learning_rate": 7.791723352077842e-05,
      "loss": 0.1168,
      "step": 1939
    },
    {
      "epoch": 1.841917873249466,
      "grad_norm": 0.043415650725364685,
      "learning_rate": 7.780947229202305e-05,
      "loss": 0.1666,
      "step": 1940
    },
    {
      "epoch": 1.842867315452172,
      "grad_norm": 0.03161248564720154,
      "learning_rate": 7.770173815979356e-05,
      "loss": 0.1307,
      "step": 1941
    },
    {
      "epoch": 1.8438167576548778,
      "grad_norm": 0.028511585667729378,
      "learning_rate": 7.759403125564246e-05,
      "loss": 0.1329,
      "step": 1942
    },
    {
      "epoch": 1.8447661998575837,
      "grad_norm": 0.03340164199471474,
      "learning_rate": 7.74863517110891e-05,
      "loss": 0.1233,
      "step": 1943
    },
    {
      "epoch": 1.8457156420602896,
      "grad_norm": 0.027353493496775627,
      "learning_rate": 7.737869965761937e-05,
      "loss": 0.1227,
      "step": 1944
    },
    {
      "epoch": 1.8466650842629955,
      "grad_norm": 0.04435974359512329,
      "learning_rate": 7.72710752266856e-05,
      "loss": 0.1586,
      "step": 1945
    },
    {
      "epoch": 1.8476145264657013,
      "grad_norm": 0.03443425893783569,
      "learning_rate": 7.716347854970642e-05,
      "loss": 0.1285,
      "step": 1946
    },
    {
      "epoch": 1.8485639686684072,
      "grad_norm": 0.02941983938217163,
      "learning_rate": 7.705590975806652e-05,
      "loss": 0.1227,
      "step": 1947
    },
    {
      "epoch": 1.849513410871113,
      "grad_norm": 0.031541094183921814,
      "learning_rate": 7.694836898311654e-05,
      "loss": 0.1307,
      "step": 1948
    },
    {
      "epoch": 1.850462853073819,
      "grad_norm": 0.030199352651834488,
      "learning_rate": 7.684085635617297e-05,
      "loss": 0.126,
      "step": 1949
    },
    {
      "epoch": 1.851412295276525,
      "grad_norm": 0.024474412202835083,
      "learning_rate": 7.673337200851787e-05,
      "loss": 0.1222,
      "step": 1950
    },
    {
      "epoch": 1.852361737479231,
      "grad_norm": 0.02779853343963623,
      "learning_rate": 7.662591607139882e-05,
      "loss": 0.1242,
      "step": 1951
    },
    {
      "epoch": 1.8533111796819368,
      "grad_norm": 0.036591142416000366,
      "learning_rate": 7.651848867602867e-05,
      "loss": 0.1593,
      "step": 1952
    },
    {
      "epoch": 1.854260621884643,
      "grad_norm": 0.029311561957001686,
      "learning_rate": 7.641108995358542e-05,
      "loss": 0.1237,
      "step": 1953
    },
    {
      "epoch": 1.8552100640873488,
      "grad_norm": 0.029321955516934395,
      "learning_rate": 7.630372003521204e-05,
      "loss": 0.1289,
      "step": 1954
    },
    {
      "epoch": 1.8561595062900547,
      "grad_norm": 0.034341856837272644,
      "learning_rate": 7.619637905201642e-05,
      "loss": 0.1323,
      "step": 1955
    },
    {
      "epoch": 1.8571089484927605,
      "grad_norm": 0.02771337330341339,
      "learning_rate": 7.608906713507098e-05,
      "loss": 0.133,
      "step": 1956
    },
    {
      "epoch": 1.8580583906954664,
      "grad_norm": 0.03146693855524063,
      "learning_rate": 7.598178441541274e-05,
      "loss": 0.1346,
      "step": 1957
    },
    {
      "epoch": 1.8590078328981723,
      "grad_norm": 0.028372354805469513,
      "learning_rate": 7.587453102404306e-05,
      "loss": 0.1194,
      "step": 1958
    },
    {
      "epoch": 1.8599572751008782,
      "grad_norm": 0.027766333892941475,
      "learning_rate": 7.576730709192744e-05,
      "loss": 0.1241,
      "step": 1959
    },
    {
      "epoch": 1.860906717303584,
      "grad_norm": 0.026262789964675903,
      "learning_rate": 7.566011274999549e-05,
      "loss": 0.1191,
      "step": 1960
    },
    {
      "epoch": 1.86185615950629,
      "grad_norm": 0.028471313416957855,
      "learning_rate": 7.555294812914061e-05,
      "loss": 0.1208,
      "step": 1961
    },
    {
      "epoch": 1.862805601708996,
      "grad_norm": 0.0403280183672905,
      "learning_rate": 7.544581336021994e-05,
      "loss": 0.169,
      "step": 1962
    },
    {
      "epoch": 1.8637550439117019,
      "grad_norm": 0.029336489737033844,
      "learning_rate": 7.533870857405414e-05,
      "loss": 0.1275,
      "step": 1963
    },
    {
      "epoch": 1.8647044861144078,
      "grad_norm": 0.05861514061689377,
      "learning_rate": 7.523163390142732e-05,
      "loss": 0.1984,
      "step": 1964
    },
    {
      "epoch": 1.8656539283171139,
      "grad_norm": 0.026019204407930374,
      "learning_rate": 7.51245894730868e-05,
      "loss": 0.1194,
      "step": 1965
    },
    {
      "epoch": 1.8666033705198197,
      "grad_norm": 0.043394673615694046,
      "learning_rate": 7.501757541974289e-05,
      "loss": 0.1598,
      "step": 1966
    },
    {
      "epoch": 1.8675528127225256,
      "grad_norm": 0.025404971092939377,
      "learning_rate": 7.49105918720689e-05,
      "loss": 0.1159,
      "step": 1967
    },
    {
      "epoch": 1.8685022549252315,
      "grad_norm": 0.028354499489068985,
      "learning_rate": 7.480363896070089e-05,
      "loss": 0.1216,
      "step": 1968
    },
    {
      "epoch": 1.8694516971279374,
      "grad_norm": 0.06748262792825699,
      "learning_rate": 7.469671681623742e-05,
      "loss": 0.1888,
      "step": 1969
    },
    {
      "epoch": 1.8704011393306432,
      "grad_norm": 0.027621906250715256,
      "learning_rate": 7.458982556923963e-05,
      "loss": 0.1196,
      "step": 1970
    },
    {
      "epoch": 1.871350581533349,
      "grad_norm": 0.031015096232295036,
      "learning_rate": 7.448296535023077e-05,
      "loss": 0.1266,
      "step": 1971
    },
    {
      "epoch": 1.872300023736055,
      "grad_norm": 0.02895331382751465,
      "learning_rate": 7.437613628969627e-05,
      "loss": 0.1284,
      "step": 1972
    },
    {
      "epoch": 1.8732494659387608,
      "grad_norm": 0.04110453650355339,
      "learning_rate": 7.426933851808355e-05,
      "loss": 0.1545,
      "step": 1973
    },
    {
      "epoch": 1.8741989081414667,
      "grad_norm": 0.030046746134757996,
      "learning_rate": 7.416257216580181e-05,
      "loss": 0.1269,
      "step": 1974
    },
    {
      "epoch": 1.8751483503441728,
      "grad_norm": 0.04101106524467468,
      "learning_rate": 7.405583736322182e-05,
      "loss": 0.1621,
      "step": 1975
    },
    {
      "epoch": 1.8760977925468787,
      "grad_norm": 0.04438061639666557,
      "learning_rate": 7.394913424067591e-05,
      "loss": 0.1693,
      "step": 1976
    },
    {
      "epoch": 1.8770472347495846,
      "grad_norm": 0.028554782271385193,
      "learning_rate": 7.38424629284577e-05,
      "loss": 0.1265,
      "step": 1977
    },
    {
      "epoch": 1.8779966769522907,
      "grad_norm": 0.028575632721185684,
      "learning_rate": 7.373582355682191e-05,
      "loss": 0.12,
      "step": 1978
    },
    {
      "epoch": 1.8789461191549965,
      "grad_norm": 0.027870802208781242,
      "learning_rate": 7.362921625598436e-05,
      "loss": 0.1301,
      "step": 1979
    },
    {
      "epoch": 1.8798955613577024,
      "grad_norm": 0.03442731872200966,
      "learning_rate": 7.352264115612158e-05,
      "loss": 0.1537,
      "step": 1980
    },
    {
      "epoch": 1.8808450035604083,
      "grad_norm": 0.03244437277317047,
      "learning_rate": 7.341609838737089e-05,
      "loss": 0.1356,
      "step": 1981
    },
    {
      "epoch": 1.8817944457631142,
      "grad_norm": 0.0298260897397995,
      "learning_rate": 7.330958807983011e-05,
      "loss": 0.1238,
      "step": 1982
    },
    {
      "epoch": 1.88274388796582,
      "grad_norm": 0.02521882764995098,
      "learning_rate": 7.320311036355736e-05,
      "loss": 0.1185,
      "step": 1983
    },
    {
      "epoch": 1.883693330168526,
      "grad_norm": 0.02781338430941105,
      "learning_rate": 7.309666536857106e-05,
      "loss": 0.1214,
      "step": 1984
    },
    {
      "epoch": 1.8846427723712318,
      "grad_norm": 0.05946779251098633,
      "learning_rate": 7.299025322484958e-05,
      "loss": 0.1897,
      "step": 1985
    },
    {
      "epoch": 1.8855922145739377,
      "grad_norm": 0.028507012873888016,
      "learning_rate": 7.288387406233122e-05,
      "loss": 0.1282,
      "step": 1986
    },
    {
      "epoch": 1.8865416567766438,
      "grad_norm": 0.029610810801386833,
      "learning_rate": 7.277752801091404e-05,
      "loss": 0.1302,
      "step": 1987
    },
    {
      "epoch": 1.8874910989793496,
      "grad_norm": 0.030304808169603348,
      "learning_rate": 7.267121520045558e-05,
      "loss": 0.132,
      "step": 1988
    },
    {
      "epoch": 1.8884405411820555,
      "grad_norm": 0.028647607192397118,
      "learning_rate": 7.256493576077292e-05,
      "loss": 0.1309,
      "step": 1989
    },
    {
      "epoch": 1.8893899833847616,
      "grad_norm": 0.04364948347210884,
      "learning_rate": 7.245868982164226e-05,
      "loss": 0.1628,
      "step": 1990
    },
    {
      "epoch": 1.8903394255874675,
      "grad_norm": 0.029457390308380127,
      "learning_rate": 7.235247751279893e-05,
      "loss": 0.1163,
      "step": 1991
    },
    {
      "epoch": 1.8912888677901734,
      "grad_norm": 0.059156183153390884,
      "learning_rate": 7.224629896393726e-05,
      "loss": 0.2033,
      "step": 1992
    },
    {
      "epoch": 1.8922383099928792,
      "grad_norm": 0.02888781949877739,
      "learning_rate": 7.214015430471028e-05,
      "loss": 0.1242,
      "step": 1993
    },
    {
      "epoch": 1.893187752195585,
      "grad_norm": 0.03040069155395031,
      "learning_rate": 7.20340436647297e-05,
      "loss": 0.1211,
      "step": 1994
    },
    {
      "epoch": 1.894137194398291,
      "grad_norm": 0.028204258531332016,
      "learning_rate": 7.192796717356562e-05,
      "loss": 0.1267,
      "step": 1995
    },
    {
      "epoch": 1.8950866366009969,
      "grad_norm": 0.030367571860551834,
      "learning_rate": 7.182192496074648e-05,
      "loss": 0.1232,
      "step": 1996
    },
    {
      "epoch": 1.8960360788037027,
      "grad_norm": 0.02587362751364708,
      "learning_rate": 7.171591715575888e-05,
      "loss": 0.1261,
      "step": 1997
    },
    {
      "epoch": 1.8969855210064086,
      "grad_norm": 0.028903882950544357,
      "learning_rate": 7.160994388804736e-05,
      "loss": 0.1318,
      "step": 1998
    },
    {
      "epoch": 1.8979349632091147,
      "grad_norm": 0.025526562705636024,
      "learning_rate": 7.150400528701436e-05,
      "loss": 0.1205,
      "step": 1999
    },
    {
      "epoch": 1.8988844054118206,
      "grad_norm": 0.029438691213726997,
      "learning_rate": 7.139810148201987e-05,
      "loss": 0.131,
      "step": 2000
    },
    {
      "epoch": 1.8998338476145264,
      "grad_norm": 0.026545461267232895,
      "learning_rate": 7.129223260238154e-05,
      "loss": 0.1219,
      "step": 2001
    },
    {
      "epoch": 1.9007832898172323,
      "grad_norm": 0.03137153759598732,
      "learning_rate": 7.118639877737425e-05,
      "loss": 0.1389,
      "step": 2002
    },
    {
      "epoch": 1.9017327320199384,
      "grad_norm": 0.03655494004487991,
      "learning_rate": 7.108060013623017e-05,
      "loss": 0.1592,
      "step": 2003
    },
    {
      "epoch": 1.9026821742226443,
      "grad_norm": 0.0271841399371624,
      "learning_rate": 7.09748368081384e-05,
      "loss": 0.1309,
      "step": 2004
    },
    {
      "epoch": 1.9036316164253502,
      "grad_norm": 0.028577405959367752,
      "learning_rate": 7.086910892224499e-05,
      "loss": 0.1261,
      "step": 2005
    },
    {
      "epoch": 1.904581058628056,
      "grad_norm": 0.028025876730680466,
      "learning_rate": 7.076341660765271e-05,
      "loss": 0.1323,
      "step": 2006
    },
    {
      "epoch": 1.905530500830762,
      "grad_norm": 0.03332342579960823,
      "learning_rate": 7.065775999342091e-05,
      "loss": 0.1327,
      "step": 2007
    },
    {
      "epoch": 1.9064799430334678,
      "grad_norm": 0.043180011212825775,
      "learning_rate": 7.055213920856529e-05,
      "loss": 0.1613,
      "step": 2008
    },
    {
      "epoch": 1.9074293852361737,
      "grad_norm": 0.04228482022881508,
      "learning_rate": 7.044655438205785e-05,
      "loss": 0.1594,
      "step": 2009
    },
    {
      "epoch": 1.9083788274388795,
      "grad_norm": 0.029172802343964577,
      "learning_rate": 7.034100564282664e-05,
      "loss": 0.1258,
      "step": 2010
    },
    {
      "epoch": 1.9093282696415854,
      "grad_norm": 0.0426810160279274,
      "learning_rate": 7.02354931197557e-05,
      "loss": 0.16,
      "step": 2011
    },
    {
      "epoch": 1.9102777118442915,
      "grad_norm": 0.025085503235459328,
      "learning_rate": 7.013001694168478e-05,
      "loss": 0.1233,
      "step": 2012
    },
    {
      "epoch": 1.9112271540469974,
      "grad_norm": 0.0266293715685606,
      "learning_rate": 7.002457723740934e-05,
      "loss": 0.1214,
      "step": 2013
    },
    {
      "epoch": 1.9121765962497033,
      "grad_norm": 0.03064984828233719,
      "learning_rate": 6.991917413568017e-05,
      "loss": 0.1186,
      "step": 2014
    },
    {
      "epoch": 1.9131260384524094,
      "grad_norm": 0.026003271341323853,
      "learning_rate": 6.981380776520348e-05,
      "loss": 0.1228,
      "step": 2015
    },
    {
      "epoch": 1.9140754806551152,
      "grad_norm": 0.045436155050992966,
      "learning_rate": 6.970847825464059e-05,
      "loss": 0.174,
      "step": 2016
    },
    {
      "epoch": 1.915024922857821,
      "grad_norm": 0.029938362538814545,
      "learning_rate": 6.960318573260783e-05,
      "loss": 0.1201,
      "step": 2017
    },
    {
      "epoch": 1.915974365060527,
      "grad_norm": 0.026935014873743057,
      "learning_rate": 6.949793032767634e-05,
      "loss": 0.1165,
      "step": 2018
    },
    {
      "epoch": 1.9169238072632329,
      "grad_norm": 0.02809876948595047,
      "learning_rate": 6.93927121683719e-05,
      "loss": 0.1248,
      "step": 2019
    },
    {
      "epoch": 1.9178732494659387,
      "grad_norm": 0.03932083770632744,
      "learning_rate": 6.928753138317488e-05,
      "loss": 0.1607,
      "step": 2020
    },
    {
      "epoch": 1.9188226916686446,
      "grad_norm": 0.029043098911643028,
      "learning_rate": 6.918238810051999e-05,
      "loss": 0.1292,
      "step": 2021
    },
    {
      "epoch": 1.9197721338713505,
      "grad_norm": 0.03849990293383598,
      "learning_rate": 6.907728244879611e-05,
      "loss": 0.1611,
      "step": 2022
    },
    {
      "epoch": 1.9207215760740564,
      "grad_norm": 0.028439447283744812,
      "learning_rate": 6.897221455634624e-05,
      "loss": 0.1265,
      "step": 2023
    },
    {
      "epoch": 1.9216710182767625,
      "grad_norm": 0.028611112385988235,
      "learning_rate": 6.886718455146724e-05,
      "loss": 0.1312,
      "step": 2024
    },
    {
      "epoch": 1.9226204604794683,
      "grad_norm": 0.02605103701353073,
      "learning_rate": 6.87621925624096e-05,
      "loss": 0.1241,
      "step": 2025
    },
    {
      "epoch": 1.9235699026821742,
      "grad_norm": 0.06604333966970444,
      "learning_rate": 6.865723871737762e-05,
      "loss": 0.2016,
      "step": 2026
    },
    {
      "epoch": 1.92451934488488,
      "grad_norm": 0.044974714517593384,
      "learning_rate": 6.855232314452884e-05,
      "loss": 0.1778,
      "step": 2027
    },
    {
      "epoch": 1.9254687870875862,
      "grad_norm": 0.03168616443872452,
      "learning_rate": 6.844744597197409e-05,
      "loss": 0.1327,
      "step": 2028
    },
    {
      "epoch": 1.926418229290292,
      "grad_norm": 0.029546428471803665,
      "learning_rate": 6.834260732777736e-05,
      "loss": 0.1302,
      "step": 2029
    },
    {
      "epoch": 1.927367671492998,
      "grad_norm": 0.05021713301539421,
      "learning_rate": 6.823780733995557e-05,
      "loss": 0.1863,
      "step": 2030
    },
    {
      "epoch": 1.9283171136957038,
      "grad_norm": 0.030026502907276154,
      "learning_rate": 6.813304613647845e-05,
      "loss": 0.1349,
      "step": 2031
    },
    {
      "epoch": 1.9292665558984097,
      "grad_norm": 0.03538592904806137,
      "learning_rate": 6.802832384526836e-05,
      "loss": 0.1374,
      "step": 2032
    },
    {
      "epoch": 1.9302159981011155,
      "grad_norm": 0.027488164603710175,
      "learning_rate": 6.792364059420012e-05,
      "loss": 0.1237,
      "step": 2033
    },
    {
      "epoch": 1.9311654403038214,
      "grad_norm": 0.03553836792707443,
      "learning_rate": 6.781899651110091e-05,
      "loss": 0.1522,
      "step": 2034
    },
    {
      "epoch": 1.9321148825065273,
      "grad_norm": 0.029753949493169785,
      "learning_rate": 6.771439172375007e-05,
      "loss": 0.1222,
      "step": 2035
    },
    {
      "epoch": 1.9330643247092332,
      "grad_norm": 0.03108718991279602,
      "learning_rate": 6.760982635987899e-05,
      "loss": 0.1186,
      "step": 2036
    },
    {
      "epoch": 1.9340137669119393,
      "grad_norm": 0.02662482298910618,
      "learning_rate": 6.750530054717088e-05,
      "loss": 0.1189,
      "step": 2037
    },
    {
      "epoch": 1.9349632091146451,
      "grad_norm": 0.029288165271282196,
      "learning_rate": 6.740081441326062e-05,
      "loss": 0.1179,
      "step": 2038
    },
    {
      "epoch": 1.935912651317351,
      "grad_norm": 0.03478897735476494,
      "learning_rate": 6.729636808573476e-05,
      "loss": 0.1249,
      "step": 2039
    },
    {
      "epoch": 1.9368620935200571,
      "grad_norm": 0.03969739004969597,
      "learning_rate": 6.719196169213114e-05,
      "loss": 0.1579,
      "step": 2040
    },
    {
      "epoch": 1.937811535722763,
      "grad_norm": 0.030195200815796852,
      "learning_rate": 6.708759535993884e-05,
      "loss": 0.115,
      "step": 2041
    },
    {
      "epoch": 1.9387609779254689,
      "grad_norm": 0.03426138311624527,
      "learning_rate": 6.698326921659808e-05,
      "loss": 0.1266,
      "step": 2042
    },
    {
      "epoch": 1.9397104201281747,
      "grad_norm": 0.05202037841081619,
      "learning_rate": 6.687898338949998e-05,
      "loss": 0.193,
      "step": 2043
    },
    {
      "epoch": 1.9406598623308806,
      "grad_norm": 0.027649085968732834,
      "learning_rate": 6.67747380059864e-05,
      "loss": 0.1222,
      "step": 2044
    },
    {
      "epoch": 1.9416093045335865,
      "grad_norm": 0.026928169652819633,
      "learning_rate": 6.667053319334982e-05,
      "loss": 0.1204,
      "step": 2045
    },
    {
      "epoch": 1.9425587467362924,
      "grad_norm": 0.056547269225120544,
      "learning_rate": 6.656636907883325e-05,
      "loss": 0.1602,
      "step": 2046
    },
    {
      "epoch": 1.9435081889389982,
      "grad_norm": 0.026589645072817802,
      "learning_rate": 6.646224578962993e-05,
      "loss": 0.1214,
      "step": 2047
    },
    {
      "epoch": 1.9444576311417041,
      "grad_norm": 0.02858765795826912,
      "learning_rate": 6.635816345288329e-05,
      "loss": 0.1242,
      "step": 2048
    },
    {
      "epoch": 1.9454070733444102,
      "grad_norm": 0.04160701856017113,
      "learning_rate": 6.625412219568668e-05,
      "loss": 0.1606,
      "step": 2049
    },
    {
      "epoch": 1.946356515547116,
      "grad_norm": 0.03329680487513542,
      "learning_rate": 6.615012214508336e-05,
      "loss": 0.1346,
      "step": 2050
    },
    {
      "epoch": 1.947305957749822,
      "grad_norm": 0.041767850518226624,
      "learning_rate": 6.604616342806632e-05,
      "loss": 0.1566,
      "step": 2051
    },
    {
      "epoch": 1.948255399952528,
      "grad_norm": 0.027340400964021683,
      "learning_rate": 6.594224617157795e-05,
      "loss": 0.1253,
      "step": 2052
    },
    {
      "epoch": 1.949204842155234,
      "grad_norm": 0.06383645534515381,
      "learning_rate": 6.583837050251012e-05,
      "loss": 0.1518,
      "step": 2053
    },
    {
      "epoch": 1.9501542843579398,
      "grad_norm": 0.051231034100055695,
      "learning_rate": 6.573453654770383e-05,
      "loss": 0.1565,
      "step": 2054
    },
    {
      "epoch": 1.9501542843579398,
      "eval_loss": 0.37301480770111084,
      "eval_runtime": 38.0432,
      "eval_samples_per_second": 2.261,
      "eval_steps_per_second": 2.261,
      "step": 2054
    },
    {
      "epoch": 1.9513410871113221,
      "grad_norm": 0.4340180456638336,
      "learning_rate": 0.00013378114170405474,
      "loss": 0.3769,
      "step": 2055
    },
    {
      "epoch": 1.952290529314028,
      "grad_norm": 0.2337399125099182,
      "learning_rate": 0.00013372497405242763,
      "loss": 0.3148,
      "step": 2056
    },
    {
      "epoch": 1.953239971516734,
      "grad_norm": 0.15890651941299438,
      "learning_rate": 0.00013366879439324493,
      "loss": 0.3167,
      "step": 2057
    },
    {
      "epoch": 1.95418941371944,
      "grad_norm": 0.14962317049503326,
      "learning_rate": 0.00013361260274650906,
      "loss": 0.3146,
      "step": 2058
    },
    {
      "epoch": 1.9551388559221459,
      "grad_norm": 5.296742916107178,
      "learning_rate": 0.00013355639913222668,
      "loss": 0.4622,
      "step": 2059
    },
    {
      "epoch": 1.9560882981248517,
      "grad_norm": 7.226221084594727,
      "learning_rate": 0.0001335001835704087,
      "loss": 1.4115,
      "step": 2060
    },
    {
      "epoch": 1.9570377403275576,
      "grad_norm": 3.240274667739868,
      "learning_rate": 0.00013344395608107031,
      "loss": 1.2552,
      "step": 2061
    },
    {
      "epoch": 1.9579871825302635,
      "grad_norm": 2.299501657485962,
      "learning_rate": 0.00013338771668423095,
      "loss": 0.3784,
      "step": 2062
    },
    {
      "epoch": 1.9589366247329694,
      "grad_norm": 2.4971210956573486,
      "learning_rate": 0.00013333146539991431,
      "loss": 0.6146,
      "step": 2063
    },
    {
      "epoch": 1.9598860669356752,
      "grad_norm": 1.7239331007003784,
      "learning_rate": 0.00013327520224814822,
      "loss": 0.4257,
      "step": 2064
    },
    {
      "epoch": 1.9608355091383811,
      "grad_norm": 0.29740026593208313,
      "learning_rate": 0.00013321892724896484,
      "loss": 0.3187,
      "step": 2065
    },
    {
      "epoch": 1.961784951341087,
      "grad_norm": 8.102334022521973,
      "learning_rate": 0.0001331626404224005,
      "loss": 0.481,
      "step": 2066
    },
    {
      "epoch": 1.9627343935437929,
      "grad_norm": 0.29957181215286255,
      "learning_rate": 0.0001331063417884958,
      "loss": 0.3117,
      "step": 2067
    },
    {
      "epoch": 1.963683835746499,
      "grad_norm": 2.238389730453491,
      "learning_rate": 0.00013305003136729552,
      "loss": 0.3736,
      "step": 2068
    },
    {
      "epoch": 1.9646332779492048,
      "grad_norm": 0.37475112080574036,
      "learning_rate": 0.0001329937091788485,
      "loss": 0.3093,
      "step": 2069
    },
    {
      "epoch": 1.9655827201519107,
      "grad_norm": 1.1860514879226685,
      "learning_rate": 0.00013293737524320797,
      "loss": 0.3951,
      "step": 2070
    },
    {
      "epoch": 1.9665321623546168,
      "grad_norm": 0.2817871868610382,
      "learning_rate": 0.00013288102958043126,
      "loss": 0.3127,
      "step": 2071
    },
    {
      "epoch": 1.9674816045573227,
      "grad_norm": 0.3843158483505249,
      "learning_rate": 0.00013282467221057984,
      "loss": 0.2984,
      "step": 2072
    },
    {
      "epoch": 1.9684310467600286,
      "grad_norm": 0.40091991424560547,
      "learning_rate": 0.0001327683031537194,
      "loss": 0.2966,
      "step": 2073
    },
    {
      "epoch": 1.9693804889627344,
      "grad_norm": 0.34588465094566345,
      "learning_rate": 0.00013271192242991976,
      "loss": 0.3163,
      "step": 2074
    },
    {
      "epoch": 1.9703299311654403,
      "grad_norm": 0.19917060434818268,
      "learning_rate": 0.00013265553005925492,
      "loss": 0.3001,
      "step": 2075
    },
    {
      "epoch": 1.9712793733681462,
      "grad_norm": 0.17843176424503326,
      "learning_rate": 0.00013259912606180301,
      "loss": 0.3018,
      "step": 2076
    },
    {
      "epoch": 1.972228815570852,
      "grad_norm": 0.10518278181552887,
      "learning_rate": 0.00013254271045764636,
      "loss": 0.2883,
      "step": 2077
    },
    {
      "epoch": 1.973178257773558,
      "grad_norm": 0.16444529592990875,
      "learning_rate": 0.00013248628326687124,
      "loss": 0.3041,
      "step": 2078
    },
    {
      "epoch": 1.9741276999762638,
      "grad_norm": 0.1926691085100174,
      "learning_rate": 0.00013242984450956828,
      "loss": 0.2763,
      "step": 2079
    },
    {
      "epoch": 1.97507714217897,
      "grad_norm": 0.24896161258220673,
      "learning_rate": 0.00013237339420583212,
      "loss": 0.2895,
      "step": 2080
    },
    {
      "epoch": 1.9760265843816758,
      "grad_norm": 0.23915739357471466,
      "learning_rate": 0.00013231693237576148,
      "loss": 0.2901,
      "step": 2081
    },
    {
      "epoch": 1.9769760265843817,
      "grad_norm": 0.08436968922615051,
      "learning_rate": 0.00013226045903945926,
      "loss": 0.278,
      "step": 2082
    },
    {
      "epoch": 1.9779254687870877,
      "grad_norm": 0.9301303625106812,
      "learning_rate": 0.00013220397421703247,
      "loss": 0.316,
      "step": 2083
    },
    {
      "epoch": 1.9788749109897936,
      "grad_norm": 1.2519832849502563,
      "learning_rate": 0.00013214747792859201,
      "loss": 0.2931,
      "step": 2084
    },
    {
      "epoch": 1.9798243531924995,
      "grad_norm": 0.4805239140987396,
      "learning_rate": 0.00013209097019425316,
      "loss": 0.4146,
      "step": 2085
    },
    {
      "epoch": 1.9807737953952054,
      "grad_norm": 0.21979232132434845,
      "learning_rate": 0.00013203445103413507,
      "loss": 0.3,
      "step": 2086
    },
    {
      "epoch": 1.9817232375979112,
      "grad_norm": 0.1640891432762146,
      "learning_rate": 0.000131977920468361,
      "loss": 0.2969,
      "step": 2087
    },
    {
      "epoch": 1.9826726798006171,
      "grad_norm": 0.17019522190093994,
      "learning_rate": 0.0001319213785170583,
      "loss": 0.2914,
      "step": 2088
    },
    {
      "epoch": 1.983622122003323,
      "grad_norm": 0.09475825726985931,
      "learning_rate": 0.00013186482520035839,
      "loss": 0.297,
      "step": 2089
    },
    {
      "epoch": 1.9845715642060289,
      "grad_norm": 0.09213607013225555,
      "learning_rate": 0.00013180826053839668,
      "loss": 0.288,
      "step": 2090
    },
    {
      "epoch": 1.9855210064087347,
      "grad_norm": 0.11374935507774353,
      "learning_rate": 0.00013175168455131263,
      "loss": 0.2796,
      "step": 2091
    },
    {
      "epoch": 1.9864704486114406,
      "grad_norm": 0.10812429338693619,
      "learning_rate": 0.0001316950972592498,
      "loss": 0.3057,
      "step": 2092
    },
    {
      "epoch": 1.9874198908141467,
      "grad_norm": 0.07910951226949692,
      "learning_rate": 0.00013163849868235564,
      "loss": 0.2877,
      "step": 2093
    },
    {
      "epoch": 1.9883693330168526,
      "grad_norm": 0.09240693598985672,
      "learning_rate": 0.00013158188884078182,
      "loss": 0.2906,
      "step": 2094
    },
    {
      "epoch": 1.9893187752195585,
      "grad_norm": 0.097608283162117,
      "learning_rate": 0.00013152526775468378,
      "loss": 0.2906,
      "step": 2095
    },
    {
      "epoch": 1.9902682174222646,
      "grad_norm": 0.2190292775630951,
      "learning_rate": 0.00013146863544422118,
      "loss": 0.2835,
      "step": 2096
    },
    {
      "epoch": 1.9912176596249704,
      "grad_norm": 0.07066213339567184,
      "learning_rate": 0.00013141199192955751,
      "loss": 0.2856,
      "step": 2097
    },
    {
      "epoch": 1.9921671018276763,
      "grad_norm": 0.10716898739337921,
      "learning_rate": 0.0001313553372308604,
      "loss": 0.3012,
      "step": 2098
    },
    {
      "epoch": 1.9931165440303822,
      "grad_norm": 0.07971798628568649,
      "learning_rate": 0.00013129867136830127,
      "loss": 0.2678,
      "step": 2099
    },
    {
      "epoch": 1.994065986233088,
      "grad_norm": 0.11225918680429459,
      "learning_rate": 0.00013124199436205576,
      "loss": 0.2799,
      "step": 2100
    },
    {
      "epoch": 1.995015428435794,
      "grad_norm": 0.08741844445466995,
      "learning_rate": 0.00013118530623230327,
      "loss": 0.284,
      "step": 2101
    },
    {
      "epoch": 1.9959648706384998,
      "grad_norm": 0.07644308358430862,
      "learning_rate": 0.00013112860699922722,
      "loss": 0.2988,
      "step": 2102
    },
    {
      "epoch": 1.9969143128412057,
      "grad_norm": 0.07610399276018143,
      "learning_rate": 0.00013107189668301508,
      "loss": 0.2813,
      "step": 2103
    },
    {
      "epoch": 1.9978637550439116,
      "grad_norm": 0.1364275962114334,
      "learning_rate": 0.0001310151753038581,
      "loss": 0.3006,
      "step": 2104
    },
    {
      "epoch": 1.9988131972466177,
      "grad_norm": 0.06598393619060516,
      "learning_rate": 0.0001309584428819516,
      "loss": 0.2822,
      "step": 2105
    },
    {
      "epoch": 1.9997626394493235,
      "grad_norm": 0.08182472735643387,
      "learning_rate": 0.00013090169943749476,
      "loss": 0.2757,
      "step": 2106
    },
    {
      "epoch": 2.0007120816520296,
      "grad_norm": 0.06756250560283661,
      "learning_rate": 0.0001308449449906907,
      "loss": 0.2619,
      "step": 2107
    },
    {
      "epoch": 2.0016615238547355,
      "grad_norm": 0.05981763079762459,
      "learning_rate": 0.00013078817956174656,
      "loss": 0.2856,
      "step": 2108
    },
    {
      "epoch": 2.0026109660574414,
      "grad_norm": 0.0790615975856781,
      "learning_rate": 0.0001307314031708732,
      "loss": 0.2875,
      "step": 2109
    },
    {
      "epoch": 2.0035604082601473,
      "grad_norm": 0.06421328336000443,
      "learning_rate": 0.00013067461583828553,
      "loss": 0.2683,
      "step": 2110
    },
    {
      "epoch": 2.004509850462853,
      "grad_norm": 0.06607569754123688,
      "learning_rate": 0.0001306178175842023,
      "loss": 0.271,
      "step": 2111
    },
    {
      "epoch": 2.005459292665559,
      "grad_norm": 0.06524945050477982,
      "learning_rate": 0.00013056100842884612,
      "loss": 0.2796,
      "step": 2112
    },
    {
      "epoch": 2.006408734868265,
      "grad_norm": 0.05927155539393425,
      "learning_rate": 0.00013050418839244355,
      "loss": 0.2755,
      "step": 2113
    },
    {
      "epoch": 2.0073581770709708,
      "grad_norm": 0.06408464163541794,
      "learning_rate": 0.000130447357495225,
      "loss": 0.2748,
      "step": 2114
    },
    {
      "epoch": 2.0083076192736766,
      "grad_norm": 0.05964144691824913,
      "learning_rate": 0.0001303905157574247,
      "loss": 0.2772,
      "step": 2115
    },
    {
      "epoch": 2.0092570614763825,
      "grad_norm": 0.05294380709528923,
      "learning_rate": 0.00013033366319928079,
      "loss": 0.2543,
      "step": 2116
    },
    {
      "epoch": 2.0102065036790884,
      "grad_norm": 0.06316480785608292,
      "learning_rate": 0.00013027679984103528,
      "loss": 0.2659,
      "step": 2117
    },
    {
      "epoch": 2.0111559458817942,
      "grad_norm": 0.0780426636338234,
      "learning_rate": 0.000130219925702934,
      "loss": 0.2809,
      "step": 2118
    },
    {
      "epoch": 2.0121053880845006,
      "grad_norm": 0.05921616032719612,
      "learning_rate": 0.00013016304080522656,
      "loss": 0.2651,
      "step": 2119
    },
    {
      "epoch": 2.0130548302872064,
      "grad_norm": 0.063509002327919,
      "learning_rate": 0.0001301061451681665,
      "loss": 0.2766,
      "step": 2120
    },
    {
      "epoch": 2.0140042724899123,
      "grad_norm": 0.06251564621925354,
      "learning_rate": 0.0001300492388120111,
      "loss": 0.2826,
      "step": 2121
    },
    {
      "epoch": 2.014953714692618,
      "grad_norm": 0.07721933722496033,
      "learning_rate": 0.0001299923217570215,
      "loss": 0.2876,
      "step": 2122
    },
    {
      "epoch": 2.015903156895324,
      "grad_norm": 0.22655093669891357,
      "learning_rate": 0.0001299353940234627,
      "loss": 0.3023,
      "step": 2123
    },
    {
      "epoch": 2.01685259909803,
      "grad_norm": 0.16343270242214203,
      "learning_rate": 0.0001298784556316034,
      "loss": 0.2902,
      "step": 2124
    },
    {
      "epoch": 2.017802041300736,
      "grad_norm": 0.0674663856625557,
      "learning_rate": 0.00012982150660171613,
      "loss": 0.2639,
      "step": 2125
    },
    {
      "epoch": 2.0187514835034417,
      "grad_norm": 0.06698331236839294,
      "learning_rate": 0.00012976454695407723,
      "loss": 0.2918,
      "step": 2126
    },
    {
      "epoch": 2.0197009257061476,
      "grad_norm": 0.05850343778729439,
      "learning_rate": 0.00012970757670896683,
      "loss": 0.2691,
      "step": 2127
    },
    {
      "epoch": 2.0206503679088534,
      "grad_norm": 0.05704069882631302,
      "learning_rate": 0.0001296505958866688,
      "loss": 0.2688,
      "step": 2128
    },
    {
      "epoch": 2.0215998101115593,
      "grad_norm": 0.060891758650541306,
      "learning_rate": 0.00012959360450747075,
      "loss": 0.2652,
      "step": 2129
    },
    {
      "epoch": 2.022549252314265,
      "grad_norm": 0.061691515147686005,
      "learning_rate": 0.00012953660259166412,
      "loss": 0.2756,
      "step": 2130
    },
    {
      "epoch": 2.023498694516971,
      "grad_norm": 0.059189558029174805,
      "learning_rate": 0.00012947959015954406,
      "loss": 0.2759,
      "step": 2131
    },
    {
      "epoch": 2.0244481367196774,
      "grad_norm": 0.06447713077068329,
      "learning_rate": 0.00012942256723140952,
      "loss": 0.2773,
      "step": 2132
    },
    {
      "epoch": 2.0253975789223833,
      "grad_norm": 0.06263953447341919,
      "learning_rate": 0.0001293655338275631,
      "loss": 0.2867,
      "step": 2133
    },
    {
      "epoch": 2.026347021125089,
      "grad_norm": 0.0576293058693409,
      "learning_rate": 0.00012930848996831114,
      "loss": 0.2776,
      "step": 2134
    },
    {
      "epoch": 2.027296463327795,
      "grad_norm": 0.05699608847498894,
      "learning_rate": 0.00012925143567396374,
      "loss": 0.2757,
      "step": 2135
    },
    {
      "epoch": 2.028245905530501,
      "grad_norm": 0.052561014890670776,
      "learning_rate": 0.00012919437096483476,
      "loss": 0.2555,
      "step": 2136
    },
    {
      "epoch": 2.0291953477332068,
      "grad_norm": 0.053198445588350296,
      "learning_rate": 0.00012913729586124165,
      "loss": 0.2676,
      "step": 2137
    },
    {
      "epoch": 2.0301447899359126,
      "grad_norm": 0.09329196810722351,
      "learning_rate": 0.00012908021038350568,
      "loss": 0.2796,
      "step": 2138
    },
    {
      "epoch": 2.0310942321386185,
      "grad_norm": 0.07239534705877304,
      "learning_rate": 0.00012902311455195172,
      "loss": 0.2809,
      "step": 2139
    },
    {
      "epoch": 2.0320436743413244,
      "grad_norm": 0.06299670785665512,
      "learning_rate": 0.00012896600838690838,
      "loss": 0.2672,
      "step": 2140
    },
    {
      "epoch": 2.0329931165440303,
      "grad_norm": 0.05467437952756882,
      "learning_rate": 0.00012890889190870795,
      "loss": 0.268,
      "step": 2141
    },
    {
      "epoch": 2.033942558746736,
      "grad_norm": 0.0641472190618515,
      "learning_rate": 0.00012885176513768637,
      "loss": 0.2844,
      "step": 2142
    },
    {
      "epoch": 2.034892000949442,
      "grad_norm": 0.06481951475143433,
      "learning_rate": 0.00012879462809418325,
      "loss": 0.2883,
      "step": 2143
    },
    {
      "epoch": 2.0358414431521483,
      "grad_norm": 0.05889345332980156,
      "learning_rate": 0.0001287374807985418,
      "loss": 0.2688,
      "step": 2144
    },
    {
      "epoch": 2.036790885354854,
      "grad_norm": 0.05446067079901695,
      "learning_rate": 0.00012868032327110904,
      "loss": 0.2699,
      "step": 2145
    },
    {
      "epoch": 2.03774032755756,
      "grad_norm": 0.0558142326772213,
      "learning_rate": 0.00012862315553223547,
      "loss": 0.2662,
      "step": 2146
    },
    {
      "epoch": 2.038689769760266,
      "grad_norm": 0.05485325679183006,
      "learning_rate": 0.0001285659776022753,
      "loss": 0.2684,
      "step": 2147
    },
    {
      "epoch": 2.039639211962972,
      "grad_norm": 0.05541551858186722,
      "learning_rate": 0.0001285087895015864,
      "loss": 0.2674,
      "step": 2148
    },
    {
      "epoch": 2.0405886541656777,
      "grad_norm": 0.10139881074428558,
      "learning_rate": 0.0001284515912505301,
      "loss": 0.2737,
      "step": 2149
    },
    {
      "epoch": 2.0415380963683836,
      "grad_norm": 0.05179375782608986,
      "learning_rate": 0.00012839438286947163,
      "loss": 0.2647,
      "step": 2150
    },
    {
      "epoch": 2.0424875385710894,
      "grad_norm": 0.0590873584151268,
      "learning_rate": 0.0001283371643787795,
      "loss": 0.2743,
      "step": 2151
    },
    {
      "epoch": 2.0434369807737953,
      "grad_norm": 0.0546240359544754,
      "learning_rate": 0.00012827993579882612,
      "loss": 0.2798,
      "step": 2152
    },
    {
      "epoch": 2.044386422976501,
      "grad_norm": 0.056896887719631195,
      "learning_rate": 0.0001282226971499872,
      "loss": 0.2717,
      "step": 2153
    },
    {
      "epoch": 2.045335865179207,
      "grad_norm": 0.052284859120845795,
      "learning_rate": 0.00012816544845264228,
      "loss": 0.2719,
      "step": 2154
    },
    {
      "epoch": 2.046285307381913,
      "grad_norm": 0.060961298644542694,
      "learning_rate": 0.0001281081897271744,
      "loss": 0.2764,
      "step": 2155
    },
    {
      "epoch": 2.047234749584619,
      "grad_norm": 0.08830570429563522,
      "learning_rate": 0.0001280509209939701,
      "loss": 0.2946,
      "step": 2156
    },
    {
      "epoch": 2.048184191787325,
      "grad_norm": 0.05548688769340515,
      "learning_rate": 0.00012799364227341955,
      "loss": 0.2647,
      "step": 2157
    },
    {
      "epoch": 2.049133633990031,
      "grad_norm": 0.05134082958102226,
      "learning_rate": 0.00012793635358591645,
      "loss": 0.2724,
      "step": 2158
    },
    {
      "epoch": 2.050083076192737,
      "grad_norm": 0.06974118202924728,
      "learning_rate": 0.0001278790549518581,
      "loss": 0.2873,
      "step": 2159
    },
    {
      "epoch": 2.0510325183954428,
      "grad_norm": 0.06583964079618454,
      "learning_rate": 0.0001278217463916453,
      "loss": 0.2823,
      "step": 2160
    },
    {
      "epoch": 2.0519819605981486,
      "grad_norm": 0.05401783436536789,
      "learning_rate": 0.00012776442792568232,
      "loss": 0.2788,
      "step": 2161
    },
    {
      "epoch": 2.0529314028008545,
      "grad_norm": 0.09343112260103226,
      "learning_rate": 0.00012770709957437708,
      "loss": 0.2824,
      "step": 2162
    },
    {
      "epoch": 2.0538808450035604,
      "grad_norm": 0.11026190966367722,
      "learning_rate": 0.00012764976135814094,
      "loss": 0.2861,
      "step": 2163
    },
    {
      "epoch": 2.0548302872062663,
      "grad_norm": 0.05160842835903168,
      "learning_rate": 0.00012759241329738887,
      "loss": 0.2615,
      "step": 2164
    },
    {
      "epoch": 2.055779729408972,
      "grad_norm": 0.057216208428144455,
      "learning_rate": 0.00012753505541253916,
      "loss": 0.2757,
      "step": 2165
    },
    {
      "epoch": 2.056729171611678,
      "grad_norm": 0.07923352718353271,
      "learning_rate": 0.00012747768772401378,
      "loss": 0.2659,
      "step": 2166
    },
    {
      "epoch": 2.057678613814384,
      "grad_norm": 0.055502623319625854,
      "learning_rate": 0.0001274203102522381,
      "loss": 0.2757,
      "step": 2167
    },
    {
      "epoch": 2.0586280560170898,
      "grad_norm": 0.10472196340560913,
      "learning_rate": 0.00012736292301764098,
      "loss": 0.2938,
      "step": 2168
    },
    {
      "epoch": 2.059577498219796,
      "grad_norm": 0.1105305552482605,
      "learning_rate": 0.00012730552604065475,
      "loss": 0.2846,
      "step": 2169
    },
    {
      "epoch": 2.060526940422502,
      "grad_norm": 0.06079312413930893,
      "learning_rate": 0.0001272481193417153,
      "loss": 0.2724,
      "step": 2170
    },
    {
      "epoch": 2.061476382625208,
      "grad_norm": 0.06276509910821915,
      "learning_rate": 0.00012719070294126182,
      "loss": 0.2704,
      "step": 2171
    },
    {
      "epoch": 2.0624258248279137,
      "grad_norm": 0.08746016025543213,
      "learning_rate": 0.00012713327685973707,
      "loss": 0.2834,
      "step": 2172
    },
    {
      "epoch": 2.0633752670306196,
      "grad_norm": 0.053869761526584625,
      "learning_rate": 0.0001270758411175873,
      "loss": 0.2712,
      "step": 2173
    },
    {
      "epoch": 2.0643247092333254,
      "grad_norm": 0.05118397995829582,
      "learning_rate": 0.00012701839573526206,
      "loss": 0.2737,
      "step": 2174
    },
    {
      "epoch": 2.0652741514360313,
      "grad_norm": 0.05905655771493912,
      "learning_rate": 0.0001269609407332144,
      "loss": 0.2663,
      "step": 2175
    },
    {
      "epoch": 2.066223593638737,
      "grad_norm": 0.049641139805316925,
      "learning_rate": 0.00012690347613190082,
      "loss": 0.263,
      "step": 2176
    },
    {
      "epoch": 2.067173035841443,
      "grad_norm": 0.04823688417673111,
      "learning_rate": 0.00012684600195178117,
      "loss": 0.2667,
      "step": 2177
    },
    {
      "epoch": 2.068122478044149,
      "grad_norm": 0.07979489117860794,
      "learning_rate": 0.00012678851821331882,
      "loss": 0.2854,
      "step": 2178
    },
    {
      "epoch": 2.069071920246855,
      "grad_norm": 0.06123083457350731,
      "learning_rate": 0.00012673102493698042,
      "loss": 0.2832,
      "step": 2179
    },
    {
      "epoch": 2.0700213624495607,
      "grad_norm": 0.07498030364513397,
      "learning_rate": 0.00012667352214323614,
      "loss": 0.3061,
      "step": 2180
    },
    {
      "epoch": 2.0709708046522666,
      "grad_norm": 0.059050336480140686,
      "learning_rate": 0.0001266160098525594,
      "loss": 0.2623,
      "step": 2181
    },
    {
      "epoch": 2.071920246854973,
      "grad_norm": 0.060739047825336456,
      "learning_rate": 0.00012655848808542709,
      "loss": 0.282,
      "step": 2182
    },
    {
      "epoch": 2.0728696890576788,
      "grad_norm": 0.059133414179086685,
      "learning_rate": 0.00012650095686231953,
      "loss": 0.2637,
      "step": 2183
    },
    {
      "epoch": 2.0738191312603846,
      "grad_norm": 0.05270388349890709,
      "learning_rate": 0.00012644341620372023,
      "loss": 0.2742,
      "step": 2184
    },
    {
      "epoch": 2.0747685734630905,
      "grad_norm": 0.049184754490852356,
      "learning_rate": 0.00012638586613011624,
      "loss": 0.2582,
      "step": 2185
    },
    {
      "epoch": 2.0757180156657964,
      "grad_norm": 0.05757623910903931,
      "learning_rate": 0.0001263283066619978,
      "loss": 0.2672,
      "step": 2186
    },
    {
      "epoch": 2.0766674578685023,
      "grad_norm": 0.051976773887872696,
      "learning_rate": 0.0001262707378198587,
      "loss": 0.2769,
      "step": 2187
    },
    {
      "epoch": 2.077616900071208,
      "grad_norm": 0.04786711558699608,
      "learning_rate": 0.00012621315962419585,
      "loss": 0.2661,
      "step": 2188
    },
    {
      "epoch": 2.078566342273914,
      "grad_norm": 0.0624409057199955,
      "learning_rate": 0.00012615557209550967,
      "loss": 0.2867,
      "step": 2189
    },
    {
      "epoch": 2.07951578447662,
      "grad_norm": 0.05563337355852127,
      "learning_rate": 0.00012609797525430373,
      "loss": 0.2778,
      "step": 2190
    },
    {
      "epoch": 2.0804652266793258,
      "grad_norm": 0.04968985542654991,
      "learning_rate": 0.00012604036912108505,
      "loss": 0.2562,
      "step": 2191
    },
    {
      "epoch": 2.0814146688820316,
      "grad_norm": 0.05211299657821655,
      "learning_rate": 0.00012598275371636394,
      "loss": 0.2746,
      "step": 2192
    },
    {
      "epoch": 2.0823641110847375,
      "grad_norm": 0.0466628223657608,
      "learning_rate": 0.00012592512906065397,
      "loss": 0.2654,
      "step": 2193
    },
    {
      "epoch": 2.083313553287444,
      "grad_norm": 0.056648485362529755,
      "learning_rate": 0.000125867495174472,
      "loss": 0.28,
      "step": 2194
    },
    {
      "epoch": 2.0842629954901497,
      "grad_norm": 0.056760817766189575,
      "learning_rate": 0.0001258098520783382,
      "loss": 0.2732,
      "step": 2195
    },
    {
      "epoch": 2.0852124376928556,
      "grad_norm": 0.05097498744726181,
      "learning_rate": 0.00012575219979277602,
      "loss": 0.261,
      "step": 2196
    },
    {
      "epoch": 2.0861618798955615,
      "grad_norm": 0.05032607540488243,
      "learning_rate": 0.00012569453833831222,
      "loss": 0.2769,
      "step": 2197
    },
    {
      "epoch": 2.0871113220982673,
      "grad_norm": 0.04438967630267143,
      "learning_rate": 0.00012563686773547675,
      "loss": 0.2561,
      "step": 2198
    },
    {
      "epoch": 2.088060764300973,
      "grad_norm": 0.05397673696279526,
      "learning_rate": 0.00012557918800480282,
      "loss": 0.2712,
      "step": 2199
    },
    {
      "epoch": 2.089010206503679,
      "grad_norm": 0.05158831924200058,
      "learning_rate": 0.00012552149916682695,
      "loss": 0.2685,
      "step": 2200
    },
    {
      "epoch": 2.089959648706385,
      "grad_norm": 0.06279024481773376,
      "learning_rate": 0.00012546380124208887,
      "loss": 0.2722,
      "step": 2201
    },
    {
      "epoch": 2.090909090909091,
      "grad_norm": 0.04665720462799072,
      "learning_rate": 0.00012540609425113156,
      "loss": 0.2604,
      "step": 2202
    },
    {
      "epoch": 2.0918585331117967,
      "grad_norm": 0.059546615928411484,
      "learning_rate": 0.00012534837821450117,
      "loss": 0.2721,
      "step": 2203
    },
    {
      "epoch": 2.0928079753145026,
      "grad_norm": 0.0592176578938961,
      "learning_rate": 0.0001252906531527472,
      "loss": 0.2716,
      "step": 2204
    },
    {
      "epoch": 2.0937574175172085,
      "grad_norm": 0.04968995600938797,
      "learning_rate": 0.00012523291908642217,
      "loss": 0.2474,
      "step": 2205
    },
    {
      "epoch": 2.0947068597199143,
      "grad_norm": 0.052708033472299576,
      "learning_rate": 0.00012517517603608203,
      "loss": 0.2668,
      "step": 2206
    },
    {
      "epoch": 2.0956563019226206,
      "grad_norm": 0.06978727877140045,
      "learning_rate": 0.0001251174240222857,
      "loss": 0.2729,
      "step": 2207
    },
    {
      "epoch": 2.0966057441253265,
      "grad_norm": 0.061792004853487015,
      "learning_rate": 0.0001250596630655955,
      "loss": 0.2706,
      "step": 2208
    },
    {
      "epoch": 2.0975551863280324,
      "grad_norm": 0.05177111551165581,
      "learning_rate": 0.00012500189318657675,
      "loss": 0.2759,
      "step": 2209
    },
    {
      "epoch": 2.0985046285307383,
      "grad_norm": 0.05225459113717079,
      "learning_rate": 0.00012494411440579814,
      "loss": 0.2662,
      "step": 2210
    },
    {
      "epoch": 2.099454070733444,
      "grad_norm": 0.046468012034893036,
      "learning_rate": 0.00012488632674383134,
      "loss": 0.2712,
      "step": 2211
    },
    {
      "epoch": 2.10040351293615,
      "grad_norm": 0.044963154941797256,
      "learning_rate": 0.00012482853022125132,
      "loss": 0.2685,
      "step": 2212
    },
    {
      "epoch": 2.101352955138856,
      "grad_norm": 0.04540163278579712,
      "learning_rate": 0.00012477072485863613,
      "loss": 0.2625,
      "step": 2213
    },
    {
      "epoch": 2.1023023973415618,
      "grad_norm": 0.05036984756588936,
      "learning_rate": 0.00012471291067656697,
      "loss": 0.271,
      "step": 2214
    },
    {
      "epoch": 2.1032518395442676,
      "grad_norm": 0.05176820978522301,
      "learning_rate": 0.00012465508769562823,
      "loss": 0.2819,
      "step": 2215
    },
    {
      "epoch": 2.1042012817469735,
      "grad_norm": 0.07643739879131317,
      "learning_rate": 0.0001245972559364074,
      "loss": 0.2868,
      "step": 2216
    },
    {
      "epoch": 2.1051507239496794,
      "grad_norm": 0.04632152244448662,
      "learning_rate": 0.0001245394154194951,
      "loss": 0.2686,
      "step": 2217
    },
    {
      "epoch": 2.1061001661523853,
      "grad_norm": 0.0568397156894207,
      "learning_rate": 0.00012448156616548506,
      "loss": 0.2626,
      "step": 2218
    },
    {
      "epoch": 2.1070496083550916,
      "grad_norm": 0.0479881577193737,
      "learning_rate": 0.0001244237081949741,
      "loss": 0.258,
      "step": 2219
    },
    {
      "epoch": 2.1079990505577975,
      "grad_norm": 0.05392912030220032,
      "learning_rate": 0.0001243658415285622,
      "loss": 0.2601,
      "step": 2220
    },
    {
      "epoch": 2.1089484927605033,
      "grad_norm": 0.055512312799692154,
      "learning_rate": 0.00012430796618685244,
      "loss": 0.2748,
      "step": 2221
    },
    {
      "epoch": 2.109897934963209,
      "grad_norm": 0.05387381836771965,
      "learning_rate": 0.00012425008219045088,
      "loss": 0.2764,
      "step": 2222
    },
    {
      "epoch": 2.110847377165915,
      "grad_norm": 0.05436617136001587,
      "learning_rate": 0.00012419218955996676,
      "loss": 0.2746,
      "step": 2223
    },
    {
      "epoch": 2.111796819368621,
      "grad_norm": 0.06682915985584259,
      "learning_rate": 0.00012413428831601245,
      "loss": 0.2932,
      "step": 2224
    },
    {
      "epoch": 2.112746261571327,
      "grad_norm": 0.048501718789339066,
      "learning_rate": 0.0001240763784792032,
      "loss": 0.2506,
      "step": 2225
    },
    {
      "epoch": 2.1136957037740327,
      "grad_norm": 0.050359319895505905,
      "learning_rate": 0.0001240184600701575,
      "loss": 0.2746,
      "step": 2226
    },
    {
      "epoch": 2.1146451459767386,
      "grad_norm": 0.061569176614284515,
      "learning_rate": 0.00012396053310949673,
      "loss": 0.2874,
      "step": 2227
    },
    {
      "epoch": 2.1155945881794445,
      "grad_norm": 0.053734250366687775,
      "learning_rate": 0.00012390259761784552,
      "loss": 0.2691,
      "step": 2228
    },
    {
      "epoch": 2.1165440303821503,
      "grad_norm": 0.07295026630163193,
      "learning_rate": 0.00012384465361583134,
      "loss": 0.2892,
      "step": 2229
    },
    {
      "epoch": 2.117493472584856,
      "grad_norm": 0.07634708285331726,
      "learning_rate": 0.0001237867011240848,
      "loss": 0.2754,
      "step": 2230
    },
    {
      "epoch": 2.118442914787562,
      "grad_norm": 0.08975531905889511,
      "learning_rate": 0.00012372874016323951,
      "loss": 0.2806,
      "step": 2231
    },
    {
      "epoch": 2.1193923569902684,
      "grad_norm": 0.07255006581544876,
      "learning_rate": 0.0001236707707539321,
      "loss": 0.3048,
      "step": 2232
    },
    {
      "epoch": 2.1203417991929743,
      "grad_norm": 0.049513548612594604,
      "learning_rate": 0.00012361279291680214,
      "loss": 0.2512,
      "step": 2233
    },
    {
      "epoch": 2.12129124139568,
      "grad_norm": 0.05127749219536781,
      "learning_rate": 0.00012355480667249232,
      "loss": 0.2614,
      "step": 2234
    },
    {
      "epoch": 2.122240683598386,
      "grad_norm": 0.05769433453679085,
      "learning_rate": 0.00012349681204164824,
      "loss": 0.2564,
      "step": 2235
    },
    {
      "epoch": 2.123190125801092,
      "grad_norm": 0.0693785548210144,
      "learning_rate": 0.00012343880904491848,
      "loss": 0.2714,
      "step": 2236
    },
    {
      "epoch": 2.1241395680037978,
      "grad_norm": 0.056581392884254456,
      "learning_rate": 0.00012338079770295466,
      "loss": 0.2684,
      "step": 2237
    },
    {
      "epoch": 2.1250890102065036,
      "grad_norm": 0.06454044580459595,
      "learning_rate": 0.00012332277803641135,
      "loss": 0.2638,
      "step": 2238
    },
    {
      "epoch": 2.1260384524092095,
      "grad_norm": 0.05345448851585388,
      "learning_rate": 0.00012326475006594606,
      "loss": 0.2638,
      "step": 2239
    },
    {
      "epoch": 2.1269878946119154,
      "grad_norm": 0.05101858824491501,
      "learning_rate": 0.0001232067138122192,
      "loss": 0.2654,
      "step": 2240
    },
    {
      "epoch": 2.1279373368146213,
      "grad_norm": 0.051356613636016846,
      "learning_rate": 0.00012314866929589432,
      "loss": 0.2598,
      "step": 2241
    },
    {
      "epoch": 2.128886779017327,
      "grad_norm": 0.07914724946022034,
      "learning_rate": 0.0001230906165376377,
      "loss": 0.2704,
      "step": 2242
    },
    {
      "epoch": 2.1298362212200335,
      "grad_norm": 0.05196173116564751,
      "learning_rate": 0.00012303255555811866,
      "loss": 0.2676,
      "step": 2243
    },
    {
      "epoch": 2.1307856634227393,
      "grad_norm": 0.0463208444416523,
      "learning_rate": 0.00012297448637800943,
      "loss": 0.2658,
      "step": 2244
    },
    {
      "epoch": 2.131735105625445,
      "grad_norm": 0.0464724637567997,
      "learning_rate": 0.0001229164090179852,
      "loss": 0.2739,
      "step": 2245
    },
    {
      "epoch": 2.132684547828151,
      "grad_norm": 0.044462431222200394,
      "learning_rate": 0.000122858323498724,
      "loss": 0.2724,
      "step": 2246
    },
    {
      "epoch": 2.133633990030857,
      "grad_norm": 0.059855107218027115,
      "learning_rate": 0.00012280022984090675,
      "loss": 0.2885,
      "step": 2247
    },
    {
      "epoch": 2.134583432233563,
      "grad_norm": 0.05929682031273842,
      "learning_rate": 0.0001227421280652174,
      "loss": 0.2774,
      "step": 2248
    },
    {
      "epoch": 2.1355328744362687,
      "grad_norm": 0.05959324166178703,
      "learning_rate": 0.0001226840181923427,
      "loss": 0.285,
      "step": 2249
    },
    {
      "epoch": 2.1364823166389746,
      "grad_norm": 0.049392689019441605,
      "learning_rate": 0.00012262590024297225,
      "loss": 0.2723,
      "step": 2250
    },
    {
      "epoch": 2.1374317588416805,
      "grad_norm": 0.05052879452705383,
      "learning_rate": 0.00012256777423779851,
      "loss": 0.267,
      "step": 2251
    },
    {
      "epoch": 2.1383812010443863,
      "grad_norm": 0.04806723818182945,
      "learning_rate": 0.00012250964019751696,
      "loss": 0.2865,
      "step": 2252
    },
    {
      "epoch": 2.139330643247092,
      "grad_norm": 0.06047017127275467,
      "learning_rate": 0.00012245149814282583,
      "loss": 0.2818,
      "step": 2253
    },
    {
      "epoch": 2.140280085449798,
      "grad_norm": 0.04983370006084442,
      "learning_rate": 0.0001223933480944262,
      "loss": 0.2511,
      "step": 2254
    },
    {
      "epoch": 2.141229527652504,
      "grad_norm": 0.05417335778474808,
      "learning_rate": 0.00012233519007302202,
      "loss": 0.2737,
      "step": 2255
    },
    {
      "epoch": 2.14217896985521,
      "grad_norm": 0.06751585006713867,
      "learning_rate": 0.00012227702409932001,
      "loss": 0.2818,
      "step": 2256
    },
    {
      "epoch": 2.143128412057916,
      "grad_norm": 0.13051903247833252,
      "learning_rate": 0.00012221885019402984,
      "loss": 0.2715,
      "step": 2257
    },
    {
      "epoch": 2.144077854260622,
      "grad_norm": 0.05568401888012886,
      "learning_rate": 0.000122160668377864,
      "loss": 0.2831,
      "step": 2258
    },
    {
      "epoch": 2.145027296463328,
      "grad_norm": 0.060945216566324234,
      "learning_rate": 0.00012210247867153765,
      "loss": 0.2827,
      "step": 2259
    },
    {
      "epoch": 2.145976738666034,
      "grad_norm": 0.0824795663356781,
      "learning_rate": 0.00012204428109576887,
      "loss": 0.28,
      "step": 2260
    },
    {
      "epoch": 2.1469261808687397,
      "grad_norm": 0.07995422184467316,
      "learning_rate": 0.00012198607567127854,
      "loss": 0.2945,
      "step": 2261
    },
    {
      "epoch": 2.1478756230714455,
      "grad_norm": 0.047262005507946014,
      "learning_rate": 0.00012192786241879033,
      "loss": 0.2664,
      "step": 2262
    },
    {
      "epoch": 2.1488250652741514,
      "grad_norm": 0.04705559089779854,
      "learning_rate": 0.0001218696413590307,
      "loss": 0.2495,
      "step": 2263
    },
    {
      "epoch": 2.1497745074768573,
      "grad_norm": 0.09175438433885574,
      "learning_rate": 0.00012181141251272885,
      "loss": 0.2982,
      "step": 2264
    },
    {
      "epoch": 2.150723949679563,
      "grad_norm": 0.07894378155469894,
      "learning_rate": 0.00012175317590061674,
      "loss": 0.2846,
      "step": 2265
    },
    {
      "epoch": 2.151673391882269,
      "grad_norm": 0.08106362819671631,
      "learning_rate": 0.00012169493154342922,
      "loss": 0.2836,
      "step": 2266
    },
    {
      "epoch": 2.152622834084975,
      "grad_norm": 0.04687848687171936,
      "learning_rate": 0.00012163667946190376,
      "loss": 0.2573,
      "step": 2267
    },
    {
      "epoch": 2.153572276287681,
      "grad_norm": 0.05097149685025215,
      "learning_rate": 0.00012157841967678063,
      "loss": 0.2633,
      "step": 2268
    },
    {
      "epoch": 2.154521718490387,
      "grad_norm": 0.056790512055158615,
      "learning_rate": 0.00012152015220880284,
      "loss": 0.2841,
      "step": 2269
    },
    {
      "epoch": 2.155471160693093,
      "grad_norm": 0.058457743376493454,
      "learning_rate": 0.00012146187707871617,
      "loss": 0.2911,
      "step": 2270
    },
    {
      "epoch": 2.156420602895799,
      "grad_norm": 0.04693342000246048,
      "learning_rate": 0.00012140359430726906,
      "loss": 0.2573,
      "step": 2271
    },
    {
      "epoch": 2.1573700450985047,
      "grad_norm": 0.04494727402925491,
      "learning_rate": 0.00012134530391521275,
      "loss": 0.261,
      "step": 2272
    },
    {
      "epoch": 2.1583194873012106,
      "grad_norm": 0.056293174624443054,
      "learning_rate": 0.00012128700592330114,
      "loss": 0.2954,
      "step": 2273
    },
    {
      "epoch": 2.1592689295039165,
      "grad_norm": 0.04697936400771141,
      "learning_rate": 0.0001212287003522908,
      "loss": 0.2672,
      "step": 2274
    },
    {
      "epoch": 2.1602183717066223,
      "grad_norm": 0.04936970770359039,
      "learning_rate": 0.0001211703872229411,
      "loss": 0.2756,
      "step": 2275
    },
    {
      "epoch": 2.161167813909328,
      "grad_norm": 0.1244848370552063,
      "learning_rate": 0.000121112066556014,
      "loss": 0.2897,
      "step": 2276
    },
    {
      "epoch": 2.162117256112034,
      "grad_norm": 0.04847177118062973,
      "learning_rate": 0.00012105373837227425,
      "loss": 0.276,
      "step": 2277
    },
    {
      "epoch": 2.16306669831474,
      "grad_norm": 0.053024183958768845,
      "learning_rate": 0.00012099540269248917,
      "loss": 0.2755,
      "step": 2278
    },
    {
      "epoch": 2.164016140517446,
      "grad_norm": 0.07293348014354706,
      "learning_rate": 0.0001209370595374288,
      "loss": 0.272,
      "step": 2279
    },
    {
      "epoch": 2.1649655827201517,
      "grad_norm": 0.044251903891563416,
      "learning_rate": 0.00012087870892786588,
      "loss": 0.2546,
      "step": 2280
    },
    {
      "epoch": 2.1659150249228576,
      "grad_norm": 0.0693570151925087,
      "learning_rate": 0.00012082035088457573,
      "loss": 0.2901,
      "step": 2281
    },
    {
      "epoch": 2.166864467125564,
      "grad_norm": 0.05188895761966705,
      "learning_rate": 0.00012076198542833632,
      "loss": 0.2698,
      "step": 2282
    },
    {
      "epoch": 2.16781390932827,
      "grad_norm": 0.07463851571083069,
      "learning_rate": 0.00012070361257992832,
      "loss": 0.2682,
      "step": 2283
    },
    {
      "epoch": 2.1687633515309757,
      "grad_norm": 0.05541319400072098,
      "learning_rate": 0.00012064523236013498,
      "loss": 0.2913,
      "step": 2284
    },
    {
      "epoch": 2.1697127937336815,
      "grad_norm": 0.059766124933958054,
      "learning_rate": 0.00012058684478974224,
      "loss": 0.2764,
      "step": 2285
    },
    {
      "epoch": 2.1706622359363874,
      "grad_norm": 0.06126366928219795,
      "learning_rate": 0.0001205284498895386,
      "loss": 0.2891,
      "step": 2286
    },
    {
      "epoch": 2.1716116781390933,
      "grad_norm": 0.04642792418599129,
      "learning_rate": 0.00012047004768031513,
      "loss": 0.2599,
      "step": 2287
    },
    {
      "epoch": 2.172561120341799,
      "grad_norm": 0.07450695335865021,
      "learning_rate": 0.00012041163818286559,
      "loss": 0.2758,
      "step": 2288
    },
    {
      "epoch": 2.173510562544505,
      "grad_norm": 0.05021700635552406,
      "learning_rate": 0.00012035322141798629,
      "loss": 0.2677,
      "step": 2289
    },
    {
      "epoch": 2.174460004747211,
      "grad_norm": 0.03968047723174095,
      "learning_rate": 0.00012029479740647613,
      "loss": 0.2594,
      "step": 2290
    },
    {
      "epoch": 2.175409446949917,
      "grad_norm": 0.048740074038505554,
      "learning_rate": 0.00012023636616913663,
      "loss": 0.2715,
      "step": 2291
    },
    {
      "epoch": 2.1763588891526227,
      "grad_norm": 0.042600784450769424,
      "learning_rate": 0.00012017792772677177,
      "loss": 0.2652,
      "step": 2292
    },
    {
      "epoch": 2.177308331355329,
      "grad_norm": 0.05429399758577347,
      "learning_rate": 0.00012011948210018827,
      "loss": 0.2842,
      "step": 2293
    },
    {
      "epoch": 2.178257773558035,
      "grad_norm": 0.04825136438012123,
      "learning_rate": 0.00012006102931019522,
      "loss": 0.2652,
      "step": 2294
    },
    {
      "epoch": 2.1792072157607407,
      "grad_norm": 0.06344690918922424,
      "learning_rate": 0.00012000256937760445,
      "loss": 0.2872,
      "step": 2295
    },
    {
      "epoch": 2.1801566579634466,
      "grad_norm": 0.04262791574001312,
      "learning_rate": 0.00011994410232323017,
      "loss": 0.2635,
      "step": 2296
    },
    {
      "epoch": 2.1811061001661525,
      "grad_norm": 0.046105436980724335,
      "learning_rate": 0.00011988562816788921,
      "loss": 0.2673,
      "step": 2297
    },
    {
      "epoch": 2.1820555423688583,
      "grad_norm": 0.04826546832919121,
      "learning_rate": 0.00011982714693240089,
      "loss": 0.2731,
      "step": 2298
    },
    {
      "epoch": 2.1830049845715642,
      "grad_norm": 0.04453393071889877,
      "learning_rate": 0.00011976865863758708,
      "loss": 0.2595,
      "step": 2299
    },
    {
      "epoch": 2.18395442677427,
      "grad_norm": 0.04711470380425453,
      "learning_rate": 0.00011971016330427215,
      "loss": 0.2655,
      "step": 2300
    },
    {
      "epoch": 2.184903868976976,
      "grad_norm": 0.05461464077234268,
      "learning_rate": 0.00011965166095328301,
      "loss": 0.267,
      "step": 2301
    },
    {
      "epoch": 2.185853311179682,
      "grad_norm": 0.05645698308944702,
      "learning_rate": 0.00011959315160544901,
      "loss": 0.2922,
      "step": 2302
    },
    {
      "epoch": 2.1868027533823877,
      "grad_norm": 0.04374222829937935,
      "learning_rate": 0.00011953463528160202,
      "loss": 0.2592,
      "step": 2303
    },
    {
      "epoch": 2.1877521955850936,
      "grad_norm": 0.0473443977534771,
      "learning_rate": 0.0001194761120025764,
      "loss": 0.2613,
      "step": 2304
    },
    {
      "epoch": 2.1887016377877995,
      "grad_norm": 0.0495041161775589,
      "learning_rate": 0.00011941758178920898,
      "loss": 0.2672,
      "step": 2305
    },
    {
      "epoch": 2.1896510799905053,
      "grad_norm": 0.048812057822942734,
      "learning_rate": 0.00011935904466233907,
      "loss": 0.2771,
      "step": 2306
    },
    {
      "epoch": 2.1906005221932117,
      "grad_norm": 0.06169416755437851,
      "learning_rate": 0.00011930050064280838,
      "loss": 0.2887,
      "step": 2307
    },
    {
      "epoch": 2.1915499643959175,
      "grad_norm": 0.0863117203116417,
      "learning_rate": 0.00011924194975146117,
      "loss": 0.2709,
      "step": 2308
    },
    {
      "epoch": 2.1924994065986234,
      "grad_norm": 0.05131746456027031,
      "learning_rate": 0.00011918339200914407,
      "loss": 0.27,
      "step": 2309
    },
    {
      "epoch": 2.1934488488013293,
      "grad_norm": 0.0597839280962944,
      "learning_rate": 0.00011912482743670624,
      "loss": 0.2834,
      "step": 2310
    },
    {
      "epoch": 2.194398291004035,
      "grad_norm": 0.044621869921684265,
      "learning_rate": 0.00011906625605499915,
      "loss": 0.2526,
      "step": 2311
    },
    {
      "epoch": 2.195347733206741,
      "grad_norm": 0.04936400428414345,
      "learning_rate": 0.00011900767788487674,
      "loss": 0.2632,
      "step": 2312
    },
    {
      "epoch": 2.196297175409447,
      "grad_norm": 0.0530143566429615,
      "learning_rate": 0.00011894909294719547,
      "loss": 0.278,
      "step": 2313
    },
    {
      "epoch": 2.197246617612153,
      "grad_norm": 0.053086057305336,
      "learning_rate": 0.00011889050126281405,
      "loss": 0.2652,
      "step": 2314
    },
    {
      "epoch": 2.1981960598148587,
      "grad_norm": 0.047230660915374756,
      "learning_rate": 0.00011883190285259369,
      "loss": 0.2574,
      "step": 2315
    },
    {
      "epoch": 2.1991455020175645,
      "grad_norm": 0.07178585976362228,
      "learning_rate": 0.00011877329773739794,
      "loss": 0.2919,
      "step": 2316
    },
    {
      "epoch": 2.2000949442202704,
      "grad_norm": 0.04587990790605545,
      "learning_rate": 0.0001187146859380928,
      "loss": 0.2559,
      "step": 2317
    },
    {
      "epoch": 2.2010443864229767,
      "grad_norm": 0.050480857491493225,
      "learning_rate": 0.00011865606747554663,
      "loss": 0.259,
      "step": 2318
    },
    {
      "epoch": 2.2019938286256826,
      "grad_norm": 0.06074557080864906,
      "learning_rate": 0.00011859744237063011,
      "loss": 0.2819,
      "step": 2319
    },
    {
      "epoch": 2.2029432708283885,
      "grad_norm": 0.09227973967790604,
      "learning_rate": 0.00011853881064421634,
      "loss": 0.2809,
      "step": 2320
    },
    {
      "epoch": 2.2038927130310944,
      "grad_norm": 0.05148273706436157,
      "learning_rate": 0.00011848017231718076,
      "loss": 0.2523,
      "step": 2321
    },
    {
      "epoch": 2.2048421552338002,
      "grad_norm": 0.06572956591844559,
      "learning_rate": 0.00011842152741040116,
      "loss": 0.286,
      "step": 2322
    },
    {
      "epoch": 2.205791597436506,
      "grad_norm": 0.05026514083147049,
      "learning_rate": 0.0001183628759447577,
      "loss": 0.2789,
      "step": 2323
    },
    {
      "epoch": 2.206741039639212,
      "grad_norm": 0.04762961342930794,
      "learning_rate": 0.0001183042179411328,
      "loss": 0.2495,
      "step": 2324
    },
    {
      "epoch": 2.207690481841918,
      "grad_norm": 0.05108138173818588,
      "learning_rate": 0.00011824555342041128,
      "loss": 0.2777,
      "step": 2325
    },
    {
      "epoch": 2.2086399240446237,
      "grad_norm": 0.06025318801403046,
      "learning_rate": 0.00011818688240348024,
      "loss": 0.2865,
      "step": 2326
    },
    {
      "epoch": 2.2095893662473296,
      "grad_norm": 0.04540006071329117,
      "learning_rate": 0.00011812820491122918,
      "loss": 0.267,
      "step": 2327
    },
    {
      "epoch": 2.2105388084500355,
      "grad_norm": 0.056342415511608124,
      "learning_rate": 0.00011806952096454975,
      "loss": 0.2905,
      "step": 2328
    },
    {
      "epoch": 2.2114882506527413,
      "grad_norm": 0.054570119827985764,
      "learning_rate": 0.00011801083058433607,
      "loss": 0.2711,
      "step": 2329
    },
    {
      "epoch": 2.2124376928554472,
      "grad_norm": 0.04845500364899635,
      "learning_rate": 0.00011795213379148436,
      "loss": 0.2776,
      "step": 2330
    },
    {
      "epoch": 2.213387135058153,
      "grad_norm": 0.043802276253700256,
      "learning_rate": 0.00011789343060689329,
      "loss": 0.2641,
      "step": 2331
    },
    {
      "epoch": 2.2143365772608594,
      "grad_norm": 0.04750855267047882,
      "learning_rate": 0.00011783472105146376,
      "loss": 0.2687,
      "step": 2332
    },
    {
      "epoch": 2.2152860194635653,
      "grad_norm": 0.04865497350692749,
      "learning_rate": 0.00011777600514609886,
      "loss": 0.26,
      "step": 2333
    },
    {
      "epoch": 2.216235461666271,
      "grad_norm": 0.04385308921337128,
      "learning_rate": 0.00011771728291170407,
      "loss": 0.264,
      "step": 2334
    },
    {
      "epoch": 2.217184903868977,
      "grad_norm": 0.059330157935619354,
      "learning_rate": 0.00011765855436918701,
      "loss": 0.2792,
      "step": 2335
    },
    {
      "epoch": 2.218134346071683,
      "grad_norm": 0.05777224153280258,
      "learning_rate": 0.0001175998195394576,
      "loss": 0.2746,
      "step": 2336
    },
    {
      "epoch": 2.219083788274389,
      "grad_norm": 0.051478311419487,
      "learning_rate": 0.00011754107844342803,
      "loss": 0.2696,
      "step": 2337
    },
    {
      "epoch": 2.2200332304770947,
      "grad_norm": 0.046342022716999054,
      "learning_rate": 0.00011748233110201265,
      "loss": 0.2468,
      "step": 2338
    },
    {
      "epoch": 2.2209826726798005,
      "grad_norm": 0.04625660553574562,
      "learning_rate": 0.00011742357753612803,
      "loss": 0.2608,
      "step": 2339
    },
    {
      "epoch": 2.2219321148825064,
      "grad_norm": 0.06775734573602676,
      "learning_rate": 0.00011736481776669306,
      "loss": 0.263,
      "step": 2340
    },
    {
      "epoch": 2.2228815570852123,
      "grad_norm": 0.04909680038690567,
      "learning_rate": 0.00011730605181462871,
      "loss": 0.2515,
      "step": 2341
    },
    {
      "epoch": 2.223830999287918,
      "grad_norm": 0.06404335796833038,
      "learning_rate": 0.00011724727970085824,
      "loss": 0.2906,
      "step": 2342
    },
    {
      "epoch": 2.2247804414906245,
      "grad_norm": 0.04836789518594742,
      "learning_rate": 0.00011718850144630709,
      "loss": 0.2721,
      "step": 2343
    },
    {
      "epoch": 2.2257298836933304,
      "grad_norm": 0.041848376393318176,
      "learning_rate": 0.00011712971707190283,
      "loss": 0.2588,
      "step": 2344
    },
    {
      "epoch": 2.2266793258960362,
      "grad_norm": 0.05488808453083038,
      "learning_rate": 0.00011707092659857531,
      "loss": 0.2845,
      "step": 2345
    },
    {
      "epoch": 2.227628768098742,
      "grad_norm": 0.04626215994358063,
      "learning_rate": 0.00011701213004725644,
      "loss": 0.24,
      "step": 2346
    },
    {
      "epoch": 2.228578210301448,
      "grad_norm": 0.04804147407412529,
      "learning_rate": 0.00011695332743888036,
      "loss": 0.2764,
      "step": 2347
    },
    {
      "epoch": 2.229527652504154,
      "grad_norm": 0.046024907380342484,
      "learning_rate": 0.00011689451879438338,
      "loss": 0.2643,
      "step": 2348
    },
    {
      "epoch": 2.2304770947068597,
      "grad_norm": 0.055915262550115585,
      "learning_rate": 0.00011683570413470383,
      "loss": 0.284,
      "step": 2349
    },
    {
      "epoch": 2.2314265369095656,
      "grad_norm": 0.04396609589457512,
      "learning_rate": 0.00011677688348078244,
      "loss": 0.262,
      "step": 2350
    },
    {
      "epoch": 2.2323759791122715,
      "grad_norm": 0.07269623130559921,
      "learning_rate": 0.00011671805685356183,
      "loss": 0.3068,
      "step": 2351
    },
    {
      "epoch": 2.2333254213149774,
      "grad_norm": 0.04890700802206993,
      "learning_rate": 0.00011665922427398683,
      "loss": 0.2723,
      "step": 2352
    },
    {
      "epoch": 2.2342748635176832,
      "grad_norm": 0.0517299585044384,
      "learning_rate": 0.00011660038576300443,
      "loss": 0.2776,
      "step": 2353
    },
    {
      "epoch": 2.235224305720389,
      "grad_norm": 0.07574246823787689,
      "learning_rate": 0.00011654154134156364,
      "loss": 0.276,
      "step": 2354
    },
    {
      "epoch": 2.236173747923095,
      "grad_norm": 0.043694209307432175,
      "learning_rate": 0.00011648269103061566,
      "loss": 0.2651,
      "step": 2355
    },
    {
      "epoch": 2.2371231901258013,
      "grad_norm": 0.04978908598423004,
      "learning_rate": 0.00011642383485111378,
      "loss": 0.2474,
      "step": 2356
    },
    {
      "epoch": 2.238072632328507,
      "grad_norm": 0.05337736755609512,
      "learning_rate": 0.00011636497282401331,
      "loss": 0.2703,
      "step": 2357
    },
    {
      "epoch": 2.239022074531213,
      "grad_norm": 0.05080539733171463,
      "learning_rate": 0.00011630610497027174,
      "loss": 0.2715,
      "step": 2358
    },
    {
      "epoch": 2.239971516733919,
      "grad_norm": 0.05404691770672798,
      "learning_rate": 0.00011624723131084854,
      "loss": 0.2808,
      "step": 2359
    },
    {
      "epoch": 2.240920958936625,
      "grad_norm": 0.05867183208465576,
      "learning_rate": 0.00011618835186670531,
      "loss": 0.2622,
      "step": 2360
    },
    {
      "epoch": 2.2418704011393307,
      "grad_norm": 0.0530124269425869,
      "learning_rate": 0.00011612946665880571,
      "loss": 0.2747,
      "step": 2361
    },
    {
      "epoch": 2.2428198433420365,
      "grad_norm": 0.056134361773729324,
      "learning_rate": 0.0001160705757081154,
      "loss": 0.276,
      "step": 2362
    },
    {
      "epoch": 2.2437692855447424,
      "grad_norm": 0.041885942220687866,
      "learning_rate": 0.00011601167903560208,
      "loss": 0.2659,
      "step": 2363
    },
    {
      "epoch": 2.2447187277474483,
      "grad_norm": 0.04484890028834343,
      "learning_rate": 0.00011595277666223561,
      "loss": 0.2608,
      "step": 2364
    },
    {
      "epoch": 2.245668169950154,
      "grad_norm": 0.06328167766332626,
      "learning_rate": 0.00011589386860898772,
      "loss": 0.2881,
      "step": 2365
    },
    {
      "epoch": 2.24661761215286,
      "grad_norm": 0.055244866758584976,
      "learning_rate": 0.00011583495489683229,
      "loss": 0.2801,
      "step": 2366
    },
    {
      "epoch": 2.247567054355566,
      "grad_norm": 0.04593993350863457,
      "learning_rate": 0.00011577603554674514,
      "loss": 0.2721,
      "step": 2367
    },
    {
      "epoch": 2.2485164965582722,
      "grad_norm": 0.0732899010181427,
      "learning_rate": 0.00011571711057970409,
      "loss": 0.2729,
      "step": 2368
    },
    {
      "epoch": 2.249465938760978,
      "grad_norm": 0.05230560898780823,
      "learning_rate": 0.00011565818001668904,
      "loss": 0.2744,
      "step": 2369
    },
    {
      "epoch": 2.250415380963684,
      "grad_norm": 0.0604710727930069,
      "learning_rate": 0.00011559924387868179,
      "loss": 0.2783,
      "step": 2370
    },
    {
      "epoch": 2.25136482316639,
      "grad_norm": 0.04672817140817642,
      "learning_rate": 0.00011554030218666619,
      "loss": 0.2673,
      "step": 2371
    },
    {
      "epoch": 2.2523142653690957,
      "grad_norm": 0.05419154092669487,
      "learning_rate": 0.00011548135496162799,
      "loss": 0.2692,
      "step": 2372
    },
    {
      "epoch": 2.2532637075718016,
      "grad_norm": 0.07617273926734924,
      "learning_rate": 0.00011542240222455502,
      "loss": 0.2545,
      "step": 2373
    },
    {
      "epoch": 2.2542131497745075,
      "grad_norm": 0.04525422304868698,
      "learning_rate": 0.00011536344399643701,
      "loss": 0.2746,
      "step": 2374
    },
    {
      "epoch": 2.2551625919772134,
      "grad_norm": 0.059686966240406036,
      "learning_rate": 0.00011530448029826566,
      "loss": 0.2917,
      "step": 2375
    },
    {
      "epoch": 2.2561120341799192,
      "grad_norm": 0.04423639923334122,
      "learning_rate": 0.00011524551115103454,
      "loss": 0.2499,
      "step": 2376
    },
    {
      "epoch": 2.257061476382625,
      "grad_norm": 0.04687541723251343,
      "learning_rate": 0.0001151865365757393,
      "loss": 0.2728,
      "step": 2377
    },
    {
      "epoch": 2.258010918585331,
      "grad_norm": 0.0430920235812664,
      "learning_rate": 0.00011512755659337742,
      "loss": 0.266,
      "step": 2378
    },
    {
      "epoch": 2.258960360788037,
      "grad_norm": 0.04343993961811066,
      "learning_rate": 0.00011506857122494831,
      "loss": 0.2517,
      "step": 2379
    },
    {
      "epoch": 2.2599098029907427,
      "grad_norm": 0.04559265822172165,
      "learning_rate": 0.0001150095804914534,
      "loss": 0.2614,
      "step": 2380
    },
    {
      "epoch": 2.2608592451934486,
      "grad_norm": 0.0903773307800293,
      "learning_rate": 0.00011495058441389586,
      "loss": 0.2708,
      "step": 2381
    },
    {
      "epoch": 2.261808687396155,
      "grad_norm": 0.06456193327903748,
      "learning_rate": 0.00011489158301328092,
      "loss": 0.2749,
      "step": 2382
    },
    {
      "epoch": 2.262758129598861,
      "grad_norm": 0.049897704273462296,
      "learning_rate": 0.00011483257631061562,
      "loss": 0.2732,
      "step": 2383
    },
    {
      "epoch": 2.2637075718015667,
      "grad_norm": 0.054795295000076294,
      "learning_rate": 0.00011477356432690891,
      "loss": 0.2798,
      "step": 2384
    },
    {
      "epoch": 2.2646570140042726,
      "grad_norm": 0.06840714812278748,
      "learning_rate": 0.00011471454708317162,
      "loss": 0.2906,
      "step": 2385
    },
    {
      "epoch": 2.2656064562069784,
      "grad_norm": 0.05174139142036438,
      "learning_rate": 0.00011465552460041644,
      "loss": 0.2689,
      "step": 2386
    },
    {
      "epoch": 2.2665558984096843,
      "grad_norm": 0.1207653358578682,
      "learning_rate": 0.00011459649689965797,
      "loss": 0.2827,
      "step": 2387
    },
    {
      "epoch": 2.26750534061239,
      "grad_norm": 0.0508701354265213,
      "learning_rate": 0.00011453746400191261,
      "loss": 0.2607,
      "step": 2388
    },
    {
      "epoch": 2.268454782815096,
      "grad_norm": 0.05010244995355606,
      "learning_rate": 0.00011447842592819866,
      "loss": 0.2688,
      "step": 2389
    },
    {
      "epoch": 2.269404225017802,
      "grad_norm": 0.044035494327545166,
      "learning_rate": 0.00011441938269953618,
      "loss": 0.274,
      "step": 2390
    },
    {
      "epoch": 2.270353667220508,
      "grad_norm": 0.04500316083431244,
      "learning_rate": 0.00011436033433694718,
      "loss": 0.2707,
      "step": 2391
    },
    {
      "epoch": 2.2713031094232137,
      "grad_norm": 0.04644524306058884,
      "learning_rate": 0.00011430128086145542,
      "loss": 0.2616,
      "step": 2392
    },
    {
      "epoch": 2.27225255162592,
      "grad_norm": 0.04804198071360588,
      "learning_rate": 0.00011424222229408652,
      "loss": 0.2726,
      "step": 2393
    },
    {
      "epoch": 2.273201993828626,
      "grad_norm": 0.04829863831400871,
      "learning_rate": 0.00011418315865586788,
      "loss": 0.2732,
      "step": 2394
    },
    {
      "epoch": 2.2741514360313317,
      "grad_norm": 0.0522797591984272,
      "learning_rate": 0.00011412408996782871,
      "loss": 0.2761,
      "step": 2395
    },
    {
      "epoch": 2.2751008782340376,
      "grad_norm": 0.04852959141135216,
      "learning_rate": 0.00011406501625100006,
      "loss": 0.277,
      "step": 2396
    },
    {
      "epoch": 2.2760503204367435,
      "grad_norm": 0.04573357105255127,
      "learning_rate": 0.00011400593752641473,
      "loss": 0.266,
      "step": 2397
    },
    {
      "epoch": 2.2769997626394494,
      "grad_norm": 0.06536805629730225,
      "learning_rate": 0.00011394685381510726,
      "loss": 0.289,
      "step": 2398
    },
    {
      "epoch": 2.2779492048421552,
      "grad_norm": 0.04507270082831383,
      "learning_rate": 0.0001138877651381141,
      "loss": 0.2629,
      "step": 2399
    },
    {
      "epoch": 2.278898647044861,
      "grad_norm": 0.046577729284763336,
      "learning_rate": 0.00011382867151647332,
      "loss": 0.2701,
      "step": 2400
    },
    {
      "epoch": 2.279848089247567,
      "grad_norm": 0.04779546707868576,
      "learning_rate": 0.00011376957297122486,
      "loss": 0.2628,
      "step": 2401
    },
    {
      "epoch": 2.280797531450273,
      "grad_norm": 0.04618161544203758,
      "learning_rate": 0.00011371046952341034,
      "loss": 0.2676,
      "step": 2402
    },
    {
      "epoch": 2.2817469736529787,
      "grad_norm": 0.04306333512067795,
      "learning_rate": 0.00011365136119407319,
      "loss": 0.2658,
      "step": 2403
    },
    {
      "epoch": 2.2826964158556846,
      "grad_norm": 0.044828448444604874,
      "learning_rate": 0.00011359224800425849,
      "loss": 0.2584,
      "step": 2404
    },
    {
      "epoch": 2.2836458580583905,
      "grad_norm": 0.045854486525058746,
      "learning_rate": 0.00011353312997501313,
      "loss": 0.258,
      "step": 2405
    },
    {
      "epoch": 2.2845953002610964,
      "grad_norm": 0.05045664310455322,
      "learning_rate": 0.00011347400712738567,
      "loss": 0.2658,
      "step": 2406
    },
    {
      "epoch": 2.2855447424638027,
      "grad_norm": 0.04601123183965683,
      "learning_rate": 0.00011341487948242648,
      "loss": 0.2602,
      "step": 2407
    },
    {
      "epoch": 2.2864941846665086,
      "grad_norm": 0.055264122784137726,
      "learning_rate": 0.00011335574706118754,
      "loss": 0.2856,
      "step": 2408
    },
    {
      "epoch": 2.2874436268692144,
      "grad_norm": 0.054593365639448166,
      "learning_rate": 0.00011329660988472253,
      "loss": 0.2663,
      "step": 2409
    },
    {
      "epoch": 2.2883930690719203,
      "grad_norm": 0.04195168986916542,
      "learning_rate": 0.00011323746797408688,
      "loss": 0.2623,
      "step": 2410
    },
    {
      "epoch": 2.289342511274626,
      "grad_norm": 0.04949670657515526,
      "learning_rate": 0.00011317832135033766,
      "loss": 0.2635,
      "step": 2411
    },
    {
      "epoch": 2.290291953477332,
      "grad_norm": 0.0510125458240509,
      "learning_rate": 0.00011311917003453365,
      "loss": 0.2701,
      "step": 2412
    },
    {
      "epoch": 2.291241395680038,
      "grad_norm": 0.04839101806282997,
      "learning_rate": 0.0001130600140477353,
      "loss": 0.2687,
      "step": 2413
    },
    {
      "epoch": 2.292190837882744,
      "grad_norm": 0.04288674145936966,
      "learning_rate": 0.00011300085341100466,
      "loss": 0.268,
      "step": 2414
    },
    {
      "epoch": 2.2931402800854497,
      "grad_norm": 0.04649089276790619,
      "learning_rate": 0.00011294168814540553,
      "loss": 0.2649,
      "step": 2415
    },
    {
      "epoch": 2.2940897222881556,
      "grad_norm": 0.03943301737308502,
      "learning_rate": 0.00011288251827200334,
      "loss": 0.2675,
      "step": 2416
    },
    {
      "epoch": 2.2950391644908614,
      "grad_norm": 0.06496595591306686,
      "learning_rate": 0.0001128233438118651,
      "loss": 0.265,
      "step": 2417
    },
    {
      "epoch": 2.2959886066935677,
      "grad_norm": 0.05821962654590607,
      "learning_rate": 0.00011276416478605949,
      "loss": 0.2705,
      "step": 2418
    },
    {
      "epoch": 2.2969380488962736,
      "grad_norm": 0.041703104972839355,
      "learning_rate": 0.00011270498121565678,
      "loss": 0.2568,
      "step": 2419
    },
    {
      "epoch": 2.2978874910989795,
      "grad_norm": 0.053453344851732254,
      "learning_rate": 0.00011264579312172895,
      "loss": 0.2734,
      "step": 2420
    },
    {
      "epoch": 2.2988369333016854,
      "grad_norm": 0.04353068396449089,
      "learning_rate": 0.00011258660052534951,
      "loss": 0.2633,
      "step": 2421
    },
    {
      "epoch": 2.2997863755043912,
      "grad_norm": 0.04292818903923035,
      "learning_rate": 0.00011252740344759356,
      "loss": 0.2594,
      "step": 2422
    },
    {
      "epoch": 2.300735817707097,
      "grad_norm": 0.05199714004993439,
      "learning_rate": 0.00011246820190953791,
      "loss": 0.2703,
      "step": 2423
    },
    {
      "epoch": 2.301685259909803,
      "grad_norm": 0.04548816755414009,
      "learning_rate": 0.0001124089959322608,
      "loss": 0.2739,
      "step": 2424
    },
    {
      "epoch": 2.302634702112509,
      "grad_norm": 0.03984839841723442,
      "learning_rate": 0.00011234978553684219,
      "loss": 0.2573,
      "step": 2425
    },
    {
      "epoch": 2.3035841443152147,
      "grad_norm": 0.0392242856323719,
      "learning_rate": 0.00011229057074436351,
      "loss": 0.2595,
      "step": 2426
    },
    {
      "epoch": 2.3045335865179206,
      "grad_norm": 0.03876666724681854,
      "learning_rate": 0.00011223135157590783,
      "loss": 0.2577,
      "step": 2427
    },
    {
      "epoch": 2.3054830287206265,
      "grad_norm": 0.051638517528772354,
      "learning_rate": 0.00011217212805255968,
      "loss": 0.279,
      "step": 2428
    },
    {
      "epoch": 2.3064324709233324,
      "grad_norm": 0.04209835082292557,
      "learning_rate": 0.0001121129001954053,
      "loss": 0.2604,
      "step": 2429
    },
    {
      "epoch": 2.3073819131260382,
      "grad_norm": 0.05616452917456627,
      "learning_rate": 0.0001120536680255323,
      "loss": 0.2774,
      "step": 2430
    },
    {
      "epoch": 2.308331355328744,
      "grad_norm": 0.04177265986800194,
      "learning_rate": 0.00011199443156402998,
      "loss": 0.2616,
      "step": 2431
    },
    {
      "epoch": 2.3092807975314504,
      "grad_norm": 0.04343918338418007,
      "learning_rate": 0.00011193519083198905,
      "loss": 0.269,
      "step": 2432
    },
    {
      "epoch": 2.3102302397341563,
      "grad_norm": 0.06559456884860992,
      "learning_rate": 0.00011187594585050174,
      "loss": 0.2846,
      "step": 2433
    },
    {
      "epoch": 2.311179681936862,
      "grad_norm": 0.045688070356845856,
      "learning_rate": 0.00011181669664066192,
      "loss": 0.2586,
      "step": 2434
    },
    {
      "epoch": 2.312129124139568,
      "grad_norm": 0.09138458222150803,
      "learning_rate": 0.00011175744322356487,
      "loss": 0.2793,
      "step": 2435
    },
    {
      "epoch": 2.313078566342274,
      "grad_norm": 0.06139371171593666,
      "learning_rate": 0.00011169818562030733,
      "loss": 0.2962,
      "step": 2436
    },
    {
      "epoch": 2.31402800854498,
      "grad_norm": 0.043909333646297455,
      "learning_rate": 0.0001116389238519876,
      "loss": 0.2517,
      "step": 2437
    },
    {
      "epoch": 2.3149774507476857,
      "grad_norm": 0.06976691633462906,
      "learning_rate": 0.00011157965793970544,
      "loss": 0.2834,
      "step": 2438
    },
    {
      "epoch": 2.3159268929503916,
      "grad_norm": 0.041745271533727646,
      "learning_rate": 0.00011152038790456211,
      "loss": 0.2596,
      "step": 2439
    },
    {
      "epoch": 2.3168763351530974,
      "grad_norm": 0.03987791761755943,
      "learning_rate": 0.00011146111376766033,
      "loss": 0.2538,
      "step": 2440
    },
    {
      "epoch": 2.3178257773558033,
      "grad_norm": 0.08367100358009338,
      "learning_rate": 0.00011140183555010424,
      "loss": 0.2819,
      "step": 2441
    },
    {
      "epoch": 2.318775219558509,
      "grad_norm": 0.042115338146686554,
      "learning_rate": 0.00011134255327299943,
      "loss": 0.2511,
      "step": 2442
    },
    {
      "epoch": 2.3197246617612155,
      "grad_norm": 0.047765813767910004,
      "learning_rate": 0.00011128326695745301,
      "loss": 0.2543,
      "step": 2443
    },
    {
      "epoch": 2.3206741039639214,
      "grad_norm": 0.0455872118473053,
      "learning_rate": 0.0001112239766245735,
      "loss": 0.2798,
      "step": 2444
    },
    {
      "epoch": 2.3216235461666272,
      "grad_norm": 0.04317116364836693,
      "learning_rate": 0.0001111646822954708,
      "loss": 0.2677,
      "step": 2445
    },
    {
      "epoch": 2.322572988369333,
      "grad_norm": 0.04277396202087402,
      "learning_rate": 0.00011110538399125625,
      "loss": 0.255,
      "step": 2446
    },
    {
      "epoch": 2.323522430572039,
      "grad_norm": 0.046795833855867386,
      "learning_rate": 0.00011104608173304262,
      "loss": 0.2725,
      "step": 2447
    },
    {
      "epoch": 2.324471872774745,
      "grad_norm": 0.04250842332839966,
      "learning_rate": 0.00011098677554194417,
      "loss": 0.2645,
      "step": 2448
    },
    {
      "epoch": 2.3254213149774507,
      "grad_norm": 0.05834075063467026,
      "learning_rate": 0.00011092746543907642,
      "loss": 0.2849,
      "step": 2449
    },
    {
      "epoch": 2.3263707571801566,
      "grad_norm": 0.04197695106267929,
      "learning_rate": 0.00011086815144555633,
      "loss": 0.26,
      "step": 2450
    },
    {
      "epoch": 2.3273201993828625,
      "grad_norm": 0.0852176696062088,
      "learning_rate": 0.00011080883358250225,
      "loss": 0.2636,
      "step": 2451
    },
    {
      "epoch": 2.3282696415855684,
      "grad_norm": 0.04568492993712425,
      "learning_rate": 0.00011074951187103397,
      "loss": 0.2607,
      "step": 2452
    },
    {
      "epoch": 2.3292190837882742,
      "grad_norm": 0.05550101399421692,
      "learning_rate": 0.00011069018633227257,
      "loss": 0.2831,
      "step": 2453
    },
    {
      "epoch": 2.33016852599098,
      "grad_norm": 0.06848349422216415,
      "learning_rate": 0.00011063085698734052,
      "loss": 0.2621,
      "step": 2454
    },
    {
      "epoch": 2.331117968193686,
      "grad_norm": 0.07170405983924866,
      "learning_rate": 0.0001105715238573616,
      "loss": 0.2849,
      "step": 2455
    },
    {
      "epoch": 2.332067410396392,
      "grad_norm": 0.05143957957625389,
      "learning_rate": 0.00011051218696346104,
      "loss": 0.2639,
      "step": 2456
    },
    {
      "epoch": 2.333016852599098,
      "grad_norm": 0.073124960064888,
      "learning_rate": 0.00011045284632676536,
      "loss": 0.2829,
      "step": 2457
    },
    {
      "epoch": 2.333966294801804,
      "grad_norm": 0.08123373240232468,
      "learning_rate": 0.00011039350196840235,
      "loss": 0.273,
      "step": 2458
    },
    {
      "epoch": 2.33491573700451,
      "grad_norm": 0.04808083176612854,
      "learning_rate": 0.00011033415390950121,
      "loss": 0.2654,
      "step": 2459
    },
    {
      "epoch": 2.335865179207216,
      "grad_norm": 0.0888887345790863,
      "learning_rate": 0.00011027480217119244,
      "loss": 0.281,
      "step": 2460
    },
    {
      "epoch": 2.3368146214099217,
      "grad_norm": 0.07269085198640823,
      "learning_rate": 0.00011021544677460778,
      "loss": 0.2757,
      "step": 2461
    },
    {
      "epoch": 2.3377640636126276,
      "grad_norm": 0.06212505325675011,
      "learning_rate": 0.00011015608774088039,
      "loss": 0.2792,
      "step": 2462
    },
    {
      "epoch": 2.3387135058153334,
      "grad_norm": 0.045006170868873596,
      "learning_rate": 0.00011009672509114461,
      "loss": 0.263,
      "step": 2463
    },
    {
      "epoch": 2.3396629480180393,
      "grad_norm": 0.04857483506202698,
      "learning_rate": 0.0001100373588465362,
      "loss": 0.2705,
      "step": 2464
    },
    {
      "epoch": 2.340612390220745,
      "grad_norm": 0.05448603257536888,
      "learning_rate": 0.00010997798902819208,
      "loss": 0.2762,
      "step": 2465
    },
    {
      "epoch": 2.341561832423451,
      "grad_norm": 0.04412839934229851,
      "learning_rate": 0.00010991861565725044,
      "loss": 0.2638,
      "step": 2466
    },
    {
      "epoch": 2.3425112746261574,
      "grad_norm": 0.05423035845160484,
      "learning_rate": 0.00010985923875485083,
      "loss": 0.2739,
      "step": 2467
    },
    {
      "epoch": 2.3434607168288633,
      "grad_norm": 0.0835297629237175,
      "learning_rate": 0.00010979985834213399,
      "loss": 0.3022,
      "step": 2468
    },
    {
      "epoch": 2.344410159031569,
      "grad_norm": 0.061721861362457275,
      "learning_rate": 0.00010974047444024195,
      "loss": 0.2896,
      "step": 2469
    },
    {
      "epoch": 2.345359601234275,
      "grad_norm": 0.04539257660508156,
      "learning_rate": 0.00010968108707031792,
      "loss": 0.2548,
      "step": 2470
    },
    {
      "epoch": 2.346309043436981,
      "grad_norm": 0.07001818716526031,
      "learning_rate": 0.0001096216962535064,
      "loss": 0.306,
      "step": 2471
    },
    {
      "epoch": 2.3472584856396868,
      "grad_norm": 0.04498448595404625,
      "learning_rate": 0.00010956230201095312,
      "loss": 0.2589,
      "step": 2472
    },
    {
      "epoch": 2.3482079278423926,
      "grad_norm": 0.045867983251810074,
      "learning_rate": 0.00010950290436380499,
      "loss": 0.2639,
      "step": 2473
    },
    {
      "epoch": 2.3491573700450985,
      "grad_norm": 0.053373124450445175,
      "learning_rate": 0.00010944350333321014,
      "loss": 0.2703,
      "step": 2474
    },
    {
      "epoch": 2.3501068122478044,
      "grad_norm": 0.04680047556757927,
      "learning_rate": 0.00010938409894031794,
      "loss": 0.2705,
      "step": 2475
    },
    {
      "epoch": 2.3510562544505103,
      "grad_norm": 0.05392623320221901,
      "learning_rate": 0.00010932469120627893,
      "loss": 0.2738,
      "step": 2476
    },
    {
      "epoch": 2.352005696653216,
      "grad_norm": 0.046140771359205246,
      "learning_rate": 0.00010926528015224485,
      "loss": 0.2654,
      "step": 2477
    },
    {
      "epoch": 2.352955138855922,
      "grad_norm": 0.16307492554187775,
      "learning_rate": 0.00010920586579936858,
      "loss": 0.2649,
      "step": 2478
    },
    {
      "epoch": 2.353904581058628,
      "grad_norm": 0.04612316936254501,
      "learning_rate": 0.00010914644816880423,
      "loss": 0.2639,
      "step": 2479
    },
    {
      "epoch": 2.3548540232613338,
      "grad_norm": 0.044796962291002274,
      "learning_rate": 0.00010908702728170705,
      "loss": 0.2622,
      "step": 2480
    },
    {
      "epoch": 2.35580346546404,
      "grad_norm": 0.045174986124038696,
      "learning_rate": 0.00010902760315923352,
      "loss": 0.256,
      "step": 2481
    },
    {
      "epoch": 2.356752907666746,
      "grad_norm": 0.1589425504207611,
      "learning_rate": 0.00010896817582254113,
      "loss": 0.2754,
      "step": 2482
    },
    {
      "epoch": 2.357702349869452,
      "grad_norm": 0.0925155058503151,
      "learning_rate": 0.00010890874529278865,
      "loss": 0.2678,
      "step": 2483
    },
    {
      "epoch": 2.3586517920721577,
      "grad_norm": 0.09574342519044876,
      "learning_rate": 0.00010884931159113586,
      "loss": 0.2534,
      "step": 2484
    },
    {
      "epoch": 2.3596012342748636,
      "grad_norm": 0.050892025232315063,
      "learning_rate": 0.0001087898747387438,
      "loss": 0.2677,
      "step": 2485
    },
    {
      "epoch": 2.3605506764775694,
      "grad_norm": 0.0862436443567276,
      "learning_rate": 0.00010873043475677455,
      "loss": 0.2834,
      "step": 2486
    },
    {
      "epoch": 2.3615001186802753,
      "grad_norm": 0.0461130365729332,
      "learning_rate": 0.0001086709916663913,
      "loss": 0.2673,
      "step": 2487
    },
    {
      "epoch": 2.362449560882981,
      "grad_norm": 0.046872738748788834,
      "learning_rate": 0.00010861154548875843,
      "loss": 0.2693,
      "step": 2488
    },
    {
      "epoch": 2.363399003085687,
      "grad_norm": 0.04682895913720131,
      "learning_rate": 0.0001085520962450413,
      "loss": 0.2593,
      "step": 2489
    },
    {
      "epoch": 2.364348445288393,
      "grad_norm": 0.04921196401119232,
      "learning_rate": 0.00010849264395640649,
      "loss": 0.2593,
      "step": 2490
    },
    {
      "epoch": 2.365297887491099,
      "grad_norm": 0.04984479025006294,
      "learning_rate": 0.00010843318864402154,
      "loss": 0.2619,
      "step": 2491
    },
    {
      "epoch": 2.366247329693805,
      "grad_norm": 0.0450386144220829,
      "learning_rate": 0.00010837373032905515,
      "loss": 0.2517,
      "step": 2492
    },
    {
      "epoch": 2.367196771896511,
      "grad_norm": 0.04759416729211807,
      "learning_rate": 0.00010831426903267706,
      "loss": 0.2665,
      "step": 2493
    },
    {
      "epoch": 2.368146214099217,
      "grad_norm": 0.04298888519406319,
      "learning_rate": 0.00010825480477605805,
      "loss": 0.2587,
      "step": 2494
    },
    {
      "epoch": 2.3690956563019228,
      "grad_norm": 0.047177206724882126,
      "learning_rate": 0.00010819533758037002,
      "loss": 0.2651,
      "step": 2495
    },
    {
      "epoch": 2.3700450985046286,
      "grad_norm": 0.04960494488477707,
      "learning_rate": 0.00010813586746678583,
      "loss": 0.2618,
      "step": 2496
    },
    {
      "epoch": 2.3709945407073345,
      "grad_norm": 0.06197304278612137,
      "learning_rate": 0.00010807639445647948,
      "loss": 0.2797,
      "step": 2497
    },
    {
      "epoch": 2.3719439829100404,
      "grad_norm": 0.053523700684309006,
      "learning_rate": 0.00010801691857062586,
      "loss": 0.2833,
      "step": 2498
    },
    {
      "epoch": 2.3728934251127463,
      "grad_norm": 0.04236749932169914,
      "learning_rate": 0.00010795743983040107,
      "loss": 0.2627,
      "step": 2499
    },
    {
      "epoch": 2.373842867315452,
      "grad_norm": 0.04760702699422836,
      "learning_rate": 0.00010789795825698205,
      "loss": 0.272,
      "step": 2500
    },
    {
      "epoch": 2.373842867315452,
      "eval_loss": 0.2669290602207184,
      "eval_runtime": 37.912,
      "eval_samples_per_second": 2.268,
      "eval_steps_per_second": 2.268,
      "step": 2500
    },
    {
      "epoch": 2.374792309518158,
      "grad_norm": 0.0647454559803009,
      "learning_rate": 0.00010783847387154687,
      "loss": 0.2765,
      "step": 2501
    },
    {
      "epoch": 2.375741751720864,
      "grad_norm": 0.049812283366918564,
      "learning_rate": 0.00010777898669527449,
      "loss": 0.2766,
      "step": 2502
    },
    {
      "epoch": 2.3766911939235698,
      "grad_norm": 0.04538872838020325,
      "learning_rate": 0.00010771949674934499,
      "loss": 0.2674,
      "step": 2503
    },
    {
      "epoch": 2.3776406361262756,
      "grad_norm": 0.05949138104915619,
      "learning_rate": 0.00010766000405493936,
      "loss": 0.28,
      "step": 2504
    },
    {
      "epoch": 2.3785900783289815,
      "grad_norm": 0.04528718441724777,
      "learning_rate": 0.00010760050863323961,
      "loss": 0.2641,
      "step": 2505
    },
    {
      "epoch": 2.379539520531688,
      "grad_norm": 0.0555870421230793,
      "learning_rate": 0.00010754101050542865,
      "loss": 0.275,
      "step": 2506
    },
    {
      "epoch": 2.3804889627343937,
      "grad_norm": 0.04176267981529236,
      "learning_rate": 0.00010748150969269038,
      "loss": 0.2577,
      "step": 2507
    },
    {
      "epoch": 2.3814384049370996,
      "grad_norm": 0.04654645174741745,
      "learning_rate": 0.00010742200621620972,
      "loss": 0.2701,
      "step": 2508
    },
    {
      "epoch": 2.3823878471398054,
      "grad_norm": 0.043700382113456726,
      "learning_rate": 0.00010736250009717247,
      "loss": 0.2638,
      "step": 2509
    },
    {
      "epoch": 2.3833372893425113,
      "grad_norm": 0.040448714047670364,
      "learning_rate": 0.00010730299135676545,
      "loss": 0.2539,
      "step": 2510
    },
    {
      "epoch": 2.384286731545217,
      "grad_norm": 0.05429979786276817,
      "learning_rate": 0.00010724348001617625,
      "loss": 0.2624,
      "step": 2511
    },
    {
      "epoch": 2.385236173747923,
      "grad_norm": 0.048745136708021164,
      "learning_rate": 0.00010718396609659356,
      "loss": 0.2586,
      "step": 2512
    },
    {
      "epoch": 2.386185615950629,
      "grad_norm": 0.053171828389167786,
      "learning_rate": 0.00010712444961920691,
      "loss": 0.263,
      "step": 2513
    },
    {
      "epoch": 2.387135058153335,
      "grad_norm": 0.05061480775475502,
      "learning_rate": 0.00010706493060520678,
      "loss": 0.2672,
      "step": 2514
    },
    {
      "epoch": 2.3880845003560407,
      "grad_norm": 0.04448014125227928,
      "learning_rate": 0.00010700540907578447,
      "loss": 0.2694,
      "step": 2515
    },
    {
      "epoch": 2.3890339425587466,
      "grad_norm": 0.044203128665685654,
      "learning_rate": 0.00010694588505213224,
      "loss": 0.2608,
      "step": 2516
    },
    {
      "epoch": 2.389983384761453,
      "grad_norm": 0.045293718576431274,
      "learning_rate": 0.00010688635855544326,
      "loss": 0.2761,
      "step": 2517
    },
    {
      "epoch": 2.3909328269641588,
      "grad_norm": 0.04204658046364784,
      "learning_rate": 0.00010682682960691153,
      "loss": 0.2577,
      "step": 2518
    },
    {
      "epoch": 2.3918822691668646,
      "grad_norm": 0.043219927698373795,
      "learning_rate": 0.00010676729822773193,
      "loss": 0.2671,
      "step": 2519
    },
    {
      "epoch": 2.3928317113695705,
      "grad_norm": 0.05860576406121254,
      "learning_rate": 0.00010670776443910024,
      "loss": 0.2806,
      "step": 2520
    },
    {
      "epoch": 2.3937811535722764,
      "grad_norm": 0.05496351793408394,
      "learning_rate": 0.00010664822826221309,
      "loss": 0.2816,
      "step": 2521
    },
    {
      "epoch": 2.3947305957749823,
      "grad_norm": 0.05229606106877327,
      "learning_rate": 0.00010658868971826785,
      "loss": 0.2858,
      "step": 2522
    },
    {
      "epoch": 2.395680037977688,
      "grad_norm": 0.04436548799276352,
      "learning_rate": 0.00010652914882846295,
      "loss": 0.2618,
      "step": 2523
    },
    {
      "epoch": 2.396629480180394,
      "grad_norm": 0.04846099019050598,
      "learning_rate": 0.00010646960561399745,
      "loss": 0.2825,
      "step": 2524
    },
    {
      "epoch": 2.3975789223831,
      "grad_norm": 0.04811537638306618,
      "learning_rate": 0.00010641006009607137,
      "loss": 0.2656,
      "step": 2525
    },
    {
      "epoch": 2.3985283645858058,
      "grad_norm": 0.04264158755540848,
      "learning_rate": 0.00010635051229588546,
      "loss": 0.2611,
      "step": 2526
    },
    {
      "epoch": 2.3994778067885116,
      "grad_norm": 0.04192167893052101,
      "learning_rate": 0.00010629096223464137,
      "loss": 0.2544,
      "step": 2527
    },
    {
      "epoch": 2.4004272489912175,
      "grad_norm": 0.042320821434259415,
      "learning_rate": 0.00010623140993354144,
      "loss": 0.2729,
      "step": 2528
    },
    {
      "epoch": 2.4013766911939234,
      "grad_norm": 0.07291635125875473,
      "learning_rate": 0.00010617185541378895,
      "loss": 0.2919,
      "step": 2529
    },
    {
      "epoch": 2.4023261333966293,
      "grad_norm": 0.04448839649558067,
      "learning_rate": 0.00010611229869658785,
      "loss": 0.2625,
      "step": 2530
    },
    {
      "epoch": 2.4032755755993356,
      "grad_norm": 0.05060156062245369,
      "learning_rate": 0.00010605273980314292,
      "loss": 0.2783,
      "step": 2531
    },
    {
      "epoch": 2.4042250178020415,
      "grad_norm": 0.04214916750788689,
      "learning_rate": 0.00010599317875465976,
      "loss": 0.2518,
      "step": 2532
    },
    {
      "epoch": 2.4051744600047473,
      "grad_norm": 0.05565851926803589,
      "learning_rate": 0.00010593361557234462,
      "loss": 0.2541,
      "step": 2533
    },
    {
      "epoch": 2.406123902207453,
      "grad_norm": 0.07710932940244675,
      "learning_rate": 0.00010587405027740465,
      "loss": 0.2522,
      "step": 2534
    },
    {
      "epoch": 2.407073344410159,
      "grad_norm": 0.040221601724624634,
      "learning_rate": 0.00010581448289104758,
      "loss": 0.2545,
      "step": 2535
    },
    {
      "epoch": 2.408022786612865,
      "grad_norm": 0.04807737097144127,
      "learning_rate": 0.0001057549134344821,
      "loss": 0.2642,
      "step": 2536
    },
    {
      "epoch": 2.408972228815571,
      "grad_norm": 0.0500488318502903,
      "learning_rate": 0.00010569534192891748,
      "loss": 0.2764,
      "step": 2537
    },
    {
      "epoch": 2.4099216710182767,
      "grad_norm": 0.04549793899059296,
      "learning_rate": 0.00010563576839556374,
      "loss": 0.2528,
      "step": 2538
    },
    {
      "epoch": 2.4108711132209826,
      "grad_norm": 0.05493444204330444,
      "learning_rate": 0.0001055761928556317,
      "loss": 0.2679,
      "step": 2539
    },
    {
      "epoch": 2.4118205554236885,
      "grad_norm": 0.04553236812353134,
      "learning_rate": 0.00010551661533033275,
      "loss": 0.2621,
      "step": 2540
    },
    {
      "epoch": 2.4127699976263943,
      "grad_norm": 0.11890033632516861,
      "learning_rate": 0.00010545703584087918,
      "loss": 0.2874,
      "step": 2541
    },
    {
      "epoch": 2.4137194398291006,
      "grad_norm": 0.08107148110866547,
      "learning_rate": 0.0001053974544084838,
      "loss": 0.2864,
      "step": 2542
    },
    {
      "epoch": 2.4146688820318065,
      "grad_norm": 0.047696180641651154,
      "learning_rate": 0.00010533787105436026,
      "loss": 0.2657,
      "step": 2543
    },
    {
      "epoch": 2.4156183242345124,
      "grad_norm": 0.04514947161078453,
      "learning_rate": 0.00010527828579972272,
      "loss": 0.2515,
      "step": 2544
    },
    {
      "epoch": 2.4165677664372183,
      "grad_norm": 0.04135221242904663,
      "learning_rate": 0.0001052186986657862,
      "loss": 0.2609,
      "step": 2545
    },
    {
      "epoch": 2.417517208639924,
      "grad_norm": 0.0875149667263031,
      "learning_rate": 0.00010515910967376627,
      "loss": 0.2666,
      "step": 2546
    },
    {
      "epoch": 2.41846665084263,
      "grad_norm": 0.038753919303417206,
      "learning_rate": 0.00010509951884487926,
      "loss": 0.2605,
      "step": 2547
    },
    {
      "epoch": 2.419416093045336,
      "grad_norm": 0.059343352913856506,
      "learning_rate": 0.00010503992620034202,
      "loss": 0.2795,
      "step": 2548
    },
    {
      "epoch": 2.4203655352480418,
      "grad_norm": 0.057376034557819366,
      "learning_rate": 0.00010498033176137212,
      "loss": 0.2805,
      "step": 2549
    },
    {
      "epoch": 2.4213149774507476,
      "grad_norm": 0.04280192777514458,
      "learning_rate": 0.00010492073554918782,
      "loss": 0.2592,
      "step": 2550
    },
    {
      "epoch": 2.4222644196534535,
      "grad_norm": 0.0482187457382679,
      "learning_rate": 0.00010486113758500795,
      "loss": 0.261,
      "step": 2551
    },
    {
      "epoch": 2.4232138618561594,
      "grad_norm": 0.0874866172671318,
      "learning_rate": 0.00010480153789005193,
      "loss": 0.2593,
      "step": 2552
    },
    {
      "epoch": 2.4241633040588653,
      "grad_norm": 0.04283559322357178,
      "learning_rate": 0.00010474193648553989,
      "loss": 0.2628,
      "step": 2553
    },
    {
      "epoch": 2.425112746261571,
      "grad_norm": 0.046166982501745224,
      "learning_rate": 0.0001046823333926925,
      "loss": 0.264,
      "step": 2554
    },
    {
      "epoch": 2.426062188464277,
      "grad_norm": 0.09907100349664688,
      "learning_rate": 0.00010462272863273104,
      "loss": 0.2804,
      "step": 2555
    },
    {
      "epoch": 2.4270116306669833,
      "grad_norm": 0.04987293481826782,
      "learning_rate": 0.0001045631222268774,
      "loss": 0.2623,
      "step": 2556
    },
    {
      "epoch": 2.427961072869689,
      "grad_norm": 0.05097109079360962,
      "learning_rate": 0.00010450351419635407,
      "loss": 0.2684,
      "step": 2557
    },
    {
      "epoch": 2.428910515072395,
      "grad_norm": 0.04925335571169853,
      "learning_rate": 0.00010444390456238404,
      "loss": 0.2693,
      "step": 2558
    },
    {
      "epoch": 2.429859957275101,
      "grad_norm": 0.041472528129816055,
      "learning_rate": 0.00010438429334619102,
      "loss": 0.2552,
      "step": 2559
    },
    {
      "epoch": 2.430809399477807,
      "grad_norm": 0.04419026896357536,
      "learning_rate": 0.00010432468056899909,
      "loss": 0.2625,
      "step": 2560
    },
    {
      "epoch": 2.4317588416805127,
      "grad_norm": 0.039240069687366486,
      "learning_rate": 0.00010426506625203307,
      "loss": 0.2633,
      "step": 2561
    },
    {
      "epoch": 2.4327082838832186,
      "grad_norm": 0.040763020515441895,
      "learning_rate": 0.00010420545041651822,
      "loss": 0.2507,
      "step": 2562
    },
    {
      "epoch": 2.4336577260859245,
      "grad_norm": 0.051095739006996155,
      "learning_rate": 0.00010414583308368033,
      "loss": 0.2643,
      "step": 2563
    },
    {
      "epoch": 2.4346071682886303,
      "grad_norm": 0.05506645515561104,
      "learning_rate": 0.00010408621427474581,
      "loss": 0.2605,
      "step": 2564
    },
    {
      "epoch": 2.435556610491336,
      "grad_norm": 0.03904623165726662,
      "learning_rate": 0.00010402659401094152,
      "loss": 0.2638,
      "step": 2565
    },
    {
      "epoch": 2.436506052694042,
      "grad_norm": 0.06974940747022629,
      "learning_rate": 0.00010396697231349488,
      "loss": 0.261,
      "step": 2566
    },
    {
      "epoch": 2.4374554948967484,
      "grad_norm": 0.048252761363983154,
      "learning_rate": 0.00010390734920363379,
      "loss": 0.2738,
      "step": 2567
    },
    {
      "epoch": 2.4384049370994543,
      "grad_norm": 0.04159051179885864,
      "learning_rate": 0.00010384772470258663,
      "loss": 0.2663,
      "step": 2568
    },
    {
      "epoch": 2.43935437930216,
      "grad_norm": 0.051444459706544876,
      "learning_rate": 0.0001037880988315824,
      "loss": 0.2678,
      "step": 2569
    },
    {
      "epoch": 2.440303821504866,
      "grad_norm": 0.0407598651945591,
      "learning_rate": 0.00010372847161185046,
      "loss": 0.2562,
      "step": 2570
    },
    {
      "epoch": 2.441253263707572,
      "grad_norm": 0.055472977459430695,
      "learning_rate": 0.00010366884306462068,
      "loss": 0.2881,
      "step": 2571
    },
    {
      "epoch": 2.4422027059102778,
      "grad_norm": 0.04310688376426697,
      "learning_rate": 0.00010360921321112337,
      "loss": 0.2578,
      "step": 2572
    },
    {
      "epoch": 2.4431521481129836,
      "grad_norm": 0.04362298175692558,
      "learning_rate": 0.00010354958207258945,
      "loss": 0.268,
      "step": 2573
    },
    {
      "epoch": 2.4441015903156895,
      "grad_norm": 0.04311308637261391,
      "learning_rate": 0.00010348994967025012,
      "loss": 0.272,
      "step": 2574
    },
    {
      "epoch": 2.4450510325183954,
      "grad_norm": 0.06475937366485596,
      "learning_rate": 0.00010343031602533713,
      "loss": 0.2882,
      "step": 2575
    },
    {
      "epoch": 2.4460004747211013,
      "grad_norm": 0.04065021127462387,
      "learning_rate": 0.0001033706811590826,
      "loss": 0.2578,
      "step": 2576
    },
    {
      "epoch": 2.446949916923807,
      "grad_norm": 0.044748466461896896,
      "learning_rate": 0.00010331104509271918,
      "loss": 0.2667,
      "step": 2577
    },
    {
      "epoch": 2.447899359126513,
      "grad_norm": 0.04572344943881035,
      "learning_rate": 0.00010325140784747993,
      "loss": 0.2669,
      "step": 2578
    },
    {
      "epoch": 2.448848801329219,
      "grad_norm": 0.05856434255838394,
      "learning_rate": 0.00010319176944459826,
      "loss": 0.2708,
      "step": 2579
    },
    {
      "epoch": 2.4497982435319248,
      "grad_norm": 0.03956478834152222,
      "learning_rate": 0.00010313212990530803,
      "loss": 0.2524,
      "step": 2580
    },
    {
      "epoch": 2.450747685734631,
      "grad_norm": 0.04141313582658768,
      "learning_rate": 0.00010307248925084352,
      "loss": 0.2576,
      "step": 2581
    },
    {
      "epoch": 2.451697127937337,
      "grad_norm": 0.04271996021270752,
      "learning_rate": 0.00010301284750243936,
      "loss": 0.2643,
      "step": 2582
    },
    {
      "epoch": 2.452646570140043,
      "grad_norm": 0.06122612580657005,
      "learning_rate": 0.00010295320468133066,
      "loss": 0.2799,
      "step": 2583
    },
    {
      "epoch": 2.4535960123427487,
      "grad_norm": 0.037602152675390244,
      "learning_rate": 0.00010289356080875277,
      "loss": 0.2491,
      "step": 2584
    },
    {
      "epoch": 2.4545454545454546,
      "grad_norm": 0.04972713813185692,
      "learning_rate": 0.00010283391590594161,
      "loss": 0.2869,
      "step": 2585
    },
    {
      "epoch": 2.4554948967481605,
      "grad_norm": 0.045384474098682404,
      "learning_rate": 0.00010277426999413327,
      "loss": 0.2762,
      "step": 2586
    },
    {
      "epoch": 2.4564443389508663,
      "grad_norm": 0.04337489232420921,
      "learning_rate": 0.0001027146230945643,
      "loss": 0.255,
      "step": 2587
    },
    {
      "epoch": 2.457393781153572,
      "grad_norm": 0.09511115401983261,
      "learning_rate": 0.00010265497522847162,
      "loss": 0.2713,
      "step": 2588
    },
    {
      "epoch": 2.458343223356278,
      "grad_norm": 0.08762829005718231,
      "learning_rate": 0.00010259532641709247,
      "loss": 0.2665,
      "step": 2589
    },
    {
      "epoch": 2.459292665558984,
      "grad_norm": 0.040508076548576355,
      "learning_rate": 0.00010253567668166435,
      "loss": 0.2485,
      "step": 2590
    },
    {
      "epoch": 2.46024210776169,
      "grad_norm": 0.04852940887212753,
      "learning_rate": 0.00010247602604342519,
      "loss": 0.2613,
      "step": 2591
    },
    {
      "epoch": 2.461191549964396,
      "grad_norm": 0.05233342573046684,
      "learning_rate": 0.00010241637452361323,
      "loss": 0.2781,
      "step": 2592
    },
    {
      "epoch": 2.462140992167102,
      "grad_norm": 0.08143714815378189,
      "learning_rate": 0.000102356722143467,
      "loss": 0.2548,
      "step": 2593
    },
    {
      "epoch": 2.463090434369808,
      "grad_norm": 0.05217353627085686,
      "learning_rate": 0.00010229706892422531,
      "loss": 0.2708,
      "step": 2594
    },
    {
      "epoch": 2.4640398765725138,
      "grad_norm": 0.09524723887443542,
      "learning_rate": 0.00010223741488712733,
      "loss": 0.2688,
      "step": 2595
    },
    {
      "epoch": 2.4649893187752197,
      "grad_norm": 0.05073004588484764,
      "learning_rate": 0.00010217776005341241,
      "loss": 0.2732,
      "step": 2596
    },
    {
      "epoch": 2.4659387609779255,
      "grad_norm": 0.04612806811928749,
      "learning_rate": 0.0001021181044443204,
      "loss": 0.2479,
      "step": 2597
    },
    {
      "epoch": 2.4668882031806314,
      "grad_norm": 0.045733220875263214,
      "learning_rate": 0.00010205844808109117,
      "loss": 0.2621,
      "step": 2598
    },
    {
      "epoch": 2.4678376453833373,
      "grad_norm": 0.044287122786045074,
      "learning_rate": 0.00010199879098496504,
      "loss": 0.2652,
      "step": 2599
    },
    {
      "epoch": 2.468787087586043,
      "grad_norm": 0.06199018657207489,
      "learning_rate": 0.00010193913317718244,
      "loss": 0.278,
      "step": 2600
    },
    {
      "epoch": 2.469736529788749,
      "grad_norm": 0.041201118379831314,
      "learning_rate": 0.00010187947467898425,
      "loss": 0.2632,
      "step": 2601
    },
    {
      "epoch": 2.470685971991455,
      "grad_norm": 0.04487983509898186,
      "learning_rate": 0.00010181981551161144,
      "loss": 0.27,
      "step": 2602
    },
    {
      "epoch": 2.4716354141941608,
      "grad_norm": 0.04142885282635689,
      "learning_rate": 0.00010176015569630526,
      "loss": 0.2537,
      "step": 2603
    },
    {
      "epoch": 2.4725848563968666,
      "grad_norm": 0.04327712580561638,
      "learning_rate": 0.0001017004952543072,
      "loss": 0.2615,
      "step": 2604
    },
    {
      "epoch": 2.4735342985995725,
      "grad_norm": 0.043669018894433975,
      "learning_rate": 0.00010164083420685897,
      "loss": 0.2617,
      "step": 2605
    },
    {
      "epoch": 2.474483740802279,
      "grad_norm": 0.05305905640125275,
      "learning_rate": 0.0001015811725752025,
      "loss": 0.2806,
      "step": 2606
    },
    {
      "epoch": 2.4754331830049847,
      "grad_norm": 0.042252250015735626,
      "learning_rate": 0.00010152151038057993,
      "loss": 0.2672,
      "step": 2607
    },
    {
      "epoch": 2.4763826252076906,
      "grad_norm": 0.043947260826826096,
      "learning_rate": 0.00010146184764423357,
      "loss": 0.2652,
      "step": 2608
    },
    {
      "epoch": 2.4773320674103965,
      "grad_norm": 0.04211874678730965,
      "learning_rate": 0.00010140218438740591,
      "loss": 0.2544,
      "step": 2609
    },
    {
      "epoch": 2.4782815096131023,
      "grad_norm": 0.04319967329502106,
      "learning_rate": 0.00010134252063133975,
      "loss": 0.2689,
      "step": 2610
    },
    {
      "epoch": 2.479230951815808,
      "grad_norm": 0.041697051376104355,
      "learning_rate": 0.00010128285639727792,
      "loss": 0.2566,
      "step": 2611
    },
    {
      "epoch": 2.480180394018514,
      "grad_norm": 0.04475269839167595,
      "learning_rate": 0.0001012231917064635,
      "loss": 0.2641,
      "step": 2612
    },
    {
      "epoch": 2.48112983622122,
      "grad_norm": 0.03968726098537445,
      "learning_rate": 0.00010116352658013973,
      "loss": 0.2552,
      "step": 2613
    },
    {
      "epoch": 2.482079278423926,
      "grad_norm": 0.0413176491856575,
      "learning_rate": 0.00010110386103954992,
      "loss": 0.2516,
      "step": 2614
    },
    {
      "epoch": 2.4830287206266317,
      "grad_norm": 0.04182640090584755,
      "learning_rate": 0.00010104419510593764,
      "loss": 0.2602,
      "step": 2615
    },
    {
      "epoch": 2.4839781628293376,
      "grad_norm": 0.04140539839863777,
      "learning_rate": 0.00010098452880054656,
      "loss": 0.2595,
      "step": 2616
    },
    {
      "epoch": 2.484927605032044,
      "grad_norm": 0.042879413813352585,
      "learning_rate": 0.00010092486214462045,
      "loss": 0.2722,
      "step": 2617
    },
    {
      "epoch": 2.48587704723475,
      "grad_norm": 0.04176124185323715,
      "learning_rate": 0.00010086519515940326,
      "loss": 0.255,
      "step": 2618
    },
    {
      "epoch": 2.4868264894374557,
      "grad_norm": 0.04227910190820694,
      "learning_rate": 0.00010080552786613899,
      "loss": 0.2731,
      "step": 2619
    },
    {
      "epoch": 2.4877759316401615,
      "grad_norm": 0.050102487206459045,
      "learning_rate": 0.00010074586028607184,
      "loss": 0.2863,
      "step": 2620
    },
    {
      "epoch": 2.4887253738428674,
      "grad_norm": 0.04757722094655037,
      "learning_rate": 0.00010068619244044604,
      "loss": 0.2633,
      "step": 2621
    },
    {
      "epoch": 2.4896748160455733,
      "grad_norm": 0.0399179607629776,
      "learning_rate": 0.00010062652435050592,
      "loss": 0.2608,
      "step": 2622
    },
    {
      "epoch": 2.490624258248279,
      "grad_norm": 0.041110094636678696,
      "learning_rate": 0.00010056685603749589,
      "loss": 0.2614,
      "step": 2623
    },
    {
      "epoch": 2.491573700450985,
      "grad_norm": 0.04052494838833809,
      "learning_rate": 0.00010050718752266053,
      "loss": 0.2494,
      "step": 2624
    },
    {
      "epoch": 2.492523142653691,
      "grad_norm": 0.04536557197570801,
      "learning_rate": 0.00010044751882724435,
      "loss": 0.2645,
      "step": 2625
    },
    {
      "epoch": 2.493472584856397,
      "grad_norm": 0.04890064895153046,
      "learning_rate": 0.00010038784997249205,
      "loss": 0.2648,
      "step": 2626
    },
    {
      "epoch": 2.4944220270591027,
      "grad_norm": 0.04206657037138939,
      "learning_rate": 0.00010032818097964829,
      "loss": 0.2629,
      "step": 2627
    },
    {
      "epoch": 2.4953714692618085,
      "grad_norm": 0.049437765032052994,
      "learning_rate": 0.00010026851186995785,
      "loss": 0.2711,
      "step": 2628
    },
    {
      "epoch": 2.4963209114645144,
      "grad_norm": 0.04828893393278122,
      "learning_rate": 0.00010020884266466554,
      "loss": 0.2704,
      "step": 2629
    },
    {
      "epoch": 2.4972703536672203,
      "grad_norm": 0.041123807430267334,
      "learning_rate": 0.00010014917338501618,
      "loss": 0.2713,
      "step": 2630
    },
    {
      "epoch": 2.4982197958699266,
      "grad_norm": 0.04797301068902016,
      "learning_rate": 0.00010008950405225462,
      "loss": 0.2658,
      "step": 2631
    },
    {
      "epoch": 2.4991692380726325,
      "grad_norm": 0.04327033832669258,
      "learning_rate": 0.0001000298346876257,
      "loss": 0.2627,
      "step": 2632
    },
    {
      "epoch": 2.5001186802753383,
      "grad_norm": 0.04332072660326958,
      "learning_rate": 9.997016531237432e-05,
      "loss": 0.2722,
      "step": 2633
    },
    {
      "epoch": 2.501068122478044,
      "grad_norm": 0.047892216593027115,
      "learning_rate": 9.991049594774543e-05,
      "loss": 0.264,
      "step": 2634
    },
    {
      "epoch": 2.50201756468075,
      "grad_norm": 0.04232776165008545,
      "learning_rate": 9.985082661498383e-05,
      "loss": 0.2598,
      "step": 2635
    },
    {
      "epoch": 2.502967006883456,
      "grad_norm": 0.0745973065495491,
      "learning_rate": 9.979115733533449e-05,
      "loss": 0.2631,
      "step": 2636
    },
    {
      "epoch": 2.503916449086162,
      "grad_norm": 0.04066299647092819,
      "learning_rate": 9.973148813004216e-05,
      "loss": 0.2542,
      "step": 2637
    },
    {
      "epoch": 2.5048658912888677,
      "grad_norm": 0.10445679724216461,
      "learning_rate": 9.96718190203517e-05,
      "loss": 0.2801,
      "step": 2638
    },
    {
      "epoch": 2.5058153334915736,
      "grad_norm": 0.04454142972826958,
      "learning_rate": 9.961215002750799e-05,
      "loss": 0.2698,
      "step": 2639
    },
    {
      "epoch": 2.5067647756942795,
      "grad_norm": 0.03955570235848427,
      "learning_rate": 9.955248117275566e-05,
      "loss": 0.2588,
      "step": 2640
    },
    {
      "epoch": 2.507714217896986,
      "grad_norm": 0.042795125395059586,
      "learning_rate": 9.949281247733952e-05,
      "loss": 0.2619,
      "step": 2641
    },
    {
      "epoch": 2.5086636600996917,
      "grad_norm": 0.045542147010564804,
      "learning_rate": 9.943314396250413e-05,
      "loss": 0.2599,
      "step": 2642
    },
    {
      "epoch": 2.5096131023023975,
      "grad_norm": 0.04280371963977814,
      "learning_rate": 9.937347564949413e-05,
      "loss": 0.2557,
      "step": 2643
    },
    {
      "epoch": 2.5105625445051034,
      "grad_norm": 0.04497581720352173,
      "learning_rate": 9.931380755955398e-05,
      "loss": 0.2683,
      "step": 2644
    },
    {
      "epoch": 2.5115119867078093,
      "grad_norm": 0.05084951967000961,
      "learning_rate": 9.925413971392815e-05,
      "loss": 0.2593,
      "step": 2645
    },
    {
      "epoch": 2.512461428910515,
      "grad_norm": 0.04743589088320732,
      "learning_rate": 9.919447213386103e-05,
      "loss": 0.2686,
      "step": 2646
    },
    {
      "epoch": 2.513410871113221,
      "grad_norm": 0.051552269607782364,
      "learning_rate": 9.913480484059676e-05,
      "loss": 0.2576,
      "step": 2647
    },
    {
      "epoch": 2.514360313315927,
      "grad_norm": 0.05527034029364586,
      "learning_rate": 9.907513785537957e-05,
      "loss": 0.2859,
      "step": 2648
    },
    {
      "epoch": 2.515309755518633,
      "grad_norm": 0.05772867426276207,
      "learning_rate": 9.901547119945345e-05,
      "loss": 0.2871,
      "step": 2649
    },
    {
      "epoch": 2.5162591977213387,
      "grad_norm": 0.07210524380207062,
      "learning_rate": 9.89558048940624e-05,
      "loss": 0.2582,
      "step": 2650
    },
    {
      "epoch": 2.5172086399240445,
      "grad_norm": 0.053755562752485275,
      "learning_rate": 9.889613896045012e-05,
      "loss": 0.2677,
      "step": 2651
    },
    {
      "epoch": 2.5181580821267504,
      "grad_norm": 0.06051605939865112,
      "learning_rate": 9.883647341986032e-05,
      "loss": 0.2792,
      "step": 2652
    },
    {
      "epoch": 2.5191075243294563,
      "grad_norm": 0.04284793883562088,
      "learning_rate": 9.87768082935365e-05,
      "loss": 0.2507,
      "step": 2653
    },
    {
      "epoch": 2.520056966532162,
      "grad_norm": 0.04125799611210823,
      "learning_rate": 9.871714360272208e-05,
      "loss": 0.258,
      "step": 2654
    },
    {
      "epoch": 2.521006408734868,
      "grad_norm": 0.04322294518351555,
      "learning_rate": 9.865747936866027e-05,
      "loss": 0.2678,
      "step": 2655
    },
    {
      "epoch": 2.521955850937574,
      "grad_norm": 0.03939296305179596,
      "learning_rate": 9.85978156125941e-05,
      "loss": 0.254,
      "step": 2656
    },
    {
      "epoch": 2.5229052931402802,
      "grad_norm": 0.0440739206969738,
      "learning_rate": 9.853815235576648e-05,
      "loss": 0.2542,
      "step": 2657
    },
    {
      "epoch": 2.523854735342986,
      "grad_norm": 0.04515873268246651,
      "learning_rate": 9.847848961942008e-05,
      "loss": 0.2556,
      "step": 2658
    },
    {
      "epoch": 2.524804177545692,
      "grad_norm": 0.044637441635131836,
      "learning_rate": 9.841882742479753e-05,
      "loss": 0.2648,
      "step": 2659
    },
    {
      "epoch": 2.525753619748398,
      "grad_norm": 0.04760422930121422,
      "learning_rate": 9.835916579314105e-05,
      "loss": 0.2581,
      "step": 2660
    },
    {
      "epoch": 2.5267030619511037,
      "grad_norm": 0.09039561450481415,
      "learning_rate": 9.829950474569281e-05,
      "loss": 0.2744,
      "step": 2661
    },
    {
      "epoch": 2.5276525041538096,
      "grad_norm": 0.04204951971769333,
      "learning_rate": 9.823984430369477e-05,
      "loss": 0.2474,
      "step": 2662
    },
    {
      "epoch": 2.5286019463565155,
      "grad_norm": 0.04530685022473335,
      "learning_rate": 9.818018448838855e-05,
      "loss": 0.2687,
      "step": 2663
    },
    {
      "epoch": 2.5295513885592213,
      "grad_norm": 0.05104728043079376,
      "learning_rate": 9.812052532101578e-05,
      "loss": 0.2701,
      "step": 2664
    },
    {
      "epoch": 2.5305008307619272,
      "grad_norm": 0.03922882676124573,
      "learning_rate": 9.806086682281758e-05,
      "loss": 0.2582,
      "step": 2665
    },
    {
      "epoch": 2.5314502729646335,
      "grad_norm": 0.043734561651945114,
      "learning_rate": 9.800120901503503e-05,
      "loss": 0.2689,
      "step": 2666
    },
    {
      "epoch": 2.5323997151673394,
      "grad_norm": 0.043197888880968094,
      "learning_rate": 9.794155191890885e-05,
      "loss": 0.2589,
      "step": 2667
    },
    {
      "epoch": 2.5333491573700453,
      "grad_norm": 0.041970640420913696,
      "learning_rate": 9.788189555567966e-05,
      "loss": 0.2527,
      "step": 2668
    },
    {
      "epoch": 2.534298599572751,
      "grad_norm": 0.04040301963686943,
      "learning_rate": 9.78222399465876e-05,
      "loss": 0.2578,
      "step": 2669
    },
    {
      "epoch": 2.535248041775457,
      "grad_norm": 0.041597891598939896,
      "learning_rate": 9.776258511287271e-05,
      "loss": 0.2593,
      "step": 2670
    },
    {
      "epoch": 2.536197483978163,
      "grad_norm": 0.05185459926724434,
      "learning_rate": 9.770293107577471e-05,
      "loss": 0.2746,
      "step": 2671
    },
    {
      "epoch": 2.537146926180869,
      "grad_norm": 0.03791874647140503,
      "learning_rate": 9.764327785653302e-05,
      "loss": 0.257,
      "step": 2672
    },
    {
      "epoch": 2.5380963683835747,
      "grad_norm": 0.04940661042928696,
      "learning_rate": 9.75836254763868e-05,
      "loss": 0.2589,
      "step": 2673
    },
    {
      "epoch": 2.5390458105862805,
      "grad_norm": 0.040791600942611694,
      "learning_rate": 9.752397395657482e-05,
      "loss": 0.2616,
      "step": 2674
    },
    {
      "epoch": 2.5399952527889864,
      "grad_norm": 0.0469420962035656,
      "learning_rate": 9.746432331833569e-05,
      "loss": 0.2655,
      "step": 2675
    },
    {
      "epoch": 2.5409446949916923,
      "grad_norm": 0.046912360936403275,
      "learning_rate": 9.740467358290755e-05,
      "loss": 0.2618,
      "step": 2676
    },
    {
      "epoch": 2.541894137194398,
      "grad_norm": 0.043544989079236984,
      "learning_rate": 9.734502477152841e-05,
      "loss": 0.2634,
      "step": 2677
    },
    {
      "epoch": 2.542843579397104,
      "grad_norm": 0.050445009022951126,
      "learning_rate": 9.728537690543572e-05,
      "loss": 0.2671,
      "step": 2678
    },
    {
      "epoch": 2.54379302159981,
      "grad_norm": 0.07378991693258286,
      "learning_rate": 9.722573000586676e-05,
      "loss": 0.2881,
      "step": 2679
    },
    {
      "epoch": 2.544742463802516,
      "grad_norm": 0.03695583716034889,
      "learning_rate": 9.716608409405842e-05,
      "loss": 0.2499,
      "step": 2680
    },
    {
      "epoch": 2.5456919060052217,
      "grad_norm": 0.05563429370522499,
      "learning_rate": 9.710643919124723e-05,
      "loss": 0.2725,
      "step": 2681
    },
    {
      "epoch": 2.546641348207928,
      "grad_norm": 0.04267679899930954,
      "learning_rate": 9.704679531866941e-05,
      "loss": 0.2511,
      "step": 2682
    },
    {
      "epoch": 2.547590790410634,
      "grad_norm": 0.04398363456130028,
      "learning_rate": 9.698715249756067e-05,
      "loss": 0.2644,
      "step": 2683
    },
    {
      "epoch": 2.5485402326133397,
      "grad_norm": 0.04224398359656334,
      "learning_rate": 9.692751074915653e-05,
      "loss": 0.2566,
      "step": 2684
    },
    {
      "epoch": 2.5494896748160456,
      "grad_norm": 0.039920974522829056,
      "learning_rate": 9.6867870094692e-05,
      "loss": 0.2487,
      "step": 2685
    },
    {
      "epoch": 2.5504391170187515,
      "grad_norm": 0.05325039476156235,
      "learning_rate": 9.680823055540174e-05,
      "loss": 0.2793,
      "step": 2686
    },
    {
      "epoch": 2.5513885592214574,
      "grad_norm": 0.05420837551355362,
      "learning_rate": 9.67485921525201e-05,
      "loss": 0.2778,
      "step": 2687
    },
    {
      "epoch": 2.5523380014241632,
      "grad_norm": 0.043418120592832565,
      "learning_rate": 9.668895490728082e-05,
      "loss": 0.27,
      "step": 2688
    },
    {
      "epoch": 2.553287443626869,
      "grad_norm": 0.04475219547748566,
      "learning_rate": 9.662931884091741e-05,
      "loss": 0.2699,
      "step": 2689
    },
    {
      "epoch": 2.554236885829575,
      "grad_norm": 0.047918371856212616,
      "learning_rate": 9.656968397466291e-05,
      "loss": 0.2694,
      "step": 2690
    },
    {
      "epoch": 2.5551863280322813,
      "grad_norm": 0.0412350669503212,
      "learning_rate": 9.651005032974994e-05,
      "loss": 0.2625,
      "step": 2691
    },
    {
      "epoch": 2.556135770234987,
      "grad_norm": 0.0425032414495945,
      "learning_rate": 9.645041792741057e-05,
      "loss": 0.2581,
      "step": 2692
    },
    {
      "epoch": 2.557085212437693,
      "grad_norm": 0.0597444623708725,
      "learning_rate": 9.639078678887665e-05,
      "loss": 0.2771,
      "step": 2693
    },
    {
      "epoch": 2.558034654640399,
      "grad_norm": 0.05487224459648132,
      "learning_rate": 9.633115693537935e-05,
      "loss": 0.2642,
      "step": 2694
    },
    {
      "epoch": 2.558984096843105,
      "grad_norm": 0.04886789247393608,
      "learning_rate": 9.627152838814953e-05,
      "loss": 0.2695,
      "step": 2695
    },
    {
      "epoch": 2.5599335390458107,
      "grad_norm": 0.042438726872205734,
      "learning_rate": 9.62119011684176e-05,
      "loss": 0.2689,
      "step": 2696
    },
    {
      "epoch": 2.5608829812485165,
      "grad_norm": 0.04784049838781357,
      "learning_rate": 9.615227529741335e-05,
      "loss": 0.2725,
      "step": 2697
    },
    {
      "epoch": 2.5618324234512224,
      "grad_norm": 0.08410549908876419,
      "learning_rate": 9.609265079636623e-05,
      "loss": 0.2688,
      "step": 2698
    },
    {
      "epoch": 2.5627818656539283,
      "grad_norm": 0.03873563930392265,
      "learning_rate": 9.603302768650513e-05,
      "loss": 0.2621,
      "step": 2699
    },
    {
      "epoch": 2.563731307856634,
      "grad_norm": 0.12595133483409882,
      "learning_rate": 9.597340598905852e-05,
      "loss": 0.2718,
      "step": 2700
    },
    {
      "epoch": 2.56468075005934,
      "grad_norm": 0.041138943284749985,
      "learning_rate": 9.591378572525422e-05,
      "loss": 0.2611,
      "step": 2701
    },
    {
      "epoch": 2.565630192262046,
      "grad_norm": 0.05437808111310005,
      "learning_rate": 9.585416691631968e-05,
      "loss": 0.2836,
      "step": 2702
    },
    {
      "epoch": 2.566579634464752,
      "grad_norm": 0.04374735429883003,
      "learning_rate": 9.57945495834818e-05,
      "loss": 0.2672,
      "step": 2703
    },
    {
      "epoch": 2.5675290766674577,
      "grad_norm": 0.04583863914012909,
      "learning_rate": 9.573493374796693e-05,
      "loss": 0.2499,
      "step": 2704
    },
    {
      "epoch": 2.5684785188701635,
      "grad_norm": 0.0510207898914814,
      "learning_rate": 9.567531943100093e-05,
      "loss": 0.2767,
      "step": 2705
    },
    {
      "epoch": 2.5694279610728694,
      "grad_norm": 0.0616254098713398,
      "learning_rate": 9.561570665380901e-05,
      "loss": 0.2633,
      "step": 2706
    },
    {
      "epoch": 2.5703774032755757,
      "grad_norm": 0.044144704937934875,
      "learning_rate": 9.555609543761597e-05,
      "loss": 0.2559,
      "step": 2707
    },
    {
      "epoch": 2.5713268454782816,
      "grad_norm": 0.051671102643013,
      "learning_rate": 9.549648580364595e-05,
      "loss": 0.2632,
      "step": 2708
    },
    {
      "epoch": 2.5722762876809875,
      "grad_norm": 0.05204401910305023,
      "learning_rate": 9.543687777312263e-05,
      "loss": 0.2675,
      "step": 2709
    },
    {
      "epoch": 2.5732257298836934,
      "grad_norm": 0.07156354933977127,
      "learning_rate": 9.537727136726898e-05,
      "loss": 0.2718,
      "step": 2710
    },
    {
      "epoch": 2.5741751720863992,
      "grad_norm": 0.05089721456170082,
      "learning_rate": 9.531766660730752e-05,
      "loss": 0.2683,
      "step": 2711
    },
    {
      "epoch": 2.575124614289105,
      "grad_norm": 0.04808243736624718,
      "learning_rate": 9.525806351446013e-05,
      "loss": 0.2496,
      "step": 2712
    },
    {
      "epoch": 2.576074056491811,
      "grad_norm": 0.06348177045583725,
      "learning_rate": 9.519846210994806e-05,
      "loss": 0.2876,
      "step": 2713
    },
    {
      "epoch": 2.577023498694517,
      "grad_norm": 0.06046038866043091,
      "learning_rate": 9.513886241499209e-05,
      "loss": 0.2763,
      "step": 2714
    },
    {
      "epoch": 2.5779729408972227,
      "grad_norm": 0.04020876809954643,
      "learning_rate": 9.507926445081219e-05,
      "loss": 0.2655,
      "step": 2715
    },
    {
      "epoch": 2.578922383099929,
      "grad_norm": 0.05419212952256203,
      "learning_rate": 9.50196682386279e-05,
      "loss": 0.2615,
      "step": 2716
    },
    {
      "epoch": 2.579871825302635,
      "grad_norm": 0.056060582399368286,
      "learning_rate": 9.496007379965801e-05,
      "loss": 0.2801,
      "step": 2717
    },
    {
      "epoch": 2.580821267505341,
      "grad_norm": 0.05008630454540253,
      "learning_rate": 9.490048115512074e-05,
      "loss": 0.2834,
      "step": 2718
    },
    {
      "epoch": 2.5817707097080467,
      "grad_norm": 0.040566056966781616,
      "learning_rate": 9.484089032623374e-05,
      "loss": 0.2591,
      "step": 2719
    },
    {
      "epoch": 2.5827201519107525,
      "grad_norm": 0.04528141766786575,
      "learning_rate": 9.47813013342138e-05,
      "loss": 0.2619,
      "step": 2720
    },
    {
      "epoch": 2.5836695941134584,
      "grad_norm": 0.041971296072006226,
      "learning_rate": 9.47217142002773e-05,
      "loss": 0.2645,
      "step": 2721
    },
    {
      "epoch": 2.5846190363161643,
      "grad_norm": 0.047894254326820374,
      "learning_rate": 9.466212894563977e-05,
      "loss": 0.2742,
      "step": 2722
    },
    {
      "epoch": 2.58556847851887,
      "grad_norm": 0.0397222638130188,
      "learning_rate": 9.460254559151622e-05,
      "loss": 0.2553,
      "step": 2723
    },
    {
      "epoch": 2.586517920721576,
      "grad_norm": 0.04544184356927872,
      "learning_rate": 9.454296415912085e-05,
      "loss": 0.2614,
      "step": 2724
    },
    {
      "epoch": 2.587467362924282,
      "grad_norm": 0.04659304767847061,
      "learning_rate": 9.448338466966726e-05,
      "loss": 0.2764,
      "step": 2725
    },
    {
      "epoch": 2.588416805126988,
      "grad_norm": 0.03721113130450249,
      "learning_rate": 9.442380714436834e-05,
      "loss": 0.2489,
      "step": 2726
    },
    {
      "epoch": 2.5893662473296937,
      "grad_norm": 0.08494356274604797,
      "learning_rate": 9.436423160443625e-05,
      "loss": 0.2695,
      "step": 2727
    },
    {
      "epoch": 2.5903156895323995,
      "grad_norm": 0.04514550790190697,
      "learning_rate": 9.430465807108255e-05,
      "loss": 0.271,
      "step": 2728
    },
    {
      "epoch": 2.5912651317351054,
      "grad_norm": 0.044937096536159515,
      "learning_rate": 9.42450865655179e-05,
      "loss": 0.265,
      "step": 2729
    },
    {
      "epoch": 2.5922145739378113,
      "grad_norm": 0.04702681675553322,
      "learning_rate": 9.418551710895243e-05,
      "loss": 0.2615,
      "step": 2730
    },
    {
      "epoch": 2.593164016140517,
      "grad_norm": 0.0370311513543129,
      "learning_rate": 9.412594972259539e-05,
      "loss": 0.2461,
      "step": 2731
    },
    {
      "epoch": 2.5941134583432235,
      "grad_norm": 0.03818555921316147,
      "learning_rate": 9.406638442765542e-05,
      "loss": 0.2596,
      "step": 2732
    },
    {
      "epoch": 2.5950629005459294,
      "grad_norm": 0.04003351554274559,
      "learning_rate": 9.400682124534027e-05,
      "loss": 0.2651,
      "step": 2733
    },
    {
      "epoch": 2.5960123427486352,
      "grad_norm": 0.05568593740463257,
      "learning_rate": 9.394726019685706e-05,
      "loss": 0.2783,
      "step": 2734
    },
    {
      "epoch": 2.596961784951341,
      "grad_norm": 0.1012004017829895,
      "learning_rate": 9.388770130341217e-05,
      "loss": 0.3044,
      "step": 2735
    },
    {
      "epoch": 2.597911227154047,
      "grad_norm": 0.03857073932886124,
      "learning_rate": 9.382814458621106e-05,
      "loss": 0.2567,
      "step": 2736
    },
    {
      "epoch": 2.598860669356753,
      "grad_norm": 0.041371818631887436,
      "learning_rate": 9.376859006645859e-05,
      "loss": 0.2647,
      "step": 2737
    },
    {
      "epoch": 2.5998101115594587,
      "grad_norm": 0.043267469853162766,
      "learning_rate": 9.370903776535865e-05,
      "loss": 0.2643,
      "step": 2738
    },
    {
      "epoch": 2.6007595537621646,
      "grad_norm": 0.04230300337076187,
      "learning_rate": 9.364948770411456e-05,
      "loss": 0.2575,
      "step": 2739
    },
    {
      "epoch": 2.6017089959648705,
      "grad_norm": 0.06302177906036377,
      "learning_rate": 9.358993990392864e-05,
      "loss": 0.2686,
      "step": 2740
    },
    {
      "epoch": 2.602658438167577,
      "grad_norm": 0.04174618795514107,
      "learning_rate": 9.353039438600257e-05,
      "loss": 0.2623,
      "step": 2741
    },
    {
      "epoch": 2.6036078803702827,
      "grad_norm": 0.07566291093826294,
      "learning_rate": 9.347085117153707e-05,
      "loss": 0.2529,
      "step": 2742
    },
    {
      "epoch": 2.6045573225729886,
      "grad_norm": 0.036149147897958755,
      "learning_rate": 9.341131028173214e-05,
      "loss": 0.2588,
      "step": 2743
    },
    {
      "epoch": 2.6055067647756944,
      "grad_norm": 0.03942830488085747,
      "learning_rate": 9.335177173778695e-05,
      "loss": 0.2507,
      "step": 2744
    },
    {
      "epoch": 2.6064562069784003,
      "grad_norm": 0.04223598912358284,
      "learning_rate": 9.329223556089975e-05,
      "loss": 0.2633,
      "step": 2745
    },
    {
      "epoch": 2.607405649181106,
      "grad_norm": 0.04097060486674309,
      "learning_rate": 9.32327017722681e-05,
      "loss": 0.2551,
      "step": 2746
    },
    {
      "epoch": 2.608355091383812,
      "grad_norm": 0.045439936220645905,
      "learning_rate": 9.317317039308848e-05,
      "loss": 0.266,
      "step": 2747
    },
    {
      "epoch": 2.609304533586518,
      "grad_norm": 0.047136351466178894,
      "learning_rate": 9.311364144455679e-05,
      "loss": 0.2593,
      "step": 2748
    },
    {
      "epoch": 2.610253975789224,
      "grad_norm": 0.04558572918176651,
      "learning_rate": 9.305411494786779e-05,
      "loss": 0.2715,
      "step": 2749
    },
    {
      "epoch": 2.6112034179919297,
      "grad_norm": 0.06827898323535919,
      "learning_rate": 9.299459092421558e-05,
      "loss": 0.2836,
      "step": 2750
    },
    {
      "epoch": 2.6121528601946356,
      "grad_norm": 0.0461437962949276,
      "learning_rate": 9.293506939479325e-05,
      "loss": 0.2716,
      "step": 2751
    },
    {
      "epoch": 2.6131023023973414,
      "grad_norm": 0.06343681365251541,
      "learning_rate": 9.287555038079309e-05,
      "loss": 0.2752,
      "step": 2752
    },
    {
      "epoch": 2.6140517446000473,
      "grad_norm": 0.038738228380680084,
      "learning_rate": 9.281603390340648e-05,
      "loss": 0.2575,
      "step": 2753
    },
    {
      "epoch": 2.615001186802753,
      "grad_norm": 0.04323815181851387,
      "learning_rate": 9.275651998382377e-05,
      "loss": 0.2642,
      "step": 2754
    },
    {
      "epoch": 2.615950629005459,
      "grad_norm": 0.04197486490011215,
      "learning_rate": 9.26970086432346e-05,
      "loss": 0.261,
      "step": 2755
    },
    {
      "epoch": 2.6169000712081654,
      "grad_norm": 0.04667133465409279,
      "learning_rate": 9.263749990282754e-05,
      "loss": 0.2699,
      "step": 2756
    },
    {
      "epoch": 2.6178495134108712,
      "grad_norm": 0.06833603978157043,
      "learning_rate": 9.257799378379032e-05,
      "loss": 0.2754,
      "step": 2757
    },
    {
      "epoch": 2.618798955613577,
      "grad_norm": 0.04214934632182121,
      "learning_rate": 9.251849030730964e-05,
      "loss": 0.2579,
      "step": 2758
    },
    {
      "epoch": 2.619748397816283,
      "grad_norm": 0.1015283465385437,
      "learning_rate": 9.245898949457139e-05,
      "loss": 0.2642,
      "step": 2759
    },
    {
      "epoch": 2.620697840018989,
      "grad_norm": 0.08113419264554977,
      "learning_rate": 9.239949136676041e-05,
      "loss": 0.2616,
      "step": 2760
    },
    {
      "epoch": 2.6216472822216947,
      "grad_norm": 0.04192844033241272,
      "learning_rate": 9.233999594506063e-05,
      "loss": 0.2628,
      "step": 2761
    },
    {
      "epoch": 2.6225967244244006,
      "grad_norm": 0.045566219836473465,
      "learning_rate": 9.228050325065503e-05,
      "loss": 0.2687,
      "step": 2762
    },
    {
      "epoch": 2.6235461666271065,
      "grad_norm": 0.08133133500814438,
      "learning_rate": 9.222101330472552e-05,
      "loss": 0.2685,
      "step": 2763
    },
    {
      "epoch": 2.6244956088298124,
      "grad_norm": 0.043140675872564316,
      "learning_rate": 9.216152612845318e-05,
      "loss": 0.2577,
      "step": 2764
    },
    {
      "epoch": 2.6254450510325182,
      "grad_norm": 0.047479353845119476,
      "learning_rate": 9.210204174301796e-05,
      "loss": 0.2658,
      "step": 2765
    },
    {
      "epoch": 2.6263944932352246,
      "grad_norm": 0.041498858481645584,
      "learning_rate": 9.204256016959898e-05,
      "loss": 0.2435,
      "step": 2766
    },
    {
      "epoch": 2.6273439354379304,
      "grad_norm": 0.06182354688644409,
      "learning_rate": 9.198308142937415e-05,
      "loss": 0.2795,
      "step": 2767
    },
    {
      "epoch": 2.6282933776406363,
      "grad_norm": 0.13252820074558258,
      "learning_rate": 9.192360554352055e-05,
      "loss": 0.2846,
      "step": 2768
    },
    {
      "epoch": 2.629242819843342,
      "grad_norm": 0.04402116686105728,
      "learning_rate": 9.186413253321418e-05,
      "loss": 0.2558,
      "step": 2769
    },
    {
      "epoch": 2.630192262046048,
      "grad_norm": 0.05568404123187065,
      "learning_rate": 9.180466241962999e-05,
      "loss": 0.2732,
      "step": 2770
    },
    {
      "epoch": 2.631141704248754,
      "grad_norm": 0.04584593325853348,
      "learning_rate": 9.174519522394198e-05,
      "loss": 0.2588,
      "step": 2771
    },
    {
      "epoch": 2.63209114645146,
      "grad_norm": 0.06789962947368622,
      "learning_rate": 9.168573096732297e-05,
      "loss": 0.2826,
      "step": 2772
    },
    {
      "epoch": 2.6330405886541657,
      "grad_norm": 0.05142885819077492,
      "learning_rate": 9.162626967094487e-05,
      "loss": 0.2692,
      "step": 2773
    },
    {
      "epoch": 2.6339900308568716,
      "grad_norm": 0.04692273959517479,
      "learning_rate": 9.156681135597847e-05,
      "loss": 0.2583,
      "step": 2774
    },
    {
      "epoch": 2.6349394730595774,
      "grad_norm": 0.05026277154684067,
      "learning_rate": 9.150735604359351e-05,
      "loss": 0.2655,
      "step": 2775
    },
    {
      "epoch": 2.6358889152622833,
      "grad_norm": 0.062294721603393555,
      "learning_rate": 9.144790375495871e-05,
      "loss": 0.2696,
      "step": 2776
    },
    {
      "epoch": 2.636838357464989,
      "grad_norm": 0.038956400007009506,
      "learning_rate": 9.138845451124158e-05,
      "loss": 0.2477,
      "step": 2777
    },
    {
      "epoch": 2.637787799667695,
      "grad_norm": 0.1065063551068306,
      "learning_rate": 9.132900833360871e-05,
      "loss": 0.251,
      "step": 2778
    },
    {
      "epoch": 2.638737241870401,
      "grad_norm": 0.05274252966046333,
      "learning_rate": 9.126956524322547e-05,
      "loss": 0.2849,
      "step": 2779
    },
    {
      "epoch": 2.639686684073107,
      "grad_norm": 0.04132537916302681,
      "learning_rate": 9.121012526125626e-05,
      "loss": 0.2652,
      "step": 2780
    },
    {
      "epoch": 2.640636126275813,
      "grad_norm": 0.06332124769687653,
      "learning_rate": 9.115068840886417e-05,
      "loss": 0.2764,
      "step": 2781
    },
    {
      "epoch": 2.641585568478519,
      "grad_norm": 0.04629790782928467,
      "learning_rate": 9.10912547072114e-05,
      "loss": 0.2632,
      "step": 2782
    },
    {
      "epoch": 2.642535010681225,
      "grad_norm": 0.05298149958252907,
      "learning_rate": 9.103182417745888e-05,
      "loss": 0.2641,
      "step": 2783
    },
    {
      "epoch": 2.6434844528839307,
      "grad_norm": 0.039357319474220276,
      "learning_rate": 9.097239684076649e-05,
      "loss": 0.2529,
      "step": 2784
    },
    {
      "epoch": 2.6444338950866366,
      "grad_norm": 0.05280270054936409,
      "learning_rate": 9.091297271829296e-05,
      "loss": 0.2716,
      "step": 2785
    },
    {
      "epoch": 2.6453833372893425,
      "grad_norm": 0.04004097357392311,
      "learning_rate": 9.085355183119579e-05,
      "loss": 0.2507,
      "step": 2786
    },
    {
      "epoch": 2.6463327794920484,
      "grad_norm": 0.039364296942949295,
      "learning_rate": 9.079413420063147e-05,
      "loss": 0.2649,
      "step": 2787
    },
    {
      "epoch": 2.6472822216947542,
      "grad_norm": 0.040332429111003876,
      "learning_rate": 9.073471984775519e-05,
      "loss": 0.266,
      "step": 2788
    },
    {
      "epoch": 2.64823166389746,
      "grad_norm": 0.0906134694814682,
      "learning_rate": 9.067530879372111e-05,
      "loss": 0.2763,
      "step": 2789
    },
    {
      "epoch": 2.649181106100166,
      "grad_norm": 0.03673803433775902,
      "learning_rate": 9.061590105968208e-05,
      "loss": 0.2585,
      "step": 2790
    },
    {
      "epoch": 2.6501305483028723,
      "grad_norm": 0.03839144855737686,
      "learning_rate": 9.055649666678987e-05,
      "loss": 0.2607,
      "step": 2791
    },
    {
      "epoch": 2.651079990505578,
      "grad_norm": 0.04190480336546898,
      "learning_rate": 9.049709563619503e-05,
      "loss": 0.2703,
      "step": 2792
    },
    {
      "epoch": 2.652029432708284,
      "grad_norm": 0.041215989738702774,
      "learning_rate": 9.043769798904689e-05,
      "loss": 0.2639,
      "step": 2793
    },
    {
      "epoch": 2.65297887491099,
      "grad_norm": 0.04381866008043289,
      "learning_rate": 9.037830374649364e-05,
      "loss": 0.2626,
      "step": 2794
    },
    {
      "epoch": 2.653928317113696,
      "grad_norm": 0.03813505917787552,
      "learning_rate": 9.03189129296821e-05,
      "loss": 0.2556,
      "step": 2795
    },
    {
      "epoch": 2.6548777593164017,
      "grad_norm": 0.03909599408507347,
      "learning_rate": 9.025952555975808e-05,
      "loss": 0.2465,
      "step": 2796
    },
    {
      "epoch": 2.6558272015191076,
      "grad_norm": 0.04648647457361221,
      "learning_rate": 9.020014165786602e-05,
      "loss": 0.2682,
      "step": 2797
    },
    {
      "epoch": 2.6567766437218134,
      "grad_norm": 0.041546739637851715,
      "learning_rate": 9.014076124514922e-05,
      "loss": 0.2696,
      "step": 2798
    },
    {
      "epoch": 2.6577260859245193,
      "grad_norm": 0.03989914432168007,
      "learning_rate": 9.00813843427496e-05,
      "loss": 0.2628,
      "step": 2799
    },
    {
      "epoch": 2.658675528127225,
      "grad_norm": 0.04056751728057861,
      "learning_rate": 9.002201097180796e-05,
      "loss": 0.2635,
      "step": 2800
    },
    {
      "epoch": 2.659624970329931,
      "grad_norm": 0.058337803930044174,
      "learning_rate": 8.996264115346382e-05,
      "loss": 0.2656,
      "step": 2801
    },
    {
      "epoch": 2.660574412532637,
      "grad_norm": 0.038185685873031616,
      "learning_rate": 8.990327490885537e-05,
      "loss": 0.2566,
      "step": 2802
    },
    {
      "epoch": 2.661523854735343,
      "grad_norm": 0.045001666992902756,
      "learning_rate": 8.984391225911966e-05,
      "loss": 0.2669,
      "step": 2803
    },
    {
      "epoch": 2.6624732969380487,
      "grad_norm": 0.04217304661870003,
      "learning_rate": 8.978455322539225e-05,
      "loss": 0.2605,
      "step": 2804
    },
    {
      "epoch": 2.6634227391407546,
      "grad_norm": 0.03670491650700569,
      "learning_rate": 8.97251978288076e-05,
      "loss": 0.2588,
      "step": 2805
    },
    {
      "epoch": 2.664372181343461,
      "grad_norm": 0.03951079025864601,
      "learning_rate": 8.96658460904988e-05,
      "loss": 0.2562,
      "step": 2806
    },
    {
      "epoch": 2.6653216235461668,
      "grad_norm": 0.062239449471235275,
      "learning_rate": 8.960649803159765e-05,
      "loss": 0.2726,
      "step": 2807
    },
    {
      "epoch": 2.6662710657488726,
      "grad_norm": 0.04627174511551857,
      "learning_rate": 8.954715367323468e-05,
      "loss": 0.267,
      "step": 2808
    },
    {
      "epoch": 2.6672205079515785,
      "grad_norm": 0.038264404982328415,
      "learning_rate": 8.948781303653896e-05,
      "loss": 0.2558,
      "step": 2809
    },
    {
      "epoch": 2.6681699501542844,
      "grad_norm": 0.05796501412987709,
      "learning_rate": 8.94284761426384e-05,
      "loss": 0.2663,
      "step": 2810
    },
    {
      "epoch": 2.6691193923569903,
      "grad_norm": 0.07112397253513336,
      "learning_rate": 8.93691430126595e-05,
      "loss": 0.2607,
      "step": 2811
    },
    {
      "epoch": 2.670068834559696,
      "grad_norm": 0.042541395872831345,
      "learning_rate": 8.930981366772746e-05,
      "loss": 0.251,
      "step": 2812
    },
    {
      "epoch": 2.671018276762402,
      "grad_norm": 0.04384302347898483,
      "learning_rate": 8.925048812896605e-05,
      "loss": 0.2618,
      "step": 2813
    },
    {
      "epoch": 2.671967718965108,
      "grad_norm": 0.04728015884757042,
      "learning_rate": 8.919116641749776e-05,
      "loss": 0.2611,
      "step": 2814
    },
    {
      "epoch": 2.6729171611678137,
      "grad_norm": 0.04660949110984802,
      "learning_rate": 8.91318485544437e-05,
      "loss": 0.2575,
      "step": 2815
    },
    {
      "epoch": 2.67386660337052,
      "grad_norm": 0.04812900722026825,
      "learning_rate": 8.907253456092359e-05,
      "loss": 0.2563,
      "step": 2816
    },
    {
      "epoch": 2.674816045573226,
      "grad_norm": 0.05315341800451279,
      "learning_rate": 8.901322445805586e-05,
      "loss": 0.2801,
      "step": 2817
    },
    {
      "epoch": 2.675765487775932,
      "grad_norm": 0.07543495297431946,
      "learning_rate": 8.895391826695737e-05,
      "loss": 0.2603,
      "step": 2818
    },
    {
      "epoch": 2.6767149299786377,
      "grad_norm": 0.047977060079574585,
      "learning_rate": 8.889461600874378e-05,
      "loss": 0.2663,
      "step": 2819
    },
    {
      "epoch": 2.6776643721813436,
      "grad_norm": 0.04339151456952095,
      "learning_rate": 8.883531770452923e-05,
      "loss": 0.258,
      "step": 2820
    },
    {
      "epoch": 2.6786138143840494,
      "grad_norm": 0.04932010546326637,
      "learning_rate": 8.877602337542655e-05,
      "loss": 0.2543,
      "step": 2821
    },
    {
      "epoch": 2.6795632565867553,
      "grad_norm": 0.04265378788113594,
      "learning_rate": 8.8716733042547e-05,
      "loss": 0.2642,
      "step": 2822
    },
    {
      "epoch": 2.680512698789461,
      "grad_norm": 0.06549558788537979,
      "learning_rate": 8.86574467270006e-05,
      "loss": 0.275,
      "step": 2823
    },
    {
      "epoch": 2.681462140992167,
      "grad_norm": 0.042048145085573196,
      "learning_rate": 8.85981644498958e-05,
      "loss": 0.264,
      "step": 2824
    },
    {
      "epoch": 2.682411583194873,
      "grad_norm": 0.03816340118646622,
      "learning_rate": 8.853888623233967e-05,
      "loss": 0.2575,
      "step": 2825
    },
    {
      "epoch": 2.683361025397579,
      "grad_norm": 0.04002130404114723,
      "learning_rate": 8.84796120954379e-05,
      "loss": 0.2635,
      "step": 2826
    },
    {
      "epoch": 2.6843104676002847,
      "grad_norm": 0.04486146941781044,
      "learning_rate": 8.842034206029456e-05,
      "loss": 0.256,
      "step": 2827
    },
    {
      "epoch": 2.6852599098029906,
      "grad_norm": 0.037432510405778885,
      "learning_rate": 8.836107614801243e-05,
      "loss": 0.2508,
      "step": 2828
    },
    {
      "epoch": 2.6862093520056964,
      "grad_norm": 0.06960198283195496,
      "learning_rate": 8.830181437969269e-05,
      "loss": 0.2759,
      "step": 2829
    },
    {
      "epoch": 2.6871587942084023,
      "grad_norm": 0.04306569695472717,
      "learning_rate": 8.824255677643518e-05,
      "loss": 0.2545,
      "step": 2830
    },
    {
      "epoch": 2.6881082364111086,
      "grad_norm": 0.0954747125506401,
      "learning_rate": 8.818330335933809e-05,
      "loss": 0.2554,
      "step": 2831
    },
    {
      "epoch": 2.6890576786138145,
      "grad_norm": 0.047692351043224335,
      "learning_rate": 8.812405414949825e-05,
      "loss": 0.2569,
      "step": 2832
    },
    {
      "epoch": 2.6900071208165204,
      "grad_norm": 0.04142098128795624,
      "learning_rate": 8.806480916801099e-05,
      "loss": 0.2584,
      "step": 2833
    },
    {
      "epoch": 2.6909565630192263,
      "grad_norm": 0.039575859904289246,
      "learning_rate": 8.800556843597002e-05,
      "loss": 0.2518,
      "step": 2834
    },
    {
      "epoch": 2.691906005221932,
      "grad_norm": 0.07023467123508453,
      "learning_rate": 8.79463319744677e-05,
      "loss": 0.263,
      "step": 2835
    },
    {
      "epoch": 2.692855447424638,
      "grad_norm": 0.07248935103416443,
      "learning_rate": 8.788709980459472e-05,
      "loss": 0.2771,
      "step": 2836
    },
    {
      "epoch": 2.693804889627344,
      "grad_norm": 0.07990462332963943,
      "learning_rate": 8.782787194744033e-05,
      "loss": 0.2648,
      "step": 2837
    },
    {
      "epoch": 2.6947543318300498,
      "grad_norm": 0.05330291762948036,
      "learning_rate": 8.77686484240922e-05,
      "loss": 0.2738,
      "step": 2838
    },
    {
      "epoch": 2.6957037740327556,
      "grad_norm": 0.07456424087285995,
      "learning_rate": 8.770942925563654e-05,
      "loss": 0.2845,
      "step": 2839
    },
    {
      "epoch": 2.6966532162354615,
      "grad_norm": 0.04465353116393089,
      "learning_rate": 8.765021446315785e-05,
      "loss": 0.2571,
      "step": 2840
    },
    {
      "epoch": 2.697602658438168,
      "grad_norm": 0.03848935291171074,
      "learning_rate": 8.75910040677392e-05,
      "loss": 0.2521,
      "step": 2841
    },
    {
      "epoch": 2.6985521006408737,
      "grad_norm": 0.05211983248591423,
      "learning_rate": 8.753179809046211e-05,
      "loss": 0.2553,
      "step": 2842
    },
    {
      "epoch": 2.6995015428435796,
      "grad_norm": 0.04083755984902382,
      "learning_rate": 8.747259655240642e-05,
      "loss": 0.26,
      "step": 2843
    },
    {
      "epoch": 2.7004509850462854,
      "grad_norm": 0.04455624893307686,
      "learning_rate": 8.741339947465054e-05,
      "loss": 0.2516,
      "step": 2844
    },
    {
      "epoch": 2.7014004272489913,
      "grad_norm": 0.04386875405907631,
      "learning_rate": 8.735420687827107e-05,
      "loss": 0.2632,
      "step": 2845
    },
    {
      "epoch": 2.702349869451697,
      "grad_norm": 0.044795092195272446,
      "learning_rate": 8.729501878434325e-05,
      "loss": 0.2694,
      "step": 2846
    },
    {
      "epoch": 2.703299311654403,
      "grad_norm": 0.045176975429058075,
      "learning_rate": 8.723583521394054e-05,
      "loss": 0.2647,
      "step": 2847
    },
    {
      "epoch": 2.704248753857109,
      "grad_norm": 0.044440098106861115,
      "learning_rate": 8.717665618813491e-05,
      "loss": 0.257,
      "step": 2848
    },
    {
      "epoch": 2.705198196059815,
      "grad_norm": 0.04165451228618622,
      "learning_rate": 8.711748172799667e-05,
      "loss": 0.252,
      "step": 2849
    },
    {
      "epoch": 2.7061476382625207,
      "grad_norm": 0.04421938210725784,
      "learning_rate": 8.705831185459445e-05,
      "loss": 0.2784,
      "step": 2850
    },
    {
      "epoch": 2.7070970804652266,
      "grad_norm": 0.05041582137346268,
      "learning_rate": 8.699914658899535e-05,
      "loss": 0.267,
      "step": 2851
    },
    {
      "epoch": 2.7080465226679324,
      "grad_norm": 0.061946917325258255,
      "learning_rate": 8.693998595226473e-05,
      "loss": 0.2769,
      "step": 2852
    },
    {
      "epoch": 2.7089959648706383,
      "grad_norm": 0.06854099780321121,
      "learning_rate": 8.68808299654664e-05,
      "loss": 0.2858,
      "step": 2853
    },
    {
      "epoch": 2.709945407073344,
      "grad_norm": 0.04000728577375412,
      "learning_rate": 8.682167864966236e-05,
      "loss": 0.2626,
      "step": 2854
    },
    {
      "epoch": 2.71089484927605,
      "grad_norm": 0.047487881034612656,
      "learning_rate": 8.676253202591317e-05,
      "loss": 0.2628,
      "step": 2855
    },
    {
      "epoch": 2.7118442914787564,
      "grad_norm": 0.038573890924453735,
      "learning_rate": 8.670339011527748e-05,
      "loss": 0.2601,
      "step": 2856
    },
    {
      "epoch": 2.7127937336814623,
      "grad_norm": 0.04861799627542496,
      "learning_rate": 8.664425293881247e-05,
      "loss": 0.2686,
      "step": 2857
    },
    {
      "epoch": 2.713743175884168,
      "grad_norm": 0.042451322078704834,
      "learning_rate": 8.658512051757353e-05,
      "loss": 0.2652,
      "step": 2858
    },
    {
      "epoch": 2.714692618086874,
      "grad_norm": 0.044802404940128326,
      "learning_rate": 8.652599287261431e-05,
      "loss": 0.2687,
      "step": 2859
    },
    {
      "epoch": 2.71564206028958,
      "grad_norm": 0.0430615097284317,
      "learning_rate": 8.646687002498692e-05,
      "loss": 0.2524,
      "step": 2860
    },
    {
      "epoch": 2.7165915024922858,
      "grad_norm": 0.04607314616441727,
      "learning_rate": 8.640775199574154e-05,
      "loss": 0.2753,
      "step": 2861
    },
    {
      "epoch": 2.7175409446949916,
      "grad_norm": 0.04442737251520157,
      "learning_rate": 8.634863880592686e-05,
      "loss": 0.2571,
      "step": 2862
    },
    {
      "epoch": 2.7184903868976975,
      "grad_norm": 0.0405968502163887,
      "learning_rate": 8.628953047658967e-05,
      "loss": 0.2634,
      "step": 2863
    },
    {
      "epoch": 2.7194398291004034,
      "grad_norm": 0.04561749845743179,
      "learning_rate": 8.623042702877515e-05,
      "loss": 0.2418,
      "step": 2864
    },
    {
      "epoch": 2.7203892713031093,
      "grad_norm": 0.039593134075403214,
      "learning_rate": 8.617132848352671e-05,
      "loss": 0.2687,
      "step": 2865
    },
    {
      "epoch": 2.7213387135058156,
      "grad_norm": 0.04114865884184837,
      "learning_rate": 8.611223486188591e-05,
      "loss": 0.2582,
      "step": 2866
    },
    {
      "epoch": 2.7222881557085215,
      "grad_norm": 0.073084756731987,
      "learning_rate": 8.605314618489275e-05,
      "loss": 0.2665,
      "step": 2867
    },
    {
      "epoch": 2.7232375979112273,
      "grad_norm": 0.03542012348771095,
      "learning_rate": 8.59940624735853e-05,
      "loss": 0.2596,
      "step": 2868
    },
    {
      "epoch": 2.724187040113933,
      "grad_norm": 0.07219967246055603,
      "learning_rate": 8.593498374899998e-05,
      "loss": 0.2651,
      "step": 2869
    },
    {
      "epoch": 2.725136482316639,
      "grad_norm": 0.05063614621758461,
      "learning_rate": 8.58759100321713e-05,
      "loss": 0.284,
      "step": 2870
    },
    {
      "epoch": 2.726085924519345,
      "grad_norm": 0.03707686811685562,
      "learning_rate": 8.581684134413216e-05,
      "loss": 0.2555,
      "step": 2871
    },
    {
      "epoch": 2.727035366722051,
      "grad_norm": 0.0445532388985157,
      "learning_rate": 8.57577777059135e-05,
      "loss": 0.2623,
      "step": 2872
    },
    {
      "epoch": 2.7279848089247567,
      "grad_norm": 0.040541306138038635,
      "learning_rate": 8.569871913854458e-05,
      "loss": 0.2613,
      "step": 2873
    },
    {
      "epoch": 2.7289342511274626,
      "grad_norm": 0.03940880671143532,
      "learning_rate": 8.563966566305286e-05,
      "loss": 0.2498,
      "step": 2874
    },
    {
      "epoch": 2.7298836933301684,
      "grad_norm": 0.04151361435651779,
      "learning_rate": 8.558061730046384e-05,
      "loss": 0.2578,
      "step": 2875
    },
    {
      "epoch": 2.7308331355328743,
      "grad_norm": 0.04065687954425812,
      "learning_rate": 8.552157407180139e-05,
      "loss": 0.2538,
      "step": 2876
    },
    {
      "epoch": 2.73178257773558,
      "grad_norm": 0.04001820832490921,
      "learning_rate": 8.54625359980874e-05,
      "loss": 0.2525,
      "step": 2877
    },
    {
      "epoch": 2.732732019938286,
      "grad_norm": 0.05147264897823334,
      "learning_rate": 8.540350310034206e-05,
      "loss": 0.2731,
      "step": 2878
    },
    {
      "epoch": 2.733681462140992,
      "grad_norm": 0.04745444655418396,
      "learning_rate": 8.534447539958358e-05,
      "loss": 0.2603,
      "step": 2879
    },
    {
      "epoch": 2.734630904343698,
      "grad_norm": 0.07255495339632034,
      "learning_rate": 8.528545291682838e-05,
      "loss": 0.2694,
      "step": 2880
    },
    {
      "epoch": 2.735580346546404,
      "grad_norm": 0.04382932558655739,
      "learning_rate": 8.522643567309112e-05,
      "loss": 0.2683,
      "step": 2881
    },
    {
      "epoch": 2.73652978874911,
      "grad_norm": 0.051482584327459335,
      "learning_rate": 8.516742368938439e-05,
      "loss": 0.251,
      "step": 2882
    },
    {
      "epoch": 2.737479230951816,
      "grad_norm": 0.05244822800159454,
      "learning_rate": 8.510841698671912e-05,
      "loss": 0.2603,
      "step": 2883
    },
    {
      "epoch": 2.7384286731545218,
      "grad_norm": 0.035695601254701614,
      "learning_rate": 8.504941558610416e-05,
      "loss": 0.2546,
      "step": 2884
    },
    {
      "epoch": 2.7393781153572276,
      "grad_norm": 0.04054943472146988,
      "learning_rate": 8.499041950854665e-05,
      "loss": 0.2577,
      "step": 2885
    },
    {
      "epoch": 2.7403275575599335,
      "grad_norm": 0.04638770595192909,
      "learning_rate": 8.49314287750517e-05,
      "loss": 0.2614,
      "step": 2886
    },
    {
      "epoch": 2.7412769997626394,
      "grad_norm": 0.0429297499358654,
      "learning_rate": 8.487244340662264e-05,
      "loss": 0.2651,
      "step": 2887
    },
    {
      "epoch": 2.7422264419653453,
      "grad_norm": 0.03873150050640106,
      "learning_rate": 8.481346342426073e-05,
      "loss": 0.2543,
      "step": 2888
    },
    {
      "epoch": 2.743175884168051,
      "grad_norm": 0.0463930107653141,
      "learning_rate": 8.475448884896547e-05,
      "loss": 0.2638,
      "step": 2889
    },
    {
      "epoch": 2.7441253263707575,
      "grad_norm": 0.04211205989122391,
      "learning_rate": 8.469551970173437e-05,
      "loss": 0.2513,
      "step": 2890
    },
    {
      "epoch": 2.7450747685734633,
      "grad_norm": 0.05451524630188942,
      "learning_rate": 8.463655600356297e-05,
      "loss": 0.2805,
      "step": 2891
    },
    {
      "epoch": 2.746024210776169,
      "grad_norm": 0.0439947172999382,
      "learning_rate": 8.457759777544499e-05,
      "loss": 0.2668,
      "step": 2892
    },
    {
      "epoch": 2.746973652978875,
      "grad_norm": 0.039369143545627594,
      "learning_rate": 8.451864503837202e-05,
      "loss": 0.2489,
      "step": 2893
    },
    {
      "epoch": 2.747923095181581,
      "grad_norm": 0.03729270026087761,
      "learning_rate": 8.445969781333385e-05,
      "loss": 0.2619,
      "step": 2894
    },
    {
      "epoch": 2.748872537384287,
      "grad_norm": 0.049005176872015,
      "learning_rate": 8.440075612131823e-05,
      "loss": 0.2662,
      "step": 2895
    },
    {
      "epoch": 2.7498219795869927,
      "grad_norm": 0.03956499695777893,
      "learning_rate": 8.434181998331101e-05,
      "loss": 0.2656,
      "step": 2896
    },
    {
      "epoch": 2.7507714217896986,
      "grad_norm": 0.039099693298339844,
      "learning_rate": 8.428288942029593e-05,
      "loss": 0.2699,
      "step": 2897
    },
    {
      "epoch": 2.7517208639924045,
      "grad_norm": 0.04464574530720711,
      "learning_rate": 8.422396445325487e-05,
      "loss": 0.2733,
      "step": 2898
    },
    {
      "epoch": 2.7526703061951103,
      "grad_norm": 0.06105086952447891,
      "learning_rate": 8.416504510316773e-05,
      "loss": 0.2753,
      "step": 2899
    },
    {
      "epoch": 2.753619748397816,
      "grad_norm": 0.06804367899894714,
      "learning_rate": 8.410613139101227e-05,
      "loss": 0.2527,
      "step": 2900
    },
    {
      "epoch": 2.754569190600522,
      "grad_norm": 0.04179168865084648,
      "learning_rate": 8.404722333776444e-05,
      "loss": 0.2512,
      "step": 2901
    },
    {
      "epoch": 2.755518632803228,
      "grad_norm": 0.056286826729774475,
      "learning_rate": 8.398832096439795e-05,
      "loss": 0.2412,
      "step": 2902
    },
    {
      "epoch": 2.756468075005934,
      "grad_norm": 0.04326535761356354,
      "learning_rate": 8.392942429188466e-05,
      "loss": 0.2639,
      "step": 2903
    },
    {
      "epoch": 2.7574175172086397,
      "grad_norm": 0.04712899401783943,
      "learning_rate": 8.387053334119432e-05,
      "loss": 0.2626,
      "step": 2904
    },
    {
      "epoch": 2.7583669594113456,
      "grad_norm": 0.04596768319606781,
      "learning_rate": 8.381164813329469e-05,
      "loss": 0.254,
      "step": 2905
    },
    {
      "epoch": 2.759316401614052,
      "grad_norm": 0.04214185103774071,
      "learning_rate": 8.375276868915148e-05,
      "loss": 0.2611,
      "step": 2906
    },
    {
      "epoch": 2.7602658438167578,
      "grad_norm": 0.05700606480240822,
      "learning_rate": 8.369389502972828e-05,
      "loss": 0.2549,
      "step": 2907
    },
    {
      "epoch": 2.7612152860194636,
      "grad_norm": 0.04782046377658844,
      "learning_rate": 8.36350271759867e-05,
      "loss": 0.2588,
      "step": 2908
    },
    {
      "epoch": 2.7621647282221695,
      "grad_norm": 0.05099222809076309,
      "learning_rate": 8.357616514888624e-05,
      "loss": 0.2648,
      "step": 2909
    },
    {
      "epoch": 2.7631141704248754,
      "grad_norm": 0.06003917381167412,
      "learning_rate": 8.351730896938437e-05,
      "loss": 0.2615,
      "step": 2910
    },
    {
      "epoch": 2.7640636126275813,
      "grad_norm": 0.039956480264663696,
      "learning_rate": 8.34584586584364e-05,
      "loss": 0.2519,
      "step": 2911
    },
    {
      "epoch": 2.765013054830287,
      "grad_norm": 0.040129296481609344,
      "learning_rate": 8.339961423699562e-05,
      "loss": 0.2535,
      "step": 2912
    },
    {
      "epoch": 2.765962497032993,
      "grad_norm": 0.05875218287110329,
      "learning_rate": 8.334077572601318e-05,
      "loss": 0.2833,
      "step": 2913
    },
    {
      "epoch": 2.766911939235699,
      "grad_norm": 0.05164382606744766,
      "learning_rate": 8.328194314643816e-05,
      "loss": 0.2645,
      "step": 2914
    },
    {
      "epoch": 2.767861381438405,
      "grad_norm": 0.05058974772691727,
      "learning_rate": 8.322311651921759e-05,
      "loss": 0.2667,
      "step": 2915
    },
    {
      "epoch": 2.768810823641111,
      "grad_norm": 0.03610742464661598,
      "learning_rate": 8.316429586529615e-05,
      "loss": 0.2556,
      "step": 2916
    },
    {
      "epoch": 2.769760265843817,
      "grad_norm": 0.04182818531990051,
      "learning_rate": 8.310548120561667e-05,
      "loss": 0.2643,
      "step": 2917
    },
    {
      "epoch": 2.770709708046523,
      "grad_norm": 0.041662104427814484,
      "learning_rate": 8.304667256111965e-05,
      "loss": 0.2564,
      "step": 2918
    },
    {
      "epoch": 2.7716591502492287,
      "grad_norm": 0.04125606641173363,
      "learning_rate": 8.29878699527436e-05,
      "loss": 0.2596,
      "step": 2919
    },
    {
      "epoch": 2.7726085924519346,
      "grad_norm": 0.036673858761787415,
      "learning_rate": 8.292907340142471e-05,
      "loss": 0.2568,
      "step": 2920
    },
    {
      "epoch": 2.7735580346546405,
      "grad_norm": 0.04233416169881821,
      "learning_rate": 8.287028292809717e-05,
      "loss": 0.2677,
      "step": 2921
    },
    {
      "epoch": 2.7745074768573463,
      "grad_norm": 0.042057108134031296,
      "learning_rate": 8.281149855369293e-05,
      "loss": 0.2509,
      "step": 2922
    },
    {
      "epoch": 2.775456919060052,
      "grad_norm": 0.05955182760953903,
      "learning_rate": 8.275272029914177e-05,
      "loss": 0.2785,
      "step": 2923
    },
    {
      "epoch": 2.776406361262758,
      "grad_norm": 0.036538127809762955,
      "learning_rate": 8.269394818537133e-05,
      "loss": 0.2476,
      "step": 2924
    },
    {
      "epoch": 2.777355803465464,
      "grad_norm": 0.052744511514902115,
      "learning_rate": 8.263518223330697e-05,
      "loss": 0.2859,
      "step": 2925
    },
    {
      "epoch": 2.77830524566817,
      "grad_norm": 0.06520146876573563,
      "learning_rate": 8.2576422463872e-05,
      "loss": 0.2907,
      "step": 2926
    },
    {
      "epoch": 2.7792546878708757,
      "grad_norm": 0.04221632331609726,
      "learning_rate": 8.251766889798738e-05,
      "loss": 0.2617,
      "step": 2927
    },
    {
      "epoch": 2.7802041300735816,
      "grad_norm": 0.042480263859033585,
      "learning_rate": 8.245892155657201e-05,
      "loss": 0.2684,
      "step": 2928
    },
    {
      "epoch": 2.7811535722762875,
      "grad_norm": 0.04169554263353348,
      "learning_rate": 8.240018046054241e-05,
      "loss": 0.2633,
      "step": 2929
    },
    {
      "epoch": 2.7821030144789933,
      "grad_norm": 0.04474500194191933,
      "learning_rate": 8.2341445630813e-05,
      "loss": 0.2685,
      "step": 2930
    },
    {
      "epoch": 2.7830524566816996,
      "grad_norm": 0.043009109795093536,
      "learning_rate": 8.228271708829595e-05,
      "loss": 0.2658,
      "step": 2931
    },
    {
      "epoch": 2.7840018988844055,
      "grad_norm": 0.04732086881995201,
      "learning_rate": 8.222399485390114e-05,
      "loss": 0.2713,
      "step": 2932
    },
    {
      "epoch": 2.7849513410871114,
      "grad_norm": 0.051858678460121155,
      "learning_rate": 8.216527894853629e-05,
      "loss": 0.2784,
      "step": 2933
    },
    {
      "epoch": 2.7859007832898173,
      "grad_norm": 0.08715417236089706,
      "learning_rate": 8.210656939310672e-05,
      "loss": 0.2532,
      "step": 2934
    },
    {
      "epoch": 2.786850225492523,
      "grad_norm": 0.04026304930448532,
      "learning_rate": 8.204786620851568e-05,
      "loss": 0.2589,
      "step": 2935
    },
    {
      "epoch": 2.787799667695229,
      "grad_norm": 0.0430745892226696,
      "learning_rate": 8.198916941566397e-05,
      "loss": 0.2661,
      "step": 2936
    },
    {
      "epoch": 2.788749109897935,
      "grad_norm": 0.04209771007299423,
      "learning_rate": 8.193047903545023e-05,
      "loss": 0.2562,
      "step": 2937
    },
    {
      "epoch": 2.7896985521006408,
      "grad_norm": 0.03844306617975235,
      "learning_rate": 8.187179508877085e-05,
      "loss": 0.2528,
      "step": 2938
    },
    {
      "epoch": 2.7906479943033466,
      "grad_norm": 0.045125510543584824,
      "learning_rate": 8.181311759651975e-05,
      "loss": 0.2618,
      "step": 2939
    },
    {
      "epoch": 2.791597436506053,
      "grad_norm": 0.054230380803346634,
      "learning_rate": 8.175444657958876e-05,
      "loss": 0.2593,
      "step": 2940
    },
    {
      "epoch": 2.792546878708759,
      "grad_norm": 0.04647655412554741,
      "learning_rate": 8.16957820588672e-05,
      "loss": 0.2635,
      "step": 2941
    },
    {
      "epoch": 2.7934963209114647,
      "grad_norm": 0.046050019562244415,
      "learning_rate": 8.163712405524235e-05,
      "loss": 0.2712,
      "step": 2942
    },
    {
      "epoch": 2.7944457631141706,
      "grad_norm": 0.04571513459086418,
      "learning_rate": 8.157847258959885e-05,
      "loss": 0.2592,
      "step": 2943
    },
    {
      "epoch": 2.7953952053168765,
      "grad_norm": 0.0384996272623539,
      "learning_rate": 8.151982768281927e-05,
      "loss": 0.2524,
      "step": 2944
    },
    {
      "epoch": 2.7963446475195823,
      "grad_norm": 0.05060908943414688,
      "learning_rate": 8.146118935578367e-05,
      "loss": 0.2643,
      "step": 2945
    },
    {
      "epoch": 2.797294089722288,
      "grad_norm": 0.04255904257297516,
      "learning_rate": 8.140255762936989e-05,
      "loss": 0.2671,
      "step": 2946
    },
    {
      "epoch": 2.798243531924994,
      "grad_norm": 0.045090414583683014,
      "learning_rate": 8.13439325244534e-05,
      "loss": 0.2673,
      "step": 2947
    },
    {
      "epoch": 2.7991929741277,
      "grad_norm": 0.11718107759952545,
      "learning_rate": 8.128531406190721e-05,
      "loss": 0.2731,
      "step": 2948
    },
    {
      "epoch": 2.800142416330406,
      "grad_norm": 0.03717552870512009,
      "learning_rate": 8.122670226260207e-05,
      "loss": 0.2577,
      "step": 2949
    },
    {
      "epoch": 2.8010918585331117,
      "grad_norm": 0.03922560065984726,
      "learning_rate": 8.116809714740634e-05,
      "loss": 0.266,
      "step": 2950
    },
    {
      "epoch": 2.8020413007358176,
      "grad_norm": 0.04070358723402023,
      "learning_rate": 8.1109498737186e-05,
      "loss": 0.2696,
      "step": 2951
    },
    {
      "epoch": 2.8029907429385235,
      "grad_norm": 0.08717595040798187,
      "learning_rate": 8.105090705280456e-05,
      "loss": 0.2698,
      "step": 2952
    },
    {
      "epoch": 2.8039401851412293,
      "grad_norm": 0.04092169925570488,
      "learning_rate": 8.099232211512326e-05,
      "loss": 0.267,
      "step": 2953
    },
    {
      "epoch": 2.804889627343935,
      "grad_norm": 0.03971577063202858,
      "learning_rate": 8.093374394500088e-05,
      "loss": 0.2692,
      "step": 2954
    },
    {
      "epoch": 2.805839069546641,
      "grad_norm": 0.04230726882815361,
      "learning_rate": 8.087517256329376e-05,
      "loss": 0.2655,
      "step": 2955
    },
    {
      "epoch": 2.8067885117493474,
      "grad_norm": 0.04096872732043266,
      "learning_rate": 8.081660799085594e-05,
      "loss": 0.2658,
      "step": 2956
    },
    {
      "epoch": 2.8077379539520533,
      "grad_norm": 0.04708344489336014,
      "learning_rate": 8.075805024853884e-05,
      "loss": 0.2707,
      "step": 2957
    },
    {
      "epoch": 2.808687396154759,
      "grad_norm": 0.054247885942459106,
      "learning_rate": 8.069949935719165e-05,
      "loss": 0.2598,
      "step": 2958
    },
    {
      "epoch": 2.809636838357465,
      "grad_norm": 0.05410867556929588,
      "learning_rate": 8.064095533766095e-05,
      "loss": 0.2899,
      "step": 2959
    },
    {
      "epoch": 2.810586280560171,
      "grad_norm": 0.10300780832767487,
      "learning_rate": 8.058241821079105e-05,
      "loss": 0.2654,
      "step": 2960
    },
    {
      "epoch": 2.8115357227628768,
      "grad_norm": 0.04165608435869217,
      "learning_rate": 8.052388799742361e-05,
      "loss": 0.2596,
      "step": 2961
    },
    {
      "epoch": 2.8124851649655827,
      "grad_norm": 0.04763954505324364,
      "learning_rate": 8.046536471839798e-05,
      "loss": 0.2515,
      "step": 2962
    },
    {
      "epoch": 2.8134346071682885,
      "grad_norm": 0.04077402129769325,
      "learning_rate": 8.0406848394551e-05,
      "loss": 0.2553,
      "step": 2963
    },
    {
      "epoch": 2.8143840493709944,
      "grad_norm": 0.04058780148625374,
      "learning_rate": 8.034833904671698e-05,
      "loss": 0.2518,
      "step": 2964
    },
    {
      "epoch": 2.8153334915737007,
      "grad_norm": 0.052179377526044846,
      "learning_rate": 8.028983669572786e-05,
      "loss": 0.2647,
      "step": 2965
    },
    {
      "epoch": 2.8162829337764066,
      "grad_norm": 0.04284640774130821,
      "learning_rate": 8.023134136241293e-05,
      "loss": 0.2568,
      "step": 2966
    },
    {
      "epoch": 2.8172323759791125,
      "grad_norm": 0.04037092253565788,
      "learning_rate": 8.017285306759914e-05,
      "loss": 0.2431,
      "step": 2967
    },
    {
      "epoch": 2.8181818181818183,
      "grad_norm": 0.05579729750752449,
      "learning_rate": 8.011437183211081e-05,
      "loss": 0.2807,
      "step": 2968
    },
    {
      "epoch": 2.819131260384524,
      "grad_norm": 0.05057593807578087,
      "learning_rate": 8.005589767676986e-05,
      "loss": 0.2726,
      "step": 2969
    },
    {
      "epoch": 2.82008070258723,
      "grad_norm": 0.04789821803569794,
      "learning_rate": 7.999743062239557e-05,
      "loss": 0.2698,
      "step": 2970
    },
    {
      "epoch": 2.821030144789936,
      "grad_norm": 0.056588314473629,
      "learning_rate": 7.993897068980477e-05,
      "loss": 0.2746,
      "step": 2971
    },
    {
      "epoch": 2.821979586992642,
      "grad_norm": 0.04293562099337578,
      "learning_rate": 7.988051789981176e-05,
      "loss": 0.2607,
      "step": 2972
    },
    {
      "epoch": 2.8229290291953477,
      "grad_norm": 0.03815620765089989,
      "learning_rate": 7.982207227322824e-05,
      "loss": 0.247,
      "step": 2973
    },
    {
      "epoch": 2.8238784713980536,
      "grad_norm": 0.06469085812568665,
      "learning_rate": 7.976363383086342e-05,
      "loss": 0.264,
      "step": 2974
    },
    {
      "epoch": 2.8248279136007595,
      "grad_norm": 0.03865661844611168,
      "learning_rate": 7.97052025935239e-05,
      "loss": 0.2555,
      "step": 2975
    },
    {
      "epoch": 2.8257773558034653,
      "grad_norm": 0.04179610684514046,
      "learning_rate": 7.964677858201376e-05,
      "loss": 0.2663,
      "step": 2976
    },
    {
      "epoch": 2.826726798006171,
      "grad_norm": 0.043444134294986725,
      "learning_rate": 7.958836181713445e-05,
      "loss": 0.2639,
      "step": 2977
    },
    {
      "epoch": 2.827676240208877,
      "grad_norm": 0.0387243777513504,
      "learning_rate": 7.952995231968488e-05,
      "loss": 0.2669,
      "step": 2978
    },
    {
      "epoch": 2.828625682411583,
      "grad_norm": 0.03931890428066254,
      "learning_rate": 7.947155011046144e-05,
      "loss": 0.2537,
      "step": 2979
    },
    {
      "epoch": 2.829575124614289,
      "grad_norm": 0.047014620155096054,
      "learning_rate": 7.941315521025775e-05,
      "loss": 0.2759,
      "step": 2980
    },
    {
      "epoch": 2.830524566816995,
      "grad_norm": 0.03905611112713814,
      "learning_rate": 7.935476763986503e-05,
      "loss": 0.2638,
      "step": 2981
    },
    {
      "epoch": 2.831474009019701,
      "grad_norm": 0.053726695477962494,
      "learning_rate": 7.92963874200717e-05,
      "loss": 0.2709,
      "step": 2982
    },
    {
      "epoch": 2.832423451222407,
      "grad_norm": 0.03801732510328293,
      "learning_rate": 7.923801457166372e-05,
      "loss": 0.2511,
      "step": 2983
    },
    {
      "epoch": 2.833372893425113,
      "grad_norm": 0.04040442034602165,
      "learning_rate": 7.91796491154243e-05,
      "loss": 0.2509,
      "step": 2984
    },
    {
      "epoch": 2.8343223356278187,
      "grad_norm": 0.046596333384513855,
      "learning_rate": 7.912129107213416e-05,
      "loss": 0.2878,
      "step": 2985
    },
    {
      "epoch": 2.8352717778305245,
      "grad_norm": 0.04395151510834694,
      "learning_rate": 7.90629404625712e-05,
      "loss": 0.2664,
      "step": 2986
    },
    {
      "epoch": 2.8362212200332304,
      "grad_norm": 0.04623299464583397,
      "learning_rate": 7.900459730751084e-05,
      "loss": 0.2589,
      "step": 2987
    },
    {
      "epoch": 2.8371706622359363,
      "grad_norm": 0.04465977102518082,
      "learning_rate": 7.894626162772578e-05,
      "loss": 0.2686,
      "step": 2988
    },
    {
      "epoch": 2.838120104438642,
      "grad_norm": 0.04362845793366432,
      "learning_rate": 7.888793344398601e-05,
      "loss": 0.2608,
      "step": 2989
    },
    {
      "epoch": 2.8390695466413485,
      "grad_norm": 0.040313150733709335,
      "learning_rate": 7.882961277705895e-05,
      "loss": 0.2555,
      "step": 2990
    },
    {
      "epoch": 2.8400189888440543,
      "grad_norm": 0.03675536438822746,
      "learning_rate": 7.877129964770924e-05,
      "loss": 0.2434,
      "step": 2991
    },
    {
      "epoch": 2.8409684310467602,
      "grad_norm": 0.059962980449199677,
      "learning_rate": 7.871299407669892e-05,
      "loss": 0.2819,
      "step": 2992
    },
    {
      "epoch": 2.841917873249466,
      "grad_norm": 0.040946152061223984,
      "learning_rate": 7.865469608478726e-05,
      "loss": 0.2615,
      "step": 2993
    },
    {
      "epoch": 2.842867315452172,
      "grad_norm": 0.04320983588695526,
      "learning_rate": 7.859640569273093e-05,
      "loss": 0.2619,
      "step": 2994
    },
    {
      "epoch": 2.843816757654878,
      "grad_norm": 0.04121886566281319,
      "learning_rate": 7.853812292128387e-05,
      "loss": 0.2612,
      "step": 2995
    },
    {
      "epoch": 2.8447661998575837,
      "grad_norm": 0.043592579662799835,
      "learning_rate": 7.847984779119717e-05,
      "loss": 0.2572,
      "step": 2996
    },
    {
      "epoch": 2.8457156420602896,
      "grad_norm": 0.04470603168010712,
      "learning_rate": 7.84215803232194e-05,
      "loss": 0.247,
      "step": 2997
    },
    {
      "epoch": 2.8466650842629955,
      "grad_norm": 0.0900123119354248,
      "learning_rate": 7.836332053809625e-05,
      "loss": 0.2607,
      "step": 2998
    },
    {
      "epoch": 2.8476145264657013,
      "grad_norm": 0.08290302008390427,
      "learning_rate": 7.830506845657082e-05,
      "loss": 0.2535,
      "step": 2999
    },
    {
      "epoch": 2.848563968668407,
      "grad_norm": 0.04399091377854347,
      "learning_rate": 7.824682409938328e-05,
      "loss": 0.2578,
      "step": 3000
    },
    {
      "epoch": 2.848563968668407,
      "eval_loss": 0.26121968030929565,
      "eval_runtime": 37.7523,
      "eval_samples_per_second": 2.278,
      "eval_steps_per_second": 2.278,
      "step": 3000
    },
    {
      "epoch": 2.849513410871113,
      "grad_norm": 0.03738872706890106,
      "learning_rate": 7.81885874872712e-05,
      "loss": 0.2466,
      "step": 3001
    },
    {
      "epoch": 2.850462853073819,
      "grad_norm": 0.048882003873586655,
      "learning_rate": 7.813035864096932e-05,
      "loss": 0.2604,
      "step": 3002
    },
    {
      "epoch": 2.851412295276525,
      "grad_norm": 0.037704020738601685,
      "learning_rate": 7.807213758120966e-05,
      "loss": 0.2504,
      "step": 3003
    },
    {
      "epoch": 2.8523617374792307,
      "grad_norm": 0.07369041442871094,
      "learning_rate": 7.801392432872149e-05,
      "loss": 0.2461,
      "step": 3004
    },
    {
      "epoch": 2.8533111796819366,
      "grad_norm": 0.05542570352554321,
      "learning_rate": 7.795571890423116e-05,
      "loss": 0.2907,
      "step": 3005
    },
    {
      "epoch": 2.854260621884643,
      "grad_norm": 0.05089758709073067,
      "learning_rate": 7.789752132846239e-05,
      "loss": 0.2747,
      "step": 3006
    },
    {
      "epoch": 2.855210064087349,
      "grad_norm": 0.051443975418806076,
      "learning_rate": 7.783933162213604e-05,
      "loss": 0.2673,
      "step": 3007
    },
    {
      "epoch": 2.8561595062900547,
      "grad_norm": 0.04792041704058647,
      "learning_rate": 7.778114980597018e-05,
      "loss": 0.2619,
      "step": 3008
    },
    {
      "epoch": 2.8571089484927605,
      "grad_norm": 0.0704968124628067,
      "learning_rate": 7.772297590068002e-05,
      "loss": 0.2745,
      "step": 3009
    },
    {
      "epoch": 2.8580583906954664,
      "grad_norm": 0.05193328484892845,
      "learning_rate": 7.766480992697802e-05,
      "loss": 0.2614,
      "step": 3010
    },
    {
      "epoch": 2.8590078328981723,
      "grad_norm": 0.05074877664446831,
      "learning_rate": 7.760665190557382e-05,
      "loss": 0.2547,
      "step": 3011
    },
    {
      "epoch": 2.859957275100878,
      "grad_norm": 0.05875850468873978,
      "learning_rate": 7.754850185717415e-05,
      "loss": 0.2617,
      "step": 3012
    },
    {
      "epoch": 2.860906717303584,
      "grad_norm": 0.040531840175390244,
      "learning_rate": 7.749035980248305e-05,
      "loss": 0.254,
      "step": 3013
    },
    {
      "epoch": 2.86185615950629,
      "grad_norm": 0.04752432927489281,
      "learning_rate": 7.74322257622015e-05,
      "loss": 0.2634,
      "step": 3014
    },
    {
      "epoch": 2.8628056017089962,
      "grad_norm": 0.04267633333802223,
      "learning_rate": 7.73740997570278e-05,
      "loss": 0.254,
      "step": 3015
    },
    {
      "epoch": 2.863755043911702,
      "grad_norm": 0.054555121809244156,
      "learning_rate": 7.731598180765732e-05,
      "loss": 0.2834,
      "step": 3016
    },
    {
      "epoch": 2.864704486114408,
      "grad_norm": 0.044725362211465836,
      "learning_rate": 7.725787193478262e-05,
      "loss": 0.2594,
      "step": 3017
    },
    {
      "epoch": 2.865653928317114,
      "grad_norm": 0.04918695613741875,
      "learning_rate": 7.719977015909326e-05,
      "loss": 0.2537,
      "step": 3018
    },
    {
      "epoch": 2.8666033705198197,
      "grad_norm": 0.040399499237537384,
      "learning_rate": 7.714167650127603e-05,
      "loss": 0.2537,
      "step": 3019
    },
    {
      "epoch": 2.8675528127225256,
      "grad_norm": 0.04605403542518616,
      "learning_rate": 7.708359098201482e-05,
      "loss": 0.2625,
      "step": 3020
    },
    {
      "epoch": 2.8685022549252315,
      "grad_norm": 0.039211615920066833,
      "learning_rate": 7.702551362199056e-05,
      "loss": 0.2619,
      "step": 3021
    },
    {
      "epoch": 2.8694516971279374,
      "grad_norm": 0.040134087204933167,
      "learning_rate": 7.696744444188138e-05,
      "loss": 0.2637,
      "step": 3022
    },
    {
      "epoch": 2.8704011393306432,
      "grad_norm": 0.04593004286289215,
      "learning_rate": 7.690938346236233e-05,
      "loss": 0.2761,
      "step": 3023
    },
    {
      "epoch": 2.871350581533349,
      "grad_norm": 0.04389437288045883,
      "learning_rate": 7.685133070410571e-05,
      "loss": 0.2499,
      "step": 3024
    },
    {
      "epoch": 2.872300023736055,
      "grad_norm": 0.04520121216773987,
      "learning_rate": 7.67932861877808e-05,
      "loss": 0.2615,
      "step": 3025
    },
    {
      "epoch": 2.873249465938761,
      "grad_norm": 0.03421633318066597,
      "learning_rate": 7.673524993405397e-05,
      "loss": 0.2619,
      "step": 3026
    },
    {
      "epoch": 2.8741989081414667,
      "grad_norm": 0.04385941103100777,
      "learning_rate": 7.667722196358869e-05,
      "loss": 0.2595,
      "step": 3027
    },
    {
      "epoch": 2.8751483503441726,
      "grad_norm": 0.0449550487101078,
      "learning_rate": 7.661920229704535e-05,
      "loss": 0.2523,
      "step": 3028
    },
    {
      "epoch": 2.8760977925468785,
      "grad_norm": 0.037781983613967896,
      "learning_rate": 7.656119095508154e-05,
      "loss": 0.2474,
      "step": 3029
    },
    {
      "epoch": 2.8770472347495843,
      "grad_norm": 0.043374691158533096,
      "learning_rate": 7.650318795835179e-05,
      "loss": 0.2535,
      "step": 3030
    },
    {
      "epoch": 2.8779966769522907,
      "grad_norm": 0.03891807794570923,
      "learning_rate": 7.644519332750772e-05,
      "loss": 0.2531,
      "step": 3031
    },
    {
      "epoch": 2.8789461191549965,
      "grad_norm": 0.03850429132580757,
      "learning_rate": 7.638720708319789e-05,
      "loss": 0.2594,
      "step": 3032
    },
    {
      "epoch": 2.8798955613577024,
      "grad_norm": 0.03768150508403778,
      "learning_rate": 7.632922924606795e-05,
      "loss": 0.2549,
      "step": 3033
    },
    {
      "epoch": 2.8808450035604083,
      "grad_norm": 0.0817415788769722,
      "learning_rate": 7.62712598367605e-05,
      "loss": 0.2625,
      "step": 3034
    },
    {
      "epoch": 2.881794445763114,
      "grad_norm": 0.04083314538002014,
      "learning_rate": 7.62132988759152e-05,
      "loss": 0.2568,
      "step": 3035
    },
    {
      "epoch": 2.88274388796582,
      "grad_norm": 0.039418481290340424,
      "learning_rate": 7.61553463841687e-05,
      "loss": 0.2605,
      "step": 3036
    },
    {
      "epoch": 2.883693330168526,
      "grad_norm": 0.04179481044411659,
      "learning_rate": 7.60974023821545e-05,
      "loss": 0.2582,
      "step": 3037
    },
    {
      "epoch": 2.884642772371232,
      "grad_norm": 0.040783416479825974,
      "learning_rate": 7.603946689050329e-05,
      "loss": 0.2587,
      "step": 3038
    },
    {
      "epoch": 2.8855922145739377,
      "grad_norm": 0.037703026086091995,
      "learning_rate": 7.598153992984254e-05,
      "loss": 0.2611,
      "step": 3039
    },
    {
      "epoch": 2.886541656776644,
      "grad_norm": 0.03856633976101875,
      "learning_rate": 7.592362152079684e-05,
      "loss": 0.2556,
      "step": 3040
    },
    {
      "epoch": 2.88749109897935,
      "grad_norm": 0.03830847144126892,
      "learning_rate": 7.586571168398759e-05,
      "loss": 0.2596,
      "step": 3041
    },
    {
      "epoch": 2.8884405411820557,
      "grad_norm": 0.07717377692461014,
      "learning_rate": 7.580781044003324e-05,
      "loss": 0.2694,
      "step": 3042
    },
    {
      "epoch": 2.8893899833847616,
      "grad_norm": 0.0506473146378994,
      "learning_rate": 7.574991780954914e-05,
      "loss": 0.2671,
      "step": 3043
    },
    {
      "epoch": 2.8903394255874675,
      "grad_norm": 0.03902186080813408,
      "learning_rate": 7.569203381314757e-05,
      "loss": 0.2629,
      "step": 3044
    },
    {
      "epoch": 2.8912888677901734,
      "grad_norm": 0.03913324326276779,
      "learning_rate": 7.563415847143782e-05,
      "loss": 0.2595,
      "step": 3045
    },
    {
      "epoch": 2.8922383099928792,
      "grad_norm": 0.043382804840803146,
      "learning_rate": 7.557629180502593e-05,
      "loss": 0.2656,
      "step": 3046
    },
    {
      "epoch": 2.893187752195585,
      "grad_norm": 0.04160072281956673,
      "learning_rate": 7.551843383451497e-05,
      "loss": 0.2575,
      "step": 3047
    },
    {
      "epoch": 2.894137194398291,
      "grad_norm": 0.04108607769012451,
      "learning_rate": 7.54605845805049e-05,
      "loss": 0.2565,
      "step": 3048
    },
    {
      "epoch": 2.895086636600997,
      "grad_norm": 0.07181921601295471,
      "learning_rate": 7.540274406359263e-05,
      "loss": 0.2608,
      "step": 3049
    },
    {
      "epoch": 2.8960360788037027,
      "grad_norm": 0.040363240987062454,
      "learning_rate": 7.534491230437178e-05,
      "loss": 0.2639,
      "step": 3050
    },
    {
      "epoch": 2.8969855210064086,
      "grad_norm": 0.08342358469963074,
      "learning_rate": 7.528708932343304e-05,
      "loss": 0.2793,
      "step": 3051
    },
    {
      "epoch": 2.8979349632091145,
      "grad_norm": 0.06666367501020432,
      "learning_rate": 7.52292751413639e-05,
      "loss": 0.2445,
      "step": 3052
    },
    {
      "epoch": 2.8988844054118204,
      "grad_norm": 0.07102706283330917,
      "learning_rate": 7.517146977874869e-05,
      "loss": 0.2902,
      "step": 3053
    },
    {
      "epoch": 2.8998338476145262,
      "grad_norm": 0.04264743998646736,
      "learning_rate": 7.511367325616868e-05,
      "loss": 0.2565,
      "step": 3054
    },
    {
      "epoch": 2.900783289817232,
      "grad_norm": 0.04553769901394844,
      "learning_rate": 7.505588559420189e-05,
      "loss": 0.2549,
      "step": 3055
    },
    {
      "epoch": 2.9017327320199384,
      "grad_norm": 0.04745159670710564,
      "learning_rate": 7.499810681342325e-05,
      "loss": 0.2516,
      "step": 3056
    },
    {
      "epoch": 2.9026821742226443,
      "grad_norm": 0.07339516282081604,
      "learning_rate": 7.494033693440451e-05,
      "loss": 0.2724,
      "step": 3057
    },
    {
      "epoch": 2.90363161642535,
      "grad_norm": 0.04640224948525429,
      "learning_rate": 7.488257597771433e-05,
      "loss": 0.2628,
      "step": 3058
    },
    {
      "epoch": 2.904581058628056,
      "grad_norm": 0.0962815135717392,
      "learning_rate": 7.482482396391801e-05,
      "loss": 0.2716,
      "step": 3059
    },
    {
      "epoch": 2.905530500830762,
      "grad_norm": 0.04240866377949715,
      "learning_rate": 7.476708091357782e-05,
      "loss": 0.258,
      "step": 3060
    },
    {
      "epoch": 2.906479943033468,
      "grad_norm": 0.045219387859106064,
      "learning_rate": 7.470934684725283e-05,
      "loss": 0.2629,
      "step": 3061
    },
    {
      "epoch": 2.9074293852361737,
      "grad_norm": 0.053542688488960266,
      "learning_rate": 7.465162178549881e-05,
      "loss": 0.2738,
      "step": 3062
    },
    {
      "epoch": 2.9083788274388795,
      "grad_norm": 0.10473237186670303,
      "learning_rate": 7.459390574886847e-05,
      "loss": 0.2615,
      "step": 3063
    },
    {
      "epoch": 2.9093282696415854,
      "grad_norm": 0.05222583934664726,
      "learning_rate": 7.453619875791114e-05,
      "loss": 0.2616,
      "step": 3064
    },
    {
      "epoch": 2.9102777118442917,
      "grad_norm": 0.10798244178295135,
      "learning_rate": 7.447850083317307e-05,
      "loss": 0.277,
      "step": 3065
    },
    {
      "epoch": 2.9112271540469976,
      "grad_norm": 0.06557677686214447,
      "learning_rate": 7.442081199519719e-05,
      "loss": 0.2755,
      "step": 3066
    },
    {
      "epoch": 2.9121765962497035,
      "grad_norm": 0.05239195376634598,
      "learning_rate": 7.436313226452325e-05,
      "loss": 0.2772,
      "step": 3067
    },
    {
      "epoch": 2.9131260384524094,
      "grad_norm": 0.044036369770765305,
      "learning_rate": 7.43054616616878e-05,
      "loss": 0.2583,
      "step": 3068
    },
    {
      "epoch": 2.9140754806551152,
      "grad_norm": 0.07759764790534973,
      "learning_rate": 7.424780020722397e-05,
      "loss": 0.2516,
      "step": 3069
    },
    {
      "epoch": 2.915024922857821,
      "grad_norm": 0.04186937212944031,
      "learning_rate": 7.419014792166181e-05,
      "loss": 0.2619,
      "step": 3070
    },
    {
      "epoch": 2.915974365060527,
      "grad_norm": 0.04191587120294571,
      "learning_rate": 7.413250482552802e-05,
      "loss": 0.2616,
      "step": 3071
    },
    {
      "epoch": 2.916923807263233,
      "grad_norm": 0.03930152952671051,
      "learning_rate": 7.407487093934608e-05,
      "loss": 0.2592,
      "step": 3072
    },
    {
      "epoch": 2.9178732494659387,
      "grad_norm": 0.04785576090216637,
      "learning_rate": 7.401724628363608e-05,
      "loss": 0.2668,
      "step": 3073
    },
    {
      "epoch": 2.9188226916686446,
      "grad_norm": 0.0375237911939621,
      "learning_rate": 7.395963087891497e-05,
      "loss": 0.2491,
      "step": 3074
    },
    {
      "epoch": 2.9197721338713505,
      "grad_norm": 0.03894813358783722,
      "learning_rate": 7.39020247456963e-05,
      "loss": 0.2646,
      "step": 3075
    },
    {
      "epoch": 2.9207215760740564,
      "grad_norm": 0.04161771759390831,
      "learning_rate": 7.384442790449036e-05,
      "loss": 0.2592,
      "step": 3076
    },
    {
      "epoch": 2.9216710182767622,
      "grad_norm": 0.0407453328371048,
      "learning_rate": 7.378684037580417e-05,
      "loss": 0.2589,
      "step": 3077
    },
    {
      "epoch": 2.922620460479468,
      "grad_norm": 0.03704614192247391,
      "learning_rate": 7.372926218014131e-05,
      "loss": 0.246,
      "step": 3078
    },
    {
      "epoch": 2.923569902682174,
      "grad_norm": 0.04671390354633331,
      "learning_rate": 7.367169333800218e-05,
      "loss": 0.271,
      "step": 3079
    },
    {
      "epoch": 2.92451934488488,
      "grad_norm": 0.041560541838407516,
      "learning_rate": 7.361413386988378e-05,
      "loss": 0.2707,
      "step": 3080
    },
    {
      "epoch": 2.925468787087586,
      "grad_norm": 0.043347813189029694,
      "learning_rate": 7.35565837962798e-05,
      "loss": 0.2667,
      "step": 3081
    },
    {
      "epoch": 2.926418229290292,
      "grad_norm": 0.047262370586395264,
      "learning_rate": 7.34990431376805e-05,
      "loss": 0.2484,
      "step": 3082
    },
    {
      "epoch": 2.927367671492998,
      "grad_norm": 0.04352419823408127,
      "learning_rate": 7.34415119145729e-05,
      "loss": 0.2684,
      "step": 3083
    },
    {
      "epoch": 2.928317113695704,
      "grad_norm": 0.040909770876169205,
      "learning_rate": 7.338399014744061e-05,
      "loss": 0.2645,
      "step": 3084
    },
    {
      "epoch": 2.9292665558984097,
      "grad_norm": 0.03679214417934418,
      "learning_rate": 7.332647785676388e-05,
      "loss": 0.2608,
      "step": 3085
    },
    {
      "epoch": 2.9302159981011155,
      "grad_norm": 0.050959546118974686,
      "learning_rate": 7.32689750630196e-05,
      "loss": 0.2862,
      "step": 3086
    },
    {
      "epoch": 2.9311654403038214,
      "grad_norm": 0.038495469838380814,
      "learning_rate": 7.32114817866812e-05,
      "loss": 0.2562,
      "step": 3087
    },
    {
      "epoch": 2.9321148825065273,
      "grad_norm": 0.046665240079164505,
      "learning_rate": 7.315399804821888e-05,
      "loss": 0.2599,
      "step": 3088
    },
    {
      "epoch": 2.933064324709233,
      "grad_norm": 0.04234423115849495,
      "learning_rate": 7.309652386809922e-05,
      "loss": 0.2546,
      "step": 3089
    },
    {
      "epoch": 2.9340137669119395,
      "grad_norm": 0.04000794515013695,
      "learning_rate": 7.303905926678564e-05,
      "loss": 0.2579,
      "step": 3090
    },
    {
      "epoch": 2.9349632091146454,
      "grad_norm": 0.054213687777519226,
      "learning_rate": 7.298160426473796e-05,
      "loss": 0.2852,
      "step": 3091
    },
    {
      "epoch": 2.9359126513173512,
      "grad_norm": 0.03904402256011963,
      "learning_rate": 7.29241588824127e-05,
      "loss": 0.2607,
      "step": 3092
    },
    {
      "epoch": 2.936862093520057,
      "grad_norm": 0.03642754629254341,
      "learning_rate": 7.286672314026294e-05,
      "loss": 0.2553,
      "step": 3093
    },
    {
      "epoch": 2.937811535722763,
      "grad_norm": 0.042792122811079025,
      "learning_rate": 7.280929705873818e-05,
      "loss": 0.2649,
      "step": 3094
    },
    {
      "epoch": 2.938760977925469,
      "grad_norm": 0.04224555939435959,
      "learning_rate": 7.275188065828475e-05,
      "loss": 0.263,
      "step": 3095
    },
    {
      "epoch": 2.9397104201281747,
      "grad_norm": 0.1053476333618164,
      "learning_rate": 7.269447395934526e-05,
      "loss": 0.2437,
      "step": 3096
    },
    {
      "epoch": 2.9406598623308806,
      "grad_norm": 0.03846612200140953,
      "learning_rate": 7.263707698235909e-05,
      "loss": 0.2595,
      "step": 3097
    },
    {
      "epoch": 2.9416093045335865,
      "grad_norm": 0.03868841007351875,
      "learning_rate": 7.257968974776194e-05,
      "loss": 0.2478,
      "step": 3098
    },
    {
      "epoch": 2.9425587467362924,
      "grad_norm": 0.05365443229675293,
      "learning_rate": 7.252231227598623e-05,
      "loss": 0.2806,
      "step": 3099
    },
    {
      "epoch": 2.9435081889389982,
      "grad_norm": 0.03868798166513443,
      "learning_rate": 7.246494458746085e-05,
      "loss": 0.2598,
      "step": 3100
    },
    {
      "epoch": 2.944457631141704,
      "grad_norm": 0.03806505724787712,
      "learning_rate": 7.240758670261114e-05,
      "loss": 0.2528,
      "step": 3101
    },
    {
      "epoch": 2.94540707334441,
      "grad_norm": 0.04681408777832985,
      "learning_rate": 7.235023864185906e-05,
      "loss": 0.2594,
      "step": 3102
    },
    {
      "epoch": 2.946356515547116,
      "grad_norm": 0.09290296584367752,
      "learning_rate": 7.229290042562293e-05,
      "loss": 0.2503,
      "step": 3103
    },
    {
      "epoch": 2.9473059577498217,
      "grad_norm": 0.045926641672849655,
      "learning_rate": 7.223557207431772e-05,
      "loss": 0.261,
      "step": 3104
    },
    {
      "epoch": 2.948255399952528,
      "grad_norm": 0.07976659387350082,
      "learning_rate": 7.217825360835473e-05,
      "loss": 0.2597,
      "step": 3105
    },
    {
      "epoch": 2.949204842155234,
      "grad_norm": 0.060166455805301666,
      "learning_rate": 7.212094504814195e-05,
      "loss": 0.2563,
      "step": 3106
    },
    {
      "epoch": 2.95015428435794,
      "grad_norm": 0.078881174325943,
      "learning_rate": 7.206364641408357e-05,
      "loss": 0.2521,
      "step": 3107
    },
    {
      "epoch": 2.9511037265606457,
      "grad_norm": 0.04990749806165695,
      "learning_rate": 7.200635772658047e-05,
      "loss": 0.2659,
      "step": 3108
    },
    {
      "epoch": 2.9520531687633516,
      "grad_norm": 0.05827078968286514,
      "learning_rate": 7.194907900602993e-05,
      "loss": 0.2743,
      "step": 3109
    },
    {
      "epoch": 2.9530026109660574,
      "grad_norm": 0.04445386677980423,
      "learning_rate": 7.189181027282561e-05,
      "loss": 0.2537,
      "step": 3110
    },
    {
      "epoch": 2.9539520531687633,
      "grad_norm": 0.041746556758880615,
      "learning_rate": 7.183455154735774e-05,
      "loss": 0.2472,
      "step": 3111
    },
    {
      "epoch": 2.954901495371469,
      "grad_norm": 0.04664076119661331,
      "learning_rate": 7.177730285001282e-05,
      "loss": 0.2564,
      "step": 3112
    },
    {
      "epoch": 2.955850937574175,
      "grad_norm": 0.04676587134599686,
      "learning_rate": 7.172006420117394e-05,
      "loss": 0.2697,
      "step": 3113
    },
    {
      "epoch": 2.956800379776881,
      "grad_norm": 0.09390587359666824,
      "learning_rate": 7.166283562122049e-05,
      "loss": 0.3155,
      "step": 3114
    },
    {
      "epoch": 2.9577498219795872,
      "grad_norm": 0.08722305297851562,
      "learning_rate": 7.160561713052842e-05,
      "loss": 0.2422,
      "step": 3115
    },
    {
      "epoch": 2.958699264182293,
      "grad_norm": 0.067685566842556,
      "learning_rate": 7.15484087494699e-05,
      "loss": 0.2616,
      "step": 3116
    },
    {
      "epoch": 2.959648706384999,
      "grad_norm": 0.06081811711192131,
      "learning_rate": 7.149121049841363e-05,
      "loss": 0.2777,
      "step": 3117
    },
    {
      "epoch": 2.960598148587705,
      "grad_norm": 0.04524696618318558,
      "learning_rate": 7.143402239772471e-05,
      "loss": 0.264,
      "step": 3118
    },
    {
      "epoch": 2.9615475907904107,
      "grad_norm": 0.042296383529901505,
      "learning_rate": 7.137684446776453e-05,
      "loss": 0.2689,
      "step": 3119
    },
    {
      "epoch": 2.9624970329931166,
      "grad_norm": 0.051620304584503174,
      "learning_rate": 7.131967672889101e-05,
      "loss": 0.266,
      "step": 3120
    },
    {
      "epoch": 2.9634464751958225,
      "grad_norm": 0.05887624993920326,
      "learning_rate": 7.126251920145822e-05,
      "loss": 0.2769,
      "step": 3121
    },
    {
      "epoch": 2.9643959173985284,
      "grad_norm": 0.04584120586514473,
      "learning_rate": 7.12053719058168e-05,
      "loss": 0.2631,
      "step": 3122
    },
    {
      "epoch": 2.9653453596012342,
      "grad_norm": 0.07010935246944427,
      "learning_rate": 7.114823486231366e-05,
      "loss": 0.2587,
      "step": 3123
    },
    {
      "epoch": 2.96629480180394,
      "grad_norm": 0.03999679163098335,
      "learning_rate": 7.109110809129205e-05,
      "loss": 0.2525,
      "step": 3124
    },
    {
      "epoch": 2.967244244006646,
      "grad_norm": 0.049806319177150726,
      "learning_rate": 7.103399161309164e-05,
      "loss": 0.2667,
      "step": 3125
    },
    {
      "epoch": 2.968193686209352,
      "grad_norm": 0.0547214075922966,
      "learning_rate": 7.09768854480483e-05,
      "loss": 0.2807,
      "step": 3126
    },
    {
      "epoch": 2.9691431284120577,
      "grad_norm": 0.06518308818340302,
      "learning_rate": 7.091978961649434e-05,
      "loss": 0.2803,
      "step": 3127
    },
    {
      "epoch": 2.9700925706147636,
      "grad_norm": 0.05517837405204773,
      "learning_rate": 7.086270413875835e-05,
      "loss": 0.2764,
      "step": 3128
    },
    {
      "epoch": 2.9710420128174695,
      "grad_norm": 0.042132288217544556,
      "learning_rate": 7.080562903516529e-05,
      "loss": 0.2551,
      "step": 3129
    },
    {
      "epoch": 2.971991455020176,
      "grad_norm": 0.04256582632660866,
      "learning_rate": 7.074856432603628e-05,
      "loss": 0.2533,
      "step": 3130
    },
    {
      "epoch": 2.9729408972228817,
      "grad_norm": 0.05388769134879112,
      "learning_rate": 7.06915100316889e-05,
      "loss": 0.2715,
      "step": 3131
    },
    {
      "epoch": 2.9738903394255876,
      "grad_norm": 0.04420170560479164,
      "learning_rate": 7.063446617243694e-05,
      "loss": 0.2763,
      "step": 3132
    },
    {
      "epoch": 2.9748397816282934,
      "grad_norm": 0.10847865045070648,
      "learning_rate": 7.057743276859048e-05,
      "loss": 0.2713,
      "step": 3133
    },
    {
      "epoch": 2.9757892238309993,
      "grad_norm": 0.07862085849046707,
      "learning_rate": 7.052040984045595e-05,
      "loss": 0.2489,
      "step": 3134
    },
    {
      "epoch": 2.976738666033705,
      "grad_norm": 0.039099689573049545,
      "learning_rate": 7.04633974083359e-05,
      "loss": 0.2534,
      "step": 3135
    },
    {
      "epoch": 2.977688108236411,
      "grad_norm": 0.06509828567504883,
      "learning_rate": 7.040639549252927e-05,
      "loss": 0.292,
      "step": 3136
    },
    {
      "epoch": 2.978637550439117,
      "grad_norm": 0.06332911550998688,
      "learning_rate": 7.034940411333125e-05,
      "loss": 0.2769,
      "step": 3137
    },
    {
      "epoch": 2.979586992641823,
      "grad_norm": 0.042316656559705734,
      "learning_rate": 7.029242329103323e-05,
      "loss": 0.2639,
      "step": 3138
    },
    {
      "epoch": 2.9805364348445287,
      "grad_norm": 0.04153651371598244,
      "learning_rate": 7.023545304592279e-05,
      "loss": 0.2698,
      "step": 3139
    },
    {
      "epoch": 2.981485877047235,
      "grad_norm": 0.04541517421603203,
      "learning_rate": 7.017849339828389e-05,
      "loss": 0.2629,
      "step": 3140
    },
    {
      "epoch": 2.982435319249941,
      "grad_norm": 0.04102837294340134,
      "learning_rate": 7.012154436839663e-05,
      "loss": 0.2621,
      "step": 3141
    },
    {
      "epoch": 2.9833847614526467,
      "grad_norm": 0.03542923927307129,
      "learning_rate": 7.00646059765373e-05,
      "loss": 0.2488,
      "step": 3142
    },
    {
      "epoch": 2.9843342036553526,
      "grad_norm": 0.04143969714641571,
      "learning_rate": 7.000767824297851e-05,
      "loss": 0.2565,
      "step": 3143
    },
    {
      "epoch": 2.9852836458580585,
      "grad_norm": 0.055488113313913345,
      "learning_rate": 6.995076118798893e-05,
      "loss": 0.2864,
      "step": 3144
    },
    {
      "epoch": 2.9862330880607644,
      "grad_norm": 0.037771549075841904,
      "learning_rate": 6.989385483183355e-05,
      "loss": 0.2584,
      "step": 3145
    },
    {
      "epoch": 2.9871825302634702,
      "grad_norm": 0.046980682760477066,
      "learning_rate": 6.983695919477345e-05,
      "loss": 0.2665,
      "step": 3146
    },
    {
      "epoch": 2.988131972466176,
      "grad_norm": 0.041630521416664124,
      "learning_rate": 6.978007429706606e-05,
      "loss": 0.2605,
      "step": 3147
    },
    {
      "epoch": 2.989081414668882,
      "grad_norm": 0.04207857325673103,
      "learning_rate": 6.972320015896473e-05,
      "loss": 0.259,
      "step": 3148
    },
    {
      "epoch": 2.990030856871588,
      "grad_norm": 0.04457660764455795,
      "learning_rate": 6.966633680071921e-05,
      "loss": 0.2596,
      "step": 3149
    },
    {
      "epoch": 2.9909802990742937,
      "grad_norm": 0.04395845904946327,
      "learning_rate": 6.960948424257532e-05,
      "loss": 0.2655,
      "step": 3150
    },
    {
      "epoch": 2.9919297412769996,
      "grad_norm": 0.0456823855638504,
      "learning_rate": 6.955264250477502e-05,
      "loss": 0.263,
      "step": 3151
    },
    {
      "epoch": 2.9928791834797055,
      "grad_norm": 0.04490295425057411,
      "learning_rate": 6.94958116075565e-05,
      "loss": 0.2653,
      "step": 3152
    },
    {
      "epoch": 2.9938286256824114,
      "grad_norm": 0.08633121848106384,
      "learning_rate": 6.94389915711539e-05,
      "loss": 0.2692,
      "step": 3153
    },
    {
      "epoch": 2.9947780678851172,
      "grad_norm": 0.04307481646537781,
      "learning_rate": 6.938218241579775e-05,
      "loss": 0.2605,
      "step": 3154
    },
    {
      "epoch": 2.9957275100878236,
      "grad_norm": 0.042450159788131714,
      "learning_rate": 6.932538416171447e-05,
      "loss": 0.2628,
      "step": 3155
    },
    {
      "epoch": 2.9966769522905294,
      "grad_norm": 0.0424211211502552,
      "learning_rate": 6.926859682912678e-05,
      "loss": 0.2671,
      "step": 3156
    },
    {
      "epoch": 2.9976263944932353,
      "grad_norm": 0.07237890362739563,
      "learning_rate": 6.921182043825347e-05,
      "loss": 0.2528,
      "step": 3157
    },
    {
      "epoch": 2.998575836695941,
      "grad_norm": 0.04258769005537033,
      "learning_rate": 6.915505500930928e-05,
      "loss": 0.2465,
      "step": 3158
    },
    {
      "epoch": 2.999525278898647,
      "grad_norm": 0.04987820237874985,
      "learning_rate": 6.909830056250527e-05,
      "loss": 0.259,
      "step": 3159
    },
    {
      "epoch": 3.000474721101353,
      "grad_norm": 0.08159984648227692,
      "learning_rate": 6.904155711804842e-05,
      "loss": 0.2765,
      "step": 3160
    },
    {
      "epoch": 3.001424163304059,
      "grad_norm": 0.08647124469280243,
      "learning_rate": 6.898482469614195e-05,
      "loss": 0.2787,
      "step": 3161
    },
    {
      "epoch": 3.0023736055067647,
      "grad_norm": 0.04441903904080391,
      "learning_rate": 6.892810331698496e-05,
      "loss": 0.2592,
      "step": 3162
    },
    {
      "epoch": 3.0033230477094706,
      "grad_norm": 0.044516198337078094,
      "learning_rate": 6.88713930007728e-05,
      "loss": 0.2526,
      "step": 3163
    },
    {
      "epoch": 3.0042724899121764,
      "grad_norm": 0.05410735309123993,
      "learning_rate": 6.881469376769676e-05,
      "loss": 0.2765,
      "step": 3164
    },
    {
      "epoch": 3.0052219321148823,
      "grad_norm": 0.049433011561632156,
      "learning_rate": 6.875800563794425e-05,
      "loss": 0.2671,
      "step": 3165
    },
    {
      "epoch": 3.0061713743175886,
      "grad_norm": 0.04667758569121361,
      "learning_rate": 6.870132863169874e-05,
      "loss": 0.2565,
      "step": 3166
    },
    {
      "epoch": 3.0071208165202945,
      "grad_norm": 0.04814934730529785,
      "learning_rate": 6.864466276913963e-05,
      "loss": 0.2684,
      "step": 3167
    },
    {
      "epoch": 3.0080702587230004,
      "grad_norm": 0.05075441300868988,
      "learning_rate": 6.85880080704425e-05,
      "loss": 0.2595,
      "step": 3168
    },
    {
      "epoch": 3.0090197009257063,
      "grad_norm": 0.04398110508918762,
      "learning_rate": 6.853136455577885e-05,
      "loss": 0.2534,
      "step": 3169
    },
    {
      "epoch": 3.009969143128412,
      "grad_norm": 0.04458888620138168,
      "learning_rate": 6.847473224531624e-05,
      "loss": 0.2647,
      "step": 3170
    },
    {
      "epoch": 3.010918585331118,
      "grad_norm": 0.04190952330827713,
      "learning_rate": 6.841811115921822e-05,
      "loss": 0.2486,
      "step": 3171
    },
    {
      "epoch": 3.011868027533824,
      "grad_norm": 0.045418839901685715,
      "learning_rate": 6.836150131764434e-05,
      "loss": 0.243,
      "step": 3172
    },
    {
      "epoch": 3.0128174697365298,
      "grad_norm": 0.05116521194577217,
      "learning_rate": 6.830490274075022e-05,
      "loss": 0.2598,
      "step": 3173
    },
    {
      "epoch": 3.0137669119392356,
      "grad_norm": 0.07649002224206924,
      "learning_rate": 6.824831544868735e-05,
      "loss": 0.2412,
      "step": 3174
    },
    {
      "epoch": 3.0147163541419415,
      "grad_norm": 0.06899211555719376,
      "learning_rate": 6.819173946160336e-05,
      "loss": 0.2619,
      "step": 3175
    },
    {
      "epoch": 3.0156657963446474,
      "grad_norm": 0.049410488456487656,
      "learning_rate": 6.813517479964162e-05,
      "loss": 0.2545,
      "step": 3176
    },
    {
      "epoch": 3.0166152385473533,
      "grad_norm": 0.04489083215594292,
      "learning_rate": 6.807862148294171e-05,
      "loss": 0.2615,
      "step": 3177
    },
    {
      "epoch": 3.017564680750059,
      "grad_norm": 0.05581679940223694,
      "learning_rate": 6.8022079531639e-05,
      "loss": 0.2678,
      "step": 3178
    },
    {
      "epoch": 3.0185141229527654,
      "grad_norm": 0.043040670454502106,
      "learning_rate": 6.796554896586498e-05,
      "loss": 0.2482,
      "step": 3179
    },
    {
      "epoch": 3.0194635651554713,
      "grad_norm": 0.06353656202554703,
      "learning_rate": 6.790902980574685e-05,
      "loss": 0.2609,
      "step": 3180
    },
    {
      "epoch": 3.020413007358177,
      "grad_norm": 0.044340141117572784,
      "learning_rate": 6.785252207140797e-05,
      "loss": 0.2489,
      "step": 3181
    },
    {
      "epoch": 3.021362449560883,
      "grad_norm": 0.048731133341789246,
      "learning_rate": 6.779602578296757e-05,
      "loss": 0.2663,
      "step": 3182
    },
    {
      "epoch": 3.022311891763589,
      "grad_norm": 0.07609883695840836,
      "learning_rate": 6.773954096054071e-05,
      "loss": 0.2559,
      "step": 3183
    },
    {
      "epoch": 3.023261333966295,
      "grad_norm": 0.0534072183072567,
      "learning_rate": 6.768306762423853e-05,
      "loss": 0.2579,
      "step": 3184
    },
    {
      "epoch": 3.0242107761690007,
      "grad_norm": 0.1267816722393036,
      "learning_rate": 6.762660579416791e-05,
      "loss": 0.2497,
      "step": 3185
    },
    {
      "epoch": 3.0251602183717066,
      "grad_norm": 0.17665016651153564,
      "learning_rate": 6.757015549043175e-05,
      "loss": 0.2561,
      "step": 3186
    },
    {
      "epoch": 3.0261096605744124,
      "grad_norm": 0.05306272208690643,
      "learning_rate": 6.751371673312877e-05,
      "loss": 0.2438,
      "step": 3187
    },
    {
      "epoch": 3.0270591027771183,
      "grad_norm": 0.048476967960596085,
      "learning_rate": 6.74572895423537e-05,
      "loss": 0.258,
      "step": 3188
    },
    {
      "epoch": 3.028008544979824,
      "grad_norm": 0.04300226643681526,
      "learning_rate": 6.740087393819698e-05,
      "loss": 0.2582,
      "step": 3189
    },
    {
      "epoch": 3.02895798718253,
      "grad_norm": 0.04573444277048111,
      "learning_rate": 6.734446994074507e-05,
      "loss": 0.2432,
      "step": 3190
    },
    {
      "epoch": 3.0299074293852364,
      "grad_norm": 0.04853740334510803,
      "learning_rate": 6.728807757008024e-05,
      "loss": 0.2401,
      "step": 3191
    },
    {
      "epoch": 3.0308568715879423,
      "grad_norm": 0.04887021705508232,
      "learning_rate": 6.72316968462806e-05,
      "loss": 0.2637,
      "step": 3192
    },
    {
      "epoch": 3.031806313790648,
      "grad_norm": 0.044489774852991104,
      "learning_rate": 6.717532778942019e-05,
      "loss": 0.2537,
      "step": 3193
    },
    {
      "epoch": 3.032755755993354,
      "grad_norm": 0.04894377663731575,
      "learning_rate": 6.711897041956876e-05,
      "loss": 0.2531,
      "step": 3194
    },
    {
      "epoch": 3.03370519819606,
      "grad_norm": 0.04671604186296463,
      "learning_rate": 6.706262475679205e-05,
      "loss": 0.2622,
      "step": 3195
    },
    {
      "epoch": 3.0346546403987658,
      "grad_norm": 0.046363890171051025,
      "learning_rate": 6.70062908211515e-05,
      "loss": 0.2613,
      "step": 3196
    },
    {
      "epoch": 3.0356040826014716,
      "grad_norm": 0.04283369332551956,
      "learning_rate": 6.694996863270451e-05,
      "loss": 0.2594,
      "step": 3197
    },
    {
      "epoch": 3.0365535248041775,
      "grad_norm": 0.07976411283016205,
      "learning_rate": 6.68936582115042e-05,
      "loss": 0.2537,
      "step": 3198
    },
    {
      "epoch": 3.0375029670068834,
      "grad_norm": 0.04474799707531929,
      "learning_rate": 6.683735957759949e-05,
      "loss": 0.2599,
      "step": 3199
    },
    {
      "epoch": 3.0384524092095893,
      "grad_norm": 0.05153253301978111,
      "learning_rate": 6.678107275103519e-05,
      "loss": 0.2564,
      "step": 3200
    },
    {
      "epoch": 3.039401851412295,
      "grad_norm": 0.04684925451874733,
      "learning_rate": 6.672479775185181e-05,
      "loss": 0.2513,
      "step": 3201
    },
    {
      "epoch": 3.040351293615001,
      "grad_norm": 0.04749739170074463,
      "learning_rate": 6.666853460008575e-05,
      "loss": 0.2652,
      "step": 3202
    },
    {
      "epoch": 3.041300735817707,
      "grad_norm": 0.04831293970346451,
      "learning_rate": 6.661228331576906e-05,
      "loss": 0.2641,
      "step": 3203
    },
    {
      "epoch": 3.042250178020413,
      "grad_norm": 0.055306605994701385,
      "learning_rate": 6.655604391892972e-05,
      "loss": 0.2542,
      "step": 3204
    },
    {
      "epoch": 3.043199620223119,
      "grad_norm": 0.051187027245759964,
      "learning_rate": 6.649981642959132e-05,
      "loss": 0.2677,
      "step": 3205
    },
    {
      "epoch": 3.044149062425825,
      "grad_norm": 0.051244210451841354,
      "learning_rate": 6.644360086777332e-05,
      "loss": 0.2488,
      "step": 3206
    },
    {
      "epoch": 3.045098504628531,
      "grad_norm": 0.0746002122759819,
      "learning_rate": 6.638739725349094e-05,
      "loss": 0.2516,
      "step": 3207
    },
    {
      "epoch": 3.0460479468312367,
      "grad_norm": 0.05683232471346855,
      "learning_rate": 6.633120560675508e-05,
      "loss": 0.2528,
      "step": 3208
    },
    {
      "epoch": 3.0469973890339426,
      "grad_norm": 0.05021344870328903,
      "learning_rate": 6.627502594757242e-05,
      "loss": 0.2561,
      "step": 3209
    },
    {
      "epoch": 3.0479468312366484,
      "grad_norm": 0.051989272236824036,
      "learning_rate": 6.62188582959453e-05,
      "loss": 0.2427,
      "step": 3210
    },
    {
      "epoch": 3.0488962734393543,
      "grad_norm": 0.0517214760184288,
      "learning_rate": 6.61627026718719e-05,
      "loss": 0.2592,
      "step": 3211
    },
    {
      "epoch": 3.04984571564206,
      "grad_norm": 0.044613976031541824,
      "learning_rate": 6.610655909534605e-05,
      "loss": 0.2491,
      "step": 3212
    },
    {
      "epoch": 3.050795157844766,
      "grad_norm": 0.05095863714814186,
      "learning_rate": 6.605042758635729e-05,
      "loss": 0.2512,
      "step": 3213
    },
    {
      "epoch": 3.051744600047472,
      "grad_norm": 0.05005130171775818,
      "learning_rate": 6.599430816489092e-05,
      "loss": 0.2517,
      "step": 3214
    },
    {
      "epoch": 3.052694042250178,
      "grad_norm": 0.04979756847023964,
      "learning_rate": 6.593820085092781e-05,
      "loss": 0.2566,
      "step": 3215
    },
    {
      "epoch": 3.053643484452884,
      "grad_norm": 0.04886776953935623,
      "learning_rate": 6.588210566444469e-05,
      "loss": 0.2584,
      "step": 3216
    },
    {
      "epoch": 3.05459292665559,
      "grad_norm": 0.04781502112746239,
      "learning_rate": 6.582602262541379e-05,
      "loss": 0.2558,
      "step": 3217
    },
    {
      "epoch": 3.055542368858296,
      "grad_norm": 0.044281426817178726,
      "learning_rate": 6.576995175380322e-05,
      "loss": 0.257,
      "step": 3218
    },
    {
      "epoch": 3.0564918110610018,
      "grad_norm": 0.0911925807595253,
      "learning_rate": 6.571389306957654e-05,
      "loss": 0.2588,
      "step": 3219
    },
    {
      "epoch": 3.0574412532637076,
      "grad_norm": 0.07043775916099548,
      "learning_rate": 6.565784659269314e-05,
      "loss": 0.2422,
      "step": 3220
    },
    {
      "epoch": 3.0583906954664135,
      "grad_norm": 0.04519929364323616,
      "learning_rate": 6.560181234310795e-05,
      "loss": 0.2546,
      "step": 3221
    },
    {
      "epoch": 3.0593401376691194,
      "grad_norm": 0.05477475747466087,
      "learning_rate": 6.554579034077164e-05,
      "loss": 0.2589,
      "step": 3222
    },
    {
      "epoch": 3.0602895798718253,
      "grad_norm": 0.04687114432454109,
      "learning_rate": 6.548978060563049e-05,
      "loss": 0.2527,
      "step": 3223
    },
    {
      "epoch": 3.061239022074531,
      "grad_norm": 0.06336509436368942,
      "learning_rate": 6.543378315762634e-05,
      "loss": 0.2553,
      "step": 3224
    },
    {
      "epoch": 3.062188464277237,
      "grad_norm": 0.08432972431182861,
      "learning_rate": 6.537779801669677e-05,
      "loss": 0.2476,
      "step": 3225
    },
    {
      "epoch": 3.063137906479943,
      "grad_norm": 0.04580822214484215,
      "learning_rate": 6.532182520277485e-05,
      "loss": 0.2486,
      "step": 3226
    },
    {
      "epoch": 3.0640873486826488,
      "grad_norm": 0.0473608635365963,
      "learning_rate": 6.526586473578945e-05,
      "loss": 0.2573,
      "step": 3227
    },
    {
      "epoch": 3.0650367908853546,
      "grad_norm": 0.04817958176136017,
      "learning_rate": 6.52099166356648e-05,
      "loss": 0.2636,
      "step": 3228
    },
    {
      "epoch": 3.065986233088061,
      "grad_norm": 0.04982390254735947,
      "learning_rate": 6.515398092232093e-05,
      "loss": 0.2583,
      "step": 3229
    },
    {
      "epoch": 3.066935675290767,
      "grad_norm": 0.05001278966665268,
      "learning_rate": 6.509805761567336e-05,
      "loss": 0.2555,
      "step": 3230
    },
    {
      "epoch": 3.0678851174934727,
      "grad_norm": 0.04802548140287399,
      "learning_rate": 6.50421467356332e-05,
      "loss": 0.2502,
      "step": 3231
    },
    {
      "epoch": 3.0688345596961786,
      "grad_norm": 0.05099169537425041,
      "learning_rate": 6.498624830210722e-05,
      "loss": 0.2579,
      "step": 3232
    },
    {
      "epoch": 3.0697840018988845,
      "grad_norm": 0.05927233770489693,
      "learning_rate": 6.493036233499761e-05,
      "loss": 0.2666,
      "step": 3233
    },
    {
      "epoch": 3.0707334441015903,
      "grad_norm": 0.0797928124666214,
      "learning_rate": 6.487448885420224e-05,
      "loss": 0.2357,
      "step": 3234
    },
    {
      "epoch": 3.071682886304296,
      "grad_norm": 0.05498325452208519,
      "learning_rate": 6.481862787961447e-05,
      "loss": 0.2586,
      "step": 3235
    },
    {
      "epoch": 3.072632328507002,
      "grad_norm": 0.05666450038552284,
      "learning_rate": 6.476277943112331e-05,
      "loss": 0.2516,
      "step": 3236
    },
    {
      "epoch": 3.073581770709708,
      "grad_norm": 0.058523859828710556,
      "learning_rate": 6.470694352861312e-05,
      "loss": 0.2663,
      "step": 3237
    },
    {
      "epoch": 3.074531212912414,
      "grad_norm": 0.0475163534283638,
      "learning_rate": 6.465112019196398e-05,
      "loss": 0.255,
      "step": 3238
    },
    {
      "epoch": 3.0754806551151197,
      "grad_norm": 0.07355392724275589,
      "learning_rate": 6.459530944105141e-05,
      "loss": 0.2676,
      "step": 3239
    },
    {
      "epoch": 3.0764300973178256,
      "grad_norm": 0.047693684697151184,
      "learning_rate": 6.453951129574644e-05,
      "loss": 0.2603,
      "step": 3240
    },
    {
      "epoch": 3.077379539520532,
      "grad_norm": 0.052322860807180405,
      "learning_rate": 6.448372577591568e-05,
      "loss": 0.2518,
      "step": 3241
    },
    {
      "epoch": 3.0783289817232378,
      "grad_norm": 0.04750973731279373,
      "learning_rate": 6.442795290142114e-05,
      "loss": 0.2515,
      "step": 3242
    },
    {
      "epoch": 3.0792784239259436,
      "grad_norm": 0.046384576708078384,
      "learning_rate": 6.437219269212042e-05,
      "loss": 0.2494,
      "step": 3243
    },
    {
      "epoch": 3.0802278661286495,
      "grad_norm": 0.04986315593123436,
      "learning_rate": 6.431644516786657e-05,
      "loss": 0.2534,
      "step": 3244
    },
    {
      "epoch": 3.0811773083313554,
      "grad_norm": 0.04750996455550194,
      "learning_rate": 6.426071034850811e-05,
      "loss": 0.2534,
      "step": 3245
    },
    {
      "epoch": 3.0821267505340613,
      "grad_norm": 0.0551830492913723,
      "learning_rate": 6.420498825388915e-05,
      "loss": 0.246,
      "step": 3246
    },
    {
      "epoch": 3.083076192736767,
      "grad_norm": 0.05087340250611305,
      "learning_rate": 6.414927890384903e-05,
      "loss": 0.2578,
      "step": 3247
    },
    {
      "epoch": 3.084025634939473,
      "grad_norm": 0.04738471284508705,
      "learning_rate": 6.40935823182228e-05,
      "loss": 0.251,
      "step": 3248
    },
    {
      "epoch": 3.084975077142179,
      "grad_norm": 0.07119819521903992,
      "learning_rate": 6.403789851684082e-05,
      "loss": 0.271,
      "step": 3249
    },
    {
      "epoch": 3.0859245193448848,
      "grad_norm": 0.04995843023061752,
      "learning_rate": 6.398222751952899e-05,
      "loss": 0.2585,
      "step": 3250
    },
    {
      "epoch": 3.0868739615475906,
      "grad_norm": 0.05113042891025543,
      "learning_rate": 6.392656934610852e-05,
      "loss": 0.2691,
      "step": 3251
    },
    {
      "epoch": 3.0878234037502965,
      "grad_norm": 0.052136246114969254,
      "learning_rate": 6.387092401639623e-05,
      "loss": 0.265,
      "step": 3252
    },
    {
      "epoch": 3.0887728459530024,
      "grad_norm": 0.048999518156051636,
      "learning_rate": 6.381529155020418e-05,
      "loss": 0.2535,
      "step": 3253
    },
    {
      "epoch": 3.0897222881557087,
      "grad_norm": 0.1198212206363678,
      "learning_rate": 6.375967196734003e-05,
      "loss": 0.2574,
      "step": 3254
    },
    {
      "epoch": 3.0906717303584146,
      "grad_norm": 0.0474587082862854,
      "learning_rate": 6.370406528760675e-05,
      "loss": 0.2623,
      "step": 3255
    },
    {
      "epoch": 3.0916211725611205,
      "grad_norm": 0.04819402098655701,
      "learning_rate": 6.364847153080268e-05,
      "loss": 0.2502,
      "step": 3256
    },
    {
      "epoch": 3.0925706147638263,
      "grad_norm": 0.04643898829817772,
      "learning_rate": 6.359289071672168e-05,
      "loss": 0.2528,
      "step": 3257
    },
    {
      "epoch": 3.093520056966532,
      "grad_norm": 0.05174558609724045,
      "learning_rate": 6.353732286515286e-05,
      "loss": 0.2657,
      "step": 3258
    },
    {
      "epoch": 3.094469499169238,
      "grad_norm": 0.04330934211611748,
      "learning_rate": 6.348176799588088e-05,
      "loss": 0.2559,
      "step": 3259
    },
    {
      "epoch": 3.095418941371944,
      "grad_norm": 0.06805543601512909,
      "learning_rate": 6.34262261286856e-05,
      "loss": 0.2317,
      "step": 3260
    },
    {
      "epoch": 3.09636838357465,
      "grad_norm": 0.042569007724523544,
      "learning_rate": 6.337069728334239e-05,
      "loss": 0.2549,
      "step": 3261
    },
    {
      "epoch": 3.0973178257773557,
      "grad_norm": 0.04311903938651085,
      "learning_rate": 6.33151814796219e-05,
      "loss": 0.2557,
      "step": 3262
    },
    {
      "epoch": 3.0982672679800616,
      "grad_norm": 0.05211617797613144,
      "learning_rate": 6.325967873729018e-05,
      "loss": 0.2471,
      "step": 3263
    },
    {
      "epoch": 3.0992167101827675,
      "grad_norm": 0.04343589022755623,
      "learning_rate": 6.320418907610865e-05,
      "loss": 0.2462,
      "step": 3264
    },
    {
      "epoch": 3.1001661523854733,
      "grad_norm": 0.04785288870334625,
      "learning_rate": 6.314871251583398e-05,
      "loss": 0.2478,
      "step": 3265
    },
    {
      "epoch": 3.1011155945881796,
      "grad_norm": 0.04962451383471489,
      "learning_rate": 6.309324907621827e-05,
      "loss": 0.2448,
      "step": 3266
    },
    {
      "epoch": 3.1020650367908855,
      "grad_norm": 0.05707186087965965,
      "learning_rate": 6.30377987770089e-05,
      "loss": 0.2557,
      "step": 3267
    },
    {
      "epoch": 3.1030144789935914,
      "grad_norm": 0.0540940947830677,
      "learning_rate": 6.298236163794863e-05,
      "loss": 0.2599,
      "step": 3268
    },
    {
      "epoch": 3.1039639211962973,
      "grad_norm": 0.07168012112379074,
      "learning_rate": 6.292693767877542e-05,
      "loss": 0.2355,
      "step": 3269
    },
    {
      "epoch": 3.104913363399003,
      "grad_norm": 0.050079986453056335,
      "learning_rate": 6.287152691922264e-05,
      "loss": 0.2575,
      "step": 3270
    },
    {
      "epoch": 3.105862805601709,
      "grad_norm": 0.05402916669845581,
      "learning_rate": 6.281612937901894e-05,
      "loss": 0.2676,
      "step": 3271
    },
    {
      "epoch": 3.106812247804415,
      "grad_norm": 0.05587102100253105,
      "learning_rate": 6.276074507788821e-05,
      "loss": 0.2721,
      "step": 3272
    },
    {
      "epoch": 3.1077616900071208,
      "grad_norm": 0.052861545234918594,
      "learning_rate": 6.270537403554973e-05,
      "loss": 0.2597,
      "step": 3273
    },
    {
      "epoch": 3.1087111322098266,
      "grad_norm": 0.05834222957491875,
      "learning_rate": 6.265001627171793e-05,
      "loss": 0.2667,
      "step": 3274
    },
    {
      "epoch": 3.1096605744125325,
      "grad_norm": 0.04725935310125351,
      "learning_rate": 6.259467180610261e-05,
      "loss": 0.2649,
      "step": 3275
    },
    {
      "epoch": 3.1106100166152384,
      "grad_norm": 0.09624456614255905,
      "learning_rate": 6.25393406584088e-05,
      "loss": 0.2602,
      "step": 3276
    },
    {
      "epoch": 3.1115594588179443,
      "grad_norm": 0.048281747847795486,
      "learning_rate": 6.248402284833682e-05,
      "loss": 0.2482,
      "step": 3277
    },
    {
      "epoch": 3.11250890102065,
      "grad_norm": 0.07574120908975601,
      "learning_rate": 6.242871839558215e-05,
      "loss": 0.2559,
      "step": 3278
    },
    {
      "epoch": 3.1134583432233565,
      "grad_norm": 0.04702775552868843,
      "learning_rate": 6.237342731983562e-05,
      "loss": 0.2484,
      "step": 3279
    },
    {
      "epoch": 3.1144077854260623,
      "grad_norm": 0.05545090511441231,
      "learning_rate": 6.231814964078327e-05,
      "loss": 0.2659,
      "step": 3280
    },
    {
      "epoch": 3.115357227628768,
      "grad_norm": 0.054547566920518875,
      "learning_rate": 6.22628853781063e-05,
      "loss": 0.2639,
      "step": 3281
    },
    {
      "epoch": 3.116306669831474,
      "grad_norm": 0.08548653870820999,
      "learning_rate": 6.220763455148126e-05,
      "loss": 0.2669,
      "step": 3282
    },
    {
      "epoch": 3.11725611203418,
      "grad_norm": 0.04788108915090561,
      "learning_rate": 6.215239718057976e-05,
      "loss": 0.2496,
      "step": 3283
    },
    {
      "epoch": 3.118205554236886,
      "grad_norm": 0.061255473643541336,
      "learning_rate": 6.209717328506877e-05,
      "loss": 0.2386,
      "step": 3284
    },
    {
      "epoch": 3.1191549964395917,
      "grad_norm": 0.06910520046949387,
      "learning_rate": 6.204196288461037e-05,
      "loss": 0.2486,
      "step": 3285
    },
    {
      "epoch": 3.1201044386422976,
      "grad_norm": 0.06149483472108841,
      "learning_rate": 6.198676599886185e-05,
      "loss": 0.2606,
      "step": 3286
    },
    {
      "epoch": 3.1210538808450035,
      "grad_norm": 0.09108784794807434,
      "learning_rate": 6.193158264747576e-05,
      "loss": 0.2633,
      "step": 3287
    },
    {
      "epoch": 3.1220033230477093,
      "grad_norm": 0.08666419982910156,
      "learning_rate": 6.187641285009966e-05,
      "loss": 0.2737,
      "step": 3288
    },
    {
      "epoch": 3.122952765250415,
      "grad_norm": 0.05654684826731682,
      "learning_rate": 6.18212566263765e-05,
      "loss": 0.2491,
      "step": 3289
    },
    {
      "epoch": 3.123902207453121,
      "grad_norm": 0.05671761557459831,
      "learning_rate": 6.176611399594421e-05,
      "loss": 0.2565,
      "step": 3290
    },
    {
      "epoch": 3.1248516496558274,
      "grad_norm": 0.04744125157594681,
      "learning_rate": 6.171098497843606e-05,
      "loss": 0.2653,
      "step": 3291
    },
    {
      "epoch": 3.1258010918585333,
      "grad_norm": 0.07659009844064713,
      "learning_rate": 6.165586959348026e-05,
      "loss": 0.2655,
      "step": 3292
    },
    {
      "epoch": 3.126750534061239,
      "grad_norm": 0.04795358330011368,
      "learning_rate": 6.160076786070036e-05,
      "loss": 0.2586,
      "step": 3293
    },
    {
      "epoch": 3.127699976263945,
      "grad_norm": 0.05069601535797119,
      "learning_rate": 6.154567979971493e-05,
      "loss": 0.2564,
      "step": 3294
    },
    {
      "epoch": 3.128649418466651,
      "grad_norm": 0.09271470457315445,
      "learning_rate": 6.149060543013771e-05,
      "loss": 0.2662,
      "step": 3295
    },
    {
      "epoch": 3.1295988606693568,
      "grad_norm": 0.06749092042446136,
      "learning_rate": 6.143554477157763e-05,
      "loss": 0.2669,
      "step": 3296
    },
    {
      "epoch": 3.1305483028720626,
      "grad_norm": 0.04530181735754013,
      "learning_rate": 6.13804978436386e-05,
      "loss": 0.2531,
      "step": 3297
    },
    {
      "epoch": 3.1314977450747685,
      "grad_norm": 0.04635681211948395,
      "learning_rate": 6.132546466591977e-05,
      "loss": 0.2463,
      "step": 3298
    },
    {
      "epoch": 3.1324471872774744,
      "grad_norm": 0.0409170500934124,
      "learning_rate": 6.127044525801529e-05,
      "loss": 0.2545,
      "step": 3299
    },
    {
      "epoch": 3.1333966294801803,
      "grad_norm": 0.057700783014297485,
      "learning_rate": 6.121543963951452e-05,
      "loss": 0.2579,
      "step": 3300
    },
    {
      "epoch": 3.134346071682886,
      "grad_norm": 0.044831424951553345,
      "learning_rate": 6.11604478300018e-05,
      "loss": 0.2527,
      "step": 3301
    },
    {
      "epoch": 3.135295513885592,
      "grad_norm": 0.044355545192956924,
      "learning_rate": 6.110546984905661e-05,
      "loss": 0.257,
      "step": 3302
    },
    {
      "epoch": 3.136244956088298,
      "grad_norm": 0.06783049553632736,
      "learning_rate": 6.105050571625353e-05,
      "loss": 0.27,
      "step": 3303
    },
    {
      "epoch": 3.137194398291004,
      "grad_norm": 0.0519019216299057,
      "learning_rate": 6.0995555451162145e-05,
      "loss": 0.2561,
      "step": 3304
    },
    {
      "epoch": 3.13814384049371,
      "grad_norm": 0.057388197630643845,
      "learning_rate": 6.094061907334718e-05,
      "loss": 0.2557,
      "step": 3305
    },
    {
      "epoch": 3.139093282696416,
      "grad_norm": 0.11174029111862183,
      "learning_rate": 6.0885696602368315e-05,
      "loss": 0.2425,
      "step": 3306
    },
    {
      "epoch": 3.140042724899122,
      "grad_norm": 0.059950169175863266,
      "learning_rate": 6.0830788057780374e-05,
      "loss": 0.2546,
      "step": 3307
    },
    {
      "epoch": 3.1409921671018277,
      "grad_norm": 0.04765070974826813,
      "learning_rate": 6.077589345913315e-05,
      "loss": 0.2418,
      "step": 3308
    },
    {
      "epoch": 3.1419416093045336,
      "grad_norm": 0.049464669078588486,
      "learning_rate": 6.072101282597156e-05,
      "loss": 0.2546,
      "step": 3309
    },
    {
      "epoch": 3.1428910515072395,
      "grad_norm": 0.06911448389291763,
      "learning_rate": 6.0666146177835425e-05,
      "loss": 0.2686,
      "step": 3310
    },
    {
      "epoch": 3.1438404937099453,
      "grad_norm": 0.06544983386993408,
      "learning_rate": 6.06112935342597e-05,
      "loss": 0.2493,
      "step": 3311
    },
    {
      "epoch": 3.144789935912651,
      "grad_norm": 0.08926332741975784,
      "learning_rate": 6.0556454914774295e-05,
      "loss": 0.2685,
      "step": 3312
    },
    {
      "epoch": 3.145739378115357,
      "grad_norm": 0.04235906898975372,
      "learning_rate": 6.0501630338904136e-05,
      "loss": 0.2577,
      "step": 3313
    },
    {
      "epoch": 3.146688820318063,
      "grad_norm": 0.04946906492114067,
      "learning_rate": 6.04468198261692e-05,
      "loss": 0.2554,
      "step": 3314
    },
    {
      "epoch": 3.1476382625207693,
      "grad_norm": 0.05564684048295021,
      "learning_rate": 6.039202339608432e-05,
      "loss": 0.2677,
      "step": 3315
    },
    {
      "epoch": 3.148587704723475,
      "grad_norm": 0.0531269796192646,
      "learning_rate": 6.03372410681595e-05,
      "loss": 0.2674,
      "step": 3316
    },
    {
      "epoch": 3.149537146926181,
      "grad_norm": 0.047610778361558914,
      "learning_rate": 6.028247286189953e-05,
      "loss": 0.2578,
      "step": 3317
    },
    {
      "epoch": 3.150486589128887,
      "grad_norm": 0.06487441807985306,
      "learning_rate": 6.0227718796804377e-05,
      "loss": 0.2677,
      "step": 3318
    },
    {
      "epoch": 3.151436031331593,
      "grad_norm": 0.04265904799103737,
      "learning_rate": 6.017297889236878e-05,
      "loss": 0.2526,
      "step": 3319
    },
    {
      "epoch": 3.1523854735342987,
      "grad_norm": 0.04901301488280296,
      "learning_rate": 6.011825316808255e-05,
      "loss": 0.2522,
      "step": 3320
    },
    {
      "epoch": 3.1533349157370045,
      "grad_norm": 0.04461880400776863,
      "learning_rate": 6.006354164343046e-05,
      "loss": 0.2567,
      "step": 3321
    },
    {
      "epoch": 3.1542843579397104,
      "grad_norm": 0.05425048992037773,
      "learning_rate": 6.000884433789211e-05,
      "loss": 0.2708,
      "step": 3322
    },
    {
      "epoch": 3.1552338001424163,
      "grad_norm": 0.047145530581474304,
      "learning_rate": 5.995416127094222e-05,
      "loss": 0.2598,
      "step": 3323
    },
    {
      "epoch": 3.156183242345122,
      "grad_norm": 0.04286041483283043,
      "learning_rate": 5.989949246205024e-05,
      "loss": 0.2544,
      "step": 3324
    },
    {
      "epoch": 3.157132684547828,
      "grad_norm": 0.0453423373401165,
      "learning_rate": 5.984483793068072e-05,
      "loss": 0.2547,
      "step": 3325
    },
    {
      "epoch": 3.158082126750534,
      "grad_norm": 0.07012559473514557,
      "learning_rate": 5.979019769629297e-05,
      "loss": 0.2868,
      "step": 3326
    },
    {
      "epoch": 3.1590315689532398,
      "grad_norm": 0.04472891986370087,
      "learning_rate": 5.9735571778341325e-05,
      "loss": 0.2578,
      "step": 3327
    },
    {
      "epoch": 3.1599810111559457,
      "grad_norm": 0.04697902500629425,
      "learning_rate": 5.9680960196274994e-05,
      "loss": 0.2549,
      "step": 3328
    },
    {
      "epoch": 3.160930453358652,
      "grad_norm": 0.04871654137969017,
      "learning_rate": 5.9626362969538053e-05,
      "loss": 0.2492,
      "step": 3329
    },
    {
      "epoch": 3.161879895561358,
      "grad_norm": 0.042063839733600616,
      "learning_rate": 5.957178011756952e-05,
      "loss": 0.2528,
      "step": 3330
    },
    {
      "epoch": 3.1628293377640637,
      "grad_norm": 0.04910881072282791,
      "learning_rate": 5.9517211659803216e-05,
      "loss": 0.2445,
      "step": 3331
    },
    {
      "epoch": 3.1637787799667696,
      "grad_norm": 0.0521257258951664,
      "learning_rate": 5.94626576156679e-05,
      "loss": 0.2623,
      "step": 3332
    },
    {
      "epoch": 3.1647282221694755,
      "grad_norm": 0.05412798747420311,
      "learning_rate": 5.9408118004587185e-05,
      "loss": 0.2565,
      "step": 3333
    },
    {
      "epoch": 3.1656776643721813,
      "grad_norm": 0.04634969308972359,
      "learning_rate": 5.935359284597957e-05,
      "loss": 0.2501,
      "step": 3334
    },
    {
      "epoch": 3.166627106574887,
      "grad_norm": 0.04625312611460686,
      "learning_rate": 5.92990821592583e-05,
      "loss": 0.2461,
      "step": 3335
    },
    {
      "epoch": 3.167576548777593,
      "grad_norm": 0.061656538397073746,
      "learning_rate": 5.924458596383161e-05,
      "loss": 0.2604,
      "step": 3336
    },
    {
      "epoch": 3.168525990980299,
      "grad_norm": 0.0656399130821228,
      "learning_rate": 5.919010427910252e-05,
      "loss": 0.2729,
      "step": 3337
    },
    {
      "epoch": 3.169475433183005,
      "grad_norm": 0.04736149311065674,
      "learning_rate": 5.913563712446883e-05,
      "loss": 0.2554,
      "step": 3338
    },
    {
      "epoch": 3.1704248753857107,
      "grad_norm": 0.07337143272161484,
      "learning_rate": 5.9081184519323275e-05,
      "loss": 0.2574,
      "step": 3339
    },
    {
      "epoch": 3.171374317588417,
      "grad_norm": 0.06286807358264923,
      "learning_rate": 5.902674648305329e-05,
      "loss": 0.2737,
      "step": 3340
    },
    {
      "epoch": 3.172323759791123,
      "grad_norm": 0.046282291412353516,
      "learning_rate": 5.89723230350412e-05,
      "loss": 0.2544,
      "step": 3341
    },
    {
      "epoch": 3.173273201993829,
      "grad_norm": 0.045843806117773056,
      "learning_rate": 5.89179141946641e-05,
      "loss": 0.2516,
      "step": 3342
    },
    {
      "epoch": 3.1742226441965347,
      "grad_norm": 0.0496109202504158,
      "learning_rate": 5.8863519981293926e-05,
      "loss": 0.261,
      "step": 3343
    },
    {
      "epoch": 3.1751720863992405,
      "grad_norm": 0.04872041568160057,
      "learning_rate": 5.8809140414297416e-05,
      "loss": 0.2449,
      "step": 3344
    },
    {
      "epoch": 3.1761215286019464,
      "grad_norm": 0.054158084094524384,
      "learning_rate": 5.8754775513035964e-05,
      "loss": 0.2507,
      "step": 3345
    },
    {
      "epoch": 3.1770709708046523,
      "grad_norm": 0.051501765847206116,
      "learning_rate": 5.8700425296865905e-05,
      "loss": 0.2472,
      "step": 3346
    },
    {
      "epoch": 3.178020413007358,
      "grad_norm": 0.048579879105091095,
      "learning_rate": 5.8646089785138235e-05,
      "loss": 0.2535,
      "step": 3347
    },
    {
      "epoch": 3.178969855210064,
      "grad_norm": 0.07795701920986176,
      "learning_rate": 5.859176899719883e-05,
      "loss": 0.2796,
      "step": 3348
    },
    {
      "epoch": 3.17991929741277,
      "grad_norm": 0.053580522537231445,
      "learning_rate": 5.8537462952388155e-05,
      "loss": 0.2599,
      "step": 3349
    },
    {
      "epoch": 3.180868739615476,
      "grad_norm": 0.04634246602654457,
      "learning_rate": 5.848317167004158e-05,
      "loss": 0.2451,
      "step": 3350
    },
    {
      "epoch": 3.1818181818181817,
      "grad_norm": 0.044239211827516556,
      "learning_rate": 5.842889516948913e-05,
      "loss": 0.245,
      "step": 3351
    },
    {
      "epoch": 3.1827676240208875,
      "grad_norm": 0.04600201174616814,
      "learning_rate": 5.837463347005561e-05,
      "loss": 0.2621,
      "step": 3352
    },
    {
      "epoch": 3.1837170662235934,
      "grad_norm": 0.0480601042509079,
      "learning_rate": 5.832038659106056e-05,
      "loss": 0.267,
      "step": 3353
    },
    {
      "epoch": 3.1846665084262997,
      "grad_norm": 0.05365194007754326,
      "learning_rate": 5.8266154551818216e-05,
      "loss": 0.226,
      "step": 3354
    },
    {
      "epoch": 3.1856159506290056,
      "grad_norm": 0.1545141488313675,
      "learning_rate": 5.821193737163753e-05,
      "loss": 0.2352,
      "step": 3355
    },
    {
      "epoch": 3.1865653928317115,
      "grad_norm": 0.04504143446683884,
      "learning_rate": 5.8157735069822176e-05,
      "loss": 0.2509,
      "step": 3356
    },
    {
      "epoch": 3.1875148350344173,
      "grad_norm": 0.08130753040313721,
      "learning_rate": 5.810354766567052e-05,
      "loss": 0.2566,
      "step": 3357
    },
    {
      "epoch": 3.1884642772371232,
      "grad_norm": 0.06167406588792801,
      "learning_rate": 5.8049375178475594e-05,
      "loss": 0.243,
      "step": 3358
    },
    {
      "epoch": 3.189413719439829,
      "grad_norm": 0.0594964362680912,
      "learning_rate": 5.799521762752524e-05,
      "loss": 0.2594,
      "step": 3359
    },
    {
      "epoch": 3.190363161642535,
      "grad_norm": 0.049432095140218735,
      "learning_rate": 5.794107503210186e-05,
      "loss": 0.2589,
      "step": 3360
    },
    {
      "epoch": 3.191312603845241,
      "grad_norm": 0.0841529369354248,
      "learning_rate": 5.788694741148257e-05,
      "loss": 0.278,
      "step": 3361
    },
    {
      "epoch": 3.1922620460479467,
      "grad_norm": 0.07379740476608276,
      "learning_rate": 5.7832834784939163e-05,
      "loss": 0.2686,
      "step": 3362
    },
    {
      "epoch": 3.1932114882506526,
      "grad_norm": 0.0550118163228035,
      "learning_rate": 5.777873717173803e-05,
      "loss": 0.2572,
      "step": 3363
    },
    {
      "epoch": 3.1941609304533585,
      "grad_norm": 0.050232190638780594,
      "learning_rate": 5.7724654591140385e-05,
      "loss": 0.2428,
      "step": 3364
    },
    {
      "epoch": 3.195110372656065,
      "grad_norm": 0.07592414319515228,
      "learning_rate": 5.7670587062401826e-05,
      "loss": 0.2639,
      "step": 3365
    },
    {
      "epoch": 3.1960598148587707,
      "grad_norm": 0.06244008243083954,
      "learning_rate": 5.761653460477286e-05,
      "loss": 0.2716,
      "step": 3366
    },
    {
      "epoch": 3.1970092570614765,
      "grad_norm": 0.05664997175335884,
      "learning_rate": 5.756249723749847e-05,
      "loss": 0.2569,
      "step": 3367
    },
    {
      "epoch": 3.1979586992641824,
      "grad_norm": 0.053819846361875534,
      "learning_rate": 5.750847497981827e-05,
      "loss": 0.264,
      "step": 3368
    },
    {
      "epoch": 3.1989081414668883,
      "grad_norm": 0.09246042370796204,
      "learning_rate": 5.745446785096664e-05,
      "loss": 0.256,
      "step": 3369
    },
    {
      "epoch": 3.199857583669594,
      "grad_norm": 0.0775340348482132,
      "learning_rate": 5.740047587017232e-05,
      "loss": 0.2708,
      "step": 3370
    },
    {
      "epoch": 3.2008070258723,
      "grad_norm": 0.06020486727356911,
      "learning_rate": 5.734649905665891e-05,
      "loss": 0.2579,
      "step": 3371
    },
    {
      "epoch": 3.201756468075006,
      "grad_norm": 0.051304880529642105,
      "learning_rate": 5.7292537429644454e-05,
      "loss": 0.2571,
      "step": 3372
    },
    {
      "epoch": 3.202705910277712,
      "grad_norm": 0.05811922997236252,
      "learning_rate": 5.723859100834165e-05,
      "loss": 0.2568,
      "step": 3373
    },
    {
      "epoch": 3.2036553524804177,
      "grad_norm": 0.05013841763138771,
      "learning_rate": 5.718465981195775e-05,
      "loss": 0.2479,
      "step": 3374
    },
    {
      "epoch": 3.2046047946831235,
      "grad_norm": 0.048892173916101456,
      "learning_rate": 5.713074385969457e-05,
      "loss": 0.2522,
      "step": 3375
    },
    {
      "epoch": 3.2055542368858294,
      "grad_norm": 0.09769143909215927,
      "learning_rate": 5.7076843170748615e-05,
      "loss": 0.2512,
      "step": 3376
    },
    {
      "epoch": 3.2065036790885353,
      "grad_norm": 0.09074780344963074,
      "learning_rate": 5.702295776431084e-05,
      "loss": 0.2631,
      "step": 3377
    },
    {
      "epoch": 3.207453121291241,
      "grad_norm": 0.05048530921339989,
      "learning_rate": 5.6969087659566756e-05,
      "loss": 0.2496,
      "step": 3378
    },
    {
      "epoch": 3.2084025634939475,
      "grad_norm": 0.0813853070139885,
      "learning_rate": 5.691523287569649e-05,
      "loss": 0.2616,
      "step": 3379
    },
    {
      "epoch": 3.2093520056966534,
      "grad_norm": 0.04971461743116379,
      "learning_rate": 5.6861393431874675e-05,
      "loss": 0.2684,
      "step": 3380
    },
    {
      "epoch": 3.2103014478993592,
      "grad_norm": 0.07930952310562134,
      "learning_rate": 5.680756934727046e-05,
      "loss": 0.2686,
      "step": 3381
    },
    {
      "epoch": 3.211250890102065,
      "grad_norm": 0.04881668835878372,
      "learning_rate": 5.675376064104767e-05,
      "loss": 0.2527,
      "step": 3382
    },
    {
      "epoch": 3.212200332304771,
      "grad_norm": 0.09243467450141907,
      "learning_rate": 5.669996733236438e-05,
      "loss": 0.2709,
      "step": 3383
    },
    {
      "epoch": 3.213149774507477,
      "grad_norm": 0.05580917000770569,
      "learning_rate": 5.6646189440373456e-05,
      "loss": 0.2482,
      "step": 3384
    },
    {
      "epoch": 3.2140992167101827,
      "grad_norm": 0.0797090157866478,
      "learning_rate": 5.659242698422214e-05,
      "loss": 0.2635,
      "step": 3385
    },
    {
      "epoch": 3.2150486589128886,
      "grad_norm": 0.050484515726566315,
      "learning_rate": 5.653867998305216e-05,
      "loss": 0.246,
      "step": 3386
    },
    {
      "epoch": 3.2159981011155945,
      "grad_norm": 0.07907379418611526,
      "learning_rate": 5.64849484559999e-05,
      "loss": 0.2384,
      "step": 3387
    },
    {
      "epoch": 3.2169475433183004,
      "grad_norm": 0.057728394865989685,
      "learning_rate": 5.6431232422195946e-05,
      "loss": 0.242,
      "step": 3388
    },
    {
      "epoch": 3.2178969855210062,
      "grad_norm": 0.053205668926239014,
      "learning_rate": 5.6377531900765666e-05,
      "loss": 0.2532,
      "step": 3389
    },
    {
      "epoch": 3.2188464277237125,
      "grad_norm": 0.05353543162345886,
      "learning_rate": 5.6323846910828735e-05,
      "loss": 0.2574,
      "step": 3390
    },
    {
      "epoch": 3.2197958699264184,
      "grad_norm": 0.04991352930665016,
      "learning_rate": 5.6270177471499365e-05,
      "loss": 0.2574,
      "step": 3391
    },
    {
      "epoch": 3.2207453121291243,
      "grad_norm": 0.05403256043791771,
      "learning_rate": 5.621652360188617e-05,
      "loss": 0.2556,
      "step": 3392
    },
    {
      "epoch": 3.22169475433183,
      "grad_norm": 0.05058816447854042,
      "learning_rate": 5.616288532109225e-05,
      "loss": 0.2543,
      "step": 3393
    },
    {
      "epoch": 3.222644196534536,
      "grad_norm": 0.04839969053864479,
      "learning_rate": 5.610926264821523e-05,
      "loss": 0.2565,
      "step": 3394
    },
    {
      "epoch": 3.223593638737242,
      "grad_norm": 0.11676731705665588,
      "learning_rate": 5.6055655602347067e-05,
      "loss": 0.2506,
      "step": 3395
    },
    {
      "epoch": 3.224543080939948,
      "grad_norm": 0.04872575402259827,
      "learning_rate": 5.600206420257419e-05,
      "loss": 0.2591,
      "step": 3396
    },
    {
      "epoch": 3.2254925231426537,
      "grad_norm": 0.046489961445331573,
      "learning_rate": 5.5948488467977486e-05,
      "loss": 0.2462,
      "step": 3397
    },
    {
      "epoch": 3.2264419653453595,
      "grad_norm": 0.04424404352903366,
      "learning_rate": 5.589492841763224e-05,
      "loss": 0.2567,
      "step": 3398
    },
    {
      "epoch": 3.2273914075480654,
      "grad_norm": 0.04828077182173729,
      "learning_rate": 5.5841384070608104e-05,
      "loss": 0.2568,
      "step": 3399
    },
    {
      "epoch": 3.2283408497507713,
      "grad_norm": 0.06325559318065643,
      "learning_rate": 5.5787855445969276e-05,
      "loss": 0.2802,
      "step": 3400
    },
    {
      "epoch": 3.229290291953477,
      "grad_norm": 0.10159096866846085,
      "learning_rate": 5.5734342562774234e-05,
      "loss": 0.2502,
      "step": 3401
    },
    {
      "epoch": 3.230239734156183,
      "grad_norm": 0.04155226796865463,
      "learning_rate": 5.568084544007588e-05,
      "loss": 0.2463,
      "step": 3402
    },
    {
      "epoch": 3.2311891763588894,
      "grad_norm": 0.04739375039935112,
      "learning_rate": 5.562736409692153e-05,
      "loss": 0.262,
      "step": 3403
    },
    {
      "epoch": 3.2321386185615952,
      "grad_norm": 0.04406020790338516,
      "learning_rate": 5.55738985523528e-05,
      "loss": 0.2507,
      "step": 3404
    },
    {
      "epoch": 3.233088060764301,
      "grad_norm": 0.09737671911716461,
      "learning_rate": 5.55204488254059e-05,
      "loss": 0.2427,
      "step": 3405
    },
    {
      "epoch": 3.234037502967007,
      "grad_norm": 0.05227050185203552,
      "learning_rate": 5.546701493511106e-05,
      "loss": 0.2669,
      "step": 3406
    },
    {
      "epoch": 3.234986945169713,
      "grad_norm": 0.044119905680418015,
      "learning_rate": 5.541359690049321e-05,
      "loss": 0.2486,
      "step": 3407
    },
    {
      "epoch": 3.2359363873724187,
      "grad_norm": 0.04765981808304787,
      "learning_rate": 5.5360194740571445e-05,
      "loss": 0.261,
      "step": 3408
    },
    {
      "epoch": 3.2368858295751246,
      "grad_norm": 0.050302762538194656,
      "learning_rate": 5.5306808474359205e-05,
      "loss": 0.2604,
      "step": 3409
    },
    {
      "epoch": 3.2378352717778305,
      "grad_norm": 0.10235820710659027,
      "learning_rate": 5.525343812086445e-05,
      "loss": 0.2609,
      "step": 3410
    },
    {
      "epoch": 3.2387847139805364,
      "grad_norm": 0.04724949970841408,
      "learning_rate": 5.520008369908918e-05,
      "loss": 0.2617,
      "step": 3411
    },
    {
      "epoch": 3.2397341561832422,
      "grad_norm": 0.05436694622039795,
      "learning_rate": 5.5146745228030006e-05,
      "loss": 0.2839,
      "step": 3412
    },
    {
      "epoch": 3.240683598385948,
      "grad_norm": 0.04633798077702522,
      "learning_rate": 5.50934227266777e-05,
      "loss": 0.252,
      "step": 3413
    },
    {
      "epoch": 3.241633040588654,
      "grad_norm": 0.04847753047943115,
      "learning_rate": 5.504011621401738e-05,
      "loss": 0.26,
      "step": 3414
    },
    {
      "epoch": 3.2425824827913603,
      "grad_norm": 0.07506626099348068,
      "learning_rate": 5.498682570902849e-05,
      "loss": 0.2523,
      "step": 3415
    },
    {
      "epoch": 3.243531924994066,
      "grad_norm": 0.050959791988134384,
      "learning_rate": 5.493355123068473e-05,
      "loss": 0.2615,
      "step": 3416
    },
    {
      "epoch": 3.244481367196772,
      "grad_norm": 0.06273401528596878,
      "learning_rate": 5.488029279795419e-05,
      "loss": 0.2692,
      "step": 3417
    },
    {
      "epoch": 3.245430809399478,
      "grad_norm": 0.04362702742218971,
      "learning_rate": 5.4827050429799167e-05,
      "loss": 0.2534,
      "step": 3418
    },
    {
      "epoch": 3.246380251602184,
      "grad_norm": 0.08757391571998596,
      "learning_rate": 5.477382414517624e-05,
      "loss": 0.2646,
      "step": 3419
    },
    {
      "epoch": 3.2473296938048897,
      "grad_norm": 0.04454657435417175,
      "learning_rate": 5.472061396303629e-05,
      "loss": 0.2584,
      "step": 3420
    },
    {
      "epoch": 3.2482791360075955,
      "grad_norm": 0.053924281150102615,
      "learning_rate": 5.466741990232445e-05,
      "loss": 0.2429,
      "step": 3421
    },
    {
      "epoch": 3.2492285782103014,
      "grad_norm": 0.05537960305809975,
      "learning_rate": 5.461424198198006e-05,
      "loss": 0.2595,
      "step": 3422
    },
    {
      "epoch": 3.2501780204130073,
      "grad_norm": 0.06580296903848648,
      "learning_rate": 5.456108022093691e-05,
      "loss": 0.2739,
      "step": 3423
    },
    {
      "epoch": 3.251127462615713,
      "grad_norm": 0.0508543886244297,
      "learning_rate": 5.4507934638122727e-05,
      "loss": 0.2566,
      "step": 3424
    },
    {
      "epoch": 3.252076904818419,
      "grad_norm": 0.05126870423555374,
      "learning_rate": 5.445480525245976e-05,
      "loss": 0.2546,
      "step": 3425
    },
    {
      "epoch": 3.253026347021125,
      "grad_norm": 0.0474667064845562,
      "learning_rate": 5.440169208286436e-05,
      "loss": 0.2609,
      "step": 3426
    },
    {
      "epoch": 3.253975789223831,
      "grad_norm": 0.04915899783372879,
      "learning_rate": 5.434859514824706e-05,
      "loss": 0.2558,
      "step": 3427
    },
    {
      "epoch": 3.2549252314265367,
      "grad_norm": 0.061247166246175766,
      "learning_rate": 5.429551446751282e-05,
      "loss": 0.2369,
      "step": 3428
    },
    {
      "epoch": 3.255874673629243,
      "grad_norm": 0.08276187628507614,
      "learning_rate": 5.424245005956048e-05,
      "loss": 0.2525,
      "step": 3429
    },
    {
      "epoch": 3.256824115831949,
      "grad_norm": 0.06013704091310501,
      "learning_rate": 5.418940194328344e-05,
      "loss": 0.2656,
      "step": 3430
    },
    {
      "epoch": 3.2577735580346547,
      "grad_norm": 0.05011112242937088,
      "learning_rate": 5.413637013756898e-05,
      "loss": 0.2445,
      "step": 3431
    },
    {
      "epoch": 3.2587230002373606,
      "grad_norm": 0.0639798641204834,
      "learning_rate": 5.4083354661298814e-05,
      "loss": 0.279,
      "step": 3432
    },
    {
      "epoch": 3.2596724424400665,
      "grad_norm": 0.046627677977085114,
      "learning_rate": 5.403035553334881e-05,
      "loss": 0.252,
      "step": 3433
    },
    {
      "epoch": 3.2606218846427724,
      "grad_norm": 0.049054812639951706,
      "learning_rate": 5.397737277258883e-05,
      "loss": 0.2484,
      "step": 3434
    },
    {
      "epoch": 3.2615713268454782,
      "grad_norm": 0.05973832309246063,
      "learning_rate": 5.3924406397883174e-05,
      "loss": 0.2428,
      "step": 3435
    },
    {
      "epoch": 3.262520769048184,
      "grad_norm": 0.05139094963669777,
      "learning_rate": 5.3871456428090025e-05,
      "loss": 0.2456,
      "step": 3436
    },
    {
      "epoch": 3.26347021125089,
      "grad_norm": 0.07416705787181854,
      "learning_rate": 5.3818522882061995e-05,
      "loss": 0.2297,
      "step": 3437
    },
    {
      "epoch": 3.264419653453596,
      "grad_norm": 0.07129445672035217,
      "learning_rate": 5.376560577864567e-05,
      "loss": 0.2438,
      "step": 3438
    },
    {
      "epoch": 3.2653690956563017,
      "grad_norm": 0.050706807523965836,
      "learning_rate": 5.371270513668185e-05,
      "loss": 0.26,
      "step": 3439
    },
    {
      "epoch": 3.266318537859008,
      "grad_norm": 0.04617472365498543,
      "learning_rate": 5.365982097500545e-05,
      "loss": 0.2546,
      "step": 3440
    },
    {
      "epoch": 3.267267980061714,
      "grad_norm": 0.06968291103839874,
      "learning_rate": 5.36069533124455e-05,
      "loss": 0.2617,
      "step": 3441
    },
    {
      "epoch": 3.26821742226442,
      "grad_norm": 0.044583242386579514,
      "learning_rate": 5.355410216782526e-05,
      "loss": 0.2538,
      "step": 3442
    },
    {
      "epoch": 3.2691668644671257,
      "grad_norm": 0.058784905821084976,
      "learning_rate": 5.350126755996199e-05,
      "loss": 0.2661,
      "step": 3443
    },
    {
      "epoch": 3.2701163066698316,
      "grad_norm": 0.05246102437376976,
      "learning_rate": 5.344844950766712e-05,
      "loss": 0.2618,
      "step": 3444
    },
    {
      "epoch": 3.2710657488725374,
      "grad_norm": 0.0502961203455925,
      "learning_rate": 5.339564802974615e-05,
      "loss": 0.2557,
      "step": 3445
    },
    {
      "epoch": 3.2720151910752433,
      "grad_norm": 0.06429962813854218,
      "learning_rate": 5.33428631449987e-05,
      "loss": 0.2554,
      "step": 3446
    },
    {
      "epoch": 3.272964633277949,
      "grad_norm": 0.07397405058145523,
      "learning_rate": 5.329009487221845e-05,
      "loss": 0.2734,
      "step": 3447
    },
    {
      "epoch": 3.273914075480655,
      "grad_norm": 0.04893027991056442,
      "learning_rate": 5.3237343230193296e-05,
      "loss": 0.2609,
      "step": 3448
    },
    {
      "epoch": 3.274863517683361,
      "grad_norm": 0.05211041122674942,
      "learning_rate": 5.318460823770504e-05,
      "loss": 0.2543,
      "step": 3449
    },
    {
      "epoch": 3.275812959886067,
      "grad_norm": 0.04557442665100098,
      "learning_rate": 5.313188991352964e-05,
      "loss": 0.2482,
      "step": 3450
    },
    {
      "epoch": 3.2767624020887727,
      "grad_norm": 0.048406410962343216,
      "learning_rate": 5.307918827643712e-05,
      "loss": 0.259,
      "step": 3451
    },
    {
      "epoch": 3.2777118442914785,
      "grad_norm": 0.05563647300004959,
      "learning_rate": 5.302650334519151e-05,
      "loss": 0.2263,
      "step": 3452
    },
    {
      "epoch": 3.2786612864941844,
      "grad_norm": 0.04374406114220619,
      "learning_rate": 5.2973835138551056e-05,
      "loss": 0.2494,
      "step": 3453
    },
    {
      "epoch": 3.2796107286968907,
      "grad_norm": 0.053693000227212906,
      "learning_rate": 5.292118367526775e-05,
      "loss": 0.2699,
      "step": 3454
    },
    {
      "epoch": 3.2805601708995966,
      "grad_norm": 0.04658116027712822,
      "learning_rate": 5.2868548974087925e-05,
      "loss": 0.2578,
      "step": 3455
    },
    {
      "epoch": 3.2815096131023025,
      "grad_norm": 0.0444650836288929,
      "learning_rate": 5.28159310537518e-05,
      "loss": 0.25,
      "step": 3456
    },
    {
      "epoch": 3.2824590553050084,
      "grad_norm": 0.04959743097424507,
      "learning_rate": 5.2763329932993574e-05,
      "loss": 0.265,
      "step": 3457
    },
    {
      "epoch": 3.2834084975077142,
      "grad_norm": 0.04746592417359352,
      "learning_rate": 5.2710745630541666e-05,
      "loss": 0.2571,
      "step": 3458
    },
    {
      "epoch": 3.28435793971042,
      "grad_norm": 0.09326629340648651,
      "learning_rate": 5.265817816511822e-05,
      "loss": 0.2475,
      "step": 3459
    },
    {
      "epoch": 3.285307381913126,
      "grad_norm": 0.050191480666399,
      "learning_rate": 5.260562755543963e-05,
      "loss": 0.265,
      "step": 3460
    },
    {
      "epoch": 3.286256824115832,
      "grad_norm": 0.04949505627155304,
      "learning_rate": 5.255309382021618e-05,
      "loss": 0.2694,
      "step": 3461
    },
    {
      "epoch": 3.2872062663185377,
      "grad_norm": 0.05030905082821846,
      "learning_rate": 5.250057697815215e-05,
      "loss": 0.2615,
      "step": 3462
    },
    {
      "epoch": 3.2881557085212436,
      "grad_norm": 0.04959176853299141,
      "learning_rate": 5.244807704794582e-05,
      "loss": 0.2615,
      "step": 3463
    },
    {
      "epoch": 3.2891051507239495,
      "grad_norm": 0.04158158227801323,
      "learning_rate": 5.2395594048289444e-05,
      "loss": 0.2469,
      "step": 3464
    },
    {
      "epoch": 3.290054592926656,
      "grad_norm": 0.0466022789478302,
      "learning_rate": 5.234312799786921e-05,
      "loss": 0.2499,
      "step": 3465
    },
    {
      "epoch": 3.2910040351293617,
      "grad_norm": 0.03744713217020035,
      "learning_rate": 5.229067891536539e-05,
      "loss": 0.2512,
      "step": 3466
    },
    {
      "epoch": 3.2919534773320676,
      "grad_norm": 0.04421250522136688,
      "learning_rate": 5.223824681945211e-05,
      "loss": 0.2553,
      "step": 3467
    },
    {
      "epoch": 3.2929029195347734,
      "grad_norm": 0.049400459975004196,
      "learning_rate": 5.2185831728797443e-05,
      "loss": 0.2647,
      "step": 3468
    },
    {
      "epoch": 3.2938523617374793,
      "grad_norm": 0.10226401686668396,
      "learning_rate": 5.213343366206347e-05,
      "loss": 0.2723,
      "step": 3469
    },
    {
      "epoch": 3.294801803940185,
      "grad_norm": 0.04881599545478821,
      "learning_rate": 5.2081052637906104e-05,
      "loss": 0.2612,
      "step": 3470
    },
    {
      "epoch": 3.295751246142891,
      "grad_norm": 0.04717850685119629,
      "learning_rate": 5.2028688674975415e-05,
      "loss": 0.2496,
      "step": 3471
    },
    {
      "epoch": 3.296700688345597,
      "grad_norm": 0.048411279916763306,
      "learning_rate": 5.197634179191508e-05,
      "loss": 0.2527,
      "step": 3472
    },
    {
      "epoch": 3.297650130548303,
      "grad_norm": 0.05463425815105438,
      "learning_rate": 5.192401200736298e-05,
      "loss": 0.2492,
      "step": 3473
    },
    {
      "epoch": 3.2985995727510087,
      "grad_norm": 0.07052161544561386,
      "learning_rate": 5.1871699339950755e-05,
      "loss": 0.271,
      "step": 3474
    },
    {
      "epoch": 3.2995490149537146,
      "grad_norm": 0.05703425779938698,
      "learning_rate": 5.1819403808303926e-05,
      "loss": 0.2719,
      "step": 3475
    },
    {
      "epoch": 3.3004984571564204,
      "grad_norm": 0.07146856188774109,
      "learning_rate": 5.176712543104212e-05,
      "loss": 0.2593,
      "step": 3476
    },
    {
      "epoch": 3.3014478993591263,
      "grad_norm": 0.054076872766017914,
      "learning_rate": 5.171486422677855e-05,
      "loss": 0.2548,
      "step": 3477
    },
    {
      "epoch": 3.302397341561832,
      "grad_norm": 0.0902649313211441,
      "learning_rate": 5.166262021412058e-05,
      "loss": 0.2347,
      "step": 3478
    },
    {
      "epoch": 3.3033467837645385,
      "grad_norm": 0.047539256513118744,
      "learning_rate": 5.161039341166931e-05,
      "loss": 0.2549,
      "step": 3479
    },
    {
      "epoch": 3.3042962259672444,
      "grad_norm": 0.04861597344279289,
      "learning_rate": 5.1558183838019755e-05,
      "loss": 0.2614,
      "step": 3480
    },
    {
      "epoch": 3.3052456681699502,
      "grad_norm": 0.04589053988456726,
      "learning_rate": 5.15059915117608e-05,
      "loss": 0.2631,
      "step": 3481
    },
    {
      "epoch": 3.306195110372656,
      "grad_norm": 0.048206113278865814,
      "learning_rate": 5.145381645147511e-05,
      "loss": 0.2564,
      "step": 3482
    },
    {
      "epoch": 3.307144552575362,
      "grad_norm": 0.05081455409526825,
      "learning_rate": 5.14016586757394e-05,
      "loss": 0.246,
      "step": 3483
    },
    {
      "epoch": 3.308093994778068,
      "grad_norm": 0.08231256902217865,
      "learning_rate": 5.134951820312401e-05,
      "loss": 0.2348,
      "step": 3484
    },
    {
      "epoch": 3.3090434369807737,
      "grad_norm": 0.04811001196503639,
      "learning_rate": 5.129739505219324e-05,
      "loss": 0.2501,
      "step": 3485
    },
    {
      "epoch": 3.3099928791834796,
      "grad_norm": 0.08391708880662918,
      "learning_rate": 5.124528924150521e-05,
      "loss": 0.2364,
      "step": 3486
    },
    {
      "epoch": 3.3109423213861855,
      "grad_norm": 0.0699569433927536,
      "learning_rate": 5.119320078961183e-05,
      "loss": 0.2391,
      "step": 3487
    },
    {
      "epoch": 3.3118917635888914,
      "grad_norm": 0.0865458995103836,
      "learning_rate": 5.114112971505882e-05,
      "loss": 0.2409,
      "step": 3488
    },
    {
      "epoch": 3.3128412057915972,
      "grad_norm": 0.06671997159719467,
      "learning_rate": 5.108907603638582e-05,
      "loss": 0.2678,
      "step": 3489
    },
    {
      "epoch": 3.3137906479943036,
      "grad_norm": 0.05352495610713959,
      "learning_rate": 5.103703977212615e-05,
      "loss": 0.255,
      "step": 3490
    },
    {
      "epoch": 3.3147400901970094,
      "grad_norm": 0.053172145038843155,
      "learning_rate": 5.0985020940807005e-05,
      "loss": 0.2457,
      "step": 3491
    },
    {
      "epoch": 3.3156895323997153,
      "grad_norm": 0.046016935259103775,
      "learning_rate": 5.093301956094934e-05,
      "loss": 0.2526,
      "step": 3492
    },
    {
      "epoch": 3.316638974602421,
      "grad_norm": 0.046845342963933945,
      "learning_rate": 5.0881035651067855e-05,
      "loss": 0.2591,
      "step": 3493
    },
    {
      "epoch": 3.317588416805127,
      "grad_norm": 0.04753097519278526,
      "learning_rate": 5.08290692296712e-05,
      "loss": 0.2601,
      "step": 3494
    },
    {
      "epoch": 3.318537859007833,
      "grad_norm": 0.044487521052360535,
      "learning_rate": 5.077712031526153e-05,
      "loss": 0.2641,
      "step": 3495
    },
    {
      "epoch": 3.319487301210539,
      "grad_norm": 0.08220727741718292,
      "learning_rate": 5.072518892633502e-05,
      "loss": 0.2822,
      "step": 3496
    },
    {
      "epoch": 3.3204367434132447,
      "grad_norm": 0.04794852435588837,
      "learning_rate": 5.0673275081381475e-05,
      "loss": 0.2519,
      "step": 3497
    },
    {
      "epoch": 3.3213861856159506,
      "grad_norm": 0.0479121133685112,
      "learning_rate": 5.0621378798884446e-05,
      "loss": 0.255,
      "step": 3498
    },
    {
      "epoch": 3.3223356278186564,
      "grad_norm": 0.048131756484508514,
      "learning_rate": 5.056950009732135e-05,
      "loss": 0.2409,
      "step": 3499
    },
    {
      "epoch": 3.3232850700213623,
      "grad_norm": 0.0748886838555336,
      "learning_rate": 5.051763899516313e-05,
      "loss": 0.272,
      "step": 3500
    },
    {
      "epoch": 3.3232850700213623,
      "eval_loss": 0.259135365486145,
      "eval_runtime": 37.7828,
      "eval_samples_per_second": 2.276,
      "eval_steps_per_second": 2.276,
      "step": 3500
    },
    {
      "epoch": 3.324234512224068,
      "grad_norm": 0.050895802676677704,
      "learning_rate": 5.046579551087469e-05,
      "loss": 0.2582,
      "step": 3501
    },
    {
      "epoch": 3.325183954426774,
      "grad_norm": 0.04380827769637108,
      "learning_rate": 5.041396966291453e-05,
      "loss": 0.2492,
      "step": 3502
    },
    {
      "epoch": 3.32613339662948,
      "grad_norm": 0.05072317644953728,
      "learning_rate": 5.036216146973491e-05,
      "loss": 0.2591,
      "step": 3503
    },
    {
      "epoch": 3.3270828388321863,
      "grad_norm": 0.04307514801621437,
      "learning_rate": 5.0310370949781794e-05,
      "loss": 0.2523,
      "step": 3504
    },
    {
      "epoch": 3.328032281034892,
      "grad_norm": 0.060126278549432755,
      "learning_rate": 5.02585981214948e-05,
      "loss": 0.2368,
      "step": 3505
    },
    {
      "epoch": 3.328981723237598,
      "grad_norm": 0.04991989582777023,
      "learning_rate": 5.0206843003307406e-05,
      "loss": 0.2634,
      "step": 3506
    },
    {
      "epoch": 3.329931165440304,
      "grad_norm": 0.0549112968146801,
      "learning_rate": 5.0155105613646636e-05,
      "loss": 0.2487,
      "step": 3507
    },
    {
      "epoch": 3.3308806076430097,
      "grad_norm": 0.045029643923044205,
      "learning_rate": 5.0103385970933245e-05,
      "loss": 0.2444,
      "step": 3508
    },
    {
      "epoch": 3.3318300498457156,
      "grad_norm": 0.0672144964337349,
      "learning_rate": 5.005168409358166e-05,
      "loss": 0.2712,
      "step": 3509
    },
    {
      "epoch": 3.3327794920484215,
      "grad_norm": 0.049809981137514114,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.2506,
      "step": 3510
    },
    {
      "epoch": 3.3337289342511274,
      "grad_norm": 0.042252179235219955,
      "learning_rate": 4.9948333708590055e-05,
      "loss": 0.2488,
      "step": 3511
    },
    {
      "epoch": 3.3346783764538332,
      "grad_norm": 0.05093264579772949,
      "learning_rate": 4.989668523774732e-05,
      "loss": 0.2574,
      "step": 3512
    },
    {
      "epoch": 3.335627818656539,
      "grad_norm": 0.05729779228568077,
      "learning_rate": 4.9845054605860775e-05,
      "loss": 0.2311,
      "step": 3513
    },
    {
      "epoch": 3.3365772608592454,
      "grad_norm": 0.047221146523952484,
      "learning_rate": 4.979344183131326e-05,
      "loss": 0.2506,
      "step": 3514
    },
    {
      "epoch": 3.3375267030619513,
      "grad_norm": 0.04780590906739235,
      "learning_rate": 4.974184693248115e-05,
      "loss": 0.2605,
      "step": 3515
    },
    {
      "epoch": 3.338476145264657,
      "grad_norm": 0.05309277027845383,
      "learning_rate": 4.9690269927734414e-05,
      "loss": 0.2641,
      "step": 3516
    },
    {
      "epoch": 3.339425587467363,
      "grad_norm": 0.07272838056087494,
      "learning_rate": 4.963871083543683e-05,
      "loss": 0.269,
      "step": 3517
    },
    {
      "epoch": 3.340375029670069,
      "grad_norm": 0.06501755863428116,
      "learning_rate": 4.958716967394552e-05,
      "loss": 0.2788,
      "step": 3518
    },
    {
      "epoch": 3.341324471872775,
      "grad_norm": 0.04865370690822601,
      "learning_rate": 4.953564646161148e-05,
      "loss": 0.2491,
      "step": 3519
    },
    {
      "epoch": 3.3422739140754807,
      "grad_norm": 0.050759755074977875,
      "learning_rate": 4.94841412167792e-05,
      "loss": 0.2303,
      "step": 3520
    },
    {
      "epoch": 3.3432233562781866,
      "grad_norm": 0.07433301210403442,
      "learning_rate": 4.943265395778672e-05,
      "loss": 0.2493,
      "step": 3521
    },
    {
      "epoch": 3.3441727984808924,
      "grad_norm": 0.052284806966781616,
      "learning_rate": 4.938118470296587e-05,
      "loss": 0.2638,
      "step": 3522
    },
    {
      "epoch": 3.3451222406835983,
      "grad_norm": 0.0797976404428482,
      "learning_rate": 4.932973347064177e-05,
      "loss": 0.254,
      "step": 3523
    },
    {
      "epoch": 3.346071682886304,
      "grad_norm": 0.04564180225133896,
      "learning_rate": 4.9278300279133425e-05,
      "loss": 0.251,
      "step": 3524
    },
    {
      "epoch": 3.34702112508901,
      "grad_norm": 0.059488341212272644,
      "learning_rate": 4.922688514675324e-05,
      "loss": 0.2596,
      "step": 3525
    },
    {
      "epoch": 3.347970567291716,
      "grad_norm": 0.050069741904735565,
      "learning_rate": 4.917548809180724e-05,
      "loss": 0.2574,
      "step": 3526
    },
    {
      "epoch": 3.348920009494422,
      "grad_norm": 0.07369361072778702,
      "learning_rate": 4.912410913259501e-05,
      "loss": 0.2708,
      "step": 3527
    },
    {
      "epoch": 3.349869451697128,
      "grad_norm": 0.06439623236656189,
      "learning_rate": 4.9072748287409677e-05,
      "loss": 0.2558,
      "step": 3528
    },
    {
      "epoch": 3.350818893899834,
      "grad_norm": 0.047009095549583435,
      "learning_rate": 4.902140557453791e-05,
      "loss": 0.2509,
      "step": 3529
    },
    {
      "epoch": 3.35176833610254,
      "grad_norm": 0.06549584865570068,
      "learning_rate": 4.897008101226002e-05,
      "loss": 0.2816,
      "step": 3530
    },
    {
      "epoch": 3.3527177783052458,
      "grad_norm": 0.05347298085689545,
      "learning_rate": 4.891877461884973e-05,
      "loss": 0.2425,
      "step": 3531
    },
    {
      "epoch": 3.3536672205079516,
      "grad_norm": 0.08783289790153503,
      "learning_rate": 4.886748641257435e-05,
      "loss": 0.2358,
      "step": 3532
    },
    {
      "epoch": 3.3546166627106575,
      "grad_norm": 0.04892612621188164,
      "learning_rate": 4.881621641169472e-05,
      "loss": 0.2522,
      "step": 3533
    },
    {
      "epoch": 3.3555661049133634,
      "grad_norm": 0.05587669089436531,
      "learning_rate": 4.8764964634465136e-05,
      "loss": 0.2636,
      "step": 3534
    },
    {
      "epoch": 3.3565155471160693,
      "grad_norm": 0.05909194424748421,
      "learning_rate": 4.871373109913358e-05,
      "loss": 0.2501,
      "step": 3535
    },
    {
      "epoch": 3.357464989318775,
      "grad_norm": 0.07228993624448776,
      "learning_rate": 4.8662515823941255e-05,
      "loss": 0.2849,
      "step": 3536
    },
    {
      "epoch": 3.358414431521481,
      "grad_norm": 0.05400128290057182,
      "learning_rate": 4.861131882712314e-05,
      "loss": 0.2576,
      "step": 3537
    },
    {
      "epoch": 3.359363873724187,
      "grad_norm": 0.0629948079586029,
      "learning_rate": 4.8560140126907564e-05,
      "loss": 0.2369,
      "step": 3538
    },
    {
      "epoch": 3.360313315926893,
      "grad_norm": 0.045974526554346085,
      "learning_rate": 4.85089797415163e-05,
      "loss": 0.253,
      "step": 3539
    },
    {
      "epoch": 3.361262758129599,
      "grad_norm": 0.06558282673358917,
      "learning_rate": 4.845783768916482e-05,
      "loss": 0.2458,
      "step": 3540
    },
    {
      "epoch": 3.362212200332305,
      "grad_norm": 0.04700404405593872,
      "learning_rate": 4.840671398806174e-05,
      "loss": 0.2452,
      "step": 3541
    },
    {
      "epoch": 3.363161642535011,
      "grad_norm": 0.07221265882253647,
      "learning_rate": 4.8355608656409426e-05,
      "loss": 0.2776,
      "step": 3542
    },
    {
      "epoch": 3.3641110847377167,
      "grad_norm": 0.05162516236305237,
      "learning_rate": 4.8304521712403575e-05,
      "loss": 0.2571,
      "step": 3543
    },
    {
      "epoch": 3.3650605269404226,
      "grad_norm": 0.0484129823744297,
      "learning_rate": 4.825345317423334e-05,
      "loss": 0.265,
      "step": 3544
    },
    {
      "epoch": 3.3660099691431284,
      "grad_norm": 0.05916272848844528,
      "learning_rate": 4.820240306008136e-05,
      "loss": 0.2429,
      "step": 3545
    },
    {
      "epoch": 3.3669594113458343,
      "grad_norm": 0.09139509499073029,
      "learning_rate": 4.8151371388123644e-05,
      "loss": 0.2717,
      "step": 3546
    },
    {
      "epoch": 3.36790885354854,
      "grad_norm": 0.07215554267168045,
      "learning_rate": 4.8100358176529794e-05,
      "loss": 0.273,
      "step": 3547
    },
    {
      "epoch": 3.368858295751246,
      "grad_norm": 0.08182163536548615,
      "learning_rate": 4.804936344346258e-05,
      "loss": 0.2581,
      "step": 3548
    },
    {
      "epoch": 3.369807737953952,
      "grad_norm": 0.0751405879855156,
      "learning_rate": 4.799838720707846e-05,
      "loss": 0.2648,
      "step": 3549
    },
    {
      "epoch": 3.370757180156658,
      "grad_norm": 0.06326061487197876,
      "learning_rate": 4.794742948552715e-05,
      "loss": 0.2466,
      "step": 3550
    },
    {
      "epoch": 3.3717066223593637,
      "grad_norm": 0.05183592066168785,
      "learning_rate": 4.78964902969518e-05,
      "loss": 0.2387,
      "step": 3551
    },
    {
      "epoch": 3.3726560645620696,
      "grad_norm": 0.04825581982731819,
      "learning_rate": 4.7845569659489e-05,
      "loss": 0.2598,
      "step": 3552
    },
    {
      "epoch": 3.373605506764776,
      "grad_norm": 0.048268452286720276,
      "learning_rate": 4.779466759126868e-05,
      "loss": 0.2543,
      "step": 3553
    },
    {
      "epoch": 3.3745549489674818,
      "grad_norm": 0.06446631997823715,
      "learning_rate": 4.774378411041416e-05,
      "loss": 0.2519,
      "step": 3554
    },
    {
      "epoch": 3.3755043911701876,
      "grad_norm": 0.05456710606813431,
      "learning_rate": 4.7692919235042255e-05,
      "loss": 0.258,
      "step": 3555
    },
    {
      "epoch": 3.3764538333728935,
      "grad_norm": 0.04780459776520729,
      "learning_rate": 4.764207298326301e-05,
      "loss": 0.2443,
      "step": 3556
    },
    {
      "epoch": 3.3774032755755994,
      "grad_norm": 0.06650704890489578,
      "learning_rate": 4.7591245373179924e-05,
      "loss": 0.2538,
      "step": 3557
    },
    {
      "epoch": 3.3783527177783053,
      "grad_norm": 0.04714061692357063,
      "learning_rate": 4.754043642288981e-05,
      "loss": 0.2513,
      "step": 3558
    },
    {
      "epoch": 3.379302159981011,
      "grad_norm": 0.07686729729175568,
      "learning_rate": 4.748964615048285e-05,
      "loss": 0.2756,
      "step": 3559
    },
    {
      "epoch": 3.380251602183717,
      "grad_norm": 0.09275008738040924,
      "learning_rate": 4.743887457404268e-05,
      "loss": 0.2703,
      "step": 3560
    },
    {
      "epoch": 3.381201044386423,
      "grad_norm": 0.07193811982870102,
      "learning_rate": 4.738812171164604e-05,
      "loss": 0.275,
      "step": 3561
    },
    {
      "epoch": 3.3821504865891288,
      "grad_norm": 0.05852194502949715,
      "learning_rate": 4.733738758136327e-05,
      "loss": 0.2501,
      "step": 3562
    },
    {
      "epoch": 3.3830999287918346,
      "grad_norm": 0.043062902987003326,
      "learning_rate": 4.7286672201257873e-05,
      "loss": 0.2575,
      "step": 3563
    },
    {
      "epoch": 3.384049370994541,
      "grad_norm": 0.06103948876261711,
      "learning_rate": 4.723597558938672e-05,
      "loss": 0.2695,
      "step": 3564
    },
    {
      "epoch": 3.384998813197247,
      "grad_norm": 0.05683431774377823,
      "learning_rate": 4.7185297763800084e-05,
      "loss": 0.2587,
      "step": 3565
    },
    {
      "epoch": 3.3859482553999527,
      "grad_norm": 0.05255016312003136,
      "learning_rate": 4.713463874254135e-05,
      "loss": 0.2517,
      "step": 3566
    },
    {
      "epoch": 3.3868976976026586,
      "grad_norm": 0.04798266291618347,
      "learning_rate": 4.708399854364742e-05,
      "loss": 0.2637,
      "step": 3567
    },
    {
      "epoch": 3.3878471398053644,
      "grad_norm": 0.09509357064962387,
      "learning_rate": 4.7033377185148385e-05,
      "loss": 0.2498,
      "step": 3568
    },
    {
      "epoch": 3.3887965820080703,
      "grad_norm": 0.05289481580257416,
      "learning_rate": 4.698277468506763e-05,
      "loss": 0.2664,
      "step": 3569
    },
    {
      "epoch": 3.389746024210776,
      "grad_norm": 0.049032047390937805,
      "learning_rate": 4.693219106142186e-05,
      "loss": 0.2487,
      "step": 3570
    },
    {
      "epoch": 3.390695466413482,
      "grad_norm": 0.06823623180389404,
      "learning_rate": 4.6881626332221e-05,
      "loss": 0.2503,
      "step": 3571
    },
    {
      "epoch": 3.391644908616188,
      "grad_norm": 0.0999704971909523,
      "learning_rate": 4.683108051546836e-05,
      "loss": 0.2484,
      "step": 3572
    },
    {
      "epoch": 3.392594350818894,
      "grad_norm": 0.0468447245657444,
      "learning_rate": 4.678055362916041e-05,
      "loss": 0.256,
      "step": 3573
    },
    {
      "epoch": 3.3935437930215997,
      "grad_norm": 0.056768015027046204,
      "learning_rate": 4.673004569128692e-05,
      "loss": 0.2709,
      "step": 3574
    },
    {
      "epoch": 3.3944932352243056,
      "grad_norm": 0.05072092264890671,
      "learning_rate": 4.66795567198309e-05,
      "loss": 0.258,
      "step": 3575
    },
    {
      "epoch": 3.3954426774270114,
      "grad_norm": 0.04872744157910347,
      "learning_rate": 4.662908673276862e-05,
      "loss": 0.2504,
      "step": 3576
    },
    {
      "epoch": 3.3963921196297173,
      "grad_norm": 0.07183968275785446,
      "learning_rate": 4.6578635748069566e-05,
      "loss": 0.2338,
      "step": 3577
    },
    {
      "epoch": 3.3973415618324236,
      "grad_norm": 0.051606882363557816,
      "learning_rate": 4.6528203783696534e-05,
      "loss": 0.2472,
      "step": 3578
    },
    {
      "epoch": 3.3982910040351295,
      "grad_norm": 0.06574734300374985,
      "learning_rate": 4.647779085760546e-05,
      "loss": 0.2364,
      "step": 3579
    },
    {
      "epoch": 3.3992404462378354,
      "grad_norm": 0.04813767224550247,
      "learning_rate": 4.6427396987745555e-05,
      "loss": 0.252,
      "step": 3580
    },
    {
      "epoch": 3.4001898884405413,
      "grad_norm": 0.045176900923252106,
      "learning_rate": 4.637702219205919e-05,
      "loss": 0.2516,
      "step": 3581
    },
    {
      "epoch": 3.401139330643247,
      "grad_norm": 0.05002497881650925,
      "learning_rate": 4.6326666488481975e-05,
      "loss": 0.2492,
      "step": 3582
    },
    {
      "epoch": 3.402088772845953,
      "grad_norm": 0.0624178983271122,
      "learning_rate": 4.627632989494283e-05,
      "loss": 0.2495,
      "step": 3583
    },
    {
      "epoch": 3.403038215048659,
      "grad_norm": 0.05267757549881935,
      "learning_rate": 4.622601242936361e-05,
      "loss": 0.2616,
      "step": 3584
    },
    {
      "epoch": 3.4039876572513648,
      "grad_norm": 0.052334707230329514,
      "learning_rate": 4.617571410965964e-05,
      "loss": 0.2609,
      "step": 3585
    },
    {
      "epoch": 3.4049370994540706,
      "grad_norm": 0.061013367027044296,
      "learning_rate": 4.6125434953739275e-05,
      "loss": 0.266,
      "step": 3586
    },
    {
      "epoch": 3.4058865416567765,
      "grad_norm": 0.05254976451396942,
      "learning_rate": 4.607517497950402e-05,
      "loss": 0.2616,
      "step": 3587
    },
    {
      "epoch": 3.4068359838594824,
      "grad_norm": 0.053374432027339935,
      "learning_rate": 4.6024934204848745e-05,
      "loss": 0.2558,
      "step": 3588
    },
    {
      "epoch": 3.4077854260621887,
      "grad_norm": 0.05137740448117256,
      "learning_rate": 4.5974712647661176e-05,
      "loss": 0.2589,
      "step": 3589
    },
    {
      "epoch": 3.4087348682648946,
      "grad_norm": 0.04847841337323189,
      "learning_rate": 4.5924510325822503e-05,
      "loss": 0.2563,
      "step": 3590
    },
    {
      "epoch": 3.4096843104676005,
      "grad_norm": 0.04520807042717934,
      "learning_rate": 4.587432725720687e-05,
      "loss": 0.2516,
      "step": 3591
    },
    {
      "epoch": 3.4106337526703063,
      "grad_norm": 0.045983344316482544,
      "learning_rate": 4.5824163459681656e-05,
      "loss": 0.2417,
      "step": 3592
    },
    {
      "epoch": 3.411583194873012,
      "grad_norm": 0.04577578604221344,
      "learning_rate": 4.577401895110733e-05,
      "loss": 0.2401,
      "step": 3593
    },
    {
      "epoch": 3.412532637075718,
      "grad_norm": 0.0507587231695652,
      "learning_rate": 4.57238937493375e-05,
      "loss": 0.2547,
      "step": 3594
    },
    {
      "epoch": 3.413482079278424,
      "grad_norm": 0.05529985949397087,
      "learning_rate": 4.5673787872218965e-05,
      "loss": 0.2687,
      "step": 3595
    },
    {
      "epoch": 3.41443152148113,
      "grad_norm": 0.05202402547001839,
      "learning_rate": 4.5623701337591565e-05,
      "loss": 0.2576,
      "step": 3596
    },
    {
      "epoch": 3.4153809636838357,
      "grad_norm": 0.04816010966897011,
      "learning_rate": 4.5573634163288294e-05,
      "loss": 0.2457,
      "step": 3597
    },
    {
      "epoch": 3.4163304058865416,
      "grad_norm": 0.06441019475460052,
      "learning_rate": 4.552358636713523e-05,
      "loss": 0.2617,
      "step": 3598
    },
    {
      "epoch": 3.4172798480892475,
      "grad_norm": 0.061169568449258804,
      "learning_rate": 4.547355796695156e-05,
      "loss": 0.2748,
      "step": 3599
    },
    {
      "epoch": 3.4182292902919533,
      "grad_norm": 0.04750002920627594,
      "learning_rate": 4.542354898054953e-05,
      "loss": 0.2625,
      "step": 3600
    },
    {
      "epoch": 3.419178732494659,
      "grad_norm": 0.05058363825082779,
      "learning_rate": 4.537355942573463e-05,
      "loss": 0.2629,
      "step": 3601
    },
    {
      "epoch": 3.420128174697365,
      "grad_norm": 0.052922967821359634,
      "learning_rate": 4.532358932030517e-05,
      "loss": 0.2601,
      "step": 3602
    },
    {
      "epoch": 3.4210776169000714,
      "grad_norm": 0.048897773027420044,
      "learning_rate": 4.527363868205278e-05,
      "loss": 0.2518,
      "step": 3603
    },
    {
      "epoch": 3.4220270591027773,
      "grad_norm": 0.07733273506164551,
      "learning_rate": 4.5223707528762e-05,
      "loss": 0.2596,
      "step": 3604
    },
    {
      "epoch": 3.422976501305483,
      "grad_norm": 0.053071219474077225,
      "learning_rate": 4.517379587821049e-05,
      "loss": 0.2563,
      "step": 3605
    },
    {
      "epoch": 3.423925943508189,
      "grad_norm": 0.06092002987861633,
      "learning_rate": 4.512390374816905e-05,
      "loss": 0.2588,
      "step": 3606
    },
    {
      "epoch": 3.424875385710895,
      "grad_norm": 0.04813700541853905,
      "learning_rate": 4.507403115640131e-05,
      "loss": 0.252,
      "step": 3607
    },
    {
      "epoch": 3.4258248279136008,
      "grad_norm": 0.0530259907245636,
      "learning_rate": 4.502417812066418e-05,
      "loss": 0.2591,
      "step": 3608
    },
    {
      "epoch": 3.4267742701163066,
      "grad_norm": 0.05496685951948166,
      "learning_rate": 4.497434465870749e-05,
      "loss": 0.2537,
      "step": 3609
    },
    {
      "epoch": 3.4277237123190125,
      "grad_norm": 0.09745223820209503,
      "learning_rate": 4.492453078827409e-05,
      "loss": 0.261,
      "step": 3610
    },
    {
      "epoch": 3.4286731545217184,
      "grad_norm": 0.12049929052591324,
      "learning_rate": 4.487473652709989e-05,
      "loss": 0.2217,
      "step": 3611
    },
    {
      "epoch": 3.4296225967244243,
      "grad_norm": 0.06632792949676514,
      "learning_rate": 4.4824961892913786e-05,
      "loss": 0.2442,
      "step": 3612
    },
    {
      "epoch": 3.43057203892713,
      "grad_norm": 0.07434866577386856,
      "learning_rate": 4.477520690343776e-05,
      "loss": 0.2357,
      "step": 3613
    },
    {
      "epoch": 3.4315214811298365,
      "grad_norm": 0.09781359136104584,
      "learning_rate": 4.4725471576386735e-05,
      "loss": 0.2687,
      "step": 3614
    },
    {
      "epoch": 3.4324709233325423,
      "grad_norm": 0.049130357801914215,
      "learning_rate": 4.467575592946864e-05,
      "loss": 0.2536,
      "step": 3615
    },
    {
      "epoch": 3.433420365535248,
      "grad_norm": 0.061379898339509964,
      "learning_rate": 4.4626059980384404e-05,
      "loss": 0.2606,
      "step": 3616
    },
    {
      "epoch": 3.434369807737954,
      "grad_norm": 0.04795004054903984,
      "learning_rate": 4.457638374682794e-05,
      "loss": 0.2451,
      "step": 3617
    },
    {
      "epoch": 3.43531924994066,
      "grad_norm": 0.05227689817547798,
      "learning_rate": 4.452672724648611e-05,
      "loss": 0.248,
      "step": 3618
    },
    {
      "epoch": 3.436268692143366,
      "grad_norm": 0.0511259026825428,
      "learning_rate": 4.447709049703885e-05,
      "loss": 0.2566,
      "step": 3619
    },
    {
      "epoch": 3.4372181343460717,
      "grad_norm": 0.05518270656466484,
      "learning_rate": 4.442747351615899e-05,
      "loss": 0.2534,
      "step": 3620
    },
    {
      "epoch": 3.4381675765487776,
      "grad_norm": 0.06017496809363365,
      "learning_rate": 4.43778763215123e-05,
      "loss": 0.2491,
      "step": 3621
    },
    {
      "epoch": 3.4391170187514835,
      "grad_norm": 0.044689204543828964,
      "learning_rate": 4.432829893075755e-05,
      "loss": 0.2521,
      "step": 3622
    },
    {
      "epoch": 3.4400664609541893,
      "grad_norm": 0.1103987917304039,
      "learning_rate": 4.4278741361546404e-05,
      "loss": 0.2351,
      "step": 3623
    },
    {
      "epoch": 3.441015903156895,
      "grad_norm": 0.07348011434078217,
      "learning_rate": 4.4229203631523616e-05,
      "loss": 0.2433,
      "step": 3624
    },
    {
      "epoch": 3.441965345359601,
      "grad_norm": 0.058661412447690964,
      "learning_rate": 4.417968575832664e-05,
      "loss": 0.2381,
      "step": 3625
    },
    {
      "epoch": 3.442914787562307,
      "grad_norm": 0.05927921459078789,
      "learning_rate": 4.413018775958607e-05,
      "loss": 0.2522,
      "step": 3626
    },
    {
      "epoch": 3.443864229765013,
      "grad_norm": 0.05343930050730705,
      "learning_rate": 4.4080709652925336e-05,
      "loss": 0.255,
      "step": 3627
    },
    {
      "epoch": 3.444813671967719,
      "grad_norm": 0.05600766837596893,
      "learning_rate": 4.4031251455960735e-05,
      "loss": 0.2643,
      "step": 3628
    },
    {
      "epoch": 3.445763114170425,
      "grad_norm": 0.08665018528699875,
      "learning_rate": 4.3981813186301646e-05,
      "loss": 0.2348,
      "step": 3629
    },
    {
      "epoch": 3.446712556373131,
      "grad_norm": 0.05690651014447212,
      "learning_rate": 4.3932394861550106e-05,
      "loss": 0.2613,
      "step": 3630
    },
    {
      "epoch": 3.4476619985758368,
      "grad_norm": 0.050910115242004395,
      "learning_rate": 4.38829964993013e-05,
      "loss": 0.2489,
      "step": 3631
    },
    {
      "epoch": 3.4486114407785426,
      "grad_norm": 0.0760849341750145,
      "learning_rate": 4.383361811714313e-05,
      "loss": 0.2771,
      "step": 3632
    },
    {
      "epoch": 3.4495608829812485,
      "grad_norm": 0.07664194703102112,
      "learning_rate": 4.3784259732656464e-05,
      "loss": 0.2696,
      "step": 3633
    },
    {
      "epoch": 3.4505103251839544,
      "grad_norm": 0.05651098117232323,
      "learning_rate": 4.373492136341502e-05,
      "loss": 0.2629,
      "step": 3634
    },
    {
      "epoch": 3.4514597673866603,
      "grad_norm": 0.058015789836645126,
      "learning_rate": 4.3685603026985354e-05,
      "loss": 0.2509,
      "step": 3635
    },
    {
      "epoch": 3.452409209589366,
      "grad_norm": 0.04693415388464928,
      "learning_rate": 4.3636304740927046e-05,
      "loss": 0.2518,
      "step": 3636
    },
    {
      "epoch": 3.453358651792072,
      "grad_norm": 0.07002034038305283,
      "learning_rate": 4.358702652279235e-05,
      "loss": 0.2443,
      "step": 3637
    },
    {
      "epoch": 3.454308093994778,
      "grad_norm": 0.051126398146152496,
      "learning_rate": 4.3537768390126476e-05,
      "loss": 0.2479,
      "step": 3638
    },
    {
      "epoch": 3.455257536197484,
      "grad_norm": 0.077112577855587,
      "learning_rate": 4.348853036046746e-05,
      "loss": 0.2893,
      "step": 3639
    },
    {
      "epoch": 3.45620697840019,
      "grad_norm": 0.07877352833747864,
      "learning_rate": 4.343931245134616e-05,
      "loss": 0.2471,
      "step": 3640
    },
    {
      "epoch": 3.457156420602896,
      "grad_norm": 0.05718987062573433,
      "learning_rate": 4.3390114680286266e-05,
      "loss": 0.2607,
      "step": 3641
    },
    {
      "epoch": 3.458105862805602,
      "grad_norm": 0.0493890680372715,
      "learning_rate": 4.334093706480443e-05,
      "loss": 0.2641,
      "step": 3642
    },
    {
      "epoch": 3.4590553050083077,
      "grad_norm": 0.08550732582807541,
      "learning_rate": 4.329177962240988e-05,
      "loss": 0.269,
      "step": 3643
    },
    {
      "epoch": 3.4600047472110136,
      "grad_norm": 0.04813637584447861,
      "learning_rate": 4.3242642370604893e-05,
      "loss": 0.2592,
      "step": 3644
    },
    {
      "epoch": 3.4609541894137195,
      "grad_norm": 0.04899512976408005,
      "learning_rate": 4.3193525326884435e-05,
      "loss": 0.246,
      "step": 3645
    },
    {
      "epoch": 3.4619036316164253,
      "grad_norm": 0.049979884177446365,
      "learning_rate": 4.314442850873628e-05,
      "loss": 0.2422,
      "step": 3646
    },
    {
      "epoch": 3.462853073819131,
      "grad_norm": 0.05317814648151398,
      "learning_rate": 4.3095351933641124e-05,
      "loss": 0.2615,
      "step": 3647
    },
    {
      "epoch": 3.463802516021837,
      "grad_norm": 0.10087147355079651,
      "learning_rate": 4.304629561907222e-05,
      "loss": 0.26,
      "step": 3648
    },
    {
      "epoch": 3.464751958224543,
      "grad_norm": 0.08153299987316132,
      "learning_rate": 4.299725958249586e-05,
      "loss": 0.289,
      "step": 3649
    },
    {
      "epoch": 3.465701400427249,
      "grad_norm": 0.047881294041872025,
      "learning_rate": 4.294824384137096e-05,
      "loss": 0.257,
      "step": 3650
    },
    {
      "epoch": 3.4666508426299547,
      "grad_norm": 0.05004667118191719,
      "learning_rate": 4.289924841314922e-05,
      "loss": 0.2535,
      "step": 3651
    },
    {
      "epoch": 3.4676002848326606,
      "grad_norm": 0.04900941625237465,
      "learning_rate": 4.285027331527525e-05,
      "loss": 0.2601,
      "step": 3652
    },
    {
      "epoch": 3.468549727035367,
      "grad_norm": 0.045236848294734955,
      "learning_rate": 4.2801318565186165e-05,
      "loss": 0.2551,
      "step": 3653
    },
    {
      "epoch": 3.4694991692380728,
      "grad_norm": 0.048930395394563675,
      "learning_rate": 4.275238418031209e-05,
      "loss": 0.2458,
      "step": 3654
    },
    {
      "epoch": 3.4704486114407787,
      "grad_norm": 0.04744086042046547,
      "learning_rate": 4.270347017807574e-05,
      "loss": 0.2446,
      "step": 3655
    },
    {
      "epoch": 3.4713980536434845,
      "grad_norm": 0.060483697801828384,
      "learning_rate": 4.265457657589267e-05,
      "loss": 0.254,
      "step": 3656
    },
    {
      "epoch": 3.4723474958461904,
      "grad_norm": 0.06201721355319023,
      "learning_rate": 4.260570339117107e-05,
      "loss": 0.2527,
      "step": 3657
    },
    {
      "epoch": 3.4732969380488963,
      "grad_norm": 0.0473172627389431,
      "learning_rate": 4.2556850641311964e-05,
      "loss": 0.2472,
      "step": 3658
    },
    {
      "epoch": 3.474246380251602,
      "grad_norm": 0.058292679488658905,
      "learning_rate": 4.250801834370899e-05,
      "loss": 0.2579,
      "step": 3659
    },
    {
      "epoch": 3.475195822454308,
      "grad_norm": 0.04996812716126442,
      "learning_rate": 4.245920651574864e-05,
      "loss": 0.2364,
      "step": 3660
    },
    {
      "epoch": 3.476145264657014,
      "grad_norm": 0.0430903434753418,
      "learning_rate": 4.241041517481001e-05,
      "loss": 0.2537,
      "step": 3661
    },
    {
      "epoch": 3.4770947068597198,
      "grad_norm": 0.044585153460502625,
      "learning_rate": 4.236164433826495e-05,
      "loss": 0.2513,
      "step": 3662
    },
    {
      "epoch": 3.4780441490624256,
      "grad_norm": 0.061624232679605484,
      "learning_rate": 4.231289402347798e-05,
      "loss": 0.2685,
      "step": 3663
    },
    {
      "epoch": 3.478993591265132,
      "grad_norm": 0.050394099205732346,
      "learning_rate": 4.22641642478063e-05,
      "loss": 0.2681,
      "step": 3664
    },
    {
      "epoch": 3.479943033467838,
      "grad_norm": 0.044247131794691086,
      "learning_rate": 4.221545502859994e-05,
      "loss": 0.2509,
      "step": 3665
    },
    {
      "epoch": 3.4808924756705437,
      "grad_norm": 0.06681392341852188,
      "learning_rate": 4.216676638320135e-05,
      "loss": 0.2365,
      "step": 3666
    },
    {
      "epoch": 3.4818419178732496,
      "grad_norm": 0.04180409014225006,
      "learning_rate": 4.2118098328945896e-05,
      "loss": 0.2547,
      "step": 3667
    },
    {
      "epoch": 3.4827913600759555,
      "grad_norm": 0.051863424479961395,
      "learning_rate": 4.206945088316151e-05,
      "loss": 0.2546,
      "step": 3668
    },
    {
      "epoch": 3.4837408022786613,
      "grad_norm": 0.07532540708780289,
      "learning_rate": 4.202082406316877e-05,
      "loss": 0.2369,
      "step": 3669
    },
    {
      "epoch": 3.484690244481367,
      "grad_norm": 0.04621781036257744,
      "learning_rate": 4.197221788628096e-05,
      "loss": 0.2426,
      "step": 3670
    },
    {
      "epoch": 3.485639686684073,
      "grad_norm": 0.07874076068401337,
      "learning_rate": 4.1923632369803946e-05,
      "loss": 0.2673,
      "step": 3671
    },
    {
      "epoch": 3.486589128886779,
      "grad_norm": 0.044167328625917435,
      "learning_rate": 4.1875067531036374e-05,
      "loss": 0.2445,
      "step": 3672
    },
    {
      "epoch": 3.487538571089485,
      "grad_norm": 0.05342372506856918,
      "learning_rate": 4.18265233872693e-05,
      "loss": 0.2511,
      "step": 3673
    },
    {
      "epoch": 3.4884880132921907,
      "grad_norm": 0.050559043884277344,
      "learning_rate": 4.1777999955786675e-05,
      "loss": 0.2562,
      "step": 3674
    },
    {
      "epoch": 3.4894374554948966,
      "grad_norm": 0.05720347538590431,
      "learning_rate": 4.172949725386488e-05,
      "loss": 0.2429,
      "step": 3675
    },
    {
      "epoch": 3.4903868976976025,
      "grad_norm": 0.054926902055740356,
      "learning_rate": 4.168101529877297e-05,
      "loss": 0.2567,
      "step": 3676
    },
    {
      "epoch": 3.4913363399003083,
      "grad_norm": 0.05101928859949112,
      "learning_rate": 4.163255410777274e-05,
      "loss": 0.2448,
      "step": 3677
    },
    {
      "epoch": 3.4922857821030147,
      "grad_norm": 0.05976559594273567,
      "learning_rate": 4.158411369811831e-05,
      "loss": 0.2592,
      "step": 3678
    },
    {
      "epoch": 3.4932352243057205,
      "grad_norm": 0.050272174179553986,
      "learning_rate": 4.15356940870567e-05,
      "loss": 0.2539,
      "step": 3679
    },
    {
      "epoch": 3.4941846665084264,
      "grad_norm": 0.05651102960109711,
      "learning_rate": 4.148729529182736e-05,
      "loss": 0.2626,
      "step": 3680
    },
    {
      "epoch": 3.4951341087111323,
      "grad_norm": 0.04792777821421623,
      "learning_rate": 4.143891732966233e-05,
      "loss": 0.2435,
      "step": 3681
    },
    {
      "epoch": 3.496083550913838,
      "grad_norm": 0.05119699984788895,
      "learning_rate": 4.13905602177863e-05,
      "loss": 0.2581,
      "step": 3682
    },
    {
      "epoch": 3.497032993116544,
      "grad_norm": 0.05814248323440552,
      "learning_rate": 4.134222397341649e-05,
      "loss": 0.2246,
      "step": 3683
    },
    {
      "epoch": 3.49798243531925,
      "grad_norm": 0.04919525235891342,
      "learning_rate": 4.129390861376268e-05,
      "loss": 0.2573,
      "step": 3684
    },
    {
      "epoch": 3.498931877521956,
      "grad_norm": 0.049026183784008026,
      "learning_rate": 4.124561415602729e-05,
      "loss": 0.2469,
      "step": 3685
    },
    {
      "epoch": 3.4998813197246617,
      "grad_norm": 0.05068015307188034,
      "learning_rate": 4.119734061740521e-05,
      "loss": 0.2501,
      "step": 3686
    },
    {
      "epoch": 3.5008307619273675,
      "grad_norm": 0.059492919594049454,
      "learning_rate": 4.1149088015083925e-05,
      "loss": 0.24,
      "step": 3687
    },
    {
      "epoch": 3.501780204130074,
      "grad_norm": 0.05316033214330673,
      "learning_rate": 4.110085636624346e-05,
      "loss": 0.2647,
      "step": 3688
    },
    {
      "epoch": 3.5027296463327797,
      "grad_norm": 0.05338413268327713,
      "learning_rate": 4.105264568805633e-05,
      "loss": 0.2561,
      "step": 3689
    },
    {
      "epoch": 3.5036790885354856,
      "grad_norm": 0.05011837184429169,
      "learning_rate": 4.100445599768774e-05,
      "loss": 0.2461,
      "step": 3690
    },
    {
      "epoch": 3.5046285307381915,
      "grad_norm": 0.05094316601753235,
      "learning_rate": 4.0956287312295183e-05,
      "loss": 0.2623,
      "step": 3691
    },
    {
      "epoch": 3.5055779729408973,
      "grad_norm": 0.05120784044265747,
      "learning_rate": 4.090813964902889e-05,
      "loss": 0.2494,
      "step": 3692
    },
    {
      "epoch": 3.506527415143603,
      "grad_norm": 0.07456446439027786,
      "learning_rate": 4.08600130250315e-05,
      "loss": 0.2714,
      "step": 3693
    },
    {
      "epoch": 3.507476857346309,
      "grad_norm": 0.05052252858877182,
      "learning_rate": 4.081190745743814e-05,
      "loss": 0.2547,
      "step": 3694
    },
    {
      "epoch": 3.508426299549015,
      "grad_norm": 0.09999831020832062,
      "learning_rate": 4.0763822963376586e-05,
      "loss": 0.269,
      "step": 3695
    },
    {
      "epoch": 3.509375741751721,
      "grad_norm": 0.0458545945584774,
      "learning_rate": 4.071575955996687e-05,
      "loss": 0.2481,
      "step": 3696
    },
    {
      "epoch": 3.5103251839544267,
      "grad_norm": 0.07028304040431976,
      "learning_rate": 4.066771726432176e-05,
      "loss": 0.2618,
      "step": 3697
    },
    {
      "epoch": 3.5112746261571326,
      "grad_norm": 0.05372486636042595,
      "learning_rate": 4.061969609354634e-05,
      "loss": 0.2526,
      "step": 3698
    },
    {
      "epoch": 3.5122240683598385,
      "grad_norm": 0.06410619616508484,
      "learning_rate": 4.057169606473827e-05,
      "loss": 0.2442,
      "step": 3699
    },
    {
      "epoch": 3.5131735105625443,
      "grad_norm": 0.04757387191057205,
      "learning_rate": 4.0523717194987634e-05,
      "loss": 0.2496,
      "step": 3700
    },
    {
      "epoch": 3.51412295276525,
      "grad_norm": 0.05048002675175667,
      "learning_rate": 4.047575950137693e-05,
      "loss": 0.2443,
      "step": 3701
    },
    {
      "epoch": 3.515072394967956,
      "grad_norm": 0.055802926421165466,
      "learning_rate": 4.0427823000981293e-05,
      "loss": 0.2524,
      "step": 3702
    },
    {
      "epoch": 3.516021837170662,
      "grad_norm": 0.07027749717235565,
      "learning_rate": 4.037990771086813e-05,
      "loss": 0.2691,
      "step": 3703
    },
    {
      "epoch": 3.5169712793733683,
      "grad_norm": 0.07510142773389816,
      "learning_rate": 4.0332013648097375e-05,
      "loss": 0.2485,
      "step": 3704
    },
    {
      "epoch": 3.517920721576074,
      "grad_norm": 0.05176989361643791,
      "learning_rate": 4.028414082972141e-05,
      "loss": 0.2573,
      "step": 3705
    },
    {
      "epoch": 3.51887016377878,
      "grad_norm": 0.05572926253080368,
      "learning_rate": 4.023628927278501e-05,
      "loss": 0.2565,
      "step": 3706
    },
    {
      "epoch": 3.519819605981486,
      "grad_norm": 0.13677886128425598,
      "learning_rate": 4.018845899432539e-05,
      "loss": 0.2491,
      "step": 3707
    },
    {
      "epoch": 3.520769048184192,
      "grad_norm": 0.06043456867337227,
      "learning_rate": 4.0140650011372295e-05,
      "loss": 0.2591,
      "step": 3708
    },
    {
      "epoch": 3.5217184903868977,
      "grad_norm": 0.08667951822280884,
      "learning_rate": 4.009286234094772e-05,
      "loss": 0.2825,
      "step": 3709
    },
    {
      "epoch": 3.5226679325896035,
      "grad_norm": 0.060174696147441864,
      "learning_rate": 4.004509600006619e-05,
      "loss": 0.2709,
      "step": 3710
    },
    {
      "epoch": 3.5236173747923094,
      "grad_norm": 0.05058741942048073,
      "learning_rate": 3.999735100573457e-05,
      "loss": 0.2507,
      "step": 3711
    },
    {
      "epoch": 3.5245668169950153,
      "grad_norm": 0.05240025743842125,
      "learning_rate": 3.9949627374952146e-05,
      "loss": 0.2576,
      "step": 3712
    },
    {
      "epoch": 3.5255162591977216,
      "grad_norm": 0.06356460601091385,
      "learning_rate": 3.990192512471068e-05,
      "loss": 0.2688,
      "step": 3713
    },
    {
      "epoch": 3.5264657014004275,
      "grad_norm": 0.05215013772249222,
      "learning_rate": 3.985424427199413e-05,
      "loss": 0.2395,
      "step": 3714
    },
    {
      "epoch": 3.5274151436031334,
      "grad_norm": 0.05398750677704811,
      "learning_rate": 3.9806584833779025e-05,
      "loss": 0.2528,
      "step": 3715
    },
    {
      "epoch": 3.5283645858058392,
      "grad_norm": 0.09040288627147675,
      "learning_rate": 3.975894682703418e-05,
      "loss": 0.2327,
      "step": 3716
    },
    {
      "epoch": 3.529314028008545,
      "grad_norm": 0.062312051653862,
      "learning_rate": 3.971133026872077e-05,
      "loss": 0.2532,
      "step": 3717
    },
    {
      "epoch": 3.530263470211251,
      "grad_norm": 0.07472645491361618,
      "learning_rate": 3.966373517579244e-05,
      "loss": 0.2651,
      "step": 3718
    },
    {
      "epoch": 3.531212912413957,
      "grad_norm": 0.05372164770960808,
      "learning_rate": 3.961616156519499e-05,
      "loss": 0.2546,
      "step": 3719
    },
    {
      "epoch": 3.5321623546166627,
      "grad_norm": 0.10469577461481094,
      "learning_rate": 3.9568609453866766e-05,
      "loss": 0.2797,
      "step": 3720
    },
    {
      "epoch": 3.5331117968193686,
      "grad_norm": 0.05509538576006889,
      "learning_rate": 3.952107885873839e-05,
      "loss": 0.2541,
      "step": 3721
    },
    {
      "epoch": 3.5340612390220745,
      "grad_norm": 0.10789843648672104,
      "learning_rate": 3.947356979673279e-05,
      "loss": 0.2432,
      "step": 3722
    },
    {
      "epoch": 3.5350106812247803,
      "grad_norm": 0.0722312331199646,
      "learning_rate": 3.942608228476526e-05,
      "loss": 0.2764,
      "step": 3723
    },
    {
      "epoch": 3.5359601234274862,
      "grad_norm": 0.05012940987944603,
      "learning_rate": 3.9378616339743404e-05,
      "loss": 0.2418,
      "step": 3724
    },
    {
      "epoch": 3.536909565630192,
      "grad_norm": 0.10994049906730652,
      "learning_rate": 3.9331171978567204e-05,
      "loss": 0.2588,
      "step": 3725
    },
    {
      "epoch": 3.537859007832898,
      "grad_norm": 0.05162129923701286,
      "learning_rate": 3.9283749218128885e-05,
      "loss": 0.2492,
      "step": 3726
    },
    {
      "epoch": 3.538808450035604,
      "grad_norm": 0.05795177444815636,
      "learning_rate": 3.923634807531301e-05,
      "loss": 0.2527,
      "step": 3727
    },
    {
      "epoch": 3.5397578922383097,
      "grad_norm": 0.0650908499956131,
      "learning_rate": 3.9188968566996455e-05,
      "loss": 0.2529,
      "step": 3728
    },
    {
      "epoch": 3.540707334441016,
      "grad_norm": 0.051869384944438934,
      "learning_rate": 3.914161071004836e-05,
      "loss": 0.2549,
      "step": 3729
    },
    {
      "epoch": 3.541656776643722,
      "grad_norm": 0.07333017140626907,
      "learning_rate": 3.909427452133016e-05,
      "loss": 0.2546,
      "step": 3730
    },
    {
      "epoch": 3.542606218846428,
      "grad_norm": 0.053457971662282944,
      "learning_rate": 3.904696001769571e-05,
      "loss": 0.2526,
      "step": 3731
    },
    {
      "epoch": 3.5435556610491337,
      "grad_norm": 0.0637202113866806,
      "learning_rate": 3.899966721599086e-05,
      "loss": 0.2537,
      "step": 3732
    },
    {
      "epoch": 3.5445051032518395,
      "grad_norm": 0.05182478576898575,
      "learning_rate": 3.8952396133054035e-05,
      "loss": 0.2649,
      "step": 3733
    },
    {
      "epoch": 3.5454545454545454,
      "grad_norm": 0.05615059286355972,
      "learning_rate": 3.890514678571575e-05,
      "loss": 0.2608,
      "step": 3734
    },
    {
      "epoch": 3.5464039876572513,
      "grad_norm": 0.05612653121352196,
      "learning_rate": 3.885791919079878e-05,
      "loss": 0.2641,
      "step": 3735
    },
    {
      "epoch": 3.547353429859957,
      "grad_norm": 0.05406120419502258,
      "learning_rate": 3.88107133651183e-05,
      "loss": 0.2455,
      "step": 3736
    },
    {
      "epoch": 3.548302872062663,
      "grad_norm": 0.08043722808361053,
      "learning_rate": 3.876352932548152e-05,
      "loss": 0.2448,
      "step": 3737
    },
    {
      "epoch": 3.5492523142653694,
      "grad_norm": 0.0515315979719162,
      "learning_rate": 3.871636708868809e-05,
      "loss": 0.2487,
      "step": 3738
    },
    {
      "epoch": 3.5502017564680752,
      "grad_norm": 0.058112140744924545,
      "learning_rate": 3.866922667152979e-05,
      "loss": 0.2543,
      "step": 3739
    },
    {
      "epoch": 3.551151198670781,
      "grad_norm": 0.05072011426091194,
      "learning_rate": 3.862210809079061e-05,
      "loss": 0.2518,
      "step": 3740
    },
    {
      "epoch": 3.552100640873487,
      "grad_norm": 0.07569391280412674,
      "learning_rate": 3.857501136324694e-05,
      "loss": 0.2729,
      "step": 3741
    },
    {
      "epoch": 3.553050083076193,
      "grad_norm": 0.047762371599674225,
      "learning_rate": 3.8527936505667095e-05,
      "loss": 0.2598,
      "step": 3742
    },
    {
      "epoch": 3.5539995252788987,
      "grad_norm": 0.050283271819353104,
      "learning_rate": 3.8480883534811886e-05,
      "loss": 0.2604,
      "step": 3743
    },
    {
      "epoch": 3.5549489674816046,
      "grad_norm": 0.0424044132232666,
      "learning_rate": 3.843385246743417e-05,
      "loss": 0.2461,
      "step": 3744
    },
    {
      "epoch": 3.5558984096843105,
      "grad_norm": 0.04651379585266113,
      "learning_rate": 3.8386843320279076e-05,
      "loss": 0.2483,
      "step": 3745
    },
    {
      "epoch": 3.5568478518870164,
      "grad_norm": 0.05307883024215698,
      "learning_rate": 3.833985611008387e-05,
      "loss": 0.2548,
      "step": 3746
    },
    {
      "epoch": 3.5577972940897222,
      "grad_norm": 0.05513716861605644,
      "learning_rate": 3.829289085357806e-05,
      "loss": 0.263,
      "step": 3747
    },
    {
      "epoch": 3.558746736292428,
      "grad_norm": 0.05174417421221733,
      "learning_rate": 3.824594756748326e-05,
      "loss": 0.2432,
      "step": 3748
    },
    {
      "epoch": 3.559696178495134,
      "grad_norm": 0.0701284259557724,
      "learning_rate": 3.8199026268513424e-05,
      "loss": 0.2642,
      "step": 3749
    },
    {
      "epoch": 3.56064562069784,
      "grad_norm": 0.07058609277009964,
      "learning_rate": 3.815212697337451e-05,
      "loss": 0.2311,
      "step": 3750
    },
    {
      "epoch": 3.5615950629005457,
      "grad_norm": 0.051359616219997406,
      "learning_rate": 3.810524969876471e-05,
      "loss": 0.2645,
      "step": 3751
    },
    {
      "epoch": 3.5625445051032516,
      "grad_norm": 0.05393604561686516,
      "learning_rate": 3.805839446137438e-05,
      "loss": 0.2533,
      "step": 3752
    },
    {
      "epoch": 3.5634939473059575,
      "grad_norm": 0.052022725343704224,
      "learning_rate": 3.8011561277885964e-05,
      "loss": 0.2539,
      "step": 3753
    },
    {
      "epoch": 3.564443389508664,
      "grad_norm": 0.06624822318553925,
      "learning_rate": 3.796475016497424e-05,
      "loss": 0.2495,
      "step": 3754
    },
    {
      "epoch": 3.5653928317113697,
      "grad_norm": 0.05817562714219093,
      "learning_rate": 3.7917961139305836e-05,
      "loss": 0.2562,
      "step": 3755
    },
    {
      "epoch": 3.5663422739140755,
      "grad_norm": 0.12431687861680984,
      "learning_rate": 3.787119421753979e-05,
      "loss": 0.2377,
      "step": 3756
    },
    {
      "epoch": 3.5672917161167814,
      "grad_norm": 0.04925607889890671,
      "learning_rate": 3.7824449416327126e-05,
      "loss": 0.2433,
      "step": 3757
    },
    {
      "epoch": 3.5682411583194873,
      "grad_norm": 0.04910467565059662,
      "learning_rate": 3.777772675231098e-05,
      "loss": 0.2582,
      "step": 3758
    },
    {
      "epoch": 3.569190600522193,
      "grad_norm": 0.04863162711262703,
      "learning_rate": 3.7731026242126766e-05,
      "loss": 0.2571,
      "step": 3759
    },
    {
      "epoch": 3.570140042724899,
      "grad_norm": 0.04957108944654465,
      "learning_rate": 3.768434790240175e-05,
      "loss": 0.2559,
      "step": 3760
    },
    {
      "epoch": 3.571089484927605,
      "grad_norm": 0.051660917699337006,
      "learning_rate": 3.7637691749755546e-05,
      "loss": 0.2636,
      "step": 3761
    },
    {
      "epoch": 3.572038927130311,
      "grad_norm": 0.05814244598150253,
      "learning_rate": 3.759105780079974e-05,
      "loss": 0.2622,
      "step": 3762
    },
    {
      "epoch": 3.572988369333017,
      "grad_norm": 0.07492271810770035,
      "learning_rate": 3.7544446072138054e-05,
      "loss": 0.2386,
      "step": 3763
    },
    {
      "epoch": 3.573937811535723,
      "grad_norm": 0.04846430569887161,
      "learning_rate": 3.749785658036627e-05,
      "loss": 0.2626,
      "step": 3764
    },
    {
      "epoch": 3.574887253738429,
      "grad_norm": 0.055590301752090454,
      "learning_rate": 3.745128934207225e-05,
      "loss": 0.253,
      "step": 3765
    },
    {
      "epoch": 3.5758366959411347,
      "grad_norm": 0.052691273391246796,
      "learning_rate": 3.740474437383602e-05,
      "loss": 0.2511,
      "step": 3766
    },
    {
      "epoch": 3.5767861381438406,
      "grad_norm": 0.07563423365354538,
      "learning_rate": 3.735822169222957e-05,
      "loss": 0.2738,
      "step": 3767
    },
    {
      "epoch": 3.5777355803465465,
      "grad_norm": 0.053566690534353256,
      "learning_rate": 3.7311721313816994e-05,
      "loss": 0.2641,
      "step": 3768
    },
    {
      "epoch": 3.5786850225492524,
      "grad_norm": 0.0528990775346756,
      "learning_rate": 3.726524325515446e-05,
      "loss": 0.2561,
      "step": 3769
    },
    {
      "epoch": 3.5796344647519582,
      "grad_norm": 0.05242493748664856,
      "learning_rate": 3.721878753279017e-05,
      "loss": 0.2663,
      "step": 3770
    },
    {
      "epoch": 3.580583906954664,
      "grad_norm": 0.05699344351887703,
      "learning_rate": 3.7172354163264324e-05,
      "loss": 0.2723,
      "step": 3771
    },
    {
      "epoch": 3.58153334915737,
      "grad_norm": 0.05177152156829834,
      "learning_rate": 3.7125943163109354e-05,
      "loss": 0.2464,
      "step": 3772
    },
    {
      "epoch": 3.582482791360076,
      "grad_norm": 0.05266297236084938,
      "learning_rate": 3.707955454884943e-05,
      "loss": 0.2468,
      "step": 3773
    },
    {
      "epoch": 3.5834322335627817,
      "grad_norm": 0.04910292476415634,
      "learning_rate": 3.703318833700103e-05,
      "loss": 0.254,
      "step": 3774
    },
    {
      "epoch": 3.5843816757654876,
      "grad_norm": 0.07194791734218597,
      "learning_rate": 3.6986844544072494e-05,
      "loss": 0.2385,
      "step": 3775
    },
    {
      "epoch": 3.5853311179681935,
      "grad_norm": 0.04586068168282509,
      "learning_rate": 3.694052318656421e-05,
      "loss": 0.2478,
      "step": 3776
    },
    {
      "epoch": 3.5862805601708994,
      "grad_norm": 0.09382741153240204,
      "learning_rate": 3.689422428096868e-05,
      "loss": 0.2705,
      "step": 3777
    },
    {
      "epoch": 3.5872300023736052,
      "grad_norm": 0.06713338196277618,
      "learning_rate": 3.684794784377018e-05,
      "loss": 0.2813,
      "step": 3778
    },
    {
      "epoch": 3.5881794445763115,
      "grad_norm": 0.12264712899923325,
      "learning_rate": 3.68016938914453e-05,
      "loss": 0.2384,
      "step": 3779
    },
    {
      "epoch": 3.5891288867790174,
      "grad_norm": 0.05466051772236824,
      "learning_rate": 3.675546244046228e-05,
      "loss": 0.2558,
      "step": 3780
    },
    {
      "epoch": 3.5900783289817233,
      "grad_norm": 0.05842528119683266,
      "learning_rate": 3.6709253507281624e-05,
      "loss": 0.2696,
      "step": 3781
    },
    {
      "epoch": 3.591027771184429,
      "grad_norm": 0.05473971739411354,
      "learning_rate": 3.6663067108355776e-05,
      "loss": 0.2518,
      "step": 3782
    },
    {
      "epoch": 3.591977213387135,
      "grad_norm": 0.05151469632983208,
      "learning_rate": 3.661690326012897e-05,
      "loss": 0.2444,
      "step": 3783
    },
    {
      "epoch": 3.592926655589841,
      "grad_norm": 0.048597030341625214,
      "learning_rate": 3.657076197903766e-05,
      "loss": 0.2478,
      "step": 3784
    },
    {
      "epoch": 3.593876097792547,
      "grad_norm": 0.0704786479473114,
      "learning_rate": 3.652464328151002e-05,
      "loss": 0.2697,
      "step": 3785
    },
    {
      "epoch": 3.5948255399952527,
      "grad_norm": 0.061194755136966705,
      "learning_rate": 3.647854718396642e-05,
      "loss": 0.2345,
      "step": 3786
    },
    {
      "epoch": 3.5957749821979585,
      "grad_norm": 0.0968698039650917,
      "learning_rate": 3.643247370281903e-05,
      "loss": 0.2414,
      "step": 3787
    },
    {
      "epoch": 3.596724424400665,
      "grad_norm": 0.054514043033123016,
      "learning_rate": 3.638642285447201e-05,
      "loss": 0.2568,
      "step": 3788
    },
    {
      "epoch": 3.5976738666033707,
      "grad_norm": 0.04937252402305603,
      "learning_rate": 3.6340394655321465e-05,
      "loss": 0.2523,
      "step": 3789
    },
    {
      "epoch": 3.5986233088060766,
      "grad_norm": 0.06724977493286133,
      "learning_rate": 3.62943891217554e-05,
      "loss": 0.2568,
      "step": 3790
    },
    {
      "epoch": 3.5995727510087825,
      "grad_norm": 0.049823611974716187,
      "learning_rate": 3.624840627015385e-05,
      "loss": 0.2551,
      "step": 3791
    },
    {
      "epoch": 3.6005221932114884,
      "grad_norm": 0.0614524707198143,
      "learning_rate": 3.6202446116888666e-05,
      "loss": 0.2546,
      "step": 3792
    },
    {
      "epoch": 3.6014716354141942,
      "grad_norm": 0.0789070874452591,
      "learning_rate": 3.6156508678323676e-05,
      "loss": 0.2715,
      "step": 3793
    },
    {
      "epoch": 3.6024210776169,
      "grad_norm": 0.05372389405965805,
      "learning_rate": 3.611059397081459e-05,
      "loss": 0.2418,
      "step": 3794
    },
    {
      "epoch": 3.603370519819606,
      "grad_norm": 0.057337675243616104,
      "learning_rate": 3.606470201070904e-05,
      "loss": 0.2496,
      "step": 3795
    },
    {
      "epoch": 3.604319962022312,
      "grad_norm": 0.07104726880788803,
      "learning_rate": 3.601883281434652e-05,
      "loss": 0.2539,
      "step": 3796
    },
    {
      "epoch": 3.6052694042250177,
      "grad_norm": 0.05221518501639366,
      "learning_rate": 3.597298639805853e-05,
      "loss": 0.2468,
      "step": 3797
    },
    {
      "epoch": 3.6062188464277236,
      "grad_norm": 0.06567849218845367,
      "learning_rate": 3.5927162778168355e-05,
      "loss": 0.2375,
      "step": 3798
    },
    {
      "epoch": 3.6071682886304295,
      "grad_norm": 0.05055369809269905,
      "learning_rate": 3.588136197099119e-05,
      "loss": 0.2625,
      "step": 3799
    },
    {
      "epoch": 3.6081177308331354,
      "grad_norm": 0.046621520072221756,
      "learning_rate": 3.58355839928341e-05,
      "loss": 0.2403,
      "step": 3800
    },
    {
      "epoch": 3.6090671730358412,
      "grad_norm": 0.06843309849500656,
      "learning_rate": 3.5789828859996025e-05,
      "loss": 0.273,
      "step": 3801
    },
    {
      "epoch": 3.610016615238547,
      "grad_norm": 0.05004161596298218,
      "learning_rate": 3.574409658876785e-05,
      "loss": 0.2514,
      "step": 3802
    },
    {
      "epoch": 3.6109660574412534,
      "grad_norm": 0.04522772133350372,
      "learning_rate": 3.5698387195432146e-05,
      "loss": 0.2468,
      "step": 3803
    },
    {
      "epoch": 3.6119154996439593,
      "grad_norm": 0.05035033077001572,
      "learning_rate": 3.565270069626352e-05,
      "loss": 0.2537,
      "step": 3804
    },
    {
      "epoch": 3.612864941846665,
      "grad_norm": 0.04506196454167366,
      "learning_rate": 3.5607037107528326e-05,
      "loss": 0.2504,
      "step": 3805
    },
    {
      "epoch": 3.613814384049371,
      "grad_norm": 0.09473405033349991,
      "learning_rate": 3.5561396445484765e-05,
      "loss": 0.2283,
      "step": 3806
    },
    {
      "epoch": 3.614763826252077,
      "grad_norm": 0.05030398070812225,
      "learning_rate": 3.5515778726382966e-05,
      "loss": 0.248,
      "step": 3807
    },
    {
      "epoch": 3.615713268454783,
      "grad_norm": 0.05178692564368248,
      "learning_rate": 3.54701839664647e-05,
      "loss": 0.253,
      "step": 3808
    },
    {
      "epoch": 3.6166627106574887,
      "grad_norm": 0.050760503858327866,
      "learning_rate": 3.542461218196379e-05,
      "loss": 0.2512,
      "step": 3809
    },
    {
      "epoch": 3.6176121528601946,
      "grad_norm": 0.045815981924533844,
      "learning_rate": 3.5379063389105727e-05,
      "loss": 0.2473,
      "step": 3810
    },
    {
      "epoch": 3.6185615950629004,
      "grad_norm": 0.07337402552366257,
      "learning_rate": 3.533353760410786e-05,
      "loss": 0.2436,
      "step": 3811
    },
    {
      "epoch": 3.6195110372656063,
      "grad_norm": 0.05059612914919853,
      "learning_rate": 3.528803484317934e-05,
      "loss": 0.25,
      "step": 3812
    },
    {
      "epoch": 3.6204604794683126,
      "grad_norm": 0.04782482236623764,
      "learning_rate": 3.524255512252112e-05,
      "loss": 0.256,
      "step": 3813
    },
    {
      "epoch": 3.6214099216710185,
      "grad_norm": 0.04968879744410515,
      "learning_rate": 3.519709845832598e-05,
      "loss": 0.2486,
      "step": 3814
    },
    {
      "epoch": 3.6223593638737244,
      "grad_norm": 0.05315404012799263,
      "learning_rate": 3.515166486677848e-05,
      "loss": 0.2491,
      "step": 3815
    },
    {
      "epoch": 3.6233088060764302,
      "grad_norm": 0.053226448595523834,
      "learning_rate": 3.510625436405491e-05,
      "loss": 0.2607,
      "step": 3816
    },
    {
      "epoch": 3.624258248279136,
      "grad_norm": 0.052453454583883286,
      "learning_rate": 3.5060866966323405e-05,
      "loss": 0.2579,
      "step": 3817
    },
    {
      "epoch": 3.625207690481842,
      "grad_norm": 0.10191935300827026,
      "learning_rate": 3.501550268974385e-05,
      "loss": 0.2735,
      "step": 3818
    },
    {
      "epoch": 3.626157132684548,
      "grad_norm": 0.05817404016852379,
      "learning_rate": 3.497016155046786e-05,
      "loss": 0.2608,
      "step": 3819
    },
    {
      "epoch": 3.6271065748872537,
      "grad_norm": 0.05418933928012848,
      "learning_rate": 3.4924843564638945e-05,
      "loss": 0.2555,
      "step": 3820
    },
    {
      "epoch": 3.6280560170899596,
      "grad_norm": 0.053123462945222855,
      "learning_rate": 3.487954874839214e-05,
      "loss": 0.2495,
      "step": 3821
    },
    {
      "epoch": 3.6290054592926655,
      "grad_norm": 0.04980839043855667,
      "learning_rate": 3.483427711785449e-05,
      "loss": 0.2564,
      "step": 3822
    },
    {
      "epoch": 3.6299549014953714,
      "grad_norm": 0.06832653284072876,
      "learning_rate": 3.478902868914461e-05,
      "loss": 0.2639,
      "step": 3823
    },
    {
      "epoch": 3.6309043436980772,
      "grad_norm": 0.05591224133968353,
      "learning_rate": 3.4743803478372874e-05,
      "loss": 0.255,
      "step": 3824
    },
    {
      "epoch": 3.631853785900783,
      "grad_norm": 0.05391015112400055,
      "learning_rate": 3.469860150164152e-05,
      "loss": 0.2565,
      "step": 3825
    },
    {
      "epoch": 3.632803228103489,
      "grad_norm": 0.05338095501065254,
      "learning_rate": 3.465342277504428e-05,
      "loss": 0.2568,
      "step": 3826
    },
    {
      "epoch": 3.633752670306195,
      "grad_norm": 0.04986494407057762,
      "learning_rate": 3.460826731466685e-05,
      "loss": 0.2532,
      "step": 3827
    },
    {
      "epoch": 3.634702112508901,
      "grad_norm": 0.053041331470012665,
      "learning_rate": 3.45631351365865e-05,
      "loss": 0.2473,
      "step": 3828
    },
    {
      "epoch": 3.635651554711607,
      "grad_norm": 0.08892609924077988,
      "learning_rate": 3.451802625687225e-05,
      "loss": 0.2584,
      "step": 3829
    },
    {
      "epoch": 3.636600996914313,
      "grad_norm": 0.0916905626654625,
      "learning_rate": 3.447294069158481e-05,
      "loss": 0.2617,
      "step": 3830
    },
    {
      "epoch": 3.637550439117019,
      "grad_norm": 0.05388447269797325,
      "learning_rate": 3.4427878456776573e-05,
      "loss": 0.2533,
      "step": 3831
    },
    {
      "epoch": 3.6384998813197247,
      "grad_norm": 0.09179041534662247,
      "learning_rate": 3.438283956849172e-05,
      "loss": 0.2356,
      "step": 3832
    },
    {
      "epoch": 3.6394493235224306,
      "grad_norm": 0.07118596136569977,
      "learning_rate": 3.433782404276601e-05,
      "loss": 0.2463,
      "step": 3833
    },
    {
      "epoch": 3.6403987657251364,
      "grad_norm": 0.059209588915109634,
      "learning_rate": 3.429283189562694e-05,
      "loss": 0.2575,
      "step": 3834
    },
    {
      "epoch": 3.6413482079278423,
      "grad_norm": 0.06903867423534393,
      "learning_rate": 3.424786314309365e-05,
      "loss": 0.2342,
      "step": 3835
    },
    {
      "epoch": 3.642297650130548,
      "grad_norm": 0.04557694122195244,
      "learning_rate": 3.420291780117698e-05,
      "loss": 0.2468,
      "step": 3836
    },
    {
      "epoch": 3.643247092333254,
      "grad_norm": 0.060068871825933456,
      "learning_rate": 3.415799588587939e-05,
      "loss": 0.2571,
      "step": 3837
    },
    {
      "epoch": 3.6441965345359604,
      "grad_norm": 0.053642213344573975,
      "learning_rate": 3.411309741319511e-05,
      "loss": 0.2604,
      "step": 3838
    },
    {
      "epoch": 3.6451459767386662,
      "grad_norm": 0.05564951151609421,
      "learning_rate": 3.4068222399109884e-05,
      "loss": 0.2595,
      "step": 3839
    },
    {
      "epoch": 3.646095418941372,
      "grad_norm": 0.11451321840286255,
      "learning_rate": 3.402337085960119e-05,
      "loss": 0.2544,
      "step": 3840
    },
    {
      "epoch": 3.647044861144078,
      "grad_norm": 0.05393780395388603,
      "learning_rate": 3.3978542810638125e-05,
      "loss": 0.2515,
      "step": 3841
    },
    {
      "epoch": 3.647994303346784,
      "grad_norm": 0.05257837474346161,
      "learning_rate": 3.393373826818137e-05,
      "loss": 0.2544,
      "step": 3842
    },
    {
      "epoch": 3.6489437455494897,
      "grad_norm": 0.07172742486000061,
      "learning_rate": 3.388895724818341e-05,
      "loss": 0.2375,
      "step": 3843
    },
    {
      "epoch": 3.6498931877521956,
      "grad_norm": 0.06196949630975723,
      "learning_rate": 3.384419976658808e-05,
      "loss": 0.2552,
      "step": 3844
    },
    {
      "epoch": 3.6508426299549015,
      "grad_norm": 0.04753759130835533,
      "learning_rate": 3.37994658393311e-05,
      "loss": 0.2506,
      "step": 3845
    },
    {
      "epoch": 3.6517920721576074,
      "grad_norm": 0.06825859099626541,
      "learning_rate": 3.3754755482339653e-05,
      "loss": 0.2814,
      "step": 3846
    },
    {
      "epoch": 3.6527415143603132,
      "grad_norm": 0.13267584145069122,
      "learning_rate": 3.371006871153254e-05,
      "loss": 0.2458,
      "step": 3847
    },
    {
      "epoch": 3.653690956563019,
      "grad_norm": 0.06627479940652847,
      "learning_rate": 3.366540554282028e-05,
      "loss": 0.2419,
      "step": 3848
    },
    {
      "epoch": 3.654640398765725,
      "grad_norm": 0.04707813635468483,
      "learning_rate": 3.362076599210479e-05,
      "loss": 0.2456,
      "step": 3849
    },
    {
      "epoch": 3.655589840968431,
      "grad_norm": 0.09856180846691132,
      "learning_rate": 3.357615007527976e-05,
      "loss": 0.2373,
      "step": 3850
    },
    {
      "epoch": 3.6565392831711367,
      "grad_norm": 0.05382237583398819,
      "learning_rate": 3.3531557808230387e-05,
      "loss": 0.2615,
      "step": 3851
    },
    {
      "epoch": 3.6574887253738426,
      "grad_norm": 0.057159341871738434,
      "learning_rate": 3.348698920683343e-05,
      "loss": 0.2486,
      "step": 3852
    },
    {
      "epoch": 3.658438167576549,
      "grad_norm": 0.05542777106165886,
      "learning_rate": 3.344244428695728e-05,
      "loss": 0.2537,
      "step": 3853
    },
    {
      "epoch": 3.659387609779255,
      "grad_norm": 0.06226300820708275,
      "learning_rate": 3.3397923064461786e-05,
      "loss": 0.2522,
      "step": 3854
    },
    {
      "epoch": 3.6603370519819607,
      "grad_norm": 0.05447829142212868,
      "learning_rate": 3.3353425555198547e-05,
      "loss": 0.2626,
      "step": 3855
    },
    {
      "epoch": 3.6612864941846666,
      "grad_norm": 0.08190428465604782,
      "learning_rate": 3.330895177501056e-05,
      "loss": 0.2403,
      "step": 3856
    },
    {
      "epoch": 3.6622359363873724,
      "grad_norm": 0.09613342583179474,
      "learning_rate": 3.32645017397324e-05,
      "loss": 0.243,
      "step": 3857
    },
    {
      "epoch": 3.6631853785900783,
      "grad_norm": 0.07900498062372208,
      "learning_rate": 3.3220075465190246e-05,
      "loss": 0.2551,
      "step": 3858
    },
    {
      "epoch": 3.664134820792784,
      "grad_norm": 0.0935310423374176,
      "learning_rate": 3.317567296720177e-05,
      "loss": 0.2445,
      "step": 3859
    },
    {
      "epoch": 3.66508426299549,
      "grad_norm": 0.12767313420772552,
      "learning_rate": 3.313129426157613e-05,
      "loss": 0.2532,
      "step": 3860
    },
    {
      "epoch": 3.666033705198196,
      "grad_norm": 0.051936663687229156,
      "learning_rate": 3.308693936411421e-05,
      "loss": 0.251,
      "step": 3861
    },
    {
      "epoch": 3.666983147400902,
      "grad_norm": 0.05065144971013069,
      "learning_rate": 3.3042608290608124e-05,
      "loss": 0.2514,
      "step": 3862
    },
    {
      "epoch": 3.667932589603608,
      "grad_norm": 0.05864845588803291,
      "learning_rate": 3.2998301056841774e-05,
      "loss": 0.2554,
      "step": 3863
    },
    {
      "epoch": 3.668882031806314,
      "grad_norm": 0.05251367390155792,
      "learning_rate": 3.2954017678590406e-05,
      "loss": 0.2494,
      "step": 3864
    },
    {
      "epoch": 3.66983147400902,
      "grad_norm": 0.047137439250946045,
      "learning_rate": 3.290975817162082e-05,
      "loss": 0.2433,
      "step": 3865
    },
    {
      "epoch": 3.6707809162117258,
      "grad_norm": 0.04499272257089615,
      "learning_rate": 3.2865522551691396e-05,
      "loss": 0.2548,
      "step": 3866
    },
    {
      "epoch": 3.6717303584144316,
      "grad_norm": 0.05629131570458412,
      "learning_rate": 3.282131083455183e-05,
      "loss": 0.2491,
      "step": 3867
    },
    {
      "epoch": 3.6726798006171375,
      "grad_norm": 0.05436830222606659,
      "learning_rate": 3.277712303594349e-05,
      "loss": 0.2521,
      "step": 3868
    },
    {
      "epoch": 3.6736292428198434,
      "grad_norm": 0.052901942282915115,
      "learning_rate": 3.273295917159912e-05,
      "loss": 0.2491,
      "step": 3869
    },
    {
      "epoch": 3.6745786850225493,
      "grad_norm": 0.05838017538189888,
      "learning_rate": 3.268881925724297e-05,
      "loss": 0.2704,
      "step": 3870
    },
    {
      "epoch": 3.675528127225255,
      "grad_norm": 0.05413403362035751,
      "learning_rate": 3.264470330859082e-05,
      "loss": 0.2579,
      "step": 3871
    },
    {
      "epoch": 3.676477569427961,
      "grad_norm": 0.05225560441613197,
      "learning_rate": 3.260061134134976e-05,
      "loss": 0.2567,
      "step": 3872
    },
    {
      "epoch": 3.677427011630667,
      "grad_norm": 0.059620507061481476,
      "learning_rate": 3.255654337121855e-05,
      "loss": 0.2611,
      "step": 3873
    },
    {
      "epoch": 3.6783764538333728,
      "grad_norm": 0.05178217962384224,
      "learning_rate": 3.2512499413887255e-05,
      "loss": 0.2583,
      "step": 3874
    },
    {
      "epoch": 3.6793258960360786,
      "grad_norm": 0.04592955484986305,
      "learning_rate": 3.246847948503744e-05,
      "loss": 0.2579,
      "step": 3875
    },
    {
      "epoch": 3.6802753382387845,
      "grad_norm": 0.051579222083091736,
      "learning_rate": 3.2424483600342104e-05,
      "loss": 0.2579,
      "step": 3876
    },
    {
      "epoch": 3.6812247804414904,
      "grad_norm": 0.04873146116733551,
      "learning_rate": 3.238051177546571e-05,
      "loss": 0.2493,
      "step": 3877
    },
    {
      "epoch": 3.6821742226441967,
      "grad_norm": 0.048910629004240036,
      "learning_rate": 3.2336564026064084e-05,
      "loss": 0.2634,
      "step": 3878
    },
    {
      "epoch": 3.6831236648469026,
      "grad_norm": 0.049188822507858276,
      "learning_rate": 3.229264036778462e-05,
      "loss": 0.2525,
      "step": 3879
    },
    {
      "epoch": 3.6840731070496084,
      "grad_norm": 0.04678282514214516,
      "learning_rate": 3.224874081626601e-05,
      "loss": 0.2537,
      "step": 3880
    },
    {
      "epoch": 3.6850225492523143,
      "grad_norm": 0.06820831447839737,
      "learning_rate": 3.220486538713839e-05,
      "loss": 0.2694,
      "step": 3881
    },
    {
      "epoch": 3.68597199145502,
      "grad_norm": 0.04611432924866676,
      "learning_rate": 3.216101409602333e-05,
      "loss": 0.2534,
      "step": 3882
    },
    {
      "epoch": 3.686921433657726,
      "grad_norm": 0.06939106434583664,
      "learning_rate": 3.211718695853375e-05,
      "loss": 0.2687,
      "step": 3883
    },
    {
      "epoch": 3.687870875860432,
      "grad_norm": 0.04914408549666405,
      "learning_rate": 3.207338399027413e-05,
      "loss": 0.2538,
      "step": 3884
    },
    {
      "epoch": 3.688820318063138,
      "grad_norm": 0.049557916820049286,
      "learning_rate": 3.202960520684009e-05,
      "loss": 0.2307,
      "step": 3885
    },
    {
      "epoch": 3.6897697602658437,
      "grad_norm": 0.05168003961443901,
      "learning_rate": 3.198585062381886e-05,
      "loss": 0.26,
      "step": 3886
    },
    {
      "epoch": 3.6907192024685496,
      "grad_norm": 0.09558333456516266,
      "learning_rate": 3.194212025678896e-05,
      "loss": 0.2583,
      "step": 3887
    },
    {
      "epoch": 3.691668644671256,
      "grad_norm": 0.07344388216733932,
      "learning_rate": 3.1898414121320276e-05,
      "loss": 0.2757,
      "step": 3888
    },
    {
      "epoch": 3.6926180868739618,
      "grad_norm": 0.05108393356204033,
      "learning_rate": 3.185473223297416e-05,
      "loss": 0.2616,
      "step": 3889
    },
    {
      "epoch": 3.6935675290766676,
      "grad_norm": 0.06204976141452789,
      "learning_rate": 3.1811074607303135e-05,
      "loss": 0.2602,
      "step": 3890
    },
    {
      "epoch": 3.6945169712793735,
      "grad_norm": 0.0552101694047451,
      "learning_rate": 3.1767441259851374e-05,
      "loss": 0.2535,
      "step": 3891
    },
    {
      "epoch": 3.6954664134820794,
      "grad_norm": 0.05207996442914009,
      "learning_rate": 3.172383220615408e-05,
      "loss": 0.2513,
      "step": 3892
    },
    {
      "epoch": 3.6964158556847853,
      "grad_norm": 0.06118466332554817,
      "learning_rate": 3.168024746173808e-05,
      "loss": 0.2458,
      "step": 3893
    },
    {
      "epoch": 3.697365297887491,
      "grad_norm": 0.05093217268586159,
      "learning_rate": 3.16366870421214e-05,
      "loss": 0.2507,
      "step": 3894
    },
    {
      "epoch": 3.698314740090197,
      "grad_norm": 0.06563693284988403,
      "learning_rate": 3.1593150962813424e-05,
      "loss": 0.274,
      "step": 3895
    },
    {
      "epoch": 3.699264182292903,
      "grad_norm": 0.07928726822137833,
      "learning_rate": 3.154963923931496e-05,
      "loss": 0.2644,
      "step": 3896
    },
    {
      "epoch": 3.7002136244956088,
      "grad_norm": 0.08785230666399002,
      "learning_rate": 3.1506151887117974e-05,
      "loss": 0.2404,
      "step": 3897
    },
    {
      "epoch": 3.7011630666983146,
      "grad_norm": 0.05994411185383797,
      "learning_rate": 3.146268892170592e-05,
      "loss": 0.2451,
      "step": 3898
    },
    {
      "epoch": 3.7021125089010205,
      "grad_norm": 0.057615093886852264,
      "learning_rate": 3.1419250358553474e-05,
      "loss": 0.2688,
      "step": 3899
    },
    {
      "epoch": 3.7030619511037264,
      "grad_norm": 0.06122539937496185,
      "learning_rate": 3.137583621312665e-05,
      "loss": 0.2647,
      "step": 3900
    },
    {
      "epoch": 3.7040113933064323,
      "grad_norm": 0.04660304635763168,
      "learning_rate": 3.1332446500882794e-05,
      "loss": 0.2519,
      "step": 3901
    },
    {
      "epoch": 3.704960835509138,
      "grad_norm": 0.05333678424358368,
      "learning_rate": 3.12890812372705e-05,
      "loss": 0.2445,
      "step": 3902
    },
    {
      "epoch": 3.7059102777118444,
      "grad_norm": 0.053378038108348846,
      "learning_rate": 3.124574043772967e-05,
      "loss": 0.2421,
      "step": 3903
    },
    {
      "epoch": 3.7068597199145503,
      "grad_norm": 0.06653116643428802,
      "learning_rate": 3.1202424117691566e-05,
      "loss": 0.2571,
      "step": 3904
    },
    {
      "epoch": 3.707809162117256,
      "grad_norm": 0.04834749549627304,
      "learning_rate": 3.115913229257864e-05,
      "loss": 0.2509,
      "step": 3905
    },
    {
      "epoch": 3.708758604319962,
      "grad_norm": 0.08246218413114548,
      "learning_rate": 3.1115864977804676e-05,
      "loss": 0.2326,
      "step": 3906
    },
    {
      "epoch": 3.709708046522668,
      "grad_norm": 0.05619177594780922,
      "learning_rate": 3.107262218877473e-05,
      "loss": 0.2535,
      "step": 3907
    },
    {
      "epoch": 3.710657488725374,
      "grad_norm": 0.08788593858480453,
      "learning_rate": 3.102940394088504e-05,
      "loss": 0.2563,
      "step": 3908
    },
    {
      "epoch": 3.7116069309280797,
      "grad_norm": 0.05328623577952385,
      "learning_rate": 3.0986210249523315e-05,
      "loss": 0.256,
      "step": 3909
    },
    {
      "epoch": 3.7125563731307856,
      "grad_norm": 0.06127059459686279,
      "learning_rate": 3.094304113006824e-05,
      "loss": 0.258,
      "step": 3910
    },
    {
      "epoch": 3.7135058153334914,
      "grad_norm": 0.04658037796616554,
      "learning_rate": 3.089989659788999e-05,
      "loss": 0.2488,
      "step": 3911
    },
    {
      "epoch": 3.7144552575361973,
      "grad_norm": 0.05059393495321274,
      "learning_rate": 3.085677666834986e-05,
      "loss": 0.2474,
      "step": 3912
    },
    {
      "epoch": 3.7154046997389036,
      "grad_norm": 0.054072484374046326,
      "learning_rate": 3.0813681356800405e-05,
      "loss": 0.2475,
      "step": 3913
    },
    {
      "epoch": 3.7163541419416095,
      "grad_norm": 0.06147841364145279,
      "learning_rate": 3.07706106785855e-05,
      "loss": 0.2452,
      "step": 3914
    },
    {
      "epoch": 3.7173035841443154,
      "grad_norm": 0.06334567070007324,
      "learning_rate": 3.072756464904006e-05,
      "loss": 0.2608,
      "step": 3915
    },
    {
      "epoch": 3.7182530263470213,
      "grad_norm": 0.1141507476568222,
      "learning_rate": 3.068454328349044e-05,
      "loss": 0.2692,
      "step": 3916
    },
    {
      "epoch": 3.719202468549727,
      "grad_norm": 0.05562411993741989,
      "learning_rate": 3.064154659725408e-05,
      "loss": 0.2566,
      "step": 3917
    },
    {
      "epoch": 3.720151910752433,
      "grad_norm": 0.05495092645287514,
      "learning_rate": 3.059857460563966e-05,
      "loss": 0.2597,
      "step": 3918
    },
    {
      "epoch": 3.721101352955139,
      "grad_norm": 0.0544046126306057,
      "learning_rate": 3.0555627323947076e-05,
      "loss": 0.2711,
      "step": 3919
    },
    {
      "epoch": 3.7220507951578448,
      "grad_norm": 0.061430271714925766,
      "learning_rate": 3.0512704767467413e-05,
      "loss": 0.2688,
      "step": 3920
    },
    {
      "epoch": 3.7230002373605506,
      "grad_norm": 0.08111298084259033,
      "learning_rate": 3.0469806951483017e-05,
      "loss": 0.2747,
      "step": 3921
    },
    {
      "epoch": 3.7239496795632565,
      "grad_norm": 0.05766540765762329,
      "learning_rate": 3.0426933891267327e-05,
      "loss": 0.2575,
      "step": 3922
    },
    {
      "epoch": 3.7248991217659624,
      "grad_norm": 0.05006188526749611,
      "learning_rate": 3.0384085602085044e-05,
      "loss": 0.2515,
      "step": 3923
    },
    {
      "epoch": 3.7258485639686683,
      "grad_norm": 0.056151438504457474,
      "learning_rate": 3.0341262099191993e-05,
      "loss": 0.2665,
      "step": 3924
    },
    {
      "epoch": 3.726798006171374,
      "grad_norm": 0.060625769197940826,
      "learning_rate": 3.029846339783522e-05,
      "loss": 0.2579,
      "step": 3925
    },
    {
      "epoch": 3.72774744837408,
      "grad_norm": 0.05002756416797638,
      "learning_rate": 3.025568951325287e-05,
      "loss": 0.2626,
      "step": 3926
    },
    {
      "epoch": 3.728696890576786,
      "grad_norm": 0.051841624081134796,
      "learning_rate": 3.021294046067439e-05,
      "loss": 0.2545,
      "step": 3927
    },
    {
      "epoch": 3.729646332779492,
      "grad_norm": 0.04806916415691376,
      "learning_rate": 3.0170216255320262e-05,
      "loss": 0.2278,
      "step": 3928
    },
    {
      "epoch": 3.730595774982198,
      "grad_norm": 0.04851704090833664,
      "learning_rate": 3.0127516912402142e-05,
      "loss": 0.2524,
      "step": 3929
    },
    {
      "epoch": 3.731545217184904,
      "grad_norm": 0.05547931790351868,
      "learning_rate": 3.0084842447122864e-05,
      "loss": 0.2621,
      "step": 3930
    },
    {
      "epoch": 3.73249465938761,
      "grad_norm": 0.05749111995100975,
      "learning_rate": 3.0042192874676365e-05,
      "loss": 0.2245,
      "step": 3931
    },
    {
      "epoch": 3.7334441015903157,
      "grad_norm": 0.045757438987493515,
      "learning_rate": 2.999956821024783e-05,
      "loss": 0.2525,
      "step": 3932
    },
    {
      "epoch": 3.7343935437930216,
      "grad_norm": 0.05155427008867264,
      "learning_rate": 2.9956968469013368e-05,
      "loss": 0.2581,
      "step": 3933
    },
    {
      "epoch": 3.7353429859957274,
      "grad_norm": 0.0530339851975441,
      "learning_rate": 2.991439366614043e-05,
      "loss": 0.2489,
      "step": 3934
    },
    {
      "epoch": 3.7362924281984333,
      "grad_norm": 0.04974301531910896,
      "learning_rate": 2.987184381678747e-05,
      "loss": 0.2487,
      "step": 3935
    },
    {
      "epoch": 3.737241870401139,
      "grad_norm": 0.06348806619644165,
      "learning_rate": 2.9829318936104044e-05,
      "loss": 0.2383,
      "step": 3936
    },
    {
      "epoch": 3.7381913126038455,
      "grad_norm": 0.06022779271006584,
      "learning_rate": 2.978681903923095e-05,
      "loss": 0.2293,
      "step": 3937
    },
    {
      "epoch": 3.7391407548065514,
      "grad_norm": 0.10987991839647293,
      "learning_rate": 2.9744344141299884e-05,
      "loss": 0.2495,
      "step": 3938
    },
    {
      "epoch": 3.7400901970092573,
      "grad_norm": 0.04573450982570648,
      "learning_rate": 2.9701894257433826e-05,
      "loss": 0.2496,
      "step": 3939
    },
    {
      "epoch": 3.741039639211963,
      "grad_norm": 0.0443129725754261,
      "learning_rate": 2.9659469402746777e-05,
      "loss": 0.2487,
      "step": 3940
    },
    {
      "epoch": 3.741989081414669,
      "grad_norm": 0.05262639373540878,
      "learning_rate": 2.9617069592343804e-05,
      "loss": 0.2552,
      "step": 3941
    },
    {
      "epoch": 3.742938523617375,
      "grad_norm": 0.04952634498476982,
      "learning_rate": 2.9574694841321082e-05,
      "loss": 0.2432,
      "step": 3942
    },
    {
      "epoch": 3.7438879658200808,
      "grad_norm": 0.05165358632802963,
      "learning_rate": 2.953234516476584e-05,
      "loss": 0.2466,
      "step": 3943
    },
    {
      "epoch": 3.7448374080227866,
      "grad_norm": 0.05307038128376007,
      "learning_rate": 2.9490020577756473e-05,
      "loss": 0.2457,
      "step": 3944
    },
    {
      "epoch": 3.7457868502254925,
      "grad_norm": 0.0756666511297226,
      "learning_rate": 2.9447721095362324e-05,
      "loss": 0.2401,
      "step": 3945
    },
    {
      "epoch": 3.7467362924281984,
      "grad_norm": 0.11313078552484512,
      "learning_rate": 2.940544673264385e-05,
      "loss": 0.2345,
      "step": 3946
    },
    {
      "epoch": 3.7476857346309043,
      "grad_norm": 0.0538734532892704,
      "learning_rate": 2.9363197504652573e-05,
      "loss": 0.2535,
      "step": 3947
    },
    {
      "epoch": 3.74863517683361,
      "grad_norm": 0.06038649380207062,
      "learning_rate": 2.932097342643103e-05,
      "loss": 0.2531,
      "step": 3948
    },
    {
      "epoch": 3.749584619036316,
      "grad_norm": 0.05886239930987358,
      "learning_rate": 2.927877451301282e-05,
      "loss": 0.2583,
      "step": 3949
    },
    {
      "epoch": 3.750534061239022,
      "grad_norm": 0.05016456916928291,
      "learning_rate": 2.9236600779422673e-05,
      "loss": 0.2526,
      "step": 3950
    },
    {
      "epoch": 3.7514835034417278,
      "grad_norm": 0.04979074373841286,
      "learning_rate": 2.919445224067614e-05,
      "loss": 0.2468,
      "step": 3951
    },
    {
      "epoch": 3.7524329456444336,
      "grad_norm": 0.05229020491242409,
      "learning_rate": 2.9152328911780026e-05,
      "loss": 0.2526,
      "step": 3952
    },
    {
      "epoch": 3.75338238784714,
      "grad_norm": 0.055056676268577576,
      "learning_rate": 2.911023080773204e-05,
      "loss": 0.2489,
      "step": 3953
    },
    {
      "epoch": 3.754331830049846,
      "grad_norm": 0.053240686655044556,
      "learning_rate": 2.9068157943520903e-05,
      "loss": 0.2546,
      "step": 3954
    },
    {
      "epoch": 3.7552812722525517,
      "grad_norm": 0.05663219094276428,
      "learning_rate": 2.902611033412648e-05,
      "loss": 0.2471,
      "step": 3955
    },
    {
      "epoch": 3.7562307144552576,
      "grad_norm": 0.05138954147696495,
      "learning_rate": 2.8984087994519405e-05,
      "loss": 0.2511,
      "step": 3956
    },
    {
      "epoch": 3.7571801566579635,
      "grad_norm": 0.05986800789833069,
      "learning_rate": 2.894209093966157e-05,
      "loss": 0.2565,
      "step": 3957
    },
    {
      "epoch": 3.7581295988606693,
      "grad_norm": 0.04984092339873314,
      "learning_rate": 2.8900119184505704e-05,
      "loss": 0.2565,
      "step": 3958
    },
    {
      "epoch": 3.759079041063375,
      "grad_norm": 0.06115711107850075,
      "learning_rate": 2.8858172743995547e-05,
      "loss": 0.2378,
      "step": 3959
    },
    {
      "epoch": 3.760028483266081,
      "grad_norm": 0.10267126560211182,
      "learning_rate": 2.881625163306596e-05,
      "loss": 0.2467,
      "step": 3960
    },
    {
      "epoch": 3.760977925468787,
      "grad_norm": 0.05122007802128792,
      "learning_rate": 2.8774355866642543e-05,
      "loss": 0.2565,
      "step": 3961
    },
    {
      "epoch": 3.7619273676714933,
      "grad_norm": 0.0978183001279831,
      "learning_rate": 2.87324854596421e-05,
      "loss": 0.247,
      "step": 3962
    },
    {
      "epoch": 3.762876809874199,
      "grad_norm": 0.04869680851697922,
      "learning_rate": 2.8690640426972292e-05,
      "loss": 0.2497,
      "step": 3963
    },
    {
      "epoch": 3.763826252076905,
      "grad_norm": 0.06409458816051483,
      "learning_rate": 2.864882078353176e-05,
      "loss": 0.2604,
      "step": 3964
    },
    {
      "epoch": 3.764775694279611,
      "grad_norm": 0.055247921496629715,
      "learning_rate": 2.8607026544210114e-05,
      "loss": 0.2536,
      "step": 3965
    },
    {
      "epoch": 3.7657251364823168,
      "grad_norm": 0.04963747039437294,
      "learning_rate": 2.8565257723887918e-05,
      "loss": 0.246,
      "step": 3966
    },
    {
      "epoch": 3.7666745786850226,
      "grad_norm": 0.04997308924794197,
      "learning_rate": 2.8523514337436663e-05,
      "loss": 0.2582,
      "step": 3967
    },
    {
      "epoch": 3.7676240208877285,
      "grad_norm": 0.057859741151332855,
      "learning_rate": 2.8481796399718874e-05,
      "loss": 0.2591,
      "step": 3968
    },
    {
      "epoch": 3.7685734630904344,
      "grad_norm": 0.05374854803085327,
      "learning_rate": 2.84401039255879e-05,
      "loss": 0.2586,
      "step": 3969
    },
    {
      "epoch": 3.7695229052931403,
      "grad_norm": 0.05135156214237213,
      "learning_rate": 2.8398436929888085e-05,
      "loss": 0.2621,
      "step": 3970
    },
    {
      "epoch": 3.770472347495846,
      "grad_norm": 0.10533904284238815,
      "learning_rate": 2.8356795427454674e-05,
      "loss": 0.2445,
      "step": 3971
    },
    {
      "epoch": 3.771421789698552,
      "grad_norm": 0.04445955157279968,
      "learning_rate": 2.8315179433113847e-05,
      "loss": 0.2421,
      "step": 3972
    },
    {
      "epoch": 3.772371231901258,
      "grad_norm": 0.05651763081550598,
      "learning_rate": 2.8273588961682774e-05,
      "loss": 0.2545,
      "step": 3973
    },
    {
      "epoch": 3.7733206741039638,
      "grad_norm": 0.05163528770208359,
      "learning_rate": 2.8232024027969362e-05,
      "loss": 0.2576,
      "step": 3974
    },
    {
      "epoch": 3.7742701163066696,
      "grad_norm": 0.09719181060791016,
      "learning_rate": 2.8190484646772607e-05,
      "loss": 0.2358,
      "step": 3975
    },
    {
      "epoch": 3.7752195585093755,
      "grad_norm": 0.05339033901691437,
      "learning_rate": 2.8148970832882326e-05,
      "loss": 0.2527,
      "step": 3976
    },
    {
      "epoch": 3.7761690007120814,
      "grad_norm": 0.055794693529605865,
      "learning_rate": 2.8107482601079183e-05,
      "loss": 0.2472,
      "step": 3977
    },
    {
      "epoch": 3.7771184429147877,
      "grad_norm": 0.0768372118473053,
      "learning_rate": 2.8066019966134904e-05,
      "loss": 0.2547,
      "step": 3978
    },
    {
      "epoch": 3.7780678851174936,
      "grad_norm": 0.06772004067897797,
      "learning_rate": 2.8024582942811862e-05,
      "loss": 0.2601,
      "step": 3979
    },
    {
      "epoch": 3.7790173273201995,
      "grad_norm": 0.07262419164180756,
      "learning_rate": 2.798317154586352e-05,
      "loss": 0.2752,
      "step": 3980
    },
    {
      "epoch": 3.7799667695229053,
      "grad_norm": 0.05989028513431549,
      "learning_rate": 2.7941785790034104e-05,
      "loss": 0.2617,
      "step": 3981
    },
    {
      "epoch": 3.780916211725611,
      "grad_norm": 0.058179810643196106,
      "learning_rate": 2.790042569005874e-05,
      "loss": 0.2415,
      "step": 3982
    },
    {
      "epoch": 3.781865653928317,
      "grad_norm": 0.053919147700071335,
      "learning_rate": 2.7859091260663427e-05,
      "loss": 0.2474,
      "step": 3983
    },
    {
      "epoch": 3.782815096131023,
      "grad_norm": 0.04912007227540016,
      "learning_rate": 2.781778251656498e-05,
      "loss": 0.2537,
      "step": 3984
    },
    {
      "epoch": 3.783764538333729,
      "grad_norm": 0.051920775324106216,
      "learning_rate": 2.7776499472471185e-05,
      "loss": 0.2285,
      "step": 3985
    },
    {
      "epoch": 3.7847139805364347,
      "grad_norm": 0.049069683998823166,
      "learning_rate": 2.773524214308054e-05,
      "loss": 0.2546,
      "step": 3986
    },
    {
      "epoch": 3.785663422739141,
      "grad_norm": 0.05164619907736778,
      "learning_rate": 2.7694010543082472e-05,
      "loss": 0.2411,
      "step": 3987
    },
    {
      "epoch": 3.786612864941847,
      "grad_norm": 0.06377508491277695,
      "learning_rate": 2.7652804687157208e-05,
      "loss": 0.2487,
      "step": 3988
    },
    {
      "epoch": 3.7875623071445528,
      "grad_norm": 0.06035701185464859,
      "learning_rate": 2.7611624589975816e-05,
      "loss": 0.2505,
      "step": 3989
    },
    {
      "epoch": 3.7885117493472587,
      "grad_norm": 0.05832657963037491,
      "learning_rate": 2.7570470266200176e-05,
      "loss": 0.24,
      "step": 3990
    },
    {
      "epoch": 3.7894611915499645,
      "grad_norm": 0.04982447996735573,
      "learning_rate": 2.7529341730483117e-05,
      "loss": 0.2523,
      "step": 3991
    },
    {
      "epoch": 3.7904106337526704,
      "grad_norm": 0.050874706357717514,
      "learning_rate": 2.748823899746805e-05,
      "loss": 0.2556,
      "step": 3992
    },
    {
      "epoch": 3.7913600759553763,
      "grad_norm": 0.053797561675310135,
      "learning_rate": 2.7447162081789423e-05,
      "loss": 0.2425,
      "step": 3993
    },
    {
      "epoch": 3.792309518158082,
      "grad_norm": 0.05993964150547981,
      "learning_rate": 2.7406110998072375e-05,
      "loss": 0.2359,
      "step": 3994
    },
    {
      "epoch": 3.793258960360788,
      "grad_norm": 0.06711548566818237,
      "learning_rate": 2.736508576093285e-05,
      "loss": 0.2406,
      "step": 3995
    },
    {
      "epoch": 3.794208402563494,
      "grad_norm": 0.0883718952536583,
      "learning_rate": 2.7324086384977698e-05,
      "loss": 0.2678,
      "step": 3996
    },
    {
      "epoch": 3.7951578447661998,
      "grad_norm": 0.05363830551505089,
      "learning_rate": 2.728311288480436e-05,
      "loss": 0.2518,
      "step": 3997
    },
    {
      "epoch": 3.7961072869689056,
      "grad_norm": 0.044073816388845444,
      "learning_rate": 2.7242165275001273e-05,
      "loss": 0.252,
      "step": 3998
    },
    {
      "epoch": 3.7970567291716115,
      "grad_norm": 0.055176567286252975,
      "learning_rate": 2.720124357014754e-05,
      "loss": 0.2492,
      "step": 3999
    },
    {
      "epoch": 3.7980061713743174,
      "grad_norm": 0.055266376584768295,
      "learning_rate": 2.716034778481301e-05,
      "loss": 0.2626,
      "step": 4000
    },
    {
      "epoch": 3.7980061713743174,
      "eval_loss": 0.25707772374153137,
      "eval_runtime": 37.9163,
      "eval_samples_per_second": 2.268,
      "eval_steps_per_second": 2.268,
      "step": 4000
    },
    {
      "epoch": 3.7989556135770233,
      "grad_norm": 0.05252851918339729,
      "learning_rate": 2.7119477933558478e-05,
      "loss": 0.2514,
      "step": 4001
    },
    {
      "epoch": 3.799905055779729,
      "grad_norm": 0.052902501076459885,
      "learning_rate": 2.7078634030935258e-05,
      "loss": 0.2384,
      "step": 4002
    },
    {
      "epoch": 3.8008544979824355,
      "grad_norm": 0.07614622265100479,
      "learning_rate": 2.7037816091485668e-05,
      "loss": 0.2608,
      "step": 4003
    },
    {
      "epoch": 3.8018039401851413,
      "grad_norm": 0.14443829655647278,
      "learning_rate": 2.6997024129742542e-05,
      "loss": 0.2511,
      "step": 4004
    },
    {
      "epoch": 3.802753382387847,
      "grad_norm": 0.047908131033182144,
      "learning_rate": 2.6956258160229695e-05,
      "loss": 0.2458,
      "step": 4005
    },
    {
      "epoch": 3.803702824590553,
      "grad_norm": 0.04549311101436615,
      "learning_rate": 2.6915518197461553e-05,
      "loss": 0.2503,
      "step": 4006
    },
    {
      "epoch": 3.804652266793259,
      "grad_norm": 0.06348168849945068,
      "learning_rate": 2.6874804255943297e-05,
      "loss": 0.2641,
      "step": 4007
    },
    {
      "epoch": 3.805601708995965,
      "grad_norm": 0.060389503836631775,
      "learning_rate": 2.683411635017087e-05,
      "loss": 0.2549,
      "step": 4008
    },
    {
      "epoch": 3.8065511511986707,
      "grad_norm": 0.16582825779914856,
      "learning_rate": 2.6793454494630888e-05,
      "loss": 0.2791,
      "step": 4009
    },
    {
      "epoch": 3.8075005934013766,
      "grad_norm": 0.05757433548569679,
      "learning_rate": 2.675281870380082e-05,
      "loss": 0.2486,
      "step": 4010
    },
    {
      "epoch": 3.8084500356040825,
      "grad_norm": 0.06793633848428726,
      "learning_rate": 2.6712208992148736e-05,
      "loss": 0.2618,
      "step": 4011
    },
    {
      "epoch": 3.809399477806789,
      "grad_norm": 0.06024308502674103,
      "learning_rate": 2.6671625374133445e-05,
      "loss": 0.2603,
      "step": 4012
    },
    {
      "epoch": 3.8103489200094947,
      "grad_norm": 0.05375480651855469,
      "learning_rate": 2.6631067864204497e-05,
      "loss": 0.2551,
      "step": 4013
    },
    {
      "epoch": 3.8112983622122005,
      "grad_norm": 0.07164514809846878,
      "learning_rate": 2.6590536476802118e-05,
      "loss": 0.2615,
      "step": 4014
    },
    {
      "epoch": 3.8122478044149064,
      "grad_norm": 0.06168850138783455,
      "learning_rate": 2.65500312263572e-05,
      "loss": 0.2535,
      "step": 4015
    },
    {
      "epoch": 3.8131972466176123,
      "grad_norm": 0.05497964471578598,
      "learning_rate": 2.6509552127291447e-05,
      "loss": 0.2468,
      "step": 4016
    },
    {
      "epoch": 3.814146688820318,
      "grad_norm": 0.06022312492132187,
      "learning_rate": 2.6469099194017143e-05,
      "loss": 0.2685,
      "step": 4017
    },
    {
      "epoch": 3.815096131023024,
      "grad_norm": 0.05725991725921631,
      "learning_rate": 2.6428672440937285e-05,
      "loss": 0.2555,
      "step": 4018
    },
    {
      "epoch": 3.81604557322573,
      "grad_norm": 0.05475730448961258,
      "learning_rate": 2.638827188244556e-05,
      "loss": 0.2643,
      "step": 4019
    },
    {
      "epoch": 3.8169950154284358,
      "grad_norm": 0.05317939445376396,
      "learning_rate": 2.634789753292629e-05,
      "loss": 0.2503,
      "step": 4020
    },
    {
      "epoch": 3.8179444576311417,
      "grad_norm": 0.0483684316277504,
      "learning_rate": 2.6307549406754585e-05,
      "loss": 0.2553,
      "step": 4021
    },
    {
      "epoch": 3.8188938998338475,
      "grad_norm": 0.045412421226501465,
      "learning_rate": 2.626722751829601e-05,
      "loss": 0.243,
      "step": 4022
    },
    {
      "epoch": 3.8198433420365534,
      "grad_norm": 0.05453164875507355,
      "learning_rate": 2.622693188190699e-05,
      "loss": 0.2579,
      "step": 4023
    },
    {
      "epoch": 3.8207927842392593,
      "grad_norm": 0.05538022145628929,
      "learning_rate": 2.6186662511934513e-05,
      "loss": 0.2515,
      "step": 4024
    },
    {
      "epoch": 3.821742226441965,
      "grad_norm": 0.06117261201143265,
      "learning_rate": 2.6146419422716173e-05,
      "loss": 0.2556,
      "step": 4025
    },
    {
      "epoch": 3.822691668644671,
      "grad_norm": 0.06717471033334732,
      "learning_rate": 2.6106202628580355e-05,
      "loss": 0.2536,
      "step": 4026
    },
    {
      "epoch": 3.823641110847377,
      "grad_norm": 0.05201391875743866,
      "learning_rate": 2.6066012143845876e-05,
      "loss": 0.2442,
      "step": 4027
    },
    {
      "epoch": 3.824590553050083,
      "grad_norm": 0.04769446328282356,
      "learning_rate": 2.602584798282237e-05,
      "loss": 0.2501,
      "step": 4028
    },
    {
      "epoch": 3.825539995252789,
      "grad_norm": 0.05002529174089432,
      "learning_rate": 2.5985710159809996e-05,
      "loss": 0.2557,
      "step": 4029
    },
    {
      "epoch": 3.826489437455495,
      "grad_norm": 0.1064804345369339,
      "learning_rate": 2.594559868909956e-05,
      "loss": 0.2545,
      "step": 4030
    },
    {
      "epoch": 3.827438879658201,
      "grad_norm": 0.04813575744628906,
      "learning_rate": 2.5905513584972487e-05,
      "loss": 0.2421,
      "step": 4031
    },
    {
      "epoch": 3.8283883218609067,
      "grad_norm": 0.04889265075325966,
      "learning_rate": 2.58654548617008e-05,
      "loss": 0.2497,
      "step": 4032
    },
    {
      "epoch": 3.8293377640636126,
      "grad_norm": 0.06219366192817688,
      "learning_rate": 2.5825422533547184e-05,
      "loss": 0.2386,
      "step": 4033
    },
    {
      "epoch": 3.8302872062663185,
      "grad_norm": 0.06098899245262146,
      "learning_rate": 2.5785416614764867e-05,
      "loss": 0.2557,
      "step": 4034
    },
    {
      "epoch": 3.8312366484690243,
      "grad_norm": 0.04500975087285042,
      "learning_rate": 2.5745437119597705e-05,
      "loss": 0.2516,
      "step": 4035
    },
    {
      "epoch": 3.83218609067173,
      "grad_norm": 0.048414573073387146,
      "learning_rate": 2.5705484062280106e-05,
      "loss": 0.2564,
      "step": 4036
    },
    {
      "epoch": 3.8331355328744365,
      "grad_norm": 0.057486824691295624,
      "learning_rate": 2.5665557457037128e-05,
      "loss": 0.254,
      "step": 4037
    },
    {
      "epoch": 3.8340849750771424,
      "grad_norm": 0.06463531404733658,
      "learning_rate": 2.5625657318084318e-05,
      "loss": 0.276,
      "step": 4038
    },
    {
      "epoch": 3.8350344172798483,
      "grad_norm": 0.048133254051208496,
      "learning_rate": 2.558578365962796e-05,
      "loss": 0.2585,
      "step": 4039
    },
    {
      "epoch": 3.835983859482554,
      "grad_norm": 0.06365126371383667,
      "learning_rate": 2.5545936495864686e-05,
      "loss": 0.249,
      "step": 4040
    },
    {
      "epoch": 3.83693330168526,
      "grad_norm": 0.055381596088409424,
      "learning_rate": 2.5506115840981904e-05,
      "loss": 0.2498,
      "step": 4041
    },
    {
      "epoch": 3.837882743887966,
      "grad_norm": 0.07869044691324234,
      "learning_rate": 2.5466321709157482e-05,
      "loss": 0.2244,
      "step": 4042
    },
    {
      "epoch": 3.838832186090672,
      "grad_norm": 0.05803418159484863,
      "learning_rate": 2.542655411455982e-05,
      "loss": 0.2711,
      "step": 4043
    },
    {
      "epoch": 3.8397816282933777,
      "grad_norm": 0.05418579280376434,
      "learning_rate": 2.5386813071347992e-05,
      "loss": 0.2653,
      "step": 4044
    },
    {
      "epoch": 3.8407310704960835,
      "grad_norm": 0.06050990894436836,
      "learning_rate": 2.5347098593671414e-05,
      "loss": 0.2497,
      "step": 4045
    },
    {
      "epoch": 3.8416805126987894,
      "grad_norm": 0.05278535932302475,
      "learning_rate": 2.5307410695670275e-05,
      "loss": 0.2542,
      "step": 4046
    },
    {
      "epoch": 3.8426299549014953,
      "grad_norm": 0.05332508683204651,
      "learning_rate": 2.5267749391475148e-05,
      "loss": 0.259,
      "step": 4047
    },
    {
      "epoch": 3.843579397104201,
      "grad_norm": 0.051350705325603485,
      "learning_rate": 2.5228114695207172e-05,
      "loss": 0.2501,
      "step": 4048
    },
    {
      "epoch": 3.844528839306907,
      "grad_norm": 0.05481059476733208,
      "learning_rate": 2.5188506620978025e-05,
      "loss": 0.2639,
      "step": 4049
    },
    {
      "epoch": 3.845478281509613,
      "grad_norm": 0.06032145023345947,
      "learning_rate": 2.514892518288988e-05,
      "loss": 0.2499,
      "step": 4050
    },
    {
      "epoch": 3.846427723712319,
      "grad_norm": 0.051769085228443146,
      "learning_rate": 2.5109370395035514e-05,
      "loss": 0.2352,
      "step": 4051
    },
    {
      "epoch": 3.8473771659150247,
      "grad_norm": 0.05359676852822304,
      "learning_rate": 2.5069842271498102e-05,
      "loss": 0.2507,
      "step": 4052
    },
    {
      "epoch": 3.848326608117731,
      "grad_norm": 0.07494449615478516,
      "learning_rate": 2.5030340826351373e-05,
      "loss": 0.2712,
      "step": 4053
    },
    {
      "epoch": 3.849276050320437,
      "grad_norm": 0.05114074423909187,
      "learning_rate": 2.499086607365957e-05,
      "loss": 0.249,
      "step": 4054
    },
    {
      "epoch": 3.8502254925231427,
      "grad_norm": 0.054538123309612274,
      "learning_rate": 2.4951418027477402e-05,
      "loss": 0.2466,
      "step": 4055
    },
    {
      "epoch": 3.8511749347258486,
      "grad_norm": 0.04820817708969116,
      "learning_rate": 2.491199670185008e-05,
      "loss": 0.2512,
      "step": 4056
    },
    {
      "epoch": 3.8521243769285545,
      "grad_norm": 0.049865830689668655,
      "learning_rate": 2.4872602110813348e-05,
      "loss": 0.253,
      "step": 4057
    },
    {
      "epoch": 3.8530738191312603,
      "grad_norm": 0.0494709387421608,
      "learning_rate": 2.4833234268393378e-05,
      "loss": 0.2594,
      "step": 4058
    },
    {
      "epoch": 3.854023261333966,
      "grad_norm": 0.052232518792152405,
      "learning_rate": 2.479389318860682e-05,
      "loss": 0.2581,
      "step": 4059
    },
    {
      "epoch": 3.854972703536672,
      "grad_norm": 0.05848172679543495,
      "learning_rate": 2.475457888546081e-05,
      "loss": 0.2761,
      "step": 4060
    },
    {
      "epoch": 3.855922145739378,
      "grad_norm": 0.05332505330443382,
      "learning_rate": 2.471529137295292e-05,
      "loss": 0.2544,
      "step": 4061
    },
    {
      "epoch": 3.8568715879420843,
      "grad_norm": 0.05203618109226227,
      "learning_rate": 2.467603066507129e-05,
      "loss": 0.2569,
      "step": 4062
    },
    {
      "epoch": 3.85782103014479,
      "grad_norm": 0.06011023744940758,
      "learning_rate": 2.4636796775794336e-05,
      "loss": 0.2516,
      "step": 4063
    },
    {
      "epoch": 3.858770472347496,
      "grad_norm": 0.05454760044813156,
      "learning_rate": 2.4597589719091107e-05,
      "loss": 0.2633,
      "step": 4064
    },
    {
      "epoch": 3.859719914550202,
      "grad_norm": 0.04533764719963074,
      "learning_rate": 2.4558409508920986e-05,
      "loss": 0.2542,
      "step": 4065
    },
    {
      "epoch": 3.860669356752908,
      "grad_norm": 0.05031617358326912,
      "learning_rate": 2.4519256159233795e-05,
      "loss": 0.252,
      "step": 4066
    },
    {
      "epoch": 3.8616187989556137,
      "grad_norm": 0.05697787180542946,
      "learning_rate": 2.4480129683969932e-05,
      "loss": 0.2593,
      "step": 4067
    },
    {
      "epoch": 3.8625682411583195,
      "grad_norm": 0.0841057151556015,
      "learning_rate": 2.444103009705999e-05,
      "loss": 0.233,
      "step": 4068
    },
    {
      "epoch": 3.8635176833610254,
      "grad_norm": 0.08296506106853485,
      "learning_rate": 2.4401957412425214e-05,
      "loss": 0.2376,
      "step": 4069
    },
    {
      "epoch": 3.8644671255637313,
      "grad_norm": 0.055589281022548676,
      "learning_rate": 2.4362911643977147e-05,
      "loss": 0.2404,
      "step": 4070
    },
    {
      "epoch": 3.865416567766437,
      "grad_norm": 0.058519408106803894,
      "learning_rate": 2.4323892805617777e-05,
      "loss": 0.2535,
      "step": 4071
    },
    {
      "epoch": 3.866366009969143,
      "grad_norm": 0.04924516752362251,
      "learning_rate": 2.4284900911239517e-05,
      "loss": 0.2528,
      "step": 4072
    },
    {
      "epoch": 3.867315452171849,
      "grad_norm": 0.05597160384058952,
      "learning_rate": 2.424593597472512e-05,
      "loss": 0.2488,
      "step": 4073
    },
    {
      "epoch": 3.868264894374555,
      "grad_norm": 0.058191198855638504,
      "learning_rate": 2.420699800994787e-05,
      "loss": 0.2483,
      "step": 4074
    },
    {
      "epoch": 3.8692143365772607,
      "grad_norm": 0.07110270857810974,
      "learning_rate": 2.4168087030771346e-05,
      "loss": 0.2675,
      "step": 4075
    },
    {
      "epoch": 3.8701637787799665,
      "grad_norm": 0.06401795893907547,
      "learning_rate": 2.4129203051049555e-05,
      "loss": 0.2556,
      "step": 4076
    },
    {
      "epoch": 3.8711132209826724,
      "grad_norm": 0.047086797654628754,
      "learning_rate": 2.409034608462686e-05,
      "loss": 0.25,
      "step": 4077
    },
    {
      "epoch": 3.8720626631853787,
      "grad_norm": 0.05323222279548645,
      "learning_rate": 2.405151614533804e-05,
      "loss": 0.2477,
      "step": 4078
    },
    {
      "epoch": 3.8730121053880846,
      "grad_norm": 0.051141295582056046,
      "learning_rate": 2.401271324700821e-05,
      "loss": 0.2563,
      "step": 4079
    },
    {
      "epoch": 3.8739615475907905,
      "grad_norm": 0.05030396580696106,
      "learning_rate": 2.3973937403452983e-05,
      "loss": 0.2639,
      "step": 4080
    },
    {
      "epoch": 3.8749109897934964,
      "grad_norm": 0.058408744633197784,
      "learning_rate": 2.3935188628478123e-05,
      "loss": 0.2501,
      "step": 4081
    },
    {
      "epoch": 3.8758604319962022,
      "grad_norm": 0.055497270077466965,
      "learning_rate": 2.389646693587996e-05,
      "loss": 0.2524,
      "step": 4082
    },
    {
      "epoch": 3.876809874198908,
      "grad_norm": 0.05612848326563835,
      "learning_rate": 2.3857772339445063e-05,
      "loss": 0.2589,
      "step": 4083
    },
    {
      "epoch": 3.877759316401614,
      "grad_norm": 0.0453292652964592,
      "learning_rate": 2.3819104852950368e-05,
      "loss": 0.2514,
      "step": 4084
    },
    {
      "epoch": 3.87870875860432,
      "grad_norm": 0.059018030762672424,
      "learning_rate": 2.3780464490163267e-05,
      "loss": 0.2481,
      "step": 4085
    },
    {
      "epoch": 3.8796582008070257,
      "grad_norm": 0.05215967819094658,
      "learning_rate": 2.3741851264841297e-05,
      "loss": 0.2569,
      "step": 4086
    },
    {
      "epoch": 3.880607643009732,
      "grad_norm": 0.06927253305912018,
      "learning_rate": 2.3703265190732526e-05,
      "loss": 0.2708,
      "step": 4087
    },
    {
      "epoch": 3.881557085212438,
      "grad_norm": 0.05080414563417435,
      "learning_rate": 2.3664706281575233e-05,
      "loss": 0.2596,
      "step": 4088
    },
    {
      "epoch": 3.882506527415144,
      "grad_norm": 0.04832320287823677,
      "learning_rate": 2.3626174551098046e-05,
      "loss": 0.2498,
      "step": 4089
    },
    {
      "epoch": 3.8834559696178497,
      "grad_norm": 0.059689346700906754,
      "learning_rate": 2.3587670013020024e-05,
      "loss": 0.235,
      "step": 4090
    },
    {
      "epoch": 3.8844054118205555,
      "grad_norm": 0.049732983112335205,
      "learning_rate": 2.3549192681050336e-05,
      "loss": 0.24,
      "step": 4091
    },
    {
      "epoch": 3.8853548540232614,
      "grad_norm": 0.05987070873379707,
      "learning_rate": 2.3510742568888656e-05,
      "loss": 0.2525,
      "step": 4092
    },
    {
      "epoch": 3.8863042962259673,
      "grad_norm": 0.05327129364013672,
      "learning_rate": 2.3472319690224886e-05,
      "loss": 0.2534,
      "step": 4093
    },
    {
      "epoch": 3.887253738428673,
      "grad_norm": 0.059572651982307434,
      "learning_rate": 2.3433924058739233e-05,
      "loss": 0.2512,
      "step": 4094
    },
    {
      "epoch": 3.888203180631379,
      "grad_norm": 0.06076107546687126,
      "learning_rate": 2.339555568810221e-05,
      "loss": 0.2539,
      "step": 4095
    },
    {
      "epoch": 3.889152622834085,
      "grad_norm": 0.04622993990778923,
      "learning_rate": 2.335721459197462e-05,
      "loss": 0.2533,
      "step": 4096
    },
    {
      "epoch": 3.890102065036791,
      "grad_norm": 0.05739247426390648,
      "learning_rate": 2.3318900784007524e-05,
      "loss": 0.2599,
      "step": 4097
    },
    {
      "epoch": 3.8910515072394967,
      "grad_norm": 0.0760936364531517,
      "learning_rate": 2.3280614277842382e-05,
      "loss": 0.2499,
      "step": 4098
    },
    {
      "epoch": 3.8920009494422025,
      "grad_norm": 0.053201522678136826,
      "learning_rate": 2.32423550871108e-05,
      "loss": 0.2489,
      "step": 4099
    },
    {
      "epoch": 3.8929503916449084,
      "grad_norm": 0.06175504997372627,
      "learning_rate": 2.3204123225434715e-05,
      "loss": 0.2738,
      "step": 4100
    },
    {
      "epoch": 3.8938998338476143,
      "grad_norm": 0.05046062543988228,
      "learning_rate": 2.316591870642635e-05,
      "loss": 0.2543,
      "step": 4101
    },
    {
      "epoch": 3.89484927605032,
      "grad_norm": 0.14622515439987183,
      "learning_rate": 2.312774154368812e-05,
      "loss": 0.2286,
      "step": 4102
    },
    {
      "epoch": 3.8957987182530265,
      "grad_norm": 0.04975195601582527,
      "learning_rate": 2.3089591750812846e-05,
      "loss": 0.2512,
      "step": 4103
    },
    {
      "epoch": 3.8967481604557324,
      "grad_norm": 0.04820247367024422,
      "learning_rate": 2.3051469341383402e-05,
      "loss": 0.2512,
      "step": 4104
    },
    {
      "epoch": 3.8976976026584382,
      "grad_norm": 0.07370350509881973,
      "learning_rate": 2.3013374328973114e-05,
      "loss": 0.246,
      "step": 4105
    },
    {
      "epoch": 3.898647044861144,
      "grad_norm": 0.054529186338186264,
      "learning_rate": 2.2975306727145418e-05,
      "loss": 0.2547,
      "step": 4106
    },
    {
      "epoch": 3.89959648706385,
      "grad_norm": 0.05046022683382034,
      "learning_rate": 2.293726654945402e-05,
      "loss": 0.2458,
      "step": 4107
    },
    {
      "epoch": 3.900545929266556,
      "grad_norm": 0.057758230715990067,
      "learning_rate": 2.2899253809442944e-05,
      "loss": 0.2657,
      "step": 4108
    },
    {
      "epoch": 3.9014953714692617,
      "grad_norm": 0.09853757172822952,
      "learning_rate": 2.2861268520646274e-05,
      "loss": 0.235,
      "step": 4109
    },
    {
      "epoch": 3.9024448136719676,
      "grad_norm": 0.05186276137828827,
      "learning_rate": 2.2823310696588494e-05,
      "loss": 0.2539,
      "step": 4110
    },
    {
      "epoch": 3.9033942558746735,
      "grad_norm": 0.053207337856292725,
      "learning_rate": 2.2785380350784237e-05,
      "loss": 0.2451,
      "step": 4111
    },
    {
      "epoch": 3.90434369807738,
      "grad_norm": 0.07637642323970795,
      "learning_rate": 2.2747477496738334e-05,
      "loss": 0.2699,
      "step": 4112
    },
    {
      "epoch": 3.9052931402800857,
      "grad_norm": 0.10490339994430542,
      "learning_rate": 2.270960214794584e-05,
      "loss": 0.2484,
      "step": 4113
    },
    {
      "epoch": 3.9062425824827915,
      "grad_norm": 0.07953619956970215,
      "learning_rate": 2.2671754317892013e-05,
      "loss": 0.2624,
      "step": 4114
    },
    {
      "epoch": 3.9071920246854974,
      "grad_norm": 0.05667957291007042,
      "learning_rate": 2.2633934020052383e-05,
      "loss": 0.2618,
      "step": 4115
    },
    {
      "epoch": 3.9081414668882033,
      "grad_norm": 0.054316237568855286,
      "learning_rate": 2.2596141267892568e-05,
      "loss": 0.2555,
      "step": 4116
    },
    {
      "epoch": 3.909090909090909,
      "grad_norm": 0.04946750029921532,
      "learning_rate": 2.2558376074868448e-05,
      "loss": 0.2478,
      "step": 4117
    },
    {
      "epoch": 3.910040351293615,
      "grad_norm": 0.06284037977457047,
      "learning_rate": 2.2520638454426068e-05,
      "loss": 0.2634,
      "step": 4118
    },
    {
      "epoch": 3.910989793496321,
      "grad_norm": 0.07433723658323288,
      "learning_rate": 2.2482928420001657e-05,
      "loss": 0.2628,
      "step": 4119
    },
    {
      "epoch": 3.911939235699027,
      "grad_norm": 0.07572853565216064,
      "learning_rate": 2.2445245985021614e-05,
      "loss": 0.237,
      "step": 4120
    },
    {
      "epoch": 3.9128886779017327,
      "grad_norm": 0.0872272476553917,
      "learning_rate": 2.2407591162902573e-05,
      "loss": 0.2529,
      "step": 4121
    },
    {
      "epoch": 3.9138381201044385,
      "grad_norm": 0.05421094223856926,
      "learning_rate": 2.23699639670512e-05,
      "loss": 0.2445,
      "step": 4122
    },
    {
      "epoch": 3.9147875623071444,
      "grad_norm": 0.05467075854539871,
      "learning_rate": 2.2332364410864493e-05,
      "loss": 0.2589,
      "step": 4123
    },
    {
      "epoch": 3.9157370045098503,
      "grad_norm": 0.10132791101932526,
      "learning_rate": 2.229479250772949e-05,
      "loss": 0.2544,
      "step": 4124
    },
    {
      "epoch": 3.916686446712556,
      "grad_norm": 0.06448742747306824,
      "learning_rate": 2.2257248271023423e-05,
      "loss": 0.254,
      "step": 4125
    },
    {
      "epoch": 3.917635888915262,
      "grad_norm": 0.06296240538358688,
      "learning_rate": 2.221973171411367e-05,
      "loss": 0.2405,
      "step": 4126
    },
    {
      "epoch": 3.918585331117968,
      "grad_norm": 0.062131330370903015,
      "learning_rate": 2.218224285035774e-05,
      "loss": 0.2573,
      "step": 4127
    },
    {
      "epoch": 3.9195347733206742,
      "grad_norm": 0.07281263172626495,
      "learning_rate": 2.2144781693103357e-05,
      "loss": 0.2509,
      "step": 4128
    },
    {
      "epoch": 3.92048421552338,
      "grad_norm": 0.05253308266401291,
      "learning_rate": 2.210734825568822e-05,
      "loss": 0.2506,
      "step": 4129
    },
    {
      "epoch": 3.921433657726086,
      "grad_norm": 0.05333153158426285,
      "learning_rate": 2.2069942551440358e-05,
      "loss": 0.2614,
      "step": 4130
    },
    {
      "epoch": 3.922383099928792,
      "grad_norm": 0.055194880813360214,
      "learning_rate": 2.2032564593677774e-05,
      "loss": 0.2503,
      "step": 4131
    },
    {
      "epoch": 3.9233325421314977,
      "grad_norm": 0.05564767122268677,
      "learning_rate": 2.199521439570863e-05,
      "loss": 0.2464,
      "step": 4132
    },
    {
      "epoch": 3.9242819843342036,
      "grad_norm": 0.05214362591505051,
      "learning_rate": 2.1957891970831302e-05,
      "loss": 0.2367,
      "step": 4133
    },
    {
      "epoch": 3.9252314265369095,
      "grad_norm": 0.06188051030039787,
      "learning_rate": 2.192059733233408e-05,
      "loss": 0.247,
      "step": 4134
    },
    {
      "epoch": 3.9261808687396154,
      "grad_norm": 0.0542658269405365,
      "learning_rate": 2.1883330493495557e-05,
      "loss": 0.2525,
      "step": 4135
    },
    {
      "epoch": 3.9271303109423212,
      "grad_norm": 0.05622916668653488,
      "learning_rate": 2.1846091467584318e-05,
      "loss": 0.2442,
      "step": 4136
    },
    {
      "epoch": 3.9280797531450276,
      "grad_norm": 0.05223899707198143,
      "learning_rate": 2.1808880267859078e-05,
      "loss": 0.2414,
      "step": 4137
    },
    {
      "epoch": 3.9290291953477334,
      "grad_norm": 0.051392000168561935,
      "learning_rate": 2.177169690756864e-05,
      "loss": 0.2443,
      "step": 4138
    },
    {
      "epoch": 3.9299786375504393,
      "grad_norm": 0.053416091948747635,
      "learning_rate": 2.1734541399951857e-05,
      "loss": 0.2491,
      "step": 4139
    },
    {
      "epoch": 3.930928079753145,
      "grad_norm": 0.055155426263809204,
      "learning_rate": 2.1697413758237784e-05,
      "loss": 0.2528,
      "step": 4140
    },
    {
      "epoch": 3.931877521955851,
      "grad_norm": 0.0750957801938057,
      "learning_rate": 2.166031399564542e-05,
      "loss": 0.2482,
      "step": 4141
    },
    {
      "epoch": 3.932826964158557,
      "grad_norm": 0.049310583621263504,
      "learning_rate": 2.1623242125383903e-05,
      "loss": 0.249,
      "step": 4142
    },
    {
      "epoch": 3.933776406361263,
      "grad_norm": 0.054114725440740585,
      "learning_rate": 2.1586198160652427e-05,
      "loss": 0.2475,
      "step": 4143
    },
    {
      "epoch": 3.9347258485639687,
      "grad_norm": 0.05737404525279999,
      "learning_rate": 2.1549182114640252e-05,
      "loss": 0.2535,
      "step": 4144
    },
    {
      "epoch": 3.9356752907666746,
      "grad_norm": 0.0634528249502182,
      "learning_rate": 2.1512194000526676e-05,
      "loss": 0.2156,
      "step": 4145
    },
    {
      "epoch": 3.9366247329693804,
      "grad_norm": 0.07257208228111267,
      "learning_rate": 2.1475233831481122e-05,
      "loss": 0.2578,
      "step": 4146
    },
    {
      "epoch": 3.9375741751720863,
      "grad_norm": 0.056972626596689224,
      "learning_rate": 2.1438301620662993e-05,
      "loss": 0.2551,
      "step": 4147
    },
    {
      "epoch": 3.938523617374792,
      "grad_norm": 0.05729486420750618,
      "learning_rate": 2.1401397381221767e-05,
      "loss": 0.2744,
      "step": 4148
    },
    {
      "epoch": 3.939473059577498,
      "grad_norm": 0.05070596560835838,
      "learning_rate": 2.136452112629693e-05,
      "loss": 0.2602,
      "step": 4149
    },
    {
      "epoch": 3.940422501780204,
      "grad_norm": 0.046729620546102524,
      "learning_rate": 2.1327672869018032e-05,
      "loss": 0.2518,
      "step": 4150
    },
    {
      "epoch": 3.94137194398291,
      "grad_norm": 0.048859477043151855,
      "learning_rate": 2.1290852622504732e-05,
      "loss": 0.245,
      "step": 4151
    },
    {
      "epoch": 3.942321386185616,
      "grad_norm": 0.05489668622612953,
      "learning_rate": 2.1254060399866505e-05,
      "loss": 0.2588,
      "step": 4152
    },
    {
      "epoch": 3.943270828388322,
      "grad_norm": 0.04978334531188011,
      "learning_rate": 2.1217296214203086e-05,
      "loss": 0.2504,
      "step": 4153
    },
    {
      "epoch": 3.944220270591028,
      "grad_norm": 0.056370414793491364,
      "learning_rate": 2.1180560078604074e-05,
      "loss": 0.2503,
      "step": 4154
    },
    {
      "epoch": 3.9451697127937337,
      "grad_norm": 0.0500507652759552,
      "learning_rate": 2.1143852006149122e-05,
      "loss": 0.2426,
      "step": 4155
    },
    {
      "epoch": 3.9461191549964396,
      "grad_norm": 0.05014021322131157,
      "learning_rate": 2.110717200990797e-05,
      "loss": 0.2587,
      "step": 4156
    },
    {
      "epoch": 3.9470685971991455,
      "grad_norm": 0.051097285002470016,
      "learning_rate": 2.1070520102940184e-05,
      "loss": 0.2516,
      "step": 4157
    },
    {
      "epoch": 3.9480180394018514,
      "grad_norm": 0.08694472163915634,
      "learning_rate": 2.1033896298295508e-05,
      "loss": 0.2643,
      "step": 4158
    },
    {
      "epoch": 3.9489674816045572,
      "grad_norm": 0.08210060745477676,
      "learning_rate": 2.0997300609013592e-05,
      "loss": 0.2264,
      "step": 4159
    },
    {
      "epoch": 3.949916923807263,
      "grad_norm": 0.05755892023444176,
      "learning_rate": 2.0960733048124083e-05,
      "loss": 0.2692,
      "step": 4160
    },
    {
      "epoch": 3.950866366009969,
      "grad_norm": 0.050832442939281464,
      "learning_rate": 2.0924193628646626e-05,
      "loss": 0.2536,
      "step": 4161
    },
    {
      "epoch": 3.9518158082126753,
      "grad_norm": 0.060199491679668427,
      "learning_rate": 2.088768236359081e-05,
      "loss": 0.2571,
      "step": 4162
    },
    {
      "epoch": 3.952765250415381,
      "grad_norm": 0.04771970584988594,
      "learning_rate": 2.085119926595628e-05,
      "loss": 0.2531,
      "step": 4163
    },
    {
      "epoch": 3.953714692618087,
      "grad_norm": 0.05692679435014725,
      "learning_rate": 2.0814744348732595e-05,
      "loss": 0.2545,
      "step": 4164
    },
    {
      "epoch": 3.954664134820793,
      "grad_norm": 0.05149425193667412,
      "learning_rate": 2.077831762489927e-05,
      "loss": 0.2571,
      "step": 4165
    },
    {
      "epoch": 3.955613577023499,
      "grad_norm": 0.05007016658782959,
      "learning_rate": 2.074191910742581e-05,
      "loss": 0.2602,
      "step": 4166
    },
    {
      "epoch": 3.9565630192262047,
      "grad_norm": 0.04917161166667938,
      "learning_rate": 2.0705548809271658e-05,
      "loss": 0.2603,
      "step": 4167
    },
    {
      "epoch": 3.9575124614289106,
      "grad_norm": 0.07422235608100891,
      "learning_rate": 2.0669206743386216e-05,
      "loss": 0.2713,
      "step": 4168
    },
    {
      "epoch": 3.9584619036316164,
      "grad_norm": 0.05925751104950905,
      "learning_rate": 2.0632892922708892e-05,
      "loss": 0.2671,
      "step": 4169
    },
    {
      "epoch": 3.9594113458343223,
      "grad_norm": 0.053712397813797,
      "learning_rate": 2.0596607360168897e-05,
      "loss": 0.2666,
      "step": 4170
    },
    {
      "epoch": 3.960360788037028,
      "grad_norm": 0.05303372070193291,
      "learning_rate": 2.056035006868553e-05,
      "loss": 0.2433,
      "step": 4171
    },
    {
      "epoch": 3.961310230239734,
      "grad_norm": 0.079205721616745,
      "learning_rate": 2.0524121061167945e-05,
      "loss": 0.26,
      "step": 4172
    },
    {
      "epoch": 3.96225967244244,
      "grad_norm": 0.05416445806622505,
      "learning_rate": 2.0487920350515212e-05,
      "loss": 0.2476,
      "step": 4173
    },
    {
      "epoch": 3.963209114645146,
      "grad_norm": 0.06397967040538788,
      "learning_rate": 2.045174794961644e-05,
      "loss": 0.239,
      "step": 4174
    },
    {
      "epoch": 3.9641585568478517,
      "grad_norm": 0.09109178930521011,
      "learning_rate": 2.0415603871350473e-05,
      "loss": 0.2594,
      "step": 4175
    },
    {
      "epoch": 3.9651079990505576,
      "grad_norm": 0.07438705861568451,
      "learning_rate": 2.0379488128586243e-05,
      "loss": 0.2526,
      "step": 4176
    },
    {
      "epoch": 3.966057441253264,
      "grad_norm": 0.051279328763484955,
      "learning_rate": 2.0343400734182493e-05,
      "loss": 0.249,
      "step": 4177
    },
    {
      "epoch": 3.9670068834559697,
      "grad_norm": 0.08495932817459106,
      "learning_rate": 2.0307341700987892e-05,
      "loss": 0.2366,
      "step": 4178
    },
    {
      "epoch": 3.9679563256586756,
      "grad_norm": 0.051565755158662796,
      "learning_rate": 2.027131104184108e-05,
      "loss": 0.2477,
      "step": 4179
    },
    {
      "epoch": 3.9689057678613815,
      "grad_norm": 0.058595266193151474,
      "learning_rate": 2.023530876957045e-05,
      "loss": 0.2402,
      "step": 4180
    },
    {
      "epoch": 3.9698552100640874,
      "grad_norm": 0.06185249239206314,
      "learning_rate": 2.0199334896994448e-05,
      "loss": 0.2556,
      "step": 4181
    },
    {
      "epoch": 3.9708046522667932,
      "grad_norm": 0.053781840950250626,
      "learning_rate": 2.016338943692131e-05,
      "loss": 0.2568,
      "step": 4182
    },
    {
      "epoch": 3.971754094469499,
      "grad_norm": 0.05680249631404877,
      "learning_rate": 2.0127472402149173e-05,
      "loss": 0.2551,
      "step": 4183
    },
    {
      "epoch": 3.972703536672205,
      "grad_norm": 0.053001519292593,
      "learning_rate": 2.0091583805466075e-05,
      "loss": 0.2521,
      "step": 4184
    },
    {
      "epoch": 3.973652978874911,
      "grad_norm": 0.061895888298749924,
      "learning_rate": 2.0055723659649904e-05,
      "loss": 0.2529,
      "step": 4185
    },
    {
      "epoch": 3.9746024210776167,
      "grad_norm": 0.07784521579742432,
      "learning_rate": 2.0019891977468408e-05,
      "loss": 0.263,
      "step": 4186
    },
    {
      "epoch": 3.975551863280323,
      "grad_norm": 0.060105256736278534,
      "learning_rate": 1.9984088771679264e-05,
      "loss": 0.2542,
      "step": 4187
    },
    {
      "epoch": 3.976501305483029,
      "grad_norm": 0.07342025637626648,
      "learning_rate": 1.994831405502996e-05,
      "loss": 0.232,
      "step": 4188
    },
    {
      "epoch": 3.977450747685735,
      "grad_norm": 0.058840759098529816,
      "learning_rate": 1.9912567840257845e-05,
      "loss": 0.2581,
      "step": 4189
    },
    {
      "epoch": 3.9784001898884407,
      "grad_norm": 0.15697553753852844,
      "learning_rate": 1.9876850140090108e-05,
      "loss": 0.254,
      "step": 4190
    },
    {
      "epoch": 3.9793496320911466,
      "grad_norm": 0.05611603334546089,
      "learning_rate": 1.98411609672438e-05,
      "loss": 0.2435,
      "step": 4191
    },
    {
      "epoch": 3.9802990742938524,
      "grad_norm": 0.05032116547226906,
      "learning_rate": 1.9805500334425876e-05,
      "loss": 0.2561,
      "step": 4192
    },
    {
      "epoch": 3.9812485164965583,
      "grad_norm": 0.08890901505947113,
      "learning_rate": 1.976986825433297e-05,
      "loss": 0.2368,
      "step": 4193
    },
    {
      "epoch": 3.982197958699264,
      "grad_norm": 0.08615860342979431,
      "learning_rate": 1.973426473965172e-05,
      "loss": 0.2725,
      "step": 4194
    },
    {
      "epoch": 3.98314740090197,
      "grad_norm": 0.06632962077856064,
      "learning_rate": 1.9698689803058522e-05,
      "loss": 0.2554,
      "step": 4195
    },
    {
      "epoch": 3.984096843104676,
      "grad_norm": 0.09921465814113617,
      "learning_rate": 1.9663143457219558e-05,
      "loss": 0.2575,
      "step": 4196
    },
    {
      "epoch": 3.985046285307382,
      "grad_norm": 0.048173487186431885,
      "learning_rate": 1.962762571479094e-05,
      "loss": 0.2495,
      "step": 4197
    },
    {
      "epoch": 3.9859957275100877,
      "grad_norm": 0.06488333642482758,
      "learning_rate": 1.959213658841844e-05,
      "loss": 0.239,
      "step": 4198
    },
    {
      "epoch": 3.9869451697127936,
      "grad_norm": 0.05705002322793007,
      "learning_rate": 1.95566760907378e-05,
      "loss": 0.2478,
      "step": 4199
    },
    {
      "epoch": 3.9878946119154994,
      "grad_norm": 0.05279826000332832,
      "learning_rate": 1.952124423437447e-05,
      "loss": 0.2512,
      "step": 4200
    },
    {
      "epoch": 3.9888440541182053,
      "grad_norm": 0.05641533434391022,
      "learning_rate": 1.948584103194373e-05,
      "loss": 0.26,
      "step": 4201
    },
    {
      "epoch": 3.9897934963209116,
      "grad_norm": 0.07074812054634094,
      "learning_rate": 1.9450466496050656e-05,
      "loss": 0.244,
      "step": 4202
    },
    {
      "epoch": 3.9907429385236175,
      "grad_norm": 0.054319318383932114,
      "learning_rate": 1.9415120639290085e-05,
      "loss": 0.2544,
      "step": 4203
    },
    {
      "epoch": 3.9916923807263234,
      "grad_norm": 0.04937317222356796,
      "learning_rate": 1.937980347424675e-05,
      "loss": 0.2481,
      "step": 4204
    },
    {
      "epoch": 3.9926418229290292,
      "grad_norm": 0.07532598078250885,
      "learning_rate": 1.934451501349507e-05,
      "loss": 0.2734,
      "step": 4205
    },
    {
      "epoch": 3.993591265131735,
      "grad_norm": 0.06492338329553604,
      "learning_rate": 1.9309255269599235e-05,
      "loss": 0.2531,
      "step": 4206
    },
    {
      "epoch": 3.994540707334441,
      "grad_norm": 0.05689654126763344,
      "learning_rate": 1.9274024255113287e-05,
      "loss": 0.2523,
      "step": 4207
    },
    {
      "epoch": 3.995490149537147,
      "grad_norm": 0.05884096398949623,
      "learning_rate": 1.9238821982580967e-05,
      "loss": 0.2512,
      "step": 4208
    },
    {
      "epoch": 3.9964395917398527,
      "grad_norm": 0.0537344291806221,
      "learning_rate": 1.9203648464535818e-05,
      "loss": 0.2423,
      "step": 4209
    },
    {
      "epoch": 3.9973890339425586,
      "grad_norm": 0.0568530336022377,
      "learning_rate": 1.9168503713501184e-05,
      "loss": 0.2637,
      "step": 4210
    },
    {
      "epoch": 3.9983384761452645,
      "grad_norm": 0.06064499542117119,
      "learning_rate": 1.913338774199004e-05,
      "loss": 0.2562,
      "step": 4211
    },
    {
      "epoch": 3.999287918347971,
      "grad_norm": 0.05399147793650627,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 0.2424,
      "step": 4212
    },
    {
      "epoch": 4.000237360550677,
      "grad_norm": 0.05513492599129677,
      "learning_rate": 1.90632421875394e-05,
      "loss": 0.2563,
      "step": 4213
    },
    {
      "epoch": 4.001186802753383,
      "grad_norm": 0.08468678593635559,
      "learning_rate": 1.9028212629574726e-05,
      "loss": 0.2547,
      "step": 4214
    },
    {
      "epoch": 4.002136244956088,
      "grad_norm": 0.054223205894231796,
      "learning_rate": 1.8993211901083353e-05,
      "loss": 0.2209,
      "step": 4215
    },
    {
      "epoch": 4.003085687158794,
      "grad_norm": 0.060239773243665695,
      "learning_rate": 1.895824001452696e-05,
      "loss": 0.2468,
      "step": 4216
    },
    {
      "epoch": 4.0040351293615,
      "grad_norm": 0.12123169749975204,
      "learning_rate": 1.892329698235715e-05,
      "loss": 0.2172,
      "step": 4217
    },
    {
      "epoch": 4.004984571564206,
      "grad_norm": 0.05851219221949577,
      "learning_rate": 1.8888382817015117e-05,
      "loss": 0.2243,
      "step": 4218
    },
    {
      "epoch": 4.005934013766912,
      "grad_norm": 0.058043915778398514,
      "learning_rate": 1.8853497530931795e-05,
      "loss": 0.2403,
      "step": 4219
    },
    {
      "epoch": 4.006883455969618,
      "grad_norm": 0.06909014284610748,
      "learning_rate": 1.881864113652796e-05,
      "loss": 0.2411,
      "step": 4220
    },
    {
      "epoch": 4.007832898172324,
      "grad_norm": 0.057406459003686905,
      "learning_rate": 1.8783813646213867e-05,
      "loss": 0.2532,
      "step": 4221
    },
    {
      "epoch": 4.00878234037503,
      "grad_norm": 0.06199433654546738,
      "learning_rate": 1.874901507238972e-05,
      "loss": 0.2478,
      "step": 4222
    },
    {
      "epoch": 4.009731782577735,
      "grad_norm": 0.13553054630756378,
      "learning_rate": 1.8714245427445278e-05,
      "loss": 0.2485,
      "step": 4223
    },
    {
      "epoch": 4.010681224780441,
      "grad_norm": 0.07412765920162201,
      "learning_rate": 1.8679504723760055e-05,
      "loss": 0.2234,
      "step": 4224
    },
    {
      "epoch": 4.011630666983147,
      "grad_norm": 0.05949070304632187,
      "learning_rate": 1.864479297370325e-05,
      "loss": 0.2442,
      "step": 4225
    },
    {
      "epoch": 4.012580109185853,
      "grad_norm": 0.07053567469120026,
      "learning_rate": 1.8610110189633757e-05,
      "loss": 0.2348,
      "step": 4226
    },
    {
      "epoch": 4.013529551388559,
      "grad_norm": 0.07391881942749023,
      "learning_rate": 1.8575456383900114e-05,
      "loss": 0.2482,
      "step": 4227
    },
    {
      "epoch": 4.014478993591265,
      "grad_norm": 0.13988161087036133,
      "learning_rate": 1.8540831568840644e-05,
      "loss": 0.235,
      "step": 4228
    },
    {
      "epoch": 4.015428435793971,
      "grad_norm": 0.073545902967453,
      "learning_rate": 1.8506235756783262e-05,
      "loss": 0.2307,
      "step": 4229
    },
    {
      "epoch": 4.016377877996677,
      "grad_norm": 0.06786803901195526,
      "learning_rate": 1.8471668960045574e-05,
      "loss": 0.2421,
      "step": 4230
    },
    {
      "epoch": 4.017327320199382,
      "grad_norm": 0.07447106391191483,
      "learning_rate": 1.843713119093485e-05,
      "loss": 0.2507,
      "step": 4231
    },
    {
      "epoch": 4.018276762402089,
      "grad_norm": 0.06786752492189407,
      "learning_rate": 1.840262246174803e-05,
      "loss": 0.2211,
      "step": 4232
    },
    {
      "epoch": 4.019226204604795,
      "grad_norm": 0.07335960865020752,
      "learning_rate": 1.836814278477179e-05,
      "loss": 0.2297,
      "step": 4233
    },
    {
      "epoch": 4.020175646807501,
      "grad_norm": 0.08651788532733917,
      "learning_rate": 1.8333692172282292e-05,
      "loss": 0.2484,
      "step": 4234
    },
    {
      "epoch": 4.021125089010207,
      "grad_norm": 0.0822267234325409,
      "learning_rate": 1.8299270636545518e-05,
      "loss": 0.2353,
      "step": 4235
    },
    {
      "epoch": 4.022074531212913,
      "grad_norm": 0.06862235814332962,
      "learning_rate": 1.8264878189817002e-05,
      "loss": 0.2466,
      "step": 4236
    },
    {
      "epoch": 4.023023973415619,
      "grad_norm": 0.09110434353351593,
      "learning_rate": 1.823051484434195e-05,
      "loss": 0.2476,
      "step": 4237
    },
    {
      "epoch": 4.0239734156183244,
      "grad_norm": 0.06781786680221558,
      "learning_rate": 1.819618061235525e-05,
      "loss": 0.2438,
      "step": 4238
    },
    {
      "epoch": 4.02492285782103,
      "grad_norm": 0.06978391855955124,
      "learning_rate": 1.8161875506081293e-05,
      "loss": 0.2524,
      "step": 4239
    },
    {
      "epoch": 4.025872300023736,
      "grad_norm": 0.07549986243247986,
      "learning_rate": 1.8127599537734296e-05,
      "loss": 0.2371,
      "step": 4240
    },
    {
      "epoch": 4.026821742226442,
      "grad_norm": 0.10006662458181381,
      "learning_rate": 1.8093352719517874e-05,
      "loss": 0.2396,
      "step": 4241
    },
    {
      "epoch": 4.027771184429148,
      "grad_norm": 0.0820450708270073,
      "learning_rate": 1.8059135063625477e-05,
      "loss": 0.2416,
      "step": 4242
    },
    {
      "epoch": 4.028720626631854,
      "grad_norm": 0.08785741031169891,
      "learning_rate": 1.8024946582240033e-05,
      "loss": 0.2359,
      "step": 4243
    },
    {
      "epoch": 4.02967006883456,
      "grad_norm": 0.09228499233722687,
      "learning_rate": 1.7990787287534104e-05,
      "loss": 0.2466,
      "step": 4244
    },
    {
      "epoch": 4.030619511037266,
      "grad_norm": 0.0822257474064827,
      "learning_rate": 1.795665719166997e-05,
      "loss": 0.2452,
      "step": 4245
    },
    {
      "epoch": 4.031568953239971,
      "grad_norm": 0.06719242036342621,
      "learning_rate": 1.792255630679931e-05,
      "loss": 0.2441,
      "step": 4246
    },
    {
      "epoch": 4.032518395442677,
      "grad_norm": 0.1547231525182724,
      "learning_rate": 1.788848464506362e-05,
      "loss": 0.2547,
      "step": 4247
    },
    {
      "epoch": 4.033467837645383,
      "grad_norm": 0.08464305847883224,
      "learning_rate": 1.7854442218593838e-05,
      "loss": 0.2151,
      "step": 4248
    },
    {
      "epoch": 4.034417279848089,
      "grad_norm": 0.06678024679422379,
      "learning_rate": 1.7820429039510566e-05,
      "loss": 0.2468,
      "step": 4249
    },
    {
      "epoch": 4.035366722050795,
      "grad_norm": 0.06837774068117142,
      "learning_rate": 1.7786445119923968e-05,
      "loss": 0.2386,
      "step": 4250
    },
    {
      "epoch": 4.036316164253501,
      "grad_norm": 0.067479707300663,
      "learning_rate": 1.775249047193377e-05,
      "loss": 0.2334,
      "step": 4251
    },
    {
      "epoch": 4.037265606456207,
      "grad_norm": 0.07563888281583786,
      "learning_rate": 1.7718565107629347e-05,
      "loss": 0.2449,
      "step": 4252
    },
    {
      "epoch": 4.038215048658913,
      "grad_norm": 0.06924168020486832,
      "learning_rate": 1.7684669039089587e-05,
      "loss": 0.2456,
      "step": 4253
    },
    {
      "epoch": 4.039164490861618,
      "grad_norm": 0.0730455070734024,
      "learning_rate": 1.765080227838295e-05,
      "loss": 0.2498,
      "step": 4254
    },
    {
      "epoch": 4.040113933064324,
      "grad_norm": 0.11271868646144867,
      "learning_rate": 1.7616964837567495e-05,
      "loss": 0.2054,
      "step": 4255
    },
    {
      "epoch": 4.04106337526703,
      "grad_norm": 0.0824706181883812,
      "learning_rate": 1.7583156728690787e-05,
      "loss": 0.2515,
      "step": 4256
    },
    {
      "epoch": 4.042012817469737,
      "grad_norm": 0.09537190198898315,
      "learning_rate": 1.7549377963789994e-05,
      "loss": 0.242,
      "step": 4257
    },
    {
      "epoch": 4.042962259672443,
      "grad_norm": 0.07382145524024963,
      "learning_rate": 1.7515628554891862e-05,
      "loss": 0.2486,
      "step": 4258
    },
    {
      "epoch": 4.043911701875149,
      "grad_norm": 0.08604732155799866,
      "learning_rate": 1.748190851401258e-05,
      "loss": 0.2348,
      "step": 4259
    },
    {
      "epoch": 4.044861144077855,
      "grad_norm": 0.0709012970328331,
      "learning_rate": 1.7448217853157998e-05,
      "loss": 0.2501,
      "step": 4260
    },
    {
      "epoch": 4.0458105862805605,
      "grad_norm": 0.07132518291473389,
      "learning_rate": 1.741455658432344e-05,
      "loss": 0.2423,
      "step": 4261
    },
    {
      "epoch": 4.046760028483266,
      "grad_norm": 0.07555680721998215,
      "learning_rate": 1.738092471949375e-05,
      "loss": 0.2445,
      "step": 4262
    },
    {
      "epoch": 4.047709470685972,
      "grad_norm": 0.07572092860937119,
      "learning_rate": 1.7347322270643418e-05,
      "loss": 0.2493,
      "step": 4263
    },
    {
      "epoch": 4.048658912888678,
      "grad_norm": 0.06770916283130646,
      "learning_rate": 1.7313749249736267e-05,
      "loss": 0.2466,
      "step": 4264
    },
    {
      "epoch": 4.049608355091384,
      "grad_norm": 0.07170511037111282,
      "learning_rate": 1.728020566872581e-05,
      "loss": 0.2072,
      "step": 4265
    },
    {
      "epoch": 4.05055779729409,
      "grad_norm": 0.07005757093429565,
      "learning_rate": 1.7246691539555028e-05,
      "loss": 0.2419,
      "step": 4266
    }
  ],
  "logging_steps": 1,
  "max_steps": 5265,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 158,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.4755390979171287e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}