{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9804594272076372,
  "eval_steps": 500,
  "global_step": 3352,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 1.0434608134535555,
      "learning_rate": 2e-05,
      "loss": 0.7234,
      "step": 1
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9514763639201144,
      "learning_rate": 4e-05,
      "loss": 0.5916,
      "step": 2
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8903336235741379,
      "learning_rate": 6e-05,
      "loss": 0.5827,
      "step": 3
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.764603046688215,
      "learning_rate": 8e-05,
      "loss": 0.5555,
      "step": 4
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.8956967835024867,
      "learning_rate": 0.0001,
      "loss": 0.6073,
      "step": 5
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.378526599131456,
      "learning_rate": 0.00012,
      "loss": 0.6204,
      "step": 6
    },
    {
      "epoch": 0.0,
      "grad_norm": 1.1442512069955262,
      "learning_rate": 0.00014,
      "loss": 0.5452,
      "step": 7
    },
    {
      "epoch": 0.0,
      "grad_norm": 0.9100913748328603,
      "learning_rate": 0.00016,
      "loss": 0.5858,
      "step": 8
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6859557442210851,
      "learning_rate": 0.00018,
      "loss": 0.5182,
      "step": 9
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8288606055568941,
      "learning_rate": 0.0002,
      "loss": 0.6083,
      "step": 10
    },
    {
      "epoch": 0.01,
      "grad_norm": 1.0417686973414615,
      "learning_rate": 0.0001999999558168346,
      "loss": 0.65,
      "step": 11
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8756017006672853,
      "learning_rate": 0.00019999982326737747,
      "loss": 0.5837,
      "step": 12
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7284541589078422,
      "learning_rate": 0.0001999996023517457,
      "loss": 0.5738,
      "step": 13
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.8602799120413903,
      "learning_rate": 0.0001999992930701345,
      "loss": 0.595,
      "step": 14
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7938840633060059,
      "learning_rate": 0.00019999889542281728,
      "loss": 0.5907,
      "step": 15
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.7022632853348306,
      "learning_rate": 0.00019999840941014525,
      "loss": 0.5513,
      "step": 16
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.6531781263691616,
      "learning_rate": 0.00019999783503254803,
      "loss": 0.5475,
      "step": 17
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5808528104992969,
      "learning_rate": 0.0001999971722905331,
      "loss": 0.519,
      "step": 18
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5452643486331965,
      "learning_rate": 0.00019999642118468614,
      "loss": 0.5421,
      "step": 19
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5893567274117093,
      "learning_rate": 0.00019999558171567082,
      "loss": 0.6016,
      "step": 20
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5039905113559068,
      "learning_rate": 0.000199994653884229,
      "loss": 0.6096,
      "step": 21
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.5236847425188783,
      "learning_rate": 0.00019999363769118055,
      "loss": 0.5845,
      "step": 22
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.4403777461558745,
      "learning_rate": 0.00019999253313742344,
      "loss": 0.5657,
      "step": 23
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.411935443568472,
      "learning_rate": 0.00019999134022393375,
      "loss": 0.5619,
      "step": 24
    },
    {
      "epoch": 0.01,
      "grad_norm": 0.3669921362459581,
      "learning_rate": 0.0001999900589517656,
      "loss": 0.6115,
      "step": 25
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3613613842516578,
      "learning_rate": 0.0001999886893220512,
      "loss": 0.5286,
      "step": 26
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.378560233146017,
      "learning_rate": 0.0001999872313360008,
      "loss": 0.5887,
      "step": 27
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3562687135057843,
      "learning_rate": 0.00019998568499490283,
      "loss": 0.5598,
      "step": 28
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.34581851507208355,
      "learning_rate": 0.00019998405030012371,
      "loss": 0.5772,
      "step": 29
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3812400800902662,
      "learning_rate": 0.00019998232725310796,
      "loss": 0.6154,
      "step": 30
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.2876811822631032,
      "learning_rate": 0.00019998051585537818,
      "loss": 0.4949,
      "step": 31
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3291934945038139,
      "learning_rate": 0.00019997861610853503,
      "loss": 0.5388,
      "step": 32
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.35220229516562385,
      "learning_rate": 0.00019997662801425725,
      "loss": 0.5801,
      "step": 33
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3908550087374589,
      "learning_rate": 0.00019997455157430165,
      "loss": 0.5783,
      "step": 34
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.38564043955621646,
      "learning_rate": 0.00019997238679050308,
      "loss": 0.5628,
      "step": 35
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3248792879576579,
      "learning_rate": 0.00019997013366477453,
      "loss": 0.5896,
      "step": 36
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.33243708146621687,
      "learning_rate": 0.00019996779219910696,
      "loss": 0.5618,
      "step": 37
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3589452470436555,
      "learning_rate": 0.00019996536239556942,
      "loss": 0.5387,
      "step": 38
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.3635189263065437,
      "learning_rate": 0.0001999628442563091,
      "loss": 0.629,
      "step": 39
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.36761442942017947,
      "learning_rate": 0.00019996023778355113,
      "loss": 0.6133,
      "step": 40
    },
    {
      "epoch": 0.02,
      "grad_norm": 0.30331424568033827,
      "learning_rate": 0.00019995754297959882,
      "loss": 0.5377,
      "step": 41
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3157701200247212,
      "learning_rate": 0.0001999547598468334,
      "loss": 0.6249,
      "step": 42
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3112819252913729,
      "learning_rate": 0.00019995188838771425,
      "loss": 0.5424,
      "step": 43
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.33384944369487113,
      "learning_rate": 0.0001999489286047788,
      "loss": 0.6014,
      "step": 44
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3330564524921197,
      "learning_rate": 0.00019994588050064243,
      "loss": 0.5469,
      "step": 45
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2889450580843479,
      "learning_rate": 0.00019994274407799872,
      "loss": 0.512,
      "step": 46
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3273617072745067,
      "learning_rate": 0.00019993951933961913,
      "loss": 0.5456,
      "step": 47
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3228418979730564,
      "learning_rate": 0.00019993620628835332,
      "loss": 0.5716,
      "step": 48
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3439278828333003,
      "learning_rate": 0.0001999328049271289,
      "loss": 0.5177,
      "step": 49
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3186271172935729,
      "learning_rate": 0.0001999293152589515,
      "loss": 0.5502,
      "step": 50
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.38357922086488366,
      "learning_rate": 0.0001999257372869048,
      "loss": 0.6178,
      "step": 51
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.36013497860303273,
      "learning_rate": 0.00019992207101415053,
      "loss": 0.6278,
      "step": 52
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3146595251755829,
      "learning_rate": 0.00019991831644392848,
      "loss": 0.5348,
      "step": 53
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3986948949803995,
      "learning_rate": 0.00019991447357955639,
      "loss": 0.6331,
      "step": 54
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3194176204715625,
      "learning_rate": 0.00019991054242443008,
      "loss": 0.5817,
      "step": 55
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.29564051537116465,
      "learning_rate": 0.00019990652298202335,
      "loss": 0.545,
      "step": 56
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.2908246398716361,
      "learning_rate": 0.00019990241525588804,
      "loss": 0.5294,
      "step": 57
    },
    {
      "epoch": 0.03,
      "grad_norm": 0.3480952622658696,
      "learning_rate": 0.000199898219249654,
      "loss": 0.6282,
      "step": 58
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.4278607132919695,
      "learning_rate": 0.00019989393496702907,
      "loss": 0.7008,
      "step": 59
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3088760407735635,
      "learning_rate": 0.00019988956241179912,
      "loss": 0.5747,
      "step": 60
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3549589308890128,
      "learning_rate": 0.00019988510158782804,
      "loss": 0.615,
      "step": 61
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.28349959678885256,
      "learning_rate": 0.00019988055249905767,
      "loss": 0.577,
      "step": 62
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.35001480138074803,
      "learning_rate": 0.00019987591514950787,
      "loss": 0.5551,
      "step": 63
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.32895401860950285,
      "learning_rate": 0.00019987118954327654,
      "loss": 0.5617,
      "step": 64
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.347007326906862,
      "learning_rate": 0.00019986637568453945,
      "loss": 0.5935,
      "step": 65
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.30223641676037666,
      "learning_rate": 0.00019986147357755048,
      "loss": 0.5355,
      "step": 66
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.302279046184156,
      "learning_rate": 0.00019985648322664145,
      "loss": 0.5571,
      "step": 67
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3181910281320864,
      "learning_rate": 0.00019985140463622215,
      "loss": 0.5198,
      "step": 68
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.32334719229096776,
      "learning_rate": 0.0001998462378107803,
      "loss": 0.5063,
      "step": 69
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.31038499461943353,
      "learning_rate": 0.0001998409827548817,
      "loss": 0.5805,
      "step": 70
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3032049786542595,
      "learning_rate": 0.00019983563947316996,
      "loss": 0.564,
      "step": 71
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.3345643555445713,
      "learning_rate": 0.00019983020797036683,
      "loss": 0.5442,
      "step": 72
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.32583720357675877,
      "learning_rate": 0.00019982468825127187,
      "loss": 0.5674,
      "step": 73
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.30278224625704836,
      "learning_rate": 0.0001998190803207627,
      "loss": 0.569,
      "step": 74
    },
    {
      "epoch": 0.04,
      "grad_norm": 0.29996902392483177,
      "learning_rate": 0.0001998133841837948,
      "loss": 0.6142,
      "step": 75
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2947151895973628,
      "learning_rate": 0.00019980759984540168,
      "loss": 0.5084,
      "step": 76
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.33309005605837944,
      "learning_rate": 0.0001998017273106947,
      "loss": 0.5807,
      "step": 77
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.31281343399912853,
      "learning_rate": 0.00019979576658486325,
      "loss": 0.6299,
      "step": 78
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.30980839190781245,
      "learning_rate": 0.00019978971767317457,
      "loss": 0.5521,
      "step": 79
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2988115356408324,
      "learning_rate": 0.00019978358058097388,
      "loss": 0.5645,
      "step": 80
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2864799366004751,
      "learning_rate": 0.0001997773553136843,
      "loss": 0.5604,
      "step": 81
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.28284272149262185,
      "learning_rate": 0.00019977104187680688,
      "loss": 0.5964,
      "step": 82
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2807639116477172,
      "learning_rate": 0.00019976464027592053,
      "loss": 0.5441,
      "step": 83
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.3169919212395633,
      "learning_rate": 0.00019975815051668217,
      "loss": 0.5672,
      "step": 84
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2827524094344841,
      "learning_rate": 0.0001997515726048265,
      "loss": 0.5631,
      "step": 85
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.29538492598974014,
      "learning_rate": 0.00019974490654616625,
      "loss": 0.609,
      "step": 86
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.29397994414783907,
      "learning_rate": 0.0001997381523465919,
      "loss": 0.5723,
      "step": 87
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2824005824065347,
      "learning_rate": 0.00019973131001207195,
      "loss": 0.5209,
      "step": 88
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2847018611508931,
      "learning_rate": 0.00019972437954865265,
      "loss": 0.5617,
      "step": 89
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2908040007926844,
      "learning_rate": 0.00019971736096245825,
      "loss": 0.5624,
      "step": 90
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.27754759063410545,
      "learning_rate": 0.00019971025425969083,
      "loss": 0.5353,
      "step": 91
    },
    {
      "epoch": 0.05,
      "grad_norm": 0.2885964599083646,
      "learning_rate": 0.0001997030594466303,
      "loss": 0.5181,
      "step": 92
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.29372725474160666,
      "learning_rate": 0.00019969577652963444,
      "loss": 0.5757,
      "step": 93
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.32149915053639194,
      "learning_rate": 0.0001996884055151389,
      "loss": 0.552,
      "step": 94
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2816717380191918,
      "learning_rate": 0.00019968094640965717,
      "loss": 0.4968,
      "step": 95
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2719020140724135,
      "learning_rate": 0.00019967339921978062,
      "loss": 0.5503,
      "step": 96
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.28166729851780475,
      "learning_rate": 0.00019966576395217837,
      "loss": 0.5546,
      "step": 97
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.27817598279558775,
      "learning_rate": 0.0001996580406135975,
      "loss": 0.6145,
      "step": 98
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3471492732103861,
      "learning_rate": 0.00019965022921086275,
      "loss": 0.6464,
      "step": 99
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2838977359279957,
      "learning_rate": 0.00019964232975087687,
      "loss": 0.5576,
      "step": 100
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2989119777268752,
      "learning_rate": 0.00019963434224062025,
      "loss": 0.5747,
      "step": 101
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.26079447242968457,
      "learning_rate": 0.0001996262666871512,
      "loss": 0.5144,
      "step": 102
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2904578974664885,
      "learning_rate": 0.00019961810309760577,
      "loss": 0.5623,
      "step": 103
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.2682051539483259,
      "learning_rate": 0.00019960985147919778,
      "loss": 0.5722,
      "step": 104
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.3004061592870477,
      "learning_rate": 0.00019960151183921897,
      "loss": 0.5526,
      "step": 105
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.27675115608209533,
      "learning_rate": 0.00019959308418503877,
      "loss": 0.5859,
      "step": 106
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.26526760651496173,
      "learning_rate": 0.00019958456852410433,
      "loss": 0.5395,
      "step": 107
    },
    {
      "epoch": 0.06,
      "grad_norm": 0.29513224606753785,
      "learning_rate": 0.0001995759648639406,
      "loss": 0.59,
      "step": 108
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.27848396362609984,
      "learning_rate": 0.00019956727321215044,
      "loss": 0.6076,
      "step": 109
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.29804931563512865,
      "learning_rate": 0.00019955849357641424,
      "loss": 0.5555,
      "step": 110
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2756038079003531,
      "learning_rate": 0.00019954962596449024,
      "loss": 0.5542,
      "step": 111
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.28433942136507906,
      "learning_rate": 0.0001995406703842145,
      "loss": 0.5527,
      "step": 112
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.646736414676863,
      "learning_rate": 0.0001995316268435007,
      "loss": 0.7024,
      "step": 113
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.25030459489112267,
      "learning_rate": 0.00019952249535034025,
      "loss": 0.4928,
      "step": 114
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.30977646038996587,
      "learning_rate": 0.00019951327591280236,
      "loss": 0.5883,
      "step": 115
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2854791945432696,
      "learning_rate": 0.0001995039685390339,
      "loss": 0.6318,
      "step": 116
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.3264119003161445,
      "learning_rate": 0.00019949457323725946,
      "loss": 0.5654,
      "step": 117
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.266512900378873,
      "learning_rate": 0.0001994850900157813,
      "loss": 0.5457,
      "step": 118
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.30259231663288877,
      "learning_rate": 0.0001994755188829794,
      "loss": 0.5828,
      "step": 119
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.33798187117798184,
      "learning_rate": 0.00019946585984731142,
      "loss": 0.5669,
      "step": 120
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2769767197030659,
      "learning_rate": 0.00019945611291731274,
      "loss": 0.5619,
      "step": 121
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2657775119925365,
      "learning_rate": 0.00019944627810159632,
      "loss": 0.59,
      "step": 122
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2918370192930011,
      "learning_rate": 0.00019943635540885279,
      "loss": 0.5816,
      "step": 123
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.33417238245851544,
      "learning_rate": 0.00019942634484785052,
      "loss": 0.5921,
      "step": 124
    },
    {
      "epoch": 0.07,
      "grad_norm": 0.2482914745160091,
      "learning_rate": 0.00019941624642743548,
      "loss": 0.5113,
      "step": 125
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.28272991564412037,
      "learning_rate": 0.0001994060601565313,
      "loss": 0.5543,
      "step": 126
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.35389083143384653,
      "learning_rate": 0.00019939578604413912,
      "loss": 0.5921,
      "step": 127
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3426787529883331,
      "learning_rate": 0.00019938542409933787,
      "loss": 0.6073,
      "step": 128
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.25621145606980844,
      "learning_rate": 0.000199374974331284,
      "loss": 0.5639,
      "step": 129
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2996022840977615,
      "learning_rate": 0.00019936443674921158,
      "loss": 0.5874,
      "step": 130
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3283726508140091,
      "learning_rate": 0.0001993538113624323,
      "loss": 0.6295,
      "step": 131
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2878523809947236,
      "learning_rate": 0.00019934309818033544,
      "loss": 0.5565,
      "step": 132
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2764010556696928,
      "learning_rate": 0.0001993322972123878,
      "loss": 0.554,
      "step": 133
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.28827215996506156,
      "learning_rate": 0.0001993214084681338,
      "loss": 0.5788,
      "step": 134
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2945105689397871,
      "learning_rate": 0.00019931043195719548,
      "loss": 0.5197,
      "step": 135
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.25439488455073433,
      "learning_rate": 0.00019929936768927232,
      "loss": 0.509,
      "step": 136
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.2746742447613063,
      "learning_rate": 0.00019928821567414144,
      "loss": 0.5479,
      "step": 137
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3033960913538536,
      "learning_rate": 0.00019927697592165747,
      "loss": 0.5859,
      "step": 138
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.28486244663010424,
      "learning_rate": 0.00019926564844175256,
      "loss": 0.5951,
      "step": 139
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3208016816012168,
      "learning_rate": 0.00019925423324443638,
      "loss": 0.5823,
      "step": 140
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.3005362808247367,
      "learning_rate": 0.00019924273033979613,
      "loss": 0.5652,
      "step": 141
    },
    {
      "epoch": 0.08,
      "grad_norm": 0.26910338931514155,
      "learning_rate": 0.0001992311397379965,
      "loss": 0.5463,
      "step": 142
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.28718562659091934,
      "learning_rate": 0.00019921946144927966,
      "loss": 0.5245,
      "step": 143
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2616953117781411,
      "learning_rate": 0.0001992076954839653,
      "loss": 0.5358,
      "step": 144
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.3107257337368987,
      "learning_rate": 0.00019919584185245062,
      "loss": 0.5536,
      "step": 145
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.28427245738065265,
      "learning_rate": 0.00019918390056521018,
      "loss": 0.6126,
      "step": 146
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.25398310452304734,
      "learning_rate": 0.00019917187163279605,
      "loss": 0.5068,
      "step": 147
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2556256469730818,
      "learning_rate": 0.00019915975506583778,
      "loss": 0.5416,
      "step": 148
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2611702329742577,
      "learning_rate": 0.00019914755087504236,
      "loss": 0.5276,
      "step": 149
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2789045987166575,
      "learning_rate": 0.00019913525907119418,
      "loss": 0.5591,
      "step": 150
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.26837658503581957,
      "learning_rate": 0.000199122879665155,
      "loss": 0.6581,
      "step": 151
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.34601396912277804,
      "learning_rate": 0.0001991104126678641,
      "loss": 0.5394,
      "step": 152
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.25684957257052443,
      "learning_rate": 0.00019909785809033806,
      "loss": 0.5392,
      "step": 153
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2906797315813485,
      "learning_rate": 0.00019908521594367098,
      "loss": 0.5185,
      "step": 154
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2852843546202924,
      "learning_rate": 0.0001990724862390342,
      "loss": 0.5436,
      "step": 155
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2875355300862882,
      "learning_rate": 0.0001990596689876765,
      "loss": 0.6009,
      "step": 156
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.32052910212305513,
      "learning_rate": 0.00019904676420092404,
      "loss": 0.5831,
      "step": 157
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.266884162852661,
      "learning_rate": 0.00019903377189018024,
      "loss": 0.5459,
      "step": 158
    },
    {
      "epoch": 0.09,
      "grad_norm": 0.2957365744895018,
      "learning_rate": 0.000199020692066926,
      "loss": 0.5211,
      "step": 159
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.24951992931808137,
      "learning_rate": 0.00019900752474271945,
      "loss": 0.497,
      "step": 160
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.30509964150122953,
      "learning_rate": 0.0001989942699291961,
      "loss": 0.5812,
      "step": 161
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2790293776337124,
      "learning_rate": 0.0001989809276380687,
      "loss": 0.5856,
      "step": 162
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.24940387850774506,
      "learning_rate": 0.00019896749788112737,
      "loss": 0.5281,
      "step": 163
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2664890107453781,
      "learning_rate": 0.0001989539806702395,
      "loss": 0.524,
      "step": 164
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2896608423493073,
      "learning_rate": 0.0001989403760173497,
      "loss": 0.5171,
      "step": 165
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2544937836162412,
      "learning_rate": 0.00019892668393447997,
      "loss": 0.5546,
      "step": 166
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2626626371027326,
      "learning_rate": 0.00019891290443372944,
      "loss": 0.5498,
      "step": 167
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.281410490858952,
      "learning_rate": 0.0001988990375272746,
      "loss": 0.5377,
      "step": 168
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3376943176164128,
      "learning_rate": 0.0001988850832273691,
      "loss": 0.5469,
      "step": 169
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2507691377758427,
      "learning_rate": 0.0001988710415463439,
      "loss": 0.549,
      "step": 170
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.27178996752570117,
      "learning_rate": 0.00019885691249660702,
      "loss": 0.5636,
      "step": 171
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.3359421766962587,
      "learning_rate": 0.00019884269609064386,
      "loss": 0.5957,
      "step": 172
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2638709645045905,
      "learning_rate": 0.0001988283923410169,
      "loss": 0.5793,
      "step": 173
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.25585919726912226,
      "learning_rate": 0.00019881400126036582,
      "loss": 0.5817,
      "step": 174
    },
    {
      "epoch": 0.1,
      "grad_norm": 0.2905067973645414,
      "learning_rate": 0.00019879952286140754,
      "loss": 0.5585,
      "step": 175
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.24197399766587002,
      "learning_rate": 0.0001987849571569361,
      "loss": 0.507,
      "step": 176
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.28898034252731664,
      "learning_rate": 0.0001987703041598226,
      "loss": 0.5981,
      "step": 177
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.26516349701479863,
      "learning_rate": 0.00019875556388301543,
      "loss": 0.56,
      "step": 178
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.27235027968517367,
      "learning_rate": 0.00019874073633953997,
      "loss": 0.5872,
      "step": 179
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2692241567318253,
      "learning_rate": 0.00019872582154249884,
      "loss": 0.5397,
      "step": 180
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2560507155942398,
      "learning_rate": 0.00019871081950507163,
      "loss": 0.5431,
      "step": 181
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.26691224099103567,
      "learning_rate": 0.00019869573024051517,
      "loss": 0.5608,
      "step": 182
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2961375506924155,
      "learning_rate": 0.00019868055376216323,
      "loss": 0.5784,
      "step": 183
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.26055755072015874,
      "learning_rate": 0.00019866529008342673,
      "loss": 0.5369,
      "step": 184
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2525359310079611,
      "learning_rate": 0.00019864993921779361,
      "loss": 0.5438,
      "step": 185
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.249327141855566,
      "learning_rate": 0.0001986345011788289,
      "loss": 0.5668,
      "step": 186
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2983950007732028,
      "learning_rate": 0.00019861897598017457,
      "loss": 0.5271,
      "step": 187
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.25610455444964525,
      "learning_rate": 0.00019860336363554973,
      "loss": 0.6012,
      "step": 188
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.24760444410184018,
      "learning_rate": 0.0001985876641587504,
      "loss": 0.5066,
      "step": 189
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.2614264060863463,
      "learning_rate": 0.00019857187756364958,
      "loss": 0.5792,
      "step": 190
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.27219045408444215,
      "learning_rate": 0.00019855600386419744,
      "loss": 0.543,
      "step": 191
    },
    {
      "epoch": 0.11,
      "grad_norm": 0.24606131498871828,
      "learning_rate": 0.00019854004307442088,
      "loss": 0.5676,
      "step": 192
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.28394763236035964,
      "learning_rate": 0.0001985239952084239,
      "loss": 0.6032,
      "step": 193
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.28350170917034406,
      "learning_rate": 0.0001985078602803874,
      "loss": 0.6264,
      "step": 194
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24011552907338696,
      "learning_rate": 0.00019849163830456922,
      "loss": 0.4793,
      "step": 195
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2561209086280576,
      "learning_rate": 0.00019847532929530415,
      "loss": 0.6198,
      "step": 196
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.23712886628255178,
      "learning_rate": 0.00019845893326700384,
      "loss": 0.4989,
      "step": 197
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.26720592489417233,
      "learning_rate": 0.00019844245023415685,
      "loss": 0.4934,
      "step": 198
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2753251397417421,
      "learning_rate": 0.0001984258802113287,
      "loss": 0.5544,
      "step": 199
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2557869713293877,
      "learning_rate": 0.0001984092232131616,
      "loss": 0.5643,
      "step": 200
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2669651919314609,
      "learning_rate": 0.0001983924792543748,
      "loss": 0.5879,
      "step": 201
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.25579187132615644,
      "learning_rate": 0.00019837564834976432,
      "loss": 0.5742,
      "step": 202
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2550207949421237,
      "learning_rate": 0.000198358730514203,
      "loss": 0.574,
      "step": 203
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.23565090455665763,
      "learning_rate": 0.0001983417257626405,
      "loss": 0.5299,
      "step": 204
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.236980034600526,
      "learning_rate": 0.00019832463411010331,
      "loss": 0.5199,
      "step": 205
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.2434029841189093,
      "learning_rate": 0.0001983074555716947,
      "loss": 0.5477,
      "step": 206
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24771511082227154,
      "learning_rate": 0.00019829019016259468,
      "loss": 0.5697,
      "step": 207
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.23705880771864213,
      "learning_rate": 0.00019827283789806011,
      "loss": 0.521,
      "step": 208
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.24167295291353477,
      "learning_rate": 0.0001982553987934245,
      "loss": 0.558,
      "step": 209
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2535406245356529,
      "learning_rate": 0.0001982378728640982,
      "loss": 0.5693,
      "step": 210
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24865334136075806,
      "learning_rate": 0.00019822026012556818,
      "loss": 0.5499,
      "step": 211
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2544751551481819,
      "learning_rate": 0.0001982025605933982,
      "loss": 0.5449,
      "step": 212
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2599391794330939,
      "learning_rate": 0.0001981847742832287,
      "loss": 0.6222,
      "step": 213
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.23171934920449544,
      "learning_rate": 0.00019816690121077674,
      "loss": 0.5448,
      "step": 214
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24380268930715565,
      "learning_rate": 0.00019814894139183614,
      "loss": 0.5773,
      "step": 215
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2518755786374484,
      "learning_rate": 0.00019813089484227732,
      "loss": 0.5479,
      "step": 216
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.23133984467720642,
      "learning_rate": 0.00019811276157804733,
      "loss": 0.471,
      "step": 217
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.250968947574734,
      "learning_rate": 0.00019809454161516993,
      "loss": 0.5738,
      "step": 218
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.25976015596485974,
      "learning_rate": 0.00019807623496974537,
      "loss": 0.5592,
      "step": 219
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24400199531999783,
      "learning_rate": 0.0001980578416579506,
      "loss": 0.5266,
      "step": 220
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24001572180370875,
      "learning_rate": 0.00019803936169603912,
      "loss": 0.5843,
      "step": 221
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.22867208326764507,
      "learning_rate": 0.00019802079510034096,
      "loss": 0.518,
      "step": 222
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2381724022911579,
      "learning_rate": 0.00019800214188726276,
      "loss": 0.5175,
      "step": 223
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2700455397530704,
      "learning_rate": 0.00019798340207328766,
      "loss": 0.5804,
      "step": 224
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.24320219539003604,
      "learning_rate": 0.00019796457567497537,
      "loss": 0.5304,
      "step": 225
    },
    {
      "epoch": 0.13,
      "grad_norm": 0.2370472610839002,
      "learning_rate": 0.0001979456627089621,
      "loss": 0.5671,
      "step": 226
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.26756212643991917,
      "learning_rate": 0.0001979266631919605,
      "loss": 0.5528,
      "step": 227
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.24929490389202372,
      "learning_rate": 0.00019790757714075979,
      "loss": 0.5407,
      "step": 228
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.23090595152280974,
      "learning_rate": 0.00019788840457222556,
      "loss": 0.5258,
      "step": 229
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2979564627406142,
      "learning_rate": 0.0001978691455033,
      "loss": 0.5367,
      "step": 230
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.24228845587479894,
      "learning_rate": 0.0001978497999510015,
      "loss": 0.5344,
      "step": 231
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.25363482164729867,
      "learning_rate": 0.00019783036793242516,
      "loss": 0.5669,
      "step": 232
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.23622712417060854,
      "learning_rate": 0.00019781084946474226,
      "loss": 0.5797,
      "step": 233
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.21594682302559634,
      "learning_rate": 0.00019779124456520056,
      "loss": 0.5011,
      "step": 234
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.24211833950801223,
      "learning_rate": 0.0001977715532511242,
      "loss": 0.5164,
      "step": 235
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2693820157715391,
      "learning_rate": 0.0001977517755399137,
      "loss": 0.5806,
      "step": 236
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.24734981937576542,
      "learning_rate": 0.00019773191144904586,
      "loss": 0.5233,
      "step": 237
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2741663737268136,
      "learning_rate": 0.00019771196099607386,
      "loss": 0.5402,
      "step": 238
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2521489075033339,
      "learning_rate": 0.00019769192419862716,
      "loss": 0.5862,
      "step": 239
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2633255671236209,
      "learning_rate": 0.0001976718010744116,
      "loss": 0.548,
      "step": 240
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.2691399721238696,
      "learning_rate": 0.00019765159164120916,
      "loss": 0.5648,
      "step": 241
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.25501545746966797,
      "learning_rate": 0.00019763129591687827,
      "loss": 0.5602,
      "step": 242
    },
    {
      "epoch": 0.14,
      "grad_norm": 0.3049976908839563,
      "learning_rate": 0.00019761091391935347,
      "loss": 0.5508,
      "step": 243
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.24467331400916031,
      "learning_rate": 0.00019759044566664558,
      "loss": 0.5229,
      "step": 244
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.27011009612786374,
      "learning_rate": 0.00019756989117684164,
      "loss": 0.5448,
      "step": 245
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.24427143044387528,
      "learning_rate": 0.00019754925046810493,
      "loss": 0.5435,
      "step": 246
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.22753961311031143,
      "learning_rate": 0.00019752852355867486,
      "loss": 0.5369,
      "step": 247
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23865046003559778,
      "learning_rate": 0.00019750771046686704,
      "loss": 0.5354,
      "step": 248
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2736283930569903,
      "learning_rate": 0.00019748681121107325,
      "loss": 0.5588,
      "step": 249
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.24727127426749082,
      "learning_rate": 0.00019746582580976136,
      "loss": 0.5753,
      "step": 250
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2828829340227291,
      "learning_rate": 0.00019744475428147546,
      "loss": 0.6793,
      "step": 251
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.21818184663896711,
      "learning_rate": 0.00019742359664483563,
      "loss": 0.5248,
      "step": 252
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2320708306833192,
      "learning_rate": 0.00019740235291853812,
      "loss": 0.5461,
      "step": 253
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.25703347930088793,
      "learning_rate": 0.00019738102312135523,
      "loss": 0.5713,
      "step": 254
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.24399588128874033,
      "learning_rate": 0.0001973596072721353,
      "loss": 0.5178,
      "step": 255
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.2229881452119291,
      "learning_rate": 0.00019733810538980281,
      "loss": 0.5144,
      "step": 256
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.23889465035265364,
      "learning_rate": 0.0001973165174933581,
      "loss": 0.5727,
      "step": 257
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.25790569964877214,
      "learning_rate": 0.0001972948436018776,
      "loss": 0.5659,
      "step": 258
    },
    {
      "epoch": 0.15,
      "grad_norm": 0.22511338701135042,
      "learning_rate": 0.00019727308373451377,
      "loss": 0.5292,
      "step": 259
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23111498863739158,
      "learning_rate": 0.000197251237910495,
      "loss": 0.5267,
      "step": 260
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23740021982137896,
      "learning_rate": 0.00019722930614912563,
      "loss": 0.5499,
      "step": 261
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.24020258332985106,
      "learning_rate": 0.00019720728846978598,
      "loss": 0.5604,
      "step": 262
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23947573439011133,
      "learning_rate": 0.00019718518489193225,
      "loss": 0.5638,
      "step": 263
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23526187217481284,
      "learning_rate": 0.00019716299543509654,
      "loss": 0.5436,
      "step": 264
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2505831003191156,
      "learning_rate": 0.00019714072011888686,
      "loss": 0.5039,
      "step": 265
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.21592487431784965,
      "learning_rate": 0.00019711835896298713,
      "loss": 0.484,
      "step": 266
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.26528309878122613,
      "learning_rate": 0.00019709591198715707,
      "loss": 0.539,
      "step": 267
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.21635789850987178,
      "learning_rate": 0.00019707337921123221,
      "loss": 0.5553,
      "step": 268
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23671151623321054,
      "learning_rate": 0.00019705076065512398,
      "loss": 0.4968,
      "step": 269
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.25400871793456326,
      "learning_rate": 0.00019702805633881957,
      "loss": 0.5982,
      "step": 270
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2622810971314154,
      "learning_rate": 0.0001970052662823819,
      "loss": 0.5879,
      "step": 271
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.22931814830456296,
      "learning_rate": 0.00019698239050594977,
      "loss": 0.5611,
      "step": 272
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.213695866193263,
      "learning_rate": 0.0001969594290297376,
      "loss": 0.5386,
      "step": 273
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.2431252328609808,
      "learning_rate": 0.00019693638187403563,
      "loss": 0.6039,
      "step": 274
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.23108667253454973,
      "learning_rate": 0.00019691324905920984,
      "loss": 0.5579,
      "step": 275
    },
    {
      "epoch": 0.16,
      "grad_norm": 0.22718831415064272,
      "learning_rate": 0.0001968900306057018,
      "loss": 0.5196,
      "step": 276
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23632362967796033,
      "learning_rate": 0.0001968667265340288,
      "loss": 0.5336,
      "step": 277
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2421878973201691,
      "learning_rate": 0.00019684333686478383,
      "loss": 0.5928,
      "step": 278
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.225775487602821,
      "learning_rate": 0.00019681986161863542,
      "loss": 0.552,
      "step": 279
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23037375759338816,
      "learning_rate": 0.00019679630081632782,
      "loss": 0.4983,
      "step": 280
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.24684136612832333,
      "learning_rate": 0.00019677265447868086,
      "loss": 0.5655,
      "step": 281
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2412756534364674,
      "learning_rate": 0.0001967489226265899,
      "loss": 0.5063,
      "step": 282
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.22005765622474396,
      "learning_rate": 0.00019672510528102597,
      "loss": 0.5188,
      "step": 283
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.25071531725514384,
      "learning_rate": 0.0001967012024630355,
      "loss": 0.5938,
      "step": 284
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.22139592405512468,
      "learning_rate": 0.00019667721419374065,
      "loss": 0.4917,
      "step": 285
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23067244251762076,
      "learning_rate": 0.00019665314049433888,
      "loss": 0.5584,
      "step": 286
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.23829875535152545,
      "learning_rate": 0.00019662898138610323,
      "loss": 0.5264,
      "step": 287
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2641034663020514,
      "learning_rate": 0.00019660473689038228,
      "loss": 0.5805,
      "step": 288
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.22321690487140503,
      "learning_rate": 0.00019658040702859997,
      "loss": 0.5529,
      "step": 289
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2502632163555589,
      "learning_rate": 0.00019655599182225565,
      "loss": 0.5347,
      "step": 290
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2392608020883604,
      "learning_rate": 0.00019653149129292426,
      "loss": 0.5263,
      "step": 291
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.2539237490519494,
      "learning_rate": 0.00019650690546225592,
      "loss": 0.5156,
      "step": 292
    },
    {
      "epoch": 0.17,
      "grad_norm": 0.21964099103511592,
      "learning_rate": 0.00019648223435197627,
      "loss": 0.5101,
      "step": 293
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.24992985700157416,
      "learning_rate": 0.00019645747798388628,
      "loss": 0.5621,
      "step": 294
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2559439615345381,
      "learning_rate": 0.0001964326363798622,
      "loss": 0.5753,
      "step": 295
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2504368010690795,
      "learning_rate": 0.00019640770956185567,
      "loss": 0.5558,
      "step": 296
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.21022187251218089,
      "learning_rate": 0.0001963826975518936,
      "loss": 0.5322,
      "step": 297
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2422143856532352,
      "learning_rate": 0.00019635760037207817,
      "loss": 0.538,
      "step": 298
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.23174554470584352,
      "learning_rate": 0.00019633241804458687,
      "loss": 0.5545,
      "step": 299
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.267070426953347,
      "learning_rate": 0.00019630715059167238,
      "loss": 0.5632,
      "step": 300
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.22256523603127878,
      "learning_rate": 0.0001962817980356626,
      "loss": 0.545,
      "step": 301
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.24403300497950306,
      "learning_rate": 0.0001962563603989607,
      "loss": 0.5448,
      "step": 302
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2193103621019292,
      "learning_rate": 0.00019623083770404492,
      "loss": 0.5064,
      "step": 303
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.23299992325798072,
      "learning_rate": 0.0001962052299734688,
      "loss": 0.5192,
      "step": 304
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2371054150083945,
      "learning_rate": 0.00019617953722986096,
      "loss": 0.5157,
      "step": 305
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2436064901189273,
      "learning_rate": 0.00019615375949592504,
      "loss": 0.5672,
      "step": 306
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.25098365347678436,
      "learning_rate": 0.00019612789679443997,
      "loss": 0.5548,
      "step": 307
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2319425382974216,
      "learning_rate": 0.00019610194914825962,
      "loss": 0.5293,
      "step": 308
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.24156576209403272,
      "learning_rate": 0.000196075916580313,
      "loss": 0.5672,
      "step": 309
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.2337383575844323,
      "learning_rate": 0.0001960497991136041,
      "loss": 0.5509,
      "step": 310
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.23799692988502053,
      "learning_rate": 0.00019602359677121199,
      "loss": 0.5604,
      "step": 311
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2296728275122706,
      "learning_rate": 0.0001959973095762907,
      "loss": 0.5371,
      "step": 312
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22381626870035518,
      "learning_rate": 0.00019597093755206936,
      "loss": 0.5465,
      "step": 313
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.23335681761234933,
      "learning_rate": 0.00019594448072185182,
      "loss": 0.5386,
      "step": 314
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22582265649304345,
      "learning_rate": 0.00019591793910901707,
      "loss": 0.543,
      "step": 315
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2439330072441743,
      "learning_rate": 0.00019589131273701894,
      "loss": 0.5177,
      "step": 316
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2138593422237162,
      "learning_rate": 0.00019586460162938622,
      "loss": 0.5157,
      "step": 317
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.24003613679646058,
      "learning_rate": 0.00019583780580972253,
      "loss": 0.5611,
      "step": 318
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.2552582800734971,
      "learning_rate": 0.00019581092530170633,
      "loss": 0.5922,
      "step": 319
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21898423827197905,
      "learning_rate": 0.00019578396012909092,
      "loss": 0.5272,
      "step": 320
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22013525107478477,
      "learning_rate": 0.00019575691031570446,
      "loss": 0.5184,
      "step": 321
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21113180640163418,
      "learning_rate": 0.00019572977588544986,
      "loss": 0.5134,
      "step": 322
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.22335860079943387,
      "learning_rate": 0.00019570255686230485,
      "loss": 0.5227,
      "step": 323
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.23006684721287293,
      "learning_rate": 0.00019567525327032187,
      "loss": 0.5885,
      "step": 324
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21933564641390155,
      "learning_rate": 0.0001956478651336281,
      "loss": 0.5598,
      "step": 325
    },
    {
      "epoch": 0.19,
      "grad_norm": 0.21770749652400337,
      "learning_rate": 0.00019562039247642546,
      "loss": 0.5082,
      "step": 326
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22800344133653658,
      "learning_rate": 0.00019559283532299043,
      "loss": 0.5539,
      "step": 327
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2385574924163353,
      "learning_rate": 0.00019556519369767438,
      "loss": 0.5497,
      "step": 328
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23099538598172079,
      "learning_rate": 0.0001955374676249031,
      "loss": 0.5138,
      "step": 329
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.21517217725478144,
      "learning_rate": 0.0001955096571291772,
      "loss": 0.5051,
      "step": 330
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.21535771106277588,
      "learning_rate": 0.0001954817622350717,
      "loss": 0.524,
      "step": 331
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.20361747402971658,
      "learning_rate": 0.00019545378296723635,
      "loss": 0.4989,
      "step": 332
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.24644921068325687,
      "learning_rate": 0.0001954257193503954,
      "loss": 0.5927,
      "step": 333
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.24765268362172385,
      "learning_rate": 0.0001953975714093476,
      "loss": 0.5451,
      "step": 334
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.20846277824915477,
      "learning_rate": 0.00019536933916896633,
      "loss": 0.5259,
      "step": 335
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2457371199220107,
      "learning_rate": 0.00019534102265419932,
      "loss": 0.5784,
      "step": 336
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23029745387228598,
      "learning_rate": 0.00019531262189006882,
      "loss": 0.5918,
      "step": 337
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2387820151516941,
      "learning_rate": 0.0001952841369016716,
      "loss": 0.5576,
      "step": 338
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.226451643448924,
      "learning_rate": 0.00019525556771417875,
      "loss": 0.5241,
      "step": 339
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.22086691724075064,
      "learning_rate": 0.00019522691435283585,
      "loss": 0.5392,
      "step": 340
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2259720671796772,
      "learning_rate": 0.00019519817684296285,
      "loss": 0.516,
      "step": 341
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.2244741513315317,
      "learning_rate": 0.00019516935520995393,
      "loss": 0.569,
      "step": 342
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.23890602213836412,
      "learning_rate": 0.0001951404494792778,
      "loss": 0.524,
      "step": 343
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22136745892767679,
      "learning_rate": 0.00019511145967647737,
      "loss": 0.5472,
      "step": 344
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22275740066078306,
      "learning_rate": 0.00019508238582716984,
      "loss": 0.5553,
      "step": 345
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.21225155652808625,
      "learning_rate": 0.00019505322795704676,
      "loss": 0.5302,
      "step": 346
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22704101844750724,
      "learning_rate": 0.0001950239860918738,
      "loss": 0.5485,
      "step": 347
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2135110250199134,
      "learning_rate": 0.00019499466025749097,
      "loss": 0.5343,
      "step": 348
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22772242632973722,
      "learning_rate": 0.00019496525047981242,
      "loss": 0.5159,
      "step": 349
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.4444297049160113,
      "learning_rate": 0.00019493575678482649,
      "loss": 0.5121,
      "step": 350
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.226632712040011,
      "learning_rate": 0.0001949061791985957,
      "loss": 0.5304,
      "step": 351
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22132303156586286,
      "learning_rate": 0.00019487651774725663,
      "loss": 0.4817,
      "step": 352
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.23206581340772667,
      "learning_rate": 0.00019484677245702004,
      "loss": 0.5258,
      "step": 353
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2374903834541946,
      "learning_rate": 0.0001948169433541708,
      "loss": 0.5318,
      "step": 354
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.22896458770920267,
      "learning_rate": 0.00019478703046506773,
      "loss": 0.4806,
      "step": 355
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.21040301706147688,
      "learning_rate": 0.00019475703381614375,
      "loss": 0.5144,
      "step": 356
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.21179618454444762,
      "learning_rate": 0.00019472695343390585,
      "loss": 0.524,
      "step": 357
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.20436614333218908,
      "learning_rate": 0.00019469678934493488,
      "loss": 0.501,
      "step": 358
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.2478256130980173,
      "learning_rate": 0.0001946665415758858,
      "loss": 0.5386,
      "step": 359
    },
    {
      "epoch": 0.21,
      "grad_norm": 0.226116084636948,
      "learning_rate": 0.00019463621015348748,
      "loss": 0.5101,
      "step": 360
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21838947264457534,
      "learning_rate": 0.00019460579510454263,
      "loss": 0.5296,
      "step": 361
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2152879498375444,
      "learning_rate": 0.00019457529645592792,
      "loss": 0.512,
      "step": 362
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.22514971802642378,
      "learning_rate": 0.00019454471423459389,
      "loss": 0.5593,
      "step": 363
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.23402386101532432,
      "learning_rate": 0.00019451404846756494,
      "loss": 0.555,
      "step": 364
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2244514913016572,
      "learning_rate": 0.00019448329918193927,
      "loss": 0.5689,
      "step": 365
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.22260707231596893,
      "learning_rate": 0.00019445246640488893,
      "loss": 0.6062,
      "step": 366
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21791090145253736,
      "learning_rate": 0.00019442155016365965,
      "loss": 0.531,
      "step": 367
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21895889257567258,
      "learning_rate": 0.00019439055048557101,
      "loss": 0.5538,
      "step": 368
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21306696799872818,
      "learning_rate": 0.00019435946739801633,
      "loss": 0.5673,
      "step": 369
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2294607768810707,
      "learning_rate": 0.00019432830092846253,
      "loss": 0.5855,
      "step": 370
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.22758409665267085,
      "learning_rate": 0.0001942970511044503,
      "loss": 0.5783,
      "step": 371
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.21334100614200935,
      "learning_rate": 0.00019426571795359398,
      "loss": 0.5056,
      "step": 372
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.24187624093875965,
      "learning_rate": 0.0001942343015035815,
      "loss": 0.543,
      "step": 373
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.2275714845035408,
      "learning_rate": 0.00019420280178217443,
      "loss": 0.5329,
      "step": 374
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.23237641477505608,
      "learning_rate": 0.00019417121881720793,
      "loss": 0.5134,
      "step": 375
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.25196886008416386,
      "learning_rate": 0.0001941395526365907,
      "loss": 0.6023,
      "step": 376
    },
    {
      "epoch": 0.22,
      "grad_norm": 0.22418514390796682,
      "learning_rate": 0.00019410780326830498,
      "loss": 0.5529,
      "step": 377
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21438856736265666,
      "learning_rate": 0.0001940759707404065,
      "loss": 0.5134,
      "step": 378
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2331754234870151,
      "learning_rate": 0.00019404405508102455,
      "loss": 0.5406,
      "step": 379
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.24908239322819828,
      "learning_rate": 0.00019401205631836178,
      "loss": 0.5377,
      "step": 380
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21332745391417657,
      "learning_rate": 0.00019397997448069435,
      "loss": 0.5025,
      "step": 381
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.20749658696001225,
      "learning_rate": 0.00019394780959637177,
      "loss": 0.5257,
      "step": 382
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2237716482529178,
      "learning_rate": 0.000193915561693817,
      "loss": 0.5,
      "step": 383
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.25234282015654147,
      "learning_rate": 0.00019388323080152633,
      "loss": 0.5753,
      "step": 384
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2252939326339829,
      "learning_rate": 0.00019385081694806936,
      "loss": 0.5662,
      "step": 385
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.21979629294660186,
      "learning_rate": 0.00019381832016208904,
      "loss": 0.5141,
      "step": 386
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.24762535901866153,
      "learning_rate": 0.0001937857404723016,
      "loss": 0.6193,
      "step": 387
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.25032044234085526,
      "learning_rate": 0.00019375307790749647,
      "loss": 0.5024,
      "step": 388
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22892425302508923,
      "learning_rate": 0.0001937203324965364,
      "loss": 0.5401,
      "step": 389
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.2461599771002527,
      "learning_rate": 0.0001936875042683573,
      "loss": 0.5301,
      "step": 390
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22363255721865732,
      "learning_rate": 0.00019365459325196825,
      "loss": 0.5538,
      "step": 391
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22482667580972365,
      "learning_rate": 0.00019362159947645152,
      "loss": 0.4928,
      "step": 392
    },
    {
      "epoch": 0.23,
      "grad_norm": 0.22869596173751142,
      "learning_rate": 0.00019358852297096253,
      "loss": 0.5546,
      "step": 393
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2274546469780496,
      "learning_rate": 0.00019355536376472972,
      "loss": 0.5763,
      "step": 394
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.21284874650406885,
      "learning_rate": 0.0001935221218870547,
      "loss": 0.5778,
      "step": 395
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23158847478661296,
      "learning_rate": 0.0001934887973673121,
      "loss": 0.5654,
      "step": 396
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.24510006704514478,
      "learning_rate": 0.0001934553902349496,
      "loss": 0.5053,
      "step": 397
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.20330878586204656,
      "learning_rate": 0.00019342190051948777,
      "loss": 0.5171,
      "step": 398
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2131804710318274,
      "learning_rate": 0.0001933883282505203,
      "loss": 0.5286,
      "step": 399
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23297933515492006,
      "learning_rate": 0.00019335467345771377,
      "loss": 0.5593,
      "step": 400
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.24611434220143105,
      "learning_rate": 0.0001933209361708077,
      "loss": 0.604,
      "step": 401
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23281321736249425,
      "learning_rate": 0.00019328711641961445,
      "loss": 0.5579,
      "step": 402
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.21399822113981087,
      "learning_rate": 0.00019325321423401933,
      "loss": 0.5661,
      "step": 403
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22113107520723113,
      "learning_rate": 0.00019321922964398046,
      "loss": 0.5789,
      "step": 404
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23262880002546846,
      "learning_rate": 0.00019318516267952874,
      "loss": 0.5447,
      "step": 405
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.24962941770082592,
      "learning_rate": 0.00019315101337076792,
      "loss": 0.5512,
      "step": 406
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22210049422713798,
      "learning_rate": 0.0001931167817478745,
      "loss": 0.5427,
      "step": 407
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.22647809883332484,
      "learning_rate": 0.0001930824678410977,
      "loss": 0.4888,
      "step": 408
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.23660763255678552,
      "learning_rate": 0.00019304807168075944,
      "loss": 0.5755,
      "step": 409
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.2354103448271752,
      "learning_rate": 0.00019301359329725436,
      "loss": 0.5265,
      "step": 410
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.24322261128085423,
      "learning_rate": 0.00019297903272104977,
      "loss": 0.5291,
      "step": 411
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.20525199182278092,
      "learning_rate": 0.00019294438998268554,
      "loss": 0.4996,
      "step": 412
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.24678535182755174,
      "learning_rate": 0.00019290966511277422,
      "loss": 0.567,
      "step": 413
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.22165331172413838,
      "learning_rate": 0.00019287485814200087,
      "loss": 0.5348,
      "step": 414
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.24541020782476444,
      "learning_rate": 0.00019283996910112318,
      "loss": 0.5432,
      "step": 415
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2255959168063083,
      "learning_rate": 0.00019280499802097126,
      "loss": 0.5891,
      "step": 416
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.21159018099714821,
      "learning_rate": 0.0001927699449324478,
      "loss": 0.5003,
      "step": 417
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.21379995902020923,
      "learning_rate": 0.00019273480986652794,
      "loss": 0.5314,
      "step": 418
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2853169518220406,
      "learning_rate": 0.0001926995928542592,
      "loss": 0.6108,
      "step": 419
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.22738285867292138,
      "learning_rate": 0.00019266429392676164,
      "loss": 0.5217,
      "step": 420
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.23835369502554374,
      "learning_rate": 0.00019262891311522755,
      "loss": 0.5318,
      "step": 421
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.20671557324330114,
      "learning_rate": 0.0001925934504509217,
      "loss": 0.5234,
      "step": 422
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.205212164360302,
      "learning_rate": 0.00019255790596518112,
      "loss": 0.5023,
      "step": 423
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.21664090577036152,
      "learning_rate": 0.00019252227968941522,
      "loss": 0.5452,
      "step": 424
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.22146041084684798,
      "learning_rate": 0.00019248657165510556,
      "loss": 0.5474,
      "step": 425
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2338997589574809,
      "learning_rate": 0.00019245078189380604,
      "loss": 0.5516,
      "step": 426
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.2313978280927526,
      "learning_rate": 0.0001924149104371428,
      "loss": 0.5831,
      "step": 427
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2098577112814155,
      "learning_rate": 0.00019237895731681408,
      "loss": 0.5452,
      "step": 428
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.26497439164374026,
      "learning_rate": 0.0001923429225645904,
      "loss": 0.5666,
      "step": 429
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.21859970576834997,
      "learning_rate": 0.00019230680621231425,
      "loss": 0.5069,
      "step": 430
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.20509380886351694,
      "learning_rate": 0.0001922706082919004,
      "loss": 0.4573,
      "step": 431
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2182328366507935,
      "learning_rate": 0.0001922343288353356,
      "loss": 0.6133,
      "step": 432
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.2822350271273954,
      "learning_rate": 0.00019219796787467867,
      "loss": 0.5709,
      "step": 433
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.24487543268473794,
      "learning_rate": 0.00019216152544206049,
      "loss": 0.546,
      "step": 434
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.24221176090281485,
      "learning_rate": 0.00019212500156968383,
      "loss": 0.5507,
      "step": 435
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.22053929296251015,
      "learning_rate": 0.00019208839628982358,
      "loss": 0.5473,
      "step": 436
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.22975415570737245,
      "learning_rate": 0.00019205170963482643,
      "loss": 0.5181,
      "step": 437
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.22969105575505203,
      "learning_rate": 0.00019201494163711104,
      "loss": 0.5463,
      "step": 438
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.23764087103158363,
      "learning_rate": 0.00019197809232916795,
      "loss": 0.55,
      "step": 439
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.21997498488474826,
      "learning_rate": 0.00019194116174355954,
      "loss": 0.5421,
      "step": 440
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.22225824990596896,
      "learning_rate": 0.00019190414991291998,
      "loss": 0.5439,
      "step": 441
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.243391488050543,
      "learning_rate": 0.00019186705686995533,
      "loss": 0.6289,
      "step": 442
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.222494273038652,
      "learning_rate": 0.0001918298826474433,
      "loss": 0.5088,
      "step": 443
    },
    {
      "epoch": 0.26,
      "grad_norm": 0.22114450997419682,
      "learning_rate": 0.0001917926272782334,
      "loss": 0.5624,
      "step": 444
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.21964760504534894,
      "learning_rate": 0.00019175529079524687,
      "loss": 0.5289,
      "step": 445
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3042847973140014,
      "learning_rate": 0.00019171787323147654,
      "loss": 0.5328,
      "step": 446
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.22425571202210934,
      "learning_rate": 0.00019168037461998695,
      "loss": 0.5699,
      "step": 447
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23406959191320909,
      "learning_rate": 0.00019164279499391427,
      "loss": 0.5147,
      "step": 448
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.3604500123158513,
      "learning_rate": 0.00019160513438646617,
      "loss": 0.5697,
      "step": 449
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2501436029131694,
      "learning_rate": 0.00019156739283092205,
      "loss": 0.6015,
      "step": 450
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.21928141490521824,
      "learning_rate": 0.00019152957036063265,
      "loss": 0.5111,
      "step": 451
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.257908225365161,
      "learning_rate": 0.00019149166700902032,
      "loss": 0.5132,
      "step": 452
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2713678867101362,
      "learning_rate": 0.0001914536828095789,
      "loss": 0.5995,
      "step": 453
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2398794022246256,
      "learning_rate": 0.0001914156177958736,
      "loss": 0.4993,
      "step": 454
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2373981477389832,
      "learning_rate": 0.0001913774720015411,
      "loss": 0.5064,
      "step": 455
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2188011093608266,
      "learning_rate": 0.00019133924546028942,
      "loss": 0.5606,
      "step": 456
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.24077263566935142,
      "learning_rate": 0.00019130093820589791,
      "loss": 0.5606,
      "step": 457
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.23519919814487683,
      "learning_rate": 0.00019126255027221735,
      "loss": 0.5307,
      "step": 458
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.21480730775028578,
      "learning_rate": 0.00019122408169316976,
      "loss": 0.526,
      "step": 459
    },
    {
      "epoch": 0.27,
      "grad_norm": 0.2161668548042441,
      "learning_rate": 0.00019118553250274832,
      "loss": 0.5657,
      "step": 460
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.22318400428439122,
      "learning_rate": 0.00019114690273501765,
      "loss": 0.513,
      "step": 461
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.22252447744680176,
      "learning_rate": 0.00019110819242411337,
      "loss": 0.5247,
      "step": 462
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21358818358042153,
      "learning_rate": 0.00019106940160424244,
      "loss": 0.556,
      "step": 463
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2121229259271081,
      "learning_rate": 0.0001910305303096828,
      "loss": 0.5138,
      "step": 464
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.22636146624511622,
      "learning_rate": 0.0001909915785747836,
      "loss": 0.5111,
      "step": 465
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.20571954917028099,
      "learning_rate": 0.00019095254643396512,
      "loss": 0.5125,
      "step": 466
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21968966730793454,
      "learning_rate": 0.0001909134339217186,
      "loss": 0.5358,
      "step": 467
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21910723327372644,
      "learning_rate": 0.00019087424107260627,
      "loss": 0.5382,
      "step": 468
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2153841373499183,
      "learning_rate": 0.00019083496792126153,
      "loss": 0.5375,
      "step": 469
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.23479205084160673,
      "learning_rate": 0.00019079561450238854,
      "loss": 0.5984,
      "step": 470
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21595571362737268,
      "learning_rate": 0.00019075618085076247,
      "loss": 0.5417,
      "step": 471
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.24550770571804625,
      "learning_rate": 0.00019071666700122946,
      "loss": 0.5306,
      "step": 472
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.21802243564456578,
      "learning_rate": 0.00019067707298870638,
      "loss": 0.5157,
      "step": 473
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2068796190094572,
      "learning_rate": 0.00019063739884818103,
      "loss": 0.5254,
      "step": 474
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.24034732867281272,
      "learning_rate": 0.000190597644614712,
      "loss": 0.6204,
      "step": 475
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2260836607650634,
      "learning_rate": 0.00019055781032342864,
      "loss": 0.5492,
      "step": 476
    },
    {
      "epoch": 0.28,
      "grad_norm": 0.2476351525598878,
      "learning_rate": 0.00019051789600953102,
      "loss": 0.5157,
      "step": 477
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2280151093681579,
      "learning_rate": 0.00019047790170829003,
      "loss": 0.4984,
      "step": 478
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2217333524292061,
      "learning_rate": 0.00019043782745504711,
      "loss": 0.5149,
      "step": 479
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2356369467654302,
      "learning_rate": 0.00019039767328521442,
      "loss": 0.5724,
      "step": 480
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21541809863677616,
      "learning_rate": 0.0001903574392342747,
      "loss": 0.5138,
      "step": 481
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.21722431891543054,
      "learning_rate": 0.00019031712533778137,
      "loss": 0.5536,
      "step": 482
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2370708268417489,
      "learning_rate": 0.00019027673163135827,
      "loss": 0.5038,
      "step": 483
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.22809310323516838,
      "learning_rate": 0.00019023625815069989,
      "loss": 0.5713,
      "step": 484
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.22374988575329294,
      "learning_rate": 0.00019019570493157114,
      "loss": 0.5549,
      "step": 485
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.20510711707245072,
      "learning_rate": 0.0001901550720098074,
      "loss": 0.46,
      "step": 486
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2621551195786783,
      "learning_rate": 0.00019011435942131448,
      "loss": 0.5546,
      "step": 487
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.20503054358781417,
      "learning_rate": 0.00019007356720206865,
      "loss": 0.5547,
      "step": 488
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.23586140447856616,
      "learning_rate": 0.00019003269538811647,
      "loss": 0.6075,
      "step": 489
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2828040872125889,
      "learning_rate": 0.00018999174401557488,
      "loss": 0.602,
      "step": 490
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2023429982220119,
      "learning_rate": 0.00018995071312063105,
      "loss": 0.4975,
      "step": 491
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.2054777673202953,
      "learning_rate": 0.00018990960273954254,
      "loss": 0.5295,
      "step": 492
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.1982185225446849,
      "learning_rate": 0.00018986841290863704,
      "loss": 0.5461,
      "step": 493
    },
    {
      "epoch": 0.29,
      "grad_norm": 0.23248022218099268,
      "learning_rate": 0.0001898271436643125,
      "loss": 0.5924,
      "step": 494
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2235279893303581,
      "learning_rate": 0.00018978579504303706,
      "loss": 0.5598,
      "step": 495
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.21675084465821123,
      "learning_rate": 0.000189744367081349,
      "loss": 0.5012,
      "step": 496
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2041881848681654,
      "learning_rate": 0.00018970285981585662,
      "loss": 0.526,
      "step": 497
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.23258761727278376,
      "learning_rate": 0.00018966127328323842,
      "loss": 0.553,
      "step": 498
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.23066266735191,
      "learning_rate": 0.00018961960752024288,
      "loss": 0.5506,
      "step": 499
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20634958879584178,
      "learning_rate": 0.0001895778625636885,
      "loss": 0.5006,
      "step": 500
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.21082421656186934,
      "learning_rate": 0.00018953603845046378,
      "loss": 0.5279,
      "step": 501
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2057560041730304,
      "learning_rate": 0.00018949413521752713,
      "loss": 0.5598,
      "step": 502
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2096114347066206,
      "learning_rate": 0.00018945215290190693,
      "loss": 0.5113,
      "step": 503
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.23218477255443984,
      "learning_rate": 0.00018941009154070136,
      "loss": 0.5169,
      "step": 504
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20857717653678057,
      "learning_rate": 0.00018936795117107855,
      "loss": 0.5149,
      "step": 505
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.24006448825761761,
      "learning_rate": 0.0001893257318302764,
      "loss": 0.5228,
      "step": 506
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2146671098435255,
      "learning_rate": 0.00018928343355560258,
      "loss": 0.5257,
      "step": 507
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.20608859556559073,
      "learning_rate": 0.00018924105638443452,
      "loss": 0.527,
      "step": 508
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2336814919363686,
      "learning_rate": 0.0001891986003542194,
      "loss": 0.5461,
      "step": 509
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.2409130946928026,
      "learning_rate": 0.00018915606550247397,
      "loss": 0.5493,
      "step": 510
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.21371348825911873,
      "learning_rate": 0.0001891134518667848,
      "loss": 0.572,
      "step": 511
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2014364828041311,
      "learning_rate": 0.000189070759484808,
      "loss": 0.5109,
      "step": 512
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2290945612613713,
      "learning_rate": 0.0001890279883942692,
      "loss": 0.5493,
      "step": 513
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22127732127756986,
      "learning_rate": 0.0001889851386329637,
      "loss": 0.5387,
      "step": 514
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.20564079598559082,
      "learning_rate": 0.00018894221023875622,
      "loss": 0.5192,
      "step": 515
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.213993086214796,
      "learning_rate": 0.00018889920324958106,
      "loss": 0.5044,
      "step": 516
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.21506249939577854,
      "learning_rate": 0.00018885611770344185,
      "loss": 0.4969,
      "step": 517
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22792164808811663,
      "learning_rate": 0.00018881295363841174,
      "loss": 0.5564,
      "step": 518
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1978731923118128,
      "learning_rate": 0.00018876971109263324,
      "loss": 0.4898,
      "step": 519
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.22394451521984352,
      "learning_rate": 0.00018872639010431822,
      "loss": 0.5586,
      "step": 520
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.20009625678598073,
      "learning_rate": 0.0001886829907117478,
      "loss": 0.5399,
      "step": 521
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.20448355507434923,
      "learning_rate": 0.00018863951295327244,
      "loss": 0.5263,
      "step": 522
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1967777231547204,
      "learning_rate": 0.00018859595686731187,
      "loss": 0.4904,
      "step": 523
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2052388343929957,
      "learning_rate": 0.00018855232249235498,
      "loss": 0.4951,
      "step": 524
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.1970956590240829,
      "learning_rate": 0.00018850860986695985,
      "loss": 0.5112,
      "step": 525
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.2102143499682878,
      "learning_rate": 0.00018846481902975377,
      "loss": 0.5234,
      "step": 526
    },
    {
      "epoch": 0.31,
      "grad_norm": 0.23384214794287286,
      "learning_rate": 0.00018842095001943306,
      "loss": 0.5387,
      "step": 527
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.20133953340775343,
      "learning_rate": 0.00018837700287476316,
      "loss": 0.4995,
      "step": 528
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2238467486071384,
      "learning_rate": 0.00018833297763457858,
      "loss": 0.5709,
      "step": 529
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.26170161234282546,
      "learning_rate": 0.00018828887433778278,
      "loss": 0.6314,
      "step": 530
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2317819906199683,
      "learning_rate": 0.00018824469302334822,
      "loss": 0.5333,
      "step": 531
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21538390925414544,
      "learning_rate": 0.0001882004337303163,
      "loss": 0.5603,
      "step": 532
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23053571801246284,
      "learning_rate": 0.0001881560964977974,
      "loss": 0.593,
      "step": 533
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21173642276584706,
      "learning_rate": 0.0001881116813649706,
      "loss": 0.5539,
      "step": 534
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.24587290888576793,
      "learning_rate": 0.00018806718837108402,
      "loss": 0.5408,
      "step": 535
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.22324082101473863,
      "learning_rate": 0.00018802261755545443,
      "loss": 0.5857,
      "step": 536
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21827653101692504,
      "learning_rate": 0.0001879779689574674,
      "loss": 0.5451,
      "step": 537
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2146222856243753,
      "learning_rate": 0.00018793324261657737,
      "loss": 0.5007,
      "step": 538
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.20994383183759666,
      "learning_rate": 0.00018788843857230726,
      "loss": 0.5039,
      "step": 539
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.23384168426304514,
      "learning_rate": 0.00018784355686424876,
      "loss": 0.5329,
      "step": 540
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.20284382518697272,
      "learning_rate": 0.00018779859753206225,
      "loss": 0.5383,
      "step": 541
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.22307014132513725,
      "learning_rate": 0.00018775356061547662,
      "loss": 0.5766,
      "step": 542
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.21675879523474215,
      "learning_rate": 0.00018770844615428932,
      "loss": 0.4994,
      "step": 543
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.2200785983728407,
      "learning_rate": 0.00018766325418836637,
      "loss": 0.5615,
      "step": 544
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20895654400479502,
      "learning_rate": 0.00018761798475764224,
      "loss": 0.4993,
      "step": 545
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22152937631276676,
      "learning_rate": 0.00018757263790211988,
      "loss": 0.5275,
      "step": 546
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.209333487906431,
      "learning_rate": 0.0001875272136618706,
      "loss": 0.4911,
      "step": 547
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2123519912763275,
      "learning_rate": 0.00018748171207703417,
      "loss": 0.5662,
      "step": 548
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2147346642469028,
      "learning_rate": 0.00018743613318781868,
      "loss": 0.5651,
      "step": 549
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2017789732342509,
      "learning_rate": 0.00018739047703450048,
      "loss": 0.5573,
      "step": 550
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2084087089737107,
      "learning_rate": 0.00018734474365742428,
      "loss": 0.562,
      "step": 551
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22130968599178,
      "learning_rate": 0.00018729893309700295,
      "loss": 0.5729,
      "step": 552
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.22736172090948445,
      "learning_rate": 0.0001872530453937176,
      "loss": 0.5548,
      "step": 553
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.21738577850339916,
      "learning_rate": 0.0001872070805881176,
      "loss": 0.5191,
      "step": 554
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20994273135857797,
      "learning_rate": 0.00018716103872082026,
      "loss": 0.5153,
      "step": 555
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.25944295362906805,
      "learning_rate": 0.00018711491983251113,
      "loss": 0.5471,
      "step": 556
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.2138519097360962,
      "learning_rate": 0.00018706872396394376,
      "loss": 0.4875,
      "step": 557
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.23586915663527888,
      "learning_rate": 0.00018702245115593974,
      "loss": 0.5224,
      "step": 558
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20477148046499385,
      "learning_rate": 0.0001869761014493887,
      "loss": 0.5466,
      "step": 559
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.21783175505387284,
      "learning_rate": 0.00018692967488524812,
      "loss": 0.5557,
      "step": 560
    },
    {
      "epoch": 0.33,
      "grad_norm": 0.20442177589984145,
      "learning_rate": 0.0001868831715045435,
      "loss": 0.507,
      "step": 561
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21291324212369495,
      "learning_rate": 0.00018683659134836813,
      "loss": 0.5779,
      "step": 562
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.22670486875141618,
      "learning_rate": 0.00018678993445788323,
      "loss": 0.5831,
      "step": 563
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2431493116309222,
      "learning_rate": 0.00018674320087431768,
      "loss": 0.5389,
      "step": 564
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.22102091260855142,
      "learning_rate": 0.00018669639063896836,
      "loss": 0.5569,
      "step": 565
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20001951850669827,
      "learning_rate": 0.0001866495037931997,
      "loss": 0.486,
      "step": 566
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.22781103196427857,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.4973,
      "step": 567
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21129685691062433,
      "learning_rate": 0.00018655550043620073,
      "loss": 0.5459,
      "step": 568
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20363805081315986,
      "learning_rate": 0.0001865083840080378,
      "loss": 0.4997,
      "step": 569
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.22269838654252982,
      "learning_rate": 0.00018646119113559006,
      "loss": 0.5406,
      "step": 570
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20307002281681275,
      "learning_rate": 0.00018641392186056016,
      "loss": 0.4861,
      "step": 571
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.20146261628709675,
      "learning_rate": 0.0001863665762247182,
      "loss": 0.561,
      "step": 572
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21049257054009352,
      "learning_rate": 0.00018631915426990184,
      "loss": 0.5257,
      "step": 573
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2245482792823418,
      "learning_rate": 0.00018627165603801605,
      "loss": 0.5441,
      "step": 574
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2106578436256788,
      "learning_rate": 0.0001862240815710333,
      "loss": 0.5125,
      "step": 575
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.2091435884054145,
      "learning_rate": 0.0001861764309109934,
      "loss": 0.523,
      "step": 576
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.21256854318600532,
      "learning_rate": 0.00018612870410000354,
      "loss": 0.4851,
      "step": 577
    },
    {
      "epoch": 0.34,
      "grad_norm": 0.24387962798982954,
      "learning_rate": 0.00018608090118023808,
      "loss": 0.5423,
      "step": 578
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2357478920855788,
      "learning_rate": 0.00018603302219393874,
      "loss": 0.5386,
      "step": 579
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21267780857117077,
      "learning_rate": 0.0001859850671834144,
      "loss": 0.5545,
      "step": 580
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.25049614581715324,
      "learning_rate": 0.0001859370361910412,
      "loss": 0.5241,
      "step": 581
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.1937807494598699,
      "learning_rate": 0.00018588892925926228,
      "loss": 0.5533,
      "step": 582
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21209972240968475,
      "learning_rate": 0.00018584074643058807,
      "loss": 0.538,
      "step": 583
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.22281277082523665,
      "learning_rate": 0.00018579248774759586,
      "loss": 0.5456,
      "step": 584
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.22156542955128883,
      "learning_rate": 0.00018574415325293018,
      "loss": 0.5622,
      "step": 585
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.20068342929250654,
      "learning_rate": 0.00018569574298930237,
      "loss": 0.5372,
      "step": 586
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21693418845369525,
      "learning_rate": 0.00018564725699949083,
      "loss": 0.4874,
      "step": 587
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2060622909003744,
      "learning_rate": 0.0001855986953263409,
      "loss": 0.5331,
      "step": 588
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.20007419545283933,
      "learning_rate": 0.00018555005801276463,
      "loss": 0.5131,
      "step": 589
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21905328017125653,
      "learning_rate": 0.00018550134510174115,
      "loss": 0.5572,
      "step": 590
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.21213287568506015,
      "learning_rate": 0.0001854525566363162,
      "loss": 0.5359,
      "step": 591
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.20066093050756748,
      "learning_rate": 0.00018540369265960242,
      "loss": 0.5334,
      "step": 592
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2068811720002483,
      "learning_rate": 0.00018535475321477906,
      "loss": 0.5558,
      "step": 593
    },
    {
      "epoch": 0.35,
      "grad_norm": 0.2025287668887073,
      "learning_rate": 0.00018530573834509215,
      "loss": 0.5098,
      "step": 594
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20807380346718593,
      "learning_rate": 0.0001852566480938543,
      "loss": 0.5211,
      "step": 595
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2049943719782544,
      "learning_rate": 0.00018520748250444474,
      "loss": 0.5379,
      "step": 596
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.8558508208219735,
      "learning_rate": 0.00018515824162030934,
      "loss": 0.5403,
      "step": 597
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.25414317775682305,
      "learning_rate": 0.00018510892548496047,
      "loss": 0.5804,
      "step": 598
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20806597400748386,
      "learning_rate": 0.00018505953414197696,
      "loss": 0.5419,
      "step": 599
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.1950528976937739,
      "learning_rate": 0.00018501006763500414,
      "loss": 0.4956,
      "step": 600
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20652545713558523,
      "learning_rate": 0.00018496052600775376,
      "loss": 0.4942,
      "step": 601
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20955886781649663,
      "learning_rate": 0.0001849109093040039,
      "loss": 0.5177,
      "step": 602
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.21093362015684414,
      "learning_rate": 0.00018486121756759906,
      "loss": 0.5672,
      "step": 603
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.22033088091533184,
      "learning_rate": 0.00018481145084245002,
      "loss": 0.5691,
      "step": 604
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.20322111965044637,
      "learning_rate": 0.00018476160917253373,
      "loss": 0.5425,
      "step": 605
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2028788101278272,
      "learning_rate": 0.0001847116926018935,
      "loss": 0.5176,
      "step": 606
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.19551140156538951,
      "learning_rate": 0.0001846617011746388,
      "loss": 0.5115,
      "step": 607
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.21944694996534547,
      "learning_rate": 0.00018461163493494517,
      "loss": 0.5496,
      "step": 608
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.21506814147924705,
      "learning_rate": 0.0001845614939270543,
      "loss": 0.5823,
      "step": 609
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.2220938137588105,
      "learning_rate": 0.00018451127819527402,
      "loss": 0.5731,
      "step": 610
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.21590208362786933,
      "learning_rate": 0.00018446098778397807,
      "loss": 0.6063,
      "step": 611
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.20084594317065918,
      "learning_rate": 0.00018441062273760628,
      "loss": 0.5286,
      "step": 612
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21847304705653886,
      "learning_rate": 0.00018436018310066435,
      "loss": 0.5721,
      "step": 613
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2467936487351411,
      "learning_rate": 0.000184309668917724,
      "loss": 0.571,
      "step": 614
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21666156526926003,
      "learning_rate": 0.0001842590802334227,
      "loss": 0.5244,
      "step": 615
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21336859433357677,
      "learning_rate": 0.00018420841709246383,
      "loss": 0.5724,
      "step": 616
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.1933070755110986,
      "learning_rate": 0.0001841576795396166,
      "loss": 0.5347,
      "step": 617
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2332186369470874,
      "learning_rate": 0.00018410686761971586,
      "loss": 0.5474,
      "step": 618
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.1996293438855639,
      "learning_rate": 0.00018405598137766224,
      "loss": 0.5421,
      "step": 619
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2012759807756364,
      "learning_rate": 0.00018400502085842208,
      "loss": 0.519,
      "step": 620
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.24355300568180752,
      "learning_rate": 0.00018395398610702733,
      "loss": 0.597,
      "step": 621
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2136711983483761,
      "learning_rate": 0.00018390287716857546,
      "loss": 0.5398,
      "step": 622
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.22275088525970024,
      "learning_rate": 0.00018385169408822964,
      "loss": 0.5597,
      "step": 623
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.20011931485707388,
      "learning_rate": 0.0001838004369112184,
      "loss": 0.4901,
      "step": 624
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.19544716159187206,
      "learning_rate": 0.00018374910568283594,
      "loss": 0.4726,
      "step": 625
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2176067620544374,
      "learning_rate": 0.00018369770044844168,
      "loss": 0.5369,
      "step": 626
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.2005629047810257,
      "learning_rate": 0.00018364622125346055,
      "loss": 0.4914,
      "step": 627
    },
    {
      "epoch": 0.37,
      "grad_norm": 0.21497281608823432,
      "learning_rate": 0.0001835946681433829,
      "loss": 0.5559,
      "step": 628
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20354723273049724,
      "learning_rate": 0.00018354304116376425,
      "loss": 0.5083,
      "step": 629
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.23536026550959782,
      "learning_rate": 0.0001834913403602255,
      "loss": 0.5449,
      "step": 630
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20887211237530257,
      "learning_rate": 0.00018343956577845276,
      "loss": 0.5131,
      "step": 631
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21728763678777088,
      "learning_rate": 0.00018338771746419726,
      "loss": 0.5484,
      "step": 632
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21910570476522437,
      "learning_rate": 0.00018333579546327556,
      "loss": 0.5452,
      "step": 633
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21247350127543838,
      "learning_rate": 0.00018328379982156915,
      "loss": 0.5232,
      "step": 634
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21706686115456897,
      "learning_rate": 0.00018323173058502472,
      "loss": 0.5353,
      "step": 635
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.19529494853666482,
      "learning_rate": 0.00018317958779965387,
      "loss": 0.4611,
      "step": 636
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.2194890381897013,
      "learning_rate": 0.00018312737151153334,
      "loss": 0.4884,
      "step": 637
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.24336065627870296,
      "learning_rate": 0.00018307508176680472,
      "loss": 0.5708,
      "step": 638
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22638828434923797,
      "learning_rate": 0.00018302271861167456,
      "loss": 0.5795,
      "step": 639
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20501380703607638,
      "learning_rate": 0.0001829702820924142,
      "loss": 0.5645,
      "step": 640
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22705979847255006,
      "learning_rate": 0.00018291777225535994,
      "loss": 0.4974,
      "step": 641
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.22629645320684777,
      "learning_rate": 0.00018286518914691272,
      "loss": 0.5587,
      "step": 642
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.21772563640763765,
      "learning_rate": 0.00018281253281353838,
      "loss": 0.5219,
      "step": 643
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.20447194133414195,
      "learning_rate": 0.00018275980330176737,
      "loss": 0.5425,
      "step": 644
    },
    {
      "epoch": 0.38,
      "grad_norm": 0.24126870503035064,
      "learning_rate": 0.00018270700065819477,
      "loss": 0.5119,
      "step": 645
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.23269297218381896,
      "learning_rate": 0.00018265412492948042,
      "loss": 0.5507,
      "step": 646
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.23416570398912578,
      "learning_rate": 0.0001826011761623486,
      "loss": 0.5947,
      "step": 647
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2186560086983282,
      "learning_rate": 0.0001825481544035882,
      "loss": 0.5204,
      "step": 648
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.20624707271501935,
      "learning_rate": 0.00018249505970005262,
      "loss": 0.4785,
      "step": 649
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.23418189558532218,
      "learning_rate": 0.00018244189209865974,
      "loss": 0.4976,
      "step": 650
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21372290734059424,
      "learning_rate": 0.00018238865164639173,
      "loss": 0.5237,
      "step": 651
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.1986689651795865,
      "learning_rate": 0.0001823353383902953,
      "loss": 0.5354,
      "step": 652
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21154599437074698,
      "learning_rate": 0.0001822819523774814,
      "loss": 0.5292,
      "step": 653
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21348268586605149,
      "learning_rate": 0.00018222849365512523,
      "loss": 0.5249,
      "step": 654
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22296243039072478,
      "learning_rate": 0.0001821749622704664,
      "loss": 0.5458,
      "step": 655
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.22596567506529938,
      "learning_rate": 0.00018212135827080857,
      "loss": 0.5085,
      "step": 656
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.19012132806019622,
      "learning_rate": 0.00018206768170351962,
      "loss": 0.4977,
      "step": 657
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2125366600531234,
      "learning_rate": 0.0001820139326160316,
      "loss": 0.5051,
      "step": 658
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.23677835317412968,
      "learning_rate": 0.00018196011105584058,
      "loss": 0.575,
      "step": 659
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2262210065848097,
      "learning_rate": 0.00018190621707050671,
      "loss": 0.5744,
      "step": 660
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.21618545867420894,
      "learning_rate": 0.0001818522507076541,
      "loss": 0.5715,
      "step": 661
    },
    {
      "epoch": 0.39,
      "grad_norm": 0.2050215711297079,
      "learning_rate": 0.00018179821201497092,
      "loss": 0.5201,
      "step": 662
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20218467055707082,
      "learning_rate": 0.0001817441010402091,
      "loss": 0.5058,
      "step": 663
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20940987275867923,
      "learning_rate": 0.00018168991783118452,
      "loss": 0.5095,
      "step": 664
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.21341822518403558,
      "learning_rate": 0.00018163566243577697,
      "loss": 0.5599,
      "step": 665
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20028205017927186,
      "learning_rate": 0.0001815813349019299,
      "loss": 0.5318,
      "step": 666
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20184912350066175,
      "learning_rate": 0.00018152693527765057,
      "loss": 0.5643,
      "step": 667
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20882160405967118,
      "learning_rate": 0.0001814724636110099,
      "loss": 0.542,
      "step": 668
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20252144356881077,
      "learning_rate": 0.00018141791995014255,
      "loss": 0.4496,
      "step": 669
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.1956328371434174,
      "learning_rate": 0.00018136330434324674,
      "loss": 0.56,
      "step": 670
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20691128111503362,
      "learning_rate": 0.00018130861683858426,
      "loss": 0.5726,
      "step": 671
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2258004454621585,
      "learning_rate": 0.00018125385748448048,
      "loss": 0.583,
      "step": 672
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.22330059205477634,
      "learning_rate": 0.00018119902632932416,
      "loss": 0.5288,
      "step": 673
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.20473079466150892,
      "learning_rate": 0.0001811441234215677,
      "loss": 0.5085,
      "step": 674
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.19439333859223318,
      "learning_rate": 0.0001810891488097267,
      "loss": 0.5147,
      "step": 675
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.2037181989313857,
      "learning_rate": 0.00018103410254238021,
      "loss": 0.5228,
      "step": 676
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.21580635559566858,
      "learning_rate": 0.0001809789846681706,
      "loss": 0.5034,
      "step": 677
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.21490060304667385,
      "learning_rate": 0.00018092379523580357,
      "loss": 0.5347,
      "step": 678
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20927738857723482,
      "learning_rate": 0.00018086853429404793,
      "loss": 0.5039,
      "step": 679
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21391199422702836,
      "learning_rate": 0.00018081320189173577,
      "loss": 0.5404,
      "step": 680
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.22355130583819918,
      "learning_rate": 0.0001807577980777623,
      "loss": 0.5147,
      "step": 681
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21899190720848985,
      "learning_rate": 0.00018070232290108584,
      "loss": 0.5195,
      "step": 682
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20636096645560792,
      "learning_rate": 0.00018064677641072775,
      "loss": 0.5158,
      "step": 683
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20462410706105155,
      "learning_rate": 0.00018059115865577249,
      "loss": 0.5194,
      "step": 684
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21978634315593423,
      "learning_rate": 0.00018053546968536735,
      "loss": 0.4986,
      "step": 685
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2203882917140438,
      "learning_rate": 0.00018047970954872264,
      "loss": 0.5855,
      "step": 686
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20144829000454462,
      "learning_rate": 0.0001804238782951116,
      "loss": 0.5212,
      "step": 687
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21142991796239274,
      "learning_rate": 0.00018036797597387023,
      "loss": 0.495,
      "step": 688
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21275432668758548,
      "learning_rate": 0.00018031200263439736,
      "loss": 0.5694,
      "step": 689
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2035189446424034,
      "learning_rate": 0.00018025595832615459,
      "loss": 0.55,
      "step": 690
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20030837247360464,
      "learning_rate": 0.00018019984309866619,
      "loss": 0.4748,
      "step": 691
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.20366715425572,
      "learning_rate": 0.00018014365700151912,
      "loss": 0.5792,
      "step": 692
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2082468197583491,
      "learning_rate": 0.000180087400084363,
      "loss": 0.4973,
      "step": 693
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.21820027454676755,
      "learning_rate": 0.00018003107239691004,
      "loss": 0.5512,
      "step": 694
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.2085678250499903,
      "learning_rate": 0.00017997467398893488,
      "loss": 0.5148,
      "step": 695
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20422653056781329,
      "learning_rate": 0.00017991820491027472,
      "loss": 0.4968,
      "step": 696
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1875899162050169,
      "learning_rate": 0.0001798616652108293,
      "loss": 0.5061,
      "step": 697
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20869663705218836,
      "learning_rate": 0.00017980505494056062,
      "loss": 0.5182,
      "step": 698
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.19250179476147736,
      "learning_rate": 0.00017974837414949307,
      "loss": 0.5184,
      "step": 699
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.21732108838463451,
      "learning_rate": 0.00017969162288771347,
      "loss": 0.5524,
      "step": 700
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20200315361578813,
      "learning_rate": 0.0001796348012053707,
      "loss": 0.5386,
      "step": 701
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20242537832049035,
      "learning_rate": 0.00017957790915267615,
      "loss": 0.5656,
      "step": 702
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1889172192023988,
      "learning_rate": 0.0001795209467799031,
      "loss": 0.5115,
      "step": 703
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.19623435201373893,
      "learning_rate": 0.0001794639141373872,
      "loss": 0.497,
      "step": 704
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.22372809637554478,
      "learning_rate": 0.00017940681127552604,
      "loss": 0.5579,
      "step": 705
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1968536923376666,
      "learning_rate": 0.0001793496382447794,
      "loss": 0.4891,
      "step": 706
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.1990723573146364,
      "learning_rate": 0.00017929239509566894,
      "loss": 0.5921,
      "step": 707
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.20388703819339077,
      "learning_rate": 0.00017923508187877834,
      "loss": 0.5414,
      "step": 708
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.23657852979478725,
      "learning_rate": 0.00017917769864475314,
      "loss": 0.5672,
      "step": 709
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.22888252332289927,
      "learning_rate": 0.00017912024544430088,
      "loss": 0.5459,
      "step": 710
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.19383907969249117,
      "learning_rate": 0.0001790627223281908,
      "loss": 0.5509,
      "step": 711
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.2154263629956836,
      "learning_rate": 0.00017900512934725397,
      "loss": 0.5629,
      "step": 712
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19802419635693494,
      "learning_rate": 0.0001789474665523832,
      "loss": 0.5128,
      "step": 713
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19783321602266912,
      "learning_rate": 0.00017888973399453296,
      "loss": 0.5064,
      "step": 714
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19864882371614528,
      "learning_rate": 0.00017883193172471944,
      "loss": 0.5458,
      "step": 715
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.23609512585527,
      "learning_rate": 0.00017877405979402038,
      "loss": 0.5069,
      "step": 716
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19894144678524353,
      "learning_rate": 0.00017871611825357502,
      "loss": 0.5812,
      "step": 717
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19598819977852033,
      "learning_rate": 0.00017865810715458427,
      "loss": 0.5223,
      "step": 718
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.23274847505011953,
      "learning_rate": 0.00017860002654831032,
      "loss": 0.5703,
      "step": 719
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19794477486450376,
      "learning_rate": 0.00017854187648607694,
      "loss": 0.5538,
      "step": 720
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.2091737019131215,
      "learning_rate": 0.00017848365701926913,
      "loss": 0.4962,
      "step": 721
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.21890749511490995,
      "learning_rate": 0.00017842536819933337,
      "loss": 0.5074,
      "step": 722
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.22746821737803938,
      "learning_rate": 0.0001783670100777773,
      "loss": 0.5849,
      "step": 723
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.20967916540656184,
      "learning_rate": 0.0001783085827061699,
      "loss": 0.5246,
      "step": 724
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.19798059353181535,
      "learning_rate": 0.00017825008613614127,
      "loss": 0.4667,
      "step": 725
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.1992664047298655,
      "learning_rate": 0.00017819152041938265,
      "loss": 0.5247,
      "step": 726
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.22025628624147217,
      "learning_rate": 0.00017813288560764647,
      "loss": 0.5291,
      "step": 727
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.20405038516624363,
      "learning_rate": 0.00017807418175274612,
      "loss": 0.5235,
      "step": 728
    },
    {
      "epoch": 0.43,
      "grad_norm": 0.20626127985692586,
      "learning_rate": 0.00017801540890655609,
      "loss": 0.5103,
      "step": 729
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2187527308725265,
      "learning_rate": 0.00017795656712101172,
      "loss": 0.5515,
      "step": 730
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20386714530070776,
      "learning_rate": 0.00017789765644810935,
      "loss": 0.5109,
      "step": 731
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.1990293686392052,
      "learning_rate": 0.00017783867693990624,
      "loss": 0.5208,
      "step": 732
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19601721442767256,
      "learning_rate": 0.0001777796286485204,
      "loss": 0.5318,
      "step": 733
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20542580410660244,
      "learning_rate": 0.0001777205116261306,
      "loss": 0.5198,
      "step": 734
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20998518101289002,
      "learning_rate": 0.0001776613259249764,
      "loss": 0.5384,
      "step": 735
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20134476803418952,
      "learning_rate": 0.00017760207159735805,
      "loss": 0.5448,
      "step": 736
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.22396912180134018,
      "learning_rate": 0.00017754274869563637,
      "loss": 0.59,
      "step": 737
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2044555533666512,
      "learning_rate": 0.00017748335727223294,
      "loss": 0.5152,
      "step": 738
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2106748606361736,
      "learning_rate": 0.00017742389737962966,
      "loss": 0.5233,
      "step": 739
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19348093577281505,
      "learning_rate": 0.0001773643690703691,
      "loss": 0.5181,
      "step": 740
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.20393569458294794,
      "learning_rate": 0.00017730477239705428,
      "loss": 0.5671,
      "step": 741
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19728761757057783,
      "learning_rate": 0.00017724510741234858,
      "loss": 0.4919,
      "step": 742
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2025575313201386,
      "learning_rate": 0.0001771853741689757,
      "loss": 0.5452,
      "step": 743
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.19153867099886435,
      "learning_rate": 0.0001771255727197198,
      "loss": 0.4951,
      "step": 744
    },
    {
      "epoch": 0.44,
      "grad_norm": 0.2220125331576081,
      "learning_rate": 0.00017706570311742516,
      "loss": 0.5521,
      "step": 745
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.22704666961693065,
      "learning_rate": 0.0001770057654149964,
      "loss": 0.5184,
      "step": 746
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20871880228168335,
      "learning_rate": 0.00017694575966539823,
      "loss": 0.5205,
      "step": 747
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2105924919088961,
      "learning_rate": 0.00017688568592165552,
      "loss": 0.5448,
      "step": 748
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.19780662201378688,
      "learning_rate": 0.00017682554423685329,
      "loss": 0.6037,
      "step": 749
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.23105576261963792,
      "learning_rate": 0.0001767653346641365,
      "loss": 0.7225,
      "step": 750
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.21997563912032173,
      "learning_rate": 0.00017670505725671013,
      "loss": 0.552,
      "step": 751
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2033859052649398,
      "learning_rate": 0.00017664471206783915,
      "loss": 0.5315,
      "step": 752
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.19979214467824102,
      "learning_rate": 0.00017658429915084835,
      "loss": 0.5697,
      "step": 753
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20567412732571028,
      "learning_rate": 0.00017652381855912247,
      "loss": 0.5051,
      "step": 754
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20563597140752976,
      "learning_rate": 0.0001764632703461059,
      "loss": 0.5141,
      "step": 755
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.1979658869623221,
      "learning_rate": 0.00017640265456530293,
      "loss": 0.5257,
      "step": 756
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2241077787834463,
      "learning_rate": 0.0001763419712702775,
      "loss": 0.5203,
      "step": 757
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.2197932846972142,
      "learning_rate": 0.00017628122051465322,
      "loss": 0.5847,
      "step": 758
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.1990944255813207,
      "learning_rate": 0.00017622040235211326,
      "loss": 0.4962,
      "step": 759
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.22179309744704687,
      "learning_rate": 0.00017615951683640045,
      "loss": 0.5635,
      "step": 760
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.20505896786344424,
      "learning_rate": 0.00017609856402131703,
      "loss": 0.4968,
      "step": 761
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.21771157401053975,
      "learning_rate": 0.00017603754396072483,
      "loss": 0.4858,
      "step": 762
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.23357401076131715,
      "learning_rate": 0.000175976456708545,
      "loss": 0.5766,
      "step": 763
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.21488993425737504,
      "learning_rate": 0.0001759153023187581,
      "loss": 0.5419,
      "step": 764
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2035555999534868,
      "learning_rate": 0.00017585408084540405,
      "loss": 0.5272,
      "step": 765
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20066829451010718,
      "learning_rate": 0.00017579279234258198,
      "loss": 0.5013,
      "step": 766
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2052255359730049,
      "learning_rate": 0.00017573143686445034,
      "loss": 0.5383,
      "step": 767
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19180058672325329,
      "learning_rate": 0.00017567001446522665,
      "loss": 0.5108,
      "step": 768
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.22862029943228582,
      "learning_rate": 0.0001756085251991877,
      "loss": 0.531,
      "step": 769
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2180888066741993,
      "learning_rate": 0.00017554696912066924,
      "loss": 0.5938,
      "step": 770
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19823263656993223,
      "learning_rate": 0.00017548534628406616,
      "loss": 0.5158,
      "step": 771
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.18700255356016454,
      "learning_rate": 0.00017542365674383227,
      "loss": 0.517,
      "step": 772
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.22948411236460914,
      "learning_rate": 0.00017536190055448037,
      "loss": 0.5464,
      "step": 773
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.21370070213829387,
      "learning_rate": 0.00017530007777058213,
      "loss": 0.5158,
      "step": 774
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19174674116457566,
      "learning_rate": 0.0001752381884467681,
      "loss": 0.5035,
      "step": 775
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.19069115110218368,
      "learning_rate": 0.00017517623263772758,
      "loss": 0.5341,
      "step": 776
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2401612943495333,
      "learning_rate": 0.00017511421039820863,
      "loss": 0.578,
      "step": 777
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.20371659209716964,
      "learning_rate": 0.00017505212178301805,
      "loss": 0.5103,
      "step": 778
    },
    {
      "epoch": 0.46,
      "grad_norm": 0.2029847143168681,
      "learning_rate": 0.00017498996684702132,
      "loss": 0.537,
      "step": 779
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1904915669257304,
      "learning_rate": 0.00017492774564514235,
      "loss": 0.5129,
      "step": 780
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20640027525482552,
      "learning_rate": 0.00017486545823236385,
      "loss": 0.5585,
      "step": 781
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.23582084058854208,
      "learning_rate": 0.00017480310466372686,
      "loss": 0.5648,
      "step": 782
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2219618762642625,
      "learning_rate": 0.00017474068499433098,
      "loss": 0.5365,
      "step": 783
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2021980496149104,
      "learning_rate": 0.00017467819927933416,
      "loss": 0.5232,
      "step": 784
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.22350007413119136,
      "learning_rate": 0.00017461564757395272,
      "loss": 0.571,
      "step": 785
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1982267515659923,
      "learning_rate": 0.00017455302993346134,
      "loss": 0.5228,
      "step": 786
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1981641437638338,
      "learning_rate": 0.00017449034641319288,
      "loss": 0.5233,
      "step": 787
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.21019781303279997,
      "learning_rate": 0.00017442759706853855,
      "loss": 0.5207,
      "step": 788
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.2060312831458839,
      "learning_rate": 0.00017436478195494756,
      "loss": 0.5262,
      "step": 789
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.21829001169718656,
      "learning_rate": 0.00017430190112792737,
      "loss": 0.563,
      "step": 790
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.18511782073951058,
      "learning_rate": 0.00017423895464304342,
      "loss": 0.5017,
      "step": 791
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.1883852134929889,
      "learning_rate": 0.00017417594255591927,
      "loss": 0.4598,
      "step": 792
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.18093236424530848,
      "learning_rate": 0.00017411286492223632,
      "loss": 0.4834,
      "step": 793
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.18428120434597678,
      "learning_rate": 0.000174049721797734,
      "loss": 0.5032,
      "step": 794
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20829275110131446,
      "learning_rate": 0.00017398651323820958,
      "loss": 0.5844,
      "step": 795
    },
    {
      "epoch": 0.47,
      "grad_norm": 0.20484798677763622,
      "learning_rate": 0.00017392323929951812,
      "loss": 0.5674,
      "step": 796
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.24390628538267795,
      "learning_rate": 0.0001738599000375725,
      "loss": 0.5415,
      "step": 797
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.7159631217821198,
      "learning_rate": 0.00017379649550834327,
      "loss": 0.5248,
      "step": 798
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2153929799459398,
      "learning_rate": 0.00017373302576785874,
      "loss": 0.5362,
      "step": 799
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.18434502268826083,
      "learning_rate": 0.00017366949087220472,
      "loss": 0.5179,
      "step": 800
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.1993458339623336,
      "learning_rate": 0.0001736058908775247,
      "loss": 0.5378,
      "step": 801
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20482117487035287,
      "learning_rate": 0.0001735422258400197,
      "loss": 0.5066,
      "step": 802
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.22481951556352617,
      "learning_rate": 0.0001734784958159481,
      "loss": 0.5504,
      "step": 803
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20893222575857784,
      "learning_rate": 0.00017341470086162586,
      "loss": 0.5558,
      "step": 804
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.21011978723049574,
      "learning_rate": 0.0001733508410334262,
      "loss": 0.5164,
      "step": 805
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.19493427746334713,
      "learning_rate": 0.0001732869163877797,
      "loss": 0.4928,
      "step": 806
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.21183476672026114,
      "learning_rate": 0.00017322292698117425,
      "loss": 0.539,
      "step": 807
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.19833380077404217,
      "learning_rate": 0.00017315887287015492,
      "loss": 0.5271,
      "step": 808
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.1914374518219283,
      "learning_rate": 0.000173094754111324,
      "loss": 0.5408,
      "step": 809
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.2086814492670768,
      "learning_rate": 0.00017303057076134085,
      "loss": 0.5289,
      "step": 810
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20957903788826676,
      "learning_rate": 0.000172966322876922,
      "loss": 0.4998,
      "step": 811
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.20998255172298386,
      "learning_rate": 0.00017290201051484085,
      "loss": 0.5481,
      "step": 812
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2071975609134585,
      "learning_rate": 0.00017283763373192798,
      "loss": 0.5183,
      "step": 813
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.21738328519054306,
      "learning_rate": 0.00017277319258507073,
      "loss": 0.539,
      "step": 814
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.20518040499899,
      "learning_rate": 0.0001727086871312134,
      "loss": 0.5109,
      "step": 815
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19341379491491822,
      "learning_rate": 0.00017264411742735707,
      "loss": 0.4882,
      "step": 816
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.23128359760674316,
      "learning_rate": 0.00017257948353055963,
      "loss": 0.547,
      "step": 817
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.1960131633047162,
      "learning_rate": 0.0001725147854979357,
      "loss": 0.5467,
      "step": 818
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.21053602560644855,
      "learning_rate": 0.00017245002338665656,
      "loss": 0.5644,
      "step": 819
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19602457133539752,
      "learning_rate": 0.00017238519725395007,
      "loss": 0.5121,
      "step": 820
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.1923459024283483,
      "learning_rate": 0.00017232030715710076,
      "loss": 0.5335,
      "step": 821
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19919783133579333,
      "learning_rate": 0.00017225535315344955,
      "loss": 0.5076,
      "step": 822
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.23727428892467575,
      "learning_rate": 0.00017219033530039397,
      "loss": 0.5396,
      "step": 823
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.18048505778792392,
      "learning_rate": 0.00017212525365538792,
      "loss": 0.467,
      "step": 824
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.20071702267002645,
      "learning_rate": 0.00017206010827594163,
      "loss": 0.5217,
      "step": 825
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.20315216612025339,
      "learning_rate": 0.0001719948992196217,
      "loss": 0.4975,
      "step": 826
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.2142259292765235,
      "learning_rate": 0.00017192962654405096,
      "loss": 0.5148,
      "step": 827
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19450012283752555,
      "learning_rate": 0.00017186429030690848,
      "loss": 0.5297,
      "step": 828
    },
    {
      "epoch": 0.49,
      "grad_norm": 0.19853162923467543,
      "learning_rate": 0.00017179889056592954,
      "loss": 0.547,
      "step": 829
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.24873174470750906,
      "learning_rate": 0.00017173342737890544,
      "loss": 0.563,
      "step": 830
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.18593730182623175,
      "learning_rate": 0.00017166790080368357,
      "loss": 0.4647,
      "step": 831
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19387710879340586,
      "learning_rate": 0.00017160231089816748,
      "loss": 0.5313,
      "step": 832
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20818447206363588,
      "learning_rate": 0.00017153665772031643,
      "loss": 0.5333,
      "step": 833
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.17584822143732362,
      "learning_rate": 0.0001714709413281458,
      "loss": 0.4467,
      "step": 834
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19622166504995672,
      "learning_rate": 0.00017140516177972676,
      "loss": 0.5129,
      "step": 835
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.18822988249157332,
      "learning_rate": 0.00017133931913318625,
      "loss": 0.5186,
      "step": 836
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2021164051164271,
      "learning_rate": 0.00017127341344670696,
      "loss": 0.551,
      "step": 837
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19354327476685654,
      "learning_rate": 0.00017120744477852745,
      "loss": 0.5001,
      "step": 838
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2202448214811264,
      "learning_rate": 0.00017114141318694167,
      "loss": 0.5516,
      "step": 839
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2152071248011757,
      "learning_rate": 0.00017107531873029942,
      "loss": 0.603,
      "step": 840
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.21182530477312786,
      "learning_rate": 0.0001710091614670059,
      "loss": 0.522,
      "step": 841
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19970970678249778,
      "learning_rate": 0.00017094294145552188,
      "loss": 0.547,
      "step": 842
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.19827173723610417,
      "learning_rate": 0.00017087665875436354,
      "loss": 0.5238,
      "step": 843
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20841052076060687,
      "learning_rate": 0.00017081031342210245,
      "loss": 0.5438,
      "step": 844
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.20276205913776804,
      "learning_rate": 0.0001707439055173656,
      "loss": 0.541,
      "step": 845
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.2235538813636529,
      "learning_rate": 0.00017067743509883515,
      "loss": 0.5496,
      "step": 846
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.19092318633528002,
      "learning_rate": 0.00017061090222524863,
      "loss": 0.47,
      "step": 847
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.23200775329397214,
      "learning_rate": 0.00017054430695539864,
      "loss": 0.5268,
      "step": 848
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.22029427294529383,
      "learning_rate": 0.00017047764934813303,
      "loss": 0.499,
      "step": 849
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.22601440602605696,
      "learning_rate": 0.00017041092946235467,
      "loss": 0.5593,
      "step": 850
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.18461060416824568,
      "learning_rate": 0.00017034414735702145,
      "loss": 0.495,
      "step": 851
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.1901253135554411,
      "learning_rate": 0.0001702773030911463,
      "loss": 0.5333,
      "step": 852
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.21957939715433938,
      "learning_rate": 0.00017021039672379703,
      "loss": 0.5019,
      "step": 853
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.18779803577301077,
      "learning_rate": 0.00017014342831409634,
      "loss": 0.5079,
      "step": 854
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.22887531917427628,
      "learning_rate": 0.00017007639792122173,
      "loss": 0.537,
      "step": 855
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.20978489469439648,
      "learning_rate": 0.00017000930560440554,
      "loss": 0.5488,
      "step": 856
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.21114765352664955,
      "learning_rate": 0.0001699421514229348,
      "loss": 0.55,
      "step": 857
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.1963314578424483,
      "learning_rate": 0.00016987493543615115,
      "loss": 0.487,
      "step": 858
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.21946023399145406,
      "learning_rate": 0.0001698076577034509,
      "loss": 0.5174,
      "step": 859
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.20119091495408187,
      "learning_rate": 0.00016974031828428495,
      "loss": 0.5127,
      "step": 860
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.19098558699810822,
      "learning_rate": 0.00016967291723815863,
      "loss": 0.5776,
      "step": 861
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.20834795709316445,
      "learning_rate": 0.00016960545462463183,
      "loss": 0.4853,
      "step": 862
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.20553975175824055,
      "learning_rate": 0.0001695379305033187,
      "loss": 0.5244,
      "step": 863
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.38517636125953164,
      "learning_rate": 0.00016947034493388786,
      "loss": 0.5314,
      "step": 864
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.19821744800803398,
      "learning_rate": 0.00016940269797606228,
      "loss": 0.5104,
      "step": 865
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.21102895742419708,
      "learning_rate": 0.00016933498968961898,
      "loss": 0.5355,
      "step": 866
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.202288584940297,
      "learning_rate": 0.00016926722013438936,
      "loss": 0.5497,
      "step": 867
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.20339292404553427,
      "learning_rate": 0.00016919938937025886,
      "loss": 0.5655,
      "step": 868
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.23789079718932474,
      "learning_rate": 0.00016913149745716703,
      "loss": 0.5312,
      "step": 869
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.1930501027423073,
      "learning_rate": 0.00016906354445510747,
      "loss": 0.5119,
      "step": 870
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.20670905101801587,
      "learning_rate": 0.0001689955304241278,
      "loss": 0.5297,
      "step": 871
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.18514362407781898,
      "learning_rate": 0.0001689274554243294,
      "loss": 0.4913,
      "step": 872
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.22536281121632565,
      "learning_rate": 0.00016885931951586783,
      "loss": 0.5236,
      "step": 873
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.22244899593514914,
      "learning_rate": 0.00016879112275895215,
      "loss": 0.5305,
      "step": 874
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.19941629299670782,
      "learning_rate": 0.00016872286521384537,
      "loss": 0.5809,
      "step": 875
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.20169129247248377,
      "learning_rate": 0.0001686545469408642,
      "loss": 0.5081,
      "step": 876
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.21516300195343227,
      "learning_rate": 0.000168586168000379,
      "loss": 0.5084,
      "step": 877
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.21422988118226038,
      "learning_rate": 0.00016851772845281367,
      "loss": 0.5246,
      "step": 878
    },
    {
      "epoch": 0.52,
      "grad_norm": 0.19390871621362155,
      "learning_rate": 0.00016844922835864575,
      "loss": 0.5528,
      "step": 879
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.21294605765564745,
      "learning_rate": 0.00016838066777840629,
      "loss": 0.5971,
      "step": 880
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.17876424837335442,
      "learning_rate": 0.00016831204677267975,
      "loss": 0.4906,
      "step": 881
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.21551555099302896,
      "learning_rate": 0.00016824336540210402,
      "loss": 0.5501,
      "step": 882
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.2046934200181805,
      "learning_rate": 0.0001681746237273702,
      "loss": 0.5372,
      "step": 883
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19709464178914807,
      "learning_rate": 0.00016810582180922293,
      "loss": 0.5479,
      "step": 884
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19373994384361878,
      "learning_rate": 0.00016803695970845985,
      "loss": 0.5038,
      "step": 885
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19009129266864652,
      "learning_rate": 0.0001679680374859319,
      "loss": 0.5047,
      "step": 886
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19438125438775203,
      "learning_rate": 0.0001678990552025431,
      "loss": 0.5241,
      "step": 887
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.23198116415021436,
      "learning_rate": 0.00016783001291925055,
      "loss": 0.5384,
      "step": 888
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19609980449580128,
      "learning_rate": 0.00016776091069706442,
      "loss": 0.5096,
      "step": 889
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.19406158834918122,
      "learning_rate": 0.00016769174859704783,
      "loss": 0.494,
      "step": 890
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.21696546174997547,
      "learning_rate": 0.00016762252668031674,
      "loss": 0.5064,
      "step": 891
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.18219835689033384,
      "learning_rate": 0.00016755324500804,
      "loss": 0.5172,
      "step": 892
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.20040255213813227,
      "learning_rate": 0.00016748390364143938,
      "loss": 0.5691,
      "step": 893
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.231096201209158,
      "learning_rate": 0.00016741450264178917,
      "loss": 0.5921,
      "step": 894
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.1949281480708862,
      "learning_rate": 0.00016734504207041663,
      "loss": 0.5268,
      "step": 895
    },
    {
      "epoch": 0.53,
      "grad_norm": 0.20701715584416944,
      "learning_rate": 0.00016727552198870135,
      "loss": 0.5415,
      "step": 896
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19117861330981445,
      "learning_rate": 0.00016720594245807582,
      "loss": 0.5208,
      "step": 897
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.203448739160119,
      "learning_rate": 0.00016713630354002484,
      "loss": 0.4925,
      "step": 898
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1961421908595443,
      "learning_rate": 0.00016706660529608583,
      "loss": 0.5173,
      "step": 899
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.21571781486847058,
      "learning_rate": 0.0001669968477878485,
      "loss": 0.5288,
      "step": 900
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1951469824130785,
      "learning_rate": 0.00016692703107695507,
      "loss": 0.5454,
      "step": 901
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20181422096222762,
      "learning_rate": 0.00016685715522509994,
      "loss": 0.5266,
      "step": 902
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20297647148502806,
      "learning_rate": 0.0001667872202940299,
      "loss": 0.5621,
      "step": 903
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.21597435470412332,
      "learning_rate": 0.0001667172263455438,
      "loss": 0.5458,
      "step": 904
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.18789940455022036,
      "learning_rate": 0.00016664717344149277,
      "loss": 0.5198,
      "step": 905
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20826993025429438,
      "learning_rate": 0.00016657706164378,
      "loss": 0.5513,
      "step": 906
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.20437016308518594,
      "learning_rate": 0.00016650689101436073,
      "loss": 0.549,
      "step": 907
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19686798551292672,
      "learning_rate": 0.00016643666161524217,
      "loss": 0.529,
      "step": 908
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19123982383563307,
      "learning_rate": 0.00016636637350848338,
      "loss": 0.4862,
      "step": 909
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.1949343721096402,
      "learning_rate": 0.00016629602675619548,
      "loss": 0.5535,
      "step": 910
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19482047517370968,
      "learning_rate": 0.0001662256214205413,
      "loss": 0.5149,
      "step": 911
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19782922305356188,
      "learning_rate": 0.00016615515756373533,
      "loss": 0.5132,
      "step": 912
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.19929182594748354,
      "learning_rate": 0.00016608463524804407,
      "loss": 0.4978,
      "step": 913
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.20329183437505496,
      "learning_rate": 0.0001660140545357854,
      "loss": 0.554,
      "step": 914
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.20759199356585792,
      "learning_rate": 0.00016594341548932894,
      "loss": 0.5478,
      "step": 915
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2141758006074044,
      "learning_rate": 0.0001658727181710958,
      "loss": 0.5097,
      "step": 916
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.19823795581184833,
      "learning_rate": 0.0001658019626435586,
      "loss": 0.4997,
      "step": 917
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.19545826935570532,
      "learning_rate": 0.00016573114896924147,
      "loss": 0.4973,
      "step": 918
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2011307790801751,
      "learning_rate": 0.0001656602772107198,
      "loss": 0.5683,
      "step": 919
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.220529866848964,
      "learning_rate": 0.00016558934743062035,
      "loss": 0.574,
      "step": 920
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.19374991809265163,
      "learning_rate": 0.00016551835969162118,
      "loss": 0.4881,
      "step": 921
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.188760632867396,
      "learning_rate": 0.00016544731405645154,
      "loss": 0.5035,
      "step": 922
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.21165086312587134,
      "learning_rate": 0.00016537621058789194,
      "loss": 0.5457,
      "step": 923
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.22858469252110178,
      "learning_rate": 0.00016530504934877377,
      "loss": 0.5361,
      "step": 924
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.22860822822332005,
      "learning_rate": 0.0001652338304019797,
      "loss": 0.5317,
      "step": 925
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.18446055255941704,
      "learning_rate": 0.00016516255381044323,
      "loss": 0.5384,
      "step": 926
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.19147093978335786,
      "learning_rate": 0.00016509121963714896,
      "loss": 0.5462,
      "step": 927
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2010805530061722,
      "learning_rate": 0.00016501982794513219,
      "loss": 0.5224,
      "step": 928
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.2194018595901758,
      "learning_rate": 0.00016494837879747916,
      "loss": 0.5403,
      "step": 929
    },
    {
      "epoch": 0.55,
      "grad_norm": 0.18748965563201492,
      "learning_rate": 0.00016487687225732694,
      "loss": 0.5024,
      "step": 930
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.18976038729649813,
      "learning_rate": 0.00016480530838786312,
      "loss": 0.5373,
      "step": 931
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.20751165573259855,
      "learning_rate": 0.00016473368725232614,
      "loss": 0.5524,
      "step": 932
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.23600220568039398,
      "learning_rate": 0.0001646620089140049,
      "loss": 0.5315,
      "step": 933
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.17133056460736396,
      "learning_rate": 0.00016459027343623906,
      "loss": 0.4753,
      "step": 934
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.1864042334593607,
      "learning_rate": 0.00016451848088241847,
      "loss": 0.5617,
      "step": 935
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.20853995853524807,
      "learning_rate": 0.00016444663131598365,
      "loss": 0.5149,
      "step": 936
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.20631366863701492,
      "learning_rate": 0.00016437472480042544,
      "loss": 0.5712,
      "step": 937
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.18606219350180553,
      "learning_rate": 0.00016430276139928494,
      "loss": 0.5147,
      "step": 938
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.18702291336595328,
      "learning_rate": 0.00016423074117615362,
      "loss": 0.4982,
      "step": 939
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.19313224950151772,
      "learning_rate": 0.00016415866419467308,
      "loss": 0.5283,
      "step": 940
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.19468213657571698,
      "learning_rate": 0.00016408653051853505,
      "loss": 0.5222,
      "step": 941
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.1900318796388756,
      "learning_rate": 0.00016401434021148155,
      "loss": 0.5215,
      "step": 942
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.20563975975379326,
      "learning_rate": 0.00016394209333730437,
      "loss": 0.5332,
      "step": 943
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.19499178353885577,
      "learning_rate": 0.0001638697899598455,
      "loss": 0.5089,
      "step": 944
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.193828603254631,
      "learning_rate": 0.00016379743014299675,
      "loss": 0.4814,
      "step": 945
    },
    {
      "epoch": 0.56,
      "grad_norm": 0.2077835011524252,
      "learning_rate": 0.00016372501395069984,
      "loss": 0.5874,
      "step": 946
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.19612398295653072,
      "learning_rate": 0.0001636525414469463,
      "loss": 0.5526,
      "step": 947
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.20309531438683182,
      "learning_rate": 0.00016358001269577743,
      "loss": 0.5197,
      "step": 948
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.1915526531241072,
      "learning_rate": 0.00016350742776128423,
      "loss": 0.4595,
      "step": 949
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.18881194099828352,
      "learning_rate": 0.00016343478670760732,
      "loss": 0.5367,
      "step": 950
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.18139152767925906,
      "learning_rate": 0.00016336208959893698,
      "loss": 0.4768,
      "step": 951
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.17769254483980393,
      "learning_rate": 0.00016328933649951293,
      "loss": 0.4676,
      "step": 952
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.20965333812663062,
      "learning_rate": 0.00016321652747362445,
      "loss": 0.5555,
      "step": 953
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.19321230034521153,
      "learning_rate": 0.00016314366258561016,
      "loss": 0.5295,
      "step": 954
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.18597567849314112,
      "learning_rate": 0.00016307074189985814,
      "loss": 0.4884,
      "step": 955
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.18434368773024823,
      "learning_rate": 0.0001629977654808057,
      "loss": 0.4824,
      "step": 956
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.20328216484120545,
      "learning_rate": 0.0001629247333929394,
      "loss": 0.58,
      "step": 957
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.21563942573107894,
      "learning_rate": 0.00016285164570079504,
      "loss": 0.5822,
      "step": 958
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.2001040967836008,
      "learning_rate": 0.00016277850246895753,
      "loss": 0.5302,
      "step": 959
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.1937847274208307,
      "learning_rate": 0.0001627053037620609,
      "loss": 0.4953,
      "step": 960
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.19489799779815298,
      "learning_rate": 0.00016263204964478807,
      "loss": 0.5693,
      "step": 961
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.1746038558868276,
      "learning_rate": 0.00016255874018187113,
      "loss": 0.4843,
      "step": 962
    },
    {
      "epoch": 0.57,
      "grad_norm": 0.19886109498653962,
      "learning_rate": 0.00016248537543809085,
      "loss": 0.5251,
      "step": 963
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.23968663323524653,
      "learning_rate": 0.00016241195547827704,
      "loss": 0.5842,
      "step": 964
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.17879861073069336,
      "learning_rate": 0.00016233848036730818,
      "loss": 0.5271,
      "step": 965
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.18682851702979986,
      "learning_rate": 0.00016226495017011155,
      "loss": 0.5117,
      "step": 966
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.17994919875574256,
      "learning_rate": 0.0001621913649516631,
      "loss": 0.5277,
      "step": 967
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.18619266507890767,
      "learning_rate": 0.00016211772477698737,
      "loss": 0.5564,
      "step": 968
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.18791500150949197,
      "learning_rate": 0.0001620440297111575,
      "loss": 0.5138,
      "step": 969
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20203542918576534,
      "learning_rate": 0.00016197027981929506,
      "loss": 0.5027,
      "step": 970
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2028531086326741,
      "learning_rate": 0.00016189647516657018,
      "loss": 0.5271,
      "step": 971
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20588712233829473,
      "learning_rate": 0.0001618226158182013,
      "loss": 0.5453,
      "step": 972
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.17483034796275576,
      "learning_rate": 0.00016174870183945523,
      "loss": 0.5213,
      "step": 973
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.1869321547342615,
      "learning_rate": 0.00016167473329564705,
      "loss": 0.5445,
      "step": 974
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20261074022832953,
      "learning_rate": 0.00016160071025213998,
      "loss": 0.5607,
      "step": 975
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20067938165729438,
      "learning_rate": 0.00016152663277434556,
      "loss": 0.5145,
      "step": 976
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.1886528405831426,
      "learning_rate": 0.0001614525009277233,
      "loss": 0.5212,
      "step": 977
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.20396953786191716,
      "learning_rate": 0.00016137831477778077,
      "loss": 0.5546,
      "step": 978
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.22783734025618152,
      "learning_rate": 0.00016130407439007355,
      "loss": 0.5541,
      "step": 979
    },
    {
      "epoch": 0.58,
      "grad_norm": 0.2062351395059516,
      "learning_rate": 0.0001612297798302052,
      "loss": 0.5276,
      "step": 980
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.1971656346477824,
      "learning_rate": 0.00016115543116382707,
      "loss": 0.5232,
      "step": 981
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19528017198031397,
      "learning_rate": 0.00016108102845663832,
      "loss": 0.5468,
      "step": 982
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.213721647209808,
      "learning_rate": 0.00016100657177438592,
      "loss": 0.5617,
      "step": 983
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.21222822398944205,
      "learning_rate": 0.0001609320611828645,
      "loss": 0.5014,
      "step": 984
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19722416264931983,
      "learning_rate": 0.0001608574967479163,
      "loss": 0.5811,
      "step": 985
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19590127100369606,
      "learning_rate": 0.00016078287853543125,
      "loss": 0.518,
      "step": 986
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19341001514547207,
      "learning_rate": 0.00016070820661134668,
      "loss": 0.4885,
      "step": 987
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.20898635250980999,
      "learning_rate": 0.00016063348104164744,
      "loss": 0.4751,
      "step": 988
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.21215717136848186,
      "learning_rate": 0.00016055870189236578,
      "loss": 0.5211,
      "step": 989
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.24258818484100222,
      "learning_rate": 0.00016048386922958127,
      "loss": 0.4836,
      "step": 990
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.1798892664861584,
      "learning_rate": 0.00016040898311942082,
      "loss": 0.4984,
      "step": 991
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18450252398162806,
      "learning_rate": 0.0001603340436280585,
      "loss": 0.5306,
      "step": 992
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18837615304247687,
      "learning_rate": 0.00016025905082171562,
      "loss": 0.4754,
      "step": 993
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.2214407132100871,
      "learning_rate": 0.00016018400476666055,
      "loss": 0.6057,
      "step": 994
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.19318387417313984,
      "learning_rate": 0.00016010890552920875,
      "loss": 0.5205,
      "step": 995
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18092243312526385,
      "learning_rate": 0.00016003375317572263,
      "loss": 0.5283,
      "step": 996
    },
    {
      "epoch": 0.59,
      "grad_norm": 0.18814705771101395,
      "learning_rate": 0.00015995854777261161,
      "loss": 0.5231,
      "step": 997
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2411971952093798,
      "learning_rate": 0.00015988328938633191,
      "loss": 0.5382,
      "step": 998
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19563910470947196,
      "learning_rate": 0.00015980797808338664,
      "loss": 0.5228,
      "step": 999
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1859383484794571,
      "learning_rate": 0.00015973261393032563,
      "loss": 0.5166,
      "step": 1000
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19354728242000344,
      "learning_rate": 0.0001596571969937454,
      "loss": 0.5317,
      "step": 1001
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19477100537075334,
      "learning_rate": 0.0001595817273402891,
      "loss": 0.5389,
      "step": 1002
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19658938430385506,
      "learning_rate": 0.00015950620503664658,
      "loss": 0.5576,
      "step": 1003
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.18705811284094567,
      "learning_rate": 0.00015943063014955402,
      "loss": 0.5339,
      "step": 1004
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2238775349637154,
      "learning_rate": 0.00015935500274579426,
      "loss": 0.5348,
      "step": 1005
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.20521877935386715,
      "learning_rate": 0.00015927932289219642,
      "loss": 0.5817,
      "step": 1006
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.1748831609387364,
      "learning_rate": 0.00015920359065563604,
      "loss": 0.5003,
      "step": 1007
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19535482397472856,
      "learning_rate": 0.0001591278061030349,
      "loss": 0.5075,
      "step": 1008
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.2091479313897058,
      "learning_rate": 0.00015905196930136097,
      "loss": 0.5555,
      "step": 1009
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19083157236229129,
      "learning_rate": 0.0001589760803176286,
      "loss": 0.5166,
      "step": 1010
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.19553974503051083,
      "learning_rate": 0.00015890013921889795,
      "loss": 0.5978,
      "step": 1011
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.18073049964870547,
      "learning_rate": 0.00015882414607227546,
      "loss": 0.5136,
      "step": 1012
    },
    {
      "epoch": 0.6,
      "grad_norm": 0.18595442399132076,
      "learning_rate": 0.00015874810094491343,
      "loss": 0.494,
      "step": 1013
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18565792372097342,
      "learning_rate": 0.00015867200390401023,
      "loss": 0.5211,
      "step": 1014
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.19074958534189332,
      "learning_rate": 0.00015859585501681,
      "loss": 0.5371,
      "step": 1015
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.22738878543819066,
      "learning_rate": 0.00015851965435060262,
      "loss": 0.5055,
      "step": 1016
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2316623260697804,
      "learning_rate": 0.00015844340197272393,
      "loss": 0.5467,
      "step": 1017
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18501859711102792,
      "learning_rate": 0.00015836709795055532,
      "loss": 0.5152,
      "step": 1018
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2000854019680551,
      "learning_rate": 0.0001582907423515239,
      "loss": 0.4975,
      "step": 1019
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2164338400016087,
      "learning_rate": 0.00015821433524310224,
      "loss": 0.5108,
      "step": 1020
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.1794884681634538,
      "learning_rate": 0.00015813787669280855,
      "loss": 0.4852,
      "step": 1021
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18483080731616555,
      "learning_rate": 0.00015806136676820639,
      "loss": 0.5126,
      "step": 1022
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18580223692725362,
      "learning_rate": 0.00015798480553690482,
      "loss": 0.5068,
      "step": 1023
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.2061029141413908,
      "learning_rate": 0.0001579081930665582,
      "loss": 0.5358,
      "step": 1024
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.20179672861743628,
      "learning_rate": 0.00015783152942486613,
      "loss": 0.4904,
      "step": 1025
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.1901456318733625,
      "learning_rate": 0.0001577548146795735,
      "loss": 0.5189,
      "step": 1026
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.18700717565555658,
      "learning_rate": 0.00015767804889847025,
      "loss": 0.5053,
      "step": 1027
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.20306080744009194,
      "learning_rate": 0.00015760123214939148,
      "loss": 0.5648,
      "step": 1028
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.19427308531076118,
      "learning_rate": 0.00015752436450021742,
      "loss": 0.5317,
      "step": 1029
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.17730364387799943,
      "learning_rate": 0.0001574474460188731,
      "loss": 0.4865,
      "step": 1030
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.18919444977285255,
      "learning_rate": 0.00015737047677332863,
      "loss": 0.4482,
      "step": 1031
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2186748845065149,
      "learning_rate": 0.0001572934568315989,
      "loss": 0.5166,
      "step": 1032
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.2157209010754937,
      "learning_rate": 0.00015721638626174354,
      "loss": 0.5,
      "step": 1033
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19854498695084713,
      "learning_rate": 0.00015713926513186702,
      "loss": 0.5833,
      "step": 1034
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19845689442164263,
      "learning_rate": 0.00015706209351011848,
      "loss": 0.5678,
      "step": 1035
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.1816372371442818,
      "learning_rate": 0.00015698487146469163,
      "loss": 0.5403,
      "step": 1036
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.1770141417061582,
      "learning_rate": 0.0001569075990638248,
      "loss": 0.4799,
      "step": 1037
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.1828187317317013,
      "learning_rate": 0.00015683027637580066,
      "loss": 0.4879,
      "step": 1038
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.20434187555872277,
      "learning_rate": 0.00015675290346894657,
      "loss": 0.5189,
      "step": 1039
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.1906338108685848,
      "learning_rate": 0.00015667548041163406,
      "loss": 0.5162,
      "step": 1040
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19218887365666604,
      "learning_rate": 0.00015659800727227903,
      "loss": 0.5571,
      "step": 1041
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19183422180053736,
      "learning_rate": 0.00015652048411934167,
      "loss": 0.5144,
      "step": 1042
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.19849906503502973,
      "learning_rate": 0.00015644291102132635,
      "loss": 0.5271,
      "step": 1043
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.18585204906477085,
      "learning_rate": 0.0001563652880467816,
      "loss": 0.4906,
      "step": 1044
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.20344554390359765,
      "learning_rate": 0.00015628761526429992,
      "loss": 0.5268,
      "step": 1045
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.22108658658095226,
      "learning_rate": 0.00015620989274251797,
      "loss": 0.5253,
      "step": 1046
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.1991875825405089,
      "learning_rate": 0.00015613212055011624,
      "loss": 0.5311,
      "step": 1047
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20172996858736694,
      "learning_rate": 0.0001560542987558192,
      "loss": 0.5008,
      "step": 1048
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.1879727067503482,
      "learning_rate": 0.00015597642742839506,
      "loss": 0.539,
      "step": 1049
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.2016771106491948,
      "learning_rate": 0.00015589850663665593,
      "loss": 0.5426,
      "step": 1050
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20217576692011496,
      "learning_rate": 0.0001558205364494575,
      "loss": 0.5315,
      "step": 1051
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.19326432360974946,
      "learning_rate": 0.0001557425169356992,
      "loss": 0.4779,
      "step": 1052
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20958769456715673,
      "learning_rate": 0.000155664448164324,
      "loss": 0.5368,
      "step": 1053
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.1944842076067339,
      "learning_rate": 0.00015558633020431835,
      "loss": 0.4948,
      "step": 1054
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.19103650755521584,
      "learning_rate": 0.00015550816312471234,
      "loss": 0.4687,
      "step": 1055
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.19130294148586216,
      "learning_rate": 0.00015542994699457925,
      "loss": 0.5241,
      "step": 1056
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.19011766175405304,
      "learning_rate": 0.00015535168188303585,
      "loss": 0.5157,
      "step": 1057
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.19308237510118917,
      "learning_rate": 0.00015527336785924213,
      "loss": 0.5158,
      "step": 1058
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.18949297343958862,
      "learning_rate": 0.00015519500499240133,
      "loss": 0.5525,
      "step": 1059
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.18554479264301466,
      "learning_rate": 0.00015511659335175985,
      "loss": 0.516,
      "step": 1060
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20363425340107272,
      "learning_rate": 0.00015503813300660717,
      "loss": 0.509,
      "step": 1061
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.18613987636546375,
      "learning_rate": 0.0001549596240262758,
      "loss": 0.5097,
      "step": 1062
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.20481511432281724,
      "learning_rate": 0.00015488106648014127,
      "loss": 0.5263,
      "step": 1063
    },
    {
      "epoch": 0.63,
      "grad_norm": 0.18895386399049763,
      "learning_rate": 0.00015480246043762198,
      "loss": 0.528,
      "step": 1064
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.19514309309247793,
      "learning_rate": 0.00015472380596817922,
      "loss": 0.5444,
      "step": 1065
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2013716596665657,
      "learning_rate": 0.0001546451031413171,
      "loss": 0.535,
      "step": 1066
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2003122019096167,
      "learning_rate": 0.0001545663520265823,
      "loss": 0.5294,
      "step": 1067
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.18908243290011195,
      "learning_rate": 0.00015448755269356442,
      "loss": 0.5366,
      "step": 1068
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.1893813159340979,
      "learning_rate": 0.00015440870521189547,
      "loss": 0.5397,
      "step": 1069
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.17856528748444775,
      "learning_rate": 0.00015432980965125008,
      "loss": 0.4996,
      "step": 1070
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20198848206655348,
      "learning_rate": 0.0001542508660813454,
      "loss": 0.5084,
      "step": 1071
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2027016976348092,
      "learning_rate": 0.00015417187457194092,
      "loss": 0.5835,
      "step": 1072
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.19225289448462915,
      "learning_rate": 0.00015409283519283857,
      "loss": 0.5333,
      "step": 1073
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.19832197684261557,
      "learning_rate": 0.00015401374801388254,
      "loss": 0.5183,
      "step": 1074
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.18727329905053802,
      "learning_rate": 0.00015393461310495926,
      "loss": 0.5104,
      "step": 1075
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.19371954596455335,
      "learning_rate": 0.0001538554305359974,
      "loss": 0.5482,
      "step": 1076
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.17653026485031678,
      "learning_rate": 0.00015377620037696757,
      "loss": 0.4785,
      "step": 1077
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.18746235874046693,
      "learning_rate": 0.00015369692269788266,
      "loss": 0.5432,
      "step": 1078
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.20320517317727052,
      "learning_rate": 0.0001536175975687974,
      "loss": 0.5358,
      "step": 1079
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.18239445216598227,
      "learning_rate": 0.00015353822505980854,
      "loss": 0.5133,
      "step": 1080
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.2001726032915372,
      "learning_rate": 0.00015345880524105462,
      "loss": 0.5601,
      "step": 1081
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17441851789965093,
      "learning_rate": 0.00015337933818271597,
      "loss": 0.4992,
      "step": 1082
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.19460177893040895,
      "learning_rate": 0.00015329982395501478,
      "loss": 0.5038,
      "step": 1083
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18954115748132863,
      "learning_rate": 0.00015322026262821488,
      "loss": 0.5691,
      "step": 1084
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18958791658091825,
      "learning_rate": 0.00015314065427262166,
      "loss": 0.513,
      "step": 1085
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18280595622163878,
      "learning_rate": 0.00015306099895858206,
      "loss": 0.5147,
      "step": 1086
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17678570607217403,
      "learning_rate": 0.00015298129675648462,
      "loss": 0.4952,
      "step": 1087
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17214270706367016,
      "learning_rate": 0.00015290154773675923,
      "loss": 0.4614,
      "step": 1088
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.1874186691157256,
      "learning_rate": 0.00015282175196987721,
      "loss": 0.5354,
      "step": 1089
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18779920562332963,
      "learning_rate": 0.00015274190952635106,
      "loss": 0.4892,
      "step": 1090
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17670235761935024,
      "learning_rate": 0.00015266202047673467,
      "loss": 0.5265,
      "step": 1091
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.1744750957023612,
      "learning_rate": 0.00015258208489162312,
      "loss": 0.547,
      "step": 1092
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17422295122430556,
      "learning_rate": 0.00015250210284165246,
      "loss": 0.5005,
      "step": 1093
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.17453851271743742,
      "learning_rate": 0.00015242207439749992,
      "loss": 0.4727,
      "step": 1094
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.2020628888829431,
      "learning_rate": 0.0001523419996298837,
      "loss": 0.5164,
      "step": 1095
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.18036982996630943,
      "learning_rate": 0.00015226187860956295,
      "loss": 0.4998,
      "step": 1096
    },
    {
      "epoch": 0.65,
      "grad_norm": 0.19207762457147357,
      "learning_rate": 0.00015218171140733773,
      "loss": 0.4962,
      "step": 1097
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.17728752409159226,
      "learning_rate": 0.00015210149809404875,
      "loss": 0.486,
      "step": 1098
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18658006486346662,
      "learning_rate": 0.00015202123874057761,
      "loss": 0.5701,
      "step": 1099
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.1799589924022257,
      "learning_rate": 0.00015194093341784655,
      "loss": 0.508,
      "step": 1100
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18488881231943724,
      "learning_rate": 0.00015186058219681848,
      "loss": 0.5276,
      "step": 1101
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.19172132491153843,
      "learning_rate": 0.00015178018514849678,
      "loss": 0.5103,
      "step": 1102
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18192159347628917,
      "learning_rate": 0.00015169974234392538,
      "loss": 0.5168,
      "step": 1103
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.17390383147461494,
      "learning_rate": 0.00015161925385418867,
      "loss": 0.5117,
      "step": 1104
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18438067858194318,
      "learning_rate": 0.00015153871975041131,
      "loss": 0.5362,
      "step": 1105
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18794738239211156,
      "learning_rate": 0.00015145814010375841,
      "loss": 0.5284,
      "step": 1106
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18207793056557062,
      "learning_rate": 0.00015137751498543517,
      "loss": 0.5157,
      "step": 1107
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.1820180090632629,
      "learning_rate": 0.00015129684446668713,
      "loss": 0.4754,
      "step": 1108
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.17889600747067783,
      "learning_rate": 0.00015121612861879974,
      "loss": 0.4987,
      "step": 1109
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.17971937316609232,
      "learning_rate": 0.00015113536751309878,
      "loss": 0.4911,
      "step": 1110
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.18140328667930378,
      "learning_rate": 0.00015105456122094983,
      "loss": 0.5305,
      "step": 1111
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.2025119429324591,
      "learning_rate": 0.00015097370981375838,
      "loss": 0.5386,
      "step": 1112
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.19491035376315885,
      "learning_rate": 0.0001508928133629699,
      "loss": 0.4907,
      "step": 1113
    },
    {
      "epoch": 0.66,
      "grad_norm": 0.21078693446799357,
      "learning_rate": 0.00015081187194006962,
      "loss": 0.5365,
      "step": 1114
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.18843997731657822,
      "learning_rate": 0.0001507308856165825,
      "loss": 0.5183,
      "step": 1115
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.18301017548445006,
      "learning_rate": 0.00015064985446407321,
      "loss": 0.5365,
      "step": 1116
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.20526106918611095,
      "learning_rate": 0.00015056877855414594,
      "loss": 0.5424,
      "step": 1117
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19687691349301994,
      "learning_rate": 0.00015048765795844457,
      "loss": 0.4767,
      "step": 1118
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.24595956331043234,
      "learning_rate": 0.00015040649274865238,
      "loss": 0.5276,
      "step": 1119
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19028199168907478,
      "learning_rate": 0.000150325282996492,
      "loss": 0.5154,
      "step": 1120
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1869717933744596,
      "learning_rate": 0.00015024402877372562,
      "loss": 0.4994,
      "step": 1121
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19760879222012737,
      "learning_rate": 0.00015016273015215455,
      "loss": 0.5181,
      "step": 1122
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19770025526423807,
      "learning_rate": 0.00015008138720361942,
      "loss": 0.5224,
      "step": 1123
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.21756294254565256,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.5578,
      "step": 1124
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.20716812622877365,
      "learning_rate": 0.0001499185686132152,
      "loss": 0.5812,
      "step": 1125
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.20335171698711324,
      "learning_rate": 0.00014983709311522297,
      "loss": 0.5249,
      "step": 1126
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.2176962106234944,
      "learning_rate": 0.0001497555735780201,
      "loss": 0.5856,
      "step": 1127
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.1783334911039492,
      "learning_rate": 0.00014967401007364255,
      "loss": 0.4926,
      "step": 1128
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.21009643426167063,
      "learning_rate": 0.0001495924026741649,
      "loss": 0.4825,
      "step": 1129
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.182586366890212,
      "learning_rate": 0.0001495107514517007,
      "loss": 0.4694,
      "step": 1130
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.19955812947692972,
      "learning_rate": 0.00014942905647840206,
      "loss": 0.5385,
      "step": 1131
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18833362414930832,
      "learning_rate": 0.0001493473178264599,
      "loss": 0.5331,
      "step": 1132
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.19652826299925613,
      "learning_rate": 0.0001492655355681036,
      "loss": 0.5833,
      "step": 1133
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.19744208734697974,
      "learning_rate": 0.00014918370977560122,
      "loss": 0.5147,
      "step": 1134
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.19287794254421994,
      "learning_rate": 0.0001491018405212591,
      "loss": 0.4678,
      "step": 1135
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.20401004694813352,
      "learning_rate": 0.00014901992787742219,
      "loss": 0.5388,
      "step": 1136
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.1859539975227642,
      "learning_rate": 0.00014893797191647368,
      "loss": 0.5121,
      "step": 1137
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.1777739643065349,
      "learning_rate": 0.00014885597271083499,
      "loss": 0.4917,
      "step": 1138
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.24667049739226657,
      "learning_rate": 0.00014877393033296585,
      "loss": 0.5631,
      "step": 1139
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.19660914123915638,
      "learning_rate": 0.00014869184485536408,
      "loss": 0.4955,
      "step": 1140
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.1849639582602034,
      "learning_rate": 0.00014860971635056563,
      "loss": 0.5611,
      "step": 1141
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18618478737494773,
      "learning_rate": 0.00014852754489114444,
      "loss": 0.5668,
      "step": 1142
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18738098465743458,
      "learning_rate": 0.0001484453305497124,
      "loss": 0.491,
      "step": 1143
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.2001668377337724,
      "learning_rate": 0.00014836307339891934,
      "loss": 0.5395,
      "step": 1144
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18840397893983923,
      "learning_rate": 0.00014828077351145282,
      "loss": 0.5466,
      "step": 1145
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.18348054201307173,
      "learning_rate": 0.00014819843096003824,
      "loss": 0.551,
      "step": 1146
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.17819691596218545,
      "learning_rate": 0.0001481160458174388,
      "loss": 0.4773,
      "step": 1147
    },
    {
      "epoch": 0.68,
      "grad_norm": 0.17697981019548276,
      "learning_rate": 0.0001480336181564551,
      "loss": 0.4586,
      "step": 1148
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2051551821180113,
      "learning_rate": 0.0001479511480499255,
      "loss": 0.5141,
      "step": 1149
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.19457252340787579,
      "learning_rate": 0.00014786863557072582,
      "loss": 0.4769,
      "step": 1150
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.20436510440947783,
      "learning_rate": 0.00014778608079176923,
      "loss": 0.5556,
      "step": 1151
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18295670635083391,
      "learning_rate": 0.00014770348378600646,
      "loss": 0.5063,
      "step": 1152
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18628367387903996,
      "learning_rate": 0.00014762084462642539,
      "loss": 0.5427,
      "step": 1153
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2161510404034079,
      "learning_rate": 0.00014753816338605123,
      "loss": 0.4854,
      "step": 1154
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.17721958276959857,
      "learning_rate": 0.00014745544013794636,
      "loss": 0.5226,
      "step": 1155
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18343920037243416,
      "learning_rate": 0.0001473726749552103,
      "loss": 0.5187,
      "step": 1156
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.19071533215976352,
      "learning_rate": 0.00014728986791097957,
      "loss": 0.5307,
      "step": 1157
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.17869129856056046,
      "learning_rate": 0.00014720701907842772,
      "loss": 0.4744,
      "step": 1158
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18707001538233387,
      "learning_rate": 0.00014712412853076524,
      "loss": 0.4709,
      "step": 1159
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18697017631013504,
      "learning_rate": 0.00014704119634123948,
      "loss": 0.5077,
      "step": 1160
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.18814502472746178,
      "learning_rate": 0.00014695822258313455,
      "loss": 0.5233,
      "step": 1161
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.2368823091300486,
      "learning_rate": 0.00014687520732977128,
      "loss": 0.4966,
      "step": 1162
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.21080889905859476,
      "learning_rate": 0.00014679215065450726,
      "loss": 0.5262,
      "step": 1163
    },
    {
      "epoch": 0.69,
      "grad_norm": 0.20202781405176504,
      "learning_rate": 0.0001467090526307366,
      "loss": 0.547,
      "step": 1164
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22096110211580308,
      "learning_rate": 0.00014662591333189,
      "loss": 0.5188,
      "step": 1165
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.20568086982318412,
      "learning_rate": 0.0001465427328314346,
      "loss": 0.4716,
      "step": 1166
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2001156528680437,
      "learning_rate": 0.0001464595112028739,
      "loss": 0.4726,
      "step": 1167
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22271237142339692,
      "learning_rate": 0.0001463762485197479,
      "loss": 0.5685,
      "step": 1168
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.20249978518037207,
      "learning_rate": 0.00014629294485563271,
      "loss": 0.4992,
      "step": 1169
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.21051317014195287,
      "learning_rate": 0.00014620960028414074,
      "loss": 0.4941,
      "step": 1170
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.22015178328592389,
      "learning_rate": 0.0001461262148789205,
      "loss": 0.5554,
      "step": 1171
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1718994350084717,
      "learning_rate": 0.00014604278871365662,
      "loss": 0.4964,
      "step": 1172
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.17185816501519585,
      "learning_rate": 0.0001459593218620698,
      "loss": 0.5104,
      "step": 1173
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.21401829521307225,
      "learning_rate": 0.0001458758143979166,
      "loss": 0.499,
      "step": 1174
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.19776025857516183,
      "learning_rate": 0.00014579226639498946,
      "loss": 0.496,
      "step": 1175
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1961032370488782,
      "learning_rate": 0.00014570867792711674,
      "loss": 0.5107,
      "step": 1176
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.18716882427415388,
      "learning_rate": 0.0001456250490681625,
      "loss": 0.4876,
      "step": 1177
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.1956152915992498,
      "learning_rate": 0.00014554137989202643,
      "loss": 0.5732,
      "step": 1178
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.19786724370350303,
      "learning_rate": 0.000145457670472644,
      "loss": 0.5551,
      "step": 1179
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.18947724734010707,
      "learning_rate": 0.00014537392088398608,
      "loss": 0.4944,
      "step": 1180
    },
    {
      "epoch": 0.7,
      "grad_norm": 0.2097627269461171,
      "learning_rate": 0.00014529013120005916,
      "loss": 0.5383,
      "step": 1181
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.17430571658619776,
      "learning_rate": 0.0001452063014949051,
      "loss": 0.4849,
      "step": 1182
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1709299956294579,
      "learning_rate": 0.0001451224318426011,
      "loss": 0.4627,
      "step": 1183
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.17998153878412265,
      "learning_rate": 0.0001450385223172597,
      "loss": 0.5102,
      "step": 1184
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1973625392843972,
      "learning_rate": 0.0001449545729930287,
      "loss": 0.5508,
      "step": 1185
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1871194681672037,
      "learning_rate": 0.00014487058394409104,
      "loss": 0.5061,
      "step": 1186
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1869428596068349,
      "learning_rate": 0.00014478655524466475,
      "loss": 0.4707,
      "step": 1187
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1847860359839481,
      "learning_rate": 0.00014470248696900285,
      "loss": 0.5141,
      "step": 1188
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18708736509686738,
      "learning_rate": 0.00014461837919139348,
      "loss": 0.5192,
      "step": 1189
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.22393402471111507,
      "learning_rate": 0.00014453423198615957,
      "loss": 0.5192,
      "step": 1190
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.19155699706033086,
      "learning_rate": 0.00014445004542765888,
      "loss": 0.5054,
      "step": 1191
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18623804761748072,
      "learning_rate": 0.00014436581959028405,
      "loss": 0.5212,
      "step": 1192
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18614307922016232,
      "learning_rate": 0.00014428155454846225,
      "loss": 0.4667,
      "step": 1193
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.20760229991014245,
      "learning_rate": 0.0001441972503766555,
      "loss": 0.4897,
      "step": 1194
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1713367358079391,
      "learning_rate": 0.00014411290714936033,
      "loss": 0.5075,
      "step": 1195
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.18925060252185533,
      "learning_rate": 0.00014402852494110768,
      "loss": 0.5185,
      "step": 1196
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.1845531822650512,
      "learning_rate": 0.00014394410382646304,
      "loss": 0.4799,
      "step": 1197
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.17403525502879638,
      "learning_rate": 0.00014385964388002623,
      "loss": 0.4935,
      "step": 1198
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1991754972420626,
      "learning_rate": 0.00014377514517643144,
      "loss": 0.5419,
      "step": 1199
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.17030413694508353,
      "learning_rate": 0.00014369060779034708,
      "loss": 0.4907,
      "step": 1200
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.17984445026847887,
      "learning_rate": 0.00014360603179647567,
      "loss": 0.5024,
      "step": 1201
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1782253350761795,
      "learning_rate": 0.000143521417269554,
      "loss": 0.4629,
      "step": 1202
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18078126508901013,
      "learning_rate": 0.00014343676428435275,
      "loss": 0.4884,
      "step": 1203
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.17808414211423282,
      "learning_rate": 0.0001433520729156767,
      "loss": 0.4884,
      "step": 1204
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.19634028134840262,
      "learning_rate": 0.0001432673432383645,
      "loss": 0.5253,
      "step": 1205
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.17516039333071384,
      "learning_rate": 0.00014318257532728866,
      "loss": 0.5234,
      "step": 1206
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18938507108619154,
      "learning_rate": 0.0001430977692573554,
      "loss": 0.4569,
      "step": 1207
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18529885163701465,
      "learning_rate": 0.00014301292510350485,
      "loss": 0.5588,
      "step": 1208
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1980418349421889,
      "learning_rate": 0.0001429280429407106,
      "loss": 0.5567,
      "step": 1209
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18102359715715138,
      "learning_rate": 0.00014284312284397994,
      "loss": 0.5346,
      "step": 1210
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1682936887439461,
      "learning_rate": 0.00014275816488835364,
      "loss": 0.4636,
      "step": 1211
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1800173543128752,
      "learning_rate": 0.00014267316914890583,
      "loss": 0.5273,
      "step": 1212
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18733177044546032,
      "learning_rate": 0.00014258813570074429,
      "loss": 0.5375,
      "step": 1213
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.1748926227325175,
      "learning_rate": 0.00014250306461900984,
      "loss": 0.5167,
      "step": 1214
    },
    {
      "epoch": 0.72,
      "grad_norm": 0.18731958978952354,
      "learning_rate": 0.00014241795597887675,
      "loss": 0.4931,
      "step": 1215
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1826531388805938,
      "learning_rate": 0.00014233280985555234,
      "loss": 0.5561,
      "step": 1216
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.17687603086969267,
      "learning_rate": 0.00014224762632427713,
      "loss": 0.4916,
      "step": 1217
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.18307772952489884,
      "learning_rate": 0.0001421624054603247,
      "loss": 0.4885,
      "step": 1218
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.17499886290677696,
      "learning_rate": 0.00014207714733900162,
      "loss": 0.472,
      "step": 1219
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.21149209308036526,
      "learning_rate": 0.00014199185203564728,
      "loss": 0.5162,
      "step": 1220
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.18757068290035836,
      "learning_rate": 0.00014190651962563407,
      "loss": 0.5057,
      "step": 1221
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1968053254662056,
      "learning_rate": 0.00014182115018436715,
      "loss": 0.4965,
      "step": 1222
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1832324100206531,
      "learning_rate": 0.0001417357437872843,
      "loss": 0.5297,
      "step": 1223
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1947511964403348,
      "learning_rate": 0.00014165030050985604,
      "loss": 0.5108,
      "step": 1224
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1887394891277499,
      "learning_rate": 0.00014156482042758544,
      "loss": 0.5378,
      "step": 1225
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1948370350677339,
      "learning_rate": 0.0001414793036160081,
      "loss": 0.5031,
      "step": 1226
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.1876481087251801,
      "learning_rate": 0.00014139375015069215,
      "loss": 0.5245,
      "step": 1227
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.17100568796101634,
      "learning_rate": 0.00014130816010723805,
      "loss": 0.474,
      "step": 1228
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.18845060671638836,
      "learning_rate": 0.0001412225335612785,
      "loss": 0.4944,
      "step": 1229
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.19905303803856061,
      "learning_rate": 0.00014113687058847857,
      "loss": 0.5389,
      "step": 1230
    },
    {
      "epoch": 0.73,
      "grad_norm": 0.18736361174530702,
      "learning_rate": 0.00014105117126453554,
      "loss": 0.5128,
      "step": 1231
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.18113672252747182,
      "learning_rate": 0.00014096543566517871,
      "loss": 0.5077,
      "step": 1232
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.18697696198984598,
      "learning_rate": 0.00014087966386616945,
      "loss": 0.5083,
      "step": 1233
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.18321266285619817,
      "learning_rate": 0.00014079385594330121,
      "loss": 0.501,
      "step": 1234
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2085760786366843,
      "learning_rate": 0.00014070801197239928,
      "loss": 0.5337,
      "step": 1235
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.20073471496417797,
      "learning_rate": 0.00014062213202932085,
      "loss": 0.5354,
      "step": 1236
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.17820357534513603,
      "learning_rate": 0.00014053621618995488,
      "loss": 0.4728,
      "step": 1237
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.18692997172911976,
      "learning_rate": 0.00014045026453022197,
      "loss": 0.5461,
      "step": 1238
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.2216376697939887,
      "learning_rate": 0.00014036427712607453,
      "loss": 0.5099,
      "step": 1239
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.19522613324619895,
      "learning_rate": 0.00014027825405349642,
      "loss": 0.5257,
      "step": 1240
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.23644520056181112,
      "learning_rate": 0.0001401921953885031,
      "loss": 0.5637,
      "step": 1241
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.1850220175837294,
      "learning_rate": 0.00014010610120714147,
      "loss": 0.5299,
      "step": 1242
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.17697459178123112,
      "learning_rate": 0.00014001997158548973,
      "loss": 0.4723,
      "step": 1243
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.20359077418663543,
      "learning_rate": 0.00013993380659965755,
      "loss": 0.5546,
      "step": 1244
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.1875137496084646,
      "learning_rate": 0.00013984760632578577,
      "loss": 0.5747,
      "step": 1245
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.21313947649725407,
      "learning_rate": 0.00013976137084004633,
      "loss": 0.5321,
      "step": 1246
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.17217418667779574,
      "learning_rate": 0.0001396751002186424,
      "loss": 0.4719,
      "step": 1247
    },
    {
      "epoch": 0.74,
      "grad_norm": 0.18863205998380614,
      "learning_rate": 0.00013958879453780817,
      "loss": 0.5627,
      "step": 1248
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.17087031621768445,
      "learning_rate": 0.00013950245387380882,
      "loss": 0.4626,
      "step": 1249
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.17677171504664238,
      "learning_rate": 0.00013941607830294042,
      "loss": 0.4593,
      "step": 1250
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.19402672015245978,
      "learning_rate": 0.00013932966790152987,
      "loss": 0.5231,
      "step": 1251
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2008980487940075,
      "learning_rate": 0.00013924322274593486,
      "loss": 0.5223,
      "step": 1252
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.19654503986426297,
      "learning_rate": 0.00013915674291254383,
      "loss": 0.5489,
      "step": 1253
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.18755885764625013,
      "learning_rate": 0.00013907022847777585,
      "loss": 0.5124,
      "step": 1254
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.16715913531165327,
      "learning_rate": 0.00013898367951808052,
      "loss": 0.4741,
      "step": 1255
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.18224361618494386,
      "learning_rate": 0.000138897096109938,
      "loss": 0.4852,
      "step": 1256
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.17309322546124714,
      "learning_rate": 0.00013881047832985886,
      "loss": 0.4782,
      "step": 1257
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.17208927952188605,
      "learning_rate": 0.00013872382625438405,
      "loss": 0.5383,
      "step": 1258
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.18195195367386732,
      "learning_rate": 0.00013863713996008483,
      "loss": 0.4793,
      "step": 1259
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.1869015589060158,
      "learning_rate": 0.00013855041952356273,
      "loss": 0.4918,
      "step": 1260
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.22970064407630242,
      "learning_rate": 0.00013846366502144936,
      "loss": 0.5468,
      "step": 1261
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.18223297061392882,
      "learning_rate": 0.00013837687653040653,
      "loss": 0.4836,
      "step": 1262
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.1811453560251677,
      "learning_rate": 0.00013829005412712607,
      "loss": 0.4799,
      "step": 1263
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.19531829020393465,
      "learning_rate": 0.00013820319788832968,
      "loss": 0.4886,
      "step": 1264
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.1873613837354565,
      "learning_rate": 0.0001381163078907691,
      "loss": 0.5082,
      "step": 1265
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2725120336639838,
      "learning_rate": 0.0001380293842112258,
      "loss": 0.5434,
      "step": 1266
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.1844679328183984,
      "learning_rate": 0.00013794242692651102,
      "loss": 0.51,
      "step": 1267
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.188306631118848,
      "learning_rate": 0.00013785543611346578,
      "loss": 0.4763,
      "step": 1268
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.19531189056502454,
      "learning_rate": 0.00013776841184896064,
      "loss": 0.5165,
      "step": 1269
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20264440596591932,
      "learning_rate": 0.00013768135420989577,
      "loss": 0.5387,
      "step": 1270
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.2084557864213027,
      "learning_rate": 0.00013759426327320074,
      "loss": 0.5763,
      "step": 1271
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20500727562381132,
      "learning_rate": 0.0001375071391158347,
      "loss": 0.5333,
      "step": 1272
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.18316050604278142,
      "learning_rate": 0.00013741998181478603,
      "loss": 0.4955,
      "step": 1273
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20590646162784712,
      "learning_rate": 0.00013733279144707245,
      "loss": 0.519,
      "step": 1274
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.17998508710498043,
      "learning_rate": 0.00013724556808974086,
      "loss": 0.4866,
      "step": 1275
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.20092486892395545,
      "learning_rate": 0.0001371583118198674,
      "loss": 0.5918,
      "step": 1276
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.1910150922839534,
      "learning_rate": 0.0001370710227145572,
      "loss": 0.5362,
      "step": 1277
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.18781648781714727,
      "learning_rate": 0.00013698370085094442,
      "loss": 0.5094,
      "step": 1278
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.16979981901477645,
      "learning_rate": 0.0001368963463061922,
      "loss": 0.4665,
      "step": 1279
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.17477774487542888,
      "learning_rate": 0.0001368089591574926,
      "loss": 0.5096,
      "step": 1280
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.18366095693364085,
      "learning_rate": 0.00013672153948206635,
      "loss": 0.4843,
      "step": 1281
    },
    {
      "epoch": 0.76,
      "grad_norm": 0.17419423552171015,
      "learning_rate": 0.00013663408735716307,
      "loss": 0.4886,
      "step": 1282
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.18711956168861,
      "learning_rate": 0.00013654660286006095,
      "loss": 0.5093,
      "step": 1283
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17966945439077842,
      "learning_rate": 0.0001364590860680669,
      "loss": 0.5146,
      "step": 1284
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1762512874090438,
      "learning_rate": 0.00013637153705851616,
      "loss": 0.4863,
      "step": 1285
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1851970347695891,
      "learning_rate": 0.00013628395590877277,
      "loss": 0.5562,
      "step": 1286
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19299530099729093,
      "learning_rate": 0.00013619634269622884,
      "loss": 0.4686,
      "step": 1287
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.19014321879289434,
      "learning_rate": 0.00013610869749830498,
      "loss": 0.5187,
      "step": 1288
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1851421397455001,
      "learning_rate": 0.00013602102039245002,
      "loss": 0.5738,
      "step": 1289
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.18412094053224104,
      "learning_rate": 0.00013593331145614104,
      "loss": 0.5022,
      "step": 1290
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17961356701720557,
      "learning_rate": 0.00013584557076688322,
      "loss": 0.5351,
      "step": 1291
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1705319278777615,
      "learning_rate": 0.00013575779840220976,
      "loss": 0.5084,
      "step": 1292
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.18019495479729933,
      "learning_rate": 0.00013566999443968185,
      "loss": 0.5236,
      "step": 1293
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17974898003188056,
      "learning_rate": 0.00013558215895688867,
      "loss": 0.542,
      "step": 1294
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1797247167748882,
      "learning_rate": 0.00013549429203144723,
      "loss": 0.5077,
      "step": 1295
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1850550416693565,
      "learning_rate": 0.00013540639374100226,
      "loss": 0.5396,
      "step": 1296
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.1733382663357764,
      "learning_rate": 0.00013531846416322627,
      "loss": 0.5239,
      "step": 1297
    },
    {
      "epoch": 0.77,
      "grad_norm": 0.17977034098323824,
      "learning_rate": 0.00013523050337581943,
      "loss": 0.4717,
      "step": 1298
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.18483046559711522,
      "learning_rate": 0.0001351425114565094,
      "loss": 0.5378,
      "step": 1299
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.17834941323623535,
      "learning_rate": 0.0001350544884830515,
      "loss": 0.5662,
      "step": 1300
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.18309245782790126,
      "learning_rate": 0.00013496643453322828,
      "loss": 0.5812,
      "step": 1301
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19808755077064702,
      "learning_rate": 0.0001348783496848499,
      "loss": 0.5721,
      "step": 1302
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.18923943372482865,
      "learning_rate": 0.00013479023401575366,
      "loss": 0.5383,
      "step": 1303
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.20010184755781324,
      "learning_rate": 0.00013470208760380412,
      "loss": 0.4803,
      "step": 1304
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.2759865843375521,
      "learning_rate": 0.0001346139105268931,
      "loss": 0.5583,
      "step": 1305
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.17317696637576427,
      "learning_rate": 0.00013452570286293938,
      "loss": 0.4797,
      "step": 1306
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.1915090235119688,
      "learning_rate": 0.00013443746468988884,
      "loss": 0.467,
      "step": 1307
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.20256119138747514,
      "learning_rate": 0.00013434919608571437,
      "loss": 0.5311,
      "step": 1308
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.17993484843579866,
      "learning_rate": 0.00013426089712841564,
      "loss": 0.4832,
      "step": 1309
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.18038829097640488,
      "learning_rate": 0.00013417256789601925,
      "loss": 0.5688,
      "step": 1310
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19541363045484325,
      "learning_rate": 0.00013408420846657844,
      "loss": 0.5244,
      "step": 1311
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.19899174638442974,
      "learning_rate": 0.00013399581891817324,
      "loss": 0.5588,
      "step": 1312
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.1882127221313746,
      "learning_rate": 0.00013390739932891022,
      "loss": 0.4932,
      "step": 1313
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.1823628172344837,
      "learning_rate": 0.00013381894977692257,
      "loss": 0.5594,
      "step": 1314
    },
    {
      "epoch": 0.78,
      "grad_norm": 0.17992657977066912,
      "learning_rate": 0.00013373047034036988,
      "loss": 0.5509,
      "step": 1315
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18381185146899134,
      "learning_rate": 0.0001336419610974382,
      "loss": 0.5209,
      "step": 1316
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.16713876415743062,
      "learning_rate": 0.00013355342212633986,
      "loss": 0.4726,
      "step": 1317
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1945012456185686,
      "learning_rate": 0.0001334648535053136,
      "loss": 0.5733,
      "step": 1318
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1832463509617787,
      "learning_rate": 0.00013337625531262414,
      "loss": 0.5257,
      "step": 1319
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.17703976530840718,
      "learning_rate": 0.0001332876276265625,
      "loss": 0.4854,
      "step": 1320
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1857229688186767,
      "learning_rate": 0.00013319897052544577,
      "loss": 0.4803,
      "step": 1321
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18149715939246025,
      "learning_rate": 0.00013311028408761688,
      "loss": 0.4886,
      "step": 1322
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18600803718548015,
      "learning_rate": 0.00013302156839144484,
      "loss": 0.5098,
      "step": 1323
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18184709807791666,
      "learning_rate": 0.00013293282351532442,
      "loss": 0.5253,
      "step": 1324
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.17926319053458772,
      "learning_rate": 0.00013284404953767625,
      "loss": 0.5112,
      "step": 1325
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18473321145802576,
      "learning_rate": 0.00013275524653694665,
      "loss": 0.5257,
      "step": 1326
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.17432084959871605,
      "learning_rate": 0.00013266641459160753,
      "loss": 0.4819,
      "step": 1327
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.2136616093736039,
      "learning_rate": 0.0001325775537801564,
      "loss": 0.5289,
      "step": 1328
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18909582251032753,
      "learning_rate": 0.00013248866418111635,
      "loss": 0.5369,
      "step": 1329
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1915972979886006,
      "learning_rate": 0.00013239974587303584,
      "loss": 0.5926,
      "step": 1330
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.18549724952330918,
      "learning_rate": 0.00013231079893448873,
      "loss": 0.4909,
      "step": 1331
    },
    {
      "epoch": 0.79,
      "grad_norm": 0.1986357464763957,
      "learning_rate": 0.00013222182344407415,
      "loss": 0.5257,
      "step": 1332
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.1649013077802235,
      "learning_rate": 0.00013213281948041647,
      "loss": 0.4578,
      "step": 1333
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.24324520525042556,
      "learning_rate": 0.0001320437871221652,
      "loss": 0.5119,
      "step": 1334
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.19388615550793745,
      "learning_rate": 0.00013195472644799504,
      "loss": 0.4655,
      "step": 1335
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.16279182912251683,
      "learning_rate": 0.00013186563753660562,
      "loss": 0.4868,
      "step": 1336
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17463318249809762,
      "learning_rate": 0.0001317765204667215,
      "loss": 0.5026,
      "step": 1337
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17843994655748693,
      "learning_rate": 0.0001316873753170922,
      "loss": 0.4725,
      "step": 1338
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.17705143987562852,
      "learning_rate": 0.00013159820216649198,
      "loss": 0.4835,
      "step": 1339
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2071900675528722,
      "learning_rate": 0.00013150900109371998,
      "loss": 0.5452,
      "step": 1340
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.1855602116667224,
      "learning_rate": 0.00013141977217759977,
      "loss": 0.5166,
      "step": 1341
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.19368584941752956,
      "learning_rate": 0.00013133051549697977,
      "loss": 0.5258,
      "step": 1342
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.19453794357182327,
      "learning_rate": 0.00013124123113073278,
      "loss": 0.5039,
      "step": 1343
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.16449765544951725,
      "learning_rate": 0.0001311519191577562,
      "loss": 0.4935,
      "step": 1344
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.1817628748335402,
      "learning_rate": 0.0001310625796569717,
      "loss": 0.4937,
      "step": 1345
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2142696862792987,
      "learning_rate": 0.00013097321270732524,
      "loss": 0.5004,
      "step": 1346
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.2076553608561363,
      "learning_rate": 0.0001308838183877872,
      "loss": 0.499,
      "step": 1347
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.1694419193937227,
      "learning_rate": 0.00013079439677735207,
      "loss": 0.477,
      "step": 1348
    },
    {
      "epoch": 0.8,
      "grad_norm": 0.20906856602739568,
      "learning_rate": 0.0001307049479550384,
      "loss": 0.5227,
      "step": 1349
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.17644464533212287,
      "learning_rate": 0.00013061547199988885,
      "loss": 0.5636,
      "step": 1350
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18582243028910475,
      "learning_rate": 0.00013052596899097005,
      "loss": 0.4588,
      "step": 1351
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.19864208660134122,
      "learning_rate": 0.0001304364390073725,
      "loss": 0.5367,
      "step": 1352
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.2025705912016794,
      "learning_rate": 0.00013034688212821058,
      "loss": 0.4989,
      "step": 1353
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1850384411891724,
      "learning_rate": 0.00013025729843262241,
      "loss": 0.4829,
      "step": 1354
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18112685382941437,
      "learning_rate": 0.00013016768799976983,
      "loss": 0.501,
      "step": 1355
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.19083770143909534,
      "learning_rate": 0.00013007805090883826,
      "loss": 0.4693,
      "step": 1356
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1741168713346763,
      "learning_rate": 0.00012998838723903675,
      "loss": 0.5051,
      "step": 1357
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.17998449629698884,
      "learning_rate": 0.00012989869706959777,
      "loss": 0.5192,
      "step": 1358
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.22733876158298189,
      "learning_rate": 0.0001298089804797772,
      "loss": 0.5699,
      "step": 1359
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18706763134147958,
      "learning_rate": 0.00012971923754885438,
      "loss": 0.5352,
      "step": 1360
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.17137720274694657,
      "learning_rate": 0.0001296294683561318,
      "loss": 0.5028,
      "step": 1361
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1706735720997184,
      "learning_rate": 0.00012953967298093513,
      "loss": 0.5152,
      "step": 1362
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.1877012462021262,
      "learning_rate": 0.00012944985150261341,
      "loss": 0.4895,
      "step": 1363
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.18521525521633062,
      "learning_rate": 0.00012936000400053845,
      "loss": 0.5483,
      "step": 1364
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.175077365429483,
      "learning_rate": 0.00012927013055410522,
      "loss": 0.5405,
      "step": 1365
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.2453533595215046,
      "learning_rate": 0.00012918023124273165,
      "loss": 0.5118,
      "step": 1366
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.20592576443274452,
      "learning_rate": 0.00012909030614585836,
      "loss": 0.5008,
      "step": 1367
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19466804271674717,
      "learning_rate": 0.00012900035534294893,
      "loss": 0.4973,
      "step": 1368
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.18407694211273226,
      "learning_rate": 0.00012891037891348957,
      "loss": 0.4928,
      "step": 1369
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1944897009288739,
      "learning_rate": 0.00012882037693698917,
      "loss": 0.5042,
      "step": 1370
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1876729080378583,
      "learning_rate": 0.00012873034949297912,
      "loss": 0.5565,
      "step": 1371
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.18740458209713104,
      "learning_rate": 0.0001286402966610134,
      "loss": 0.4912,
      "step": 1372
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.17895655124360504,
      "learning_rate": 0.00012855021852066842,
      "loss": 0.4735,
      "step": 1373
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.21851593513203868,
      "learning_rate": 0.00012846011515154287,
      "loss": 0.5548,
      "step": 1374
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19220669747584535,
      "learning_rate": 0.00012836998663325782,
      "loss": 0.5339,
      "step": 1375
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1877766868834106,
      "learning_rate": 0.00012827983304545656,
      "loss": 0.5379,
      "step": 1376
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.1781170900201972,
      "learning_rate": 0.00012818965446780448,
      "loss": 0.4614,
      "step": 1377
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.17271334480103068,
      "learning_rate": 0.00012809945097998907,
      "loss": 0.4667,
      "step": 1378
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.19192709550266393,
      "learning_rate": 0.00012800922266171987,
      "loss": 0.539,
      "step": 1379
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.18347756297917528,
      "learning_rate": 0.0001279189695927283,
      "loss": 0.4869,
      "step": 1380
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.18435293599254082,
      "learning_rate": 0.0001278286918527677,
      "loss": 0.5142,
      "step": 1381
    },
    {
      "epoch": 0.82,
      "grad_norm": 0.17568604254392264,
      "learning_rate": 0.00012773838952161322,
      "loss": 0.4944,
      "step": 1382
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.19863759493863561,
      "learning_rate": 0.0001276480626790617,
      "loss": 0.5071,
      "step": 1383
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.18656826295630696,
      "learning_rate": 0.00012755771140493167,
      "loss": 0.4743,
      "step": 1384
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17629250804517016,
      "learning_rate": 0.0001274673357790632,
      "loss": 0.5198,
      "step": 1385
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17537444688807757,
      "learning_rate": 0.00012737693588131793,
      "loss": 0.49,
      "step": 1386
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.1739583842649344,
      "learning_rate": 0.00012728651179157895,
      "loss": 0.4826,
      "step": 1387
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.19774251629485748,
      "learning_rate": 0.00012719606358975073,
      "loss": 0.5571,
      "step": 1388
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17736202390835257,
      "learning_rate": 0.00012710559135575895,
      "loss": 0.4928,
      "step": 1389
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.18721704864956284,
      "learning_rate": 0.00012701509516955067,
      "loss": 0.4798,
      "step": 1390
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.19216069335454633,
      "learning_rate": 0.00012692457511109402,
      "loss": 0.5268,
      "step": 1391
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.20170703615044985,
      "learning_rate": 0.00012683403126037825,
      "loss": 0.5556,
      "step": 1392
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17163471940645972,
      "learning_rate": 0.00012674346369741365,
      "loss": 0.5224,
      "step": 1393
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.168318318516553,
      "learning_rate": 0.0001266528725022315,
      "loss": 0.5071,
      "step": 1394
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17605642876202626,
      "learning_rate": 0.00012656225775488383,
      "loss": 0.5356,
      "step": 1395
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17094228936642658,
      "learning_rate": 0.0001264716195354436,
      "loss": 0.4705,
      "step": 1396
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17472714805803075,
      "learning_rate": 0.00012638095792400452,
      "loss": 0.4903,
      "step": 1397
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.17866538442934543,
      "learning_rate": 0.00012629027300068088,
      "loss": 0.5151,
      "step": 1398
    },
    {
      "epoch": 0.83,
      "grad_norm": 0.16752002793148427,
      "learning_rate": 0.0001261995648456076,
      "loss": 0.5094,
      "step": 1399
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.18374616312572198,
      "learning_rate": 0.00012610883353894026,
      "loss": 0.4547,
      "step": 1400
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17615421249336274,
      "learning_rate": 0.00012601807916085461,
      "loss": 0.5194,
      "step": 1401
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17640888040569758,
      "learning_rate": 0.00012592730179154712,
      "loss": 0.4926,
      "step": 1402
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17818443729463354,
      "learning_rate": 0.0001258365015112344,
      "loss": 0.5138,
      "step": 1403
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.18271989198154837,
      "learning_rate": 0.00012574567840015324,
      "loss": 0.5012,
      "step": 1404
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1828993894744449,
      "learning_rate": 0.00012565483253856071,
      "loss": 0.4917,
      "step": 1405
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17537537909673637,
      "learning_rate": 0.00012556396400673403,
      "loss": 0.4856,
      "step": 1406
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1791770204684942,
      "learning_rate": 0.00012547307288497035,
      "loss": 0.4993,
      "step": 1407
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1926761899524845,
      "learning_rate": 0.00012538215925358688,
      "loss": 0.532,
      "step": 1408
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1641415902243254,
      "learning_rate": 0.00012529122319292053,
      "loss": 0.4994,
      "step": 1409
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.18035208365167332,
      "learning_rate": 0.00012520026478332822,
      "loss": 0.457,
      "step": 1410
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1767179210201008,
      "learning_rate": 0.00012510928410518663,
      "loss": 0.515,
      "step": 1411
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.16556221078273253,
      "learning_rate": 0.00012501828123889194,
      "loss": 0.4578,
      "step": 1412
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17248580048933712,
      "learning_rate": 0.00012492725626486013,
      "loss": 0.4818,
      "step": 1413
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.17049351620509895,
      "learning_rate": 0.00012483620926352656,
      "loss": 0.4889,
      "step": 1414
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.2445103047102675,
      "learning_rate": 0.00012474514031534617,
      "loss": 0.543,
      "step": 1415
    },
    {
      "epoch": 0.84,
      "grad_norm": 0.1832392790663422,
      "learning_rate": 0.00012465404950079325,
      "loss": 0.5609,
      "step": 1416
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18408303780573687,
      "learning_rate": 0.00012456293690036135,
      "loss": 0.489,
      "step": 1417
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18185382443521964,
      "learning_rate": 0.00012447180259456342,
      "loss": 0.5156,
      "step": 1418
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.19471472250974153,
      "learning_rate": 0.00012438064666393144,
      "loss": 0.493,
      "step": 1419
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.19996647014153662,
      "learning_rate": 0.00012428946918901655,
      "loss": 0.5355,
      "step": 1420
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.17841905649814713,
      "learning_rate": 0.00012419827025038905,
      "loss": 0.5193,
      "step": 1421
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.23618023782431,
      "learning_rate": 0.00012410704992863792,
      "loss": 0.5052,
      "step": 1422
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.19206431381415717,
      "learning_rate": 0.00012401580830437135,
      "loss": 0.5061,
      "step": 1423
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18232605343034466,
      "learning_rate": 0.0001239245454582162,
      "loss": 0.4726,
      "step": 1424
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.1667177719369294,
      "learning_rate": 0.000123833261470818,
      "loss": 0.4584,
      "step": 1425
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18664991662366204,
      "learning_rate": 0.0001237419564228412,
      "loss": 0.4984,
      "step": 1426
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.18842903468293132,
      "learning_rate": 0.00012365063039496862,
      "loss": 0.5021,
      "step": 1427
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.17356657187313135,
      "learning_rate": 0.00012355928346790174,
      "loss": 0.4677,
      "step": 1428
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.19172179721922822,
      "learning_rate": 0.0001234679157223605,
      "loss": 0.5298,
      "step": 1429
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.17844753388051934,
      "learning_rate": 0.00012337652723908325,
      "loss": 0.5179,
      "step": 1430
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.17488823189990194,
      "learning_rate": 0.0001232851180988266,
      "loss": 0.504,
      "step": 1431
    },
    {
      "epoch": 0.85,
      "grad_norm": 0.19923950835484613,
      "learning_rate": 0.00012319368838236547,
      "loss": 0.4801,
      "step": 1432
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18421010540531865,
      "learning_rate": 0.00012310223817049292,
      "loss": 0.4874,
      "step": 1433
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18280627148010795,
      "learning_rate": 0.00012301076754402018,
      "loss": 0.4807,
      "step": 1434
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18255969688709006,
      "learning_rate": 0.00012291927658377648,
      "loss": 0.5507,
      "step": 1435
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19369212033571756,
      "learning_rate": 0.00012282776537060903,
      "loss": 0.4807,
      "step": 1436
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.17045351156650784,
      "learning_rate": 0.0001227362339853829,
      "loss": 0.5108,
      "step": 1437
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18805439573471683,
      "learning_rate": 0.000122644682508981,
      "loss": 0.5024,
      "step": 1438
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18557193779276274,
      "learning_rate": 0.0001225531110223041,
      "loss": 0.4968,
      "step": 1439
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18446256740657319,
      "learning_rate": 0.00012246151960627053,
      "loss": 0.5296,
      "step": 1440
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.1860920392864418,
      "learning_rate": 0.0001223699083418162,
      "loss": 0.5143,
      "step": 1441
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2044775732039818,
      "learning_rate": 0.00012227827730989466,
      "loss": 0.5057,
      "step": 1442
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18607693244293935,
      "learning_rate": 0.00012218662659147693,
      "loss": 0.5174,
      "step": 1443
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.18611453439901565,
      "learning_rate": 0.00012209495626755134,
      "loss": 0.4989,
      "step": 1444
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19583337708028287,
      "learning_rate": 0.00012200326641912361,
      "loss": 0.5225,
      "step": 1445
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19901404620525662,
      "learning_rate": 0.00012191155712721667,
      "loss": 0.5108,
      "step": 1446
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.2034918139139227,
      "learning_rate": 0.0001218198284728707,
      "loss": 0.53,
      "step": 1447
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.19938382509011504,
      "learning_rate": 0.00012172808053714292,
      "loss": 0.4907,
      "step": 1448
    },
    {
      "epoch": 0.86,
      "grad_norm": 0.1979156919673814,
      "learning_rate": 0.00012163631340110764,
      "loss": 0.4977,
      "step": 1449
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.1773307028514164,
      "learning_rate": 0.00012154452714585605,
      "loss": 0.5305,
      "step": 1450
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.1822927258357729,
      "learning_rate": 0.00012145272185249634,
      "loss": 0.4921,
      "step": 1451
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19519341132650003,
      "learning_rate": 0.0001213608976021535,
      "loss": 0.4735,
      "step": 1452
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18840961162083253,
      "learning_rate": 0.00012126905447596921,
      "loss": 0.5396,
      "step": 1453
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18484188418670194,
      "learning_rate": 0.00012117719255510188,
      "loss": 0.4916,
      "step": 1454
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.1787508136445565,
      "learning_rate": 0.00012108531192072652,
      "loss": 0.4667,
      "step": 1455
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.17143949715387075,
      "learning_rate": 0.0001209934126540347,
      "loss": 0.4869,
      "step": 1456
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19700823060671513,
      "learning_rate": 0.00012090149483623438,
      "loss": 0.4673,
      "step": 1457
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18571871176182977,
      "learning_rate": 0.00012080955854855002,
      "loss": 0.5284,
      "step": 1458
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19439561697487548,
      "learning_rate": 0.00012071760387222229,
      "loss": 0.5232,
      "step": 1459
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.19667065535729555,
      "learning_rate": 0.0001206256308885082,
      "loss": 0.5436,
      "step": 1460
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18778267851212818,
      "learning_rate": 0.00012053363967868092,
      "loss": 0.514,
      "step": 1461
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18611375927652626,
      "learning_rate": 0.00012044163032402965,
      "loss": 0.4708,
      "step": 1462
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18484579524440076,
      "learning_rate": 0.0001203496029058597,
      "loss": 0.464,
      "step": 1463
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.1826656462774754,
      "learning_rate": 0.00012025755750549233,
      "loss": 0.4919,
      "step": 1464
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18150683810049317,
      "learning_rate": 0.00012016549420426471,
      "loss": 0.4936,
      "step": 1465
    },
    {
      "epoch": 0.87,
      "grad_norm": 0.18124559007552254,
      "learning_rate": 0.00012007341308352977,
      "loss": 0.4972,
      "step": 1466
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19182333070322963,
      "learning_rate": 0.00011998131422465621,
      "loss": 0.4856,
      "step": 1467
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.178004127767968,
      "learning_rate": 0.00011988919770902845,
      "loss": 0.4818,
      "step": 1468
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.17292199373403655,
      "learning_rate": 0.00011979706361804644,
      "loss": 0.4829,
      "step": 1469
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18196556074734327,
      "learning_rate": 0.00011970491203312568,
      "loss": 0.5003,
      "step": 1470
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18754874881279202,
      "learning_rate": 0.0001196127430356972,
      "loss": 0.5743,
      "step": 1471
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1694899960861821,
      "learning_rate": 0.00011952055670720732,
      "loss": 0.4812,
      "step": 1472
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.17685121597710485,
      "learning_rate": 0.00011942835312911773,
      "loss": 0.5106,
      "step": 1473
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18468147975405122,
      "learning_rate": 0.00011933613238290535,
      "loss": 0.4881,
      "step": 1474
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19502651305818872,
      "learning_rate": 0.00011924389455006226,
      "loss": 0.5436,
      "step": 1475
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1912752770383613,
      "learning_rate": 0.00011915163971209566,
      "loss": 0.4975,
      "step": 1476
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18045614062303392,
      "learning_rate": 0.00011905936795052774,
      "loss": 0.4946,
      "step": 1477
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.19188311700454608,
      "learning_rate": 0.0001189670793468957,
      "loss": 0.5096,
      "step": 1478
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.17844071972998474,
      "learning_rate": 0.00011887477398275162,
      "loss": 0.4932,
      "step": 1479
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.1825369869650523,
      "learning_rate": 0.00011878245193966229,
      "loss": 0.5087,
      "step": 1480
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.187073417853317,
      "learning_rate": 0.00011869011329920936,
      "loss": 0.4916,
      "step": 1481
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18205562507913237,
      "learning_rate": 0.00011859775814298905,
      "loss": 0.5233,
      "step": 1482
    },
    {
      "epoch": 0.88,
      "grad_norm": 0.18850193307348725,
      "learning_rate": 0.0001185053865526123,
      "loss": 0.509,
      "step": 1483
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.16598501445752575,
      "learning_rate": 0.00011841299860970445,
      "loss": 0.4606,
      "step": 1484
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18164697924930495,
      "learning_rate": 0.00011832059439590533,
      "loss": 0.5326,
      "step": 1485
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1744542009553421,
      "learning_rate": 0.00011822817399286916,
      "loss": 0.4806,
      "step": 1486
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1866112771984086,
      "learning_rate": 0.00011813573748226447,
      "loss": 0.5142,
      "step": 1487
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1804334025313157,
      "learning_rate": 0.00011804328494577402,
      "loss": 0.5055,
      "step": 1488
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1827107380774223,
      "learning_rate": 0.00011795081646509469,
      "loss": 0.4705,
      "step": 1489
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.17671386861052463,
      "learning_rate": 0.00011785833212193749,
      "loss": 0.5103,
      "step": 1490
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18983228560014775,
      "learning_rate": 0.00011776583199802746,
      "loss": 0.5105,
      "step": 1491
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.17998266735444157,
      "learning_rate": 0.00011767331617510358,
      "loss": 0.5207,
      "step": 1492
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.17711101362820791,
      "learning_rate": 0.00011758078473491864,
      "loss": 0.5266,
      "step": 1493
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18914416644450918,
      "learning_rate": 0.00011748823775923934,
      "loss": 0.5338,
      "step": 1494
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18057892909261303,
      "learning_rate": 0.00011739567532984598,
      "loss": 0.5549,
      "step": 1495
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.16973466919637561,
      "learning_rate": 0.00011730309752853261,
      "loss": 0.4838,
      "step": 1496
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18240256763305998,
      "learning_rate": 0.00011721050443710688,
      "loss": 0.4946,
      "step": 1497
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18071282928562282,
      "learning_rate": 0.00011711789613738986,
      "loss": 0.5128,
      "step": 1498
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.1764697230658466,
      "learning_rate": 0.00011702527271121609,
      "loss": 0.5081,
      "step": 1499
    },
    {
      "epoch": 0.89,
      "grad_norm": 0.18036733171638797,
      "learning_rate": 0.00011693263424043353,
      "loss": 0.492,
      "step": 1500
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18783837144683072,
      "learning_rate": 0.00011683998080690334,
      "loss": 0.5066,
      "step": 1501
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1748875971730362,
      "learning_rate": 0.00011674731249250008,
      "loss": 0.5045,
      "step": 1502
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18835075966322473,
      "learning_rate": 0.00011665462937911124,
      "loss": 0.5153,
      "step": 1503
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.16522307101295688,
      "learning_rate": 0.00011656193154863749,
      "loss": 0.4485,
      "step": 1504
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1743041745662128,
      "learning_rate": 0.00011646921908299254,
      "loss": 0.4668,
      "step": 1505
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18915730869999403,
      "learning_rate": 0.00011637649206410298,
      "loss": 0.5169,
      "step": 1506
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1787635275807655,
      "learning_rate": 0.00011628375057390824,
      "loss": 0.5218,
      "step": 1507
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18297477342275548,
      "learning_rate": 0.00011619099469436061,
      "loss": 0.497,
      "step": 1508
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1780890941415051,
      "learning_rate": 0.00011609822450742507,
      "loss": 0.4961,
      "step": 1509
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.17078744735386345,
      "learning_rate": 0.0001160054400950792,
      "loss": 0.4832,
      "step": 1510
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1784315880273548,
      "learning_rate": 0.00011591264153931321,
      "loss": 0.4745,
      "step": 1511
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.1871274533549485,
      "learning_rate": 0.00011581982892212975,
      "loss": 0.5072,
      "step": 1512
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18545283275745875,
      "learning_rate": 0.0001157270023255439,
      "loss": 0.4982,
      "step": 1513
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18159317342397205,
      "learning_rate": 0.00011563416183158318,
      "loss": 0.5188,
      "step": 1514
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.20002147439421913,
      "learning_rate": 0.00011554130752228731,
      "loss": 0.5472,
      "step": 1515
    },
    {
      "epoch": 0.9,
      "grad_norm": 0.18673799334263,
      "learning_rate": 0.00011544843947970822,
      "loss": 0.469,
      "step": 1516
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2099300853781104,
      "learning_rate": 0.00011535555778590999,
      "loss": 0.5273,
      "step": 1517
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18505904288666042,
      "learning_rate": 0.00011526266252296876,
      "loss": 0.5222,
      "step": 1518
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2022588519204151,
      "learning_rate": 0.0001151697537729727,
      "loss": 0.5154,
      "step": 1519
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17874437237649884,
      "learning_rate": 0.00011507683161802184,
      "loss": 0.5493,
      "step": 1520
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.16382256794398076,
      "learning_rate": 0.00011498389614022807,
      "loss": 0.4838,
      "step": 1521
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.1788771786525409,
      "learning_rate": 0.00011489094742171502,
      "loss": 0.5068,
      "step": 1522
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.19844148701897005,
      "learning_rate": 0.00011479798554461818,
      "loss": 0.499,
      "step": 1523
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18029997805524606,
      "learning_rate": 0.0001147050105910845,
      "loss": 0.5218,
      "step": 1524
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.2070583771119555,
      "learning_rate": 0.00011461202264327246,
      "loss": 0.5527,
      "step": 1525
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17683689253976204,
      "learning_rate": 0.00011451902178335219,
      "loss": 0.475,
      "step": 1526
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.1746582188063431,
      "learning_rate": 0.0001144260080935051,
      "loss": 0.5002,
      "step": 1527
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.19411077398249144,
      "learning_rate": 0.00011433298165592396,
      "loss": 0.4862,
      "step": 1528
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.1873910047084602,
      "learning_rate": 0.00011423994255281285,
      "loss": 0.4882,
      "step": 1529
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.18794249715310352,
      "learning_rate": 0.000114146890866387,
      "loss": 0.543,
      "step": 1530
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.1795844721124014,
      "learning_rate": 0.00011405382667887276,
      "loss": 0.5305,
      "step": 1531
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.17955259790256417,
      "learning_rate": 0.00011396075007250758,
      "loss": 0.4973,
      "step": 1532
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.21366812260453688,
      "learning_rate": 0.00011386766112953977,
      "loss": 0.5278,
      "step": 1533
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.18167687811794028,
      "learning_rate": 0.00011377455993222867,
      "loss": 0.5249,
      "step": 1534
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17645370527975052,
      "learning_rate": 0.00011368144656284436,
      "loss": 0.4947,
      "step": 1535
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.16934323070380375,
      "learning_rate": 0.00011358832110366775,
      "loss": 0.4616,
      "step": 1536
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17196203109279234,
      "learning_rate": 0.00011349518363699036,
      "loss": 0.4638,
      "step": 1537
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1943534034058594,
      "learning_rate": 0.00011340203424511434,
      "loss": 0.5094,
      "step": 1538
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17128940028769005,
      "learning_rate": 0.00011330887301035242,
      "loss": 0.4624,
      "step": 1539
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.179017474107053,
      "learning_rate": 0.00011321570001502775,
      "loss": 0.4872,
      "step": 1540
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17288738113525648,
      "learning_rate": 0.00011312251534147387,
      "loss": 0.4754,
      "step": 1541
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17577830411270592,
      "learning_rate": 0.0001130293190720347,
      "loss": 0.4729,
      "step": 1542
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.16957226974611797,
      "learning_rate": 0.00011293611128906431,
      "loss": 0.4814,
      "step": 1543
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1760426058506451,
      "learning_rate": 0.00011284289207492706,
      "loss": 0.4619,
      "step": 1544
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.18329235131533697,
      "learning_rate": 0.00011274966151199731,
      "loss": 0.5044,
      "step": 1545
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.18307223453280677,
      "learning_rate": 0.00011265641968265945,
      "loss": 0.5241,
      "step": 1546
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.17773989766788875,
      "learning_rate": 0.00011256316666930798,
      "loss": 0.4925,
      "step": 1547
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1769674765097833,
      "learning_rate": 0.00011246990255434704,
      "loss": 0.5262,
      "step": 1548
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.1849871149599951,
      "learning_rate": 0.00011237662742019075,
      "loss": 0.4594,
      "step": 1549
    },
    {
      "epoch": 0.92,
      "grad_norm": 0.18867364566508427,
      "learning_rate": 0.00011228334134926297,
      "loss": 0.476,
      "step": 1550
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.17417769451068804,
      "learning_rate": 0.00011219004442399712,
      "loss": 0.4972,
      "step": 1551
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.16859318717354108,
      "learning_rate": 0.00011209673672683632,
      "loss": 0.4802,
      "step": 1552
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1644307458626678,
      "learning_rate": 0.00011200341834023309,
      "loss": 0.4829,
      "step": 1553
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1989496601102084,
      "learning_rate": 0.00011191008934664951,
      "loss": 0.6644,
      "step": 1554
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1844631029567801,
      "learning_rate": 0.000111816749828557,
      "loss": 0.4901,
      "step": 1555
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19816902332331113,
      "learning_rate": 0.00011172339986843626,
      "loss": 0.5496,
      "step": 1556
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19691336622769823,
      "learning_rate": 0.00011163003954877718,
      "loss": 0.5586,
      "step": 1557
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.18166390903396584,
      "learning_rate": 0.00011153666895207885,
      "loss": 0.5509,
      "step": 1558
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19059509070126388,
      "learning_rate": 0.00011144328816084952,
      "loss": 0.5301,
      "step": 1559
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1907238818250723,
      "learning_rate": 0.00011134989725760632,
      "loss": 0.5015,
      "step": 1560
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.18292558413429197,
      "learning_rate": 0.00011125649632487538,
      "loss": 0.5246,
      "step": 1561
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.17463088614239725,
      "learning_rate": 0.00011116308544519163,
      "loss": 0.4912,
      "step": 1562
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.19109769957786926,
      "learning_rate": 0.00011106966470109888,
      "loss": 0.5016,
      "step": 1563
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.20891602551163976,
      "learning_rate": 0.00011097623417514957,
      "loss": 0.5061,
      "step": 1564
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.1912861078378425,
      "learning_rate": 0.00011088279394990491,
      "loss": 0.5087,
      "step": 1565
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.20860613345682275,
      "learning_rate": 0.00011078934410793453,
      "loss": 0.5475,
      "step": 1566
    },
    {
      "epoch": 0.93,
      "grad_norm": 0.2016276928245179,
      "learning_rate": 0.00011069588473181663,
      "loss": 0.4917,
      "step": 1567
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18622143936681837,
      "learning_rate": 0.00011060241590413787,
      "loss": 0.4691,
      "step": 1568
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.1862650789358927,
      "learning_rate": 0.0001105089377074932,
      "loss": 0.4783,
      "step": 1569
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17314711363865778,
      "learning_rate": 0.00011041545022448585,
      "loss": 0.4636,
      "step": 1570
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18882699212061288,
      "learning_rate": 0.00011032195353772732,
      "loss": 0.5071,
      "step": 1571
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18313968659351176,
      "learning_rate": 0.00011022844772983716,
      "loss": 0.4925,
      "step": 1572
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18989183467798568,
      "learning_rate": 0.00011013493288344307,
      "loss": 0.5138,
      "step": 1573
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.20009639832032847,
      "learning_rate": 0.00011004140908118069,
      "loss": 0.5033,
      "step": 1574
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.1864822830628491,
      "learning_rate": 0.00010994787640569348,
      "loss": 0.4566,
      "step": 1575
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.18537930991361964,
      "learning_rate": 0.00010985433493963294,
      "loss": 0.4931,
      "step": 1576
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.1860930876562612,
      "learning_rate": 0.00010976078476565818,
      "loss": 0.4843,
      "step": 1577
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2386895987379907,
      "learning_rate": 0.00010966722596643607,
      "loss": 0.5246,
      "step": 1578
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17624730923913154,
      "learning_rate": 0.00010957365862464106,
      "loss": 0.4471,
      "step": 1579
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17947142111167033,
      "learning_rate": 0.00010948008282295523,
      "loss": 0.5057,
      "step": 1580
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17182151942408264,
      "learning_rate": 0.00010938649864406803,
      "loss": 0.4452,
      "step": 1581
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.17298006001721172,
      "learning_rate": 0.0001092929061706764,
      "loss": 0.4578,
      "step": 1582
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.2070838001590237,
      "learning_rate": 0.00010919930548548456,
      "loss": 0.4887,
      "step": 1583
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18919686254137597,
      "learning_rate": 0.00010910569667120402,
      "loss": 0.5132,
      "step": 1584
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18877811317785967,
      "learning_rate": 0.0001090120798105534,
      "loss": 0.4522,
      "step": 1585
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17183583532409152,
      "learning_rate": 0.00010891845498625857,
      "loss": 0.4938,
      "step": 1586
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.19365576655760885,
      "learning_rate": 0.00010882482228105229,
      "loss": 0.5192,
      "step": 1587
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17490010992510652,
      "learning_rate": 0.00010873118177767433,
      "loss": 0.4711,
      "step": 1588
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17342191737901563,
      "learning_rate": 0.00010863753355887143,
      "loss": 0.4775,
      "step": 1589
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.16379055188844005,
      "learning_rate": 0.00010854387770739707,
      "loss": 0.4708,
      "step": 1590
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.1842692603288873,
      "learning_rate": 0.00010845021430601143,
      "loss": 0.4949,
      "step": 1591
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18033303940018516,
      "learning_rate": 0.00010835654343748149,
      "loss": 0.4991,
      "step": 1592
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.16891948671510196,
      "learning_rate": 0.00010826286518458073,
      "loss": 0.4001,
      "step": 1593
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17260620046518338,
      "learning_rate": 0.00010816917963008916,
      "loss": 0.4984,
      "step": 1594
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18883680555663057,
      "learning_rate": 0.00010807548685679334,
      "loss": 0.5055,
      "step": 1595
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.17805386706526016,
      "learning_rate": 0.00010798178694748607,
      "loss": 0.4544,
      "step": 1596
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.16667712713064364,
      "learning_rate": 0.00010788807998496655,
      "loss": 0.4746,
      "step": 1597
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18366350719934063,
      "learning_rate": 0.00010779436605204017,
      "loss": 0.5247,
      "step": 1598
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.22274345582510527,
      "learning_rate": 0.0001077006452315185,
      "loss": 0.5095,
      "step": 1599
    },
    {
      "epoch": 0.95,
      "grad_norm": 0.18493326300947324,
      "learning_rate": 0.00010760691760621921,
      "loss": 0.5327,
      "step": 1600
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.16600394767674562,
      "learning_rate": 0.00010751318325896592,
      "loss": 0.498,
      "step": 1601
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1788563275400356,
      "learning_rate": 0.00010741944227258827,
      "loss": 0.5264,
      "step": 1602
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1757402482395372,
      "learning_rate": 0.00010732569472992171,
      "loss": 0.5285,
      "step": 1603
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.18663007232179554,
      "learning_rate": 0.00010723194071380751,
      "loss": 0.5189,
      "step": 1604
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19049017159093576,
      "learning_rate": 0.00010713818030709268,
      "loss": 0.5003,
      "step": 1605
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1788772885287754,
      "learning_rate": 0.00010704441359262982,
      "loss": 0.465,
      "step": 1606
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.2158972916651406,
      "learning_rate": 0.00010695064065327712,
      "loss": 0.5272,
      "step": 1607
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1736784845280322,
      "learning_rate": 0.00010685686157189832,
      "loss": 0.5141,
      "step": 1608
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19533897028956482,
      "learning_rate": 0.00010676307643136254,
      "loss": 0.5258,
      "step": 1609
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19307284822375173,
      "learning_rate": 0.00010666928531454428,
      "loss": 0.5016,
      "step": 1610
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.17903971380850872,
      "learning_rate": 0.00010657548830432329,
      "loss": 0.4497,
      "step": 1611
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.19164639321131477,
      "learning_rate": 0.00010648168548358455,
      "loss": 0.5137,
      "step": 1612
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.17618034640166205,
      "learning_rate": 0.00010638787693521819,
      "loss": 0.5087,
      "step": 1613
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.18247646676718576,
      "learning_rate": 0.00010629406274211934,
      "loss": 0.5074,
      "step": 1614
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.17276068231312514,
      "learning_rate": 0.00010620024298718822,
      "loss": 0.4787,
      "step": 1615
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.1915641904837803,
      "learning_rate": 0.00010610641775332983,
      "loss": 0.5306,
      "step": 1616
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.17501613856715206,
      "learning_rate": 0.00010601258712345414,
      "loss": 0.5037,
      "step": 1617
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.18815087782526124,
      "learning_rate": 0.00010591875118047588,
      "loss": 0.5116,
      "step": 1618
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.186783359821177,
      "learning_rate": 0.00010582491000731432,
      "loss": 0.5044,
      "step": 1619
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.22993928050303203,
      "learning_rate": 0.00010573106368689352,
      "loss": 0.4821,
      "step": 1620
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19722071181591044,
      "learning_rate": 0.00010563721230214203,
      "loss": 0.5189,
      "step": 1621
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1809577680466395,
      "learning_rate": 0.00010554335593599285,
      "loss": 0.5428,
      "step": 1622
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1773652413715408,
      "learning_rate": 0.00010544949467138346,
      "loss": 0.4978,
      "step": 1623
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.16603963794042106,
      "learning_rate": 0.00010535562859125558,
      "loss": 0.4497,
      "step": 1624
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19264000969327572,
      "learning_rate": 0.0001052617577785552,
      "loss": 0.5067,
      "step": 1625
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19997471766094152,
      "learning_rate": 0.00010516788231623253,
      "loss": 0.4959,
      "step": 1626
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.1853233178226942,
      "learning_rate": 0.00010507400228724192,
      "loss": 0.5259,
      "step": 1627
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.20841560813457816,
      "learning_rate": 0.00010498011777454163,
      "loss": 0.6294,
      "step": 1628
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.17236196464793524,
      "learning_rate": 0.000104886228861094,
      "loss": 0.503,
      "step": 1629
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.17688449011527507,
      "learning_rate": 0.00010479233562986519,
      "loss": 0.4859,
      "step": 1630
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.2609892130797167,
      "learning_rate": 0.00010469843816382526,
      "loss": 0.548,
      "step": 1631
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19046903787621935,
      "learning_rate": 0.0001046045365459479,
      "loss": 0.5337,
      "step": 1632
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.18314320161449926,
      "learning_rate": 0.00010451063085921056,
      "loss": 0.4978,
      "step": 1633
    },
    {
      "epoch": 0.97,
      "grad_norm": 0.19018122473676155,
      "learning_rate": 0.00010441672118659422,
      "loss": 0.4981,
      "step": 1634
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1879539993234289,
      "learning_rate": 0.00010432280761108342,
      "loss": 0.502,
      "step": 1635
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1957235753033609,
      "learning_rate": 0.00010422889021566618,
      "loss": 0.5232,
      "step": 1636
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1830846528779905,
      "learning_rate": 0.0001041349690833338,
      "loss": 0.5185,
      "step": 1637
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.2110613700559306,
      "learning_rate": 0.00010404104429708097,
      "loss": 0.5631,
      "step": 1638
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.19406820066555858,
      "learning_rate": 0.00010394711593990554,
      "loss": 0.487,
      "step": 1639
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.17641222102986034,
      "learning_rate": 0.00010385318409480862,
      "loss": 0.4597,
      "step": 1640
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1729485551615291,
      "learning_rate": 0.00010375924884479427,
      "loss": 0.4869,
      "step": 1641
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.17035672775207952,
      "learning_rate": 0.00010366531027286967,
      "loss": 0.4805,
      "step": 1642
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1718925201317743,
      "learning_rate": 0.00010357136846204487,
      "loss": 0.4773,
      "step": 1643
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.17917233153986767,
      "learning_rate": 0.00010347742349533278,
      "loss": 0.4535,
      "step": 1644
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.19544887303797653,
      "learning_rate": 0.00010338347545574916,
      "loss": 0.5009,
      "step": 1645
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.1759404996666079,
      "learning_rate": 0.00010328952442631241,
      "loss": 0.462,
      "step": 1646
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.17490417817834802,
      "learning_rate": 0.00010319557049004365,
      "loss": 0.4648,
      "step": 1647
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.18605625193133354,
      "learning_rate": 0.00010310161372996648,
      "loss": 0.535,
      "step": 1648
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.18211716389824942,
      "learning_rate": 0.00010300765422910706,
      "loss": 0.482,
      "step": 1649
    },
    {
      "epoch": 0.98,
      "grad_norm": 0.195298449859917,
      "learning_rate": 0.00010291369207049397,
      "loss": 0.4966,
      "step": 1650
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.20359810497996356,
      "learning_rate": 0.00010281972733715808,
      "loss": 0.4735,
      "step": 1651
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.18071892768830874,
      "learning_rate": 0.00010272576011213262,
      "loss": 0.5107,
      "step": 1652
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.19488059200805252,
      "learning_rate": 0.00010263179047845297,
      "loss": 0.5581,
      "step": 1653
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1827251419622096,
      "learning_rate": 0.00010253781851915663,
      "loss": 0.5429,
      "step": 1654
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1815977598840352,
      "learning_rate": 0.0001024438443172832,
      "loss": 0.4719,
      "step": 1655
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1750166365728634,
      "learning_rate": 0.00010234986795587418,
      "loss": 0.4972,
      "step": 1656
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.16860820894878417,
      "learning_rate": 0.00010225588951797309,
      "loss": 0.4881,
      "step": 1657
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1653555565773814,
      "learning_rate": 0.00010216190908662522,
      "loss": 0.4759,
      "step": 1658
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1973447838806209,
      "learning_rate": 0.0001020679267448776,
      "loss": 0.5361,
      "step": 1659
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.17166094417441266,
      "learning_rate": 0.00010197394257577902,
      "loss": 0.4587,
      "step": 1660
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.17920046555821298,
      "learning_rate": 0.00010187995666237977,
      "loss": 0.5237,
      "step": 1661
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1746051081494759,
      "learning_rate": 0.00010178596908773179,
      "loss": 0.5352,
      "step": 1662
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1904814752787734,
      "learning_rate": 0.00010169197993488851,
      "loss": 0.4947,
      "step": 1663
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1682118449533182,
      "learning_rate": 0.0001015979892869046,
      "loss": 0.5075,
      "step": 1664
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.1787167363650672,
      "learning_rate": 0.00010150399722683623,
      "loss": 0.503,
      "step": 1665
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.17433015556420575,
      "learning_rate": 0.00010141000383774067,
      "loss": 0.4736,
      "step": 1666
    },
    {
      "epoch": 0.99,
      "grad_norm": 0.18890832234956914,
      "learning_rate": 0.00010131600920267645,
      "loss": 0.482,
      "step": 1667
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.16995768612256246,
      "learning_rate": 0.00010122201340470321,
      "loss": 0.4764,
      "step": 1668
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19189171962262738,
      "learning_rate": 0.00010112801652688155,
      "loss": 0.5234,
      "step": 1669
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1887634223189705,
      "learning_rate": 0.00010103401865227304,
      "loss": 0.4763,
      "step": 1670
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1842920226325373,
      "learning_rate": 0.00010094001986394024,
      "loss": 0.5156,
      "step": 1671
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.17969781851880492,
      "learning_rate": 0.00010084602024494633,
      "loss": 0.4826,
      "step": 1672
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19854544410385247,
      "learning_rate": 0.0001007520198783554,
      "loss": 0.5498,
      "step": 1673
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.16752089299272865,
      "learning_rate": 0.0001006580188472321,
      "loss": 0.4453,
      "step": 1674
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1740348252936511,
      "learning_rate": 0.00010056401723464166,
      "loss": 0.5058,
      "step": 1675
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1734770189678822,
      "learning_rate": 0.00010047001512364992,
      "loss": 0.4988,
      "step": 1676
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19216191844782038,
      "learning_rate": 0.00010037601259732308,
      "loss": 0.5046,
      "step": 1677
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19668950532063106,
      "learning_rate": 0.00010028200973872766,
      "loss": 0.5271,
      "step": 1678
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.17543353269555373,
      "learning_rate": 0.00010018800663093057,
      "loss": 0.4429,
      "step": 1679
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1649204380890276,
      "learning_rate": 0.00010009400335699894,
      "loss": 0.4532,
      "step": 1680
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19447076402139798,
      "learning_rate": 0.0001,
      "loss": 0.5214,
      "step": 1681
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2095390863895404,
      "learning_rate": 9.990599664300105e-05,
      "loss": 0.5393,
      "step": 1682
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19567081187455143,
      "learning_rate": 9.981199336906944e-05,
      "loss": 0.5266,
      "step": 1683
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.17528573990808377,
      "learning_rate": 9.971799026127236e-05,
      "loss": 0.4686,
      "step": 1684
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.16498662906420647,
      "learning_rate": 9.962398740267696e-05,
      "loss": 0.4576,
      "step": 1685
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1868464359812763,
      "learning_rate": 9.952998487635011e-05,
      "loss": 0.5504,
      "step": 1686
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1848579187325209,
      "learning_rate": 9.943598276535835e-05,
      "loss": 0.4671,
      "step": 1687
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18102324049270346,
      "learning_rate": 9.934198115276793e-05,
      "loss": 0.4869,
      "step": 1688
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.17304140002160995,
      "learning_rate": 9.924798012164459e-05,
      "loss": 0.5308,
      "step": 1689
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.17379146302190593,
      "learning_rate": 9.915397975505369e-05,
      "loss": 0.4888,
      "step": 1690
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1771864954623761,
      "learning_rate": 9.90599801360598e-05,
      "loss": 0.5018,
      "step": 1691
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19350545061458058,
      "learning_rate": 9.896598134772697e-05,
      "loss": 0.5308,
      "step": 1692
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18561731061862277,
      "learning_rate": 9.887198347311849e-05,
      "loss": 0.51,
      "step": 1693
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.2391921626817571,
      "learning_rate": 9.877798659529683e-05,
      "loss": 0.492,
      "step": 1694
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18604469349478459,
      "learning_rate": 9.868399079732356e-05,
      "loss": 0.4857,
      "step": 1695
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.2054893911376399,
      "learning_rate": 9.858999616225939e-05,
      "loss": 0.5076,
      "step": 1696
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19787007449159563,
      "learning_rate": 9.849600277316379e-05,
      "loss": 0.4909,
      "step": 1697
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19531017803501713,
      "learning_rate": 9.840201071309539e-05,
      "loss": 0.5164,
      "step": 1698
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1743391543139743,
      "learning_rate": 9.830802006511154e-05,
      "loss": 0.4362,
      "step": 1699
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1793245330684509,
      "learning_rate": 9.821403091226822e-05,
      "loss": 0.5189,
      "step": 1700
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.17976563495164294,
      "learning_rate": 9.812004333762027e-05,
      "loss": 0.501,
      "step": 1701
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.18720637986337038,
      "learning_rate": 9.802605742422104e-05,
      "loss": 0.5105,
      "step": 1702
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.18379799853488712,
      "learning_rate": 9.793207325512242e-05,
      "loss": 0.4786,
      "step": 1703
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1869951004361499,
      "learning_rate": 9.78380909133748e-05,
      "loss": 0.4969,
      "step": 1704
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.19079725212306675,
      "learning_rate": 9.77441104820269e-05,
      "loss": 0.469,
      "step": 1705
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.19533398341345093,
      "learning_rate": 9.765013204412583e-05,
      "loss": 0.4961,
      "step": 1706
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1836971846789222,
      "learning_rate": 9.755615568271683e-05,
      "loss": 0.5366,
      "step": 1707
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.17261463561503315,
      "learning_rate": 9.746218148084337e-05,
      "loss": 0.5368,
      "step": 1708
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.20247882518351865,
      "learning_rate": 9.736820952154706e-05,
      "loss": 0.5068,
      "step": 1709
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1598672802477059,
      "learning_rate": 9.72742398878674e-05,
      "loss": 0.388,
      "step": 1710
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.16834802241635866,
      "learning_rate": 9.718027266284192e-05,
      "loss": 0.4175,
      "step": 1711
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1771452263915879,
      "learning_rate": 9.708630792950608e-05,
      "loss": 0.4277,
      "step": 1712
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1708577470493871,
      "learning_rate": 9.699234577089297e-05,
      "loss": 0.3812,
      "step": 1713
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.18260209637789201,
      "learning_rate": 9.689838627003354e-05,
      "loss": 0.4253,
      "step": 1714
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.22465773493800734,
      "learning_rate": 9.68044295099564e-05,
      "loss": 0.4531,
      "step": 1715
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.2014183902769127,
      "learning_rate": 9.671047557368761e-05,
      "loss": 0.3596,
      "step": 1716
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.19751970937795732,
      "learning_rate": 9.661652454425086e-05,
      "loss": 0.4525,
      "step": 1717
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.20012635962136738,
      "learning_rate": 9.652257650466723e-05,
      "loss": 0.3992,
      "step": 1718
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18539871190485555,
      "learning_rate": 9.642863153795516e-05,
      "loss": 0.3962,
      "step": 1719
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18906247477131508,
      "learning_rate": 9.633468972713034e-05,
      "loss": 0.3985,
      "step": 1720
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19159144899087352,
      "learning_rate": 9.624075115520572e-05,
      "loss": 0.4178,
      "step": 1721
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.22081857132075158,
      "learning_rate": 9.614681590519143e-05,
      "loss": 0.419,
      "step": 1722
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19043102570259512,
      "learning_rate": 9.605288406009447e-05,
      "loss": 0.4013,
      "step": 1723
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19103339288259663,
      "learning_rate": 9.595895570291906e-05,
      "loss": 0.4401,
      "step": 1724
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1734966093658671,
      "learning_rate": 9.586503091666623e-05,
      "loss": 0.3825,
      "step": 1725
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.17722830697815692,
      "learning_rate": 9.577110978433385e-05,
      "loss": 0.4314,
      "step": 1726
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19257129465178685,
      "learning_rate": 9.567719238891658e-05,
      "loss": 0.4564,
      "step": 1727
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18967332310638602,
      "learning_rate": 9.55832788134058e-05,
      "loss": 0.439,
      "step": 1728
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.20692915676429485,
      "learning_rate": 9.548936914078946e-05,
      "loss": 0.4399,
      "step": 1729
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.19518385923192133,
      "learning_rate": 9.53954634540521e-05,
      "loss": 0.4042,
      "step": 1730
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1751627976443351,
      "learning_rate": 9.530156183617475e-05,
      "loss": 0.3965,
      "step": 1731
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.1815012657437565,
      "learning_rate": 9.520766437013483e-05,
      "loss": 0.4374,
      "step": 1732
    },
    {
      "epoch": 1.01,
      "grad_norm": 0.18061460328754317,
      "learning_rate": 9.511377113890602e-05,
      "loss": 0.3605,
      "step": 1733
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.20605498698659788,
      "learning_rate": 9.501988222545838e-05,
      "loss": 0.4335,
      "step": 1734
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.18420612406230608,
      "learning_rate": 9.492599771275813e-05,
      "loss": 0.4283,
      "step": 1735
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.16811914920856832,
      "learning_rate": 9.483211768376749e-05,
      "loss": 0.3856,
      "step": 1736
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1756881106242861,
      "learning_rate": 9.473824222144483e-05,
      "loss": 0.4262,
      "step": 1737
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1748388175069077,
      "learning_rate": 9.464437140874447e-05,
      "loss": 0.3776,
      "step": 1738
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1850259364570879,
      "learning_rate": 9.455050532861656e-05,
      "loss": 0.4085,
      "step": 1739
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1724597528234989,
      "learning_rate": 9.445664406400716e-05,
      "loss": 0.3584,
      "step": 1740
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1892387423485602,
      "learning_rate": 9.4362787697858e-05,
      "loss": 0.3991,
      "step": 1741
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.17643614255793788,
      "learning_rate": 9.42689363131065e-05,
      "loss": 0.3876,
      "step": 1742
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.18239920728297748,
      "learning_rate": 9.417508999268569e-05,
      "loss": 0.3856,
      "step": 1743
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.17690658739964463,
      "learning_rate": 9.408124881952418e-05,
      "loss": 0.3972,
      "step": 1744
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.19597272887285241,
      "learning_rate": 9.398741287654587e-05,
      "loss": 0.4211,
      "step": 1745
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1818944310579676,
      "learning_rate": 9.389358224667019e-05,
      "loss": 0.3708,
      "step": 1746
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1767137534783939,
      "learning_rate": 9.379975701281181e-05,
      "loss": 0.3912,
      "step": 1747
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1711169455706469,
      "learning_rate": 9.370593725788068e-05,
      "loss": 0.3876,
      "step": 1748
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1731607579175634,
      "learning_rate": 9.361212306478185e-05,
      "loss": 0.3862,
      "step": 1749
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.1863257179297087,
      "learning_rate": 9.351831451641546e-05,
      "loss": 0.4546,
      "step": 1750
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.20882434007762518,
      "learning_rate": 9.342451169567675e-05,
      "loss": 0.4224,
      "step": 1751
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.19101829618663632,
      "learning_rate": 9.333071468545573e-05,
      "loss": 0.4388,
      "step": 1752
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18459630326288334,
      "learning_rate": 9.323692356863746e-05,
      "loss": 0.4376,
      "step": 1753
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.17877215371813454,
      "learning_rate": 9.314313842810172e-05,
      "loss": 0.4084,
      "step": 1754
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.21787097366397204,
      "learning_rate": 9.30493593467229e-05,
      "loss": 0.4495,
      "step": 1755
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.1820262678604187,
      "learning_rate": 9.295558640737019e-05,
      "loss": 0.426,
      "step": 1756
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.1713277869502375,
      "learning_rate": 9.286181969290736e-05,
      "loss": 0.3856,
      "step": 1757
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18414728315702017,
      "learning_rate": 9.276805928619251e-05,
      "loss": 0.4138,
      "step": 1758
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18241075623039185,
      "learning_rate": 9.267430527007831e-05,
      "loss": 0.3828,
      "step": 1759
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18698971522482388,
      "learning_rate": 9.258055772741174e-05,
      "loss": 0.4183,
      "step": 1760
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.16609172517956425,
      "learning_rate": 9.24868167410341e-05,
      "loss": 0.3542,
      "step": 1761
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18231584353924798,
      "learning_rate": 9.239308239378081e-05,
      "loss": 0.43,
      "step": 1762
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.18874617751046177,
      "learning_rate": 9.229935476848151e-05,
      "loss": 0.4213,
      "step": 1763
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.17314287835154177,
      "learning_rate": 9.220563394795984e-05,
      "loss": 0.401,
      "step": 1764
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.1862791761822762,
      "learning_rate": 9.211192001503346e-05,
      "loss": 0.4073,
      "step": 1765
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.20601420449280228,
      "learning_rate": 9.201821305251393e-05,
      "loss": 0.3986,
      "step": 1766
    },
    {
      "epoch": 1.03,
      "grad_norm": 0.19246329075521015,
      "learning_rate": 9.192451314320669e-05,
      "loss": 0.4495,
      "step": 1767
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.16997776284538635,
      "learning_rate": 9.183082036991084e-05,
      "loss": 0.3906,
      "step": 1768
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.2036008039486783,
      "learning_rate": 9.173713481541929e-05,
      "loss": 0.4135,
      "step": 1769
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.18909810433881546,
      "learning_rate": 9.164345656251853e-05,
      "loss": 0.431,
      "step": 1770
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.20571260974020206,
      "learning_rate": 9.154978569398859e-05,
      "loss": 0.4038,
      "step": 1771
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.18675687168122815,
      "learning_rate": 9.145612229260295e-05,
      "loss": 0.4198,
      "step": 1772
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.18206854372967374,
      "learning_rate": 9.13624664411286e-05,
      "loss": 0.4399,
      "step": 1773
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.18084477521813697,
      "learning_rate": 9.126881822232568e-05,
      "loss": 0.4206,
      "step": 1774
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.19191834762302898,
      "learning_rate": 9.117517771894773e-05,
      "loss": 0.4108,
      "step": 1775
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.1755560835129537,
      "learning_rate": 9.108154501374143e-05,
      "loss": 0.3662,
      "step": 1776
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.185181106446634,
      "learning_rate": 9.098792018944661e-05,
      "loss": 0.4275,
      "step": 1777
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.2030891286279709,
      "learning_rate": 9.089430332879599e-05,
      "loss": 0.4119,
      "step": 1778
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.175294036399659,
      "learning_rate": 9.080069451451544e-05,
      "loss": 0.3841,
      "step": 1779
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.1930453644377178,
      "learning_rate": 9.070709382932363e-05,
      "loss": 0.4081,
      "step": 1780
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.17187828203070712,
      "learning_rate": 9.0613501355932e-05,
      "loss": 0.4122,
      "step": 1781
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.22525449819172308,
      "learning_rate": 9.05199171770448e-05,
      "loss": 0.4644,
      "step": 1782
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.1908671974863637,
      "learning_rate": 9.042634137535898e-05,
      "loss": 0.3771,
      "step": 1783
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.1755715761595777,
      "learning_rate": 9.033277403356397e-05,
      "loss": 0.39,
      "step": 1784
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.18097636315463037,
      "learning_rate": 9.023921523434184e-05,
      "loss": 0.3848,
      "step": 1785
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.17287399316233582,
      "learning_rate": 9.01456650603671e-05,
      "loss": 0.3714,
      "step": 1786
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.18747127569805108,
      "learning_rate": 9.005212359430654e-05,
      "loss": 0.409,
      "step": 1787
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.18137619801864702,
      "learning_rate": 8.995859091881935e-05,
      "loss": 0.3967,
      "step": 1788
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.1779504228236956,
      "learning_rate": 8.986506711655692e-05,
      "loss": 0.4152,
      "step": 1789
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.19489093546042696,
      "learning_rate": 8.977155227016286e-05,
      "loss": 0.4182,
      "step": 1790
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.1845854795417051,
      "learning_rate": 8.967804646227271e-05,
      "loss": 0.4132,
      "step": 1791
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.17653594604432007,
      "learning_rate": 8.958454977551414e-05,
      "loss": 0.4075,
      "step": 1792
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.17538723506657966,
      "learning_rate": 8.949106229250685e-05,
      "loss": 0.4085,
      "step": 1793
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.1984669167523276,
      "learning_rate": 8.939758409586216e-05,
      "loss": 0.3935,
      "step": 1794
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.1817428273700713,
      "learning_rate": 8.930411526818337e-05,
      "loss": 0.3728,
      "step": 1795
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.17233958696209886,
      "learning_rate": 8.92106558920655e-05,
      "loss": 0.4157,
      "step": 1796
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.18025099372025463,
      "learning_rate": 8.911720605009511e-05,
      "loss": 0.3834,
      "step": 1797
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.1876060834020746,
      "learning_rate": 8.902376582485043e-05,
      "loss": 0.4072,
      "step": 1798
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.17556808476603256,
      "learning_rate": 8.893033529890118e-05,
      "loss": 0.3941,
      "step": 1799
    },
    {
      "epoch": 1.05,
      "grad_norm": 0.21284599089407197,
      "learning_rate": 8.883691455480839e-05,
      "loss": 0.4245,
      "step": 1800
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.22170523691150348,
      "learning_rate": 8.874350367512465e-05,
      "loss": 0.4269,
      "step": 1801
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.1797945175853634,
      "learning_rate": 8.865010274239372e-05,
      "loss": 0.3614,
      "step": 1802
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.19392543266039863,
      "learning_rate": 8.85567118391505e-05,
      "loss": 0.415,
      "step": 1803
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18620026243486437,
      "learning_rate": 8.846333104792116e-05,
      "loss": 0.416,
      "step": 1804
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.1936617538942135,
      "learning_rate": 8.836996045122286e-05,
      "loss": 0.404,
      "step": 1805
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.205931504034666,
      "learning_rate": 8.827660013156381e-05,
      "loss": 0.4025,
      "step": 1806
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18421178078483139,
      "learning_rate": 8.818325017144302e-05,
      "loss": 0.3884,
      "step": 1807
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.1872697932453832,
      "learning_rate": 8.808991065335049e-05,
      "loss": 0.3989,
      "step": 1808
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.1749427185898119,
      "learning_rate": 8.799658165976694e-05,
      "loss": 0.4226,
      "step": 1809
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.19819149794184812,
      "learning_rate": 8.790326327316372e-05,
      "loss": 0.4501,
      "step": 1810
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18523438192000483,
      "learning_rate": 8.780995557600287e-05,
      "loss": 0.3982,
      "step": 1811
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.17754926519420264,
      "learning_rate": 8.771665865073707e-05,
      "loss": 0.4238,
      "step": 1812
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18958930276429575,
      "learning_rate": 8.762337257980927e-05,
      "loss": 0.4067,
      "step": 1813
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.17184782349154784,
      "learning_rate": 8.753009744565297e-05,
      "loss": 0.3839,
      "step": 1814
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18472593089993697,
      "learning_rate": 8.743683333069208e-05,
      "loss": 0.4147,
      "step": 1815
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.1754635852591751,
      "learning_rate": 8.734358031734056e-05,
      "loss": 0.3632,
      "step": 1816
    },
    {
      "epoch": 1.06,
      "grad_norm": 0.18050807312591965,
      "learning_rate": 8.725033848800273e-05,
      "loss": 0.4126,
      "step": 1817
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.18768687894888247,
      "learning_rate": 8.715710792507295e-05,
      "loss": 0.4277,
      "step": 1818
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.16954704286667174,
      "learning_rate": 8.706388871093571e-05,
      "loss": 0.3805,
      "step": 1819
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.17940056263287732,
      "learning_rate": 8.697068092796531e-05,
      "loss": 0.3967,
      "step": 1820
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1957651587746892,
      "learning_rate": 8.687748465852614e-05,
      "loss": 0.4095,
      "step": 1821
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1787849320144345,
      "learning_rate": 8.678429998497229e-05,
      "loss": 0.401,
      "step": 1822
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1844744058248385,
      "learning_rate": 8.66911269896476e-05,
      "loss": 0.3861,
      "step": 1823
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.19625307901860042,
      "learning_rate": 8.659796575488566e-05,
      "loss": 0.4097,
      "step": 1824
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1950267957891965,
      "learning_rate": 8.650481636300969e-05,
      "loss": 0.4397,
      "step": 1825
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.17815768732680395,
      "learning_rate": 8.641167889633228e-05,
      "loss": 0.3898,
      "step": 1826
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.18737413534954547,
      "learning_rate": 8.631855343715565e-05,
      "loss": 0.4171,
      "step": 1827
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1820901135233458,
      "learning_rate": 8.622544006777136e-05,
      "loss": 0.3997,
      "step": 1828
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.1804075064552973,
      "learning_rate": 8.613233887046027e-05,
      "loss": 0.3789,
      "step": 1829
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.19493763026231556,
      "learning_rate": 8.603924992749245e-05,
      "loss": 0.3841,
      "step": 1830
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.20359305389403523,
      "learning_rate": 8.594617332112725e-05,
      "loss": 0.4453,
      "step": 1831
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.2265086862162401,
      "learning_rate": 8.585310913361301e-05,
      "loss": 0.4563,
      "step": 1832
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.17567731483293006,
      "learning_rate": 8.576005744718716e-05,
      "loss": 0.4016,
      "step": 1833
    },
    {
      "epoch": 1.07,
      "grad_norm": 0.17379900034242313,
      "learning_rate": 8.566701834407605e-05,
      "loss": 0.3586,
      "step": 1834
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.1942490326860427,
      "learning_rate": 8.557399190649496e-05,
      "loss": 0.4221,
      "step": 1835
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.19035569554722812,
      "learning_rate": 8.548097821664785e-05,
      "loss": 0.4172,
      "step": 1836
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.1843142843552852,
      "learning_rate": 8.538797735672753e-05,
      "loss": 0.4148,
      "step": 1837
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18512939658727262,
      "learning_rate": 8.529498940891554e-05,
      "loss": 0.3773,
      "step": 1838
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.20380626583697006,
      "learning_rate": 8.520201445538183e-05,
      "loss": 0.3611,
      "step": 1839
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.19398861478004933,
      "learning_rate": 8.510905257828496e-05,
      "loss": 0.412,
      "step": 1840
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.19213160694154596,
      "learning_rate": 8.501610385977198e-05,
      "loss": 0.4024,
      "step": 1841
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18667688767595464,
      "learning_rate": 8.49231683819782e-05,
      "loss": 0.4313,
      "step": 1842
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.17127203811779163,
      "learning_rate": 8.483024622702732e-05,
      "loss": 0.3568,
      "step": 1843
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.17717528881555858,
      "learning_rate": 8.473733747703129e-05,
      "loss": 0.4102,
      "step": 1844
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18077085803280193,
      "learning_rate": 8.464444221409004e-05,
      "loss": 0.3759,
      "step": 1845
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18149742276377073,
      "learning_rate": 8.45515605202918e-05,
      "loss": 0.423,
      "step": 1846
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.1845319878720036,
      "learning_rate": 8.44586924777127e-05,
      "loss": 0.4175,
      "step": 1847
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18412454666796282,
      "learning_rate": 8.436583816841684e-05,
      "loss": 0.3959,
      "step": 1848
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.1818564229667886,
      "learning_rate": 8.42729976744561e-05,
      "loss": 0.425,
      "step": 1849
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.21214694567252032,
      "learning_rate": 8.418017107787028e-05,
      "loss": 0.4223,
      "step": 1850
    },
    {
      "epoch": 1.08,
      "grad_norm": 0.18464329908222107,
      "learning_rate": 8.408735846068683e-05,
      "loss": 0.4099,
      "step": 1851
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.1953000842888941,
      "learning_rate": 8.399455990492082e-05,
      "loss": 0.373,
      "step": 1852
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.19283458690381147,
      "learning_rate": 8.390177549257494e-05,
      "loss": 0.4272,
      "step": 1853
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.186815226924515,
      "learning_rate": 8.38090053056394e-05,
      "loss": 0.3891,
      "step": 1854
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17547970914468208,
      "learning_rate": 8.371624942609177e-05,
      "loss": 0.3661,
      "step": 1855
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.18299608790344135,
      "learning_rate": 8.362350793589705e-05,
      "loss": 0.4133,
      "step": 1856
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.188868917620866,
      "learning_rate": 8.353078091700751e-05,
      "loss": 0.3885,
      "step": 1857
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.18813258089991336,
      "learning_rate": 8.343806845136255e-05,
      "loss": 0.4251,
      "step": 1858
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.2141392463690851,
      "learning_rate": 8.334537062088878e-05,
      "loss": 0.3776,
      "step": 1859
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.18765213967305597,
      "learning_rate": 8.325268750749991e-05,
      "loss": 0.4284,
      "step": 1860
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17745783167340184,
      "learning_rate": 8.316001919309667e-05,
      "loss": 0.3719,
      "step": 1861
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17032487036928615,
      "learning_rate": 8.306736575956651e-05,
      "loss": 0.3871,
      "step": 1862
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.1946877937916614,
      "learning_rate": 8.297472728878392e-05,
      "loss": 0.4444,
      "step": 1863
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17977342966223753,
      "learning_rate": 8.288210386261019e-05,
      "loss": 0.4286,
      "step": 1864
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17193878042087601,
      "learning_rate": 8.278949556289314e-05,
      "loss": 0.3836,
      "step": 1865
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.17939010894202997,
      "learning_rate": 8.269690247146737e-05,
      "loss": 0.3601,
      "step": 1866
    },
    {
      "epoch": 1.09,
      "grad_norm": 0.19980024554153514,
      "learning_rate": 8.260432467015403e-05,
      "loss": 0.4138,
      "step": 1867
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.18353233337118147,
      "learning_rate": 8.251176224076067e-05,
      "loss": 0.4275,
      "step": 1868
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.1834636907263801,
      "learning_rate": 8.241921526508135e-05,
      "loss": 0.4151,
      "step": 1869
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.17296086904070018,
      "learning_rate": 8.232668382489646e-05,
      "loss": 0.3677,
      "step": 1870
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.16857606057643587,
      "learning_rate": 8.223416800197256e-05,
      "loss": 0.3758,
      "step": 1871
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.18178713996724474,
      "learning_rate": 8.214166787806252e-05,
      "loss": 0.4152,
      "step": 1872
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.18252800035226638,
      "learning_rate": 8.204918353490535e-05,
      "loss": 0.3854,
      "step": 1873
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.19028068789402494,
      "learning_rate": 8.195671505422602e-05,
      "loss": 0.3792,
      "step": 1874
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.19079634326382508,
      "learning_rate": 8.186426251773554e-05,
      "loss": 0.4294,
      "step": 1875
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.17426500052210128,
      "learning_rate": 8.177182600713084e-05,
      "loss": 0.3913,
      "step": 1876
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.17814955101973237,
      "learning_rate": 8.167940560409469e-05,
      "loss": 0.383,
      "step": 1877
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.1846284044065382,
      "learning_rate": 8.158700139029557e-05,
      "loss": 0.3809,
      "step": 1878
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.192583860307762,
      "learning_rate": 8.14946134473877e-05,
      "loss": 0.4238,
      "step": 1879
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.1813183524341541,
      "learning_rate": 8.140224185701097e-05,
      "loss": 0.4186,
      "step": 1880
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.18483836078982704,
      "learning_rate": 8.130988670079068e-05,
      "loss": 0.3645,
      "step": 1881
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.18664080177947134,
      "learning_rate": 8.121754806033772e-05,
      "loss": 0.4221,
      "step": 1882
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.21118292992092838,
      "learning_rate": 8.112522601724844e-05,
      "loss": 0.3988,
      "step": 1883
    },
    {
      "epoch": 1.1,
      "grad_norm": 0.21933086442133368,
      "learning_rate": 8.103292065310431e-05,
      "loss": 0.4194,
      "step": 1884
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.18369374982890216,
      "learning_rate": 8.094063204947227e-05,
      "loss": 0.4313,
      "step": 1885
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.17871299903509055,
      "learning_rate": 8.084836028790438e-05,
      "loss": 0.3579,
      "step": 1886
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.21040972807802708,
      "learning_rate": 8.075610544993777e-05,
      "loss": 0.4453,
      "step": 1887
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.20196209589838468,
      "learning_rate": 8.066386761709467e-05,
      "loss": 0.4136,
      "step": 1888
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.2012185214218647,
      "learning_rate": 8.057164687088228e-05,
      "loss": 0.4185,
      "step": 1889
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.1930053093599451,
      "learning_rate": 8.04794432927927e-05,
      "loss": 0.4088,
      "step": 1890
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.18229158780923613,
      "learning_rate": 8.038725696430281e-05,
      "loss": 0.4119,
      "step": 1891
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.23378468706836802,
      "learning_rate": 8.029508796687432e-05,
      "loss": 0.4211,
      "step": 1892
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.2222541226430703,
      "learning_rate": 8.020293638195361e-05,
      "loss": 0.4337,
      "step": 1893
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.18043439554449295,
      "learning_rate": 8.011080229097159e-05,
      "loss": 0.3612,
      "step": 1894
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.18301487658509413,
      "learning_rate": 8.00186857753438e-05,
      "loss": 0.369,
      "step": 1895
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.16630129703748364,
      "learning_rate": 7.992658691647027e-05,
      "loss": 0.3718,
      "step": 1896
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.19163500305469702,
      "learning_rate": 7.98345057957353e-05,
      "loss": 0.4357,
      "step": 1897
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.1960461834615872,
      "learning_rate": 7.974244249450767e-05,
      "loss": 0.418,
      "step": 1898
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.19224624824324751,
      "learning_rate": 7.965039709414032e-05,
      "loss": 0.3982,
      "step": 1899
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.18840740313567736,
      "learning_rate": 7.955836967597038e-05,
      "loss": 0.4095,
      "step": 1900
    },
    {
      "epoch": 1.11,
      "grad_norm": 0.1844305249109545,
      "learning_rate": 7.946636032131912e-05,
      "loss": 0.4772,
      "step": 1901
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.2009709634690373,
      "learning_rate": 7.937436911149184e-05,
      "loss": 0.4181,
      "step": 1902
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.2020297014858179,
      "learning_rate": 7.928239612777775e-05,
      "loss": 0.4264,
      "step": 1903
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1895774812668892,
      "learning_rate": 7.919044145145e-05,
      "loss": 0.4022,
      "step": 1904
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.18240865004600954,
      "learning_rate": 7.909850516376563e-05,
      "loss": 0.3806,
      "step": 1905
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1681279511498143,
      "learning_rate": 7.900658734596536e-05,
      "loss": 0.377,
      "step": 1906
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.20398653582049914,
      "learning_rate": 7.891468807927351e-05,
      "loss": 0.4547,
      "step": 1907
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.17695818800041072,
      "learning_rate": 7.882280744489815e-05,
      "loss": 0.404,
      "step": 1908
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1888069872727214,
      "learning_rate": 7.873094552403083e-05,
      "loss": 0.4014,
      "step": 1909
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.19794289849692784,
      "learning_rate": 7.863910239784653e-05,
      "loss": 0.378,
      "step": 1910
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.2563098214130778,
      "learning_rate": 7.854727814750366e-05,
      "loss": 0.4226,
      "step": 1911
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.17518721676678928,
      "learning_rate": 7.845547285414399e-05,
      "loss": 0.3878,
      "step": 1912
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.18134286816882556,
      "learning_rate": 7.83636865988924e-05,
      "loss": 0.4501,
      "step": 1913
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.19364287166803107,
      "learning_rate": 7.827191946285709e-05,
      "loss": 0.3813,
      "step": 1914
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1889713570534168,
      "learning_rate": 7.818017152712933e-05,
      "loss": 0.3485,
      "step": 1915
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.1886184628106324,
      "learning_rate": 7.808844287278336e-05,
      "loss": 0.3974,
      "step": 1916
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.18047433681543168,
      "learning_rate": 7.799673358087643e-05,
      "loss": 0.4264,
      "step": 1917
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.17959650439172858,
      "learning_rate": 7.790504373244866e-05,
      "loss": 0.3832,
      "step": 1918
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.1913833511955245,
      "learning_rate": 7.78133734085231e-05,
      "loss": 0.4556,
      "step": 1919
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.1826764203359158,
      "learning_rate": 7.772172269010535e-05,
      "loss": 0.4044,
      "step": 1920
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17986155200134601,
      "learning_rate": 7.763009165818382e-05,
      "loss": 0.3745,
      "step": 1921
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.18959379016802552,
      "learning_rate": 7.75384803937295e-05,
      "loss": 0.4505,
      "step": 1922
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17214698727930164,
      "learning_rate": 7.74468889776959e-05,
      "loss": 0.3736,
      "step": 1923
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.18385963141889453,
      "learning_rate": 7.735531749101898e-05,
      "loss": 0.4265,
      "step": 1924
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17449413736303118,
      "learning_rate": 7.726376601461716e-05,
      "loss": 0.387,
      "step": 1925
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.21654659378768404,
      "learning_rate": 7.7172234629391e-05,
      "loss": 0.4454,
      "step": 1926
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.18185744997632827,
      "learning_rate": 7.708072341622352e-05,
      "loss": 0.4193,
      "step": 1927
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17732744182694674,
      "learning_rate": 7.698923245597986e-05,
      "loss": 0.3979,
      "step": 1928
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.1637787721289065,
      "learning_rate": 7.68977618295071e-05,
      "loss": 0.3519,
      "step": 1929
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17846544793538227,
      "learning_rate": 7.680631161763457e-05,
      "loss": 0.3867,
      "step": 1930
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.17240474406241782,
      "learning_rate": 7.671488190117341e-05,
      "loss": 0.4343,
      "step": 1931
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.19469226587363003,
      "learning_rate": 7.662347276091677e-05,
      "loss": 0.3906,
      "step": 1932
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.16319658498994377,
      "learning_rate": 7.653208427763949e-05,
      "loss": 0.3847,
      "step": 1933
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.18439605456499675,
      "learning_rate": 7.644071653209826e-05,
      "loss": 0.3699,
      "step": 1934
    },
    {
      "epoch": 1.13,
      "grad_norm": 0.19104357865644897,
      "learning_rate": 7.63493696050314e-05,
      "loss": 0.3871,
      "step": 1935
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.16163196012369285,
      "learning_rate": 7.625804357715882e-05,
      "loss": 0.3428,
      "step": 1936
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.1747508317386647,
      "learning_rate": 7.616673852918198e-05,
      "loss": 0.4059,
      "step": 1937
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.18121733774112123,
      "learning_rate": 7.607545454178386e-05,
      "loss": 0.3769,
      "step": 1938
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.18241831118169877,
      "learning_rate": 7.598419169562867e-05,
      "loss": 0.3982,
      "step": 1939
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.17679256583275185,
      "learning_rate": 7.589295007136206e-05,
      "loss": 0.3993,
      "step": 1940
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.18477564112117775,
      "learning_rate": 7.580172974961101e-05,
      "loss": 0.3957,
      "step": 1941
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.23283220926103426,
      "learning_rate": 7.571053081098346e-05,
      "loss": 0.4147,
      "step": 1942
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.1659718388147332,
      "learning_rate": 7.561935333606858e-05,
      "loss": 0.3624,
      "step": 1943
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.18206208296809556,
      "learning_rate": 7.552819740543661e-05,
      "loss": 0.4359,
      "step": 1944
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.17940915446893982,
      "learning_rate": 7.543706309963868e-05,
      "loss": 0.4134,
      "step": 1945
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.1702703539861626,
      "learning_rate": 7.534595049920679e-05,
      "loss": 0.3939,
      "step": 1946
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.1689150777050423,
      "learning_rate": 7.525485968465384e-05,
      "loss": 0.3816,
      "step": 1947
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.15897856127120902,
      "learning_rate": 7.516379073647346e-05,
      "loss": 0.377,
      "step": 1948
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.17742771441407118,
      "learning_rate": 7.50727437351399e-05,
      "loss": 0.3619,
      "step": 1949
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.1816340107538647,
      "learning_rate": 7.498171876110805e-05,
      "loss": 0.4024,
      "step": 1950
    },
    {
      "epoch": 1.14,
      "grad_norm": 0.19659089087972145,
      "learning_rate": 7.489071589481342e-05,
      "loss": 0.3878,
      "step": 1951
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18428595373407372,
      "learning_rate": 7.479973521667179e-05,
      "loss": 0.4133,
      "step": 1952
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.20632609287533257,
      "learning_rate": 7.470877680707951e-05,
      "loss": 0.4194,
      "step": 1953
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.17669764278155453,
      "learning_rate": 7.461784074641318e-05,
      "loss": 0.4146,
      "step": 1954
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.177643428472356,
      "learning_rate": 7.452692711502964e-05,
      "loss": 0.3895,
      "step": 1955
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.19261073679736163,
      "learning_rate": 7.443603599326596e-05,
      "loss": 0.4313,
      "step": 1956
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18331838163686537,
      "learning_rate": 7.43451674614393e-05,
      "loss": 0.4061,
      "step": 1957
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18087488767406523,
      "learning_rate": 7.42543215998468e-05,
      "loss": 0.4041,
      "step": 1958
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18411365462351864,
      "learning_rate": 7.416349848876562e-05,
      "loss": 0.3838,
      "step": 1959
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.16545729365630413,
      "learning_rate": 7.407269820845286e-05,
      "loss": 0.398,
      "step": 1960
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18790700003247798,
      "learning_rate": 7.398192083914541e-05,
      "loss": 0.4313,
      "step": 1961
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.17796337044732197,
      "learning_rate": 7.389116646105977e-05,
      "loss": 0.3651,
      "step": 1962
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.17905613829190345,
      "learning_rate": 7.380043515439237e-05,
      "loss": 0.3664,
      "step": 1963
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18849597901714205,
      "learning_rate": 7.370972699931915e-05,
      "loss": 0.3926,
      "step": 1964
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.1812978236307781,
      "learning_rate": 7.361904207599551e-05,
      "loss": 0.4053,
      "step": 1965
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.18651056741943095,
      "learning_rate": 7.352838046455639e-05,
      "loss": 0.3985,
      "step": 1966
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.1797159480659541,
      "learning_rate": 7.34377422451162e-05,
      "loss": 0.3922,
      "step": 1967
    },
    {
      "epoch": 1.15,
      "grad_norm": 0.19978615888480009,
      "learning_rate": 7.334712749776853e-05,
      "loss": 0.4348,
      "step": 1968
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17603187423247865,
      "learning_rate": 7.325653630258633e-05,
      "loss": 0.3818,
      "step": 1969
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.18082094994914052,
      "learning_rate": 7.316596873962177e-05,
      "loss": 0.4009,
      "step": 1970
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.1808224594125852,
      "learning_rate": 7.3075424888906e-05,
      "loss": 0.3969,
      "step": 1971
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.19116767160142442,
      "learning_rate": 7.298490483044935e-05,
      "loss": 0.3873,
      "step": 1972
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17641305911358995,
      "learning_rate": 7.28944086442411e-05,
      "loss": 0.4288,
      "step": 1973
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17262296279132974,
      "learning_rate": 7.280393641024932e-05,
      "loss": 0.3738,
      "step": 1974
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17516839482576677,
      "learning_rate": 7.271348820842106e-05,
      "loss": 0.3659,
      "step": 1975
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17338489664694423,
      "learning_rate": 7.262306411868207e-05,
      "loss": 0.3807,
      "step": 1976
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.1888159247358559,
      "learning_rate": 7.253266422093683e-05,
      "loss": 0.3881,
      "step": 1977
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17195219592344435,
      "learning_rate": 7.244228859506836e-05,
      "loss": 0.3644,
      "step": 1978
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.18121973200861058,
      "learning_rate": 7.23519373209383e-05,
      "loss": 0.422,
      "step": 1979
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.203387565315694,
      "learning_rate": 7.226161047838679e-05,
      "loss": 0.4226,
      "step": 1980
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.19176102596209946,
      "learning_rate": 7.21713081472323e-05,
      "loss": 0.4114,
      "step": 1981
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.17366451038903769,
      "learning_rate": 7.208103040727172e-05,
      "loss": 0.4188,
      "step": 1982
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.19204682803231887,
      "learning_rate": 7.199077733828019e-05,
      "loss": 0.422,
      "step": 1983
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.19235955451920322,
      "learning_rate": 7.190054902001097e-05,
      "loss": 0.435,
      "step": 1984
    },
    {
      "epoch": 1.16,
      "grad_norm": 0.1910525933944164,
      "learning_rate": 7.181034553219554e-05,
      "loss": 0.3995,
      "step": 1985
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18707018048340046,
      "learning_rate": 7.172016695454349e-05,
      "loss": 0.4059,
      "step": 1986
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.1757299951012914,
      "learning_rate": 7.16300133667422e-05,
      "loss": 0.3957,
      "step": 1987
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18577391313825203,
      "learning_rate": 7.153988484845715e-05,
      "loss": 0.4407,
      "step": 1988
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.1923290224001986,
      "learning_rate": 7.144978147933162e-05,
      "loss": 0.407,
      "step": 1989
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18575659520168106,
      "learning_rate": 7.135970333898661e-05,
      "loss": 0.3765,
      "step": 1990
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18086735161720963,
      "learning_rate": 7.12696505070209e-05,
      "loss": 0.3808,
      "step": 1991
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.1854599118032104,
      "learning_rate": 7.117962306301084e-05,
      "loss": 0.424,
      "step": 1992
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.17537407633031674,
      "learning_rate": 7.108962108651044e-05,
      "loss": 0.3759,
      "step": 1993
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18981483483536024,
      "learning_rate": 7.099964465705106e-05,
      "loss": 0.4125,
      "step": 1994
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18413193277267284,
      "learning_rate": 7.090969385414163e-05,
      "loss": 0.4338,
      "step": 1995
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18158751633879683,
      "learning_rate": 7.081976875726842e-05,
      "loss": 0.4043,
      "step": 1996
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.1800954346143054,
      "learning_rate": 7.072986944589479e-05,
      "loss": 0.4144,
      "step": 1997
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18894150742733065,
      "learning_rate": 7.063999599946159e-05,
      "loss": 0.4117,
      "step": 1998
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.20030988236022462,
      "learning_rate": 7.055014849738664e-05,
      "loss": 0.4534,
      "step": 1999
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.17896542445794142,
      "learning_rate": 7.046032701906486e-05,
      "loss": 0.3843,
      "step": 2000
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.18090923195179542,
      "learning_rate": 7.037053164386824e-05,
      "loss": 0.3942,
      "step": 2001
    },
    {
      "epoch": 1.17,
      "grad_norm": 0.20066012152663207,
      "learning_rate": 7.028076245114566e-05,
      "loss": 0.4578,
      "step": 2002
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.1778928677232292,
      "learning_rate": 7.01910195202228e-05,
      "loss": 0.3919,
      "step": 2003
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.17177894275792283,
      "learning_rate": 7.010130293040225e-05,
      "loss": 0.4107,
      "step": 2004
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.19618483219310756,
      "learning_rate": 7.001161276096326e-05,
      "loss": 0.4207,
      "step": 2005
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.19677381672104852,
      "learning_rate": 6.992194909116178e-05,
      "loss": 0.4249,
      "step": 2006
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.1694684202946766,
      "learning_rate": 6.98323120002302e-05,
      "loss": 0.3868,
      "step": 2007
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.17590822070156392,
      "learning_rate": 6.97427015673776e-05,
      "loss": 0.4102,
      "step": 2008
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.16951737219148982,
      "learning_rate": 6.965311787178946e-05,
      "loss": 0.3726,
      "step": 2009
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.19106620473149435,
      "learning_rate": 6.956356099262753e-05,
      "loss": 0.4069,
      "step": 2010
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.1797551006905545,
      "learning_rate": 6.947403100902999e-05,
      "loss": 0.3935,
      "step": 2011
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.19000730900286916,
      "learning_rate": 6.938452800011119e-05,
      "loss": 0.4702,
      "step": 2012
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.17588777897038269,
      "learning_rate": 6.929505204496162e-05,
      "loss": 0.3983,
      "step": 2013
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.17399512960046365,
      "learning_rate": 6.920560322264795e-05,
      "loss": 0.3814,
      "step": 2014
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.18764249310755376,
      "learning_rate": 6.911618161221282e-05,
      "loss": 0.4644,
      "step": 2015
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.1955184775642465,
      "learning_rate": 6.902678729267478e-05,
      "loss": 0.402,
      "step": 2016
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.18693802731785295,
      "learning_rate": 6.893742034302835e-05,
      "loss": 0.4156,
      "step": 2017
    },
    {
      "epoch": 1.18,
      "grad_norm": 0.18911478909963586,
      "learning_rate": 6.88480808422438e-05,
      "loss": 0.4043,
      "step": 2018
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.17496610630242038,
      "learning_rate": 6.875876886926723e-05,
      "loss": 0.3687,
      "step": 2019
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.20364715714956416,
      "learning_rate": 6.866948450302025e-05,
      "loss": 0.4489,
      "step": 2020
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.1764577158807296,
      "learning_rate": 6.858022782240024e-05,
      "loss": 0.4058,
      "step": 2021
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.17749840229336564,
      "learning_rate": 6.849099890628008e-05,
      "loss": 0.3684,
      "step": 2022
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.2033262587464068,
      "learning_rate": 6.840179783350802e-05,
      "loss": 0.4333,
      "step": 2023
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.19801907204469715,
      "learning_rate": 6.831262468290782e-05,
      "loss": 0.3815,
      "step": 2024
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.17479276111326633,
      "learning_rate": 6.822347953327852e-05,
      "loss": 0.3767,
      "step": 2025
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.18329259931818292,
      "learning_rate": 6.813436246339439e-05,
      "loss": 0.4289,
      "step": 2026
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.18093663471154162,
      "learning_rate": 6.804527355200496e-05,
      "loss": 0.3715,
      "step": 2027
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.1792934447291919,
      "learning_rate": 6.795621287783482e-05,
      "loss": 0.3975,
      "step": 2028
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.18691613088073863,
      "learning_rate": 6.786718051958357e-05,
      "loss": 0.3926,
      "step": 2029
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.1922768604680592,
      "learning_rate": 6.777817655592587e-05,
      "loss": 0.4351,
      "step": 2030
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.1783333495829052,
      "learning_rate": 6.768920106551128e-05,
      "loss": 0.3987,
      "step": 2031
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.17612582083352998,
      "learning_rate": 6.760025412696419e-05,
      "loss": 0.3532,
      "step": 2032
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.18031897593024745,
      "learning_rate": 6.751133581888367e-05,
      "loss": 0.4045,
      "step": 2033
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.1943194093034378,
      "learning_rate": 6.742244621984362e-05,
      "loss": 0.4307,
      "step": 2034
    },
    {
      "epoch": 1.19,
      "grad_norm": 0.17761984368104813,
      "learning_rate": 6.733358540839253e-05,
      "loss": 0.4051,
      "step": 2035
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.19053785851354668,
      "learning_rate": 6.724475346305338e-05,
      "loss": 0.403,
      "step": 2036
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1819068527231751,
      "learning_rate": 6.715595046232374e-05,
      "loss": 0.4102,
      "step": 2037
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17372665786419286,
      "learning_rate": 6.70671764846756e-05,
      "loss": 0.387,
      "step": 2038
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17727979798044205,
      "learning_rate": 6.697843160855518e-05,
      "loss": 0.3994,
      "step": 2039
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1882371246792817,
      "learning_rate": 6.688971591238313e-05,
      "loss": 0.4211,
      "step": 2040
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1804413873803804,
      "learning_rate": 6.68010294745543e-05,
      "loss": 0.4154,
      "step": 2041
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.18656805553080838,
      "learning_rate": 6.671237237343751e-05,
      "loss": 0.4059,
      "step": 2042
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.2030909495200903,
      "learning_rate": 6.66237446873759e-05,
      "loss": 0.389,
      "step": 2043
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17679937581787675,
      "learning_rate": 6.653514649468644e-05,
      "loss": 0.3785,
      "step": 2044
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.21855116652362658,
      "learning_rate": 6.644657787366013e-05,
      "loss": 0.4254,
      "step": 2045
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.18903179904922357,
      "learning_rate": 6.635803890256181e-05,
      "loss": 0.3625,
      "step": 2046
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17348919788827918,
      "learning_rate": 6.626952965963012e-05,
      "loss": 0.3667,
      "step": 2047
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17361251854492593,
      "learning_rate": 6.618105022307746e-05,
      "loss": 0.3579,
      "step": 2048
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.16596905289453603,
      "learning_rate": 6.609260067108979e-05,
      "loss": 0.3831,
      "step": 2049
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1821772550745199,
      "learning_rate": 6.600418108182678e-05,
      "loss": 0.3989,
      "step": 2050
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.17611934697238005,
      "learning_rate": 6.59157915334216e-05,
      "loss": 0.3883,
      "step": 2051
    },
    {
      "epoch": 1.2,
      "grad_norm": 0.1920197326417183,
      "learning_rate": 6.582743210398079e-05,
      "loss": 0.3746,
      "step": 2052
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17930833321205353,
      "learning_rate": 6.573910287158437e-05,
      "loss": 0.3933,
      "step": 2053
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17538040457127826,
      "learning_rate": 6.565080391428568e-05,
      "loss": 0.3556,
      "step": 2054
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.18958975621302934,
      "learning_rate": 6.556253531011119e-05,
      "loss": 0.4422,
      "step": 2055
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17115102273167726,
      "learning_rate": 6.547429713706066e-05,
      "loss": 0.3716,
      "step": 2056
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17770240634247175,
      "learning_rate": 6.538608947310694e-05,
      "loss": 0.3951,
      "step": 2057
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.1789512078302291,
      "learning_rate": 6.529791239619589e-05,
      "loss": 0.3891,
      "step": 2058
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.18976068809608343,
      "learning_rate": 6.520976598424637e-05,
      "loss": 0.4145,
      "step": 2059
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.18256866392900348,
      "learning_rate": 6.512165031515012e-05,
      "loss": 0.3956,
      "step": 2060
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.1985727795822358,
      "learning_rate": 6.503356546677173e-05,
      "loss": 0.4006,
      "step": 2061
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.1939392444337732,
      "learning_rate": 6.494551151694854e-05,
      "loss": 0.4387,
      "step": 2062
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.1728571695359017,
      "learning_rate": 6.485748854349061e-05,
      "loss": 0.381,
      "step": 2063
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17795690326553984,
      "learning_rate": 6.476949662418062e-05,
      "loss": 0.4181,
      "step": 2064
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.17801662831363646,
      "learning_rate": 6.468153583677377e-05,
      "loss": 0.389,
      "step": 2065
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.189558829045939,
      "learning_rate": 6.459360625899775e-05,
      "loss": 0.3926,
      "step": 2066
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.16815142536338085,
      "learning_rate": 6.450570796855282e-05,
      "loss": 0.3688,
      "step": 2067
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.18054471114398113,
      "learning_rate": 6.441784104311135e-05,
      "loss": 0.4176,
      "step": 2068
    },
    {
      "epoch": 1.21,
      "grad_norm": 0.1900667998193899,
      "learning_rate": 6.433000556031816e-05,
      "loss": 0.4222,
      "step": 2069
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.19866351971870957,
      "learning_rate": 6.424220159779029e-05,
      "loss": 0.4511,
      "step": 2070
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18085548000788157,
      "learning_rate": 6.415442923311679e-05,
      "loss": 0.3807,
      "step": 2071
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.20079665391358242,
      "learning_rate": 6.406668854385895e-05,
      "loss": 0.4718,
      "step": 2072
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.15874256335306394,
      "learning_rate": 6.397897960755002e-05,
      "loss": 0.3342,
      "step": 2073
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.17134778394449662,
      "learning_rate": 6.389130250169505e-05,
      "loss": 0.3913,
      "step": 2074
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18450344260328666,
      "learning_rate": 6.38036573037712e-05,
      "loss": 0.4319,
      "step": 2075
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.1820653336275944,
      "learning_rate": 6.371604409122725e-05,
      "loss": 0.4174,
      "step": 2076
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.19171083276523215,
      "learning_rate": 6.362846294148383e-05,
      "loss": 0.4122,
      "step": 2077
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18542133466551414,
      "learning_rate": 6.354091393193315e-05,
      "loss": 0.4434,
      "step": 2078
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.17992373370079723,
      "learning_rate": 6.345339713993905e-05,
      "loss": 0.3759,
      "step": 2079
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18560798648589613,
      "learning_rate": 6.336591264283697e-05,
      "loss": 0.4096,
      "step": 2080
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18633482450613692,
      "learning_rate": 6.327846051793367e-05,
      "loss": 0.4251,
      "step": 2081
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.1836619900352879,
      "learning_rate": 6.319104084250742e-05,
      "loss": 0.3925,
      "step": 2082
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.20091797196508251,
      "learning_rate": 6.310365369380779e-05,
      "loss": 0.404,
      "step": 2083
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.1891532144533284,
      "learning_rate": 6.301629914905559e-05,
      "loss": 0.4185,
      "step": 2084
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.18153942514029875,
      "learning_rate": 6.292897728544282e-05,
      "loss": 0.393,
      "step": 2085
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18372339720739236,
      "learning_rate": 6.284168818013263e-05,
      "loss": 0.4071,
      "step": 2086
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.2011783046120318,
      "learning_rate": 6.275443191025916e-05,
      "loss": 0.4618,
      "step": 2087
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18169267533871733,
      "learning_rate": 6.266720855292756e-05,
      "loss": 0.4191,
      "step": 2088
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.19727917370411416,
      "learning_rate": 6.258001818521397e-05,
      "loss": 0.4121,
      "step": 2089
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18067270195439983,
      "learning_rate": 6.249286088416534e-05,
      "loss": 0.4069,
      "step": 2090
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1806495150301102,
      "learning_rate": 6.240573672679929e-05,
      "loss": 0.4105,
      "step": 2091
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1810490781739672,
      "learning_rate": 6.231864579010426e-05,
      "loss": 0.4065,
      "step": 2092
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1881164857873241,
      "learning_rate": 6.22315881510394e-05,
      "loss": 0.3863,
      "step": 2093
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.21083260811753377,
      "learning_rate": 6.214456388653423e-05,
      "loss": 0.4593,
      "step": 2094
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1863138604775313,
      "learning_rate": 6.205757307348898e-05,
      "loss": 0.3912,
      "step": 2095
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18959697990144703,
      "learning_rate": 6.197061578877424e-05,
      "loss": 0.4379,
      "step": 2096
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.17591208224223437,
      "learning_rate": 6.18836921092309e-05,
      "loss": 0.3912,
      "step": 2097
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1673685779867354,
      "learning_rate": 6.179680211167031e-05,
      "loss": 0.367,
      "step": 2098
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18369272051157975,
      "learning_rate": 6.170994587287398e-05,
      "loss": 0.4273,
      "step": 2099
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.18265310430390075,
      "learning_rate": 6.162312346959348e-05,
      "loss": 0.3959,
      "step": 2100
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.19340840466526393,
      "learning_rate": 6.153633497855065e-05,
      "loss": 0.436,
      "step": 2101
    },
    {
      "epoch": 1.23,
      "grad_norm": 0.1887901294443666,
      "learning_rate": 6.144958047643732e-05,
      "loss": 0.4083,
      "step": 2102
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.19466524748526068,
      "learning_rate": 6.136286003991518e-05,
      "loss": 0.4002,
      "step": 2103
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18396890947674227,
      "learning_rate": 6.127617374561597e-05,
      "loss": 0.4071,
      "step": 2104
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18803256333290438,
      "learning_rate": 6.118952167014115e-05,
      "loss": 0.4463,
      "step": 2105
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.19311540609414365,
      "learning_rate": 6.110290389006204e-05,
      "loss": 0.41,
      "step": 2106
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.17270612339310035,
      "learning_rate": 6.10163204819195e-05,
      "loss": 0.4025,
      "step": 2107
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18772333666422653,
      "learning_rate": 6.0929771522224165e-05,
      "loss": 0.3697,
      "step": 2108
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18606081415307524,
      "learning_rate": 6.0843257087456196e-05,
      "loss": 0.3716,
      "step": 2109
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.19080973087182862,
      "learning_rate": 6.075677725406516e-05,
      "loss": 0.3923,
      "step": 2110
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.1775067921640696,
      "learning_rate": 6.067033209847015e-05,
      "loss": 0.4098,
      "step": 2111
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18103399827757333,
      "learning_rate": 6.058392169705962e-05,
      "loss": 0.3799,
      "step": 2112
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.17852952705890326,
      "learning_rate": 6.0497546126191206e-05,
      "loss": 0.3839,
      "step": 2113
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.18179509972863547,
      "learning_rate": 6.041120546219183e-05,
      "loss": 0.3728,
      "step": 2114
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.19484655001885112,
      "learning_rate": 6.0324899781357624e-05,
      "loss": 0.4212,
      "step": 2115
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.1941471719257855,
      "learning_rate": 6.023862915995371e-05,
      "loss": 0.4265,
      "step": 2116
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.1850316045854547,
      "learning_rate": 6.0152393674214256e-05,
      "loss": 0.4253,
      "step": 2117
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.17451934633072588,
      "learning_rate": 6.006619340034243e-05,
      "loss": 0.3849,
      "step": 2118
    },
    {
      "epoch": 1.24,
      "grad_norm": 0.17437022993161974,
      "learning_rate": 5.998002841451027e-05,
      "loss": 0.3987,
      "step": 2119
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.21820425933182622,
      "learning_rate": 5.9893898792858564e-05,
      "loss": 0.3954,
      "step": 2120
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1835293588254982,
      "learning_rate": 5.980780461149691e-05,
      "loss": 0.4105,
      "step": 2121
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.18295428463723487,
      "learning_rate": 5.972174594650363e-05,
      "loss": 0.4099,
      "step": 2122
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1878568911364305,
      "learning_rate": 5.9635722873925514e-05,
      "loss": 0.3768,
      "step": 2123
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1961901235631556,
      "learning_rate": 5.954973546977806e-05,
      "loss": 0.4059,
      "step": 2124
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1789683805877612,
      "learning_rate": 5.946378381004518e-05,
      "loss": 0.3809,
      "step": 2125
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1785260125545566,
      "learning_rate": 5.937786797067917e-05,
      "loss": 0.3934,
      "step": 2126
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.18168550230534175,
      "learning_rate": 5.929198802760072e-05,
      "loss": 0.4142,
      "step": 2127
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.17826458713675644,
      "learning_rate": 5.920614405669882e-05,
      "loss": 0.3845,
      "step": 2128
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.19848867507194148,
      "learning_rate": 5.9120336133830564e-05,
      "loss": 0.4147,
      "step": 2129
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.19693140229221995,
      "learning_rate": 5.903456433482133e-05,
      "loss": 0.4409,
      "step": 2130
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.18964809059631438,
      "learning_rate": 5.894882873546448e-05,
      "loss": 0.3773,
      "step": 2131
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.19548800990162185,
      "learning_rate": 5.886312941152146e-05,
      "loss": 0.4239,
      "step": 2132
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.18556517298441283,
      "learning_rate": 5.877746643872152e-05,
      "loss": 0.4206,
      "step": 2133
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.196685614920556,
      "learning_rate": 5.8691839892761965e-05,
      "loss": 0.3868,
      "step": 2134
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.19578399697755275,
      "learning_rate": 5.860624984930787e-05,
      "loss": 0.4237,
      "step": 2135
    },
    {
      "epoch": 1.25,
      "grad_norm": 0.1978580772450985,
      "learning_rate": 5.852069638399191e-05,
      "loss": 0.4108,
      "step": 2136
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.18181163273583667,
      "learning_rate": 5.843517957241459e-05,
      "loss": 0.3805,
      "step": 2137
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17366006402637094,
      "learning_rate": 5.8349699490144e-05,
      "loss": 0.3652,
      "step": 2138
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.19113273789571564,
      "learning_rate": 5.8264256212715726e-05,
      "loss": 0.423,
      "step": 2139
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17897157794176333,
      "learning_rate": 5.817884981563286e-05,
      "loss": 0.3933,
      "step": 2140
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17668933170504572,
      "learning_rate": 5.809348037436595e-05,
      "loss": 0.3922,
      "step": 2141
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17818487109986716,
      "learning_rate": 5.8008147964352756e-05,
      "loss": 0.3662,
      "step": 2142
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.18207985224347573,
      "learning_rate": 5.792285266099844e-05,
      "loss": 0.407,
      "step": 2143
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17683629780008203,
      "learning_rate": 5.783759453967532e-05,
      "loss": 0.4212,
      "step": 2144
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1722722644784309,
      "learning_rate": 5.7752373675722884e-05,
      "loss": 0.3573,
      "step": 2145
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1791044681179746,
      "learning_rate": 5.766719014444768e-05,
      "loss": 0.4031,
      "step": 2146
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.19337470024812153,
      "learning_rate": 5.758204402112326e-05,
      "loss": 0.3858,
      "step": 2147
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1936105726260476,
      "learning_rate": 5.749693538099018e-05,
      "loss": 0.4277,
      "step": 2148
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1784944177612045,
      "learning_rate": 5.741186429925574e-05,
      "loss": 0.3943,
      "step": 2149
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.16474400059855843,
      "learning_rate": 5.732683085109416e-05,
      "loss": 0.3574,
      "step": 2150
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.1747458965851442,
      "learning_rate": 5.7241835111646444e-05,
      "loss": 0.3542,
      "step": 2151
    },
    {
      "epoch": 1.26,
      "grad_norm": 0.17722508894646266,
      "learning_rate": 5.7156877156020106e-05,
      "loss": 0.383,
      "step": 2152
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.18004930280023854,
      "learning_rate": 5.707195705928943e-05,
      "loss": 0.371,
      "step": 2153
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.1998035437105761,
      "learning_rate": 5.6987074896495176e-05,
      "loss": 0.4052,
      "step": 2154
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.2454047737061032,
      "learning_rate": 5.69022307426446e-05,
      "loss": 0.3956,
      "step": 2155
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.18505776467623378,
      "learning_rate": 5.681742467271137e-05,
      "loss": 0.4188,
      "step": 2156
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.1722368732605317,
      "learning_rate": 5.673265676163555e-05,
      "loss": 0.3941,
      "step": 2157
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.17685811702648357,
      "learning_rate": 5.664792708432333e-05,
      "loss": 0.3567,
      "step": 2158
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.179506955830838,
      "learning_rate": 5.6563235715647264e-05,
      "loss": 0.3972,
      "step": 2159
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.16666951754419984,
      "learning_rate": 5.647858273044602e-05,
      "loss": 0.3469,
      "step": 2160
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.18772902417071785,
      "learning_rate": 5.639396820352436e-05,
      "loss": 0.4102,
      "step": 2161
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.17365394970568318,
      "learning_rate": 5.6309392209652924e-05,
      "loss": 0.3652,
      "step": 2162
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.1781634158116914,
      "learning_rate": 5.622485482356854e-05,
      "loss": 0.3803,
      "step": 2163
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.18323967750889245,
      "learning_rate": 5.614035611997378e-05,
      "loss": 0.4028,
      "step": 2164
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.20230746144118264,
      "learning_rate": 5.605589617353697e-05,
      "loss": 0.3808,
      "step": 2165
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.1805552749948731,
      "learning_rate": 5.597147505889233e-05,
      "loss": 0.4406,
      "step": 2166
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.20374446705458787,
      "learning_rate": 5.588709285063971e-05,
      "loss": 0.4281,
      "step": 2167
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.18591909290479935,
      "learning_rate": 5.580274962334451e-05,
      "loss": 0.4182,
      "step": 2168
    },
    {
      "epoch": 1.27,
      "grad_norm": 0.17103552679405504,
      "learning_rate": 5.571844545153777e-05,
      "loss": 0.3964,
      "step": 2169
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18684053271434015,
      "learning_rate": 5.5634180409716e-05,
      "loss": 0.4113,
      "step": 2170
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.16926235511223492,
      "learning_rate": 5.5549954572341134e-05,
      "loss": 0.3709,
      "step": 2171
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1736106649601771,
      "learning_rate": 5.546576801384045e-05,
      "loss": 0.363,
      "step": 2172
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18806895104857238,
      "learning_rate": 5.538162080860655e-05,
      "loss": 0.4062,
      "step": 2173
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1844043066518322,
      "learning_rate": 5.529751303099717e-05,
      "loss": 0.3892,
      "step": 2174
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18160055542379905,
      "learning_rate": 5.52134447553353e-05,
      "loss": 0.3999,
      "step": 2175
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1956754962166279,
      "learning_rate": 5.512941605590898e-05,
      "loss": 0.4172,
      "step": 2176
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1838961563135142,
      "learning_rate": 5.5045427006971325e-05,
      "loss": 0.4147,
      "step": 2177
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.19107640177369256,
      "learning_rate": 5.4961477682740325e-05,
      "loss": 0.413,
      "step": 2178
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18969577739431961,
      "learning_rate": 5.487756815739894e-05,
      "loss": 0.3999,
      "step": 2179
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.1858908768432806,
      "learning_rate": 5.4793698505094926e-05,
      "loss": 0.3967,
      "step": 2180
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18020222489161605,
      "learning_rate": 5.4709868799940845e-05,
      "loss": 0.4091,
      "step": 2181
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.19520907750630961,
      "learning_rate": 5.4626079116013906e-05,
      "loss": 0.4272,
      "step": 2182
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18640536176447983,
      "learning_rate": 5.4542329527356025e-05,
      "loss": 0.3604,
      "step": 2183
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.19307798922222952,
      "learning_rate": 5.445862010797358e-05,
      "loss": 0.3864,
      "step": 2184
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.18561105414776297,
      "learning_rate": 5.437495093183753e-05,
      "loss": 0.4381,
      "step": 2185
    },
    {
      "epoch": 1.28,
      "grad_norm": 0.16845529408994608,
      "learning_rate": 5.42913220728833e-05,
      "loss": 0.3478,
      "step": 2186
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18024110837169,
      "learning_rate": 5.420773360501057e-05,
      "loss": 0.3901,
      "step": 2187
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18049067690672227,
      "learning_rate": 5.412418560208343e-05,
      "loss": 0.4207,
      "step": 2188
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.1739219539443579,
      "learning_rate": 5.4040678137930214e-05,
      "loss": 0.3659,
      "step": 2189
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18449670679648686,
      "learning_rate": 5.395721128634338e-05,
      "loss": 0.4288,
      "step": 2190
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.1899315846259125,
      "learning_rate": 5.387378512107952e-05,
      "loss": 0.4338,
      "step": 2191
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.1859583595169519,
      "learning_rate": 5.379039971585929e-05,
      "loss": 0.3851,
      "step": 2192
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18077813499619705,
      "learning_rate": 5.3707055144367336e-05,
      "loss": 0.4056,
      "step": 2193
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18580498148705987,
      "learning_rate": 5.362375148025213e-05,
      "loss": 0.393,
      "step": 2194
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.19143544061487677,
      "learning_rate": 5.35404887971261e-05,
      "loss": 0.4284,
      "step": 2195
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.17814943744573722,
      "learning_rate": 5.345726716856545e-05,
      "loss": 0.4002,
      "step": 2196
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.17505964472580593,
      "learning_rate": 5.3374086668110034e-05,
      "loss": 0.3739,
      "step": 2197
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18148223115355736,
      "learning_rate": 5.329094736926342e-05,
      "loss": 0.3796,
      "step": 2198
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18553202610972083,
      "learning_rate": 5.320784934549277e-05,
      "loss": 0.3679,
      "step": 2199
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.2028489781335254,
      "learning_rate": 5.312479267022874e-05,
      "loss": 0.3991,
      "step": 2200
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.18274525464999591,
      "learning_rate": 5.304177741686549e-05,
      "loss": 0.3666,
      "step": 2201
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.175148123213247,
      "learning_rate": 5.295880365876058e-05,
      "loss": 0.4083,
      "step": 2202
    },
    {
      "epoch": 1.29,
      "grad_norm": 0.17081671423731545,
      "learning_rate": 5.2875871469234786e-05,
      "loss": 0.3588,
      "step": 2203
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.1709653542288416,
      "learning_rate": 5.27929809215723e-05,
      "loss": 0.3679,
      "step": 2204
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.18417423979858946,
      "learning_rate": 5.271013208902045e-05,
      "loss": 0.4113,
      "step": 2205
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.18420315430603607,
      "learning_rate": 5.262732504478975e-05,
      "loss": 0.4123,
      "step": 2206
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.19549557097472783,
      "learning_rate": 5.254455986205362e-05,
      "loss": 0.4164,
      "step": 2207
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.1890203514363044,
      "learning_rate": 5.246183661394876e-05,
      "loss": 0.4017,
      "step": 2208
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.19289162012261554,
      "learning_rate": 5.237915537357463e-05,
      "loss": 0.4573,
      "step": 2209
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.18549804668544928,
      "learning_rate": 5.2296516213993564e-05,
      "loss": 0.4038,
      "step": 2210
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.19026869447014055,
      "learning_rate": 5.2213919208230775e-05,
      "loss": 0.4368,
      "step": 2211
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.17654966806048847,
      "learning_rate": 5.2131364429274246e-05,
      "loss": 0.3874,
      "step": 2212
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.18351252152306632,
      "learning_rate": 5.204885195007453e-05,
      "loss": 0.3784,
      "step": 2213
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.19107149354135278,
      "learning_rate": 5.196638184354492e-05,
      "loss": 0.4243,
      "step": 2214
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.19065800665794713,
      "learning_rate": 5.188395418256122e-05,
      "loss": 0.4148,
      "step": 2215
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.18419176321318032,
      "learning_rate": 5.180156903996174e-05,
      "loss": 0.4121,
      "step": 2216
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.1965071770498145,
      "learning_rate": 5.171922648854719e-05,
      "loss": 0.4072,
      "step": 2217
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.17271642659146394,
      "learning_rate": 5.163692660108068e-05,
      "loss": 0.3423,
      "step": 2218
    },
    {
      "epoch": 1.3,
      "grad_norm": 0.1814946728416819,
      "learning_rate": 5.155466945028762e-05,
      "loss": 0.3886,
      "step": 2219
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.2022707732074986,
      "learning_rate": 5.147245510885557e-05,
      "loss": 0.4658,
      "step": 2220
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18841969797937244,
      "learning_rate": 5.1390283649434365e-05,
      "loss": 0.3939,
      "step": 2221
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18887943727385983,
      "learning_rate": 5.130815514463595e-05,
      "loss": 0.4079,
      "step": 2222
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18485677458409824,
      "learning_rate": 5.122606966703418e-05,
      "loss": 0.3811,
      "step": 2223
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.17738323302931794,
      "learning_rate": 5.1144027289165045e-05,
      "loss": 0.3669,
      "step": 2224
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18959955695535494,
      "learning_rate": 5.1062028083526356e-05,
      "loss": 0.388,
      "step": 2225
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.1823026420421858,
      "learning_rate": 5.098007212257782e-05,
      "loss": 0.3942,
      "step": 2226
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.19092667735759558,
      "learning_rate": 5.08981594787409e-05,
      "loss": 0.4174,
      "step": 2227
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18931049227499339,
      "learning_rate": 5.0816290224398844e-05,
      "loss": 0.3614,
      "step": 2228
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.1933197577499917,
      "learning_rate": 5.073446443189642e-05,
      "loss": 0.4206,
      "step": 2229
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.24897884567636863,
      "learning_rate": 5.0652682173540125e-05,
      "loss": 0.4227,
      "step": 2230
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18567988264628352,
      "learning_rate": 5.0570943521597934e-05,
      "loss": 0.361,
      "step": 2231
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.1808443791123837,
      "learning_rate": 5.048924854829934e-05,
      "loss": 0.4162,
      "step": 2232
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.17382289163178277,
      "learning_rate": 5.040759732583511e-05,
      "loss": 0.3578,
      "step": 2233
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.1934059706225405,
      "learning_rate": 5.032598992635748e-05,
      "loss": 0.4163,
      "step": 2234
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.18712047190744255,
      "learning_rate": 5.024442642197991e-05,
      "loss": 0.3923,
      "step": 2235
    },
    {
      "epoch": 1.31,
      "grad_norm": 0.194230304793972,
      "learning_rate": 5.016290688477707e-05,
      "loss": 0.4043,
      "step": 2236
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18345123283309359,
      "learning_rate": 5.008143138678479e-05,
      "loss": 0.3813,
      "step": 2237
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18014102531773896,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.3689,
      "step": 2238
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.20811237998328158,
      "learning_rate": 4.991861279638061e-05,
      "loss": 0.4283,
      "step": 2239
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18205015271300518,
      "learning_rate": 4.983726984784548e-05,
      "loss": 0.3505,
      "step": 2240
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.1795720443931225,
      "learning_rate": 4.975597122627445e-05,
      "loss": 0.4022,
      "step": 2241
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.2009947553079081,
      "learning_rate": 4.9674717003508045e-05,
      "loss": 0.4152,
      "step": 2242
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.1911373360756306,
      "learning_rate": 4.959350725134765e-05,
      "loss": 0.4151,
      "step": 2243
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.19571119806792772,
      "learning_rate": 4.951234204155545e-05,
      "loss": 0.4138,
      "step": 2244
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.17780390094617862,
      "learning_rate": 4.943122144585406e-05,
      "loss": 0.4184,
      "step": 2245
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18112248897689123,
      "learning_rate": 4.9350145535926796e-05,
      "loss": 0.4011,
      "step": 2246
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18862279533214676,
      "learning_rate": 4.9269114383417484e-05,
      "loss": 0.4111,
      "step": 2247
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.18318423968599412,
      "learning_rate": 4.9188128059930394e-05,
      "loss": 0.4139,
      "step": 2248
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.19014655668311042,
      "learning_rate": 4.910718663703012e-05,
      "loss": 0.3646,
      "step": 2249
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.17458066227793817,
      "learning_rate": 4.902629018624164e-05,
      "loss": 0.3695,
      "step": 2250
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.17637485698408012,
      "learning_rate": 4.89454387790502e-05,
      "loss": 0.404,
      "step": 2251
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.16443705845773854,
      "learning_rate": 4.886463248690122e-05,
      "loss": 0.3568,
      "step": 2252
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.19342091566192718,
      "learning_rate": 4.878387138120023e-05,
      "loss": 0.4185,
      "step": 2253
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.1904303316001888,
      "learning_rate": 4.8703155533312925e-05,
      "loss": 0.3929,
      "step": 2254
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.18246297112699594,
      "learning_rate": 4.862248501456484e-05,
      "loss": 0.4225,
      "step": 2255
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.1777341775174244,
      "learning_rate": 4.8541859896241616e-05,
      "loss": 0.3738,
      "step": 2256
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.18805743212178455,
      "learning_rate": 4.846128024958872e-05,
      "loss": 0.4079,
      "step": 2257
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.1820196118051572,
      "learning_rate": 4.838074614581137e-05,
      "loss": 0.4098,
      "step": 2258
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.16806431241971856,
      "learning_rate": 4.830025765607463e-05,
      "loss": 0.3666,
      "step": 2259
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.19363355095088744,
      "learning_rate": 4.821981485150323e-05,
      "loss": 0.3951,
      "step": 2260
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.2139490197452364,
      "learning_rate": 4.813941780318153e-05,
      "loss": 0.4495,
      "step": 2261
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.18747955602297983,
      "learning_rate": 4.805906658215344e-05,
      "loss": 0.4116,
      "step": 2262
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.19431415115430906,
      "learning_rate": 4.797876125942239e-05,
      "loss": 0.3709,
      "step": 2263
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.19568799936115497,
      "learning_rate": 4.789850190595129e-05,
      "loss": 0.4169,
      "step": 2264
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.18414590658896182,
      "learning_rate": 4.7818288592662295e-05,
      "loss": 0.4,
      "step": 2265
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.1729939766717147,
      "learning_rate": 4.773812139043703e-05,
      "loss": 0.3921,
      "step": 2266
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.18926823202564338,
      "learning_rate": 4.7658000370116315e-05,
      "loss": 0.3905,
      "step": 2267
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.19583540064221253,
      "learning_rate": 4.757792560250012e-05,
      "loss": 0.4175,
      "step": 2268
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.17806294072826737,
      "learning_rate": 4.749789715834758e-05,
      "loss": 0.3504,
      "step": 2269
    },
    {
      "epoch": 1.33,
      "grad_norm": 0.17134967767482345,
      "learning_rate": 4.741791510837691e-05,
      "loss": 0.3754,
      "step": 2270
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.17893435464363647,
      "learning_rate": 4.733797952326532e-05,
      "loss": 0.3571,
      "step": 2271
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.1708796829805529,
      "learning_rate": 4.725809047364894e-05,
      "loss": 0.3912,
      "step": 2272
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.18302707589369907,
      "learning_rate": 4.717824803012284e-05,
      "loss": 0.3933,
      "step": 2273
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.18717197395682367,
      "learning_rate": 4.7098452263240776e-05,
      "loss": 0.3852,
      "step": 2274
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.17986740395717019,
      "learning_rate": 4.7018703243515385e-05,
      "loss": 0.39,
      "step": 2275
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.1941875502444695,
      "learning_rate": 4.693900104141794e-05,
      "loss": 0.4185,
      "step": 2276
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.18756813942113346,
      "learning_rate": 4.6859345727378387e-05,
      "loss": 0.3923,
      "step": 2277
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.18099857236850664,
      "learning_rate": 4.6779737371785146e-05,
      "loss": 0.3917,
      "step": 2278
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.17611734325453263,
      "learning_rate": 4.670017604498522e-05,
      "loss": 0.396,
      "step": 2279
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.182638294765091,
      "learning_rate": 4.662066181728405e-05,
      "loss": 0.391,
      "step": 2280
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.17438038471285144,
      "learning_rate": 4.654119475894543e-05,
      "loss": 0.3771,
      "step": 2281
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.18251567336825883,
      "learning_rate": 4.6461774940191486e-05,
      "loss": 0.3673,
      "step": 2282
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.48873441447493016,
      "learning_rate": 4.638240243120263e-05,
      "loss": 0.4359,
      "step": 2283
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.2004078757737064,
      "learning_rate": 4.630307730211737e-05,
      "loss": 0.417,
      "step": 2284
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.1869401684241615,
      "learning_rate": 4.622379962303246e-05,
      "loss": 0.4375,
      "step": 2285
    },
    {
      "epoch": 1.34,
      "grad_norm": 0.1792861761265464,
      "learning_rate": 4.6144569464002684e-05,
      "loss": 0.3917,
      "step": 2286
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1642865831739075,
      "learning_rate": 4.606538689504077e-05,
      "loss": 0.3682,
      "step": 2287
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1730553053615041,
      "learning_rate": 4.5986251986117454e-05,
      "loss": 0.4223,
      "step": 2288
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.179517265255704,
      "learning_rate": 4.590716480716141e-05,
      "loss": 0.3911,
      "step": 2289
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1801566944583477,
      "learning_rate": 4.582812542805909e-05,
      "loss": 0.378,
      "step": 2290
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.16832506352891377,
      "learning_rate": 4.5749133918654607e-05,
      "loss": 0.3776,
      "step": 2291
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.17943384818308367,
      "learning_rate": 4.567019034874992e-05,
      "loss": 0.4011,
      "step": 2292
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.18585633962936102,
      "learning_rate": 4.559129478810457e-05,
      "loss": 0.3921,
      "step": 2293
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.17092385090432002,
      "learning_rate": 4.551244730643561e-05,
      "loss": 0.3608,
      "step": 2294
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1679173704574397,
      "learning_rate": 4.5433647973417703e-05,
      "loss": 0.3858,
      "step": 2295
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.176027839089707,
      "learning_rate": 4.535489685868294e-05,
      "loss": 0.3685,
      "step": 2296
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.19315250710153942,
      "learning_rate": 4.5276194031820774e-05,
      "loss": 0.4209,
      "step": 2297
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.18262863053483308,
      "learning_rate": 4.5197539562378014e-05,
      "loss": 0.4106,
      "step": 2298
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.17738986120110053,
      "learning_rate": 4.511893351985876e-05,
      "loss": 0.4144,
      "step": 2299
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1939134068956391,
      "learning_rate": 4.5040375973724216e-05,
      "loss": 0.4189,
      "step": 2300
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.1876065806043386,
      "learning_rate": 4.496186699339284e-05,
      "loss": 0.3718,
      "step": 2301
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.17704448706249795,
      "learning_rate": 4.488340664824019e-05,
      "loss": 0.4153,
      "step": 2302
    },
    {
      "epoch": 1.35,
      "grad_norm": 0.19343677065870282,
      "learning_rate": 4.4804995007598694e-05,
      "loss": 0.4102,
      "step": 2303
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.20827443989702227,
      "learning_rate": 4.472663214075788e-05,
      "loss": 0.4191,
      "step": 2304
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.2125984764796864,
      "learning_rate": 4.464831811696417e-05,
      "loss": 0.4045,
      "step": 2305
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.19901809672692197,
      "learning_rate": 4.457005300542077e-05,
      "loss": 0.4358,
      "step": 2306
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.1851209242969792,
      "learning_rate": 4.4491836875287674e-05,
      "loss": 0.4113,
      "step": 2307
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.1977643264945235,
      "learning_rate": 4.441366979568162e-05,
      "loss": 0.4082,
      "step": 2308
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.2235036270909591,
      "learning_rate": 4.433555183567604e-05,
      "loss": 0.3906,
      "step": 2309
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.19623592849966992,
      "learning_rate": 4.425748306430082e-05,
      "loss": 0.3846,
      "step": 2310
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.18223154039092768,
      "learning_rate": 4.41794635505425e-05,
      "loss": 0.3927,
      "step": 2311
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.193962230041838,
      "learning_rate": 4.410149336334411e-05,
      "loss": 0.4521,
      "step": 2312
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.18557136225472978,
      "learning_rate": 4.4023572571604965e-05,
      "loss": 0.4019,
      "step": 2313
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.19258003070227037,
      "learning_rate": 4.3945701244180847e-05,
      "loss": 0.4382,
      "step": 2314
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.17550674522959528,
      "learning_rate": 4.38678794498838e-05,
      "loss": 0.3746,
      "step": 2315
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.17901345999016943,
      "learning_rate": 4.379010725748206e-05,
      "loss": 0.3937,
      "step": 2316
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.18204388035763827,
      "learning_rate": 4.37123847357001e-05,
      "loss": 0.3668,
      "step": 2317
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.1732827092017323,
      "learning_rate": 4.363471195321842e-05,
      "loss": 0.3915,
      "step": 2318
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.1794821815129027,
      "learning_rate": 4.3557088978673676e-05,
      "loss": 0.4019,
      "step": 2319
    },
    {
      "epoch": 1.36,
      "grad_norm": 0.18125659105716263,
      "learning_rate": 4.347951588065835e-05,
      "loss": 0.4132,
      "step": 2320
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.17799580610948854,
      "learning_rate": 4.3401992727721e-05,
      "loss": 0.3796,
      "step": 2321
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.17617546687208122,
      "learning_rate": 4.3324519588366e-05,
      "loss": 0.3765,
      "step": 2322
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.19417273186835765,
      "learning_rate": 4.324709653105346e-05,
      "loss": 0.4013,
      "step": 2323
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.21077608405432277,
      "learning_rate": 4.316972362419931e-05,
      "loss": 0.4132,
      "step": 2324
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1803109022455879,
      "learning_rate": 4.309240093617524e-05,
      "loss": 0.3994,
      "step": 2325
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.20316435530602564,
      "learning_rate": 4.301512853530837e-05,
      "loss": 0.4351,
      "step": 2326
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1863698168969151,
      "learning_rate": 4.2937906489881516e-05,
      "loss": 0.4138,
      "step": 2327
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1749372005931854,
      "learning_rate": 4.2860734868133e-05,
      "loss": 0.4187,
      "step": 2328
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1808193383735573,
      "learning_rate": 4.27836137382565e-05,
      "loss": 0.3941,
      "step": 2329
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.18808176710898056,
      "learning_rate": 4.270654316840115e-05,
      "loss": 0.4131,
      "step": 2330
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1903477479527853,
      "learning_rate": 4.2629523226671375e-05,
      "loss": 0.414,
      "step": 2331
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.17844977047828794,
      "learning_rate": 4.25525539811269e-05,
      "loss": 0.3861,
      "step": 2332
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1891382252674035,
      "learning_rate": 4.247563549978259e-05,
      "loss": 0.4394,
      "step": 2333
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.17790885781029012,
      "learning_rate": 4.23987678506085e-05,
      "loss": 0.4006,
      "step": 2334
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1807431421440253,
      "learning_rate": 4.23219511015298e-05,
      "loss": 0.4029,
      "step": 2335
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.19080875335733244,
      "learning_rate": 4.224518532042654e-05,
      "loss": 0.4219,
      "step": 2336
    },
    {
      "epoch": 1.37,
      "grad_norm": 0.1702363744752853,
      "learning_rate": 4.2168470575133866e-05,
      "loss": 0.3839,
      "step": 2337
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1933279271715595,
      "learning_rate": 4.2091806933441826e-05,
      "loss": 0.4337,
      "step": 2338
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.22365190725291184,
      "learning_rate": 4.20151944630952e-05,
      "loss": 0.391,
      "step": 2339
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18499253190623838,
      "learning_rate": 4.1938633231793624e-05,
      "loss": 0.3911,
      "step": 2340
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1881298684489429,
      "learning_rate": 4.1862123307191484e-05,
      "loss": 0.3931,
      "step": 2341
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.17357955926644725,
      "learning_rate": 4.178566475689777e-05,
      "loss": 0.3896,
      "step": 2342
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18756044147301135,
      "learning_rate": 4.1709257648476105e-05,
      "loss": 0.4033,
      "step": 2343
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18389028551541756,
      "learning_rate": 4.1632902049444686e-05,
      "loss": 0.3439,
      "step": 2344
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1849238463508717,
      "learning_rate": 4.155659802727608e-05,
      "loss": 0.3785,
      "step": 2345
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1773081310212394,
      "learning_rate": 4.14803456493974e-05,
      "loss": 0.3766,
      "step": 2346
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18233508648942373,
      "learning_rate": 4.1404144983190044e-05,
      "loss": 0.3549,
      "step": 2347
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1851036998957086,
      "learning_rate": 4.132799609598981e-05,
      "loss": 0.3977,
      "step": 2348
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.21244947793755847,
      "learning_rate": 4.1251899055086586e-05,
      "loss": 0.4374,
      "step": 2349
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18484919749317866,
      "learning_rate": 4.117585392772457e-05,
      "loss": 0.4128,
      "step": 2350
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.2210933648642488,
      "learning_rate": 4.109986078110207e-05,
      "loss": 0.3722,
      "step": 2351
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.1951376264933818,
      "learning_rate": 4.102391968237143e-05,
      "loss": 0.3722,
      "step": 2352
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.18729461438282666,
      "learning_rate": 4.0948030698639006e-05,
      "loss": 0.3946,
      "step": 2353
    },
    {
      "epoch": 1.38,
      "grad_norm": 0.17798234177335076,
      "learning_rate": 4.087219389696515e-05,
      "loss": 0.3632,
      "step": 2354
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.18533426970439967,
      "learning_rate": 4.0796409344363995e-05,
      "loss": 0.3503,
      "step": 2355
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.1838738950459424,
      "learning_rate": 4.072067710780359e-05,
      "loss": 0.3666,
      "step": 2356
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.1782715179343142,
      "learning_rate": 4.064499725420579e-05,
      "loss": 0.4242,
      "step": 2357
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.20282525928063655,
      "learning_rate": 4.0569369850446016e-05,
      "loss": 0.3772,
      "step": 2358
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.1899237116709675,
      "learning_rate": 4.049379496335347e-05,
      "loss": 0.4187,
      "step": 2359
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.20402628915397078,
      "learning_rate": 4.041827265971091e-05,
      "loss": 0.4279,
      "step": 2360
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.18281918217454715,
      "learning_rate": 4.0342803006254624e-05,
      "loss": 0.4135,
      "step": 2361
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.17755636118105714,
      "learning_rate": 4.0267386069674384e-05,
      "loss": 0.3872,
      "step": 2362
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.17952222593236428,
      "learning_rate": 4.0192021916613344e-05,
      "loss": 0.398,
      "step": 2363
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.17493934616958587,
      "learning_rate": 4.011671061366811e-05,
      "loss": 0.3779,
      "step": 2364
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.1809894951465937,
      "learning_rate": 4.004145222738841e-05,
      "loss": 0.429,
      "step": 2365
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.17931118104328542,
      "learning_rate": 3.996624682427739e-05,
      "loss": 0.3839,
      "step": 2366
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.18045630350547573,
      "learning_rate": 3.989109447079131e-05,
      "loss": 0.3934,
      "step": 2367
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.19746890167885522,
      "learning_rate": 3.98159952333395e-05,
      "loss": 0.3887,
      "step": 2368
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.18627456979800522,
      "learning_rate": 3.974094917828438e-05,
      "loss": 0.4036,
      "step": 2369
    },
    {
      "epoch": 1.39,
      "grad_norm": 0.1900210723670723,
      "learning_rate": 3.966595637194153e-05,
      "loss": 0.3811,
      "step": 2370
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.21931012070906103,
      "learning_rate": 3.9591016880579204e-05,
      "loss": 0.4137,
      "step": 2371
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.17673450048063358,
      "learning_rate": 3.951613077041874e-05,
      "loss": 0.3972,
      "step": 2372
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.1992515854559056,
      "learning_rate": 3.944129810763425e-05,
      "loss": 0.4417,
      "step": 2373
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.21033013771714132,
      "learning_rate": 3.9366518958352585e-05,
      "loss": 0.3739,
      "step": 2374
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.16581957344786058,
      "learning_rate": 3.929179338865333e-05,
      "loss": 0.3538,
      "step": 2375
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.18415059414596757,
      "learning_rate": 3.9217121464568764e-05,
      "loss": 0.3997,
      "step": 2376
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.21088258370598734,
      "learning_rate": 3.9142503252083695e-05,
      "loss": 0.4125,
      "step": 2377
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.18939002851918915,
      "learning_rate": 3.906793881713552e-05,
      "loss": 0.4288,
      "step": 2378
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.17858771383658043,
      "learning_rate": 3.899342822561409e-05,
      "loss": 0.3881,
      "step": 2379
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.18362284734207265,
      "learning_rate": 3.8918971543361714e-05,
      "loss": 0.3823,
      "step": 2380
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.19366380872850933,
      "learning_rate": 3.884456883617296e-05,
      "loss": 0.3583,
      "step": 2381
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.1901165370683843,
      "learning_rate": 3.87702201697948e-05,
      "loss": 0.4553,
      "step": 2382
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.18980970612244083,
      "learning_rate": 3.8695925609926475e-05,
      "loss": 0.4263,
      "step": 2383
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.1927879848429132,
      "learning_rate": 3.862168522221927e-05,
      "loss": 0.4226,
      "step": 2384
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.17468957362849613,
      "learning_rate": 3.854749907227674e-05,
      "loss": 0.3732,
      "step": 2385
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.17586062824420354,
      "learning_rate": 3.8473367225654456e-05,
      "loss": 0.3876,
      "step": 2386
    },
    {
      "epoch": 1.4,
      "grad_norm": 0.18528957099228635,
      "learning_rate": 3.839928974786001e-05,
      "loss": 0.3906,
      "step": 2387
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.18446951898251418,
      "learning_rate": 3.832526670435297e-05,
      "loss": 0.3969,
      "step": 2388
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1816721057010734,
      "learning_rate": 3.825129816054477e-05,
      "loss": 0.4029,
      "step": 2389
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1902020202566427,
      "learning_rate": 3.8177384181798716e-05,
      "loss": 0.394,
      "step": 2390
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.18444549107773275,
      "learning_rate": 3.810352483342984e-05,
      "loss": 0.408,
      "step": 2391
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1844755285263522,
      "learning_rate": 3.802972018070495e-05,
      "loss": 0.4056,
      "step": 2392
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.19822608684842405,
      "learning_rate": 3.795597028884256e-05,
      "loss": 0.3833,
      "step": 2393
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1829016568339407,
      "learning_rate": 3.7882275223012655e-05,
      "loss": 0.3792,
      "step": 2394
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1851866988160698,
      "learning_rate": 3.7808635048336916e-05,
      "loss": 0.4041,
      "step": 2395
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1919325341590549,
      "learning_rate": 3.773504982988845e-05,
      "loss": 0.4426,
      "step": 2396
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.18604278311018183,
      "learning_rate": 3.766151963269182e-05,
      "loss": 0.4035,
      "step": 2397
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.17858116193671145,
      "learning_rate": 3.7588044521722965e-05,
      "loss": 0.3996,
      "step": 2398
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.18071850227923503,
      "learning_rate": 3.7514624561909176e-05,
      "loss": 0.4174,
      "step": 2399
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.19733574061187267,
      "learning_rate": 3.74412598181289e-05,
      "loss": 0.39,
      "step": 2400
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.21249491902581671,
      "learning_rate": 3.7367950355211935e-05,
      "loss": 0.4157,
      "step": 2401
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.1857615154929432,
      "learning_rate": 3.7294696237939154e-05,
      "loss": 0.4084,
      "step": 2402
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.17962808249632856,
      "learning_rate": 3.7221497531042496e-05,
      "loss": 0.3989,
      "step": 2403
    },
    {
      "epoch": 1.41,
      "grad_norm": 0.16814028788195468,
      "learning_rate": 3.714835429920499e-05,
      "loss": 0.3735,
      "step": 2404
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.19696181008047892,
      "learning_rate": 3.7075266607060633e-05,
      "loss": 0.3955,
      "step": 2405
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1754037692198337,
      "learning_rate": 3.700223451919434e-05,
      "loss": 0.3961,
      "step": 2406
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.17946772065216135,
      "learning_rate": 3.6929258100141884e-05,
      "loss": 0.3879,
      "step": 2407
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.18460327674666568,
      "learning_rate": 3.685633741438984e-05,
      "loss": 0.3869,
      "step": 2408
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.16424641965730152,
      "learning_rate": 3.6783472526375596e-05,
      "loss": 0.3463,
      "step": 2409
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.17722507584039435,
      "learning_rate": 3.6710663500487096e-05,
      "loss": 0.363,
      "step": 2410
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.18126901978881674,
      "learning_rate": 3.663791040106304e-05,
      "loss": 0.364,
      "step": 2411
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.19065262082537573,
      "learning_rate": 3.6565213292392706e-05,
      "loss": 0.4063,
      "step": 2412
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1875234518047201,
      "learning_rate": 3.6492572238715806e-05,
      "loss": 0.3905,
      "step": 2413
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1809287317584865,
      "learning_rate": 3.641998730422257e-05,
      "loss": 0.4023,
      "step": 2414
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.18186133990542913,
      "learning_rate": 3.6347458553053726e-05,
      "loss": 0.3903,
      "step": 2415
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1763716250964691,
      "learning_rate": 3.6274986049300186e-05,
      "loss": 0.3994,
      "step": 2416
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1957566899123632,
      "learning_rate": 3.620256985700328e-05,
      "loss": 0.398,
      "step": 2417
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.18967866601938366,
      "learning_rate": 3.613021004015452e-05,
      "loss": 0.4,
      "step": 2418
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1838851249112009,
      "learning_rate": 3.605790666269566e-05,
      "loss": 0.4124,
      "step": 2419
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.1793602355569485,
      "learning_rate": 3.5985659788518486e-05,
      "loss": 0.3836,
      "step": 2420
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.184921318265869,
      "learning_rate": 3.591346948146495e-05,
      "loss": 0.4309,
      "step": 2421
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.19167974540770724,
      "learning_rate": 3.584133580532696e-05,
      "loss": 0.3569,
      "step": 2422
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.17829643344053805,
      "learning_rate": 3.57692588238464e-05,
      "loss": 0.3701,
      "step": 2423
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.18187630985706627,
      "learning_rate": 3.569723860071505e-05,
      "loss": 0.4403,
      "step": 2424
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.17957754897288555,
      "learning_rate": 3.5625275199574594e-05,
      "loss": 0.3984,
      "step": 2425
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.1864438615057095,
      "learning_rate": 3.555336868401635e-05,
      "loss": 0.3834,
      "step": 2426
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.18496320192598015,
      "learning_rate": 3.5481519117581544e-05,
      "loss": 0.41,
      "step": 2427
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.17891376090065997,
      "learning_rate": 3.540972656376099e-05,
      "loss": 0.4013,
      "step": 2428
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.199688789707727,
      "learning_rate": 3.533799108599509e-05,
      "loss": 0.4572,
      "step": 2429
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.18778755143316214,
      "learning_rate": 3.526631274767389e-05,
      "loss": 0.3708,
      "step": 2430
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.18606849486194985,
      "learning_rate": 3.51946916121369e-05,
      "loss": 0.4293,
      "step": 2431
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.21202440704452852,
      "learning_rate": 3.512312774267309e-05,
      "loss": 0.4082,
      "step": 2432
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.1883615861955945,
      "learning_rate": 3.505162120252083e-05,
      "loss": 0.3817,
      "step": 2433
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.18141909696192082,
      "learning_rate": 3.4980172054867824e-05,
      "loss": 0.389,
      "step": 2434
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.17858799183526933,
      "learning_rate": 3.490878036285109e-05,
      "loss": 0.3787,
      "step": 2435
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.22220663739461033,
      "learning_rate": 3.483744618955678e-05,
      "loss": 0.3763,
      "step": 2436
    },
    {
      "epoch": 1.43,
      "grad_norm": 0.19995276389000388,
      "learning_rate": 3.4766169598020326e-05,
      "loss": 0.4001,
      "step": 2437
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18362650006688164,
      "learning_rate": 3.469495065122627e-05,
      "loss": 0.3968,
      "step": 2438
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18043154356240745,
      "learning_rate": 3.462378941210811e-05,
      "loss": 0.3749,
      "step": 2439
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.20672200265320864,
      "learning_rate": 3.4552685943548446e-05,
      "loss": 0.408,
      "step": 2440
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18049793078586143,
      "learning_rate": 3.4481640308378826e-05,
      "loss": 0.3874,
      "step": 2441
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.19112644199676984,
      "learning_rate": 3.441065256937966e-05,
      "loss": 0.3688,
      "step": 2442
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.19132070837097626,
      "learning_rate": 3.4339722789280214e-05,
      "loss": 0.3848,
      "step": 2443
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18419524418528202,
      "learning_rate": 3.4268851030758564e-05,
      "loss": 0.3593,
      "step": 2444
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.1861093369198648,
      "learning_rate": 3.4198037356441406e-05,
      "loss": 0.4068,
      "step": 2445
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.169397778715837,
      "learning_rate": 3.412728182890422e-05,
      "loss": 0.3773,
      "step": 2446
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.1809760516512152,
      "learning_rate": 3.405658451067107e-05,
      "loss": 0.3983,
      "step": 2447
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18549464871726554,
      "learning_rate": 3.3985945464214644e-05,
      "loss": 0.4013,
      "step": 2448
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18192212358832036,
      "learning_rate": 3.391536475195597e-05,
      "loss": 0.3962,
      "step": 2449
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18017976989631,
      "learning_rate": 3.3844842436264645e-05,
      "loss": 0.4006,
      "step": 2450
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18790923665047435,
      "learning_rate": 3.3774378579458756e-05,
      "loss": 0.4286,
      "step": 2451
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.19054020336793273,
      "learning_rate": 3.370397324380453e-05,
      "loss": 0.3933,
      "step": 2452
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.18812056518773732,
      "learning_rate": 3.363362649151661e-05,
      "loss": 0.4283,
      "step": 2453
    },
    {
      "epoch": 1.44,
      "grad_norm": 0.1832072022873427,
      "learning_rate": 3.356333838475788e-05,
      "loss": 0.4242,
      "step": 2454
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1862514942939368,
      "learning_rate": 3.349310898563928e-05,
      "loss": 0.4161,
      "step": 2455
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1855293596823939,
      "learning_rate": 3.342293835621999e-05,
      "loss": 0.3813,
      "step": 2456
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.17457692598284152,
      "learning_rate": 3.335282655850727e-05,
      "loss": 0.3526,
      "step": 2457
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.2221232547795512,
      "learning_rate": 3.328277365445621e-05,
      "loss": 0.4092,
      "step": 2458
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.18556918217449087,
      "learning_rate": 3.321277970597013e-05,
      "loss": 0.4045,
      "step": 2459
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1849813046141209,
      "learning_rate": 3.314284477490005e-05,
      "loss": 0.4036,
      "step": 2460
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.2052380403966287,
      "learning_rate": 3.307296892304496e-05,
      "loss": 0.454,
      "step": 2461
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1725542429051671,
      "learning_rate": 3.300315221215149e-05,
      "loss": 0.3752,
      "step": 2462
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1856827963688142,
      "learning_rate": 3.293339470391416e-05,
      "loss": 0.3819,
      "step": 2463
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.19290279871871277,
      "learning_rate": 3.286369645997517e-05,
      "loss": 0.4052,
      "step": 2464
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1905484508500894,
      "learning_rate": 3.279405754192419e-05,
      "loss": 0.3614,
      "step": 2465
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1853102955289528,
      "learning_rate": 3.2724478011298655e-05,
      "loss": 0.3868,
      "step": 2466
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1883623687154909,
      "learning_rate": 3.265495792958341e-05,
      "loss": 0.3986,
      "step": 2467
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1881091390523702,
      "learning_rate": 3.2585497358210816e-05,
      "loss": 0.4036,
      "step": 2468
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1762018240680343,
      "learning_rate": 3.2516096358560635e-05,
      "loss": 0.3782,
      "step": 2469
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.18259210530113523,
      "learning_rate": 3.244675499196e-05,
      "loss": 0.3968,
      "step": 2470
    },
    {
      "epoch": 1.45,
      "grad_norm": 0.1876496783835084,
      "learning_rate": 3.2377473319683284e-05,
      "loss": 0.4286,
      "step": 2471
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.16527016945269227,
      "learning_rate": 3.2308251402952184e-05,
      "loss": 0.3237,
      "step": 2472
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.17824915166116562,
      "learning_rate": 3.22390893029356e-05,
      "loss": 0.3884,
      "step": 2473
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.17769070135048026,
      "learning_rate": 3.216998708074948e-05,
      "loss": 0.3859,
      "step": 2474
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.20659366775907526,
      "learning_rate": 3.2100944797456946e-05,
      "loss": 0.3727,
      "step": 2475
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.1842302165987917,
      "learning_rate": 3.2031962514068135e-05,
      "loss": 0.3815,
      "step": 2476
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.1795355173432193,
      "learning_rate": 3.196304029154017e-05,
      "loss": 0.3929,
      "step": 2477
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.20144333980424745,
      "learning_rate": 3.189417819077708e-05,
      "loss": 0.4487,
      "step": 2478
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.1803881397904987,
      "learning_rate": 3.182537627262977e-05,
      "loss": 0.4067,
      "step": 2479
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18559576834927588,
      "learning_rate": 3.175663459789602e-05,
      "loss": 0.4021,
      "step": 2480
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18096980466872054,
      "learning_rate": 3.1687953227320255e-05,
      "loss": 0.3889,
      "step": 2481
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18947934870893382,
      "learning_rate": 3.161933222159371e-05,
      "loss": 0.4185,
      "step": 2482
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.17612052321661906,
      "learning_rate": 3.155077164135428e-05,
      "loss": 0.3926,
      "step": 2483
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18156449422153734,
      "learning_rate": 3.148227154718638e-05,
      "loss": 0.3947,
      "step": 2484
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18913021680801503,
      "learning_rate": 3.141383199962106e-05,
      "loss": 0.4201,
      "step": 2485
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.17258485658517256,
      "learning_rate": 3.134545305913582e-05,
      "loss": 0.3837,
      "step": 2486
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18993701372256167,
      "learning_rate": 3.1277134786154635e-05,
      "loss": 0.4257,
      "step": 2487
    },
    {
      "epoch": 1.46,
      "grad_norm": 0.18064607120490733,
      "learning_rate": 3.120887724104786e-05,
      "loss": 0.419,
      "step": 2488
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.23587919214839248,
      "learning_rate": 3.114068048413218e-05,
      "loss": 0.4105,
      "step": 2489
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.18212465867115668,
      "learning_rate": 3.107254457567059e-05,
      "loss": 0.4004,
      "step": 2490
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.18614735503772217,
      "learning_rate": 3.100446957587224e-05,
      "loss": 0.3708,
      "step": 2491
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.17980373001984692,
      "learning_rate": 3.093645554489254e-05,
      "loss": 0.4045,
      "step": 2492
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.18516336402362785,
      "learning_rate": 3.0868502542833014e-05,
      "loss": 0.4186,
      "step": 2493
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.19077688542561766,
      "learning_rate": 3.080061062974119e-05,
      "loss": 0.3711,
      "step": 2494
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.18723506971423085,
      "learning_rate": 3.073277986561064e-05,
      "loss": 0.4253,
      "step": 2495
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.17887609250397457,
      "learning_rate": 3.066501031038104e-05,
      "loss": 0.3347,
      "step": 2496
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.20234366752403984,
      "learning_rate": 3.059730202393773e-05,
      "loss": 0.385,
      "step": 2497
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.17325627043749642,
      "learning_rate": 3.052965506611212e-05,
      "loss": 0.3711,
      "step": 2498
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.1738175346182311,
      "learning_rate": 3.0462069496681333e-05,
      "loss": 0.3447,
      "step": 2499
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.2293883188659455,
      "learning_rate": 3.0394545375368212e-05,
      "loss": 0.3967,
      "step": 2500
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.18780544734321805,
      "learning_rate": 3.0327082761841376e-05,
      "loss": 0.3927,
      "step": 2501
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.21001427845942877,
      "learning_rate": 3.0259681715715094e-05,
      "loss": 0.425,
      "step": 2502
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.1983145619953598,
      "learning_rate": 3.019234229654909e-05,
      "loss": 0.4236,
      "step": 2503
    },
    {
      "epoch": 1.47,
      "grad_norm": 0.20394563948631428,
      "learning_rate": 3.012506456384885e-05,
      "loss": 0.3835,
      "step": 2504
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.2256167643229903,
      "learning_rate": 3.0057848577065194e-05,
      "loss": 0.4114,
      "step": 2505
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.18958440413331032,
      "learning_rate": 2.9990694395594454e-05,
      "loss": 0.4128,
      "step": 2506
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.21619989441807325,
      "learning_rate": 2.9923602078778267e-05,
      "loss": 0.4396,
      "step": 2507
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.18543165865572497,
      "learning_rate": 2.9856571685903678e-05,
      "loss": 0.4119,
      "step": 2508
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.18652376800981768,
      "learning_rate": 2.9789603276203006e-05,
      "loss": 0.4108,
      "step": 2509
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.1877506760116236,
      "learning_rate": 2.972269690885372e-05,
      "loss": 0.3733,
      "step": 2510
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.20117391667012713,
      "learning_rate": 2.9655852642978567e-05,
      "loss": 0.4053,
      "step": 2511
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.19049978481040805,
      "learning_rate": 2.9589070537645346e-05,
      "loss": 0.3695,
      "step": 2512
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.19377859226154356,
      "learning_rate": 2.952235065186697e-05,
      "loss": 0.4216,
      "step": 2513
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.1877033492781889,
      "learning_rate": 2.945569304460136e-05,
      "loss": 0.3972,
      "step": 2514
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.17561774343820402,
      "learning_rate": 2.9389097774751416e-05,
      "loss": 0.3944,
      "step": 2515
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.23612382395272608,
      "learning_rate": 2.9322564901164872e-05,
      "loss": 0.4192,
      "step": 2516
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.1804117654771822,
      "learning_rate": 2.9256094482634433e-05,
      "loss": 0.3803,
      "step": 2517
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.1871646527168998,
      "learning_rate": 2.9189686577897547e-05,
      "loss": 0.363,
      "step": 2518
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.18268971436832296,
      "learning_rate": 2.9123341245636494e-05,
      "loss": 0.3766,
      "step": 2519
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.19640957670522896,
      "learning_rate": 2.9057058544478144e-05,
      "loss": 0.381,
      "step": 2520
    },
    {
      "epoch": 1.48,
      "grad_norm": 0.1806292879024908,
      "learning_rate": 2.8990838532994104e-05,
      "loss": 0.358,
      "step": 2521
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18220069066196212,
      "learning_rate": 2.8924681269700582e-05,
      "loss": 0.3923,
      "step": 2522
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.1982043765920388,
      "learning_rate": 2.885858681305832e-05,
      "loss": 0.4287,
      "step": 2523
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.17306560605324575,
      "learning_rate": 2.8792555221472573e-05,
      "loss": 0.3807,
      "step": 2524
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.21490036640126703,
      "learning_rate": 2.8726586553293043e-05,
      "loss": 0.3915,
      "step": 2525
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.1969710728166032,
      "learning_rate": 2.8660680866813782e-05,
      "loss": 0.412,
      "step": 2526
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18990724568801395,
      "learning_rate": 2.8594838220273256e-05,
      "loss": 0.3688,
      "step": 2527
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.21050184435865085,
      "learning_rate": 2.8529058671854224e-05,
      "loss": 0.414,
      "step": 2528
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.19588995136687634,
      "learning_rate": 2.846334227968359e-05,
      "loss": 0.3833,
      "step": 2529
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18766563465171782,
      "learning_rate": 2.8397689101832558e-05,
      "loss": 0.414,
      "step": 2530
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.1837863513894188,
      "learning_rate": 2.8332099196316386e-05,
      "loss": 0.3927,
      "step": 2531
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18099305950603425,
      "learning_rate": 2.8266572621094588e-05,
      "loss": 0.3472,
      "step": 2532
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18324457098340038,
      "learning_rate": 2.8201109434070482e-05,
      "loss": 0.3938,
      "step": 2533
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18010794244730502,
      "learning_rate": 2.8135709693091516e-05,
      "loss": 0.3829,
      "step": 2534
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18178616693007432,
      "learning_rate": 2.807037345594907e-05,
      "loss": 0.3818,
      "step": 2535
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.18482236442092914,
      "learning_rate": 2.8005100780378323e-05,
      "loss": 0.4043,
      "step": 2536
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.1690501700146767,
      "learning_rate": 2.793989172405839e-05,
      "loss": 0.3519,
      "step": 2537
    },
    {
      "epoch": 1.49,
      "grad_norm": 0.16886286326694724,
      "learning_rate": 2.7874746344612114e-05,
      "loss": 0.367,
      "step": 2538
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.1876546785525707,
      "learning_rate": 2.780966469960602e-05,
      "loss": 0.3876,
      "step": 2539
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.2004024323089618,
      "learning_rate": 2.7744646846550448e-05,
      "loss": 0.4066,
      "step": 2540
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.1785253701604336,
      "learning_rate": 2.7679692842899284e-05,
      "loss": 0.4027,
      "step": 2541
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.1860290157709305,
      "learning_rate": 2.7614802746049938e-05,
      "loss": 0.395,
      "step": 2542
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.16775418599650602,
      "learning_rate": 2.7549976613343452e-05,
      "loss": 0.3606,
      "step": 2543
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.18567178220266478,
      "learning_rate": 2.7485214502064316e-05,
      "loss": 0.389,
      "step": 2544
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.20351953451192095,
      "learning_rate": 2.7420516469440384e-05,
      "loss": 0.4492,
      "step": 2545
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.18873302654643795,
      "learning_rate": 2.7355882572642944e-05,
      "loss": 0.427,
      "step": 2546
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.2044457762700013,
      "learning_rate": 2.7291312868786624e-05,
      "loss": 0.3937,
      "step": 2547
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.24462628450748491,
      "learning_rate": 2.7226807414929278e-05,
      "loss": 0.474,
      "step": 2548
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.16432682018058387,
      "learning_rate": 2.7162366268072026e-05,
      "loss": 0.3517,
      "step": 2549
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.19392679244276614,
      "learning_rate": 2.7097989485159137e-05,
      "loss": 0.429,
      "step": 2550
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.16787805427224112,
      "learning_rate": 2.703367712307804e-05,
      "loss": 0.3644,
      "step": 2551
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.20648943423458793,
      "learning_rate": 2.696942923865915e-05,
      "loss": 0.377,
      "step": 2552
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.19572324346821007,
      "learning_rate": 2.6905245888676012e-05,
      "loss": 0.409,
      "step": 2553
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.6184991261797215,
      "learning_rate": 2.68411271298451e-05,
      "loss": 0.4096,
      "step": 2554
    },
    {
      "epoch": 1.5,
      "grad_norm": 0.18889426170403964,
      "learning_rate": 2.6777073018825772e-05,
      "loss": 0.4038,
      "step": 2555
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.18850468933905212,
      "learning_rate": 2.6713083612220314e-05,
      "loss": 0.3848,
      "step": 2556
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.18923419740292627,
      "learning_rate": 2.6649158966573817e-05,
      "loss": 0.377,
      "step": 2557
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.20595259493204982,
      "learning_rate": 2.6585299138374143e-05,
      "loss": 0.3942,
      "step": 2558
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.20110742094369213,
      "learning_rate": 2.6521504184051892e-05,
      "loss": 0.4843,
      "step": 2559
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.19875067878685518,
      "learning_rate": 2.6457774159980307e-05,
      "loss": 0.3911,
      "step": 2560
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.18607601583404562,
      "learning_rate": 2.639410912247531e-05,
      "loss": 0.402,
      "step": 2561
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.19804026395342145,
      "learning_rate": 2.63305091277953e-05,
      "loss": 0.3701,
      "step": 2562
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.1851894040551206,
      "learning_rate": 2.6266974232141285e-05,
      "loss": 0.4279,
      "step": 2563
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.20060869681257895,
      "learning_rate": 2.620350449165676e-05,
      "loss": 0.3891,
      "step": 2564
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.17879505755560798,
      "learning_rate": 2.6140099962427533e-05,
      "loss": 0.3618,
      "step": 2565
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.20607338545873524,
      "learning_rate": 2.6076760700481893e-05,
      "loss": 0.4254,
      "step": 2566
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.18787894153970464,
      "learning_rate": 2.6013486761790427e-05,
      "loss": 0.3721,
      "step": 2567
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.18901310393891138,
      "learning_rate": 2.5950278202265997e-05,
      "loss": 0.383,
      "step": 2568
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.20353516701869728,
      "learning_rate": 2.588713507776368e-05,
      "loss": 0.3866,
      "step": 2569
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.1959203531342037,
      "learning_rate": 2.582405744408076e-05,
      "loss": 0.3676,
      "step": 2570
    },
    {
      "epoch": 1.51,
      "grad_norm": 0.1728488366034335,
      "learning_rate": 2.5761045356956593e-05,
      "loss": 0.3619,
      "step": 2571
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.17762490554986549,
      "learning_rate": 2.5698098872072652e-05,
      "loss": 0.3712,
      "step": 2572
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.1943025267948541,
      "learning_rate": 2.5635218045052477e-05,
      "loss": 0.4136,
      "step": 2573
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.17330976704038878,
      "learning_rate": 2.5572402931461493e-05,
      "loss": 0.393,
      "step": 2574
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.18585903169270396,
      "learning_rate": 2.5509653586807125e-05,
      "loss": 0.3699,
      "step": 2575
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.17889504516326968,
      "learning_rate": 2.5446970066538656e-05,
      "loss": 0.3894,
      "step": 2576
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.19130109369636317,
      "learning_rate": 2.53843524260473e-05,
      "loss": 0.4073,
      "step": 2577
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.19114939911466414,
      "learning_rate": 2.5321800720665856e-05,
      "loss": 0.4145,
      "step": 2578
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.17470921774314174,
      "learning_rate": 2.5259315005669027e-05,
      "loss": 0.3453,
      "step": 2579
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.18014253092191793,
      "learning_rate": 2.5196895336273163e-05,
      "loss": 0.3948,
      "step": 2580
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.19376171678954113,
      "learning_rate": 2.513454176763618e-05,
      "loss": 0.3612,
      "step": 2581
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.193545613814365,
      "learning_rate": 2.507225435485766e-05,
      "loss": 0.4536,
      "step": 2582
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.1816454621102984,
      "learning_rate": 2.501003315297875e-05,
      "loss": 0.4009,
      "step": 2583
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.18568315961597195,
      "learning_rate": 2.4947878216981945e-05,
      "loss": 0.3758,
      "step": 2584
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.20625822474781916,
      "learning_rate": 2.4885789601791364e-05,
      "loss": 0.4358,
      "step": 2585
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.19717438783756117,
      "learning_rate": 2.4823767362272455e-05,
      "loss": 0.3703,
      "step": 2586
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.1857784997413179,
      "learning_rate": 2.4761811553231916e-05,
      "loss": 0.3822,
      "step": 2587
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.20221940136029923,
      "learning_rate": 2.469992222941787e-05,
      "loss": 0.401,
      "step": 2588
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.1933568592568705,
      "learning_rate": 2.4638099445519636e-05,
      "loss": 0.4101,
      "step": 2589
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.17721112883493306,
      "learning_rate": 2.4576343256167766e-05,
      "loss": 0.3718,
      "step": 2590
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.20592471637280918,
      "learning_rate": 2.4514653715933876e-05,
      "loss": 0.4132,
      "step": 2591
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.19568943375143438,
      "learning_rate": 2.4453030879330784e-05,
      "loss": 0.4079,
      "step": 2592
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.19080954173046305,
      "learning_rate": 2.4391474800812332e-05,
      "loss": 0.3834,
      "step": 2593
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.17854767229872442,
      "learning_rate": 2.4329985534773358e-05,
      "loss": 0.4174,
      "step": 2594
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.19135862577144352,
      "learning_rate": 2.4268563135549682e-05,
      "loss": 0.3876,
      "step": 2595
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.18577643412648345,
      "learning_rate": 2.4207207657418042e-05,
      "loss": 0.3792,
      "step": 2596
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.1829599821421232,
      "learning_rate": 2.4145919154595975e-05,
      "loss": 0.3881,
      "step": 2597
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.2054197464506424,
      "learning_rate": 2.4084697681241906e-05,
      "loss": 0.399,
      "step": 2598
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.1789713792446189,
      "learning_rate": 2.402354329145504e-05,
      "loss": 0.3907,
      "step": 2599
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.1880784992003119,
      "learning_rate": 2.3962456039275206e-05,
      "loss": 0.4189,
      "step": 2600
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.1954510623029123,
      "learning_rate": 2.3901435978682986e-05,
      "loss": 0.4032,
      "step": 2601
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.18452853553069323,
      "learning_rate": 2.3840483163599582e-05,
      "loss": 0.3895,
      "step": 2602
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.19828511642616103,
      "learning_rate": 2.3779597647886753e-05,
      "loss": 0.3709,
      "step": 2603
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.17953056383513943,
      "learning_rate": 2.37187794853468e-05,
      "loss": 0.3992,
      "step": 2604
    },
    {
      "epoch": 1.53,
      "grad_norm": 0.2031943276255951,
      "learning_rate": 2.3658028729722502e-05,
      "loss": 0.437,
      "step": 2605
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19738049305029495,
      "learning_rate": 2.3597345434697093e-05,
      "loss": 0.393,
      "step": 2606
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.1849596059995837,
      "learning_rate": 2.3536729653894118e-05,
      "loss": 0.3803,
      "step": 2607
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.20204811834390532,
      "learning_rate": 2.3476181440877564e-05,
      "loss": 0.4105,
      "step": 2608
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.24667469022404487,
      "learning_rate": 2.341570084915168e-05,
      "loss": 0.4185,
      "step": 2609
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.20269213137231093,
      "learning_rate": 2.3355287932160884e-05,
      "loss": 0.4583,
      "step": 2610
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19877434103859673,
      "learning_rate": 2.329494274328988e-05,
      "loss": 0.4421,
      "step": 2611
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.18687566792579868,
      "learning_rate": 2.3234665335863526e-05,
      "loss": 0.3901,
      "step": 2612
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19940981465040128,
      "learning_rate": 2.3174455763146717e-05,
      "loss": 0.4099,
      "step": 2613
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.1823677638592108,
      "learning_rate": 2.3114314078344478e-05,
      "loss": 0.3668,
      "step": 2614
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19369581454319543,
      "learning_rate": 2.3054240334601805e-05,
      "loss": 0.3986,
      "step": 2615
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19415035095905245,
      "learning_rate": 2.2994234585003638e-05,
      "loss": 0.4157,
      "step": 2616
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.1957312431961838,
      "learning_rate": 2.2934296882574847e-05,
      "loss": 0.4211,
      "step": 2617
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.22121291068701435,
      "learning_rate": 2.287442728028021e-05,
      "loss": 0.4166,
      "step": 2618
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.17201804281961042,
      "learning_rate": 2.2814625831024318e-05,
      "loss": 0.3583,
      "step": 2619
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.18485977615841773,
      "learning_rate": 2.2754892587651434e-05,
      "loss": 0.3821,
      "step": 2620
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.17947272170945186,
      "learning_rate": 2.2695227602945702e-05,
      "loss": 0.3834,
      "step": 2621
    },
    {
      "epoch": 1.54,
      "grad_norm": 0.19177743756919843,
      "learning_rate": 2.2635630929630904e-05,
      "loss": 0.392,
      "step": 2622
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.18192144036137656,
      "learning_rate": 2.2576102620370364e-05,
      "loss": 0.3574,
      "step": 2623
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.21560141298971364,
      "learning_rate": 2.251664272776709e-05,
      "loss": 0.4083,
      "step": 2624
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.18320008573103155,
      "learning_rate": 2.2457251304363646e-05,
      "loss": 0.4345,
      "step": 2625
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.17861793380088878,
      "learning_rate": 2.2397928402641988e-05,
      "loss": 0.3803,
      "step": 2626
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.2066675002043351,
      "learning_rate": 2.2338674075023615e-05,
      "loss": 0.4367,
      "step": 2627
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.18303979352935243,
      "learning_rate": 2.2279488373869416e-05,
      "loss": 0.3914,
      "step": 2628
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.18526974271163213,
      "learning_rate": 2.2220371351479607e-05,
      "loss": 0.3683,
      "step": 2629
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.19396331959195012,
      "learning_rate": 2.2161323060093742e-05,
      "loss": 0.432,
      "step": 2630
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.17699796388171363,
      "learning_rate": 2.2102343551890627e-05,
      "loss": 0.332,
      "step": 2631
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.1840940308979838,
      "learning_rate": 2.2043432878988313e-05,
      "loss": 0.3735,
      "step": 2632
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.1833386406759228,
      "learning_rate": 2.198459109344395e-05,
      "loss": 0.3718,
      "step": 2633
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.20277352966061787,
      "learning_rate": 2.1925818247253893e-05,
      "loss": 0.389,
      "step": 2634
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.19414136777804827,
      "learning_rate": 2.186711439235356e-05,
      "loss": 0.3913,
      "step": 2635
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.19538391282392667,
      "learning_rate": 2.180847958061737e-05,
      "loss": 0.4474,
      "step": 2636
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.17521825056206217,
      "learning_rate": 2.174991386385876e-05,
      "loss": 0.3753,
      "step": 2637
    },
    {
      "epoch": 1.55,
      "grad_norm": 0.20633732426596804,
      "learning_rate": 2.169141729383011e-05,
      "loss": 0.393,
      "step": 2638
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.1837522109909096,
      "learning_rate": 2.163298992222269e-05,
      "loss": 0.4211,
      "step": 2639
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.1916084114095356,
      "learning_rate": 2.1574631800666635e-05,
      "loss": 0.3848,
      "step": 2640
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.19292067652803527,
      "learning_rate": 2.1516342980730885e-05,
      "loss": 0.4343,
      "step": 2641
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.18559141434436843,
      "learning_rate": 2.145812351392309e-05,
      "loss": 0.3903,
      "step": 2642
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.18681580264278325,
      "learning_rate": 2.1399973451689682e-05,
      "loss": 0.4171,
      "step": 2643
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.183518946539565,
      "learning_rate": 2.1341892845415766e-05,
      "loss": 0.4043,
      "step": 2644
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.18031957343078933,
      "learning_rate": 2.1283881746424982e-05,
      "loss": 0.3932,
      "step": 2645
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.19277340308330332,
      "learning_rate": 2.1225940205979645e-05,
      "loss": 0.3709,
      "step": 2646
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.1793216787197076,
      "learning_rate": 2.1168068275280562e-05,
      "loss": 0.3646,
      "step": 2647
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.18616507644272884,
      "learning_rate": 2.111026600546704e-05,
      "loss": 0.3917,
      "step": 2648
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.20656052185088922,
      "learning_rate": 2.1052533447616817e-05,
      "loss": 0.4195,
      "step": 2649
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.1871278424966707,
      "learning_rate": 2.0994870652746045e-05,
      "loss": 0.3503,
      "step": 2650
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.17727320184190487,
      "learning_rate": 2.093727767180923e-05,
      "loss": 0.3923,
      "step": 2651
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.20761253440270935,
      "learning_rate": 2.087975455569915e-05,
      "loss": 0.4767,
      "step": 2652
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.18825241614671864,
      "learning_rate": 2.0822301355246877e-05,
      "loss": 0.3972,
      "step": 2653
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.20401644685323356,
      "learning_rate": 2.0764918121221722e-05,
      "loss": 0.4012,
      "step": 2654
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.17507114404738788,
      "learning_rate": 2.0707604904331103e-05,
      "loss": 0.3586,
      "step": 2655
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18781443778443094,
      "learning_rate": 2.0650361755220625e-05,
      "loss": 0.3892,
      "step": 2656
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.1709386563523762,
      "learning_rate": 2.0593188724473956e-05,
      "loss": 0.344,
      "step": 2657
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.1889048966807141,
      "learning_rate": 2.053608586261282e-05,
      "loss": 0.375,
      "step": 2658
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18455808123424355,
      "learning_rate": 2.04790532200969e-05,
      "loss": 0.3713,
      "step": 2659
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.1754654094920607,
      "learning_rate": 2.042209084732387e-05,
      "loss": 0.3658,
      "step": 2660
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18022772531742068,
      "learning_rate": 2.0365198794629303e-05,
      "loss": 0.3729,
      "step": 2661
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19023673110705866,
      "learning_rate": 2.030837711228657e-05,
      "loss": 0.4075,
      "step": 2662
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19573207608030416,
      "learning_rate": 2.0251625850506927e-05,
      "loss": 0.4093,
      "step": 2663
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19666613310850647,
      "learning_rate": 2.0194945059439417e-05,
      "loss": 0.4187,
      "step": 2664
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19091841391757786,
      "learning_rate": 2.01383347891707e-05,
      "loss": 0.3908,
      "step": 2665
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.20185751082030062,
      "learning_rate": 2.0081795089725253e-05,
      "loss": 0.4204,
      "step": 2666
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19302561622447217,
      "learning_rate": 2.0025326011065148e-05,
      "loss": 0.4143,
      "step": 2667
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18613507094571158,
      "learning_rate": 1.996892760308998e-05,
      "loss": 0.3884,
      "step": 2668
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18617443797375527,
      "learning_rate": 1.9912599915637e-05,
      "loss": 0.3809,
      "step": 2669
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.19085092433993356,
      "learning_rate": 1.9856342998480913e-05,
      "loss": 0.3633,
      "step": 2670
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.18701970725839678,
      "learning_rate": 1.9800156901333855e-05,
      "loss": 0.4068,
      "step": 2671
    },
    {
      "epoch": 1.57,
      "grad_norm": 0.1974135928674328,
      "learning_rate": 1.9744041673845448e-05,
      "loss": 0.4193,
      "step": 2672
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.19094778297421405,
      "learning_rate": 1.9687997365602663e-05,
      "loss": 0.4151,
      "step": 2673
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.19765550896862327,
      "learning_rate": 1.963202402612978e-05,
      "loss": 0.4001,
      "step": 2674
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1781031913824185,
      "learning_rate": 1.9576121704888396e-05,
      "loss": 0.3829,
      "step": 2675
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.19762765974127958,
      "learning_rate": 1.9520290451277358e-05,
      "loss": 0.3953,
      "step": 2676
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.18030388133141192,
      "learning_rate": 1.946453031463269e-05,
      "loss": 0.3794,
      "step": 2677
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.19733771540385325,
      "learning_rate": 1.9408841344227547e-05,
      "loss": 0.426,
      "step": 2678
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.18984191947112966,
      "learning_rate": 1.9353223589272252e-05,
      "loss": 0.3906,
      "step": 2679
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1901537855371303,
      "learning_rate": 1.92976770989142e-05,
      "loss": 0.3756,
      "step": 2680
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.16848529460481557,
      "learning_rate": 1.9242201922237736e-05,
      "loss": 0.3277,
      "step": 2681
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.18134548951153853,
      "learning_rate": 1.918679810826427e-05,
      "loss": 0.3688,
      "step": 2682
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.18559056941559682,
      "learning_rate": 1.91314657059521e-05,
      "loss": 0.3886,
      "step": 2683
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1786840191849677,
      "learning_rate": 1.907620476419645e-05,
      "loss": 0.3788,
      "step": 2684
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1994322658620708,
      "learning_rate": 1.9021015331829396e-05,
      "loss": 0.3893,
      "step": 2685
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.1817868235867725,
      "learning_rate": 1.896589745761982e-05,
      "loss": 0.3957,
      "step": 2686
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.17602823761099434,
      "learning_rate": 1.891085119027334e-05,
      "loss": 0.3554,
      "step": 2687
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.19267039852977308,
      "learning_rate": 1.885587657843232e-05,
      "loss": 0.4044,
      "step": 2688
    },
    {
      "epoch": 1.58,
      "grad_norm": 0.18301072528946535,
      "learning_rate": 1.8800973670675827e-05,
      "loss": 0.358,
      "step": 2689
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.19200649093878824,
      "learning_rate": 1.874614251551957e-05,
      "loss": 0.3852,
      "step": 2690
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.17449134464711386,
      "learning_rate": 1.8691383161415764e-05,
      "loss": 0.3737,
      "step": 2691
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.19645308745000037,
      "learning_rate": 1.8636695656753278e-05,
      "loss": 0.4098,
      "step": 2692
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18024402249202065,
      "learning_rate": 1.8582080049857465e-05,
      "loss": 0.3609,
      "step": 2693
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.1908219232395505,
      "learning_rate": 1.8527536388990106e-05,
      "loss": 0.4077,
      "step": 2694
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18326557695226708,
      "learning_rate": 1.8473064722349453e-05,
      "loss": 0.3833,
      "step": 2695
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18452653482256087,
      "learning_rate": 1.841866509807013e-05,
      "loss": 0.3728,
      "step": 2696
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.17820866873765373,
      "learning_rate": 1.8364337564223057e-05,
      "loss": 0.3632,
      "step": 2697
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18998043544259907,
      "learning_rate": 1.831008216881548e-05,
      "loss": 0.4227,
      "step": 2698
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.19715562552177654,
      "learning_rate": 1.8255898959790953e-05,
      "loss": 0.4085,
      "step": 2699
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18232780274953944,
      "learning_rate": 1.820178798502913e-05,
      "loss": 0.3419,
      "step": 2700
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.17608648085623013,
      "learning_rate": 1.8147749292345917e-05,
      "loss": 0.344,
      "step": 2701
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.1773025116918539,
      "learning_rate": 1.809378292949333e-05,
      "loss": 0.3661,
      "step": 2702
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.20513926640304037,
      "learning_rate": 1.8039888944159444e-05,
      "loss": 0.421,
      "step": 2703
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.18068919279336962,
      "learning_rate": 1.798606738396843e-05,
      "loss": 0.3815,
      "step": 2704
    },
    {
      "epoch": 1.59,
      "grad_norm": 0.17767261806592177,
      "learning_rate": 1.79323182964804e-05,
      "loss": 0.3626,
      "step": 2705
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.18267825701494264,
      "learning_rate": 1.787864172919147e-05,
      "loss": 0.3943,
      "step": 2706
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.19565515278454887,
      "learning_rate": 1.7825037729533632e-05,
      "loss": 0.3804,
      "step": 2707
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.1985124018586052,
      "learning_rate": 1.7771506344874778e-05,
      "loss": 0.387,
      "step": 2708
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.18707453082209516,
      "learning_rate": 1.7718047622518652e-05,
      "loss": 0.3809,
      "step": 2709
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.18965578493204102,
      "learning_rate": 1.7664661609704704e-05,
      "loss": 0.3993,
      "step": 2710
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.1963544372024192,
      "learning_rate": 1.761134835360826e-05,
      "loss": 0.4351,
      "step": 2711
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.1988795587027005,
      "learning_rate": 1.755810790134029e-05,
      "loss": 0.4362,
      "step": 2712
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.1842729514301608,
      "learning_rate": 1.750494029994737e-05,
      "loss": 0.356,
      "step": 2713
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.19009932018603531,
      "learning_rate": 1.745184559641181e-05,
      "loss": 0.4163,
      "step": 2714
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.1759133002009477,
      "learning_rate": 1.7398823837651447e-05,
      "loss": 0.354,
      "step": 2715
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.18882431184786572,
      "learning_rate": 1.7345875070519624e-05,
      "loss": 0.3412,
      "step": 2716
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.19579173000957384,
      "learning_rate": 1.729299934180525e-05,
      "loss": 0.3995,
      "step": 2717
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.19977064472735956,
      "learning_rate": 1.7240196698232657e-05,
      "loss": 0.4335,
      "step": 2718
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.18971473912529013,
      "learning_rate": 1.7187467186461626e-05,
      "loss": 0.406,
      "step": 2719
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.17812152081411728,
      "learning_rate": 1.713481085308728e-05,
      "loss": 0.3564,
      "step": 2720
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.2017249184456053,
      "learning_rate": 1.708222774464008e-05,
      "loss": 0.3942,
      "step": 2721
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.19519933753526805,
      "learning_rate": 1.702971790758582e-05,
      "loss": 0.3991,
      "step": 2722
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.18644811213473475,
      "learning_rate": 1.6977281388325472e-05,
      "loss": 0.4005,
      "step": 2723
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.18195473908167434,
      "learning_rate": 1.6924918233195286e-05,
      "loss": 0.376,
      "step": 2724
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.1965395495642495,
      "learning_rate": 1.687262848846668e-05,
      "loss": 0.4223,
      "step": 2725
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.1778406839404565,
      "learning_rate": 1.6820412200346147e-05,
      "loss": 0.3916,
      "step": 2726
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.19717405460574805,
      "learning_rate": 1.6768269414975314e-05,
      "loss": 0.4256,
      "step": 2727
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.17888651432068586,
      "learning_rate": 1.6716200178430852e-05,
      "loss": 0.3389,
      "step": 2728
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.2047795225110605,
      "learning_rate": 1.6664204536724436e-05,
      "loss": 0.3886,
      "step": 2729
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.18488557166519895,
      "learning_rate": 1.6612282535802716e-05,
      "loss": 0.3952,
      "step": 2730
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.19387533792584713,
      "learning_rate": 1.656043422154725e-05,
      "loss": 0.4038,
      "step": 2731
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.19855401567331812,
      "learning_rate": 1.6508659639774503e-05,
      "loss": 0.3573,
      "step": 2732
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.1910322360488063,
      "learning_rate": 1.6456958836235747e-05,
      "loss": 0.3986,
      "step": 2733
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.17910459908780496,
      "learning_rate": 1.64053318566171e-05,
      "loss": 0.3815,
      "step": 2734
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.1791557436007829,
      "learning_rate": 1.635377874653945e-05,
      "loss": 0.3654,
      "step": 2735
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.19730991105935478,
      "learning_rate": 1.6302299551558353e-05,
      "loss": 0.3962,
      "step": 2736
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.19454703085067263,
      "learning_rate": 1.6250894317164088e-05,
      "loss": 0.3852,
      "step": 2737
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.18825033606033495,
      "learning_rate": 1.6199563088781588e-05,
      "loss": 0.4103,
      "step": 2738
    },
    {
      "epoch": 1.61,
      "grad_norm": 0.17855372979030418,
      "learning_rate": 1.6148305911770377e-05,
      "loss": 0.3717,
      "step": 2739
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.17946602161319378,
      "learning_rate": 1.6097122831424538e-05,
      "loss": 0.3583,
      "step": 2740
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.18081197865723575,
      "learning_rate": 1.604601389297271e-05,
      "loss": 0.3771,
      "step": 2741
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.2597749579645915,
      "learning_rate": 1.5994979141577936e-05,
      "loss": 0.4466,
      "step": 2742
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.1819143066308158,
      "learning_rate": 1.5944018622337764e-05,
      "loss": 0.3431,
      "step": 2743
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.18034997883963755,
      "learning_rate": 1.5893132380284183e-05,
      "loss": 0.3839,
      "step": 2744
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.196898440986257,
      "learning_rate": 1.584232046038343e-05,
      "loss": 0.4312,
      "step": 2745
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.18466489312815543,
      "learning_rate": 1.5791582907536152e-05,
      "loss": 0.3577,
      "step": 2746
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.19649178998426725,
      "learning_rate": 1.5740919766577288e-05,
      "loss": 0.417,
      "step": 2747
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.22045623088641314,
      "learning_rate": 1.5690331082276023e-05,
      "loss": 0.3905,
      "step": 2748
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.19000235214368424,
      "learning_rate": 1.5639816899335645e-05,
      "loss": 0.4092,
      "step": 2749
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.1902486275954528,
      "learning_rate": 1.5589377262393735e-05,
      "loss": 0.3654,
      "step": 2750
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.19778769806066843,
      "learning_rate": 1.5539012216021954e-05,
      "loss": 0.3993,
      "step": 2751
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.3270150637834885,
      "learning_rate": 1.5488721804726003e-05,
      "loss": 0.3809,
      "step": 2752
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.18103395554668633,
      "learning_rate": 1.5438506072945703e-05,
      "loss": 0.3828,
      "step": 2753
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.18629332641729748,
      "learning_rate": 1.5388365065054845e-05,
      "loss": 0.365,
      "step": 2754
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.1912439937088145,
      "learning_rate": 1.533829882536121e-05,
      "loss": 0.4163,
      "step": 2755
    },
    {
      "epoch": 1.62,
      "grad_norm": 0.2031011683329402,
      "learning_rate": 1.5288307398106484e-05,
      "loss": 0.4449,
      "step": 2756
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.18874978222699812,
      "learning_rate": 1.5238390827466287e-05,
      "loss": 0.3684,
      "step": 2757
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.18504340910662923,
      "learning_rate": 1.5188549157550013e-05,
      "loss": 0.3854,
      "step": 2758
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.19077975170750153,
      "learning_rate": 1.5138782432400943e-05,
      "loss": 0.3871,
      "step": 2759
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.21232319504204727,
      "learning_rate": 1.50890906959961e-05,
      "loss": 0.459,
      "step": 2760
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.18086892611822036,
      "learning_rate": 1.5039473992246278e-05,
      "loss": 0.3655,
      "step": 2761
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.1973248762783153,
      "learning_rate": 1.4989932364995873e-05,
      "loss": 0.4215,
      "step": 2762
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.2036105383458429,
      "learning_rate": 1.4940465858023055e-05,
      "loss": 0.4068,
      "step": 2763
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.18528333902490204,
      "learning_rate": 1.4891074515039548e-05,
      "loss": 0.3973,
      "step": 2764
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.19995603824690625,
      "learning_rate": 1.4841758379690663e-05,
      "loss": 0.4255,
      "step": 2765
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.17787180969113067,
      "learning_rate": 1.479251749555527e-05,
      "loss": 0.3738,
      "step": 2766
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.1831308365493359,
      "learning_rate": 1.4743351906145741e-05,
      "loss": 0.3912,
      "step": 2767
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.20430223291165012,
      "learning_rate": 1.4694261654907881e-05,
      "loss": 0.4028,
      "step": 2768
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.19756371486336502,
      "learning_rate": 1.4645246785220934e-05,
      "loss": 0.3881,
      "step": 2769
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.21343700040883345,
      "learning_rate": 1.4596307340397597e-05,
      "loss": 0.4174,
      "step": 2770
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.18574834851208513,
      "learning_rate": 1.45474433636838e-05,
      "loss": 0.3787,
      "step": 2771
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.186713913712865,
      "learning_rate": 1.4498654898258857e-05,
      "loss": 0.4028,
      "step": 2772
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.17862363269208836,
      "learning_rate": 1.4449941987235371e-05,
      "loss": 0.3366,
      "step": 2773
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.17991971311499375,
      "learning_rate": 1.4401304673659143e-05,
      "loss": 0.385,
      "step": 2774
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18266258486092432,
      "learning_rate": 1.4352743000509172e-05,
      "loss": 0.3758,
      "step": 2775
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19932881267671296,
      "learning_rate": 1.4304257010697642e-05,
      "loss": 0.4001,
      "step": 2776
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18660263876456015,
      "learning_rate": 1.4255846747069857e-05,
      "loss": 0.3784,
      "step": 2777
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18446968617714987,
      "learning_rate": 1.4207512252404143e-05,
      "loss": 0.3762,
      "step": 2778
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18536201861937218,
      "learning_rate": 1.4159253569411956e-05,
      "loss": 0.3468,
      "step": 2779
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.17535615409314131,
      "learning_rate": 1.4111070740737731e-05,
      "loss": 0.3398,
      "step": 2780
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.17771981382314816,
      "learning_rate": 1.406296380895883e-05,
      "loss": 0.3851,
      "step": 2781
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19105270450058937,
      "learning_rate": 1.4014932816585602e-05,
      "loss": 0.4133,
      "step": 2782
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.1885733896714408,
      "learning_rate": 1.3966977806061277e-05,
      "loss": 0.3945,
      "step": 2783
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18694162099039605,
      "learning_rate": 1.3919098819761922e-05,
      "loss": 0.4086,
      "step": 2784
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19829924594841047,
      "learning_rate": 1.387129589999646e-05,
      "loss": 0.4231,
      "step": 2785
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19892181327078298,
      "learning_rate": 1.3823569089006604e-05,
      "loss": 0.4179,
      "step": 2786
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19725907955910138,
      "learning_rate": 1.3775918428966716e-05,
      "loss": 0.4141,
      "step": 2787
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.18349490528053952,
      "learning_rate": 1.372834396198397e-05,
      "loss": 0.3965,
      "step": 2788
    },
    {
      "epoch": 1.64,
      "grad_norm": 0.19253183211826913,
      "learning_rate": 1.3680845730098191e-05,
      "loss": 0.3932,
      "step": 2789
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19353375854479396,
      "learning_rate": 1.3633423775281816e-05,
      "loss": 0.364,
      "step": 2790
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.18771329690536745,
      "learning_rate": 1.3586078139439851e-05,
      "loss": 0.4034,
      "step": 2791
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.192207218124855,
      "learning_rate": 1.3538808864409947e-05,
      "loss": 0.3893,
      "step": 2792
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.18658210647129536,
      "learning_rate": 1.3491615991962225e-05,
      "loss": 0.3801,
      "step": 2793
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19794402673622702,
      "learning_rate": 1.3444499563799262e-05,
      "loss": 0.4126,
      "step": 2794
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19194076122619697,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.4238,
      "step": 2795
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19500823355157348,
      "learning_rate": 1.3350496206800334e-05,
      "loss": 0.4001,
      "step": 2796
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.1834243107917586,
      "learning_rate": 1.3303609361031655e-05,
      "loss": 0.3846,
      "step": 2797
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.1829659299315814,
      "learning_rate": 1.3256799125682317e-05,
      "loss": 0.3965,
      "step": 2798
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.18958779724287603,
      "learning_rate": 1.3210065542116812e-05,
      "loss": 0.387,
      "step": 2799
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19480397222419207,
      "learning_rate": 1.316340865163188e-05,
      "loss": 0.3961,
      "step": 2800
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.1814371475093744,
      "learning_rate": 1.31168284954565e-05,
      "loss": 0.3711,
      "step": 2801
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.1899328058081375,
      "learning_rate": 1.3070325114751881e-05,
      "loss": 0.3997,
      "step": 2802
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.19259551695163712,
      "learning_rate": 1.3023898550611313e-05,
      "loss": 0.4041,
      "step": 2803
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.1825667974418399,
      "learning_rate": 1.297754884406025e-05,
      "loss": 0.3843,
      "step": 2804
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.18195055678377903,
      "learning_rate": 1.293127603605625e-05,
      "loss": 0.3875,
      "step": 2805
    },
    {
      "epoch": 1.65,
      "grad_norm": 0.18700493318379097,
      "learning_rate": 1.2885080167488905e-05,
      "loss": 0.3876,
      "step": 2806
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.18830079032859265,
      "learning_rate": 1.2838961279179762e-05,
      "loss": 0.3746,
      "step": 2807
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1735496795197628,
      "learning_rate": 1.279291941188241e-05,
      "loss": 0.3856,
      "step": 2808
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.192993836978478,
      "learning_rate": 1.274695460628238e-05,
      "loss": 0.4133,
      "step": 2809
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1840059334442372,
      "learning_rate": 1.2701066902997061e-05,
      "loss": 0.3622,
      "step": 2810
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1823341646749537,
      "learning_rate": 1.2655256342575738e-05,
      "loss": 0.36,
      "step": 2811
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1817364403500079,
      "learning_rate": 1.2609522965499553e-05,
      "loss": 0.3747,
      "step": 2812
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.17394778008068928,
      "learning_rate": 1.2563866812181357e-05,
      "loss": 0.391,
      "step": 2813
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.18598102335942115,
      "learning_rate": 1.2518287922965854e-05,
      "loss": 0.3709,
      "step": 2814
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.19552125120914168,
      "learning_rate": 1.2472786338129439e-05,
      "loss": 0.4518,
      "step": 2815
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.19033443568738462,
      "learning_rate": 1.2427362097880168e-05,
      "loss": 0.4108,
      "step": 2816
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.18646893524050182,
      "learning_rate": 1.238201524235778e-05,
      "loss": 0.3519,
      "step": 2817
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.17270013189629863,
      "learning_rate": 1.2336745811633643e-05,
      "loss": 0.3615,
      "step": 2818
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1891145725571965,
      "learning_rate": 1.229155384571069e-05,
      "loss": 0.3945,
      "step": 2819
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.18665124633114744,
      "learning_rate": 1.224643938452339e-05,
      "loss": 0.3618,
      "step": 2820
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.20704796005105727,
      "learning_rate": 1.220140246793775e-05,
      "loss": 0.3846,
      "step": 2821
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.19746824679309988,
      "learning_rate": 1.215644313575126e-05,
      "loss": 0.4023,
      "step": 2822
    },
    {
      "epoch": 1.66,
      "grad_norm": 0.1866997380267449,
      "learning_rate": 1.2111561427692786e-05,
      "loss": 0.4096,
      "step": 2823
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.19478625576917388,
      "learning_rate": 1.2066757383422667e-05,
      "loss": 0.4302,
      "step": 2824
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.2001333286875977,
      "learning_rate": 1.2022031042532612e-05,
      "loss": 0.3908,
      "step": 2825
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18803738328817945,
      "learning_rate": 1.1977382444545616e-05,
      "loss": 0.4035,
      "step": 2826
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18664571940089442,
      "learning_rate": 1.1932811628915996e-05,
      "loss": 0.4117,
      "step": 2827
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18372861978548846,
      "learning_rate": 1.1888318635029417e-05,
      "loss": 0.3907,
      "step": 2828
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18380904565923992,
      "learning_rate": 1.1843903502202636e-05,
      "loss": 0.3882,
      "step": 2829
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.19358317009559659,
      "learning_rate": 1.1799566269683693e-05,
      "loss": 0.3826,
      "step": 2830
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.17867740875213367,
      "learning_rate": 1.1755306976651793e-05,
      "loss": 0.3682,
      "step": 2831
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.21554588740187808,
      "learning_rate": 1.1711125662217248e-05,
      "loss": 0.3772,
      "step": 2832
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.20421582491112336,
      "learning_rate": 1.1667022365421432e-05,
      "loss": 0.3831,
      "step": 2833
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.17962040426030446,
      "learning_rate": 1.1622997125236834e-05,
      "loss": 0.3695,
      "step": 2834
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18661938671499903,
      "learning_rate": 1.1579049980566947e-05,
      "loss": 0.3953,
      "step": 2835
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18399903543308602,
      "learning_rate": 1.153518097024624e-05,
      "loss": 0.3529,
      "step": 2836
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.20792985146502785,
      "learning_rate": 1.1491390133040147e-05,
      "loss": 0.4302,
      "step": 2837
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18321892919222757,
      "learning_rate": 1.1447677507645049e-05,
      "loss": 0.4273,
      "step": 2838
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.21343818023745764,
      "learning_rate": 1.1404043132688157e-05,
      "loss": 0.4064,
      "step": 2839
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.18841704880001608,
      "learning_rate": 1.1360487046727576e-05,
      "loss": 0.4103,
      "step": 2840
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.17912270675750835,
      "learning_rate": 1.1317009288252234e-05,
      "loss": 0.3582,
      "step": 2841
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.19542193773343466,
      "learning_rate": 1.1273609895681813e-05,
      "loss": 0.4183,
      "step": 2842
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18945513945796663,
      "learning_rate": 1.1230288907366759e-05,
      "loss": 0.3921,
      "step": 2843
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.17852007031481354,
      "learning_rate": 1.118704636158826e-05,
      "loss": 0.3624,
      "step": 2844
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18904710881927972,
      "learning_rate": 1.1143882296558162e-05,
      "loss": 0.3832,
      "step": 2845
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.20283633611433424,
      "learning_rate": 1.1100796750418963e-05,
      "loss": 0.4434,
      "step": 2846
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.19302674167647627,
      "learning_rate": 1.1057789761243776e-05,
      "loss": 0.3669,
      "step": 2847
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.20066283789878311,
      "learning_rate": 1.1014861367036322e-05,
      "loss": 0.4058,
      "step": 2848
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18205872386879324,
      "learning_rate": 1.0972011605730814e-05,
      "loss": 0.3502,
      "step": 2849
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18209351538178542,
      "learning_rate": 1.0929240515192018e-05,
      "loss": 0.3612,
      "step": 2850
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.1799463748541795,
      "learning_rate": 1.0886548133215212e-05,
      "loss": 0.3901,
      "step": 2851
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18457553645487926,
      "learning_rate": 1.0843934497526043e-05,
      "loss": 0.4013,
      "step": 2852
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18027717433183602,
      "learning_rate": 1.0801399645780642e-05,
      "loss": 0.3787,
      "step": 2853
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.1788257136658512,
      "learning_rate": 1.0758943615565486e-05,
      "loss": 0.3503,
      "step": 2854
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.17764028591316763,
      "learning_rate": 1.0716566444397425e-05,
      "loss": 0.3475,
      "step": 2855
    },
    {
      "epoch": 1.68,
      "grad_norm": 0.18189866888371164,
      "learning_rate": 1.067426816972359e-05,
      "loss": 0.3567,
      "step": 2856
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.18276929910949236,
      "learning_rate": 1.0632048828921459e-05,
      "loss": 0.3738,
      "step": 2857
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.18985574998616447,
      "learning_rate": 1.0589908459298659e-05,
      "loss": 0.3892,
      "step": 2858
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19425071062777888,
      "learning_rate": 1.05478470980931e-05,
      "loss": 0.4057,
      "step": 2859
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.18272515913706877,
      "learning_rate": 1.0505864782472886e-05,
      "loss": 0.3598,
      "step": 2860
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19848730153196067,
      "learning_rate": 1.046396154953626e-05,
      "loss": 0.4218,
      "step": 2861
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.20197995519491221,
      "learning_rate": 1.042213743631153e-05,
      "loss": 0.3715,
      "step": 2862
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.18568926557727763,
      "learning_rate": 1.038039247975714e-05,
      "loss": 0.3887,
      "step": 2863
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19679607011383277,
      "learning_rate": 1.0338726716761593e-05,
      "loss": 0.4043,
      "step": 2864
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.20749038036636144,
      "learning_rate": 1.0297140184143383e-05,
      "loss": 0.3977,
      "step": 2865
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.18887797359179764,
      "learning_rate": 1.0255632918651014e-05,
      "loss": 0.3484,
      "step": 2866
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.21288749940839216,
      "learning_rate": 1.0214204956962947e-05,
      "loss": 0.4228,
      "step": 2867
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19545998387534477,
      "learning_rate": 1.0172856335687509e-05,
      "loss": 0.364,
      "step": 2868
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19067129616360895,
      "learning_rate": 1.0131587091362982e-05,
      "loss": 0.4257,
      "step": 2869
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.19945048332024073,
      "learning_rate": 1.0090397260457508e-05,
      "loss": 0.3868,
      "step": 2870
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.17970331087008132,
      "learning_rate": 1.004928687936898e-05,
      "loss": 0.3876,
      "step": 2871
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.17901897921602997,
      "learning_rate": 1.0008255984425141e-05,
      "loss": 0.3608,
      "step": 2872
    },
    {
      "epoch": 1.69,
      "grad_norm": 0.2074985731201921,
      "learning_rate": 9.967304611883543e-06,
      "loss": 0.4021,
      "step": 2873
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18134001903506683,
      "learning_rate": 9.926432797931351e-06,
      "loss": 0.3543,
      "step": 2874
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18272241073998963,
      "learning_rate": 9.885640578685518e-06,
      "loss": 0.3707,
      "step": 2875
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.19096229524187255,
      "learning_rate": 9.84492799019261e-06,
      "loss": 0.4044,
      "step": 2876
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.19114097422640478,
      "learning_rate": 9.804295068428881e-06,
      "loss": 0.4191,
      "step": 2877
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.1927940842818163,
      "learning_rate": 9.763741849300124e-06,
      "loss": 0.4368,
      "step": 2878
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.1932222722751544,
      "learning_rate": 9.723268368641735e-06,
      "loss": 0.367,
      "step": 2879
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18876583554332269,
      "learning_rate": 9.682874662218644e-06,
      "loss": 0.3693,
      "step": 2880
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.19350688957925188,
      "learning_rate": 9.642560765725295e-06,
      "loss": 0.3935,
      "step": 2881
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.19395538454804856,
      "learning_rate": 9.602326714785592e-06,
      "loss": 0.4062,
      "step": 2882
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18687737837536744,
      "learning_rate": 9.562172544952907e-06,
      "loss": 0.4056,
      "step": 2883
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.17094450733612235,
      "learning_rate": 9.52209829170998e-06,
      "loss": 0.3433,
      "step": 2884
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.19198528742425536,
      "learning_rate": 9.482103990468971e-06,
      "loss": 0.3576,
      "step": 2885
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.191964636104055,
      "learning_rate": 9.44218967657139e-06,
      "loss": 0.4226,
      "step": 2886
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18284789434021473,
      "learning_rate": 9.40235538528802e-06,
      "loss": 0.3675,
      "step": 2887
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.1905504505631898,
      "learning_rate": 9.362601151818984e-06,
      "loss": 0.387,
      "step": 2888
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.2004227156859444,
      "learning_rate": 9.322927011293637e-06,
      "loss": 0.4,
      "step": 2889
    },
    {
      "epoch": 1.7,
      "grad_norm": 0.18446499947016987,
      "learning_rate": 9.283332998770555e-06,
      "loss": 0.3589,
      "step": 2890
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.1792298555119123,
      "learning_rate": 9.24381914923752e-06,
      "loss": 0.3492,
      "step": 2891
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.2002209834519155,
      "learning_rate": 9.204385497611467e-06,
      "loss": 0.4051,
      "step": 2892
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.20471621996640477,
      "learning_rate": 9.165032078738489e-06,
      "loss": 0.4161,
      "step": 2893
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.18319314415608173,
      "learning_rate": 9.125758927393724e-06,
      "loss": 0.3779,
      "step": 2894
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.18673289511587204,
      "learning_rate": 9.086566078281434e-06,
      "loss": 0.3594,
      "step": 2895
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.19224444681640385,
      "learning_rate": 9.047453566034914e-06,
      "loss": 0.3768,
      "step": 2896
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.19306598799560065,
      "learning_rate": 9.008421425216417e-06,
      "loss": 0.4097,
      "step": 2897
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.19161756654149822,
      "learning_rate": 8.969469690317245e-06,
      "loss": 0.3973,
      "step": 2898
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.2012867849626023,
      "learning_rate": 8.930598395757595e-06,
      "loss": 0.4146,
      "step": 2899
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.1741533788328624,
      "learning_rate": 8.891807575886624e-06,
      "loss": 0.3594,
      "step": 2900
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.19231361448939627,
      "learning_rate": 8.853097264982357e-06,
      "loss": 0.356,
      "step": 2901
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.20533186644938736,
      "learning_rate": 8.814467497251677e-06,
      "loss": 0.3739,
      "step": 2902
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.18698584808737279,
      "learning_rate": 8.775918306830266e-06,
      "loss": 0.4046,
      "step": 2903
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.2104814318764746,
      "learning_rate": 8.737449727782642e-06,
      "loss": 0.4079,
      "step": 2904
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.20098428618573008,
      "learning_rate": 8.699061794102093e-06,
      "loss": 0.4048,
      "step": 2905
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.18507212012634458,
      "learning_rate": 8.660754539710625e-06,
      "loss": 0.3818,
      "step": 2906
    },
    {
      "epoch": 1.71,
      "grad_norm": 0.2014523497453203,
      "learning_rate": 8.62252799845893e-06,
      "loss": 0.4183,
      "step": 2907
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.18290858907170823,
      "learning_rate": 8.584382204126385e-06,
      "loss": 0.3671,
      "step": 2908
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.18983843999716807,
      "learning_rate": 8.546317190421106e-06,
      "loss": 0.3863,
      "step": 2909
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19999120835019119,
      "learning_rate": 8.508332990979673e-06,
      "loss": 0.3815,
      "step": 2910
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.1751899801069206,
      "learning_rate": 8.47042963936736e-06,
      "loss": 0.3701,
      "step": 2911
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19502109628725386,
      "learning_rate": 8.432607169077977e-06,
      "loss": 0.3755,
      "step": 2912
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19565978172851314,
      "learning_rate": 8.394865613533832e-06,
      "loss": 0.3673,
      "step": 2913
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19759320259101212,
      "learning_rate": 8.357205006085756e-06,
      "loss": 0.4318,
      "step": 2914
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.1859624988201233,
      "learning_rate": 8.319625380013074e-06,
      "loss": 0.4083,
      "step": 2915
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19061883218141462,
      "learning_rate": 8.282126768523468e-06,
      "loss": 0.4053,
      "step": 2916
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.18514292413007089,
      "learning_rate": 8.24470920475312e-06,
      "loss": 0.3414,
      "step": 2917
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19488059960726256,
      "learning_rate": 8.207372721766572e-06,
      "loss": 0.3973,
      "step": 2918
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.17718229513021916,
      "learning_rate": 8.170117352556695e-06,
      "loss": 0.3763,
      "step": 2919
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.20246941633114438,
      "learning_rate": 8.132943130044667e-06,
      "loss": 0.3813,
      "step": 2920
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.19085709706361953,
      "learning_rate": 8.095850087080015e-06,
      "loss": 0.4044,
      "step": 2921
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.2038351893649488,
      "learning_rate": 8.058838256440492e-06,
      "loss": 0.3962,
      "step": 2922
    },
    {
      "epoch": 1.72,
      "grad_norm": 0.21825503424463988,
      "learning_rate": 8.021907670832074e-06,
      "loss": 0.3541,
      "step": 2923
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.180708799816823,
      "learning_rate": 7.985058362888975e-06,
      "loss": 0.4023,
      "step": 2924
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.20126430165940656,
      "learning_rate": 7.948290365173584e-06,
      "loss": 0.4045,
      "step": 2925
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.18866439227980727,
      "learning_rate": 7.91160371017644e-06,
      "loss": 0.3817,
      "step": 2926
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.19487415886389572,
      "learning_rate": 7.874998430316172e-06,
      "loss": 0.407,
      "step": 2927
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.18579899661300278,
      "learning_rate": 7.838474557939545e-06,
      "loss": 0.3885,
      "step": 2928
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.18411710692418848,
      "learning_rate": 7.802032125321345e-06,
      "loss": 0.3622,
      "step": 2929
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.20281391997383588,
      "learning_rate": 7.765671164664423e-06,
      "loss": 0.4359,
      "step": 2930
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.18810742810049738,
      "learning_rate": 7.72939170809962e-06,
      "loss": 0.3533,
      "step": 2931
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.1967944384539896,
      "learning_rate": 7.693193787685782e-06,
      "loss": 0.3973,
      "step": 2932
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.17731253391948779,
      "learning_rate": 7.657077435409643e-06,
      "loss": 0.3552,
      "step": 2933
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.1816288509685908,
      "learning_rate": 7.621042683185931e-06,
      "loss": 0.3718,
      "step": 2934
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.20764843361130358,
      "learning_rate": 7.585089562857217e-06,
      "loss": 0.4214,
      "step": 2935
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.19453851257013952,
      "learning_rate": 7.549218106193967e-06,
      "loss": 0.411,
      "step": 2936
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.21279984261284868,
      "learning_rate": 7.513428344894458e-06,
      "loss": 0.3401,
      "step": 2937
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.18343133868234535,
      "learning_rate": 7.4777203105848125e-06,
      "loss": 0.3941,
      "step": 2938
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.21052952203623176,
      "learning_rate": 7.4420940348188806e-06,
      "loss": 0.4359,
      "step": 2939
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.2265859274846711,
      "learning_rate": 7.406549549078312e-06,
      "loss": 0.4307,
      "step": 2940
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18545526211162194,
      "learning_rate": 7.371086884772471e-06,
      "loss": 0.3763,
      "step": 2941
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.21364237947548975,
      "learning_rate": 7.335706073238391e-06,
      "loss": 0.4378,
      "step": 2942
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.1893654587422108,
      "learning_rate": 7.300407145740806e-06,
      "loss": 0.3961,
      "step": 2943
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19588854106498704,
      "learning_rate": 7.265190133472089e-06,
      "loss": 0.3607,
      "step": 2944
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.2191471293218054,
      "learning_rate": 7.230055067552211e-06,
      "loss": 0.4441,
      "step": 2945
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19082213620990843,
      "learning_rate": 7.1950019790287485e-06,
      "loss": 0.3766,
      "step": 2946
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19297814729942536,
      "learning_rate": 7.160030898876835e-06,
      "loss": 0.406,
      "step": 2947
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19765593940930495,
      "learning_rate": 7.12514185799914e-06,
      "loss": 0.4105,
      "step": 2948
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18194885663930477,
      "learning_rate": 7.090334887225792e-06,
      "loss": 0.3886,
      "step": 2949
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18021052671227944,
      "learning_rate": 7.055610017314463e-06,
      "loss": 0.3517,
      "step": 2950
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19181467145717865,
      "learning_rate": 7.020967278950253e-06,
      "loss": 0.3959,
      "step": 2951
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18275022534861474,
      "learning_rate": 6.98640670274564e-06,
      "loss": 0.3728,
      "step": 2952
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19157406478323333,
      "learning_rate": 6.9519283192405525e-06,
      "loss": 0.3882,
      "step": 2953
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18949485590643386,
      "learning_rate": 6.917532158902318e-06,
      "loss": 0.3978,
      "step": 2954
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.1989258914761517,
      "learning_rate": 6.883218252125512e-06,
      "loss": 0.4186,
      "step": 2955
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.18665299033631447,
      "learning_rate": 6.848986629232079e-06,
      "loss": 0.3755,
      "step": 2956
    },
    {
      "epoch": 1.74,
      "grad_norm": 0.19251540869035402,
      "learning_rate": 6.814837320471279e-06,
      "loss": 0.4082,
      "step": 2957
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18901165432013456,
      "learning_rate": 6.780770356019561e-06,
      "loss": 0.3702,
      "step": 2958
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.19203320591549375,
      "learning_rate": 6.746785765980679e-06,
      "loss": 0.3636,
      "step": 2959
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.22235792675190075,
      "learning_rate": 6.712883580385554e-06,
      "loss": 0.4052,
      "step": 2960
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18708099149404153,
      "learning_rate": 6.679063829192311e-06,
      "loss": 0.3926,
      "step": 2961
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18494080916307828,
      "learning_rate": 6.645326542286223e-06,
      "loss": 0.3751,
      "step": 2962
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18946623107272353,
      "learning_rate": 6.611671749479697e-06,
      "loss": 0.3647,
      "step": 2963
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18705353389459098,
      "learning_rate": 6.578099480512256e-06,
      "loss": 0.3778,
      "step": 2964
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.1916670849042115,
      "learning_rate": 6.5446097650504355e-06,
      "loss": 0.3752,
      "step": 2965
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.21603562470418328,
      "learning_rate": 6.5112026326878965e-06,
      "loss": 0.4069,
      "step": 2966
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.19485054525464968,
      "learning_rate": 6.477878112945301e-06,
      "loss": 0.3866,
      "step": 2967
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.1855274419319167,
      "learning_rate": 6.444636235270285e-06,
      "loss": 0.3967,
      "step": 2968
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.19320539411671053,
      "learning_rate": 6.411477029037494e-06,
      "loss": 0.4241,
      "step": 2969
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.20112976004377922,
      "learning_rate": 6.378400523548489e-06,
      "loss": 0.4192,
      "step": 2970
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.18417473736719064,
      "learning_rate": 6.345406748031768e-06,
      "loss": 0.3716,
      "step": 2971
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.1840100228031329,
      "learning_rate": 6.312495731642731e-06,
      "loss": 0.3885,
      "step": 2972
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.22949045550103406,
      "learning_rate": 6.279667503463638e-06,
      "loss": 0.4162,
      "step": 2973
    },
    {
      "epoch": 1.75,
      "grad_norm": 0.19675031568235163,
      "learning_rate": 6.24692209250356e-06,
      "loss": 0.4307,
      "step": 2974
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.18972862959837244,
      "learning_rate": 6.21425952769843e-06,
      "loss": 0.3751,
      "step": 2975
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.17665794307839597,
      "learning_rate": 6.18167983791097e-06,
      "loss": 0.3554,
      "step": 2976
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.1832311142244817,
      "learning_rate": 6.149183051930662e-06,
      "loss": 0.3818,
      "step": 2977
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.19764654343188384,
      "learning_rate": 6.116769198473693e-06,
      "loss": 0.3851,
      "step": 2978
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.18592846465779592,
      "learning_rate": 6.084438306183015e-06,
      "loss": 0.381,
      "step": 2979
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.18860776190892709,
      "learning_rate": 6.052190403628244e-06,
      "loss": 0.3898,
      "step": 2980
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.1932698353278176,
      "learning_rate": 6.020025519305672e-06,
      "loss": 0.4136,
      "step": 2981
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.19722848524162306,
      "learning_rate": 5.98794368163823e-06,
      "loss": 0.4136,
      "step": 2982
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.18199038244482568,
      "learning_rate": 5.955944918975476e-06,
      "loss": 0.36,
      "step": 2983
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.19911096620484098,
      "learning_rate": 5.924029259593511e-06,
      "loss": 0.3826,
      "step": 2984
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.19078439496902141,
      "learning_rate": 5.892196731695043e-06,
      "loss": 0.4054,
      "step": 2985
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.19821026684553988,
      "learning_rate": 5.860447363409327e-06,
      "loss": 0.4049,
      "step": 2986
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.20139870437661192,
      "learning_rate": 5.8287811827920865e-06,
      "loss": 0.4185,
      "step": 2987
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.18318901706477717,
      "learning_rate": 5.7971982178255835e-06,
      "loss": 0.3834,
      "step": 2988
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.20668888021525425,
      "learning_rate": 5.765698496418515e-06,
      "loss": 0.4023,
      "step": 2989
    },
    {
      "epoch": 1.76,
      "grad_norm": 0.1896019068304225,
      "learning_rate": 5.734282046406025e-06,
      "loss": 0.3881,
      "step": 2990
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.1954645183614734,
      "learning_rate": 5.702948895549698e-06,
      "loss": 0.406,
      "step": 2991
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.19180659148700469,
      "learning_rate": 5.671699071537473e-06,
      "loss": 0.4175,
      "step": 2992
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.20921814241950143,
      "learning_rate": 5.6405326019836835e-06,
      "loss": 0.4527,
      "step": 2993
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.17413574779071542,
      "learning_rate": 5.609449514428977e-06,
      "loss": 0.3323,
      "step": 2994
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.18660960465501725,
      "learning_rate": 5.5784498363403605e-06,
      "loss": 0.3812,
      "step": 2995
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.1925241579835085,
      "learning_rate": 5.547533595111109e-06,
      "loss": 0.3914,
      "step": 2996
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.1841251913859023,
      "learning_rate": 5.5167008180607385e-06,
      "loss": 0.3804,
      "step": 2997
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.18889703992146564,
      "learning_rate": 5.485951532435063e-06,
      "loss": 0.3536,
      "step": 2998
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.1863670029115911,
      "learning_rate": 5.455285765406126e-06,
      "loss": 0.3524,
      "step": 2999
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.1756904397321326,
      "learning_rate": 5.424703544072107e-06,
      "loss": 0.3421,
      "step": 3000
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.2085191353918397,
      "learning_rate": 5.3942048954574e-06,
      "loss": 0.3811,
      "step": 3001
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.19480406801885236,
      "learning_rate": 5.363789846512546e-06,
      "loss": 0.4007,
      "step": 3002
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.18354644009378338,
      "learning_rate": 5.333458424114202e-06,
      "loss": 0.3883,
      "step": 3003
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.2027662999476591,
      "learning_rate": 5.303210655065138e-06,
      "loss": 0.3775,
      "step": 3004
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.17580837553023873,
      "learning_rate": 5.273046566094198e-06,
      "loss": 0.3537,
      "step": 3005
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.20765612035318065,
      "learning_rate": 5.2429661838562684e-06,
      "loss": 0.465,
      "step": 3006
    },
    {
      "epoch": 1.77,
      "grad_norm": 0.19278792658868227,
      "learning_rate": 5.212969534932299e-06,
      "loss": 0.3653,
      "step": 3007
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19513017429795937,
      "learning_rate": 5.18305664582922e-06,
      "loss": 0.4025,
      "step": 3008
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19127354333428667,
      "learning_rate": 5.153227542979955e-06,
      "loss": 0.4056,
      "step": 3009
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.20131318938926027,
      "learning_rate": 5.123482252743384e-06,
      "loss": 0.4293,
      "step": 3010
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.20410517958580796,
      "learning_rate": 5.093820801404314e-06,
      "loss": 0.4065,
      "step": 3011
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.18704971564363876,
      "learning_rate": 5.064243215173525e-06,
      "loss": 0.3946,
      "step": 3012
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.17567995302964098,
      "learning_rate": 5.034749520187599e-06,
      "loss": 0.3911,
      "step": 3013
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.18580034034263418,
      "learning_rate": 5.005339742509052e-06,
      "loss": 0.3824,
      "step": 3014
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.18865028556075208,
      "learning_rate": 4.97601390812622e-06,
      "loss": 0.3644,
      "step": 3015
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.1780081966739988,
      "learning_rate": 4.9467720429532626e-06,
      "loss": 0.3715,
      "step": 3016
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19988120677078622,
      "learning_rate": 4.917614172830165e-06,
      "loss": 0.4045,
      "step": 3017
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.18819370470191035,
      "learning_rate": 4.888540323522639e-06,
      "loss": 0.3869,
      "step": 3018
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.1795741313544112,
      "learning_rate": 4.859550520722212e-06,
      "loss": 0.3849,
      "step": 3019
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19368920456635835,
      "learning_rate": 4.8306447900460795e-06,
      "loss": 0.3751,
      "step": 3020
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19386497788215812,
      "learning_rate": 4.8018231570371775e-06,
      "loss": 0.4061,
      "step": 3021
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.2043390708744847,
      "learning_rate": 4.773085647164155e-06,
      "loss": 0.4794,
      "step": 3022
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19633532001180456,
      "learning_rate": 4.744432285821254e-06,
      "loss": 0.4068,
      "step": 3023
    },
    {
      "epoch": 1.78,
      "grad_norm": 0.19373876041409038,
      "learning_rate": 4.7158630983284106e-06,
      "loss": 0.3835,
      "step": 3024
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.17598407117172582,
      "learning_rate": 4.687378109931184e-06,
      "loss": 0.3597,
      "step": 3025
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.19456749065947174,
      "learning_rate": 4.658977345800697e-06,
      "loss": 0.3696,
      "step": 3026
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.20959412480815218,
      "learning_rate": 4.630660831033673e-06,
      "loss": 0.4393,
      "step": 3027
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.18703532104575968,
      "learning_rate": 4.6024285906523855e-06,
      "loss": 0.383,
      "step": 3028
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.20315455063945598,
      "learning_rate": 4.574280649604601e-06,
      "loss": 0.3698,
      "step": 3029
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.18663063418997797,
      "learning_rate": 4.546217032763645e-06,
      "loss": 0.3697,
      "step": 3030
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.19424594577337717,
      "learning_rate": 4.518237764928301e-06,
      "loss": 0.3825,
      "step": 3031
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.19229780394926582,
      "learning_rate": 4.490342870822828e-06,
      "loss": 0.4045,
      "step": 3032
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.19087778057394364,
      "learning_rate": 4.462532375096895e-06,
      "loss": 0.4272,
      "step": 3033
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.1861966013754029,
      "learning_rate": 4.434806302325634e-06,
      "loss": 0.3641,
      "step": 3034
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.2016550514880452,
      "learning_rate": 4.407164677009568e-06,
      "loss": 0.4028,
      "step": 3035
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.18467782571726843,
      "learning_rate": 4.37960752357458e-06,
      "loss": 0.365,
      "step": 3036
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.17953854507068792,
      "learning_rate": 4.3521348663719045e-06,
      "loss": 0.3518,
      "step": 3037
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.18886356823427813,
      "learning_rate": 4.324746729678142e-06,
      "loss": 0.3646,
      "step": 3038
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.2064592201774944,
      "learning_rate": 4.297443137695156e-06,
      "loss": 0.412,
      "step": 3039
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.1944505486212464,
      "learning_rate": 4.270224114550147e-06,
      "loss": 0.3692,
      "step": 3040
    },
    {
      "epoch": 1.79,
      "grad_norm": 0.20673212324986717,
      "learning_rate": 4.243089684295576e-06,
      "loss": 0.4313,
      "step": 3041
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.19246473196675232,
      "learning_rate": 4.216039870909094e-06,
      "loss": 0.3982,
      "step": 3042
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.18922516737788958,
      "learning_rate": 4.189074698293693e-06,
      "loss": 0.3655,
      "step": 3043
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.19078819475088432,
      "learning_rate": 4.1621941902774906e-06,
      "loss": 0.385,
      "step": 3044
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.19181915011403114,
      "learning_rate": 4.1353983706137745e-06,
      "loss": 0.393,
      "step": 3045
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.17059509967536488,
      "learning_rate": 4.108687262981048e-06,
      "loss": 0.3286,
      "step": 3046
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.18191261156882021,
      "learning_rate": 4.0820608909829416e-06,
      "loss": 0.4133,
      "step": 3047
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.18689231031706133,
      "learning_rate": 4.055519278148201e-06,
      "loss": 0.3549,
      "step": 3048
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.19008551894380415,
      "learning_rate": 4.029062447930665e-06,
      "loss": 0.3862,
      "step": 3049
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.17864720895991723,
      "learning_rate": 4.002690423709277e-06,
      "loss": 0.3628,
      "step": 3050
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.20418332555315316,
      "learning_rate": 3.976403228788017e-06,
      "loss": 0.3849,
      "step": 3051
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.22285648605209982,
      "learning_rate": 3.950200886395916e-06,
      "loss": 0.3925,
      "step": 3052
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.19498916492484897,
      "learning_rate": 3.9240834196870195e-06,
      "loss": 0.3784,
      "step": 3053
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.18714101236614172,
      "learning_rate": 3.898050851740398e-06,
      "loss": 0.3816,
      "step": 3054
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.20015720362037948,
      "learning_rate": 3.872103205560052e-06,
      "loss": 0.3864,
      "step": 3055
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.18942369033523968,
      "learning_rate": 3.846240504074961e-06,
      "loss": 0.3845,
      "step": 3056
    },
    {
      "epoch": 1.8,
      "grad_norm": 0.17566746603927816,
      "learning_rate": 3.820462770139066e-06,
      "loss": 0.3675,
      "step": 3057
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.20157919882361208,
      "learning_rate": 3.7947700265311913e-06,
      "loss": 0.3932,
      "step": 3058
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.19956018543699966,
      "learning_rate": 3.7691622959550754e-06,
      "loss": 0.4065,
      "step": 3059
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.18289114123090686,
      "learning_rate": 3.743639601039317e-06,
      "loss": 0.3618,
      "step": 3060
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.1937354421427421,
      "learning_rate": 3.718201964337409e-06,
      "loss": 0.4134,
      "step": 3061
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.22282895589554233,
      "learning_rate": 3.6928494083276367e-06,
      "loss": 0.442,
      "step": 3062
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.19567047724989875,
      "learning_rate": 3.6675819554131464e-06,
      "loss": 0.3947,
      "step": 3063
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.2048875031927258,
      "learning_rate": 3.6423996279218442e-06,
      "loss": 0.41,
      "step": 3064
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.20308999667135372,
      "learning_rate": 3.6173024481064187e-06,
      "loss": 0.4615,
      "step": 3065
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.19228712713285978,
      "learning_rate": 3.5922904381443413e-06,
      "loss": 0.427,
      "step": 3066
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.19252248368996067,
      "learning_rate": 3.5673636201378204e-06,
      "loss": 0.3685,
      "step": 3067
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.20530020014607442,
      "learning_rate": 3.5425220161137474e-06,
      "loss": 0.4255,
      "step": 3068
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.18145381029299965,
      "learning_rate": 3.51776564802373e-06,
      "loss": 0.3678,
      "step": 3069
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.19310693625619313,
      "learning_rate": 3.4930945377440795e-06,
      "loss": 0.3899,
      "step": 3070
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.1907414474503299,
      "learning_rate": 3.468508707075757e-06,
      "loss": 0.3819,
      "step": 3071
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.1965869704282478,
      "learning_rate": 3.44400817774434e-06,
      "loss": 0.3789,
      "step": 3072
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.21310167998529464,
      "learning_rate": 3.4195929714000654e-06,
      "loss": 0.4106,
      "step": 3073
    },
    {
      "epoch": 1.81,
      "grad_norm": 0.2014074107287134,
      "learning_rate": 3.3952631096177414e-06,
      "loss": 0.4039,
      "step": 3074
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.18533783622734315,
      "learning_rate": 3.3710186138967704e-06,
      "loss": 0.3545,
      "step": 3075
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.17866153267060225,
      "learning_rate": 3.3468595056611372e-06,
      "loss": 0.3836,
      "step": 3076
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1853015550309895,
      "learning_rate": 3.3227858062593765e-06,
      "loss": 0.4297,
      "step": 3077
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.20097830071097617,
      "learning_rate": 3.2987975369644817e-06,
      "loss": 0.3992,
      "step": 3078
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.189571690369022,
      "learning_rate": 3.274894718974031e-06,
      "loss": 0.3582,
      "step": 3079
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.2347129525703833,
      "learning_rate": 3.251077373410105e-06,
      "loss": 0.372,
      "step": 3080
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.18771010609318178,
      "learning_rate": 3.2273455213191585e-06,
      "loss": 0.3945,
      "step": 3081
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.19502603116959955,
      "learning_rate": 3.203699183672193e-06,
      "loss": 0.4045,
      "step": 3082
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.18252504680025133,
      "learning_rate": 3.180138381364606e-06,
      "loss": 0.3735,
      "step": 3083
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1796587965575206,
      "learning_rate": 3.156663135216209e-06,
      "loss": 0.3793,
      "step": 3084
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1980068683285886,
      "learning_rate": 3.133273465971209e-06,
      "loss": 0.4311,
      "step": 3085
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1984199641786852,
      "learning_rate": 3.109969394298218e-06,
      "loss": 0.4052,
      "step": 3086
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1969664531299117,
      "learning_rate": 3.0867509407901506e-06,
      "loss": 0.4003,
      "step": 3087
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.1897604334827666,
      "learning_rate": 3.0636181259643514e-06,
      "loss": 0.3682,
      "step": 3088
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.19750841186189524,
      "learning_rate": 3.040570970262402e-06,
      "loss": 0.3792,
      "step": 3089
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.18310374879465044,
      "learning_rate": 3.0176094940502664e-06,
      "loss": 0.3848,
      "step": 3090
    },
    {
      "epoch": 1.82,
      "grad_norm": 0.20034511217222092,
      "learning_rate": 2.9947337176181144e-06,
      "loss": 0.4018,
      "step": 3091
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.20004670284240827,
      "learning_rate": 2.971943661180465e-06,
      "loss": 0.4593,
      "step": 3092
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.1814133588800584,
      "learning_rate": 2.9492393448760426e-06,
      "loss": 0.3799,
      "step": 3093
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.2200272555047984,
      "learning_rate": 2.9266207887678088e-06,
      "loss": 0.4298,
      "step": 3094
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19018317992268535,
      "learning_rate": 2.9040880128429536e-06,
      "loss": 0.3931,
      "step": 3095
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.20000109367751695,
      "learning_rate": 2.881641037012872e-06,
      "loss": 0.4114,
      "step": 3096
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19633599630494716,
      "learning_rate": 2.8592798811131416e-06,
      "loss": 0.4446,
      "step": 3097
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19319921014436206,
      "learning_rate": 2.837004564903478e-06,
      "loss": 0.4133,
      "step": 3098
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.18140944372475734,
      "learning_rate": 2.8148151080677807e-06,
      "loss": 0.3665,
      "step": 3099
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.17640229722588313,
      "learning_rate": 2.7927115302140317e-06,
      "loss": 0.3658,
      "step": 3100
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19508199702272827,
      "learning_rate": 2.770693850874373e-06,
      "loss": 0.3871,
      "step": 3101
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.20731154915272468,
      "learning_rate": 2.748762089505019e-06,
      "loss": 0.4425,
      "step": 3102
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.18689359827339425,
      "learning_rate": 2.7269162654862457e-06,
      "loss": 0.3844,
      "step": 3103
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.20120781400415283,
      "learning_rate": 2.7051563981224216e-06,
      "loss": 0.3981,
      "step": 3104
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.18363501072484445,
      "learning_rate": 2.683482506641932e-06,
      "loss": 0.3486,
      "step": 3105
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19875829323639832,
      "learning_rate": 2.661894610197213e-06,
      "loss": 0.4043,
      "step": 3106
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.18863380125947674,
      "learning_rate": 2.6403927278646823e-06,
      "loss": 0.3701,
      "step": 3107
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.19337787981726978,
      "learning_rate": 2.618976878644774e-06,
      "loss": 0.3881,
      "step": 3108
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1881175788170023,
      "learning_rate": 2.597647081461896e-06,
      "loss": 0.3917,
      "step": 3109
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.18521342222222642,
      "learning_rate": 2.5764033551643917e-06,
      "loss": 0.364,
      "step": 3110
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1973306468994064,
      "learning_rate": 2.555245718524568e-06,
      "loss": 0.4051,
      "step": 3111
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.17802966644073898,
      "learning_rate": 2.5341741902386583e-06,
      "loss": 0.3847,
      "step": 3112
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1868961775702084,
      "learning_rate": 2.5131887889267793e-06,
      "loss": 0.3714,
      "step": 3113
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.19173975866165188,
      "learning_rate": 2.4922895331329743e-06,
      "loss": 0.406,
      "step": 3114
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.2019551359323475,
      "learning_rate": 2.4714764413251602e-06,
      "loss": 0.378,
      "step": 3115
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1841730445013379,
      "learning_rate": 2.4507495318950804e-06,
      "loss": 0.3742,
      "step": 3116
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.18756082157918272,
      "learning_rate": 2.4301088231583615e-06,
      "loss": 0.3568,
      "step": 3117
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.19710909361068985,
      "learning_rate": 2.4095543333544244e-06,
      "loss": 0.4231,
      "step": 3118
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1843604977757972,
      "learning_rate": 2.389086080646541e-06,
      "loss": 0.3987,
      "step": 3119
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.1917810789555228,
      "learning_rate": 2.368704083121731e-06,
      "loss": 0.3421,
      "step": 3120
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.18416664870208654,
      "learning_rate": 2.348408358790832e-06,
      "loss": 0.3513,
      "step": 3121
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.19071739974081112,
      "learning_rate": 2.328198925588432e-06,
      "loss": 0.3708,
      "step": 3122
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.178307974776429,
      "learning_rate": 2.308075801372844e-06,
      "loss": 0.3498,
      "step": 3123
    },
    {
      "epoch": 1.84,
      "grad_norm": 0.19784957845065634,
      "learning_rate": 2.288039003926157e-06,
      "loss": 0.4153,
      "step": 3124
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.18320277935679874,
      "learning_rate": 2.2680885509541616e-06,
      "loss": 0.4105,
      "step": 3125
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.1936425061613634,
      "learning_rate": 2.2482244600863124e-06,
      "loss": 0.346,
      "step": 3126
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.18912902067810736,
      "learning_rate": 2.22844674887579e-06,
      "loss": 0.3941,
      "step": 3127
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.18476315688660908,
      "learning_rate": 2.2087554347994477e-06,
      "loss": 0.3699,
      "step": 3128
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19961368109564082,
      "learning_rate": 2.189150535257756e-06,
      "loss": 0.3892,
      "step": 3129
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.17429966573414088,
      "learning_rate": 2.1696320675748447e-06,
      "loss": 0.3555,
      "step": 3130
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.17919440916619483,
      "learning_rate": 2.1502000489984853e-06,
      "loss": 0.3526,
      "step": 3131
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.2078267776964174,
      "learning_rate": 2.13085449670003e-06,
      "loss": 0.4291,
      "step": 3132
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19063798081819075,
      "learning_rate": 2.1115954277744264e-06,
      "loss": 0.4074,
      "step": 3133
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19081641574100974,
      "learning_rate": 2.0924228592402174e-06,
      "loss": 0.3689,
      "step": 3134
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.1881455252475656,
      "learning_rate": 2.0733368080395054e-06,
      "loss": 0.3477,
      "step": 3135
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.194375006291378,
      "learning_rate": 2.054337291037911e-06,
      "loss": 0.4095,
      "step": 3136
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19705529909778877,
      "learning_rate": 2.0354243250246263e-06,
      "loss": 0.3673,
      "step": 3137
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.1845700451116757,
      "learning_rate": 2.016597926712349e-06,
      "loss": 0.4007,
      "step": 3138
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19741016984428209,
      "learning_rate": 1.997858112737261e-06,
      "loss": 0.3952,
      "step": 3139
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.17693779240080532,
      "learning_rate": 1.9792048996590595e-06,
      "loss": 0.3786,
      "step": 3140
    },
    {
      "epoch": 1.85,
      "grad_norm": 0.19772498598639746,
      "learning_rate": 1.9606383039609043e-06,
      "loss": 0.3906,
      "step": 3141
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.1891658783526746,
      "learning_rate": 1.942158342049405e-06,
      "loss": 0.4,
      "step": 3142
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.2047918931876484,
      "learning_rate": 1.9237650302546318e-06,
      "loss": 0.4152,
      "step": 3143
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19761414033249414,
      "learning_rate": 1.9054583848300944e-06,
      "loss": 0.3712,
      "step": 3144
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19782434959629427,
      "learning_rate": 1.8872384219526752e-06,
      "loss": 0.3769,
      "step": 3145
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.18935966535034518,
      "learning_rate": 1.8691051577227059e-06,
      "loss": 0.4046,
      "step": 3146
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.2152916894984238,
      "learning_rate": 1.8510586081638803e-06,
      "loss": 0.4048,
      "step": 3147
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.1932939479942431,
      "learning_rate": 1.8330987892232864e-06,
      "loss": 0.407,
      "step": 3148
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.18254601435770682,
      "learning_rate": 1.8152257167713293e-06,
      "loss": 0.3636,
      "step": 3149
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.202958366945704,
      "learning_rate": 1.7974394066018086e-06,
      "loss": 0.4151,
      "step": 3150
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19035842521279056,
      "learning_rate": 1.7797398744318294e-06,
      "loss": 0.424,
      "step": 3151
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19953066152325982,
      "learning_rate": 1.7621271359018143e-06,
      "loss": 0.4353,
      "step": 3152
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.21452092387256122,
      "learning_rate": 1.7446012065755025e-06,
      "loss": 0.4264,
      "step": 3153
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.18855721006001971,
      "learning_rate": 1.7271621019399165e-06,
      "loss": 0.3607,
      "step": 3154
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.17457659478034973,
      "learning_rate": 1.7098098374053295e-06,
      "loss": 0.3699,
      "step": 3155
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19528073478928745,
      "learning_rate": 1.6925444283053316e-06,
      "loss": 0.3971,
      "step": 3156
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.19440070159663722,
      "learning_rate": 1.6753658898967073e-06,
      "loss": 0.3982,
      "step": 3157
    },
    {
      "epoch": 1.86,
      "grad_norm": 0.20508767460402985,
      "learning_rate": 1.658274237359514e-06,
      "loss": 0.3942,
      "step": 3158
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.1979654844075504,
      "learning_rate": 1.6412694857970256e-06,
      "loss": 0.3798,
      "step": 3159
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.18833758719322716,
      "learning_rate": 1.6243516502356782e-06,
      "loss": 0.4258,
      "step": 3160
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.19086125074224217,
      "learning_rate": 1.6075207456252016e-06,
      "loss": 0.3993,
      "step": 3161
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.2028805157065144,
      "learning_rate": 1.5907767868383993e-06,
      "loss": 0.411,
      "step": 3162
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.18946762384982935,
      "learning_rate": 1.5741197886713243e-06,
      "loss": 0.3613,
      "step": 3163
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.1956602195587916,
      "learning_rate": 1.5575497658431359e-06,
      "loss": 0.4189,
      "step": 3164
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.17588601530400924,
      "learning_rate": 1.541066732996166e-06,
      "loss": 0.3462,
      "step": 3165
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.20371484183429103,
      "learning_rate": 1.524670704695852e-06,
      "loss": 0.3799,
      "step": 3166
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.19179485563072282,
      "learning_rate": 1.5083616954307822e-06,
      "loss": 0.3779,
      "step": 3167
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.1863133844938148,
      "learning_rate": 1.4921397196126063e-06,
      "loss": 0.3821,
      "step": 3168
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.184740343430799,
      "learning_rate": 1.476004791576102e-06,
      "loss": 0.4011,
      "step": 3169
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.1773568646290839,
      "learning_rate": 1.4599569255791312e-06,
      "loss": 0.3644,
      "step": 3170
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.18876985980744976,
      "learning_rate": 1.443996135802572e-06,
      "loss": 0.3784,
      "step": 3171
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.17067634819510838,
      "learning_rate": 1.4281224363504097e-06,
      "loss": 0.3303,
      "step": 3172
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.19437246035111175,
      "learning_rate": 1.4123358412496457e-06,
      "loss": 0.3731,
      "step": 3173
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.17734466145601968,
      "learning_rate": 1.396636364450299e-06,
      "loss": 0.3728,
      "step": 3174
    },
    {
      "epoch": 1.87,
      "grad_norm": 0.19118429102334034,
      "learning_rate": 1.38102401982545e-06,
      "loss": 0.3816,
      "step": 3175
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.20028692342632082,
      "learning_rate": 1.3654988211711294e-06,
      "loss": 0.4221,
      "step": 3176
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.19781252858163023,
      "learning_rate": 1.3500607822063972e-06,
      "loss": 0.3898,
      "step": 3177
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.18896273683703604,
      "learning_rate": 1.3347099165732735e-06,
      "loss": 0.3712,
      "step": 3178
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.20055040703887,
      "learning_rate": 1.3194462378367745e-06,
      "loss": 0.4106,
      "step": 3179
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.20074952718463962,
      "learning_rate": 1.304269759484844e-06,
      "loss": 0.4222,
      "step": 3180
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.19612121742334224,
      "learning_rate": 1.289180494928366e-06,
      "loss": 0.4322,
      "step": 3181
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.2123229180067534,
      "learning_rate": 1.2741784575011738e-06,
      "loss": 0.3584,
      "step": 3182
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.1893099685468654,
      "learning_rate": 1.2592636604600416e-06,
      "loss": 0.3594,
      "step": 3183
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.18648008252336526,
      "learning_rate": 1.2444361169846043e-06,
      "loss": 0.398,
      "step": 3184
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.18085502760305805,
      "learning_rate": 1.2296958401774138e-06,
      "loss": 0.3983,
      "step": 3185
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.19751076143375046,
      "learning_rate": 1.2150428430639293e-06,
      "loss": 0.408,
      "step": 3186
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.19590341226599803,
      "learning_rate": 1.2004771385924486e-06,
      "loss": 0.3865,
      "step": 3187
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.20073541743544435,
      "learning_rate": 1.1859987396341754e-06,
      "loss": 0.3919,
      "step": 3188
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.1794635256970725,
      "learning_rate": 1.1716076589831094e-06,
      "loss": 0.3932,
      "step": 3189
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.18117681473041028,
      "learning_rate": 1.1573039093561556e-06,
      "loss": 0.3766,
      "step": 3190
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.1974848791585883,
      "learning_rate": 1.1430875033929811e-06,
      "loss": 0.3996,
      "step": 3191
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.19703745616617743,
      "learning_rate": 1.1289584536561148e-06,
      "loss": 0.326,
      "step": 3192
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.2036115421172062,
      "learning_rate": 1.1149167726308807e-06,
      "loss": 0.3973,
      "step": 3193
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.20659352979896078,
      "learning_rate": 1.1009624727253975e-06,
      "loss": 0.4069,
      "step": 3194
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.18197354367933516,
      "learning_rate": 1.0870955662705573e-06,
      "loss": 0.3551,
      "step": 3195
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.18674423376873953,
      "learning_rate": 1.073316065520058e-06,
      "loss": 0.357,
      "step": 3196
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.19410304667862208,
      "learning_rate": 1.0596239826503152e-06,
      "loss": 0.3949,
      "step": 3197
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.18686461985035407,
      "learning_rate": 1.0460193297605282e-06,
      "loss": 0.3947,
      "step": 3198
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.19537836898327499,
      "learning_rate": 1.032502118872647e-06,
      "loss": 0.4062,
      "step": 3199
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.18594513708780075,
      "learning_rate": 1.0190723619313169e-06,
      "loss": 0.3897,
      "step": 3200
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.1936564842779298,
      "learning_rate": 1.005730070803912e-06,
      "loss": 0.388,
      "step": 3201
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.20740314634305435,
      "learning_rate": 9.924752572805563e-07,
      "loss": 0.3689,
      "step": 3202
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.1960230455699611,
      "learning_rate": 9.793079330740141e-07,
      "loss": 0.4205,
      "step": 3203
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.20388983642637493,
      "learning_rate": 9.662281098197667e-07,
      "loss": 0.3772,
      "step": 3204
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.1944219717447334,
      "learning_rate": 9.53235799075991e-07,
      "loss": 0.3784,
      "step": 3205
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.1847623074417675,
      "learning_rate": 9.403310123235143e-07,
      "loss": 0.3827,
      "step": 3206
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.1873068719937528,
      "learning_rate": 9.275137609658147e-07,
      "loss": 0.3805,
      "step": 3207
    },
    {
      "epoch": 1.89,
      "grad_norm": 0.19110057619780071,
      "learning_rate": 9.147840563290322e-07,
      "loss": 0.41,
      "step": 3208
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.18875847827987116,
      "learning_rate": 9.021419096619355e-07,
      "loss": 0.3974,
      "step": 3209
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.19056356308135405,
      "learning_rate": 8.895873321359216e-07,
      "loss": 0.3926,
      "step": 3210
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.19827362278371313,
      "learning_rate": 8.771203348450163e-07,
      "loss": 0.4255,
      "step": 3211
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.18716344753936573,
      "learning_rate": 8.647409288058405e-07,
      "loss": 0.3791,
      "step": 3212
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.18713603666692794,
      "learning_rate": 8.524491249576328e-07,
      "loss": 0.3636,
      "step": 3213
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.18462853991958883,
      "learning_rate": 8.402449341622153e-07,
      "loss": 0.4069,
      "step": 3214
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.18918002939895678,
      "learning_rate": 8.281283672039619e-07,
      "loss": 0.3901,
      "step": 3215
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.20052989184313472,
      "learning_rate": 8.16099434789852e-07,
      "loss": 0.4137,
      "step": 3216
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.20948288799457052,
      "learning_rate": 8.041581475493942e-07,
      "loss": 0.3647,
      "step": 3217
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.1935257493379553,
      "learning_rate": 7.92304516034692e-07,
      "loss": 0.3746,
      "step": 3218
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.185075419298866,
      "learning_rate": 7.805385507203555e-07,
      "loss": 0.3931,
      "step": 3219
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.1771901008403066,
      "learning_rate": 7.688602620035346e-07,
      "loss": 0.3668,
      "step": 3220
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.20248014707155454,
      "learning_rate": 7.572696602038965e-07,
      "loss": 0.4483,
      "step": 3221
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.20360597931556265,
      "learning_rate": 7.457667555636371e-07,
      "loss": 0.3947,
      "step": 3222
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.2045453901260168,
      "learning_rate": 7.34351558247448e-07,
      "loss": 0.4156,
      "step": 3223
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.21374774038862304,
      "learning_rate": 7.230240783425379e-07,
      "loss": 0.4915,
      "step": 3224
    },
    {
      "epoch": 1.9,
      "grad_norm": 0.1922782340904662,
      "learning_rate": 7.117843258585666e-07,
      "loss": 0.378,
      "step": 3225
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.18629738802882323,
      "learning_rate": 7.006323107276891e-07,
      "loss": 0.3636,
      "step": 3226
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.2028644937530157,
      "learning_rate": 6.895680428045336e-07,
      "loss": 0.4367,
      "step": 3227
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.17539086550958038,
      "learning_rate": 6.785915318662128e-07,
      "loss": 0.352,
      "step": 3228
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.18385558521543932,
      "learning_rate": 6.677027876122344e-07,
      "loss": 0.3749,
      "step": 3229
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.1920429920106685,
      "learning_rate": 6.569018196645905e-07,
      "loss": 0.3889,
      "step": 3230
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.1862841603189568,
      "learning_rate": 6.461886375677017e-07,
      "loss": 0.3825,
      "step": 3231
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.2058739986931926,
      "learning_rate": 6.355632507884291e-07,
      "loss": 0.4188,
      "step": 3232
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.18709350187825954,
      "learning_rate": 6.250256687160172e-07,
      "loss": 0.4149,
      "step": 3233
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.17757746614569134,
      "learning_rate": 6.145759006621399e-07,
      "loss": 0.3358,
      "step": 3234
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.20412227510225445,
      "learning_rate": 6.042139558608995e-07,
      "loss": 0.3813,
      "step": 3235
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.18178197663692106,
      "learning_rate": 5.939398434687382e-07,
      "loss": 0.367,
      "step": 3236
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.18738341111633122,
      "learning_rate": 5.83753572564516e-07,
      "loss": 0.3846,
      "step": 3237
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.2000427655218693,
      "learning_rate": 5.736551521494881e-07,
      "loss": 0.4284,
      "step": 3238
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.19278619160187718,
      "learning_rate": 5.636445911472276e-07,
      "loss": 0.3957,
      "step": 3239
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.20909231431825567,
      "learning_rate": 5.537218984037251e-07,
      "loss": 0.4799,
      "step": 3240
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.19110883915080917,
      "learning_rate": 5.438870826872777e-07,
      "loss": 0.4012,
      "step": 3241
    },
    {
      "epoch": 1.91,
      "grad_norm": 0.19636539580797252,
      "learning_rate": 5.341401526885781e-07,
      "loss": 0.3895,
      "step": 3242
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.19441381825230175,
      "learning_rate": 5.244811170206143e-07,
      "loss": 0.3933,
      "step": 3243
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.2175801342691988,
      "learning_rate": 5.149099842187254e-07,
      "loss": 0.3942,
      "step": 3244
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.18605708598359258,
      "learning_rate": 5.054267627405574e-07,
      "loss": 0.3787,
      "step": 3245
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.21149087709158926,
      "learning_rate": 4.960314609661065e-07,
      "loss": 0.4078,
      "step": 3246
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.19567321474953228,
      "learning_rate": 4.86724087197643e-07,
      "loss": 0.4215,
      "step": 3247
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.18749423037376656,
      "learning_rate": 4.775046496597546e-07,
      "loss": 0.398,
      "step": 3248
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.17597971825430656,
      "learning_rate": 4.6837315649932435e-07,
      "loss": 0.3231,
      "step": 3249
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.1754490734554022,
      "learning_rate": 4.593296157855087e-07,
      "loss": 0.3417,
      "step": 3250
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.17688928271915183,
      "learning_rate": 4.503740355097597e-07,
      "loss": 0.3527,
      "step": 3251
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.17858234662256542,
      "learning_rate": 4.415064235857913e-07,
      "loss": 0.3453,
      "step": 3252
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.1991198742144274,
      "learning_rate": 4.3272678784959107e-07,
      "loss": 0.4018,
      "step": 3253
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.1819706791664033,
      "learning_rate": 4.240351360593975e-07,
      "loss": 0.3614,
      "step": 3254
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.20732741479735994,
      "learning_rate": 4.154314758957001e-07,
      "loss": 0.4432,
      "step": 3255
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.19273612751874944,
      "learning_rate": 4.0691581496125063e-07,
      "loss": 0.3775,
      "step": 3256
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.18697618780067982,
      "learning_rate": 3.984881607810187e-07,
      "loss": 0.378,
      "step": 3257
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.1999416093998594,
      "learning_rate": 3.9014852080220263e-07,
      "loss": 0.395,
      "step": 3258
    },
    {
      "epoch": 1.92,
      "grad_norm": 0.19999565370341477,
      "learning_rate": 3.81896902394252e-07,
      "loss": 0.4032,
      "step": 3259
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.17929856887972828,
      "learning_rate": 3.7373331284881187e-07,
      "loss": 0.3574,
      "step": 3260
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.17548502665274135,
      "learning_rate": 3.656577593797561e-07,
      "loss": 0.3425,
      "step": 3261
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19698553973937918,
      "learning_rate": 3.576702491231432e-07,
      "loss": 0.386,
      "step": 3262
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.2033553308059875,
      "learning_rate": 3.497707891372382e-07,
      "loss": 0.4253,
      "step": 3263
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.20003502614490035,
      "learning_rate": 3.419593864025239e-07,
      "loss": 0.3575,
      "step": 3264
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19362347610384475,
      "learning_rate": 3.342360478216344e-07,
      "loss": 0.336,
      "step": 3265
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.17962439452049042,
      "learning_rate": 3.2660078021941044e-07,
      "loss": 0.3472,
      "step": 3266
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19842186984852644,
      "learning_rate": 3.1905359034284377e-07,
      "loss": 0.4032,
      "step": 3267
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.18901879026511056,
      "learning_rate": 3.11594484861133e-07,
      "loss": 0.3993,
      "step": 3268
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.1908851117660752,
      "learning_rate": 3.042234703655833e-07,
      "loss": 0.4165,
      "step": 3269
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19852331588996355,
      "learning_rate": 2.969405533697178e-07,
      "loss": 0.4133,
      "step": 3270
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19497530192734594,
      "learning_rate": 2.8974574030917747e-07,
      "loss": 0.4006,
      "step": 3271
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.1973867553405961,
      "learning_rate": 2.8263903754174316e-07,
      "loss": 0.4122,
      "step": 3272
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.21082580950152388,
      "learning_rate": 2.756204513473581e-07,
      "loss": 0.4206,
      "step": 3273
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.189060516990307,
      "learning_rate": 2.6868998792808353e-07,
      "loss": 0.4018,
      "step": 3274
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.19300160359755386,
      "learning_rate": 2.6184765340810936e-07,
      "loss": 0.4001,
      "step": 3275
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.17830842913917283,
      "learning_rate": 2.550934538337768e-07,
      "loss": 0.3481,
      "step": 3276
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.18772421717848126,
      "learning_rate": 2.4842739517350055e-07,
      "loss": 0.3792,
      "step": 3277
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.18819704260435957,
      "learning_rate": 2.418494833178464e-07,
      "loss": 0.3672,
      "step": 3278
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.20097846173843922,
      "learning_rate": 2.3535972407947582e-07,
      "loss": 0.3814,
      "step": 3279
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.18030992920333294,
      "learning_rate": 2.2895812319313482e-07,
      "loss": 0.3622,
      "step": 3280
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.1789641489632021,
      "learning_rate": 2.2264468631569836e-07,
      "loss": 0.369,
      "step": 3281
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.19429333891323297,
      "learning_rate": 2.1641941902611483e-07,
      "loss": 0.3832,
      "step": 3282
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.2094149050617977,
      "learning_rate": 2.1028232682542836e-07,
      "loss": 0.4049,
      "step": 3283
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.1981759250210741,
      "learning_rate": 2.0423341513675643e-07,
      "loss": 0.4095,
      "step": 3284
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.19249954341521933,
      "learning_rate": 1.9827268930530106e-07,
      "loss": 0.3773,
      "step": 3285
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.2164196991626135,
      "learning_rate": 1.9240015459832671e-07,
      "loss": 0.4315,
      "step": 3286
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.1941807089426153,
      "learning_rate": 1.8661581620519342e-07,
      "loss": 0.3948,
      "step": 3287
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.19712036540070402,
      "learning_rate": 1.8091967923731246e-07,
      "loss": 0.3887,
      "step": 3288
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.19252783894527362,
      "learning_rate": 1.7531174872813526e-07,
      "loss": 0.3564,
      "step": 3289
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.19529647080333504,
      "learning_rate": 1.6979202963318674e-07,
      "loss": 0.3806,
      "step": 3290
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.18334403219059206,
      "learning_rate": 1.64360526830043e-07,
      "loss": 0.3665,
      "step": 3291
    },
    {
      "epoch": 1.94,
      "grad_norm": 0.18263929788467487,
      "learning_rate": 1.590172451183314e-07,
      "loss": 0.3843,
      "step": 3292
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.17630257086085244,
      "learning_rate": 1.537621892197083e-07,
      "loss": 0.3321,
      "step": 3293
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.2045431522577309,
      "learning_rate": 1.4859536377785922e-07,
      "loss": 0.4665,
      "step": 3294
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.20490350736046714,
      "learning_rate": 1.4351677335854296e-07,
      "loss": 0.4077,
      "step": 3295
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.18550515027639283,
      "learning_rate": 1.385264224495142e-07,
      "loss": 0.3445,
      "step": 3296
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.19548530453578192,
      "learning_rate": 1.3362431546055655e-07,
      "loss": 0.3709,
      "step": 3297
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.19785000290190885,
      "learning_rate": 1.288104567234827e-07,
      "loss": 0.3882,
      "step": 3298
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.1998704411519638,
      "learning_rate": 1.2408485049213438e-07,
      "loss": 0.4262,
      "step": 3299
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.18703231547572702,
      "learning_rate": 1.1944750094234902e-07,
      "loss": 0.376,
      "step": 3300
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.1891653601869006,
      "learning_rate": 1.1489841217197094e-07,
      "loss": 0.3859,
      "step": 3301
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.19006481114825025,
      "learning_rate": 1.1043758820088457e-07,
      "loss": 0.3897,
      "step": 3302
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.20352257802392035,
      "learning_rate": 1.0606503297094783e-07,
      "loss": 0.4363,
      "step": 3303
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.1961056459668248,
      "learning_rate": 1.0178075034601442e-07,
      "loss": 0.3704,
      "step": 3304
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.17970682190325277,
      "learning_rate": 9.758474411196706e-08,
      "loss": 0.3542,
      "step": 3305
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.19172082350713635,
      "learning_rate": 9.347701797665087e-08,
      "loss": 0.3978,
      "step": 3306
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.18947042563958155,
      "learning_rate": 8.945757556991785e-08,
      "loss": 0.3871,
      "step": 3307
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.18833139446786673,
      "learning_rate": 8.55264204436046e-08,
      "loss": 0.3766,
      "step": 3308
    },
    {
      "epoch": 1.95,
      "grad_norm": 0.1946344551939453,
      "learning_rate": 8.168355607152122e-08,
      "loss": 0.4039,
      "step": 3309
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.19530497465373736,
      "learning_rate": 7.792898584946251e-08,
      "loss": 0.4009,
      "step": 3310
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.19579497930103548,
      "learning_rate": 7.426271309520783e-08,
      "loss": 0.4132,
      "step": 3311
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18688070611383054,
      "learning_rate": 7.068474104852118e-08,
      "loss": 0.3821,
      "step": 3312
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.19621019324623737,
      "learning_rate": 6.719507287110682e-08,
      "loss": 0.4158,
      "step": 3313
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18754582228914488,
      "learning_rate": 6.37937116466758e-08,
      "loss": 0.3592,
      "step": 3314
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.200554814568333,
      "learning_rate": 6.048066038086831e-08,
      "loss": 0.4315,
      "step": 3315
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18446685190761059,
      "learning_rate": 5.725592200130914e-08,
      "loss": 0.3737,
      "step": 3316
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.20120474113690331,
      "learning_rate": 5.4119499357585533e-08,
      "loss": 0.3744,
      "step": 3317
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.20872833620428924,
      "learning_rate": 5.107139522123605e-08,
      "loss": 0.4176,
      "step": 3318
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18038584535807978,
      "learning_rate": 4.811161228576166e-08,
      "loss": 0.383,
      "step": 3319
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.194073111066338,
      "learning_rate": 4.524015316662577e-08,
      "loss": 0.3758,
      "step": 3320
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.20421077426568254,
      "learning_rate": 4.24570204011987e-08,
      "loss": 0.4027,
      "step": 3321
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18616814783372337,
      "learning_rate": 3.9762216448868684e-08,
      "loss": 0.3928,
      "step": 3322
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18864241678818086,
      "learning_rate": 3.715574369090869e-08,
      "loss": 0.3676,
      "step": 3323
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.21193430089157575,
      "learning_rate": 3.463760443057629e-08,
      "loss": 0.4087,
      "step": 3324
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18544078878476022,
      "learning_rate": 3.2207800893069296e-08,
      "loss": 0.3837,
      "step": 3325
    },
    {
      "epoch": 1.96,
      "grad_norm": 0.18432018287612154,
      "learning_rate": 2.9866335225481324e-08,
      "loss": 0.3882,
      "step": 3326
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.20191828889615152,
      "learning_rate": 2.7613209496912816e-08,
      "loss": 0.4403,
      "step": 3327
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.18871253627610182,
      "learning_rate": 2.5448425698360034e-08,
      "loss": 0.4026,
      "step": 3328
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19537379284916057,
      "learning_rate": 2.337198574274835e-08,
      "loss": 0.419,
      "step": 3329
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.18290952042136385,
      "learning_rate": 2.1383891464965554e-08,
      "loss": 0.4221,
      "step": 3330
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19543074561388615,
      "learning_rate": 1.948414462181747e-08,
      "loss": 0.3414,
      "step": 3331
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.18511344425764348,
      "learning_rate": 1.7672746892039014e-08,
      "loss": 0.3627,
      "step": 3332
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19153383455638,
      "learning_rate": 1.5949699876294244e-08,
      "loss": 0.3863,
      "step": 3333
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.1892980675269156,
      "learning_rate": 1.4315005097176314e-08,
      "loss": 0.409,
      "step": 3334
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.18954529984839885,
      "learning_rate": 1.2768663999207509e-08,
      "loss": 0.3934,
      "step": 3335
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19572839344458184,
      "learning_rate": 1.1310677948839221e-08,
      "loss": 0.4073,
      "step": 3336
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19384437473620314,
      "learning_rate": 9.94104823441866e-09,
      "loss": 0.3871,
      "step": 3337
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.21376883962625398,
      "learning_rate": 8.659776066255454e-09,
      "loss": 0.3812,
      "step": 3338
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.18417626940971296,
      "learning_rate": 7.466862576555045e-09,
      "loss": 0.3715,
      "step": 3339
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.20914923552989872,
      "learning_rate": 6.362308819451989e-09,
      "loss": 0.416,
      "step": 3340
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19634958034455302,
      "learning_rate": 5.346115771009963e-09,
      "loss": 0.3908,
      "step": 3341
    },
    {
      "epoch": 1.97,
      "grad_norm": 0.19227965875344546,
      "learning_rate": 4.418284329188449e-09,
      "loss": 0.361,
      "step": 3342
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.19660676906902444,
      "learning_rate": 3.5788153138871515e-09,
      "loss": 0.3805,
      "step": 3343
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.20634066208383187,
      "learning_rate": 2.8277094669126868e-09,
      "loss": 0.4423,
      "step": 3344
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.18674281963839826,
      "learning_rate": 2.1649674519785836e-09,
      "loss": 0.3768,
      "step": 3345
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.19693858756590374,
      "learning_rate": 1.590589854749691e-09,
      "loss": 0.404,
      "step": 3346
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.17745029377636273,
      "learning_rate": 1.1045771827533635e-09,
      "loss": 0.391,
      "step": 3347
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.21627082820922133,
      "learning_rate": 7.069298654793777e-10,
      "loss": 0.4235,
      "step": 3348
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.19713330297784887,
      "learning_rate": 3.9764825430221866e-10,
      "loss": 0.4102,
      "step": 3349
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.1868699746011865,
      "learning_rate": 1.767326225365906e-10,
      "loss": 0.4033,
      "step": 3350
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.20307699817299302,
      "learning_rate": 4.4183165393008044e-11,
      "loss": 0.3578,
      "step": 3351
    },
    {
      "epoch": 1.98,
      "grad_norm": 0.19399685831381294,
      "learning_rate": 0.0,
      "loss": 0.3825,
      "step": 3352
    }
  ],
  "logging_steps": 1,
  "max_steps": 3352,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 838,
  "total_flos": 3318986116694016.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}