{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.995877233165369,
  "eval_steps": 500,
  "global_step": 1635,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01832340815391663,
      "grad_norm": 0.06011037901043892,
      "learning_rate": 4e-05,
      "loss": 1.296,
      "step": 10
    },
    {
      "epoch": 0.03664681630783326,
      "grad_norm": 0.05856110155582428,
      "learning_rate": 8e-05,
      "loss": 1.3316,
      "step": 20
    },
    {
      "epoch": 0.054970224461749886,
      "grad_norm": 0.0607464499771595,
      "learning_rate": 0.00012,
      "loss": 1.2794,
      "step": 30
    },
    {
      "epoch": 0.07329363261566652,
      "grad_norm": 0.06632011383771896,
      "learning_rate": 0.00016,
      "loss": 1.3129,
      "step": 40
    },
    {
      "epoch": 0.09161704076958314,
      "grad_norm": 0.06631691753864288,
      "learning_rate": 0.0002,
      "loss": 1.2741,
      "step": 50
    },
    {
      "epoch": 0.10994044892349977,
      "grad_norm": 0.056466877460479736,
      "learning_rate": 0.00019998035748930052,
      "loss": 1.2717,
      "step": 60
    },
    {
      "epoch": 0.1282638570774164,
      "grad_norm": 0.05860245227813721,
      "learning_rate": 0.00019992143767376668,
      "loss": 1.2091,
      "step": 70
    },
    {
      "epoch": 0.14658726523133303,
      "grad_norm": 0.06553175300359726,
      "learning_rate": 0.00019982326370006058,
      "loss": 1.1926,
      "step": 80
    },
    {
      "epoch": 0.16491067338524965,
      "grad_norm": 0.07061401754617691,
      "learning_rate": 0.00019968587413584876,
      "loss": 1.1767,
      "step": 90
    },
    {
      "epoch": 0.1832340815391663,
      "grad_norm": 0.07183243334293365,
      "learning_rate": 0.000199509322954651,
      "loss": 1.1183,
      "step": 100
    },
    {
      "epoch": 0.2015574896930829,
      "grad_norm": 0.06944898515939713,
      "learning_rate": 0.00019929367951463655,
      "loss": 1.0868,
      "step": 110
    },
    {
      "epoch": 0.21988089784699955,
      "grad_norm": 0.06642703711986542,
      "learning_rate": 0.00019903902853137703,
      "loss": 1.048,
      "step": 120
    },
    {
      "epoch": 0.23820430600091616,
      "grad_norm": 0.06603793054819107,
      "learning_rate": 0.00019874547004456562,
      "loss": 1.0195,
      "step": 130
    },
    {
      "epoch": 0.2565277141548328,
      "grad_norm": 0.06488285213708878,
      "learning_rate": 0.00019841311937871675,
      "loss": 1.0014,
      "step": 140
    },
    {
      "epoch": 0.2748511223087494,
      "grad_norm": 0.05940372124314308,
      "learning_rate": 0.0001980421070978606,
      "loss": 0.9943,
      "step": 150
    },
    {
      "epoch": 0.29317453046266606,
      "grad_norm": 0.059967171400785446,
      "learning_rate": 0.00019763257895425113,
      "loss": 0.9349,
      "step": 160
    },
    {
      "epoch": 0.3114979386165827,
      "grad_norm": 0.0554397851228714,
      "learning_rate": 0.0001971846958311071,
      "loss": 0.9045,
      "step": 170
    },
    {
      "epoch": 0.3298213467704993,
      "grad_norm": 0.055131904780864716,
      "learning_rate": 0.00019669863367940935,
      "loss": 0.8799,
      "step": 180
    },
    {
      "epoch": 0.34814475492441593,
      "grad_norm": 0.04358826205134392,
      "learning_rate": 0.00019617458344877816,
      "loss": 0.8504,
      "step": 190
    },
    {
      "epoch": 0.3664681630783326,
      "grad_norm": 0.04535752162337303,
      "learning_rate": 0.00019561275101245883,
      "loss": 0.828,
      "step": 200
    },
    {
      "epoch": 0.3847915712322492,
      "grad_norm": 0.04672062397003174,
      "learning_rate": 0.00019501335708644414,
      "loss": 0.8114,
      "step": 210
    },
    {
      "epoch": 0.4031149793861658,
      "grad_norm": 0.04161343351006508,
      "learning_rate": 0.00019437663714276618,
      "loss": 0.846,
      "step": 220
    },
    {
      "epoch": 0.42143838754008245,
      "grad_norm": 0.03887801244854927,
      "learning_rate": 0.0001937028413169911,
      "loss": 0.7911,
      "step": 230
    },
    {
      "epoch": 0.4397617956939991,
      "grad_norm": 0.03659196197986603,
      "learning_rate": 0.00019299223430995323,
      "loss": 0.7669,
      "step": 240
    },
    {
      "epoch": 0.45808520384791573,
      "grad_norm": 0.03447382524609566,
      "learning_rate": 0.00019224509528376738,
      "loss": 0.782,
      "step": 250
    },
    {
      "epoch": 0.4764086120018323,
      "grad_norm": 0.028725607320666313,
      "learning_rate": 0.00019146171775215982,
      "loss": 0.7183,
      "step": 260
    },
    {
      "epoch": 0.49473202015574896,
      "grad_norm": 0.027673941105604172,
      "learning_rate": 0.0001906424094651615,
      "loss": 0.7018,
      "step": 270
    },
    {
      "epoch": 0.5130554283096656,
      "grad_norm": 0.10227353870868683,
      "learning_rate": 0.00018978749228820826,
      "loss": 0.72,
      "step": 280
    },
    {
      "epoch": 0.5313788364635822,
      "grad_norm": 0.022650673985481262,
      "learning_rate": 0.00018889730207569607,
      "loss": 0.6936,
      "step": 290
    },
    {
      "epoch": 0.5497022446174988,
      "grad_norm": 0.023469725623726845,
      "learning_rate": 0.00018797218853904037,
      "loss": 0.6765,
      "step": 300
    },
    {
      "epoch": 0.5680256527714155,
      "grad_norm": 0.018101360648870468,
      "learning_rate": 0.000187012515109292,
      "loss": 0.6799,
      "step": 310
    },
    {
      "epoch": 0.5863490609253321,
      "grad_norm": 0.016794538125395775,
      "learning_rate": 0.00018601865879436317,
      "loss": 0.6732,
      "step": 320
    },
    {
      "epoch": 0.6046724690792488,
      "grad_norm": 0.017263714224100113,
      "learning_rate": 0.00018499101003091993,
      "loss": 0.6695,
      "step": 330
    },
    {
      "epoch": 0.6229958772331654,
      "grad_norm": 0.016381224617362022,
      "learning_rate": 0.0001839299725309989,
      "loss": 0.6928,
      "step": 340
    },
    {
      "epoch": 0.641319285387082,
      "grad_norm": 0.015325487591326237,
      "learning_rate": 0.00018283596312340891,
      "loss": 0.6622,
      "step": 350
    },
    {
      "epoch": 0.6596426935409986,
      "grad_norm": 0.014056784100830555,
      "learning_rate": 0.0001817094115899799,
      "loss": 0.7612,
      "step": 360
    },
    {
      "epoch": 0.6779661016949152,
      "grad_norm": 0.015031951479613781,
      "learning_rate": 0.00018055076049672283,
      "loss": 0.6596,
      "step": 370
    },
    {
      "epoch": 0.6962895098488319,
      "grad_norm": 0.01640532910823822,
      "learning_rate": 0.00017936046501996762,
      "loss": 0.6837,
      "step": 380
    },
    {
      "epoch": 0.7146129180027485,
      "grad_norm": 0.01830482669174671,
      "learning_rate": 0.000178138992767547,
      "loss": 0.6812,
      "step": 390
    },
    {
      "epoch": 0.7329363261566652,
      "grad_norm": 0.0472831092774868,
      "learning_rate": 0.00017688682359509678,
      "loss": 0.674,
      "step": 400
    },
    {
      "epoch": 0.7512597343105818,
      "grad_norm": 0.012456170283257961,
      "learning_rate": 0.00017560444941754427,
      "loss": 0.6518,
      "step": 410
    },
    {
      "epoch": 0.7695831424644984,
      "grad_norm": 0.01401186641305685,
      "learning_rate": 0.0001742923740158595,
      "loss": 0.6418,
      "step": 420
    },
    {
      "epoch": 0.7879065506184151,
      "grad_norm": 0.015530922450125217,
      "learning_rate": 0.00017295111283914487,
      "loss": 0.6465,
      "step": 430
    },
    {
      "epoch": 0.8062299587723316,
      "grad_norm": 0.01402275450527668,
      "learning_rate": 0.0001715811928021406,
      "loss": 0.6642,
      "step": 440
    },
    {
      "epoch": 0.8245533669262483,
      "grad_norm": 0.01176263578236103,
      "learning_rate": 0.0001701831520782264,
      "loss": 0.6336,
      "step": 450
    },
    {
      "epoch": 0.8428767750801649,
      "grad_norm": 0.013003438711166382,
      "learning_rate": 0.00016875753988799982,
      "loss": 0.6469,
      "step": 460
    },
    {
      "epoch": 0.8612001832340815,
      "grad_norm": 0.011523702181875706,
      "learning_rate": 0.00016730491628351487,
      "loss": 0.6434,
      "step": 470
    },
    {
      "epoch": 0.8795235913879982,
      "grad_norm": 0.011919384822249413,
      "learning_rate": 0.00016582585192826543,
      "loss": 0.6588,
      "step": 480
    },
    {
      "epoch": 0.8978469995419148,
      "grad_norm": 0.013994649983942509,
      "learning_rate": 0.00016432092787299992,
      "loss": 0.6315,
      "step": 490
    },
    {
      "epoch": 0.9161704076958315,
      "grad_norm": 0.013580686412751675,
      "learning_rate": 0.00016279073532745553,
      "loss": 0.6782,
      "step": 500
    },
    {
      "epoch": 0.934493815849748,
      "grad_norm": 0.01364163402467966,
      "learning_rate": 0.00016123587542810118,
      "loss": 0.6334,
      "step": 510
    },
    {
      "epoch": 0.9528172240036646,
      "grad_norm": 0.013080372475087643,
      "learning_rate": 0.0001596569590019811,
      "loss": 0.6233,
      "step": 520
    },
    {
      "epoch": 0.9711406321575813,
      "grad_norm": 0.056398555636405945,
      "learning_rate": 0.00015805460632675112,
      "loss": 0.6557,
      "step": 530
    },
    {
      "epoch": 0.9894640403114979,
      "grad_norm": 0.012467793188989162,
      "learning_rate": 0.00015642944688700264,
      "loss": 0.6315,
      "step": 540
    },
    {
      "epoch": 1.0077874484654146,
      "grad_norm": 0.012495579198002815,
      "learning_rate": 0.00015478211912696929,
      "loss": 0.6177,
      "step": 550
    },
    {
      "epoch": 1.026110856619331,
      "grad_norm": 0.010633349418640137,
      "learning_rate": 0.00015311327019971413,
      "loss": 0.644,
      "step": 560
    },
    {
      "epoch": 1.0444342647732479,
      "grad_norm": 0.012223353609442711,
      "learning_rate": 0.00015142355571289533,
      "loss": 0.6502,
      "step": 570
    },
    {
      "epoch": 1.0627576729271644,
      "grad_norm": 0.012305443175137043,
      "learning_rate": 0.00014971363947121065,
      "loss": 0.6185,
      "step": 580
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 0.017129750922322273,
      "learning_rate": 0.0001479841932156215,
      "loss": 0.6154,
      "step": 590
    },
    {
      "epoch": 1.0994044892349977,
      "grad_norm": 0.013129614293575287,
      "learning_rate": 0.0001462358963594595,
      "loss": 0.614,
      "step": 600
    },
    {
      "epoch": 1.1177278973889144,
      "grad_norm": 0.01199612207710743,
      "learning_rate": 0.00014446943572151867,
      "loss": 0.6128,
      "step": 610
    },
    {
      "epoch": 1.136051305542831,
      "grad_norm": 0.012518757954239845,
      "learning_rate": 0.00014268550525623868,
      "loss": 0.6169,
      "step": 620
    },
    {
      "epoch": 1.1543747136967477,
      "grad_norm": 0.01321893185377121,
      "learning_rate": 0.00014088480578108454,
      "loss": 0.6402,
      "step": 630
    },
    {
      "epoch": 1.1726981218506642,
      "grad_norm": 0.012497123330831528,
      "learning_rate": 0.00013906804470123038,
      "loss": 0.613,
      "step": 640
    },
    {
      "epoch": 1.1910215300045808,
      "grad_norm": 0.01103185210376978,
      "learning_rate": 0.00013723593573165523,
      "loss": 0.6114,
      "step": 650
    },
    {
      "epoch": 1.2093449381584975,
      "grad_norm": 0.012833209708333015,
      "learning_rate": 0.00013538919861675979,
      "loss": 0.617,
      "step": 660
    },
    {
      "epoch": 1.227668346312414,
      "grad_norm": 0.058991171419620514,
      "learning_rate": 0.0001335285588476148,
      "loss": 0.6298,
      "step": 670
    },
    {
      "epoch": 1.2459917544663308,
      "grad_norm": 0.013424506410956383,
      "learning_rate": 0.00013165474737695184,
      "loss": 0.6488,
      "step": 680
    },
    {
      "epoch": 1.2643151626202473,
      "grad_norm": 0.01241598092019558,
      "learning_rate": 0.00012976850033200805,
      "loss": 0.6088,
      "step": 690
    },
    {
      "epoch": 1.2826385707741639,
      "grad_norm": 0.012560844421386719,
      "learning_rate": 0.00012787055872533865,
      "loss": 0.6032,
      "step": 700
    },
    {
      "epoch": 1.3009619789280806,
      "grad_norm": 0.010990115813910961,
      "learning_rate": 0.00012596166816371005,
      "loss": 0.6282,
      "step": 710
    },
    {
      "epoch": 1.3192853870819974,
      "grad_norm": 0.01151216309517622,
      "learning_rate": 0.00012404257855518782,
      "loss": 0.6297,
      "step": 720
    },
    {
      "epoch": 1.337608795235914,
      "grad_norm": 0.011924243532121181,
      "learning_rate": 0.0001221140438145353,
      "loss": 0.6044,
      "step": 730
    },
    {
      "epoch": 1.3559322033898304,
      "grad_norm": 0.013133584521710873,
      "learning_rate": 0.00012017682156703807,
      "loss": 0.6107,
      "step": 740
    },
    {
      "epoch": 1.3742556115437472,
      "grad_norm": 0.014631664380431175,
      "learning_rate": 0.00011823167285087063,
      "loss": 0.6213,
      "step": 750
    },
    {
      "epoch": 1.3925790196976637,
      "grad_norm": 0.011716130189597607,
      "learning_rate": 0.00011627936181812234,
      "loss": 0.6179,
      "step": 760
    },
    {
      "epoch": 1.4109024278515805,
      "grad_norm": 0.013568080961704254,
      "learning_rate": 0.00011432065543460015,
      "loss": 0.5965,
      "step": 770
    },
    {
      "epoch": 1.429225836005497,
      "grad_norm": 0.012749516405165195,
      "learning_rate": 0.00011235632317852605,
      "loss": 0.6128,
      "step": 780
    },
    {
      "epoch": 1.4475492441594136,
      "grad_norm": 0.011930575594305992,
      "learning_rate": 0.00011038713673824715,
      "loss": 0.6117,
      "step": 790
    },
    {
      "epoch": 1.4658726523133303,
      "grad_norm": 0.013386845588684082,
      "learning_rate": 0.00010841386970907785,
      "loss": 0.6186,
      "step": 800
    },
    {
      "epoch": 1.4841960604672468,
      "grad_norm": 0.012542261742055416,
      "learning_rate": 0.00010643729728939292,
      "loss": 0.5909,
      "step": 810
    },
    {
      "epoch": 1.5025194686211636,
      "grad_norm": 0.010874781757593155,
      "learning_rate": 0.0001044581959760903,
      "loss": 0.5903,
      "step": 820
    },
    {
      "epoch": 1.5208428767750801,
      "grad_norm": 0.010801080614328384,
      "learning_rate": 0.00010247734325954447,
      "loss": 0.5929,
      "step": 830
    },
    {
      "epoch": 1.5391662849289967,
      "grad_norm": 0.012022151611745358,
      "learning_rate": 0.00010049551731816902,
      "loss": 0.6117,
      "step": 840
    },
    {
      "epoch": 1.5574896930829134,
      "grad_norm": 0.011683526448905468,
      "learning_rate": 9.851349671270909e-05,
      "loss": 0.6283,
      "step": 850
    },
    {
      "epoch": 1.5758131012368302,
      "grad_norm": 0.01242094673216343,
      "learning_rate": 9.653206008038364e-05,
      "loss": 0.5901,
      "step": 860
    },
    {
      "epoch": 1.5941365093907467,
      "grad_norm": 0.011935061775147915,
      "learning_rate": 9.455198582899774e-05,
      "loss": 0.5848,
      "step": 870
    },
    {
      "epoch": 1.6124599175446632,
      "grad_norm": 0.01208607666194439,
      "learning_rate": 9.257405183114473e-05,
      "loss": 0.5912,
      "step": 880
    },
    {
      "epoch": 1.63078332569858,
      "grad_norm": 0.01227467879652977,
      "learning_rate": 9.059903511861891e-05,
      "loss": 0.5859,
      "step": 890
    },
    {
      "epoch": 1.6491067338524965,
      "grad_norm": 0.0517101027071476,
      "learning_rate": 8.862771157715847e-05,
      "loss": 0.607,
      "step": 900
    },
    {
      "epoch": 1.6674301420064133,
      "grad_norm": 0.010776874609291553,
      "learning_rate": 8.666085564163852e-05,
      "loss": 0.5788,
      "step": 910
    },
    {
      "epoch": 1.6857535501603298,
      "grad_norm": 0.013487796299159527,
      "learning_rate": 8.469923999183411e-05,
      "loss": 0.5766,
      "step": 920
    },
    {
      "epoch": 1.7040769583142463,
      "grad_norm": 0.011671481654047966,
      "learning_rate": 8.274363524887315e-05,
      "loss": 0.5976,
      "step": 930
    },
    {
      "epoch": 1.722400366468163,
      "grad_norm": 0.01118433102965355,
      "learning_rate": 8.079480967249737e-05,
      "loss": 0.6021,
      "step": 940
    },
    {
      "epoch": 1.7407237746220798,
      "grad_norm": 0.013788875192403793,
      "learning_rate": 7.88535288592514e-05,
      "loss": 0.579,
      "step": 950
    },
    {
      "epoch": 1.7590471827759964,
      "grad_norm": 0.013310333713889122,
      "learning_rate": 7.692055544171823e-05,
      "loss": 0.5979,
      "step": 960
    },
    {
      "epoch": 1.777370590929913,
      "grad_norm": 0.053230684250593185,
      "learning_rate": 7.49966487889185e-05,
      "loss": 0.5906,
      "step": 970
    },
    {
      "epoch": 1.7956939990838294,
      "grad_norm": 0.013503102585673332,
      "learning_rate": 7.308256470799256e-05,
      "loss": 0.6061,
      "step": 980
    },
    {
      "epoch": 1.8140174072377462,
      "grad_norm": 0.01030020508915186,
      "learning_rate": 7.117905514728107e-05,
      "loss": 0.5776,
      "step": 990
    },
    {
      "epoch": 1.832340815391663,
      "grad_norm": 0.01204043161123991,
      "learning_rate": 6.928686790092235e-05,
      "loss": 0.584,
      "step": 1000
    },
    {
      "epoch": 1.8506642235455795,
      "grad_norm": 0.011951706372201443,
      "learning_rate": 6.740674631508105e-05,
      "loss": 0.5693,
      "step": 1010
    },
    {
      "epoch": 1.868987631699496,
      "grad_norm": 0.011565761640667915,
      "learning_rate": 6.553942899592447e-05,
      "loss": 0.594,
      "step": 1020
    },
    {
      "epoch": 1.8873110398534128,
      "grad_norm": 0.011589662171900272,
      "learning_rate": 6.368564951946103e-05,
      "loss": 0.5961,
      "step": 1030
    },
    {
      "epoch": 1.9056344480073295,
      "grad_norm": 0.011061927303671837,
      "learning_rate": 6.184613614335476e-05,
      "loss": 0.5906,
      "step": 1040
    },
    {
      "epoch": 1.923957856161246,
      "grad_norm": 0.011009737849235535,
      "learning_rate": 6.002161152082909e-05,
      "loss": 0.5703,
      "step": 1050
    },
    {
      "epoch": 1.9422812643151626,
      "grad_norm": 0.012721202336251736,
      "learning_rate": 5.8212792416772374e-05,
      "loss": 0.5728,
      "step": 1060
    },
    {
      "epoch": 1.960604672469079,
      "grad_norm": 0.014589471742510796,
      "learning_rate": 5.6420389426156814e-05,
      "loss": 0.5722,
      "step": 1070
    },
    {
      "epoch": 1.9789280806229959,
      "grad_norm": 0.011400967836380005,
      "learning_rate": 5.464510669488073e-05,
      "loss": 0.5699,
      "step": 1080
    },
    {
      "epoch": 1.9972514887769126,
      "grad_norm": 0.011737444438040257,
      "learning_rate": 5.288764164314499e-05,
      "loss": 0.5872,
      "step": 1090
    },
    {
      "epoch": 2.015574896930829,
      "grad_norm": 0.011182826943695545,
      "learning_rate": 5.1148684691471304e-05,
      "loss": 0.5722,
      "step": 1100
    },
    {
      "epoch": 2.0338983050847457,
      "grad_norm": 0.011314311996102333,
      "learning_rate": 4.942891898947024e-05,
      "loss": 0.5697,
      "step": 1110
    },
    {
      "epoch": 2.052221713238662,
      "grad_norm": 0.04409461468458176,
      "learning_rate": 4.772902014746583e-05,
      "loss": 0.6125,
      "step": 1120
    },
    {
      "epoch": 2.070545121392579,
      "grad_norm": 0.01187494769692421,
      "learning_rate": 4.6049655971081916e-05,
      "loss": 0.5878,
      "step": 1130
    },
    {
      "epoch": 2.0888685295464957,
      "grad_norm": 0.013561342842876911,
      "learning_rate": 4.439148619889453e-05,
      "loss": 0.5681,
      "step": 1140
    },
    {
      "epoch": 2.1071919377004122,
      "grad_norm": 0.01049109362065792,
      "learning_rate": 4.2755162243253554e-05,
      "loss": 0.5731,
      "step": 1150
    },
    {
      "epoch": 2.1255153458543288,
      "grad_norm": 0.012508846819400787,
      "learning_rate": 4.114132693437511e-05,
      "loss": 0.5683,
      "step": 1160
    },
    {
      "epoch": 2.1438387540082458,
      "grad_norm": 0.011165381409227848,
      "learning_rate": 3.955061426780562e-05,
      "loss": 0.6208,
      "step": 1170
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 0.010869491845369339,
      "learning_rate": 3.7983649155356536e-05,
      "loss": 0.5706,
      "step": 1180
    },
    {
      "epoch": 2.180485570316079,
      "grad_norm": 0.012921607121825218,
      "learning_rate": 3.644104717960761e-05,
      "loss": 0.5615,
      "step": 1190
    },
    {
      "epoch": 2.1988089784699953,
      "grad_norm": 0.010369219817221165,
      "learning_rate": 3.492341435207509e-05,
      "loss": 0.5602,
      "step": 1200
    },
    {
      "epoch": 2.217132386623912,
      "grad_norm": 0.05246191471815109,
      "learning_rate": 3.343134687514007e-05,
      "loss": 0.5924,
      "step": 1210
    },
    {
      "epoch": 2.235455794777829,
      "grad_norm": 0.04603949189186096,
      "learning_rate": 3.1965430907830166e-05,
      "loss": 0.5805,
      "step": 1220
    },
    {
      "epoch": 2.2537792029317454,
      "grad_norm": 0.01212139893323183,
      "learning_rate": 3.0526242335546716e-05,
      "loss": 0.5783,
      "step": 1230
    },
    {
      "epoch": 2.272102611085662,
      "grad_norm": 0.010512073524296284,
      "learning_rate": 2.911434654382842e-05,
      "loss": 0.5734,
      "step": 1240
    },
    {
      "epoch": 2.2904260192395784,
      "grad_norm": 0.011081124655902386,
      "learning_rate": 2.773029819623917e-05,
      "loss": 0.5688,
      "step": 1250
    },
    {
      "epoch": 2.3087494273934954,
      "grad_norm": 0.013736708089709282,
      "learning_rate": 2.6374641016468416e-05,
      "loss": 0.6005,
      "step": 1260
    },
    {
      "epoch": 2.327072835547412,
      "grad_norm": 0.010449289344251156,
      "learning_rate": 2.5047907574729456e-05,
      "loss": 0.5788,
      "step": 1270
    },
    {
      "epoch": 2.3453962437013285,
      "grad_norm": 0.017625920474529266,
      "learning_rate": 2.375061907853867e-05,
      "loss": 0.5613,
      "step": 1280
    },
    {
      "epoch": 2.363719651855245,
      "grad_norm": 0.012163090519607067,
      "learning_rate": 2.24832851679594e-05,
      "loss": 0.5709,
      "step": 1290
    },
    {
      "epoch": 2.3820430600091616,
      "grad_norm": 0.010039297863841057,
      "learning_rate": 2.1246403715389674e-05,
      "loss": 0.5677,
      "step": 1300
    },
    {
      "epoch": 2.4003664681630785,
      "grad_norm": 0.012111610732972622,
      "learning_rate": 2.0040460629972792e-05,
      "loss": 0.5898,
      "step": 1310
    },
    {
      "epoch": 2.418689876316995,
      "grad_norm": 0.012733533047139645,
      "learning_rate": 1.8865929666707904e-05,
      "loss": 0.5982,
      "step": 1320
    },
    {
      "epoch": 2.4370132844709116,
      "grad_norm": 0.011774188838899136,
      "learning_rate": 1.7723272240335265e-05,
      "loss": 0.5649,
      "step": 1330
    },
    {
      "epoch": 2.455336692624828,
      "grad_norm": 0.013299621641635895,
      "learning_rate": 1.6612937244069328e-05,
      "loss": 0.5775,
      "step": 1340
    },
    {
      "epoch": 2.4736601007787447,
      "grad_norm": 0.012371544726192951,
      "learning_rate": 1.5535360873251027e-05,
      "loss": 0.5657,
      "step": 1350
    },
    {
      "epoch": 2.4919835089326616,
      "grad_norm": 0.013111775740981102,
      "learning_rate": 1.4490966453988187e-05,
      "loss": 0.5814,
      "step": 1360
    },
    {
      "epoch": 2.510306917086578,
      "grad_norm": 0.011858658865094185,
      "learning_rate": 1.3480164276851926e-05,
      "loss": 0.562,
      "step": 1370
    },
    {
      "epoch": 2.5286303252404947,
      "grad_norm": 0.017958352342247963,
      "learning_rate": 1.2503351435693811e-05,
      "loss": 0.5882,
      "step": 1380
    },
    {
      "epoch": 2.5469537333944112,
      "grad_norm": 0.012084643356502056,
      "learning_rate": 1.1560911671647535e-05,
      "loss": 0.6038,
      "step": 1390
    },
    {
      "epoch": 2.5652771415483278,
      "grad_norm": 0.012955896556377411,
      "learning_rate": 1.0653215222376045e-05,
      "loss": 0.5603,
      "step": 1400
    },
    {
      "epoch": 2.5836005497022447,
      "grad_norm": 0.010830081067979336,
      "learning_rate": 9.78061867662372e-06,
      "loss": 0.5596,
      "step": 1410
    },
    {
      "epoch": 2.6019239578561613,
      "grad_norm": 0.009481418877840042,
      "learning_rate": 8.943464834130289e-06,
      "loss": 0.567,
      "step": 1420
    },
    {
      "epoch": 2.620247366010078,
      "grad_norm": 0.009960277937352657,
      "learning_rate": 8.14208257096185e-06,
      "loss": 0.5627,
      "step": 1430
    },
    {
      "epoch": 2.6385707741639948,
      "grad_norm": 0.011430955491960049,
      "learning_rate": 7.376786710312045e-06,
      "loss": 0.5642,
      "step": 1440
    },
    {
      "epoch": 2.6568941823179113,
      "grad_norm": 0.010986811481416225,
      "learning_rate": 6.647877898823462e-06,
      "loss": 0.5637,
      "step": 1450
    },
    {
      "epoch": 2.675217590471828,
      "grad_norm": 0.011870593763887882,
      "learning_rate": 5.955642488478675e-06,
      "loss": 0.5611,
      "step": 1460
    },
    {
      "epoch": 2.6935409986257444,
      "grad_norm": 0.013593550771474838,
      "learning_rate": 5.300352424106781e-06,
      "loss": 0.5611,
      "step": 1470
    },
    {
      "epoch": 2.711864406779661,
      "grad_norm": 0.01099549513310194,
      "learning_rate": 4.682265136549768e-06,
      "loss": 0.5616,
      "step": 1480
    },
    {
      "epoch": 2.7301878149335774,
      "grad_norm": 0.010328186675906181,
      "learning_rate": 4.1016234415308555e-06,
      "loss": 0.5648,
      "step": 1490
    },
    {
      "epoch": 2.7485112230874944,
      "grad_norm": 0.012961314059793949,
      "learning_rate": 3.5586554442641583e-06,
      "loss": 0.5791,
      "step": 1500
    },
    {
      "epoch": 2.766834631241411,
      "grad_norm": 0.010046335868537426,
      "learning_rate": 3.0535744498435993e-06,
      "loss": 0.5641,
      "step": 1510
    },
    {
      "epoch": 2.7851580393953275,
      "grad_norm": 0.010482456535100937,
      "learning_rate": 2.5865788794459223e-06,
      "loss": 0.5629,
      "step": 1520
    },
    {
      "epoch": 2.803481447549244,
      "grad_norm": 0.015615337528288364,
      "learning_rate": 2.1578521923808713e-06,
      "loss": 0.5698,
      "step": 1530
    },
    {
      "epoch": 2.821804855703161,
      "grad_norm": 0.012672988697886467,
      "learning_rate": 1.767562814019208e-06,
      "loss": 0.5662,
      "step": 1540
    },
    {
      "epoch": 2.8401282638570775,
      "grad_norm": 0.011427894234657288,
      "learning_rate": 1.41586406962676e-06,
      "loss": 0.5991,
      "step": 1550
    },
    {
      "epoch": 2.858451672010994,
      "grad_norm": 0.011262394487857819,
      "learning_rate": 1.1028941241305046e-06,
      "loss": 0.562,
      "step": 1560
    },
    {
      "epoch": 2.8767750801649106,
      "grad_norm": 0.011531077325344086,
      "learning_rate": 8.287759278405083e-07,
      "loss": 0.5806,
      "step": 1570
    },
    {
      "epoch": 2.895098488318827,
      "grad_norm": 0.01074717566370964,
      "learning_rate": 5.936171681488301e-07,
      "loss": 0.5649,
      "step": 1580
    },
    {
      "epoch": 2.913421896472744,
      "grad_norm": 0.011177337728440762,
      "learning_rate": 3.9751022722455123e-07,
      "loss": 0.5719,
      "step": 1590
    },
    {
      "epoch": 2.9317453046266606,
      "grad_norm": 0.013694767840206623,
      "learning_rate": 2.4053214572137275e-07,
      "loss": 0.58,
      "step": 1600
    },
    {
      "epoch": 2.950068712780577,
      "grad_norm": 0.01134565845131874,
      "learning_rate": 1.2274459251220282e-07,
      "loss": 0.5592,
      "step": 1610
    },
    {
      "epoch": 2.9683921209344937,
      "grad_norm": 0.009969279170036316,
      "learning_rate": 4.4193840462536384e-08,
      "loss": 0.5669,
      "step": 1620
    },
    {
      "epoch": 2.9867155290884106,
      "grad_norm": 0.010615854524075985,
      "learning_rate": 4.9107482521071335e-09,
      "loss": 0.5561,
      "step": 1630
    }
  ],
  "logging_steps": 10,
  "max_steps": 1635,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2063088450732032e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}