{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9971671388101983,
  "eval_steps": 500,
  "global_step": 176,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0056657223796034,
      "grad_norm": 23.928797485562242,
      "learning_rate": 5.555555555555555e-07,
      "loss": 1.3301,
      "step": 1
    },
    {
      "epoch": 0.028328611898016998,
      "grad_norm": 9.168045171641722,
      "learning_rate": 2.7777777777777783e-06,
      "loss": 1.2615,
      "step": 5
    },
    {
      "epoch": 0.056657223796033995,
      "grad_norm": 3.503193912103178,
      "learning_rate": 5.555555555555557e-06,
      "loss": 1.0082,
      "step": 10
    },
    {
      "epoch": 0.08498583569405099,
      "grad_norm": 2.4268821140139236,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.8627,
      "step": 15
    },
    {
      "epoch": 0.11331444759206799,
      "grad_norm": 2.510061349082153,
      "learning_rate": 9.99604698613651e-06,
      "loss": 0.8289,
      "step": 20
    },
    {
      "epoch": 0.141643059490085,
      "grad_norm": 2.5610840886165875,
      "learning_rate": 9.951647332362511e-06,
      "loss": 0.803,
      "step": 25
    },
    {
      "epoch": 0.16997167138810199,
      "grad_norm": 2.6655687674841735,
      "learning_rate": 9.85834670020205e-06,
      "loss": 0.7805,
      "step": 30
    },
    {
      "epoch": 0.19830028328611898,
      "grad_norm": 2.3795452616040698,
      "learning_rate": 9.717066498610673e-06,
      "loss": 0.7513,
      "step": 35
    },
    {
      "epoch": 0.22662889518413598,
      "grad_norm": 2.423822398896064,
      "learning_rate": 9.529201968327618e-06,
      "loss": 0.7295,
      "step": 40
    },
    {
      "epoch": 0.254957507082153,
      "grad_norm": 2.397535445659708,
      "learning_rate": 9.296608402898306e-06,
      "loss": 0.699,
      "step": 45
    },
    {
      "epoch": 0.28328611898017,
      "grad_norm": 2.717544074024275,
      "learning_rate": 9.021582826353825e-06,
      "loss": 0.6984,
      "step": 50
    },
    {
      "epoch": 0.311614730878187,
      "grad_norm": 2.5255796668705335,
      "learning_rate": 8.706841308493092e-06,
      "loss": 0.6602,
      "step": 55
    },
    {
      "epoch": 0.33994334277620397,
      "grad_norm": 2.0707712756959604,
      "learning_rate": 8.355492141795185e-06,
      "loss": 0.6637,
      "step": 60
    },
    {
      "epoch": 0.36827195467422097,
      "grad_norm": 2.2696475360730135,
      "learning_rate": 7.971005144858554e-06,
      "loss": 0.6346,
      "step": 65
    },
    {
      "epoch": 0.39660056657223797,
      "grad_norm": 2.605539930017495,
      "learning_rate": 7.5571773955171124e-06,
      "loss": 0.625,
      "step": 70
    },
    {
      "epoch": 0.42492917847025496,
      "grad_norm": 2.291369766178497,
      "learning_rate": 7.118095732042643e-06,
      "loss": 0.5957,
      "step": 75
    },
    {
      "epoch": 0.45325779036827196,
      "grad_norm": 2.3308971412073367,
      "learning_rate": 6.65809639276034e-06,
      "loss": 0.608,
      "step": 80
    },
    {
      "epoch": 0.48158640226628896,
      "grad_norm": 2.3052197500819758,
      "learning_rate": 6.181722192664526e-06,
      "loss": 0.5965,
      "step": 85
    },
    {
      "epoch": 0.509915014164306,
      "grad_norm": 2.186441360279387,
      "learning_rate": 5.693677659945343e-06,
      "loss": 0.5714,
      "step": 90
    },
    {
      "epoch": 0.5382436260623229,
      "grad_norm": 2.0459304023279654,
      "learning_rate": 5.19878257548463e-06,
      "loss": 0.571,
      "step": 95
    },
    {
      "epoch": 0.56657223796034,
      "grad_norm": 2.111785511247743,
      "learning_rate": 4.701924374150901e-06,
      "loss": 0.5575,
      "step": 100
    },
    {
      "epoch": 0.5949008498583569,
      "grad_norm": 2.0553285107851336,
      "learning_rate": 4.2080098779639255e-06,
      "loss": 0.5573,
      "step": 105
    },
    {
      "epoch": 0.623229461756374,
      "grad_norm": 2.058760374639204,
      "learning_rate": 3.721916837797627e-06,
      "loss": 0.5227,
      "step": 110
    },
    {
      "epoch": 0.6515580736543909,
      "grad_norm": 2.204312230150294,
      "learning_rate": 3.2484457621808787e-06,
      "loss": 0.5294,
      "step": 115
    },
    {
      "epoch": 0.6798866855524079,
      "grad_norm": 1.9646053728129644,
      "learning_rate": 2.792272508920443e-06,
      "loss": 0.5231,
      "step": 120
    },
    {
      "epoch": 0.7082152974504249,
      "grad_norm": 2.2039971826045304,
      "learning_rate": 2.3579021077369047e-06,
      "loss": 0.5124,
      "step": 125
    },
    {
      "epoch": 0.7365439093484419,
      "grad_norm": 2.173776740721594,
      "learning_rate": 1.949624269947378e-06,
      "loss": 0.5126,
      "step": 130
    },
    {
      "epoch": 0.7648725212464589,
      "grad_norm": 2.03790957114551,
      "learning_rate": 1.5714710245679348e-06,
      "loss": 0.4943,
      "step": 135
    },
    {
      "epoch": 0.7932011331444759,
      "grad_norm": 1.985721888347746,
      "learning_rate": 1.227176899208849e-06,
      "loss": 0.4883,
      "step": 140
    },
    {
      "epoch": 0.8215297450424929,
      "grad_norm": 2.1787623191912737,
      "learning_rate": 9.201420390041965e-07,
      "loss": 0.4987,
      "step": 145
    },
    {
      "epoch": 0.8498583569405099,
      "grad_norm": 2.059217292081842,
      "learning_rate": 6.533986278020876e-07,
      "loss": 0.4829,
      "step": 150
    },
    {
      "epoch": 0.8781869688385269,
      "grad_norm": 2.0422032671448997,
      "learning_rate": 4.2958094322982703e-07,
      "loss": 0.4962,
      "step": 155
    },
    {
      "epoch": 0.9065155807365439,
      "grad_norm": 2.0055105526222974,
      "learning_rate": 2.5089934136108665e-07,
      "loss": 0.4896,
      "step": 160
    },
    {
      "epoch": 0.9348441926345609,
      "grad_norm": 1.9559939941635416,
      "learning_rate": 1.1911842790474637e-07,
      "loss": 0.4703,
      "step": 165
    },
    {
      "epoch": 0.9631728045325779,
      "grad_norm": 2.008224762812249,
      "learning_rate": 3.553963149013295e-08,
      "loss": 0.4793,
      "step": 170
    },
    {
      "epoch": 0.9915014164305949,
      "grad_norm": 2.117489119782366,
      "learning_rate": 9.883511496722176e-10,
      "loss": 0.4739,
      "step": 175
    },
    {
      "epoch": 0.9971671388101983,
      "eval_loss": 0.6540379524230957,
      "eval_runtime": 83.5061,
      "eval_samples_per_second": 3.617,
      "eval_steps_per_second": 0.91,
      "step": 176
    },
    {
      "epoch": 0.9971671388101983,
      "step": 176,
      "total_flos": 36798474485760.0,
      "train_loss": 0.6290039020505819,
      "train_runtime": 6093.0087,
      "train_samples_per_second": 0.926,
      "train_steps_per_second": 0.029
    }
  ],
  "logging_steps": 5,
  "max_steps": 176,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 36798474485760.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}