{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9946524064171123,
  "eval_steps": 70,
  "global_step": 210,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07130124777183601,
      "grad_norm": 1.4087971448898315,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 2.5781,
      "step": 5
    },
    {
      "epoch": 0.14260249554367202,
      "grad_norm": 1.1262208223342896,
      "learning_rate": 2.380952380952381e-05,
      "loss": 2.5765,
      "step": 10
    },
    {
      "epoch": 0.21390374331550802,
      "grad_norm": 1.2945618629455566,
      "learning_rate": 3.571428571428572e-05,
      "loss": 2.342,
      "step": 15
    },
    {
      "epoch": 0.28520499108734404,
      "grad_norm": 0.7618772387504578,
      "learning_rate": 4.761904761904762e-05,
      "loss": 1.9415,
      "step": 20
    },
    {
      "epoch": 0.35650623885918004,
      "grad_norm": 0.7050806879997253,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 1.6927,
      "step": 25
    },
    {
      "epoch": 0.42780748663101603,
      "grad_norm": 0.9037391543388367,
      "learning_rate": 7.142857142857143e-05,
      "loss": 1.6323,
      "step": 30
    },
    {
      "epoch": 0.49910873440285203,
      "grad_norm": 0.8459873795509338,
      "learning_rate": 8.333333333333334e-05,
      "loss": 1.556,
      "step": 35
    },
    {
      "epoch": 0.5704099821746881,
      "grad_norm": 0.7082933783531189,
      "learning_rate": 9.523809523809524e-05,
      "loss": 1.4586,
      "step": 40
    },
    {
      "epoch": 0.6417112299465241,
      "grad_norm": 0.625400722026825,
      "learning_rate": 9.998445910004082e-05,
      "loss": 1.4528,
      "step": 45
    },
    {
      "epoch": 0.7130124777183601,
      "grad_norm": 2.810605764389038,
      "learning_rate": 9.988952191691925e-05,
      "loss": 1.4471,
      "step": 50
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 2.9866161346435547,
      "learning_rate": 9.97084451044556e-05,
      "loss": 1.4675,
      "step": 55
    },
    {
      "epoch": 0.8556149732620321,
      "grad_norm": 3.7440223693847656,
      "learning_rate": 9.944154131125642e-05,
      "loss": 1.4057,
      "step": 60
    },
    {
      "epoch": 0.9269162210338681,
      "grad_norm": 0.6605167984962463,
      "learning_rate": 9.90892713754483e-05,
      "loss": 1.4725,
      "step": 65
    },
    {
      "epoch": 0.9982174688057041,
      "grad_norm": 0.5990611910820007,
      "learning_rate": 9.865224352899119e-05,
      "loss": 1.4367,
      "step": 70
    },
    {
      "epoch": 0.9982174688057041,
      "eval_loss": 1.3730539083480835,
      "eval_runtime": 7.9538,
      "eval_samples_per_second": 5.783,
      "eval_steps_per_second": 1.509,
      "step": 70
    },
    {
      "epoch": 1.0695187165775402,
      "grad_norm": 0.7159441113471985,
      "learning_rate": 9.81312123475006e-05,
      "loss": 1.3834,
      "step": 75
    },
    {
      "epoch": 1.1408199643493762,
      "grad_norm": 0.5568172335624695,
      "learning_rate": 9.752707744739145e-05,
      "loss": 1.3219,
      "step": 80
    },
    {
      "epoch": 1.2121212121212122,
      "grad_norm": 0.6985956430435181,
      "learning_rate": 9.684088193259355e-05,
      "loss": 1.2813,
      "step": 85
    },
    {
      "epoch": 1.2834224598930482,
      "grad_norm": 0.6956667304039001,
      "learning_rate": 9.607381059352038e-05,
      "loss": 1.3074,
      "step": 90
    },
    {
      "epoch": 1.3547237076648841,
      "grad_norm": 0.6513645648956299,
      "learning_rate": 9.522718786140097e-05,
      "loss": 1.2976,
      "step": 95
    },
    {
      "epoch": 1.4260249554367201,
      "grad_norm": 0.8437972068786621,
      "learning_rate": 9.430247552150673e-05,
      "loss": 1.2588,
      "step": 100
    },
    {
      "epoch": 1.4973262032085561,
      "grad_norm": 0.7591404318809509,
      "learning_rate": 9.330127018922194e-05,
      "loss": 1.2601,
      "step": 105
    },
    {
      "epoch": 1.5686274509803921,
      "grad_norm": 0.830049991607666,
      "learning_rate": 9.22253005533154e-05,
      "loss": 1.2587,
      "step": 110
    },
    {
      "epoch": 1.6399286987522281,
      "grad_norm": 0.9140297174453735,
      "learning_rate": 9.107642439117321e-05,
      "loss": 1.2856,
      "step": 115
    },
    {
      "epoch": 1.7112299465240641,
      "grad_norm": 0.9149733185768127,
      "learning_rate": 8.985662536114613e-05,
      "loss": 1.2615,
      "step": 120
    },
    {
      "epoch": 1.7825311942959001,
      "grad_norm": 0.7160300612449646,
      "learning_rate": 8.856800957755e-05,
      "loss": 1.2669,
      "step": 125
    },
    {
      "epoch": 1.8538324420677363,
      "grad_norm": 0.9131708145141602,
      "learning_rate": 8.721280197423258e-05,
      "loss": 1.2372,
      "step": 130
    },
    {
      "epoch": 1.9251336898395723,
      "grad_norm": 0.8047693967819214,
      "learning_rate": 8.579334246298593e-05,
      "loss": 1.2928,
      "step": 135
    },
    {
      "epoch": 1.9964349376114083,
      "grad_norm": 0.7446454167366028,
      "learning_rate": 8.43120818934367e-05,
      "loss": 1.2601,
      "step": 140
    },
    {
      "epoch": 1.9964349376114083,
      "eval_loss": 1.3131123781204224,
      "eval_runtime": 7.9209,
      "eval_samples_per_second": 5.807,
      "eval_steps_per_second": 1.515,
      "step": 140
    },
    {
      "epoch": 2.0677361853832443,
      "grad_norm": 0.8385710716247559,
      "learning_rate": 8.27715778213905e-05,
      "loss": 0.9909,
      "step": 145
    },
    {
      "epoch": 2.1390374331550803,
      "grad_norm": 1.6324430704116821,
      "learning_rate": 8.117449009293668e-05,
      "loss": 0.9616,
      "step": 150
    },
    {
      "epoch": 2.2103386809269163,
      "grad_norm": 1.1424412727355957,
      "learning_rate": 7.952357625193749e-05,
      "loss": 0.8888,
      "step": 155
    },
    {
      "epoch": 2.2816399286987523,
      "grad_norm": 1.1564297676086426,
      "learning_rate": 7.782168677883206e-05,
      "loss": 0.9144,
      "step": 160
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 1.4812451601028442,
      "learning_rate": 7.60717601689749e-05,
      "loss": 0.9302,
      "step": 165
    },
    {
      "epoch": 2.4242424242424243,
      "grad_norm": 1.4938651323318481,
      "learning_rate": 7.427681785900761e-05,
      "loss": 0.9106,
      "step": 170
    },
    {
      "epoch": 2.4955436720142603,
      "grad_norm": 1.4754260778427124,
      "learning_rate": 7.243995901002312e-05,
      "loss": 0.9016,
      "step": 175
    },
    {
      "epoch": 2.5668449197860963,
      "grad_norm": 1.40531325340271,
      "learning_rate": 7.056435515653059e-05,
      "loss": 0.8809,
      "step": 180
    },
    {
      "epoch": 2.6381461675579323,
      "grad_norm": 1.495160698890686,
      "learning_rate": 6.86532447304597e-05,
      "loss": 0.8862,
      "step": 185
    },
    {
      "epoch": 2.7094474153297683,
      "grad_norm": 1.7604504823684692,
      "learning_rate": 6.670992746965938e-05,
      "loss": 0.8497,
      "step": 190
    },
    {
      "epoch": 2.7807486631016043,
      "grad_norm": 1.8376922607421875,
      "learning_rate": 6.473775872054521e-05,
      "loss": 0.8764,
      "step": 195
    },
    {
      "epoch": 2.8520499108734403,
      "grad_norm": 1.4825749397277832,
      "learning_rate": 6.274014364473274e-05,
      "loss": 0.862,
      "step": 200
    },
    {
      "epoch": 2.9233511586452763,
      "grad_norm": 1.5822840929031372,
      "learning_rate": 6.072053133965938e-05,
      "loss": 0.8981,
      "step": 205
    },
    {
      "epoch": 2.9946524064171123,
      "grad_norm": 1.6038023233413696,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.8929,
      "step": 210
    },
    {
      "epoch": 2.9946524064171123,
      "eval_loss": 1.4369069337844849,
      "eval_runtime": 7.8778,
      "eval_samples_per_second": 5.839,
      "eval_steps_per_second": 1.523,
      "step": 210
    }
  ],
  "logging_steps": 5,
  "max_steps": 420,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 70,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 9.052285652455916e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}