{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 200,
  "global_step": 412,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04854368932038835,
      "grad_norm": 0.19167186319828033,
      "learning_rate": 4.970984274562741e-06,
      "loss": 0.5965,
      "step": 20
    },
    {
      "epoch": 0.0970873786407767,
      "grad_norm": 0.10218532383441925,
      "learning_rate": 4.884610628109082e-06,
      "loss": 0.5749,
      "step": 40
    },
    {
      "epoch": 0.14563106796116504,
      "grad_norm": 0.0955277681350708,
      "learning_rate": 4.742884015847436e-06,
      "loss": 0.5653,
      "step": 60
    },
    {
      "epoch": 0.1941747572815534,
      "grad_norm": 0.09652815014123917,
      "learning_rate": 4.549094278152631e-06,
      "loss": 0.5592,
      "step": 80
    },
    {
      "epoch": 0.24271844660194175,
      "grad_norm": 0.09276870638132095,
      "learning_rate": 4.307739774881878e-06,
      "loss": 0.5562,
      "step": 100
    },
    {
      "epoch": 0.2912621359223301,
      "grad_norm": 0.09189510345458984,
      "learning_rate": 4.024422966835137e-06,
      "loss": 0.5518,
      "step": 120
    },
    {
      "epoch": 0.33980582524271846,
      "grad_norm": 0.09179496020078659,
      "learning_rate": 3.7057203681836407e-06,
      "loss": 0.551,
      "step": 140
    },
    {
      "epoch": 0.3883495145631068,
      "grad_norm": 0.09196960926055908,
      "learning_rate": 3.3590298886062833e-06,
      "loss": 0.5482,
      "step": 160
    },
    {
      "epoch": 0.4368932038834951,
      "grad_norm": 0.09062644839286804,
      "learning_rate": 2.9923991087167657e-06,
      "loss": 0.5461,
      "step": 180
    },
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 0.09341968595981598,
      "learning_rate": 2.614338474951987e-06,
      "loss": 0.5446,
      "step": 200
    },
    {
      "epoch": 0.4854368932038835,
      "eval_accuracy": 0.32638514992244894,
      "eval_loss": 0.5471854209899902,
      "eval_runtime": 32.0389,
      "eval_samples_per_second": 132.027,
      "eval_steps_per_second": 0.531,
      "step": 200
    },
    {
      "epoch": 0.5339805825242718,
      "grad_norm": 0.09235095232725143,
      "learning_rate": 2.2336237501503103e-06,
      "loss": 0.5411,
      "step": 220
    },
    {
      "epoch": 0.5825242718446602,
      "grad_norm": 0.09220809489488602,
      "learning_rate": 1.8590923054515504e-06,
      "loss": 0.543,
      "step": 240
    },
    {
      "epoch": 0.6310679611650486,
      "grad_norm": 0.0929255411028862,
      "learning_rate": 1.499437982109305e-06,
      "loss": 0.5411,
      "step": 260
    },
    {
      "epoch": 0.6796116504854369,
      "grad_norm": 0.09099574387073517,
      "learning_rate": 1.1630092850023148e-06,
      "loss": 0.5423,
      "step": 280
    },
    {
      "epoch": 0.7281553398058253,
      "grad_norm": 0.08963935077190399,
      "learning_rate": 8.576155922941548e-07,
      "loss": 0.5397,
      "step": 300
    },
    {
      "epoch": 0.7766990291262136,
      "grad_norm": 0.09014247357845306,
      "learning_rate": 5.903458796151382e-07,
      "loss": 0.5414,
      "step": 320
    },
    {
      "epoch": 0.8252427184466019,
      "grad_norm": 0.0917976126074791,
      "learning_rate": 3.6740416664589634e-07,
      "loss": 0.5401,
      "step": 340
    },
    {
      "epoch": 0.8737864077669902,
      "grad_norm": 0.09078697115182877,
      "learning_rate": 1.9396550581205208e-07,
      "loss": 0.537,
      "step": 360
    },
    {
      "epoch": 0.9223300970873787,
      "grad_norm": 0.09018886834383011,
      "learning_rate": 7.405585596397314e-08,
      "loss": 0.5391,
      "step": 380
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 0.09031691402196884,
      "learning_rate": 1.0458629483476868e-08,
      "loss": 0.5418,
      "step": 400
    },
    {
      "epoch": 0.970873786407767,
      "eval_accuracy": 0.3268526506529188,
      "eval_loss": 0.5428585410118103,
      "eval_runtime": 26.2685,
      "eval_samples_per_second": 161.029,
      "eval_steps_per_second": 0.647,
      "step": 400
    },
    {
      "epoch": 1.0,
      "step": 412,
      "total_flos": 7.628990136885182e+18,
      "train_loss": 0.5495563189960221,
      "train_runtime": 3539.9459,
      "train_samples_per_second": 119.039,
      "train_steps_per_second": 0.116
    }
  ],
  "logging_steps": 20,
  "max_steps": 412,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.628990136885182e+18,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}