Model save

Browse files

Files changed (6) hide show

README.md +67 -0
all_results.json +9 -0
generation_config.json +6 -0
runs/Aug08_02-12-33_COE-CS-sv003/events.out.tfevents.1723083489.COE-CS-sv003.3073689.0 +2 -2
train_results.json +9 -0
trainer_state.json +1485 -0

README.md ADDED Viewed

	@@ -0,0 +1,67 @@

+---
+license: apache-2.0
+base_model: mistralai/Mistral-7B-Instruct-v0.2
+tags:
+- trl
+- sft
+- generated_from_trainer
+datasets:
+- generator
+model-index:
+- name: prometheus-7b-direct-0.1p-rare-seed42-response
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# prometheus-7b-direct-0.1p-rare-seed42-response
+This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on the generator dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.3360
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 2
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- total_eval_batch_size: 4
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.3291        | 0.9998 | 1024 | 0.3360          |
+### Framework versions
+- Transformers 4.43.4
+- Pytorch 2.3.1+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9997559189650964,
+    "total_flos": 214352422502400.0,
+    "train_loss": 0.48904780531302094,
+    "train_runtime": 23248.9436,
+    "train_samples": 108847,
+    "train_samples_per_second": 1.409,
+    "train_steps_per_second": 0.044
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.43.4"
+}

runs/Aug08_02-12-33_COE-CS-sv003/events.out.tfevents.1723083489.COE-CS-sv003.3073689.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e44e541830c57986fbee3552ff2626e4156a5ed7ee1bd84e1b9c304ea7609163
-size 48427

 version https://git-lfs.github.com/spec/v1
+oid sha256:5128753a710a6e6d84b8e1c4169ababf6f659e27fd8a05dc469e852a75ae63a9
+size 49052

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9997559189650964,
+    "total_flos": 214352422502400.0,
+    "train_loss": 0.48904780531302094,
+    "train_runtime": 23248.9436,
+    "train_samples": 108847,
+    "train_samples_per_second": 1.409,
+    "train_steps_per_second": 0.044
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1485 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9997559189650964,
+  "eval_steps": 500,
+  "global_step": 1024,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.000976324139614352,
+      "grad_norm": 22.493754488892705,
+      "learning_rate": 9.70873786407767e-08,
+      "loss": 1.3084,
+      "step": 1
+    },
+    {
+      "epoch": 0.00488162069807176,
+      "grad_norm": 21.407549210638884,
+      "learning_rate": 4.854368932038835e-07,
+      "loss": 1.3239,
+      "step": 5
+    },
+    {
+      "epoch": 0.00976324139614352,
+      "grad_norm": 9.073471250768227,
+      "learning_rate": 9.70873786407767e-07,
+      "loss": 1.2075,
+      "step": 10
+    },
+    {
+      "epoch": 0.01464486209421528,
+      "grad_norm": 8.959633641742187,
+      "learning_rate": 1.4563106796116506e-06,
+      "loss": 1.0555,
+      "step": 15
+    },
+    {
+      "epoch": 0.01952648279228704,
+      "grad_norm": 2.9726378530361903,
+      "learning_rate": 1.941747572815534e-06,
+      "loss": 0.9134,
+      "step": 20
+    },
+    {
+      "epoch": 0.0244081034903588,
+      "grad_norm": 2.336512387637656,
+      "learning_rate": 2.427184466019418e-06,
+      "loss": 0.8712,
+      "step": 25
+    },
+    {
+      "epoch": 0.02928972418843056,
+      "grad_norm": 2.1980743279610633,
+      "learning_rate": 2.912621359223301e-06,
+      "loss": 0.8452,
+      "step": 30
+    },
+    {
+      "epoch": 0.034171344886502315,
+      "grad_norm": 2.2947165951756108,
+      "learning_rate": 3.398058252427185e-06,
+      "loss": 0.8304,
+      "step": 35
+    },
+    {
+      "epoch": 0.03905296558457408,
+      "grad_norm": 2.231374010631092,
+      "learning_rate": 3.883495145631068e-06,
+      "loss": 0.8005,
+      "step": 40
+    },
+    {
+      "epoch": 0.043934586282645835,
+      "grad_norm": 2.323172717277238,
+      "learning_rate": 4.368932038834952e-06,
+      "loss": 0.7954,
+      "step": 45
+    },
+    {
+      "epoch": 0.0488162069807176,
+      "grad_norm": 2.6284940732364697,
+      "learning_rate": 4.854368932038836e-06,
+      "loss": 0.7828,
+      "step": 50
+    },
+    {
+      "epoch": 0.053697827678789355,
+      "grad_norm": 2.4706362764533383,
+      "learning_rate": 5.3398058252427185e-06,
+      "loss": 0.7649,
+      "step": 55
+    },
+    {
+      "epoch": 0.05857944837686112,
+      "grad_norm": 2.5587525974220915,
+      "learning_rate": 5.825242718446602e-06,
+      "loss": 0.7448,
+      "step": 60
+    },
+    {
+      "epoch": 0.06346106907493287,
+      "grad_norm": 2.367706031948518,
+      "learning_rate": 6.310679611650487e-06,
+      "loss": 0.7313,
+      "step": 65
+    },
+    {
+      "epoch": 0.06834268977300463,
+      "grad_norm": 2.3861668688704416,
+      "learning_rate": 6.79611650485437e-06,
+      "loss": 0.7285,
+      "step": 70
+    },
+    {
+      "epoch": 0.0732243104710764,
+      "grad_norm": 2.2781470198507665,
+      "learning_rate": 7.2815533980582534e-06,
+      "loss": 0.7107,
+      "step": 75
+    },
+    {
+      "epoch": 0.07810593116914816,
+      "grad_norm": 2.4811146424704855,
+      "learning_rate": 7.766990291262136e-06,
+      "loss": 0.7127,
+      "step": 80
+    },
+    {
+      "epoch": 0.08298755186721991,
+      "grad_norm": 2.3108856194291945,
+      "learning_rate": 8.25242718446602e-06,
+      "loss": 0.703,
+      "step": 85
+    },
+    {
+      "epoch": 0.08786917256529167,
+      "grad_norm": 2.4623970274283877,
+      "learning_rate": 8.737864077669904e-06,
+      "loss": 0.693,
+      "step": 90
+    },
+    {
+      "epoch": 0.09275079326336344,
+      "grad_norm": 2.2177593778006544,
+      "learning_rate": 9.223300970873788e-06,
+      "loss": 0.6974,
+      "step": 95
+    },
+    {
+      "epoch": 0.0976324139614352,
+      "grad_norm": 2.3758805201515765,
+      "learning_rate": 9.708737864077671e-06,
+      "loss": 0.6771,
+      "step": 100
+    },
+    {
+      "epoch": 0.10251403465950695,
+      "grad_norm": 2.367648483120376,
+      "learning_rate": 9.999883646674445e-06,
+      "loss": 0.703,
+      "step": 105
+    },
+    {
+      "epoch": 0.10739565535757871,
+      "grad_norm": 2.7148702269535145,
+      "learning_rate": 9.998574733951775e-06,
+      "loss": 0.6806,
+      "step": 110
+    },
+    {
+      "epoch": 0.11227727605565048,
+      "grad_norm": 2.3763948535285357,
+      "learning_rate": 9.995811848851807e-06,
+      "loss": 0.6747,
+      "step": 115
+    },
+    {
+      "epoch": 0.11715889675372224,
+      "grad_norm": 2.3041091870777564,
+      "learning_rate": 9.991595795035352e-06,
+      "loss": 0.663,
+      "step": 120
+    },
+    {
+      "epoch": 0.12204051745179399,
+      "grad_norm": 2.1733479043188644,
+      "learning_rate": 9.985927798857143e-06,
+      "loss": 0.677,
+      "step": 125
+    },
+    {
+      "epoch": 0.12692213814986575,
+      "grad_norm": 2.2480309788297284,
+      "learning_rate": 9.978809509009121e-06,
+      "loss": 0.6625,
+      "step": 130
+    },
+    {
+      "epoch": 0.13180375884793752,
+      "grad_norm": 2.138615471864636,
+      "learning_rate": 9.970242996040865e-06,
+      "loss": 0.656,
+      "step": 135
+    },
+    {
+      "epoch": 0.13668537954600926,
+      "grad_norm": 2.1218250053659853,
+      "learning_rate": 9.960230751757318e-06,
+      "loss": 0.6676,
+      "step": 140
+    },
+    {
+      "epoch": 0.14156700024408103,
+      "grad_norm": 2.067260559588222,
+      "learning_rate": 9.948775688493974e-06,
+      "loss": 0.6511,
+      "step": 145
+    },
+    {
+      "epoch": 0.1464486209421528,
+      "grad_norm": 2.0246056917859048,
+      "learning_rate": 9.93588113826975e-06,
+      "loss": 0.6514,
+      "step": 150
+    },
+    {
+      "epoch": 0.15133024164022454,
+      "grad_norm": 2.2415009500993635,
+      "learning_rate": 9.921550851817774e-06,
+      "loss": 0.6554,
+      "step": 155
+    },
+    {
+      "epoch": 0.15621186233829631,
+      "grad_norm": 2.2315931042976627,
+      "learning_rate": 9.905788997494377e-06,
+      "loss": 0.6499,
+      "step": 160
+    },
+    {
+      "epoch": 0.16109348303636808,
+      "grad_norm": 2.15809328665933,
+      "learning_rate": 9.888600160066627e-06,
+      "loss": 0.6473,
+      "step": 165
+    },
+    {
+      "epoch": 0.16597510373443983,
+      "grad_norm": 2.0579191397149836,
+      "learning_rate": 9.869989339378706e-06,
+      "loss": 0.6369,
+      "step": 170
+    },
+    {
+      "epoch": 0.1708567244325116,
+      "grad_norm": 2.031129747279531,
+      "learning_rate": 9.849961948897582e-06,
+      "loss": 0.6365,
+      "step": 175
+    },
+    {
+      "epoch": 0.17573834513058334,
+      "grad_norm": 2.3571773219128067,
+      "learning_rate": 9.828523814138344e-06,
+      "loss": 0.6256,
+      "step": 180
+    },
+    {
+      "epoch": 0.1806199658286551,
+      "grad_norm": 2.0680850663572503,
+      "learning_rate": 9.8056811709697e-06,
+      "loss": 0.6305,
+      "step": 185
+    },
+    {
+      "epoch": 0.18550158652672688,
+      "grad_norm": 2.070854390313614,
+      "learning_rate": 9.781440663800099e-06,
+      "loss": 0.6289,
+      "step": 190
+    },
+    {
+      "epoch": 0.19038320722479862,
+      "grad_norm": 1.9927467214888446,
+      "learning_rate": 9.755809343645021e-06,
+      "loss": 0.6097,
+      "step": 195
+    },
+    {
+      "epoch": 0.1952648279228704,
+      "grad_norm": 2.1885730975002544,
+      "learning_rate": 9.728794666076004e-06,
+      "loss": 0.6202,
+      "step": 200
+    },
+    {
+      "epoch": 0.20014644862094216,
+      "grad_norm": 2.0241934863146747,
+      "learning_rate": 9.700404489051974e-06,
+      "loss": 0.6218,
+      "step": 205
+    },
+    {
+      "epoch": 0.2050280693190139,
+      "grad_norm": 2.0845209209163076,
+      "learning_rate": 9.670647070633554e-06,
+      "loss": 0.6195,
+      "step": 210
+    },
+    {
+      "epoch": 0.20990969001708568,
+      "grad_norm": 1.9439387252135285,
+      "learning_rate": 9.639531066580979e-06,
+      "loss": 0.6206,
+      "step": 215
+    },
+    {
+      "epoch": 0.21479131071515742,
+      "grad_norm": 2.0645626959829184,
+      "learning_rate": 9.607065527836324e-06,
+      "loss": 0.6121,
+      "step": 220
+    },
+    {
+      "epoch": 0.2196729314132292,
+      "grad_norm": 2.0720595201040926,
+      "learning_rate": 9.573259897890794e-06,
+      "loss": 0.6146,
+      "step": 225
+    },
+    {
+      "epoch": 0.22455455211130096,
+      "grad_norm": 1.9623515996646703,
+      "learning_rate": 9.538124010037832e-06,
+      "loss": 0.5982,
+      "step": 230
+    },
+    {
+      "epoch": 0.2294361728093727,
+      "grad_norm": 2.010779905137125,
+      "learning_rate": 9.501668084512827e-06,
+      "loss": 0.6062,
+      "step": 235
+    },
+    {
+      "epoch": 0.23431779350744447,
+      "grad_norm": 2.1305241902290226,
+      "learning_rate": 9.46390272552028e-06,
+      "loss": 0.5966,
+      "step": 240
+    },
+    {
+      "epoch": 0.23919941420551624,
+      "grad_norm": 1.8945487010213031,
+      "learning_rate": 9.424838918149285e-06,
+      "loss": 0.6016,
+      "step": 245
+    },
+    {
+      "epoch": 0.24408103490358798,
+      "grad_norm": 1.9720726992448023,
+      "learning_rate": 9.384488025178214e-06,
+      "loss": 0.5908,
+      "step": 250
+    },
+    {
+      "epoch": 0.24896265560165975,
+      "grad_norm": 2.5727183425693663,
+      "learning_rate": 9.342861783769535e-06,
+      "loss": 0.5892,
+      "step": 255
+    },
+    {
+      "epoch": 0.2538442762997315,
+      "grad_norm": 2.321112395905857,
+      "learning_rate": 9.29997230205575e-06,
+      "loss": 0.6011,
+      "step": 260
+    },
+    {
+      "epoch": 0.2587258969978033,
+      "grad_norm": 2.2030364900806076,
+      "learning_rate": 9.2558320556174e-06,
+      "loss": 0.5821,
+      "step": 265
+    },
+    {
+      "epoch": 0.26360751769587504,
+      "grad_norm": 1.9812931636150968,
+      "learning_rate": 9.210453883854204e-06,
+      "loss": 0.5732,
+      "step": 270
+    },
+    {
+      "epoch": 0.2684891383939468,
+      "grad_norm": 2.085078299483637,
+      "learning_rate": 9.163850986250375e-06,
+      "loss": 0.5748,
+      "step": 275
+    },
+    {
+      "epoch": 0.2733707590920185,
+      "grad_norm": 2.2601821261422415,
+      "learning_rate": 9.11603691853518e-06,
+      "loss": 0.5704,
+      "step": 280
+    },
+    {
+      "epoch": 0.2782523797900903,
+      "grad_norm": 2.3842333315576076,
+      "learning_rate": 9.067025588739889e-06,
+      "loss": 0.5691,
+      "step": 285
+    },
+    {
+      "epoch": 0.28313400048816206,
+      "grad_norm": 2.2224957447478038,
+      "learning_rate": 9.016831253152244e-06,
+      "loss": 0.5651,
+      "step": 290
+    },
+    {
+      "epoch": 0.2880156211862338,
+      "grad_norm": 2.1614870694331993,
+      "learning_rate": 8.96546851216962e-06,
+      "loss": 0.5559,
+      "step": 295
+    },
+    {
+      "epoch": 0.2928972418843056,
+      "grad_norm": 2.005310823414414,
+      "learning_rate": 8.912952306052109e-06,
+      "loss": 0.5657,
+      "step": 300
+    },
+    {
+      "epoch": 0.29777886258237735,
+      "grad_norm": 2.38204601184677,
+      "learning_rate": 8.859297910576732e-06,
+      "loss": 0.5568,
+      "step": 305
+    },
+    {
+      "epoch": 0.3026604832804491,
+      "grad_norm": 2.1648970631056743,
+      "learning_rate": 8.804520932594061e-06,
+      "loss": 0.5579,
+      "step": 310
+    },
+    {
+      "epoch": 0.3075421039785209,
+      "grad_norm": 2.100265583460356,
+      "learning_rate": 8.748637305488537e-06,
+      "loss": 0.5551,
+      "step": 315
+    },
+    {
+      "epoch": 0.31242372467659263,
+      "grad_norm": 2.0675326704721138,
+      "learning_rate": 8.691663284543812e-06,
+      "loss": 0.5469,
+      "step": 320
+    },
+    {
+      "epoch": 0.31730534537466437,
+      "grad_norm": 2.267073420915594,
+      "learning_rate": 8.633615442214452e-06,
+      "loss": 0.5561,
+      "step": 325
+    },
+    {
+      "epoch": 0.32218696607273617,
+      "grad_norm": 2.087971484130279,
+      "learning_rate": 8.574510663305388e-06,
+      "loss": 0.5546,
+      "step": 330
+    },
+    {
+      "epoch": 0.3270685867708079,
+      "grad_norm": 1.8815922539302756,
+      "learning_rate": 8.514366140060504e-06,
+      "loss": 0.545,
+      "step": 335
+    },
+    {
+      "epoch": 0.33195020746887965,
+      "grad_norm": 2.081332065948261,
+      "learning_rate": 8.453199367161804e-06,
+      "loss": 0.5409,
+      "step": 340
+    },
+    {
+      "epoch": 0.33683182816695145,
+      "grad_norm": 2.0447565241323145,
+      "learning_rate": 8.391028136640604e-06,
+      "loss": 0.5345,
+      "step": 345
+    },
+    {
+      "epoch": 0.3417134488650232,
+      "grad_norm": 1.8534927309067777,
+      "learning_rate": 8.32787053270223e-06,
+      "loss": 0.5295,
+      "step": 350
+    },
+    {
+      "epoch": 0.34659506956309494,
+      "grad_norm": 2.0218669755624696,
+      "learning_rate": 8.263744926465744e-06,
+      "loss": 0.5339,
+      "step": 355
+    },
+    {
+      "epoch": 0.3514766902611667,
+      "grad_norm": 2.015374192428264,
+      "learning_rate": 8.198669970620177e-06,
+      "loss": 0.5296,
+      "step": 360
+    },
+    {
+      "epoch": 0.3563583109592385,
+      "grad_norm": 1.8414197150680836,
+      "learning_rate": 8.13266459399891e-06,
+      "loss": 0.5279,
+      "step": 365
+    },
+    {
+      "epoch": 0.3612399316573102,
+      "grad_norm": 1.9511713212083668,
+      "learning_rate": 8.065747996073681e-06,
+      "loss": 0.5201,
+      "step": 370
+    },
+    {
+      "epoch": 0.36612155235538196,
+      "grad_norm": 2.0198178510951035,
+      "learning_rate": 7.997939641369909e-06,
+      "loss": 0.5231,
+      "step": 375
+    },
+    {
+      "epoch": 0.37100317305345376,
+      "grad_norm": 1.9974814551196278,
+      "learning_rate": 7.929259253804903e-06,
+      "loss": 0.5127,
+      "step": 380
+    },
+    {
+      "epoch": 0.3758847937515255,
+      "grad_norm": 1.9584094585419525,
+      "learning_rate": 7.859726810950606e-06,
+      "loss": 0.503,
+      "step": 385
+    },
+    {
+      "epoch": 0.38076641444959725,
+      "grad_norm": 1.9555704803278644,
+      "learning_rate": 7.789362538222585e-06,
+      "loss": 0.5031,
+      "step": 390
+    },
+    {
+      "epoch": 0.38564803514766904,
+      "grad_norm": 1.9287309322500008,
+      "learning_rate": 7.718186902996912e-06,
+      "loss": 0.5095,
+      "step": 395
+    },
+    {
+      "epoch": 0.3905296558457408,
+      "grad_norm": 2.0211386074482998,
+      "learning_rate": 7.646220608656662e-06,
+      "loss": 0.4967,
+      "step": 400
+    },
+    {
+      "epoch": 0.39541127654381253,
+      "grad_norm": 2.039367090773858,
+      "learning_rate": 7.573484588569775e-06,
+      "loss": 0.5015,
+      "step": 405
+    },
+    {
+      "epoch": 0.4002928972418843,
+      "grad_norm": 1.9208073920098578,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 0.497,
+      "step": 410
+    },
+    {
+      "epoch": 0.40517451793995607,
+      "grad_norm": 1.8719019363210703,
+      "learning_rate": 7.425788217952744e-06,
+      "loss": 0.4955,
+      "step": 415
+    },
+    {
+      "epoch": 0.4100561386380278,
+      "grad_norm": 1.9360771987365168,
+      "learning_rate": 7.350870828957547e-06,
+      "loss": 0.4906,
+      "step": 420
+    },
+    {
+      "epoch": 0.4149377593360996,
+      "grad_norm": 1.887372218244023,
+      "learning_rate": 7.27526962478906e-06,
+      "loss": 0.4896,
+      "step": 425
+    },
+    {
+      "epoch": 0.41981938003417135,
+      "grad_norm": 1.965135998139074,
+      "learning_rate": 7.1990065961283075e-06,
+      "loss": 0.494,
+      "step": 430
+    },
+    {
+      "epoch": 0.4247010007322431,
+      "grad_norm": 1.936347943982164,
+      "learning_rate": 7.122103926166096e-06,
+      "loss": 0.474,
+      "step": 435
+    },
+    {
+      "epoch": 0.42958262143031484,
+      "grad_norm": 1.8906249170878375,
+      "learning_rate": 7.044583984150425e-06,
+      "loss": 0.4932,
+      "step": 440
+    },
+    {
+      "epoch": 0.43446424212838664,
+      "grad_norm": 1.919261654779276,
+      "learning_rate": 6.9664693188797776e-06,
+      "loss": 0.4812,
+      "step": 445
+    },
+    {
+      "epoch": 0.4393458628264584,
+      "grad_norm": 1.8837975572074062,
+      "learning_rate": 6.887782652144186e-06,
+      "loss": 0.477,
+      "step": 450
+    },
+    {
+      "epoch": 0.4442274835245301,
+      "grad_norm": 2.0761696776456904,
+      "learning_rate": 6.808546872115976e-06,
+      "loss": 0.4835,
+      "step": 455
+    },
+    {
+      "epoch": 0.4491091042226019,
+      "grad_norm": 1.9034620515101144,
+      "learning_rate": 6.728785026692113e-06,
+      "loss": 0.4692,
+      "step": 460
+    },
+    {
+      "epoch": 0.45399072492067366,
+      "grad_norm": 2.0736151869195667,
+      "learning_rate": 6.648520316790102e-06,
+      "loss": 0.4585,
+      "step": 465
+    },
+    {
+      "epoch": 0.4588723456187454,
+      "grad_norm": 2.011123921684231,
+      "learning_rate": 6.567776089599339e-06,
+      "loss": 0.4803,
+      "step": 470
+    },
+    {
+      "epoch": 0.4637539663168172,
+      "grad_norm": 1.96249455995112,
+      "learning_rate": 6.486575831789974e-06,
+      "loss": 0.4623,
+      "step": 475
+    },
+    {
+      "epoch": 0.46863558701488894,
+      "grad_norm": 2.2467468199710017,
+      "learning_rate": 6.404943162681144e-06,
+      "loss": 0.4574,
+      "step": 480
+    },
+    {
+      "epoch": 0.4735172077129607,
+      "grad_norm": 2.121288144154755,
+      "learning_rate": 6.322901827370659e-06,
+      "loss": 0.4681,
+      "step": 485
+    },
+    {
+      "epoch": 0.4783988284110325,
+      "grad_norm": 2.131811769895187,
+      "learning_rate": 6.240475689828087e-06,
+      "loss": 0.4573,
+      "step": 490
+    },
+    {
+      "epoch": 0.4832804491091042,
+      "grad_norm": 1.9175130135112601,
+      "learning_rate": 6.1576887259532695e-06,
+      "loss": 0.4465,
+      "step": 495
+    },
+    {
+      "epoch": 0.48816206980717597,
+      "grad_norm": 1.8070710350230668,
+      "learning_rate": 6.074565016602263e-06,
+      "loss": 0.4419,
+      "step": 500
+    },
+    {
+      "epoch": 0.49304369050524777,
+      "grad_norm": 1.99048575433868,
+      "learning_rate": 5.991128740582774e-06,
+      "loss": 0.4504,
+      "step": 505
+    },
+    {
+      "epoch": 0.4979253112033195,
+      "grad_norm": 1.9210921458678536,
+      "learning_rate": 5.907404167621087e-06,
+      "loss": 0.4406,
+      "step": 510
+    },
+    {
+      "epoch": 0.5028069319013913,
+      "grad_norm": 2.009882308013614,
+      "learning_rate": 5.823415651302545e-06,
+      "loss": 0.4458,
+      "step": 515
+    },
+    {
+      "epoch": 0.507688552599463,
+      "grad_norm": 1.897100615945393,
+      "learning_rate": 5.739187621987649e-06,
+      "loss": 0.4379,
+      "step": 520
+    },
+    {
+      "epoch": 0.5125701732975347,
+      "grad_norm": 1.907463353966728,
+      "learning_rate": 5.654744579705815e-06,
+      "loss": 0.4392,
+      "step": 525
+    },
+    {
+      "epoch": 0.5174517939956066,
+      "grad_norm": 2.112475304318316,
+      "learning_rate": 5.570111087028868e-06,
+      "loss": 0.4309,
+      "step": 530
+    },
+    {
+      "epoch": 0.5223334146936783,
+      "grad_norm": 1.9618257078116872,
+      "learning_rate": 5.4853117619263496e-06,
+      "loss": 0.4294,
+      "step": 535
+    },
+    {
+      "epoch": 0.5272150353917501,
+      "grad_norm": 2.023435280859463,
+      "learning_rate": 5.4003712706047055e-06,
+      "loss": 0.4412,
+      "step": 540
+    },
+    {
+      "epoch": 0.5320966560898218,
+      "grad_norm": 2.1255050556087003,
+      "learning_rate": 5.315314320332438e-06,
+      "loss": 0.4319,
+      "step": 545
+    },
+    {
+      "epoch": 0.5369782767878936,
+      "grad_norm": 2.0850824266858967,
+      "learning_rate": 5.230165652253329e-06,
+      "loss": 0.4365,
+      "step": 550
+    },
+    {
+      "epoch": 0.5418598974859653,
+      "grad_norm": 1.9630523459005746,
+      "learning_rate": 5.144950034189798e-06,
+      "loss": 0.4286,
+      "step": 555
+    },
+    {
+      "epoch": 0.546741518184037,
+      "grad_norm": 1.8594385781324942,
+      "learning_rate": 5.059692253438495e-06,
+      "loss": 0.4216,
+      "step": 560
+    },
+    {
+      "epoch": 0.5516231388821089,
+      "grad_norm": 1.9410469701406243,
+      "learning_rate": 4.97441710956025e-06,
+      "loss": 0.4146,
+      "step": 565
+    },
+    {
+      "epoch": 0.5565047595801806,
+      "grad_norm": 1.8926730305562134,
+      "learning_rate": 4.8891494071664315e-06,
+      "loss": 0.4243,
+      "step": 570
+    },
+    {
+      "epoch": 0.5613863802782524,
+      "grad_norm": 1.9153480931779545,
+      "learning_rate": 4.803913948703845e-06,
+      "loss": 0.4193,
+      "step": 575
+    },
+    {
+      "epoch": 0.5662680009763241,
+      "grad_norm": 1.9039605462186175,
+      "learning_rate": 4.71873552724027e-06,
+      "loss": 0.4098,
+      "step": 580
+    },
+    {
+      "epoch": 0.5711496216743959,
+      "grad_norm": 1.7928530873219377,
+      "learning_rate": 4.633638919252712e-06,
+      "loss": 0.4132,
+      "step": 585
+    },
+    {
+      "epoch": 0.5760312423724676,
+      "grad_norm": 1.8310404534316238,
+      "learning_rate": 4.548648877420481e-06,
+      "loss": 0.4056,
+      "step": 590
+    },
+    {
+      "epoch": 0.5809128630705395,
+      "grad_norm": 1.887050893521495,
+      "learning_rate": 4.463790123425209e-06,
+      "loss": 0.414,
+      "step": 595
+    },
+    {
+      "epoch": 0.5857944837686112,
+      "grad_norm": 1.8879105533071967,
+      "learning_rate": 4.379087340759861e-06,
+      "loss": 0.4105,
+      "step": 600
+    },
+    {
+      "epoch": 0.590676104466683,
+      "grad_norm": 1.8851096816683868,
+      "learning_rate": 4.294565167548866e-06,
+      "loss": 0.4097,
+      "step": 605
+    },
+    {
+      "epoch": 0.5955577251647547,
+      "grad_norm": 1.8909509373180151,
+      "learning_rate": 4.2102481893814504e-06,
+      "loss": 0.4038,
+      "step": 610
+    },
+    {
+      "epoch": 0.6004393458628264,
+      "grad_norm": 2.0267286135794564,
+      "learning_rate": 4.1261609321602406e-06,
+      "loss": 0.401,
+      "step": 615
+    },
+    {
+      "epoch": 0.6053209665608982,
+      "grad_norm": 1.8447881878140413,
+      "learning_rate": 4.042327854967231e-06,
+      "loss": 0.3998,
+      "step": 620
+    },
+    {
+      "epoch": 0.61020258725897,
+      "grad_norm": 1.8206856992603886,
+      "learning_rate": 3.958773342949196e-06,
+      "loss": 0.3957,
+      "step": 625
+    },
+    {
+      "epoch": 0.6150842079570418,
+      "grad_norm": 1.989687612307625,
+      "learning_rate": 3.875521700224598e-06,
+      "loss": 0.3897,
+      "step": 630
+    },
+    {
+      "epoch": 0.6199658286551135,
+      "grad_norm": 1.7577176528030285,
+      "learning_rate": 3.7925971428140655e-06,
+      "loss": 0.3974,
+      "step": 635
+    },
+    {
+      "epoch": 0.6248474493531853,
+      "grad_norm": 1.7992538376367235,
+      "learning_rate": 3.71002379159651e-06,
+      "loss": 0.3933,
+      "step": 640
+    },
+    {
+      "epoch": 0.629729070051257,
+      "grad_norm": 1.838523059460591,
+      "learning_rate": 3.627825665292899e-06,
+      "loss": 0.3961,
+      "step": 645
+    },
+    {
+      "epoch": 0.6346106907493287,
+      "grad_norm": 1.881394659994862,
+      "learning_rate": 3.546026673479755e-06,
+      "loss": 0.3938,
+      "step": 650
+    },
+    {
+      "epoch": 0.6394923114474005,
+      "grad_norm": 1.879760457077884,
+      "learning_rate": 3.464650609634403e-06,
+      "loss": 0.3929,
+      "step": 655
+    },
+    {
+      "epoch": 0.6443739321454723,
+      "grad_norm": 1.9193591824291845,
+      "learning_rate": 3.383721144213985e-06,
+      "loss": 0.3869,
+      "step": 660
+    },
+    {
+      "epoch": 0.6492555528435441,
+      "grad_norm": 2.0119373139170724,
+      "learning_rate": 3.3032618177702546e-06,
+      "loss": 0.3851,
+      "step": 665
+    },
+    {
+      "epoch": 0.6541371735416158,
+      "grad_norm": 1.9053960768739056,
+      "learning_rate": 3.2232960341021703e-06,
+      "loss": 0.3874,
+      "step": 670
+    },
+    {
+      "epoch": 0.6590187942396876,
+      "grad_norm": 1.8532461602692916,
+      "learning_rate": 3.1438470534482547e-06,
+      "loss": 0.3822,
+      "step": 675
+    },
+    {
+      "epoch": 0.6639004149377593,
+      "grad_norm": 1.9222824143537547,
+      "learning_rate": 3.064937985720717e-06,
+      "loss": 0.3737,
+      "step": 680
+    },
+    {
+      "epoch": 0.668782035635831,
+      "grad_norm": 1.7942220113298164,
+      "learning_rate": 2.9865917837833025e-06,
+      "loss": 0.3868,
+      "step": 685
+    },
+    {
+      "epoch": 0.6736636563339029,
+      "grad_norm": 1.7376810506201046,
+      "learning_rate": 2.9088312367748257e-06,
+      "loss": 0.3763,
+      "step": 690
+    },
+    {
+      "epoch": 0.6785452770319746,
+      "grad_norm": 1.9201828792137692,
+      "learning_rate": 2.8316789634803223e-06,
+      "loss": 0.3865,
+      "step": 695
+    },
+    {
+      "epoch": 0.6834268977300464,
+      "grad_norm": 1.9845153126888537,
+      "learning_rate": 2.75515740575176e-06,
+      "loss": 0.372,
+      "step": 700
+    },
+    {
+      "epoch": 0.6883085184281181,
+      "grad_norm": 1.7986907948997257,
+      "learning_rate": 2.6792888219802017e-06,
+      "loss": 0.3727,
+      "step": 705
+    },
+    {
+      "epoch": 0.6931901391261899,
+      "grad_norm": 1.8814905894540856,
+      "learning_rate": 2.604095280621354e-06,
+      "loss": 0.3719,
+      "step": 710
+    },
+    {
+      "epoch": 0.6980717598242616,
+      "grad_norm": 1.8333636960019017,
+      "learning_rate": 2.529598653776349e-06,
+      "loss": 0.3825,
+      "step": 715
+    },
+    {
+      "epoch": 0.7029533805223334,
+      "grad_norm": 1.973156240281083,
+      "learning_rate": 2.4558206108296394e-06,
+      "loss": 0.3666,
+      "step": 720
+    },
+    {
+      "epoch": 0.7078350012204052,
+      "grad_norm": 1.8104849203150244,
+      "learning_rate": 2.3827826121458713e-06,
+      "loss": 0.3681,
+      "step": 725
+    },
+    {
+      "epoch": 0.712716621918477,
+      "grad_norm": 1.7648438350645508,
+      "learning_rate": 2.3105059028275467e-06,
+      "loss": 0.3604,
+      "step": 730
+    },
+    {
+      "epoch": 0.7175982426165487,
+      "grad_norm": 1.7085215152593554,
+      "learning_rate": 2.2390115065352974e-06,
+      "loss": 0.3599,
+      "step": 735
+    },
+    {
+      "epoch": 0.7224798633146204,
+      "grad_norm": 1.7691508841496486,
+      "learning_rate": 2.16832021937259e-06,
+      "loss": 0.361,
+      "step": 740
+    },
+    {
+      "epoch": 0.7273614840126922,
+      "grad_norm": 1.7631951882565822,
+      "learning_rate": 2.0984526038366005e-06,
+      "loss": 0.3648,
+      "step": 745
+    },
+    {
+      "epoch": 0.7322431047107639,
+      "grad_norm": 1.7905281564289197,
+      "learning_rate": 2.0294289828370506e-06,
+      "loss": 0.3593,
+      "step": 750
+    },
+    {
+      "epoch": 0.7371247254088358,
+      "grad_norm": 1.7614811796241294,
+      "learning_rate": 1.9612694337847334e-06,
+      "loss": 0.3636,
+      "step": 755
+    },
+    {
+      "epoch": 0.7420063461069075,
+      "grad_norm": 1.7427057794971907,
+      "learning_rate": 1.8939937827514509e-06,
+      "loss": 0.3514,
+      "step": 760
+    },
+    {
+      "epoch": 0.7468879668049793,
+      "grad_norm": 1.8108047484780578,
+      "learning_rate": 1.8276215987030489e-06,
+      "loss": 0.354,
+      "step": 765
+    },
+    {
+      "epoch": 0.751769587503051,
+      "grad_norm": 1.8460169409837044,
+      "learning_rate": 1.7621721878072601e-06,
+      "loss": 0.3536,
+      "step": 770
+    },
+    {
+      "epoch": 0.7566512082011227,
+      "grad_norm": 1.7192504649916136,
+      "learning_rate": 1.6976645878179677e-06,
+      "loss": 0.3523,
+      "step": 775
+    },
+    {
+      "epoch": 0.7615328288991945,
+      "grad_norm": 1.7659873554952885,
+      "learning_rate": 1.6341175625375554e-06,
+      "loss": 0.3556,
+      "step": 780
+    },
+    {
+      "epoch": 0.7664144495972663,
+      "grad_norm": 1.7642790853502184,
+      "learning_rate": 1.5715495963589434e-06,
+      "loss": 0.3505,
+      "step": 785
+    },
+    {
+      "epoch": 0.7712960702953381,
+      "grad_norm": 1.8214336145120926,
+      "learning_rate": 1.509978888888894e-06,
+      "loss": 0.3557,
+      "step": 790
+    },
+    {
+      "epoch": 0.7761776909934098,
+      "grad_norm": 1.693821479855387,
+      "learning_rate": 1.4494233496541548e-06,
+      "loss": 0.3533,
+      "step": 795
+    },
+    {
+      "epoch": 0.7810593116914816,
+      "grad_norm": 1.7741424308531102,
+      "learning_rate": 1.3899005928919901e-06,
+      "loss": 0.3436,
+      "step": 800
+    },
+    {
+      "epoch": 0.7859409323895533,
+      "grad_norm": 1.702567333467084,
+      "learning_rate": 1.3314279324265922e-06,
+      "loss": 0.3484,
+      "step": 805
+    },
+    {
+      "epoch": 0.7908225530876251,
+      "grad_norm": 1.7878487144245359,
+      "learning_rate": 1.2740223766328813e-06,
+      "loss": 0.3472,
+      "step": 810
+    },
+    {
+      "epoch": 0.7957041737856968,
+      "grad_norm": 1.8410165852894955,
+      "learning_rate": 1.2177006234891548e-06,
+      "loss": 0.3462,
+      "step": 815
+    },
+    {
+      "epoch": 0.8005857944837687,
+      "grad_norm": 1.5957050689906005,
+      "learning_rate": 1.1624790557200255e-06,
+      "loss": 0.3474,
+      "step": 820
+    },
+    {
+      "epoch": 0.8054674151818404,
+      "grad_norm": 1.7840672497407395,
+      "learning_rate": 1.1083737360310487e-06,
+      "loss": 0.3366,
+      "step": 825
+    },
+    {
+      "epoch": 0.8103490358799121,
+      "grad_norm": 1.7234306868229268,
+      "learning_rate": 1.0554004024364573e-06,
+      "loss": 0.3459,
+      "step": 830
+    },
+    {
+      "epoch": 0.8152306565779839,
+      "grad_norm": 1.8763987301580518,
+      "learning_rate": 1.0035744636813188e-06,
+      "loss": 0.3399,
+      "step": 835
+    },
+    {
+      "epoch": 0.8201122772760556,
+      "grad_norm": 1.8526209211475768,
+      "learning_rate": 9.529109947594834e-07,
+      "loss": 0.3414,
+      "step": 840
+    },
+    {
+      "epoch": 0.8249938979741274,
+      "grad_norm": 1.6663345867535264,
+      "learning_rate": 9.034247325286122e-07,
+      "loss": 0.3443,
+      "step": 845
+    },
+    {
+      "epoch": 0.8298755186721992,
+      "grad_norm": 1.7676921955192966,
+      "learning_rate": 8.551300714235494e-07,
+      "loss": 0.3427,
+      "step": 850
+    },
+    {
+      "epoch": 0.834757139370271,
+      "grad_norm": 1.7531428909936075,
+      "learning_rate": 8.080410592693183e-07,
+      "loss": 0.335,
+      "step": 855
+    },
+    {
+      "epoch": 0.8396387600683427,
+      "grad_norm": 1.8118502056267978,
+      "learning_rate": 7.621713931949181e-07,
+      "loss": 0.3484,
+      "step": 860
+    },
+    {
+      "epoch": 0.8445203807664144,
+      "grad_norm": 1.6928707382034682,
+      "learning_rate": 7.175344156491432e-07,
+      "loss": 0.3392,
+      "step": 865
+    },
+    {
+      "epoch": 0.8494020014644862,
+      "grad_norm": 1.7632689910362094,
+      "learning_rate": 6.741431105195623e-07,
+      "loss": 0.338,
+      "step": 870
+    },
+    {
+      "epoch": 0.8542836221625579,
+      "grad_norm": 1.7172615010190266,
+      "learning_rate": 6.32010099355806e-07,
+      "loss": 0.3386,
+      "step": 875
+    },
+    {
+      "epoch": 0.8591652428606297,
+      "grad_norm": 1.6578979212585563,
+      "learning_rate": 5.911476376982333e-07,
+      "loss": 0.3322,
+      "step": 880
+    },
+    {
+      "epoch": 0.8640468635587015,
+      "grad_norm": 1.6474380347956434,
+      "learning_rate": 5.515676115130819e-07,
+      "loss": 0.342,
+      "step": 885
+    },
+    {
+      "epoch": 0.8689284842567733,
+      "grad_norm": 1.6988649276270078,
+      "learning_rate": 5.132815337351038e-07,
+      "loss": 0.346,
+      "step": 890
+    },
+    {
+      "epoch": 0.873810104954845,
+      "grad_norm": 1.8306999314950443,
+      "learning_rate": 4.763005409187155e-07,
+      "loss": 0.3374,
+      "step": 895
+    },
+    {
+      "epoch": 0.8786917256529168,
+      "grad_norm": 1.631803186772389,
+      "learning_rate": 4.406353899986221e-07,
+      "loss": 0.3361,
+      "step": 900
+    },
+    {
+      "epoch": 0.8835733463509885,
+      "grad_norm": 1.709090450527214,
+      "learning_rate": 4.06296455160875e-07,
+      "loss": 0.3293,
+      "step": 905
+    },
+    {
+      "epoch": 0.8884549670490602,
+      "grad_norm": 1.583186301936809,
+      "learning_rate": 3.732937248252472e-07,
+      "loss": 0.3343,
+      "step": 910
+    },
+    {
+      "epoch": 0.8933365877471321,
+      "grad_norm": 1.6042173876546493,
+      "learning_rate": 3.416367987398345e-07,
+      "loss": 0.331,
+      "step": 915
+    },
+    {
+      "epoch": 0.8982182084452038,
+      "grad_norm": 1.6876494518887346,
+      "learning_rate": 3.113348851887038e-07,
+      "loss": 0.3292,
+      "step": 920
+    },
+    {
+      "epoch": 0.9030998291432756,
+      "grad_norm": 1.6392793033144468,
+      "learning_rate": 2.8239679831341126e-07,
+      "loss": 0.3296,
+      "step": 925
+    },
+    {
+      "epoch": 0.9079814498413473,
+      "grad_norm": 1.6466791755980095,
+      "learning_rate": 2.548309555491674e-07,
+      "loss": 0.3348,
+      "step": 930
+    },
+    {
+      "epoch": 0.9128630705394191,
+      "grad_norm": 1.6369494718827322,
+      "learning_rate": 2.2864537517639618e-07,
+      "loss": 0.3329,
+      "step": 935
+    },
+    {
+      "epoch": 0.9177446912374908,
+      "grad_norm": 1.7151132649607947,
+      "learning_rate": 2.038476739883982e-07,
+      "loss": 0.334,
+      "step": 940
+    },
+    {
+      "epoch": 0.9226263119355627,
+      "grad_norm": 1.7998544094471982,
+      "learning_rate": 1.804450650757972e-07,
+      "loss": 0.3366,
+      "step": 945
+    },
+    {
+      "epoch": 0.9275079326336344,
+      "grad_norm": 1.6113958011155078,
+      "learning_rate": 1.5844435572841544e-07,
+      "loss": 0.3199,
+      "step": 950
+    },
+    {
+      "epoch": 0.9323895533317061,
+      "grad_norm": 1.620629896748232,
+      "learning_rate": 1.3785194545518965e-07,
+      "loss": 0.3331,
+      "step": 955
+    },
+    {
+      "epoch": 0.9372711740297779,
+      "grad_norm": 1.570691984682264,
+      "learning_rate": 1.1867382412269257e-07,
+      "loss": 0.3266,
+      "step": 960
+    },
+    {
+      "epoch": 0.9421527947278496,
+      "grad_norm": 1.7541787796617982,
+      "learning_rate": 1.0091557021282283e-07,
+      "loss": 0.3349,
+      "step": 965
+    },
+    {
+      "epoch": 0.9470344154259214,
+      "grad_norm": 1.6021405868365253,
+      "learning_rate": 8.458234920014685e-08,
+      "loss": 0.3242,
+      "step": 970
+    },
+    {
+      "epoch": 0.9519160361239931,
+      "grad_norm": 1.6818332750612033,
+      "learning_rate": 6.967891204937737e-08,
+      "loss": 0.3272,
+      "step": 975
+    },
+    {
+      "epoch": 0.956797656822065,
+      "grad_norm": 1.6842336140953997,
+      "learning_rate": 5.620959383343061e-08,
+      "loss": 0.3335,
+      "step": 980
+    },
+    {
+      "epoch": 0.9616792775201367,
+      "grad_norm": 1.6333432398092111,
+      "learning_rate": 4.417831247244819e-08,
+      "loss": 0.3269,
+      "step": 985
+    },
+    {
+      "epoch": 0.9665608982182085,
+      "grad_norm": 1.5870095684725907,
+      "learning_rate": 3.3588567594161625e-08,
+      "loss": 0.3167,
+      "step": 990
+    },
+    {
+      "epoch": 0.9714425189162802,
+      "grad_norm": 1.645998653655925,
+      "learning_rate": 2.4443439515933754e-08,
+      "loss": 0.3345,
+      "step": 995
+    },
+    {
+      "epoch": 0.9763241396143519,
+      "grad_norm": 1.6627361867783679,
+      "learning_rate": 1.6745588348758836e-08,
+      "loss": 0.329,
+      "step": 1000
+    },
+    {
+      "epoch": 0.9812057603124237,
+      "grad_norm": 1.6968847442052823,
+      "learning_rate": 1.0497253223502035e-08,
+      "loss": 0.328,
+      "step": 1005
+    },
+    {
+      "epoch": 0.9860873810104955,
+      "grad_norm": 1.8141716265386114,
+      "learning_rate": 5.700251639581544e-09,
+      "loss": 0.3243,
+      "step": 1010
+    },
+    {
+      "epoch": 0.9909690017085673,
+      "grad_norm": 1.6444141321089352,
+      "learning_rate": 2.355978936303127e-09,
+      "loss": 0.3277,
+      "step": 1015
+    },
+    {
+      "epoch": 0.995850622406639,
+      "grad_norm": 1.5713108568101966,
+      "learning_rate": 4.6540788698534735e-10,
+      "loss": 0.3291,
+      "step": 1020
+    },
+    {
+      "epoch": 0.9997559189650964,
+      "eval_loss": 0.3360166847705841,
+      "eval_runtime": 96.8069,
+      "eval_samples_per_second": 3.12,
+      "eval_steps_per_second": 0.785,
+      "step": 1024
+    },
+    {
+      "epoch": 0.9997559189650964,
+      "step": 1024,
+      "total_flos": 214352422502400.0,
+      "train_loss": 0.48904780531302094,
+      "train_runtime": 23248.9436,
+      "train_samples_per_second": 1.409,
+      "train_steps_per_second": 0.044
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 1024,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 214352422502400.0,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}