End of training

Files changed:
- README.md (+20 -2)
- all_results.json (+12 -12)
- eval_results.json (+7 -7)
- runs/Jul21_23-32-54_78d944cbbe34/events.out.tfevents.1721607100.78d944cbbe34.21392.1 (+3 -0)
- train_results.json (+6 -6)
- trainer_state.json (+714 -5)
README.md
CHANGED

@@ -1,9 +1,24 @@
 ---
 tags:
 - generated_from_trainer
+datasets:
+- akahana/GlotCC-V1-jav-Latn
+metrics:
+- accuracy
 model-index:
 - name: tinygpt2-javanese
-  results: []
+  results:
+  - task:
+      name: Causal Language Modeling
+      type: text-generation
+    dataset:
+      name: akahana/GlotCC-V1-jav-Latn default
+      type: akahana/GlotCC-V1-jav-Latn
+      args: default
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.2786154321383402
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # tinygpt2-javanese
 
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
+It achieves the following results on the evaluation set:
+- Loss: 4.7648
+- Accuracy: 0.2786
 
 ## Model description
 
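The updated card still leaves the base-model link empty, so the checkpoint lineage is unrecorded, but the published weights load as a plain causal LM. A minimal usage sketch, assuming the repo id is akahana/tinygpt2-javanese (inferred from the dataset owner's namespace; the actual repo id is not shown on this page):

```python
# Hypothetical repo id: "akahana/tinygpt2-javanese" is an assumption,
# not confirmed by this page; substitute the real repo id.
from transformers import pipeline

generator = pipeline("text-generation", model="akahana/tinygpt2-javanese")

# "Aku arep" is Javanese for "I want"; any Latin-script Javanese prompt works.
print(generator("Aku arep", max_new_tokens=20)[0]["generated_text"])
```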
all_results.json
CHANGED

@@ -1,16 +1,16 @@
 {
-    "epoch":
-    "eval_accuracy": 0.
-    "eval_loss":
-    "eval_runtime":
+    "epoch": 30.0,
+    "eval_accuracy": 0.2786154321383402,
+    "eval_loss": 4.764777183532715,
+    "eval_runtime": 11.4146,
     "eval_samples": 4053,
-    "eval_samples_per_second":
-    "eval_steps_per_second":
-    "perplexity":
-    "total_flos":
-    "train_loss":
-    "train_runtime":
+    "eval_samples_per_second": 355.07,
+    "eval_steps_per_second": 88.833,
+    "perplexity": 117.30497689511513,
+    "total_flos": 733383917568000.0,
+    "train_loss": 1.6002090492649228,
+    "train_runtime": 2253.0761,
     "train_samples": 80219,
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_samples_per_second": 1068.126,
+    "train_steps_per_second": 66.762
 }
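The perplexity stored above is not an independent measurement; it is simply the exponential of the evaluation loss, as a quick check confirms:

```python
import math

eval_loss = 4.764777183532715
# exp(4.7648...) ≈ 117.30497689511513, the "perplexity" field above
print(math.exp(eval_loss))
```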
eval_results.json
CHANGED

@@ -1,10 +1,10 @@
 {
-    "epoch":
-    "eval_accuracy": 0.
-    "eval_loss":
-    "eval_runtime":
+    "epoch": 30.0,
+    "eval_accuracy": 0.2786154321383402,
+    "eval_loss": 4.764777183532715,
+    "eval_runtime": 11.4146,
     "eval_samples": 4053,
-    "eval_samples_per_second":
-    "eval_steps_per_second":
-    "perplexity":
+    "eval_samples_per_second": 355.07,
+    "eval_steps_per_second": 88.833,
+    "perplexity": 117.30497689511513
 }
runs/Jul21_23-32-54_78d944cbbe34/events.out.tfevents.1721607100.78d944cbbe34.21392.1
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2c4f7fe7d0d902e53fe05a8feb3b0176198da56613547c2c261aa31ff89215
+size 417
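The three lines above are a Git LFS pointer, not the event file itself: the oid names the blob by its SHA-256 and size gives its length in bytes (417). Tools such as huggingface_hub resolve the pointer transparently on download; a sketch, again with the assumed repo id:

```python
# Sketch: fetch the TensorBoard event file behind the LFS pointer.
# "akahana/tinygpt2-javanese" is an assumed repo id, as above.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="akahana/tinygpt2-javanese",
    filename="runs/Jul21_23-32-54_78d944cbbe34/events.out.tfevents.1721607100.78d944cbbe34.21392.1",
)
print(path)  # local cache path of the downloaded 417-byte event file
```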
train_results.json
CHANGED

@@ -1,9 +1,9 @@
 {
-    "epoch":
-    "total_flos":
-    "train_loss":
-    "train_runtime":
+    "epoch": 30.0,
+    "total_flos": 733383917568000.0,
+    "train_loss": 1.6002090492649228,
+    "train_runtime": 2253.0761,
     "train_samples": 80219,
-    "train_samples_per_second":
-    "train_steps_per_second":
+    "train_samples_per_second": 1068.126,
+    "train_steps_per_second": 66.762
 }
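The throughput figures follow directly from the other fields: 80,219 samples over 30 epochs in 2253.0761 s gives the reported samples per second, and the 150,420 optimizer steps recorded in trainer_state.json below give the steps per second:

```python
train_samples = 80219
num_epochs = 30.0
train_runtime_s = 2253.0761
global_step = 150420  # from trainer_state.json below

print(train_samples * num_epochs / train_runtime_s)  # ≈ 1068.126 samples/s
print(global_step / train_runtime_s)                 # ≈ 66.762 steps/s
```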
trainer_state.json
CHANGED

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 30.0,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 150420,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1416,12 +1416,721 @@
       "train_runtime": 4495.3437,
       "train_samples_per_second": 356.898,
       "train_steps_per_second": 22.308
+    },
+    {
+      "epoch": 20.04387714399681,
+      "grad_norm": 2.977564573287964,
+      "learning_rate": 4.992687142667199e-05,
+      "loss": 5.0172,
+      "step": 100500
+    },
+    {
+      "epoch": 20.14359792580774,
+      "grad_norm": 2.9825730323791504,
+      "learning_rate": 4.976067012365378e-05,
+      "loss": 5.0479,
+      "step": 101000
+    },
+    {
+      "epoch": 20.243318707618666,
+      "grad_norm": 3.019160747528076,
+      "learning_rate": 4.959446882063555e-05,
+      "loss": 5.0469,
+      "step": 101500
+    },
+    {
+      "epoch": 20.343039489429597,
+      "grad_norm": 3.1838183403015137,
+      "learning_rate": 4.942826751761734e-05,
+      "loss": 5.0334,
+      "step": 102000
+    },
+    {
+      "epoch": 20.442760271240527,
+      "grad_norm": 3.0762367248535156,
+      "learning_rate": 4.926206621459912e-05,
+      "loss": 5.0445,
+      "step": 102500
+    },
+    {
+      "epoch": 20.542481053051457,
+      "grad_norm": 3.038895606994629,
+      "learning_rate": 4.909586491158091e-05,
+      "loss": 4.9971,
+      "step": 103000
+    },
+    {
+      "epoch": 20.642201834862384,
+      "grad_norm": 3.1863033771514893,
+      "learning_rate": 4.892966360856269e-05,
+      "loss": 5.0347,
+      "step": 103500
+    },
+    {
+      "epoch": 20.741922616673314,
+      "grad_norm": 2.8989017009735107,
+      "learning_rate": 4.876346230554448e-05,
+      "loss": 5.0353,
+      "step": 104000
+    },
+    {
+      "epoch": 20.841643398484244,
+      "grad_norm": 2.9284589290618896,
+      "learning_rate": 4.85975934051323e-05,
+      "loss": 5.025,
+      "step": 104500
+    },
+    {
+      "epoch": 20.941364180295174,
+      "grad_norm": 2.8213396072387695,
+      "learning_rate": 4.843139210211408e-05,
+      "loss": 5.0041,
+      "step": 105000
+    },
+    {
+      "epoch": 21.041084962106105,
+      "grad_norm": 3.0717594623565674,
+      "learning_rate": 4.826519079909587e-05,
+      "loss": 4.9886,
+      "step": 105500
+    },
+    {
+      "epoch": 21.14080574391703,
+      "grad_norm": 2.702904224395752,
+      "learning_rate": 4.809898949607765e-05,
+      "loss": 4.956,
+      "step": 106000
+    },
+    {
+      "epoch": 21.24052652572796,
+      "grad_norm": 2.5885391235351562,
+      "learning_rate": 4.793312059566547e-05,
+      "loss": 4.9718,
+      "step": 106500
+    },
+    {
+      "epoch": 21.340247307538892,
+      "grad_norm": 2.9367082118988037,
+      "learning_rate": 4.776691929264726e-05,
+      "loss": 4.9589,
+      "step": 107000
+    },
+    {
+      "epoch": 21.439968089349822,
+      "grad_norm": 2.8302552700042725,
+      "learning_rate": 4.760071798962904e-05,
+      "loss": 4.9731,
+      "step": 107500
+    },
+    {
+      "epoch": 21.53968887116075,
+      "grad_norm": 3.23287296295166,
+      "learning_rate": 4.743451668661083e-05,
+      "loss": 4.9309,
+      "step": 108000
+    },
+    {
+      "epoch": 21.63940965297168,
+      "grad_norm": 2.868462562561035,
+      "learning_rate": 4.7268647786198647e-05,
+      "loss": 4.9436,
+      "step": 108500
+    },
+    {
+      "epoch": 21.73913043478261,
+      "grad_norm": 2.8602261543273926,
+      "learning_rate": 4.710244648318043e-05,
+      "loss": 4.9688,
+      "step": 109000
+    },
+    {
+      "epoch": 21.83885121659354,
+      "grad_norm": 3.121124505996704,
+      "learning_rate": 4.6936245180162217e-05,
+      "loss": 4.9444,
+      "step": 109500
+    },
+    {
+      "epoch": 21.938571998404466,
+      "grad_norm": 2.777409553527832,
+      "learning_rate": 4.6770043877144e-05,
+      "loss": 4.9165,
+      "step": 110000
+    },
+    {
+      "epoch": 22.038292780215397,
+      "grad_norm": 2.9804909229278564,
+      "learning_rate": 4.660417497673182e-05,
+      "loss": 4.9534,
+      "step": 110500
+    },
+    {
+      "epoch": 22.138013562026327,
+      "grad_norm": 3.034639596939087,
+      "learning_rate": 4.6437973673713605e-05,
+      "loss": 4.9069,
+      "step": 111000
+    },
+    {
+      "epoch": 22.237734343837257,
+      "grad_norm": 2.7168800830841064,
+      "learning_rate": 4.627177237069539e-05,
+      "loss": 4.8861,
+      "step": 111500
+    },
+    {
+      "epoch": 22.337455125648184,
+      "grad_norm": 2.8833560943603516,
+      "learning_rate": 4.6105571067677175e-05,
+      "loss": 4.884,
+      "step": 112000
+    },
+    {
+      "epoch": 22.437175907459114,
+      "grad_norm": 2.8463797569274902,
+      "learning_rate": 4.5939702167264994e-05,
+      "loss": 4.9167,
+      "step": 112500
+    },
+    {
+      "epoch": 22.536896689270044,
+      "grad_norm": 2.765068531036377,
+      "learning_rate": 4.5773500864246776e-05,
+      "loss": 4.8929,
+      "step": 113000
+    },
+    {
+      "epoch": 22.636617471080974,
+      "grad_norm": 2.7801401615142822,
+      "learning_rate": 4.5607299561228564e-05,
+      "loss": 4.8934,
+      "step": 113500
+    },
+    {
+      "epoch": 22.7363382528919,
+      "grad_norm": 2.883640766143799,
+      "learning_rate": 4.5441098258210346e-05,
+      "loss": 4.8918,
+      "step": 114000
+    },
+    {
+      "epoch": 22.83605903470283,
+      "grad_norm": 3.0070436000823975,
+      "learning_rate": 4.5275229357798165e-05,
+      "loss": 4.894,
+      "step": 114500
+    },
+    {
+      "epoch": 22.93577981651376,
+      "grad_norm": 3.1484322547912598,
+      "learning_rate": 4.510902805477995e-05,
+      "loss": 4.8752,
+      "step": 115000
+    },
+    {
+      "epoch": 23.035500598324692,
+      "grad_norm": 3.016380786895752,
+      "learning_rate": 4.4942826751761735e-05,
+      "loss": 4.8612,
+      "step": 115500
+    },
+    {
+      "epoch": 23.13522138013562,
+      "grad_norm": 3.0375137329101562,
+      "learning_rate": 4.477662544874352e-05,
+      "loss": 4.8517,
+      "step": 116000
+    },
+    {
+      "epoch": 23.23494216194655,
+      "grad_norm": 2.926248073577881,
+      "learning_rate": 4.461075654833134e-05,
+      "loss": 4.834,
+      "step": 116500
+    },
+    {
+      "epoch": 23.33466294375748,
+      "grad_norm": 2.898101806640625,
+      "learning_rate": 4.444455524531312e-05,
+      "loss": 4.8456,
+      "step": 117000
+    },
+    {
+      "epoch": 23.43438372556841,
+      "grad_norm": 2.9906890392303467,
+      "learning_rate": 4.427835394229491e-05,
+      "loss": 4.8431,
+      "step": 117500
+    },
+    {
+      "epoch": 23.53410450737934,
+      "grad_norm": 2.9021828174591064,
+      "learning_rate": 4.4112152639276693e-05,
+      "loss": 4.8362,
+      "step": 118000
+    },
+    {
+      "epoch": 23.633825289190266,
+      "grad_norm": 2.9854063987731934,
+      "learning_rate": 4.394628373886451e-05,
+      "loss": 4.8508,
+      "step": 118500
+    },
+    {
+      "epoch": 23.733546071001197,
+      "grad_norm": 2.959423780441284,
+      "learning_rate": 4.37800824358463e-05,
+      "loss": 4.8395,
+      "step": 119000
+    },
+    {
+      "epoch": 23.833266852812127,
+      "grad_norm": 3.267308235168457,
+      "learning_rate": 4.361388113282808e-05,
+      "loss": 4.8467,
+      "step": 119500
+    },
+    {
+      "epoch": 23.932987634623057,
+      "grad_norm": 2.9600274562835693,
+      "learning_rate": 4.344767982980987e-05,
+      "loss": 4.8405,
+      "step": 120000
+    },
+    {
+      "epoch": 24.032708416433984,
+      "grad_norm": 3.0417428016662598,
+      "learning_rate": 4.328181092939769e-05,
+      "loss": 4.8078,
+      "step": 120500
+    },
+    {
+      "epoch": 24.132429198244914,
+      "grad_norm": 3.029172897338867,
+      "learning_rate": 4.311560962637947e-05,
+      "loss": 4.8216,
+      "step": 121000
+    },
+    {
+      "epoch": 24.232149980055844,
+      "grad_norm": 2.846696376800537,
+      "learning_rate": 4.294940832336126e-05,
+      "loss": 4.814,
+      "step": 121500
+    },
+    {
+      "epoch": 24.331870761866774,
+      "grad_norm": 3.2993550300598145,
+      "learning_rate": 4.278320702034304e-05,
+      "loss": 4.7863,
+      "step": 122000
+    },
+    {
+      "epoch": 24.4315915436777,
+      "grad_norm": 3.039426803588867,
+      "learning_rate": 4.261733811993086e-05,
+      "loss": 4.7851,
+      "step": 122500
+    },
+    {
+      "epoch": 24.53131232548863,
+      "grad_norm": 2.8034543991088867,
+      "learning_rate": 4.245113681691265e-05,
+      "loss": 4.7997,
+      "step": 123000
+    },
+    {
+      "epoch": 24.63103310729956,
+      "grad_norm": 3.0070390701293945,
+      "learning_rate": 4.228493551389443e-05,
+      "loss": 4.8009,
+      "step": 123500
+    },
+    {
+      "epoch": 24.730753889110492,
+      "grad_norm": 2.9534358978271484,
+      "learning_rate": 4.211873421087622e-05,
+      "loss": 4.8079,
+      "step": 124000
+    },
+    {
+      "epoch": 24.83047467092142,
+      "grad_norm": 3.184213638305664,
+      "learning_rate": 4.195286531046404e-05,
+      "loss": 4.7826,
+      "step": 124500
+    },
+    {
+      "epoch": 24.93019545273235,
+      "grad_norm": 2.946760416030884,
+      "learning_rate": 4.178666400744582e-05,
+      "loss": 4.7941,
+      "step": 125000
+    },
+    {
+      "epoch": 25.02991623454328,
+      "grad_norm": 2.929389238357544,
+      "learning_rate": 4.162046270442761e-05,
+      "loss": 4.7783,
+      "step": 125500
+    },
+    {
+      "epoch": 25.12963701635421,
+      "grad_norm": 2.9876906871795654,
+      "learning_rate": 4.145426140140939e-05,
+      "loss": 4.7433,
+      "step": 126000
+    },
+    {
+      "epoch": 25.229357798165136,
+      "grad_norm": 2.9121735095977783,
+      "learning_rate": 4.128839250099721e-05,
+      "loss": 4.7545,
+      "step": 126500
+    },
+    {
+      "epoch": 25.329078579976066,
+      "grad_norm": 2.848165273666382,
+      "learning_rate": 4.1122191197978996e-05,
+      "loss": 4.7756,
+      "step": 127000
+    },
+    {
+      "epoch": 25.428799361786997,
+      "grad_norm": 2.955857515335083,
+      "learning_rate": 4.095598989496078e-05,
+      "loss": 4.7686,
+      "step": 127500
+    },
+    {
+      "epoch": 25.528520143597927,
+      "grad_norm": 3.084696054458618,
+      "learning_rate": 4.0789788591942566e-05,
+      "loss": 4.756,
+      "step": 128000
+    },
+    {
+      "epoch": 25.628240925408853,
+      "grad_norm": 2.993539571762085,
+      "learning_rate": 4.0623919691530384e-05,
+      "loss": 4.7699,
+      "step": 128500
+    },
+    {
+      "epoch": 25.727961707219784,
+      "grad_norm": 3.0663325786590576,
+      "learning_rate": 4.0457718388512166e-05,
+      "loss": 4.7594,
+      "step": 129000
+    },
+    {
+      "epoch": 25.827682489030714,
+      "grad_norm": 3.0915310382843018,
+      "learning_rate": 4.0291517085493954e-05,
+      "loss": 4.768,
+      "step": 129500
+    },
+    {
+      "epoch": 25.927403270841644,
+      "grad_norm": 2.790329933166504,
+      "learning_rate": 4.0125315782475736e-05,
+      "loss": 4.7519,
+      "step": 130000
+    },
+    {
+      "epoch": 26.027124052652574,
+      "grad_norm": 3.1589112281799316,
+      "learning_rate": 3.9959446882063555e-05,
+      "loss": 4.7383,
+      "step": 130500
+    },
+    {
+      "epoch": 26.1268448344635,
+      "grad_norm": 2.9991183280944824,
+      "learning_rate": 3.979324557904534e-05,
+      "loss": 4.7297,
+      "step": 131000
+    },
+    {
+      "epoch": 26.22656561627443,
+      "grad_norm": 2.959322452545166,
+      "learning_rate": 3.9627044276027125e-05,
+      "loss": 4.723,
+      "step": 131500
+    },
+    {
+      "epoch": 26.32628639808536,
+      "grad_norm": 2.9168314933776855,
+      "learning_rate": 3.946084297300891e-05,
+      "loss": 4.7143,
+      "step": 132000
+    },
+    {
+      "epoch": 26.426007179896292,
+      "grad_norm": 2.9729034900665283,
+      "learning_rate": 3.929497407259673e-05,
+      "loss": 4.7176,
+      "step": 132500
+    },
+    {
+      "epoch": 26.52572796170722,
+      "grad_norm": 2.762373685836792,
+      "learning_rate": 3.9128772769578514e-05,
+      "loss": 4.7353,
+      "step": 133000
+    },
+    {
+      "epoch": 26.62544874351815,
+      "grad_norm": 3.2931153774261475,
+      "learning_rate": 3.89625714665603e-05,
+      "loss": 4.7069,
+      "step": 133500
+    },
+    {
+      "epoch": 26.72516952532908,
+      "grad_norm": 3.129920482635498,
+      "learning_rate": 3.8796370163542084e-05,
+      "loss": 4.7253,
+      "step": 134000
+    },
+    {
+      "epoch": 26.82489030714001,
+      "grad_norm": 3.0690855979919434,
+      "learning_rate": 3.86305012631299e-05,
+      "loss": 4.744,
+      "step": 134500
+    },
+    {
+      "epoch": 26.924611088950936,
+      "grad_norm": 2.957228183746338,
+      "learning_rate": 3.846429996011169e-05,
+      "loss": 4.7287,
+      "step": 135000
+    },
+    {
+      "epoch": 27.024331870761866,
+      "grad_norm": 2.922133445739746,
+      "learning_rate": 3.829809865709347e-05,
+      "loss": 4.7231,
+      "step": 135500
+    },
+    {
+      "epoch": 27.124052652572797,
+      "grad_norm": 3.0305354595184326,
+      "learning_rate": 3.813189735407526e-05,
+      "loss": 4.6755,
+      "step": 136000
+    },
+    {
+      "epoch": 27.223773434383727,
+      "grad_norm": 2.9898860454559326,
+      "learning_rate": 3.796602845366308e-05,
+      "loss": 4.6737,
+      "step": 136500
+    },
+    {
+      "epoch": 27.323494216194653,
+      "grad_norm": 3.0518152713775635,
+      "learning_rate": 3.779982715064486e-05,
+      "loss": 4.7161,
+      "step": 137000
+    },
+    {
+      "epoch": 27.423214998005584,
+      "grad_norm": 3.14530086517334,
+      "learning_rate": 3.763362584762665e-05,
+      "loss": 4.6827,
+      "step": 137500
+    },
+    {
+      "epoch": 27.522935779816514,
+      "grad_norm": 2.9844906330108643,
+      "learning_rate": 3.746742454460843e-05,
+      "loss": 4.7015,
+      "step": 138000
+    },
+    {
+      "epoch": 27.622656561627444,
+      "grad_norm": 3.1187822818756104,
+      "learning_rate": 3.730155564419625e-05,
+      "loss": 4.6907,
+      "step": 138500
+    },
+    {
+      "epoch": 27.72237734343837,
+      "grad_norm": 3.1447060108184814,
+      "learning_rate": 3.713535434117804e-05,
+      "loss": 4.6979,
+      "step": 139000
+    },
+    {
+      "epoch": 27.8220981252493,
+      "grad_norm": 3.2830941677093506,
+      "learning_rate": 3.696915303815982e-05,
+      "loss": 4.7023,
+      "step": 139500
+    },
+    {
+      "epoch": 27.92181890706023,
+      "grad_norm": 2.969634532928467,
+      "learning_rate": 3.680295173514161e-05,
+      "loss": 4.7053,
+      "step": 140000
+    },
+    {
+      "epoch": 28.02153968887116,
+      "grad_norm": 3.082902431488037,
+      "learning_rate": 3.663708283472943e-05,
+      "loss": 4.6914,
+      "step": 140500
+    },
+    {
+      "epoch": 28.121260470682092,
+      "grad_norm": 3.165813446044922,
+      "learning_rate": 3.647088153171121e-05,
+      "loss": 4.6726,
+      "step": 141000
+    },
+    {
+      "epoch": 28.22098125249302,
+      "grad_norm": 3.1427435874938965,
+      "learning_rate": 3.6304680228693e-05,
+      "loss": 4.6586,
+      "step": 141500
+    },
+    {
+      "epoch": 28.32070203430395,
+      "grad_norm": 3.179264545440674,
+      "learning_rate": 3.613847892567478e-05,
+      "loss": 4.6576,
+      "step": 142000
+    },
+    {
+      "epoch": 28.42042281611488,
+      "grad_norm": 3.1044764518737793,
+      "learning_rate": 3.59726100252626e-05,
+      "loss": 4.6713,
+      "step": 142500
+    },
+    {
+      "epoch": 28.52014359792581,
+      "grad_norm": 3.049412488937378,
+      "learning_rate": 3.5806408722244386e-05,
+      "loss": 4.6702,
+      "step": 143000
+    },
+    {
+      "epoch": 28.619864379736736,
+      "grad_norm": 3.128653049468994,
+      "learning_rate": 3.564020741922617e-05,
+      "loss": 4.6872,
+      "step": 143500
+    },
+    {
+      "epoch": 28.719585161547666,
+      "grad_norm": 3.13429856300354,
+      "learning_rate": 3.5474006116207956e-05,
+      "loss": 4.6487,
+      "step": 144000
+    },
+    {
+      "epoch": 28.819305943358597,
+      "grad_norm": 3.0185248851776123,
+      "learning_rate": 3.530780481318974e-05,
+      "loss": 4.68,
+      "step": 144500
+    },
+    {
+      "epoch": 28.919026725169527,
+      "grad_norm": 2.990931749343872,
+      "learning_rate": 3.5141935912777556e-05,
+      "loss": 4.669,
+      "step": 145000
+    },
+    {
+      "epoch": 29.018747506980453,
+      "grad_norm": 2.9707412719726562,
+      "learning_rate": 3.4975734609759345e-05,
+      "loss": 4.663,
+      "step": 145500
+    },
+    {
+      "epoch": 29.118468288791384,
+      "grad_norm": 3.247962713241577,
+      "learning_rate": 3.4809533306741126e-05,
+      "loss": 4.6391,
+      "step": 146000
+    },
+    {
+      "epoch": 29.218189070602314,
+      "grad_norm": 3.135483503341675,
+      "learning_rate": 3.4643332003722915e-05,
+      "loss": 4.6368,
+      "step": 146500
+    },
+    {
+      "epoch": 29.317909852413244,
+      "grad_norm": 3.4479868412017822,
+      "learning_rate": 3.4477463103310734e-05,
+      "loss": 4.6437,
+      "step": 147000
+    },
+    {
+      "epoch": 29.41763063422417,
+      "grad_norm": 3.3987677097320557,
+      "learning_rate": 3.4311261800292515e-05,
+      "loss": 4.6635,
+      "step": 147500
+    },
+    {
+      "epoch": 29.5173514160351,
+      "grad_norm": 3.153754234313965,
+      "learning_rate": 3.4145060497274304e-05,
+      "loss": 4.6128,
+      "step": 148000
+    },
+    {
+      "epoch": 29.61707219784603,
+      "grad_norm": 3.29654860496521,
+      "learning_rate": 3.3978859194256085e-05,
+      "loss": 4.664,
+      "step": 148500
+    },
+    {
+      "epoch": 29.71679297965696,
+      "grad_norm": 3.0110297203063965,
+      "learning_rate": 3.3812990293843904e-05,
+      "loss": 4.6438,
+      "step": 149000
+    },
+    {
+      "epoch": 29.81651376146789,
+      "grad_norm": 3.0456008911132812,
+      "learning_rate": 3.364678899082569e-05,
+      "loss": 4.6476,
+      "step": 149500
+    },
+    {
+      "epoch": 29.91623454327882,
+      "grad_norm": 3.3188984394073486,
+      "learning_rate": 3.3480587687807474e-05,
+      "loss": 4.6508,
+      "step": 150000
+    },
+    {
+      "epoch": 30.0,
+      "step": 150420,
+      "total_flos": 733383917568000.0,
+      "train_loss": 1.6002090492649228,
+      "train_runtime": 2253.0761,
+      "train_samples_per_second": 1068.126,
+      "train_steps_per_second": 66.762
     }
   ],
   "logging_steps": 500,
-  "max_steps":
+  "max_steps": 150420,
   "num_input_tokens_seen": 0,
-  "num_train_epochs":
+  "num_train_epochs": 30,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -1435,7 +2144,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 733383917568000.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
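The state file's bookkeeping is internally consistent. With 80,219 training samples and the recorded train_batch_size of 16 (assuming a single device and no gradient accumulation), one epoch is ceil(80219 / 16) = 5014 optimizer steps, so 30 epochs give exactly max_steps = 150420. The logged learning rates also fall by a constant 5e-5 / 150420 per step, which matches a linear decay over max_steps from an initial rate of 5e-5; that initial rate is not recorded on this page, so treat it as an inference. A quick check:

```python
import math

train_samples, batch_size, epochs = 80219, 16, 30
steps_per_epoch = math.ceil(train_samples / batch_size)  # 5014
print(steps_per_epoch * epochs)  # 150420, equal to "max_steps"

# Per-step learning-rate decrement between two logged points:
lr_a = 4.992687142667199e-05  # step 100500
lr_b = 4.976067012365378e-05  # step 101000
print((lr_a - lr_b) / 500)    # ≈ 3.324e-10
print(5e-5 / 150420)          # ≈ 3.324e-10 (assumed initial lr of 5e-5)
```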